Ejemplo n.º 1
0
def test_unique_kegg_names():
    """Print how often ``tax`` values repeat in the KEGG organism list.

    The text returned by ``Kegg.get_org_list()`` is split on newlines and
    parsed as tab-separated rows with the columns ``code``, ``org`` and
    ``tax``.  The function prints the largest number of rows that share a
    single ``tax`` value and then, for every count from that maximum down
    to 2, how many distinct ``tax`` values occur exactly that many times.

    Nothing is asserted and nothing is returned; the output is purely
    diagnostic.  An empty organism list reports a maximum of 0 instead of
    failing with ``ValueError`` from ``max()`` on an empty sequence.
    """
    from collections import Counter

    kegg_list = Kegg.get_org_list()
    reader = csv.DictReader(
        kegg_list.split('\n'),
        delimiter='\t',
        fieldnames=('code', 'org', 'tax')
    )

    # Number of rows carrying each tax value.
    tax_counts = Counter(row['tax'] for row in reader)
    # Number of tax values per occurrence count, tallied once so the
    # report loop below does not rescan every value for each count.
    values_per_count = Counter(tax_counts.values())

    max_val = max(values_per_count) if values_per_count else 0

    # A single pre-formatted argument prints the same text whether
    # ``print`` is parsed as a statement or called as a function.
    print("Max val: %s" % max_val)
    for count in range(max_val, 1, -1):
        # A missing key in a Counter reads as 0, matching an empty match list.
        print("# %s  count: %s" % (count, values_per_count[count]))
Ejemplo n.º 2
0
def initdb():
    """Create the database and seed it with organisms, biomodels and a user.

    Steps, in order:

    1. Call ``db.create_all()``.
    2. Fetch the organism list via ``Kegg.get_org_list()`` and parse it as
       tab-separated rows with the columns ``code``, ``org`` and ``tax``.
    3. For every row whose ``org`` matches the ``organism`` field of some
       ``BiomodelMongo`` document and whose ``tax`` has not been inserted
       yet, save one ``Organism`` and one ``Biomodel``.
    4. Save a single ``User`` record.

    Progress is reported through ``logging.info``.
    """
    db.create_all()

    logging.info('Getting kegg organism list...')
    kegg_list = Kegg.get_org_list()

    # Only the 'organism' and 'name' fields are requested; the original
    # author noted that a memory error occurs otherwise.
    names_by_org = {}
    for model in BiomodelMongo.objects.only('organism', 'name').all():
        names_by_org[model.organism] = model.name
    known_orgs = set(names_by_org)

    # Tax names already inserted, so the same name is never stored twice.
    seen_names = set()

    logging.info('Insertion begins...')
    reader = csv.DictReader(
        kegg_list.split('\n'),
        delimiter='\t',
        fieldnames=('code', 'org', 'tax')
    )
    for record in reader:
        code = record['code']
        org = record['org']
        tax = record['tax']

        if org not in known_orgs:
            continue
        if tax in seen_names:
            continue

        Organism(code, org, tax).save()
        Biomodel(name=tax, kegg_org=org).save()
        # Remember the name only after both records were saved.
        seen_names.add(tax)

    # Insert the user record.
    User(username='******', email='*****@*****.**', password='******').save()

    logging.info('Done !')