def main():
    """Load the parsed UniProt dump into MongoDB and log gene coverage.

    Steps, in order:

    1. Read ``./ecoliparse.txt``, turn its single quotes into double quotes
       so it parses as JSON, and (re)create the ``uniprot`` collection from
       it, indexed on ``gene_name``.
    2. Collect every gene listed under ``GENES`` of ``Enzyme`` nodes; a gene
       encountered more than once is also recorded as a "multi enzyme" gene.
    3. Collect the ``Gene`` field of every product whose ``TYPE`` is not
       ``Protein``.
    4. For every ``Gene`` node that ``search_exists`` does not report as
       known, look it up in ``uniprot`` and log whether a record exists.
       Nothing is written to the database in this step; matches are only
       counted and logged.
    5. Write the summary counters and close both log files.

    Relies on names defined outside this function: ``db``, ``json``, ``fp``,
    ``fp_not_found``, ``search_exists``, ``num_with_TF``, ``num_with_Enzyme``
    and ``num_with_RNA``.
    """
    num_not_found_in_uniprot = 0

    # The dump is written with single quotes; JSON requires double quotes.
    # The context manager guarantees the file handle is released.
    with open('./ecoliparse.txt', 'r') as dump_file:
        text = dump_file.read().replace('\'', '\"')
    records = json.loads(text)
    db.drop_collection('uniprot')
    db.uniprot.insert(records)
    db.uniprot.create_index('gene_name')

    # These stay lists: they are passed to search_exists() and their str()
    # form (including element order) is written to the log below.
    gene_exist_list = []
    gene_multi_exit_list = []
    rna_gene_list = []
    for enzyme in db.node.find({'TYPE': 'Enzyme'}):
        for gene in enzyme['GENES']:
            if gene not in gene_exist_list:
                gene_exist_list.append(gene)
            elif gene not in gene_multi_exit_list:
                # Second (or later) sighting of an already known gene.
                gene_multi_exit_list.append(gene)
    for rna in db.product.find({'TYPE': {'$ne': 'Protein'}}):
        gene = rna['Gene']
        if gene not in rna_gene_list:
            rna_gene_list.append(gene)
    print('existed gene list has been built')
    fp.write('coding_gene_list\n\n')
    fp.write(str(gene_exist_list))
    fp.write('\n\ngene which product RNA\n\n')
    fp.write(str(rna_gene_list))
    fp.write('\n\n gene which product multi enzyme\n\n')
    fp.write(str(gene_multi_exit_list))

    saved_count = 0
    # timeout=False disables the server-side idle timeout, so the cursor has
    # to be closed explicitly -- also when the loop body raises.
    cursor = db.node.find({'TYPE': 'Gene'}, timeout=False)
    try:
        for gene in cursor:
            fp.write(gene['NAME'] + '  ')
            if search_exists(gene['NAME'], gene_exist_list, rna_gene_list):
                continue
            # Look the gene up by its name with '-' removed and anything
            # after the first '_' cut off.
            uniprot_name = gene['NAME'].replace('-', '').split('_')[0]
            record = db.uniprot.find_one({'gene_name': uniprot_name})
            if not record:
                fp.write('can\'t be found in uniprot\n')
                fp_not_found.write(gene['NAME'] + '\n')
                num_not_found_in_uniprot += 1
                continue
            fp.write(' should be saved\n')
            saved_count += 1
    finally:
        cursor.close()
    print(str(saved_count) + ' protein has been download')

    fp.write('\n\nnum_with_TF: ' + str(num_with_TF) + '\n')
    fp.write('num_with_Enzyme: ' + str(num_with_Enzyme) + 'compare: ' + str(len(gene_exist_list)) + '\n')
    fp.write('num of gene which product RNA: ' + str(num_with_RNA) +'\n')
    fp.write('num of gene with multi enzyme: ' + str(len(gene_multi_exit_list)) + '\n')
    # The counter holds the genes that were NOT found; the label now says so.
    fp.write('num not found in uniprot: ' + str(num_not_found_in_uniprot) + '\n')
    fp.write('num of protein newly added: ' + str(saved_count))

    fp.close()
    fp_not_found.close()
Ejemplo n.º 2
0
def rebuild():
    """Run every import and patch step, in order, to rebuild the database.

    When the module-level ``OVERWRITE`` flag is truthy, every collection
    except ``system.indexes`` and ``system.users`` is dropped first. Each
    step prints a progress message before it runs.
    """
    if OVERWRITE:
        protected = ('system.indexes', 'system.users')
        for name in db.collection_names():
            if name in protected:
                continue
            db.drop_collection(name)

    # --- base data import ---
    print('count log creating')
    count()
    print('run regulondb importing from super manage.py')
    regulondb()
    print('product importing ')
    product_process()
    print('run kegg_node importing from super manage.py')
    kegg_node()
    print('run kegg_reaction importing from super manage.py')
    kegg_reaction()

    # --- linking ---
    print('run reaction connection from super manage.py')
    # The NAME index is created right before the connection step runs.
    db.node.create_index('NAME')
    kegg_connect()
    print('run reaction function sort from super manage.py')
    kegg_reaction_function_link()
    print('run link setting between gene and enzyme from super manage.py')
    database_link()

    # --- patches and post-processing ---
    print('patch 1 built in August :adding alignment database')
    patch1()
    print('working log creating')
    log_create()
    print('uniprot updating')
    uniprot_update()
    print('kegg rename')
    rename_enzyme()
    print('\nAdd Sysname to Gene')
    gene_sysname()
    print('\n Add sigma link')
    sigma_link()
    print('Sort link type')
    sort_link()
    print('BLAST database setup')
    alignment_data()
    print('Fishing patch built in August 22')
    patch2()
    print('Initial project information')
    project_init()
Ejemplo n.º 3
0
def kegg_node(number=None):
    """Import KEGG nodes by executing the import scripts under ``./kegg/``.

    Args:
        number: ``None`` to drop the ``kegg_node`` collection and run every
            script, or an index selecting a single node type to re-import:
            0 = Compound, 1 = Module, 2 = Enzyme.

    Raises:
        KeyError: if ``str(number)`` is not a known node type.
        TypeError: if ``number`` cannot be used as a list index.

    Both lookups happen before anything is removed, so an invalid ``number``
    fails without deleting existing nodes.
    """
    basepath = './kegg/'
    # 'protein.py' / 'Protein' used to be part of these tables and is
    # currently not imported. The two tables must stay index-aligned.
    paths = [basepath + 'compound.py', basepath + 'module.py', basepath + 'enzyme.py']
    kind = {'0': 'Compound', '1': 'Module', '2': 'Enzyme'}

    if number is None:
        # NOTE(review): the full import drops 'kegg_node' while the partial
        # import below removes from 'node' -- confirm this is intended.
        db.drop_collection('kegg_node')
        for path in paths:
            # Each script runs with a fresh, empty globals dict.
            execfile(path, {})
        return

    # Validate first: resolve both the type and the script before the
    # destructive remove.
    node_type = kind[str(number)]
    script = paths[number]
    db.node.remove({'TYPE': node_type})
    execfile(script, {})
Ejemplo n.º 4
0
def patch2():
    """Drop the ``node_ref`` and ``link_ref`` collections, then run the
    Fishing Patch script with a fresh, empty globals dict."""
    for stale_collection in ('node_ref', 'link_ref'):
        db.drop_collection(stale_collection)
    execfile('./Patch/Fishing Patch.py', {})
Ejemplo n.º 5
0
def kegg_reaction_function_link():
    """Drop ``module__function_link`` and rebuild it by executing
    ``./kegg/mm_parse.py``."""
    db.drop_collection('module__function_link')
    # Executed in-process with empty globals instead of spawning
    # `python <path>` through os.system.
    execfile('./kegg/mm_parse.py', {})
Ejemplo n.º 6
0
def count():
    """Recreate the ``count`` collection with one zeroed counter document
    for each of the types ``node``, ``link`` and ``product``."""
    db.drop_collection('count')
    for counter_type in ('node', 'link', 'product'):
        db.count.insert({'type': counter_type, 'value': 0})