def main():
    """Load the parsed UniProt dump into MongoDB and report uncovered genes.

    Steps, in order:

    1. Read ``./ecoliparse.txt``, replace single quotes with double quotes so
       the text parses as JSON, and store the result in a freshly dropped
       ``uniprot`` collection indexed on ``gene_name``.
    2. Collect the genes listed under ``GENES`` of every Enzyme node, the
       genes that show up there more than once, and the ``Gene`` of every
       product whose ``TYPE`` is not ``Protein``; write the three lists to
       the log file ``fp``.
    3. For every Gene node that ``search_exists`` does not report as known,
       look its name up in ``uniprot``.  Misses are written to
       ``fp_not_found``; hits are counted.
    4. Write the summary counters to ``fp`` and close both log files.

    Relies on the module-level names ``db``, ``fp``, ``fp_not_found``,
    ``search_exists``, ``num_with_TF``, ``num_with_Enzyme`` and
    ``num_with_RNA``.
    """
    num_not_found_in_uniprot = 0

    # Use a context manager so the dump file handle is closed right away
    # instead of being left to the garbage collector.
    with open('./ecoliparse.txt', 'r') as dump_file:
        text = dump_file.read().replace("'", '"')
    # Named ``uniprot_records`` rather than ``dict`` to avoid shadowing the
    # builtin.
    uniprot_records = json.loads(text)
    db.drop_collection('uniprot')
    db.uniprot.insert(uniprot_records)
    db.uniprot.create_index('gene_name')

    gene_exist_list = []        # every gene seen on an Enzyme node
    gene_multi_exit_list = []   # genes seen a second time on Enzyme nodes
    rna_gene_list = []          # genes of non-protein products
    for enzyme in db.node.find({'TYPE': 'Enzyme'}):
        for gene in enzyme['GENES']:
            if gene not in gene_exist_list:
                gene_exist_list.append(gene)
            elif gene not in gene_multi_exit_list:
                gene_multi_exit_list.append(gene)
    for RNA in db.product.find({'TYPE': {'$ne': 'Protein'}}):
        gene = RNA['Gene']
        if gene not in rna_gene_list:
            rna_gene_list.append(gene)
    print('existed gene list has been built')

    fp.write('coding_gene_list\n\n')
    fp.write(str(gene_exist_list))
    fp.write('\n\ngene which product RNA\n\n')
    fp.write(str(rna_gene_list))
    fp.write('\n\n gene which product multi enzyme\n\n')
    fp.write(str(gene_multi_exit_list))

    saved_count = 0
    # The cursor is opened with timeout=False, so it must be closed
    # explicitly; try/finally guarantees that even if the loop raises.
    cursor = db.node.find({'TYPE': 'Gene'}, timeout=False)
    try:
        for gene in cursor:
            fp.write(gene['NAME'] + ' ')
            if search_exists(gene['NAME'], gene_exist_list, rna_gene_list):
                continue
            # Strip dashes and keep the part before the first underscore to
            # get the name used for the ``gene_name`` lookup.
            uniprot_name = gene['NAME'].replace('-', '').split('_')[0]
            log = db.uniprot.find_one({'gene_name': uniprot_name})
            if not log:
                fp.write('can\'t be found in uniprot\n')
                fp_not_found.write(gene['NAME'] + '\n')
                num_not_found_in_uniprot += 1
                continue
            # NOTE(review): the original source was collapsed onto one line;
            # these two statements are assumed to run only for genes that
            # were unknown to search_exists but found in uniprot - confirm.
            fp.write(' should be saved\n')
            saved_count += 1
    finally:
        cursor.close()
    print(str(saved_count) + ' protein has been download')

    fp.write('\n\nnum_with_TF: ' + str(num_with_TF) + '\n')
    fp.write('num_with_Enzyme: ' + str(num_with_Enzyme) + 'compare: ' + str(len(gene_exist_list)) + '\n')
    fp.write('num of gene which product RNA: ' + str(num_with_RNA) + '\n')
    fp.write('num of gene with multi enzyme: ' + str(len(gene_multi_exit_list)) + '\n')
    # The counter holds the genes that were NOT found, so the label says so
    # (it previously read 'num found in uniprot').
    fp.write('num not found in uniprot: ' + str(num_not_found_in_uniprot) + '\n')
    fp.write('num of protein newly added: ' + str(saved_count))
    fp.close()
    fp_not_found.close()
def rebuild():
    """Run the whole database build pipeline, one step after another.

    When the module-level ``OVERWRITE`` flag is truthy, every collection
    except ``system.indexes`` and ``system.users`` is dropped first.  Each
    later step announces itself on stdout and then calls the corresponding
    module-level import/patch function, in a fixed order.
    """
    # NOTE(review): the original source was collapsed onto one line; only the
    # collection purge is assumed to be guarded by OVERWRITE - confirm.
    if OVERWRITE:
        protected = ['system.indexes', 'system.users']
        for existing in db.collection_names():
            if existing in protected:
                continue
            db.drop_collection(existing)

    print('count log creating')
    count()

    print('run regulondb importing from super manage.py')
    regulondb()

    print('product importing ')
    product_process()

    print('run kegg_node importing from super manage.py')
    kegg_node()

    print('run kegg_reaction importing from super manage.py')
    kegg_reaction()

    print('run reaction connection from super manage.py')
    # The NAME index is created just before the connection step runs.
    db.node.create_index('NAME')
    kegg_connect()

    print('run reaction function sort from super manage.py')
    kegg_reaction_function_link()

    print('run link setting between gene and enzyme from super manage.py')
    database_link()

    print('patch 1 built in August :adding alignment database')
    patch1()

    print('working log creating')
    log_create()

    print('uniprot updating')
    uniprot_update()

    print('kegg rename')
    rename_enzyme()

    print('\nAdd Sysname to Gene')
    gene_sysname()

    print('\n Add sigma link')
    sigma_link()

    print('Sort link type')
    sort_link()

    print('BLAST database setup')
    alignment_data()

    print('Fishing patch built in August 22')
    patch2()

    print('Initial project information')
    project_init()
def kegg_node(number=None):
    """Import KEGG nodes by executing the loader scripts under ``./kegg/``.

    Args:
        number: ``None`` to run every loader, or the index of a single kind
            to reload: 0 = Compound, 1 = Module, 2 = Enzyme.  A numeric
            string such as ``'1'`` is accepted as well.

    Raises:
        KeyError: if ``number`` is not one of the known indices.

    With ``number=None`` the ``kegg_node`` collection is dropped and all
    three scripts are executed in order.  Otherwise only the nodes of the
    selected ``TYPE`` are removed from ``db.node`` and that kind's script is
    executed.  Each script runs via ``execfile`` with a fresh, empty globals
    dict.
    """
    basepath = './kegg/'
    paths = [basepath + 'compound.py', basepath + 'module.py', basepath + 'enzyme.py']
    kind = {'0': 'Compound', '1': 'Module', '2': 'Enzyme'}

    if number is None:
        # NOTE(review): this branch drops ``kegg_node`` while the single-kind
        # branch removes from ``db.node`` - confirm which collection the
        # loader scripts actually write to.
        db.drop_collection('kegg_node')
        for path in paths:
            execfile(path, {})
        return

    # Resolve both the node type and the script path BEFORE touching the
    # database.  Previously a numeric string passed the ``kind`` lookup, the
    # nodes were removed, and only then did ``paths[number]`` raise, leaving
    # the nodes deleted without being re-imported.
    node_type = kind[str(number)]
    script = paths[int(number)]
    db.node.remove({'TYPE': node_type})
    execfile(script, {})
def patch2():
    """Apply the "Fishing" patch.

    Drops the ``node_ref`` and ``link_ref`` collections, then executes
    ``./Patch/Fishing Patch.py`` with a fresh, empty globals dict.
    """
    for stale_collection in ('node_ref', 'link_ref'):
        db.drop_collection(stale_collection)
    execfile('./Patch/Fishing Patch.py', {})
def kegg_reaction_function_link():
    """Rebuild the module/function links.

    Drops the ``module__function_link`` collection and then executes
    ``./kegg/mm_parse.py`` in-process (via ``execfile`` with a fresh, empty
    globals dict) rather than spawning a separate interpreter.
    """
    db.drop_collection('module__function_link')
    parser_script = './kegg/mm_parse.py'
    execfile(parser_script, {})
def count():
    """Reset the ``count`` collection.

    Drops the collection and inserts one document per counter type
    (``node``, ``link``, ``product``), each with ``value`` set to 0.
    """
    db.drop_collection('count')
    for counter_type in ('node', 'link', 'product'):
        db.count.insert({'type': counter_type, 'value': 0})