def main():
    """Score candidate map JSON files against a COBRA model.

    Usage: script <search_dir> <model_name>

    Loads the model named by argv[2], then for every (optionally gzipped)
    JSON map file in argv[1] counts how many of the map's reaction ids match
    reactions in the model. Maps where at least 90% of reactions match are
    kept, and the scored list is written to '<model_name>_maps.tsv'.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:
        # catch only the specific failure (too few CLI args), not everything
        raise Exception('Not enough arguments.')

    model = load_model(model_name)
    # normalized reaction ids from the model; a set gives O(1) membership
    # tests inside the per-reaction loop below
    ids = set(id_for_new_id_style(x.id) for x in model.reactions)

    scores = []
    paths = listdir(search_dir)
    size = len(paths)
    for i, path in enumerate(paths):
        # single-line progress indicator, rewritten in place with '\r'
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()

        opener = gzip.open if path.endswith('.gz') else open
        f = opener(join(search_dir, path), 'r')
        try:
            m = json.load(f)

            # (1) Metabolite / reaction counts; skip maps missing either key
            try:
                met_count = len(m['MAPNODE'])
                reactions = m['MAPREACTION']
            except KeyError:
                continue
            reaction_count = len(reactions)
            if reaction_count == 0:
                # avoid ZeroDivisionError below on maps with no reactions
                continue

            # (2) Compare the reaction ids to the cobra model
            num_matches = 0
            for reaction in reactions:
                try:
                    # NOTE(review): 'REACTIONABBREVATION' looks misspelled but
                    # presumably matches the external map schema -- do not fix
                    an_id = reaction['REACTIONABBREVATION']
                except KeyError:
                    continue
                if id_for_new_id_style(an_id) in ids:
                    num_matches += 1

            # quit if not > 90% of reactions match the model
            frac = 0.9
            if num_matches < frac * reaction_count:
                continue
            scores.append((join(search_dir, path),
                           float(num_matches) / reaction_count,
                           met_count, reaction_count))
        finally:
            # close the file even when a 'continue' skips this map
            # (the original leaked the handle on every skip path)
            f.close()

    # sort by metabolite count (desc), then stable-sort by score (asc) so
    # score is the primary key and metabolite count breaks ties
    scores = sorted(scores, key=itemgetter(2), reverse=True)
    scores = sorted(scores, key=itemgetter(1))
    outfile = '%s_maps.tsv' % model_name
    print('')
    print('saving to %s' % outfile)
    (pd
     .DataFrame(scores,
                columns=['path', 'score', 'n_metabolites', 'n_reactions'])
     .to_csv(outfile, sep='\t'))
def parse_nodes(nodes, compartment_id_key):
    """Normalize raw map node dicts in place and index them by object id.

    Each node gains lowercase keys ('object_id', 'node_type', 'x', 'y')
    derived from the legacy MAPNODE* fields. Metabolite nodes additionally
    gain label position (defaulting to the node position), bigg id, official
    name, primary flag, and compartment info; when both a bigg id and a
    compartment letter are present, the letter is appended to the bigg id.

    Returns a dict mapping object_id -> node, restricted to metabolite,
    multimarker and midmarker nodes.
    """
    # required keys shared by every node type: (old key, new key, cast)
    required = (
        ('MAPOBJECT_ID', 'object_id', str),
        ('MAPNODENODETYPE', 'node_type', lambda x: str(x).lower()),
        ('MAPNODEPOSITIONX', 'x', float),
        ('MAPNODEPOSITIONY', 'y', float),
    )
    for node in nodes:
        for old_key, new_key, cast in required:
            try_assignment(node, old_key, new_key, cast=cast, require=True)

        if node['node_type'] != 'metabolite':
            continue

        # metabolite-only attributes; label position falls back to (x, y)
        try_assignment(node, 'MAPNODELABELPOSITIONX', 'label_x', cast=float,
                       fallback=node['x'])
        try_assignment(node, 'MAPNODELABELPOSITIONY', 'label_y', cast=float,
                       fallback=node['y'])
        try_assignment(node, 'MOLECULEABBREVIATION', 'bigg_id',
                       cast=lambda x: id_for_new_id_style(x,
                                                          is_metabolite=True),
                       fallback='')
        try_assignment(node, 'MOLECULEOFFICIALNAME', 'name', cast=str,
                       fallback='')
        try_assignment(node, 'MAPNODEISPRIMARY', 'node_is_primary',
                       cast=lambda x: x == 'Y', fallback=False)

        # compartment id is decoded three ways from the same raw field
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_id',
                       cast=int)
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_name',
                       cast=lambda x: compartment_id_key[int(x)][0])
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_letter',
                       cast=lambda x: compartment_id_key[int(x)][1])

        letter = node['compartment_letter']
        if node['bigg_id'] != '' and letter is not None:
            node['bigg_id'] = "%s_%s" % (node['bigg_id'], letter)

    # index the kept node types by their object id
    keep = ('metabolite', 'multimarker', 'midmarker')
    return dict((n['object_id'], n) for n in nodes if n['node_type'] in keep)
def parse_nodes(nodes, compartment_id_key):
    """Rewrite legacy MAPNODE* keys on each node to normalized keys, in place.

    Every node receives 'object_id', 'node_type', 'x' and 'y'. For nodes
    whose type is 'metabolite', label coordinates (defaulting to the node's
    own position), 'bigg_id', 'name', 'node_is_primary' and the compartment
    id/name/letter are also assigned; a non-empty bigg id gets the
    compartment letter suffixed when one is available.

    Returns {object_id: node} for metabolite, multimarker and midmarker
    nodes only.
    """
    for a_node in nodes:
        # keys required on every node, regardless of type
        try_assignment(a_node, 'MAPOBJECT_ID', 'object_id', cast=str,
                       require=True)
        try_assignment(a_node, 'MAPNODENODETYPE', 'node_type',
                       cast=lambda v: str(v).lower(), require=True)
        try_assignment(a_node, 'MAPNODEPOSITIONX', 'x', cast=float,
                       require=True)
        try_assignment(a_node, 'MAPNODEPOSITIONY', 'y', cast=float,
                       require=True)

        if a_node['node_type'] == 'metabolite':
            # label defaults to the node position when absent
            try_assignment(a_node, 'MAPNODELABELPOSITIONX', 'label_x',
                           cast=float, fallback=a_node['x'])
            try_assignment(a_node, 'MAPNODELABELPOSITIONY', 'label_y',
                           cast=float, fallback=a_node['y'])
            try_assignment(
                a_node, 'MOLECULEABBREVIATION', 'bigg_id',
                cast=lambda v: id_for_new_id_style(v, is_metabolite=True),
                fallback='')
            try_assignment(a_node, 'MOLECULEOFFICIALNAME', 'name',
                           cast=str, fallback='')
            try_assignment(a_node, 'MAPNODEISPRIMARY', 'node_is_primary',
                           cast=lambda v: v == 'Y', fallback=False)
            # the same raw compartment field feeds three derived keys via
            # the (name, letter) pairs in compartment_id_key
            try_assignment(a_node, 'MAPNODECOMPARTMENT_ID',
                           'compartment_id', cast=int)
            try_assignment(a_node, 'MAPNODECOMPARTMENT_ID',
                           'compartment_name',
                           cast=lambda v: compartment_id_key[int(v)][0])
            try_assignment(a_node, 'MAPNODECOMPARTMENT_ID',
                           'compartment_letter',
                           cast=lambda v: compartment_id_key[int(v)][1])

            if (a_node['bigg_id'] != ''
                    and a_node['compartment_letter'] is not None):
                a_node['bigg_id'] = "%s_%s" % (a_node['bigg_id'],
                                               a_node['compartment_letter'])

    # build the object_id index, keeping only the node types the map renders
    result = {}
    for a_node in nodes:
        if a_node['node_type'] in ['metabolite', 'multimarker', 'midmarker']:
            result[a_node['object_id']] = a_node
    return result
def main():
    """Rank map JSON files by how well they match a COBRA model's reactions.

    Usage: script <search_dir> <model_name>

    Scans every file in <search_dir> (gzipped files are decompressed on the
    fly), parses it as JSON, and computes the fraction of its reactions whose
    normalized ids appear in the model. Files with at least a 90% match are
    collected and written, sorted, to '<model_name>_maps.tsv'.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:
        # narrow except: only a missing CLI argument should trigger this
        raise Exception('Not enough arguments.')

    model = load_model(model_name)
    # set membership is O(1); the original list made each lookup O(n)
    model_reaction_ids = set(id_for_new_id_style(r.id)
                             for r in model.reactions)

    entries = listdir(search_dir)
    total = len(entries)
    scores = []
    for index, path in enumerate(entries):
        # overwrite the same terminal line as a progress counter
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (index + 1, total))
        sys.stdout.flush()

        full_path = join(search_dir, path)
        if path.endswith('.gz'):
            f = gzip.open(full_path, 'r')
        else:
            f = open(full_path, 'r')
        try:
            data = json.load(f)

            # (1) Metabolite count; skip files that aren't map JSON
            try:
                met_count = len(data['MAPNODE'])
                reactions = data['MAPREACTION']
            except KeyError:
                continue
            reaction_count = len(reactions)
            if reaction_count == 0:
                # guard the division below; an empty map can never match
                continue

            # (2) Compare the reaction ids to the cobra model
            num_matches = 0
            for reaction in reactions:
                try:
                    # NOTE(review): key spelling ('ABBREVATION') presumably
                    # mirrors the map schema -- left untouched on purpose
                    an_id = reaction['REACTIONABBREVATION']
                except KeyError:
                    continue
                if id_for_new_id_style(an_id) in model_reaction_ids:
                    num_matches += 1

            # require strictly better than a 90% match rate to keep the map
            frac = 0.9
            if num_matches < frac * reaction_count:
                continue
            scores.append((full_path,
                           float(num_matches) / reaction_count,
                           met_count,
                           reaction_count))
        finally:
            # guaranteed close; the original leaked f on every skipped file
            f.close()

    # two stable sorts: final order is by score ascending, with higher
    # metabolite counts first among equal scores
    scores = sorted(scores, key=itemgetter(2), reverse=True)
    scores = sorted(scores, key=itemgetter(1))
    outfile = '%s_maps.tsv' % model_name
    print('')
    print('saving to %s' % outfile)
    (pd.DataFrame(scores,
                  columns=['path', 'score', 'n_metabolites', 'n_reactions'])
       .to_csv(outfile, sep='\t'))