コード例 #1
0
def main():
    """Rank the map files in a directory by how well they match a model.

    Reads two command-line arguments: the directory to search and the model
    name handed to ``load_model``. Every entry in the directory is parsed as
    JSON (opened with gzip when its name ends in ``.gz``). A map is skipped
    when it has no ``MAPNODE`` or ``MAPREACTION`` entry, when it has no
    reactions at all, or when fewer than 90% of its reactions match a
    reaction id of the model. The surviving maps are written to
    ``<model_name>_maps.tsv`` with their path, matching fraction, node count
    and reaction count.

    Raises:
        Exception: if fewer than two command-line arguments were given.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:
        raise Exception('Not enough arguments.')

    model = load_model(model_name)
    # A set keeps the per-reaction membership test below O(1).
    ids = set(id_for_new_id_style(x.id) for x in model.reactions)

    # Minimum fraction of a map's reactions that must be in the model.
    frac = 0.9

    scores = []
    paths = listdir(search_dir)
    size = len(paths)
    for i, path in enumerate(paths):
        # Progress counter, rewritten in place on a single line.
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()

        full_path = join(search_dir, path)
        opener = gzip.open if path.endswith('.gz') else open
        # Close the file as soon as it is parsed, so that skipped maps do
        # not leave their handle open.
        with opener(full_path, 'r') as f:
            m = json.load(f)

        # (1) Collect the node and reaction counts
        try:
            met_count = len(m['MAPNODE'])
            reactions = m['MAPREACTION']
        except KeyError:
            continue
        reaction_count = len(reactions)
        if reaction_count == 0:
            # Nothing to score, and the fraction below would divide by zero.
            continue

        # (2) Compare the reaction ids to the cobra model
        num_matches = 0
        for reaction in reactions:
            # NOTE(review): the key spelling 'REACTIONABBREVATION' is kept
            # as found; presumably it matches the map files - confirm.
            try:
                an_id = reaction['REACTIONABBREVATION']
            except KeyError:
                continue
            if id_for_new_id_style(an_id) in ids:
                num_matches += 1

        # Skip the map unless at least `frac` of its reactions matched.
        if num_matches < frac * reaction_count:
            continue
        scores.append((full_path,
                       float(num_matches) / reaction_count,
                       met_count, reaction_count))

    # Two stable passes: the result is ordered by ascending score, with ties
    # broken by descending node count.
    scores.sort(key=itemgetter(2), reverse=True)
    scores.sort(key=itemgetter(1))

    outfile = '%s_maps.tsv' % model_name
    # Terminate the progress line before the final message.
    sys.stdout.write('\n')
    print('saving to %s' % outfile)
    (pd
     .DataFrame(scores, columns=['path', 'score', 'n_metabolites', 'n_reactions'])
     .to_csv(outfile, sep='\t'))
コード例 #2
0
def parse_nodes(nodes, compartment_id_key):
    """Normalize raw map nodes and index the relevant ones by object id.

    Every node is passed to ``try_assignment`` with a raw key, a new key and
    a cast; presumably the cast value ends up under the new key - confirm
    against ``try_assignment``. All nodes get ``object_id``, ``node_type``,
    ``x`` and ``y`` requested as required fields. Metabolite nodes
    additionally get label position, ``bigg_id``, ``name``,
    ``node_is_primary`` and the three compartment fields. When a metabolite
    has a non-empty ``bigg_id`` and a ``compartment_letter`` that is not
    None, its ``bigg_id`` is replaced by ``<bigg_id>_<compartment_letter>``.

    Args:
        nodes: node dicts, modified in place. The collection is iterated
            twice, so it should be re-iterable (e.g. a list).
        compartment_id_key: lookup indexed by integer compartment id; each
            entry is indexed with [0] for the name and [1] for the letter.

    Returns:
        A dict mapping ``object_id`` to the node, restricted to nodes whose
        ``node_type`` is 'metabolite', 'multimarker' or 'midmarker'.
    """
    # Casts are defined once here instead of as per-node lambdas.
    def lowercase_text(raw):
        return str(raw).lower()

    def metabolite_id(raw):
        return id_for_new_id_style(raw, is_metabolite=True)

    def is_yes(raw):
        return bool(raw == 'Y')

    def compartment_name(raw):
        return compartment_id_key[int(raw)][0]

    def compartment_letter(raw):
        return compartment_id_key[int(raw)][1]

    for node in nodes:
        # Fields requested for every node.
        try_assignment(node, 'MAPOBJECT_ID', 'object_id',
                       cast=str, require=True)
        try_assignment(node, 'MAPNODENODETYPE', 'node_type',
                       cast=lowercase_text, require=True)
        try_assignment(node, 'MAPNODEPOSITIONX', 'x',
                       cast=float, require=True)
        try_assignment(node, 'MAPNODEPOSITIONY', 'y',
                       cast=float, require=True)

        if node['node_type'] != 'metabolite':
            continue

        # Metabolite-only fields; the label position falls back to the
        # node position.
        try_assignment(node, 'MAPNODELABELPOSITIONX', 'label_x',
                       cast=float, fallback=node['x'])
        try_assignment(node, 'MAPNODELABELPOSITIONY', 'label_y',
                       cast=float, fallback=node['y'])
        try_assignment(node, 'MOLECULEABBREVIATION', 'bigg_id',
                       cast=metabolite_id, fallback='')
        try_assignment(node, 'MOLECULEOFFICIALNAME', 'name',
                       cast=str, fallback='')
        try_assignment(node, 'MAPNODEISPRIMARY', 'node_is_primary',
                       cast=is_yes, fallback=False)
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_id',
                       cast=int)
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_name',
                       cast=compartment_name)
        try_assignment(node, 'MAPNODECOMPARTMENT_ID', 'compartment_letter',
                       cast=compartment_letter)

        # Append the compartment letter to the id when both are available.
        if node['bigg_id'] != '':
            if node['compartment_letter'] is not None:
                node['bigg_id'] = "%s_%s" % (node['bigg_id'],
                                             node['compartment_letter'])

    # Index the kept node types by object id.
    kept_types = ['metabolite', 'multimarker', 'midmarker']
    indexed = {}
    for node in nodes:
        if node['node_type'] in kept_types:
            indexed[node['object_id']] = node
    return indexed
コード例 #3
0
def parse_nodes(nodes, compartment_id_key):
    """Convert raw map nodes in place and return the relevant ones by id.

    Each node is run through a fixed sequence of ``try_assignment`` calls
    that name a raw key, a new key and a cast (presumably storing the cast
    value under the new key - confirm against ``try_assignment``). The
    sequence has two parts: four required fields for every node, then the
    label, identity and compartment fields for nodes whose ``node_type`` is
    'metabolite'. A metabolite with a non-empty ``bigg_id`` and a
    ``compartment_letter`` that is not None gets the letter appended to its
    ``bigg_id`` after an underscore.

    Args:
        nodes: node dicts to update. Iterated twice, so a re-iterable
            collection such as a list is expected.
        compartment_id_key: lookup indexed by integer compartment id, whose
            entries provide the compartment name at [0] and letter at [1].

    Returns:
        A dict of ``object_id`` -> node for nodes whose ``node_type`` is
        'metabolite', 'multimarker' or 'midmarker'.
    """
    # (raw key, new key, cast) for the fields requested with require=True.
    required_fields = (
        ('MAPOBJECT_ID', 'object_id', str),
        ('MAPNODENODETYPE', 'node_type', lambda raw: str(raw).lower()),
        ('MAPNODEPOSITIONX', 'x', float),
        ('MAPNODEPOSITIONY', 'y', float),
    )
    # (new key, cast) for the fields all read from MAPNODECOMPARTMENT_ID.
    compartment_fields = (
        ('compartment_id', int),
        ('compartment_name', lambda raw: compartment_id_key[int(raw)][0]),
        ('compartment_letter', lambda raw: compartment_id_key[int(raw)][1]),
    )

    def to_metabolite_id(raw):
        return id_for_new_id_style(raw, is_metabolite=True)

    def to_primary_flag(raw):
        return bool(raw == 'Y')

    for node in nodes:
        for raw_key, new_key, caster in required_fields:
            try_assignment(node, raw_key, new_key, cast=caster, require=True)

        if node['node_type'] == 'metabolite':
            # The label position falls back to the node position.
            try_assignment(node, 'MAPNODELABELPOSITIONX', 'label_x',
                           cast=float, fallback=node['x'])
            try_assignment(node, 'MAPNODELABELPOSITIONY', 'label_y',
                           cast=float, fallback=node['y'])
            try_assignment(node, 'MOLECULEABBREVIATION', 'bigg_id',
                           cast=to_metabolite_id, fallback='')
            try_assignment(node, 'MOLECULEOFFICIALNAME', 'name',
                           cast=str, fallback='')
            try_assignment(node, 'MAPNODEISPRIMARY', 'node_is_primary',
                           cast=to_primary_flag, fallback=False)
            for new_key, caster in compartment_fields:
                try_assignment(node, 'MAPNODECOMPARTMENT_ID', new_key,
                               cast=caster)

            # Append the compartment letter only when both parts exist.
            has_id = node['bigg_id'] != ''
            if has_id and node['compartment_letter'] is not None:
                node['bigg_id'] = "%s_%s" % (node['bigg_id'],
                                             node['compartment_letter'])

    # Make into dictionary
    wanted = ['metabolite', 'multimarker', 'midmarker']
    return dict(
        (a['object_id'], a) for a in nodes if a['node_type'] in wanted
    )
コード例 #4
0
def main():
    """Rank the map files in a directory by how well they match a model.

    Reads two command-line arguments: the directory to search and the model
    name handed to ``load_model``. Every entry in the directory is parsed as
    JSON (opened with gzip when its name ends in ``.gz``). A map is skipped
    when it has no ``MAPNODE`` or ``MAPREACTION`` entry, when it has no
    reactions at all, or when fewer than 90% of its reactions match a
    reaction id of the model. The surviving maps are written to
    ``<model_name>_maps.tsv`` with their path, matching fraction, node count
    and reaction count.

    Raises:
        Exception: if fewer than two command-line arguments were given.
    """
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:
        raise Exception('Not enough arguments.')

    model = load_model(model_name)
    # A set keeps the per-reaction membership test below O(1).
    ids = set(id_for_new_id_style(x.id) for x in model.reactions)

    # Minimum fraction of a map's reactions that must be in the model.
    frac = 0.9

    scores = []
    paths = listdir(search_dir)
    size = len(paths)
    for i, path in enumerate(paths):
        # Progress counter, rewritten in place on a single line.
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()

        full_path = join(search_dir, path)
        opener = gzip.open if path.endswith('.gz') else open
        # Close the file as soon as it is parsed, so that skipped maps do
        # not leave their handle open.
        with opener(full_path, 'r') as f:
            m = json.load(f)

        # (1) Collect the node and reaction counts
        try:
            met_count = len(m['MAPNODE'])
            reactions = m['MAPREACTION']
        except KeyError:
            continue
        reaction_count = len(reactions)
        if reaction_count == 0:
            # Nothing to score, and the fraction below would divide by zero.
            continue

        # (2) Compare the reaction ids to the cobra model
        num_matches = 0
        for reaction in reactions:
            # NOTE(review): the key spelling 'REACTIONABBREVATION' is kept
            # as found; presumably it matches the map files - confirm.
            try:
                an_id = reaction['REACTIONABBREVATION']
            except KeyError:
                continue
            if id_for_new_id_style(an_id) in ids:
                num_matches += 1

        # Skip the map unless at least `frac` of its reactions matched.
        if num_matches < frac * reaction_count:
            continue
        scores.append((full_path,
                       float(num_matches) / reaction_count,
                       met_count, reaction_count))

    # Two stable passes: the result is ordered by ascending score, with ties
    # broken by descending node count.
    scores.sort(key=itemgetter(2), reverse=True)
    scores.sort(key=itemgetter(1))

    outfile = '%s_maps.tsv' % model_name
    # Terminate the progress line before the final message.
    sys.stdout.write('\n')
    print('saving to %s' % outfile)
    (pd.DataFrame(scores,
                  columns=['path', 'score', 'n_metabolites',
                           'n_reactions']).to_csv(outfile, sep='\t'))