Ejemplo n.º 1
0
def load_seq_pairs(fname, metab_exclude=None):
    """
    Load metabolic data from `fname` and return its sequential pairs.

    fname: handed unchanged to load_metabolic_data.
    metab_exclude: optional collection of metabolites to leave out; should
        be in sequential_metab/metabolites_exclude.txt.  When it is None or
        empty, the loaded data is used without any filtering.

    Returns the result of seq_pairs() on the (possibly filtered) data, with
    conv_dict set to ut.dict_inverse_sets() of the 'Hs' -> 'Hs_entrez'
    conversion dict.
    """
    # NOTE(review): `stoich` is presumably the stoichiometric matrix and
    # `enzymes` the per-reaction entrez enzyme ids -- confirm against
    # load_metabolic_data.
    stoich, enzymes, rxn_names, metab_names = load_metabolic_data(fname)
    conv = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    if not metab_exclude:
        print("No filtering of metabolites and rxns.")
    else:
        print("Excluding %s metabolites, filtering rxns" % len(metab_exclude))
        # Only the matrix and enzyme list are replaced by the filter; the
        # reaction/metabolite names are inputs to it and not used afterwards.
        stoich, enzymes = filter_rxns_metabs(
            stoich, enzymes, rxn_names, metab_names, metab_exclude)
    return seq_pairs(stoich, enzymes, conv_dict=conv)
Ejemplo n.º 2
0
def convdict_from_fname(species, ext_file):
    # Doesn't yet work for the general case of possibly needing to go two
    # steps--to the new species, then to a new seqdb
    totype = '_'.join(ext_file.split('/')[-1].split('_')[:2]) #Hs_entrez;Dm_fbgn
    # If there's no matching conversion file, assume it's not needed.
    genedict = None
    try:
        genedict = orth.convert_dict(species, totype)
    except IOError as e:
        print 'No external conversion file:', species, totype, e.strerror
    else: 
        if genedict is None:
            print 'No external conversion file:', species, totype
        else:
            print 'Conversion file:', species, totype, len(genedict), 'keys'
    return genedict
Ejemplo n.º 3
0
def load_kegg_sequentials(fname, do_convert=True):
    """
    Build pairs of members from consecutive groups along each KEGG path.

    fname: handed unchanged to load_kegg_brite.
    do_convert: when true (the default), the deduplicated pairs are run
        through convert_pairs_singles() with ut.dict_inverse_sets() of the
        'Hs' -> 'Hs_entrez' conversion dict, and that result is returned.
        Otherwise the deduplicated pairs are returned as they are.

    Counts of group-level, member-level and deduplicated pairs are printed
    along the way.
    """
    brite = load_kegg_brite(fname)
    # Empty/falsy entries are skipped.
    # NOTE(review): ut.i1 presumably picks the item at index 1 -- confirm.
    paths = [ut.i1(entry) for entry in brite.values() if entry]

    def adjacent(path):
        # Each element paired with its immediate successor along the path.
        return [(prev, nxt) for prev, nxt in zip(path, path[1:])]

    group_pairs = ut.flatten([adjacent(path) for path in paths])
    # NOTE: a group-level return option (return_groups, via
    # convert_groups_singles) was sketched here once but is disabled.

    # Expand every (group, group) pair into all member-by-member pairs.
    single_pairs = []
    for left_group, right_group in group_pairs:
        for left in left_group:
            for right in right_group:
                single_pairs.append((left, right))
    unique_pairs = pu.dedupe(single_pairs)
    print("%s total, %s single, %s unique pairs returned" % (
        len(group_pairs), len(single_pairs), len(unique_pairs)))

    if not do_convert:
        return unique_pairs
    conv_dict = ut.dict_inverse_sets(orth.convert_dict('Hs','Hs_entrez'))
    conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
    print("%s converted pairs with 1-1 matches" % len(conv_pairs))
    return conv_pairs