def hop_across(pairsa, pairsb, pairs_exclude):
    """Build two-hop pairs (x, z) by chaining a link in pairsa with one in pairsb.

    For every key x of the lookup built from pairsa, each partner y of x is
    looked up in the structure built from pairsb, and every partner z of y
    yields a candidate (x, z).

    - pairsa, pairsb: iterables of 2-tuples, each turned into a lookup by
      ut.dict_sets_from_tuples (presumably first element -> set of second
      elements; confirm against that helper).
    - pairs_exclude: pairs that must not appear in the result; membership is
      tested through pd.PairDict(...).contains.

    Returns the candidates with x != z that are not excluded, passed through
    pu.dedupe.
    """
    # Parenthesized tuple: the bare `for p in pairsa, pairsb` form inside a
    # comprehension is accepted by Python 2 only.
    a, b = [ut.dict_sets_from_tuples(p) for p in (pairsa, pairsb)]
    pd_exclude = pd.PairDict(pairs_exclude)
    newpairs = [
        (x, z)
        for x in a
        for y in a.get(x, [])
        for z in b.get(y, [])  # y may have no entry in b: contributes nothing
        if not (x == z or pd_exclude.contains((x, z)))
    ]
    return pu.dedupe(newpairs)
def seq_pairs(S, enzymes, mat_adj=None, conv_dict=None, return_groups=False):
    """
    Extract pairs of enzymes whose columns are linked in the adjacency matrix.

    - S: stoichiometric matrix of reactants (-) and products (+)
    - enzymes: list of enzymes corresponding to columns in S; each entry is a
      whitespace-separated string of ids (blank entries are dropped)
    - mat_adj: optional precomputed adjacency matrix; adj_matrix(S) is used
      when it is None
    - conv_dict: optional conversion mapping; when truthy the result is passed
      through convert_groups_singles / convert_pairs_singles
    - return_groups: when true, return the pairs of unsplit enzyme labels
      instead of splitting each label into single ids

    Returns a list of pairs. Unless return_groups is set, summary counts are
    printed to stdout.
    """
    if mat_adj is None:
        mat_adj = adj_matrix(S)
    # np.where yields 1xN matrices for np.matrix input on some NumPy versions
    # and flat index arrays otherwise; ravel() normalizes both to 1-D.
    rows, cols = [np.asarray(x).ravel() for x in np.where(mat_adj > 0)]
    rowcols = ut.zip_exact(rows, cols)
    # Dedup, keeping only the upper off-diagonal.
    rowcols = [(row, col) for row, col in rowcols if row < col]
    pairs = [(enzymes[row], enzymes[col]) for row, col in rowcols]
    # Filter out pairs where either side has a blank label.
    labeled_pairs = [(x, y) for x, y in pairs if x and y]
    if return_groups:
        if conv_dict:
            return convert_groups_singles(labeled_pairs, conv_dict)
        return labeled_pairs
    # Expand every multi-id label into all single-id combinations.
    single_pairs = [(xi, yi)
                    for x, y in labeled_pairs
                    for xi in x.split()
                    for yi in y.split()]
    unique_pairs = pu.dedupe(single_pairs)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s total, %s labeled, %s single, %s unique pairs returned" % (len(pairs), len(labeled_pairs), len(single_pairs), len(unique_pairs)))
    if conv_dict:
        conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
        print("%s converted pairs with 1-1 matches" % len(conv_pairs))
        return conv_pairs
    return unique_pairs
def load_kegg_sequentials(fname, do_convert=True):
    """Load pairs of ids that sit in consecutive steps of KEGG BRITE paths.

    - fname: file handed to load_kegg_brite.
    - do_convert: when true, map the pairs through convert_pairs_singles using
      the inverted orth.convert_dict('Hs', 'Hs_entrez') mapping.

    Each non-empty value of the loaded dict is reduced with ut.i1 to a path
    (presumably the second field of every entry; confirm against ut.i1).
    Adjacent path elements are paired, and every member of one element is
    combined with every member of the next.

    Returns the deduplicated pairs (converted when do_convert is true) and
    prints summary counts to stdout.
    """
    dkegg = load_kegg_brite(fname)
    kegg_paths = [ut.i1(v) for v in dkegg.values() if v]

    def path_pairs(list_path):
        # Consecutive (step, next_step) pairs along one path.
        return list(zip(list_path, list_path[1:]))

    group_pairs = ut.flatten([path_pairs(lpath) for lpath in kegg_paths])
    # Expand each pair of groups into all member-by-member combinations.
    single_pairs = [(xi, yi) for x, y in group_pairs for xi in x for yi in y]
    unique_pairs = pu.dedupe(single_pairs)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s total, %s single, %s unique pairs returned" % (
        len(group_pairs), len(single_pairs), len(unique_pairs)))
    if do_convert:
        conv_dict = ut.dict_inverse_sets(orth.convert_dict('Hs', 'Hs_entrez'))
        conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
        print("%s converted pairs with 1-1 matches" % len(conv_pairs))
        return conv_pairs
    return unique_pairs
def pairs_from_complexes(complexes):
    """Return the within-complex member pairs across all complexes.

    - complexes: iterable of groups; every 2-combination of a group's members
      becomes a candidate pair.

    The per-group pair lists are merged with ut.flatten and the result is
    passed through pu.dedupe.
    """
    per_complex = []
    for members in complexes:
        per_complex.append(list(it.combinations(members, 2)))
    return pu.dedupe(ut.flatten(per_complex))
def random_pairs(pairs, npairs):
    """Draw up to npairs random pairs from the items that occur in pairs.

    - pairs: existing pairs; the distinct values of ut.i0(pairs) and
      ut.i1(pairs) form the pool that is sampled.
    - npairs: maximum number of pairs to return.

    int(npairs * 1.5) pairs are drawn so that some remain after pu.dedupe;
    the result is truncated to npairs and may be shorter. Both members are
    drawn independently from the same pool, so a pair may contain the same
    item twice.
    """
    pool = list(set(ut.i0(pairs) + ut.i1(pairs)))
    n_draws = int(npairs * 1.5)
    drawn = []
    for _ in range(n_draws):
        left = random.choice(pool)
        right = random.choice(pool)
        drawn.append((left, right))
    return pu.dedupe(drawn)[:npairs]