def seq_pairs(S, enzymes, mat_adj=None, conv_dict=None, return_groups=False):
    """
    Pair up the enzyme labels of columns that are marked adjacent in mat_adj.

    - S: stoichiometric matrix of reactants (-) and products (+)
    - enzymes: list of enzymes corresponding to columns in S
    - mat_adj: optional precomputed adjacency matrix; adj_matrix(S) is
      computed when this is None.  Every entry > 0 marks a pair.
    - conv_dict: optional mapping; when truthy the result is passed through
      convert_groups_singles (return_groups) or convert_pairs_singles.
    - return_groups: when true, return the labeled pairs as-is, without
      splitting whitespace-separated labels into individual ids.

    Returns a list of 2-tuples.  Pairs in which either label is blank are
    dropped.  When return_groups is false a summary line is printed.
    """
    if mat_adj is None:
        mat_adj = adj_matrix(S)
    # np.where may hand back 1xN matrices (presumably np.matrix input on
    # older numpy) or flat arrays; ravel() normalizes both to 1-D indices.
    rows, cols = [np.asarray(idx).ravel() for idx in np.where(mat_adj > 0)]
    # dedup, keeping only upper off-diagonal
    rowcols = [(row, col) for row, col in zip(rows, cols) if row < col]
    pairs = [(enzymes[row], enzymes[col]) for row, col in rowcols]
    # filter out blanks
    labeled_pairs = [(x, y) for x, y in pairs if x and y]
    if return_groups:
        if conv_dict:
            return convert_groups_singles(labeled_pairs, conv_dict)
        return labeled_pairs
    # a label may hold several whitespace-separated ids: expand each labeled
    # pair into the cross product of its members
    single_pairs = [(xi, yi) for x, y in labeled_pairs
                    for xi in x.split() for yi in y.split()]
    unique_pairs = pu.dedupe(single_pairs)
    # single-argument print(...) behaves the same on Python 2 and 3
    print("%s total, %s labeled, %s single, %s unique pairs returned" % (
        len(pairs), len(labeled_pairs), len(single_pairs), len(unique_pairs)))
    if conv_dict:
        conv_pairs = convert_pairs_singles(unique_pairs, conv_dict)
        print("%s converted pairs with 1-1 matches" % len(conv_pairs))
        return conv_pairs
    return unique_pairs
Exemple #2
0
def pairs_exceeding(elut, skey, thresh):
    """
    Return protein pairs from elut selected according to score key skey.

    - elut: elution object; elut.prots supplies the protein labels.
    - skey: 'apex' selects pairs with matching_pairs on the ApexScores
      array; any other key is scored through scorekey_elution.
    - thresh: strict lower bound applied to the score matrix (not used
      for 'apex').

    Doesn't return self-self interactions.
    NOTE(review): nothing in this function filters the diagonal, so that
    presumably relies on matching_pairs / scorekey_elution -- confirm.
    """
    arr_prots = np.array(elut.prots)
    if skey == 'apex':
        apexes = ApexScores(elut).apex_array
        return matching_pairs(apexes, arr_prots)
    # loading precomputed indices is so far massively slower than this
    score_mat, _, new_prots = scorekey_elution(skey, elut, None)
    if new_prots is not None:
        # scoring may hand back its own protein ordering; index with that
        arr_prots = np.array(new_prots)
    rows, cols = np.where(score_mat > thresh)
    # translate matrix indices into protein labels, pairwise
    return ut.zip_exact(arr_prots[rows], arr_prots[cols])
Exemple #3
0
def pairs_exceeding(elut, skey, thresh):
    """
    Return protein pairs from elut selected according to score key skey.

    - elut: elution object; elut.prots supplies the protein labels.
    - skey: 'apex' selects pairs with matching_pairs on the ApexScores
      array; any other key is scored through scorekey_elution.
    - thresh: strict lower bound applied to the score matrix (not used
      for 'apex').

    Doesn't return self-self interactions.
    NOTE(review): nothing in this function filters the diagonal, so that
    presumably relies on matching_pairs / scorekey_elution -- confirm.
    """
    arr_prots = np.array(elut.prots)
    if skey == 'apex':
        apexes = ApexScores(elut).apex_array
        return matching_pairs(apexes, arr_prots)
    # loading precomputed indices is so far massively slower than this
    score_mat, _, new_prots = scorekey_elution(skey, elut, None)
    if new_prots is not None:
        # scoring may hand back its own protein ordering; index with that
        arr_prots = np.array(new_prots)
    rows, cols = np.where(score_mat > thresh)
    # translate matrix indices into protein labels, pairwise
    return ut.zip_exact(arr_prots[rows], arr_prots[cols])
def transpose(d, fin, fout):
    """
    Transpose the tab-delimited file fin and write the result to fout.

    - d: directory whose parent is added to sys.path so that the utils
      module can be imported (presumably the script's own directory).
    - fin: input path, read with utils.load_tab_file.
    - fout: output path, written with utils.write_tab_file.

    A last line starting with '#' is dropped before transposing.  After
    transposing, the first cell of the first column is removed (it must
    start with '#' or equal 'Locus'), and the second column is dropped when
    its title contains 'total' or 'descr'.
    """
    utils_dir = d + '/..'
    # don't grow sys.path with a duplicate entry on every call
    if utils_dir not in sys.path:
        sys.path.append(utils_dir)
    import utils as ut
    lines = list(ut.load_tab_file(fin))
    if lines[-1][0].startswith('#'):
        # ignore comments, such as last line in spcount output
        lines = lines[:-1]
        print("skipping last line")
    # zip messes up if these files aren't neat
    cols = list(ut.zip_exact(*lines))
    # _After_ zipping, get rid of the column 1 header--R doesn't like it.
    col0list = list(cols[0])
    print(col0list[0][0])
    # make sure we're removing what we should be
    assert (col0list[0][0] == '#' or col0list[0] == 'Locus'), \
        "unexpected first header cell: %r" % (col0list[0],)
    cols[0] = tuple(col0list[1:])
    # get rid of the total/descr column, if there is a second column at all
    if len(cols) > 1:
        col2title = cols[1][0].lower()
        if 'total' in col2title or 'descr' in col2title:
            del cols[1]
            print("removing second column--extraneous")
    ut.write_tab_file(cols, fout)
def combine_ppis_matched(ppisa, ppisb):
    """
    Merge two equally long, position-matched lists of ppi records.

    Each output record keeps fields 0, 1 and 3 of the record from ppisa and
    replaces field 2 with combine_or applied to field 2 of both records.
    The records of ppisb contribute only their field 2; their other fields
    are not compared against those of ppisa.
    """
    merged = []
    for ppi_a, ppi_b in ut.zip_exact(ppisa, ppisb):
        first, second = ppi_a[0], ppi_a[1]
        joint = combine_or(ppi_a[2], ppi_b[2])
        merged.append((first, second, joint, ppi_a[3]))
    return merged