def pairs_notfound_sps(df, fs, sps="Hs Mm Sp Dm Ce".split()):
    """
    Per-species summary of the pairs in df that lack "frac" evidence.

    df: dataframe with id1, id2, and the sp_evidence columns
        (one "<sp>_evidence" column per species code in sps).
    fs: all the elution filenames; a file is attributed to a species when
        its name contains "<sp>_".
    sps: species codes to process, in order.

    For every species, the rows whose "<sp>_evidence" value is not "frac"
    are turned into (id1, id2) pairs, and one tuple is recorded:
        (number of pairs,
         pairs_found(pairs, odict), or the number of pairs if odict is falsy,
         pairs_orth_found(pairs, odict, all proteins in that species' files))

    Return (sps, results) with results in the same order as sps.
    """
    results = []
    for sp in sps:
        not_frac = df[df[sp + "_evidence"] != "frac"]
        pairs = [(row["id1"], row["id2"]) for _, row in not_frac.iterrows()]
        n_pairs = len(pairs)
        # Single-argument parenthesized print emits the same text whether
        # print is a statement or a function.
        print("%s pairs for %s" % (n_pairs, sp))

        odict = orth.odict("Hs", sp)
        if odict:
            orths = pairs_found(pairs, odict)
        else:
            # Original note: "same sp" -- presumably no orthology mapping is
            # needed for the base species, so every pair counts as found.
            orths = n_pairs

        sp_files = [fname for fname in fs if (sp + "_") in fname]
        print("%s fractionations for %s" % (len(sp_files), sp))

        sp_prots = el.all_prots(sp_files)
        counts = pairs_orth_found(pairs, odict, sp_prots)
        results.append((n_pairs, orths, counts))
    return sps, results
def prot_counts(fs, min_count=2):
    """
    Accumulate normalized spectral counts for the proteins in a set of
    fractionations.

    For each elution file, every protein's counts are summed across that
    file's fractions, divided by the file's grand total, and divided by the
    number of files. The per-file contributions are then added up, so each
    returned value is the protein's share of a file's counts, averaged over
    all files in fs.

    Filtered s.t. any returned protein is in
    el.all_prots(fs, min_count=min_count), i.e. (per the original note) has
    at least min_count counts in one fraction of one of the fractionations.

    A fractionation whose total count is zero contributes nothing; it is
    skipped rather than dividing by zero, but still counts toward the number
    of files used for averaging.

    Return a defaultdict(float): {prot1:value1, prot2:value2, ...}
    """
    allprots = el.all_prots(fs, min_count=min_count)
    pcounts = collections.defaultdict(float)
    n_files = len(fs)
    for f in fs:
        e = el.load_elution(f)
        # One total per protein (row) across all fractions (columns).
        psums = np.sum(np.array(e.mat), axis=1)
        frac_sum = float(np.sum(psums))
        if frac_sum == 0:
            # Nothing observed in this file: avoid ZeroDivisionError / NaN.
            continue
        # 1.0 forces true division; with integer counts a plain 1 / n would
        # truncate to 0 under Python 2 division rules.
        norm_term = 1.0 / (frac_sum * n_files)
        for p, psum in zip(e.prots, psums):
            if p in allprots:
                pcounts[p] += psum * norm_term
    return pcounts
def manysp_all_prots(sps, elutfs, **kwargs):
    """
    Call el.all_prots once per species over the same elution files.

    sps: iterable of species codes; each is passed as sp_base.
    elutfs: elution filenames, passed unchanged to every call.
    kwargs: forwarded as-is to el.all_prots.

    Return a dict: {sp1: el.all_prots(elutfs, sp_base=sp1, ...), ...}
    """
    prots_by_sp = {}
    for sp in sps:
        prots_by_sp[sp] = el.all_prots(elutfs, sp_base=sp, **kwargs)
    return prots_by_sp