def all_identities(source_ps, odict_fname, source_fasta, target_fasta, target_id_dict_fname=None): odict = load_odict(odict_fname) if target_id_dict_fname is not None: tid_dict = dict([(x[0],x[2]) for x in ut.load_lot(target_id_dict_fname)]) odict = dict([(k,[tid_dict[v] for v in vs]) for k,vs in odict.items()]) dsource = load_seq_dict(source_fasta) dtarget = load_seq_dict(target_fasta) pairs = [(s, odict[s][0]) for s in source_ps if s in odict] print "%s of %s with orthologs--getting identities" % (len(pairs), len(source_ps)) idents = [] for s,t in pairs: try: ident = seqs.percent_identity(dsource[s], dtarget[t]) idents.append(ident) except decorators.TimeoutError, ex: print "timeout for %s %s" %(s,t), ex.args except Exception, ex: print "unknown error for %s %s" % (s,t), ex.args
def exported_diff(cy_basefile, cy_difffile, col_header, diff_ppis=None, justids=False):
    """
    Makes a new cy_ file labeling whether that interaction is also found in
    the cy_difffile (or the diff_ppis--pass None for cy_difffile in that
    case).

    Args:
        cy_basefile: tab file of interactions; first row is the header.
        cy_difffile: tab file of interactions to diff against, or None to
            use diff_ppis instead.
        col_header: name of the added boolean column; also used as the
            output filename suffix via ut.pre_ext.
        diff_ppis: pair list used when cy_difffile is None.
        justids: if True, keep only the first two columns of each row.
    """
    def cy_ppi_to_pair(p):
        # cy ids look like "<prefix>_<id>"; compare on the bare ids.
        return (p[0].split('_')[1], p[1].split('_')[1])
    if cy_difffile is not None:
        pd_diff = pd.PairDict([cy_ppi_to_pair(p)
                               for p in ut.load_lot(cy_difffile)[1:]])
    else:
        pd_diff = pd.PairDict(diff_ppis)
    # Bug fix: the original called ut.load_lol(cy_basefile) twice, parsing
    # the whole base file once for the header and again for the rows.
    base = ut.load_lol(cy_basefile)
    header, lines = base[0], base[1:]
    if justids:
        lines = [l[:2] for l in lines]
        header = header[:2]
    header += [col_header]
    ut.write_tab_file(
        [r + [pd_diff.contains(cy_ppi_to_pair(r))] for r in lines],
        ut.pre_ext(cy_basefile, col_header), header=header)
def load_hpa_localization(fname='./enrichment_datasets/subcellular_location.csv'):
    """Load HPA subcellular localization pairs.

    Reads the csv (skipping the header row), strips surrounding double
    quotes from every field, keeps only rows whose column 4 equals
    "Supportive", and returns (column 0, column 1) tuples for those rows.
    """
    raw_rows = ut.load_lot(fname, sep=",")[1:]  # drop the header row
    cleaned = [[field.strip("\"") for field in row] for row in raw_rows]
    return [(row[0], row[1]) for row in cleaned if row[4] == "Supportive"]