def get_expression(**kwargs):
    """Return a 0/1 assay-by-gene matrix for thresholded "wormtile" peaks.

    Peaks are fetched with ``wp.get_simple_thr`` using atype "wormtile",
    dthr=3000, dsign=-1 and sthr=1e-5.  Rows follow the iteration order of
    the returned mapping; columns are genes ranked by their "genomestart"
    entry in ``wp.gene_info``.  A cell is 1 when the assay lists the gene
    at least once and 0 otherwise.
    """
    atype = "wormtile"
    simple = wp.get_simple_thr(
        **mem.rc(kwargs, atype=atype, dthr=3000, dsign=-1, sthr=1e-5)
    )

    # Distinct gene names per assay, then the union across all assays.
    names_by_assay = dict((key, set(val["gnames"])) for key, val in simple.items())
    gene_union = set()
    for names in names_by_assay.values():
        gene_union.update(names)

    # The return value is not used below; only the call itself is kept.
    # NOTE(review): confirm whether this call is still required.
    wp.chromosome_offsets()
    gene_info = wp.gene_info(**mem.rc(kwargs))

    genome_coords = dict(
        (gene, gene_info[gene]["genomestart"]) for gene in gene_union
    )
    gene_idxs = dict((gene, idx) for idx, gene in enumerate(gene_union))

    # argsort of the coordinate-ordered indices is the inverse permutation,
    # i.e. the rank of every gene index along the genome.
    by_coord = sorted(gene_idxs.items(), key=lambda pair: genome_coords[pair[0]])
    rank_of_idx = argsort([idx for _gene, idx in by_coord])
    column_of = dict((gene, rank_of_idx[idx]) for gene, idx in gene_idxs.items())

    row_of = dict((key, row) for row, key in enumerate(simple.keys()))
    gc = zeros((len(simple), len(gene_union)))
    for key, val in simple.items():
        # groupby over the sorted names yields one run per distinct gene.
        for gene, run in it.groupby(sorted(val["gnames"])):
            gc[row_of[key], column_of[gene]] = len(list(run))
    return 1 * greater(gc, 0)
def get_easy0(**kwargs):
    """Build a scored (tf, gene, weight) edge list from thresholded peaks.

    The builder ``set_easy0`` is handed to ``mem.getOrSet`` together with
    ``mem.rc(kwargs)``.

    NOTE(review): the original summary spoke of "the arbitrary distance
    cutoff of 3000 bases and grabbing the n highest scoring edges
    globally", but the code requests dthr=1500 and selects edges by two
    fixed log10 score cuts -- confirm which is intended.

    NOTE(review): ``set_easy0`` raises a bare ``Exception`` right after
    fetching the peaks, so everything after that statement is currently
    unreachable.
    """
    def set_easy0(**kwargs):
        atype = kwargs.get("atype")
        simple = wp.get_simple_thr(atype=atype, dthr=1500, dsign=-1, sthr=1e-4)
        # Unconditional failure: none of the code below ever runs.
        raise Exception()
        # log10 score cuts: below the soft cut an edge gets weight 1,
        # between the two cuts it gets a fractional weight.
        score_soft_cut = -136
        score_hard_cut = -90
        sids = wu.symbol_ids()
        prop_tuples = []
        for tf, props in simple.iteritems():
            # for now, remove tfs that are not mappable
            if not tf in sids.keys():
                continue
            ssrt = argsort(props["scores"])
            lscores = log10(props["scores"][ssrt])
            # ``easy`` and ``medium`` are positions in the *sorted* score
            # array.
            easy = nonzero(less(lscores, score_soft_cut))[0]
            medium = nonzero(greater(lscores, score_soft_cut) * less(lscores, score_hard_cut))[0]
            # generous_edges = concatenate([easy,medium])
            # NOTE(review): props["genes"] is indexed with positions taken
            # from the sorted scores but is not itself reordered by
            # ``ssrt`` -- verify that genes and scores line up.
            prop_tuples.append(
                [
                    (tf, props["genes"][g], -(score_hard_cut - lscores[g]) / (score_soft_cut - score_hard_cut))
                    for g in medium
                ]
            )
            prop_tuples.append([(tf, props["genes"][g], 1) for g in easy])
        edgelist = array(list(it.chain(*prop_tuples)))
        # Translate the tf through ``sids`` and reduce the gene to a slice
        # of its second db_xref qualifier.
        edges = [(sids[e[0]], e[1].qualifiers["db_xref"][1][9:], e[2]) for e in edgelist]
        return edges
    return mem.getOrSet(set_easy0, **mem.rc(kwargs))
def consensus_graph(name = 'none', graphs =(), **kwargs):
    '''
    Get a graph having nodes consisting of the union of the nodes in all
    graphs and having edges consisting of the intersection of edges in
    all graphs.

    name:   forwarded to mem.rc as both ``register`` and ``name``.
    graphs: sequence of nx.DiGraph instances.

    Returns an nx.DiGraph.  With no input graphs the result is an empty
    DiGraph.  Raises Exception when any input is not exactly an
    nx.DiGraph.
    '''
    def get_cons_graph(**kwargs):
        graphs = kwargs.get('graphs')
        cons = nx.DiGraph()
        # Nothing to combine: union/intersection of zero sets is undefined,
        # so hand back the empty graph instead of failing on graphs[0].
        if not graphs:
            return cons
        # Every input must be a plain DiGraph, not only the first one.
        for g in graphs:
            if type(g) != nx.DiGraph:
                raise Exception('For now, this method is only compatible with digraph')
        all_nodes = set.union(*[set(g.nodes()) for g in graphs])
        ##NOTE, THIS SYNTAX IS DESIGNED FOR DIRECTED GRAPHS
        ##FOR UNDIRECTED, IT WILL FAIL TO COUNT BIDIRECTIONAL EDGES
        all_edges = set.intersection(*[set(g.edges()) for g in graphs])
        cons.add_nodes_from(all_nodes)
        cons.add_edges_from(all_edges)
        return cons

    return mem.getOrSet(
        get_cons_graph,
        **mem.rc(kwargs,
                 on_fail = 'compute',
                 register = name,
                 graphs = graphs,
                 name = name))
def run0(spec_ct = 8, **kwargs):
    '''
    Run ``run_windows`` on the ``spec_ct`` best-scoring alignment rows.

    Each row of the numeric alignment is scored by summing, with axis
    argument 1, the positions whose value is below 4 and equal to the
    first row.  Rows and their names are ordered by descending score and
    truncated to ``spec_ct``.  ``run_windows`` is then called for n_specs
    3 and 8 with 500 runs, window length 150 and window offset 75.

    Returns (locii, results), two dicts keyed by n_specs.
    '''
    def setLocusResults(**kwargs):
        spec_ct = kwargs.get('spec_ct')
        a0 = fetch_num_ali()
        names = fetch_alinames()
        # The reference is the first row of the alignment *before* it is
        # reordered below.
        ref = a0[0]
        ali_counts = sum(less(a0, 4) * equal(a0, a0[0, :]), 1)

        # Highest score first.
        order = argsort(ali_counts)[::-1]
        names = [names[i] for i in order][:spec_ct]
        a0 = a0[order][:spec_ct]

        wl = 150
        n_runs = 500
        locii = {}
        results = {}
        for n_specs in (3, 8):
            found, outcome = run_windows(a0, ref,
                                         n_specs = n_specs,
                                         n_runs = n_runs,
                                         win_len = wl,
                                         win_ofs = wl // 2,
                                         spec_names = names)
            locii[n_specs] = found
            results[n_specs] = outcome
        return locii, results

    return mem.getOrSet(setLocusResults,
                        **mem.rc(kwargs,
                                 on_fail = 'compute',
                                 spec_ct = spec_ct))
def chromosome_offsets(**kwargs):
    '''
    Map every chromosome name to its cumulative offset.

    For each name from ``chromosome_names()`` a file whose name contains
    '.gb' is located under the chromosome's directory, parsed as GenBank,
    and the end position of the first feature of the first record is
    taken as that chromosome's length.  The offset of a chromosome is the
    summed length of all chromosomes listed before it.

    Raises IOError when no '.gb' file exists for a chromosome.
    '''
    def set_chromosome_offsets(**kwargs):
        names = chromosome_names()
        root = cfg.dataPath('/data/genomes/Caenorhabditis_elegans')
        lens = []
        for name in names:
            fdir = os.path.join(root, name)
            # Remember only the path while walking; when several
            # directories hold a match, the last directory walked wins,
            # and no handle is opened until the choice is final.
            gb_path = None
            for r, d, files in os.walk(fdir):
                for f in files:
                    if '.gb' in f:
                        gb_path = os.path.join(r, f)
                        break
            if gb_path is None:
                raise IOError('no .gb file found under {0}'.format(fdir))
            with open(gb_path) as fopen:
                gb = list(sio.parse(fopen, 'genbank'))[0]
            lens.append(gb.features[0].location.end.position)

        offsets = {}
        cur_ofs = 0
        for name, length in zip(names, lens):
            offsets[name] = cur_ofs
            cur_ofs += length
        return offsets

    return mem.getOrSet(set_chromosome_offsets,
                        **mem.rc(kwargs, hardcopy = True))
def get_tss(**kwargs):
    '''
    Collect per-chromosome start sites split by strand.

    For every chromosome name the result holds a dict with
      fwd_genes: indices (into the chromosome's gene list) of genes with
                 strand > 0, ordered by start position
      fwd_tss:   the matching start positions
      rev_genes: indices of genes with strand < 0, ordered by end position
      rev_tss:   the matching end positions
    '''
    def load_tss(**kwargs):
        cnames = chromosome_names()
        genes = parse_genes()
        position = lambda pair: pair[1]
        out = {}
        for name in cnames:
            crgenes = genes[name]
            strands = array([g.strand for g in crgenes])
            starts = array([g.location.start.position for g in crgenes])
            ends = array([g.location.end.position for g in crgenes])
            fwd = nonzero(greater(strands, 0))[0]
            rev = nonzero(less(strands, 0))[0]

            # (gene index, position) pairs ordered by position.
            # note that starts begin sorted; ends do not.
            fwd_pairs = sorted(zip(fwd, starts[fwd]), key = position)
            rev_pairs = sorted(zip(rev, ends[rev]), key = position)

            out[name] = {'fwd_genes': [idx for idx, _pos in fwd_pairs],
                         'fwd_tss': [pos for _idx, pos in fwd_pairs],
                         'rev_genes': [idx for idx, _pos in rev_pairs],
                         'rev_tss': [pos for _idx, pos in rev_pairs]}
        return out

    return mem.getOrSet(load_tss, **mem.rc(kwargs, hardcopy = True))
def get_simple_description(**kwargs):
    '''
    Flatten the per-assay primary gene hits of every tf into flat lists.

    kwargs:
      atype: assay type, defaults to ``default_atype``; also used as the
             ``name`` passed to mem.rc.

    Returns a dict keyed by tf whose values hold 'gnames', 'genes',
    'dists' (array) and 'scores' (array), each concatenated over the tf's
    assays in the same assay order.
    '''
    kwargs.setdefault('atype', default_atype)

    def set_simple_description(**kwargs):
        props = get_assay_gprops(**mem.sr(kwargs))
        chips = get_assay_info(**mem.sr(kwargs))

        def gene_id(g):
            # identifier sliced out of the second db_xref qualifier
            return g.qualifiers['db_xref'][1][9:]

        simple = {}
        for tf, assays in props.items():
            assay_keys = assays.keys()

            primaries = []
            scores = []
            for k in assay_keys:
                primaries.extend(assays[k]['primaries'])
                # scores are read from the chip records, not from the
                # primaries; presumably both are aligned -- TODO confirm
                scores.extend([e['score'] for e in chips[tf][k]])

            simple[tf] = {}
            simple[tf]['gnames'] = [gene_id(e['gene']) for e in primaries]
            simple[tf]['genes'] = [e['gene'] for e in primaries]
            simple[tf]['dists'] = array([e['dist'] for e in primaries])
            simple[tf]['scores'] = array(scores)
        return simple

    return mem.getOrSet(set_simple_description,
                        **mem.rc(kwargs, name = kwargs['atype']))
def tf_chip_peaks(**kwargs):
    '''
    Parse every file in the 'wormchip' data directory into peak records.

    Lines whose first character is '#' are skipped.  Remaining lines are
    split on tabs and zipped with the column names 'chr', 'meth', 'type',
    'start', 'end', 'score', 'blank', 'blank2', 'qValue'.  'start' and
    'end' are converted to int, 'score' to float, and 'qValue' to the
    float following its '=' sign.

    Returns a dict mapping each file's basename to its list of records.
    '''
    def setTf_Chip_Peaks(**kwargs):
        root = cfg.dataPath('wormchip')
        fields = ['chr', 'meth', 'type', 'start', 'end', 'score',
                  'blank', 'blank2', 'qValue']
        out = {}
        for entry in os.listdir(root):
            path = os.path.join(root, entry)
            # The context manager closes the handle even if reading fails.
            with open(path) as fopen:
                data = [l for l in fopen.readlines() if not l[0] == '#']
            out[os.path.basename(path)] = \
                [dict(zip(fields, l.strip().split('\t'))) for l in data]

        for peaks in out.values():
            for d in peaks:
                d['start'] = int(d['start'])
                d['end'] = int(d['end'])
                d['score'] = float(d['score'])
                d['qValue'] = float(d['qValue'].split('=')[1])
        return out

    return mem.getOrSet(setTf_Chip_Peaks,
                        **mem.rc(kwargs, hardcopy = True))
def term_groups(name = 'bdtnp', nterms = -1 ,**kwargs):
    '''
    Group genes by the controlled vocabulary terms that apply to them.

    name:   'bdtnp' (keys of nio.getBDTNP()) or 'kn' (nodes of
            graphs['kn']); selects the gene list.
    kwargs:
      nterms: defaults to -1, meaning "keep every term group".

    Returns a list of (term, [(gene, term), ...]) tuples ordered from the
    largest group to the smallest and truncated to ``nterms`` entries.

    Raises ValueError for an unsupported ``name``.
    '''
    def set_term_groups(**kwargs):
        nterms = kwargs.get('nterms')
        if name == 'bdtnp':
            gene_list = nio.getBDTNP().keys()
        elif name == 'kn':
            gene_list = graphs['kn'].nodes()
        else:
            # Previously this fell through and failed later on an unbound
            # gene_list.
            raise ValueError('unsupported gene list name: {0!r}'.format(name))

        #GET ALL CONTROLLED VOCAB TERMS APPLYING TO A GIVEN GENE LIST
        terms = [(gname, gt)
                 for gname in gene_list
                 for gt in gene_terms(gname)]
        term_of = lambda x: x[1]
        term_groups_tmp = [(k, list(g))
                           for k, g in it.groupby(sorted(terms, key = term_of),
                                                  key = term_of)]

        #SORT THE TERM GROUPS BY GENE COUNT AND ONLY TAKE TOP N
        if nterms == -1:
            nterms = len(term_groups_tmp)
        # Ascending sort reversed (rather than reverse=True) so that the
        # relative order of equally sized groups stays as before.
        ranked = sorted(term_groups_tmp, key = lambda x: len(x[1]))[::-1]
        return ranked[:nterms]

    return mem.getOrSet(set_term_groups,
                        **mem.rc(kwargs,
                                 on_fail = 'compute',
                                 register = '{0}_{1}'.format(name, nterms),
                                 nterms = nterms))
def term_network(name = 'bdtnp', nterms = -1 , **kwargs):
    '''
    Build an undirected graph linking genes that share a term group.

    name:   'bdtnp' (keys of nio.getBDTNP()) or 'kn' (nodes of
            graphs['kn']); selects the gene list used as node set.
    kwargs:
      nterms: defaults to -1

    Every gene of the list becomes a node.  For each group returned by
    ``term_groups`` an edge is added between every ordered pair of its
    member genes, including each gene paired with itself.

    Raises ValueError for an unsupported ``name``.
    '''
    def set_term_network(**kwargs):
        name = kwargs.get('name')
        if name == 'bdtnp':
            gene_list = nio.getBDTNP().keys()
        elif name == 'kn':
            gene_list = graphs['kn'].nodes()
        else:
            # Previously this fell through and failed later on an unbound
            # gene_list.
            raise ValueError('unsupported gene list name: {0!r}'.format(name))

        grps = term_groups(**mem.sr(kwargs, name = name))
        network = nx.Graph()
        network.add_nodes_from(gene_list)
        for g in grps:
            members = g[1]
            edgelist = [[g1[0], g2[0]] for g1 in members for g2 in members]
            network.add_edges_from(edgelist)
        return network

    return mem.getOrSet(set_term_network,
                        **mem.rc(kwargs,
                                 on_fail = 'compute',
                                 register = '{0}_{1}'.format(name, nterms),
                                 nterms = nterms,
                                 name = name))
def getTrackChrGenes(**kwargs):
    '''
    Get all of the genes from a bed file on a given chromosome.

    kwargs
      num:    chromosome number (default 1)
      fname:  bedfile path (default: the module-level ``mousefile``)

    returns a list with one dict per row read from the track, mapping the
    row's keys to its data values.
    '''
    def setTrackChrGenes(**kwargs):
        fname = kwargs.get('fname', mousefile)
        num = kwargs.get('num', 1)
        t = track.load(fname)
        chromosome_data = t.read('chr{0}'.format(num))
        return [dict(zip(r.keys(), r.data)) for r in chromosome_data]

    # NOTE(review): an explicit fname is placed in the name verbatim while
    # the default is reduced to its basename -- confirm this is intended.
    name = '{0}_{1}'.format(kwargs.get('fname', os.path.basename(mousefile)),
                            kwargs.get('num', 1))
    # The keyword is spelled ``on_fail`` like every other mem.rc call in
    # this file (it used to read ``onfail``).
    return mem.getOrSet(setTrackChrGenes,
                        **mem.rc(kwargs,
                                 on_fail = 'compute',
                                 name = name))
def get_results(**kwargs):
    '''
    Index cluster results by module, by gene and by tf.

    kwargs:
      tsrt: sequence whose items' first elements are passed to
            ``fetch_cluster_results``.

    Returns (mods, genes, tfs):
      mods:  module -> [{'tissue', 'gene'}, ...]
      genes: gene   -> [{'tissue', 'module'}, ...]
      tfs:   tf     -> [{'tissue', 'module', 'gene'}, ...] over every
             module that contains the tf
    '''
    def set_results(**kwargs):
        tissue_ids = [t[0] for t in kwargs.get('tsrt')]
        cells = fetch_cluster_results(tissue_ids)

        def members_of(mod):
            return [{'tissue': c[0], 'gene': m['gene']}
                    for c in cells
                    for m in c[1]
                    if m['module'] == mod]

        mod_list = list(set(m['module'] for c in cells for m in c[1]))
        mods = dict((mod, members_of(mod)) for mod in mod_list)

        # tfs are the individual elements of the module keys.
        tf_list = set(it.chain(*mods.keys()))
        gene_list = set(elt['gene'] for v in mods.values() for elt in v)

        def hits_for_tf(tf):
            return [{'tissue': elt['tissue'],
                     'module': k,
                     'gene': elt['gene']}
                    for k, v in mods.items()
                    if tf in k
                    for elt in v]

        def hits_for_gene(g):
            return [{'tissue': elt['tissue'], 'module': k}
                    for k, v in mods.items()
                    for elt in v
                    if elt['gene'] == g]

        tfs = dict((tf, hits_for_tf(tf)) for tf in tf_list)
        genes = dict((g, hits_for_gene(g)) for g in gene_list)
        return mods, genes, tfs

    return mem.getOrSet(set_results, **mem.rc(kwargs, on_fail = 'compute'))
def get_motifs(**kwargs):
    '''
    Run ``motif-match`` over the promoter sequences and collect the hits.

    The sequence file for the module-level ``promoter_type`` is piped to
    the ``motif-match`` command; every output line is split on single
    spaces and stored under the sequence name found in its second field.

    Returns a dict: sequence name -> list of dicts with 'motif', 'start',
    'end', 'strand' and 'score'.
    '''
    def set_motifs(**kwargs):
        mfpath = cfg.dataPath('motifs/all_vert_motifs.txt')
        fpath = cfg.dataPath('CRE/{0}_for_motifs.txt'.format(promoter_type))
        cmd = 'motif-match -n 1 -m {0} -V 1'.format(mfpath)

        # Read the input up front so the handle is closed deterministically.
        with open(fpath) as seq_file:
            seq_text = seq_file.read()

        prc = spc.Popen(cmd, shell = True,
                        stdin = spc.PIPE, stdout = spc.PIPE)
        mlines = prc.communicate(input = seq_text)[0].splitlines()

        seqs = {}
        for line in mlines:
            o = line.split(' ')
            seqs.setdefault(o[1], []).append({'motif': o[0],
                                              'start': int(o[2]),
                                              'end': int(o[3]),
                                              'strand': o[4],
                                              'score': float(o[6])})
        return seqs

    return mem.getOrSet(set_motifs,
                        **mem.rc(kwargs,
                                 on_fail = 'fail',
                                 register = promoter_type))
def get_mean_induction(**kwargs):
    '''
    Return the ratio of the column means of the mutant values: the mean
    of column 0 divided by the mean of column 1 of the second item
    returned by ``get_mutants()``.
    '''
    def set_mind(**kwargs):
        _cre, cre_rndvals, _keys = get_mutants()
        numerator = mean(cre_rndvals[:, 0])
        denominator = mean(cre_rndvals[:, 1])
        return numerator / denominator

    return mem.getOrSet(set_mind,
                        **mem.rc(kwargs,
                                 register = promoter_type,
                                 on_fail = 'compute'))
def tiling_peaks(**kwargs):
    '''
    Parse the gzipped peak files of the wormtile data directory.

    Only paths ending in 'gz' are read.  Lines whose first character is
    '#' are skipped; the rest are split on tabs and zipped with the
    column names 'chr', 'meth', 'type', 'start', 'end', 'score', 'blank',
    'blank2', 'annotations'.  'start' and 'end' are converted to int and
    'score' to float.

    Returns a dict mapping each file's basename to its list of records.
    '''
    def set_tiling_peaks(**kwargs):
        root = cfg.dataPath('modencode/wormtile/computed-peaks_gff3')
        fields = ['chr', 'meth', 'type', 'start', 'end', 'score',
                  'blank', 'blank2', 'annotations']
        out = {}
        for entry in os.listdir(root):
            path = os.path.join(root, entry)
            if path[-2:] != 'gz':
                continue
            fopen = gzip.open(path)
            # try/finally so the handle is released even if reading fails.
            try:
                data = [l for l in fopen.readlines() if not l[0] == '#']
            finally:
                fopen.close()
            out[os.path.basename(path)] = \
                [dict(zip(fields, l.strip().split('\t'))) for l in data]

        for peaks in out.values():
            for d in peaks:
                d['start'] = int(d['start'])
                d['end'] = int(d['end'])
                d['score'] = float(d['score'])
        return out

    return mem.getOrSet(set_tiling_peaks,
                        **mem.rc(kwargs,
                                 hardcopy = True,
                                 name = 'default'))
def get_assay_gprops(**kwargs):
    '''
    Attach the two nearest candidate genes to every peak of every assay.

    kwargs:
      atype: assay type, defaults to ``default_atype``; also used as the
             ``name`` passed to mem.rc.

    Each peak names four candidate genes ('fup_gene', 'fdown_gene',
    'rup_gene', 'rdown_gene').  The peak 'mean' is compared with the
    start position of the two f* genes and the end position of the two
    r* genes.  The candidate with the smallest absolute difference goes
    to 'primaries' and the runner-up to 'secondaries', each as a dict
    with 'gene' and 'dist'; 'dist' is the difference with its sign
    flipped for the f* candidates.

    Returns {tf: {assay: {'primaries': [...], 'secondaries': [...]}}}.
    '''
    kwargs.setdefault('atype', default_atype)

    def set_assay_gprops(**kwargs):
        chips = get_assay_info(**mem.sr(kwargs))
        genes = parse_genes()
        tf_stats = {}
        for tf, assays in chips.items():
            tf_stats[tf] = {}
            for assay, peaks in assays.items():
                # progress report: number of assays stored so far
                n_done = np.sum([len(done) for done in tf_stats.values()])
                print('n_exps = {0}'.format(n_done))
                tf_stats[tf][assay] = {}

                def candidates(field):
                    # gene object referenced by ``field`` for every peak
                    return [genes[p['chr']][p[field]] for p in peaks]

                f_ups = candidates('fup_gene')
                f_downs = candidates('fdown_gene')
                r_ups = candidates('rup_gene')
                r_downs = candidates('rdown_gene')

                fup_deltas = [p['mean'] - g.location.start.position
                              for p, g in zip(peaks, f_ups)]
                fdown_deltas = [p['mean'] - g.location.start.position
                                for p, g in zip(peaks, f_downs)]
                rup_deltas = [p['mean'] - g.location.end.position
                              for p, g in zip(peaks, r_ups)]
                rdown_deltas = [p['mean'] - g.location.end.position
                                for p, g in zip(peaks, r_downs)]

                # one row per peak; the column order is shared with
                # ``gene_columns`` below
                deltas = array([fdown_deltas, fup_deltas,
                                rdown_deltas, rup_deltas]).T
                gene_columns = (f_downs, f_ups, r_downs, r_ups)
                nearest_first = argsort(np.abs(deltas), 1)

                primaries = []
                secondaries = []
                for row, ranked in enumerate(nearest_first):
                    for rank, col in enumerate(ranked[:2]):
                        sign = -1 if col < 2 else 1
                        hit = {}
                        hit['gene'] = gene_columns[col][row]
                        hit['dist'] = deltas[row, col] * sign
                        if rank == 0:
                            primaries.append(hit)
                        else:
                            secondaries.append(hit)

                tf_stats[tf][assay]['primaries'] = primaries
                tf_stats[tf][assay]['secondaries'] = secondaries
        return tf_stats

    return mem.getOrSet(set_assay_gprops,
                        **mem.rc(kwargs,
                                 name = kwargs['atype'],
                                 hardcopy = True))
def site_mut_inds(**kwargs):
    '''
    Return, for every position of the consensus sequence, the set built
    from ``get_trip_muts(position)``.
    '''
    def set_site_muts(**kwargs):
        n_sites = len(get_cons())
        site_muts = []
        for idx in range(n_sites):
            site_muts.append(set(get_trip_muts(idx)))
        return site_muts

    return mem.getOrSet(set_site_muts,
                        **mem.rc(kwargs,
                                 register = promoter_type,
                                 on_fail = 'compute'))
def get_num_seqs(**kwargs):
    '''
    Return the mutant sequences as an array in which every letter has
    been replaced by its value in ``nt_ids()``.
    '''
    def set_num_seqs(**kwargs):
        ntdict = nt_ids()
        cre, _cre_rndvals, _keys = get_mutants()
        encoded = []
        for seq in cre:
            encoded.append([ntdict[let] for let in seq])
        return array(encoded)

    return mem.getOrSet(set_num_seqs,
                        **mem.rc(kwargs,
                                 register = promoter_type,
                                 on_fail = 'compute'))
def get_synapse_dict(**kwargs):
    '''
    Group the rows from ``get_rows()`` by their first column.

    Returns a dict mapping each distinct first-column value to the list
    of rows carrying it.
    '''
    def set_synapse_dict(**kwargs):
        rows = get_rows()
        first_col = lambda row: row[0]
        all_out_cxns = {}
        # Sorting first makes equal keys adjacent, so every key forms
        # exactly one group.
        for key, grp in it.groupby(sorted(rows, key = first_col),
                                   key = first_col):
            all_out_cxns[key] = list(grp)
        return all_out_cxns

    return mem.getOrSet(set_synapse_dict, **mem.rc(kwargs))
def peak_thr_histograms(**kwargs):
    '''
    histograms of score and distance

    Peaks are fetched with dthr=1500, sthr=1e-2 and dsign=-1.  Distances
    are counted in bins of width 50 and scores in unit-wide log10 bins.
    The log10 counts are plotted on one figure that is saved twice: once
    after the distance panel is drawn and again after the score panel
    has been added.
    '''
    dthr, sthr, dsign = 1500, 1e-2, -1
    simple = wp.get_simple_thr(**mem.rc(kwargs,
                                        dthr = dthr,
                                        sthr = sthr,
                                        dsign = dsign))

    # -1 serves as the "not set yet" marker for both extremes.
    min_score = -1
    max_score = -1
    for assay in simple.values():
        highest = np.max(assay['scores'])
        if max_score == -1 or highest > max_score:
            max_score = highest
        lowest = np.min(assay['scores'])
        if min_score == -1 or lowest < min_score:
            min_score = lowest

    log_lo = int(floor(log10(min_score)))
    log_hi = int(ceil(log10(max_score)))
    sbin_mids = range(log_lo, log_hi + 1)
    sbins = zeros(len(sbin_mids))

    dbin_size = 50
    dbin_mids = range(-dthr, dthr, dbin_size)
    dbins = zeros(len(dbin_mids))

    for assay in simple.values():
        for d in assay['dists']:
            # shift by dthr so the lowest distance lands in bin 0
            dbins[int(d + dthr) // dbin_size] += 1
        for s in assay['scores']:
            sbins[int(log10(s) - log_lo)] += 1

    f = myplots.fignum(1, (10, 6))

    dist_ax = f.add_subplot(121)
    dist_ax.set_ylabel('log 10 counts')
    dist_ax.set_xlabel('distance')
    dist_ax.set_title('simplified tss distances (d<{0})'.format(dthr))
    dist_ax.plot(dbin_mids, log10(dbins), color = 'black')
    f.savefig(myplots.figpath('chip_simple_distance.pdf'))

    score_ax = f.add_subplot(122)
    score_ax.set_title('simplified tss scores (s<{0:2.2})'.format(sthr))
    score_ax.set_ylabel('log10 counts')
    score_ax.set_xlabel('log10 peak score')
    score_ax.plot(sbin_mids, log10(sbins), color = 'black')
    f.savefig(myplots.figpath('chip_simple_scores.pdf'))
def get_map_rows(**kwargs):
    """Read "wormbase/loci_all.txt" into a list of row dicts.

    The first line supplies the comma-separated column names.  Every
    following line except the last is split on commas and zipped with
    those names; names and values are stripped of surrounding whitespace.
    """
    def set_map_rows(**kwargs):
        mapfile = cfg.dataPath("wormbase/loci_all.txt")
        # The context manager closes the handle once the lines are read.
        with open(mapfile) as fopen:
            lines = fopen.readlines()
        cols = [e.strip() for e in lines[0].strip().split(",")]
        # NOTE(review): the final line of the file is deliberately left
        # out by the [1:-1] slice -- confirm it is a trailer and not data.
        rows = [
            dict(zip(cols, [e.strip() for e in l.strip().split(",")]))
            for l in lines[1:-1]
        ]
        return rows

    return mem.getOrSet(set_map_rows, **mem.rc(kwargs))
def get_cons(**kwargs):
    '''
    Return the consensus of every 100th mutant sequence.

    For each column the most frequent symbol is chosen; among equally
    frequent symbols the one that sorts last wins.
    '''
    def consensus_seq(seqs):
        cons = []
        for column in seqs.T:
            # (symbol, count) pairs in ascending symbol order
            tallies = [(sym, len(list(run)))
                       for sym, run in it.groupby(sorted(column))]
            # stable sort by count keeps the symbol order among ties
            tallies.sort(key = lambda pair: pair[1])
            cons.append(tallies[-1][0])
        return cons

    def set_cons(**kwargs):
        seqs, _seqs_rndvals, _keys = get_mutants(**mem.sr(kwargs))
        return consensus_seq(seqs[::100])

    return mem.getOrSet(set_cons,
                        **mem.rc(kwargs,
                                 register = promoter_type,
                                 on_fail = 'compute'))
def get_rows(**kwargs):
    '''
    Read 'NeuronConnect.xls' from the 'wormbrain/2006' data directory.

    Returns the cell values of the first sheet as a list of rows,
    leaving out the sheet's first row.
    '''
    def set_rows(**kwargs):
        root = cfg.dataPath('wormbrain/2006')
        connect_file = os.path.join(root, 'NeuronConnect.xls')
        sheet = xlrd.open_workbook(connect_file).sheets()[0]
        rows = []
        for i in range(1, sheet.nrows):
            rows.append([cell.value for cell in sheet.row(i)])
        return rows

    return mem.getOrSet(set_rows, **mem.rc(kwargs))
def get_graph(**kwargs):
    '''
    Build a weighted DiGraph from the connection rows.

    Rows are grouped by their first column; within each group only rows
    whose third column is in ``get_edge_set()`` are kept.  Every kept row
    contributes an edge (column 0 -> column 1) weighted by column 3.
    '''
    def set_graph(**kwargs):
        edge_set = get_edge_set()
        rows = get_rows(**mem.sr(kwargs))

        source = lambda row: row[0]
        sub_cxns = {}
        for src, grp in it.groupby(sorted(rows, key = source), key = source):
            sub_cxns[src] = [row for row in grp if row[2] in edge_set]

        g = nx.DiGraph()
        for kept in sub_cxns.values():
            weighted = [(row[0], row[1], row[3]) for row in kept]
            g.add_weighted_edges_from(weighted)
        return g

    return mem.getOrSet(set_graph, **mem.rc(kwargs))
def getGraph(name = 'easy0', control = 'in_degree',atype='wormtile',**kwargs):
    '''
    Build a weighted DiGraph from the edges returned by ``getNet``.

    name:    accepted but not used inside this function.
             NOTE(review): it is not forwarded to getNet -- confirm.
    control: when 'in_degree', nodes that currently have no outgoing
             edges and whose in-degree in the unpruned graph is below 4
             are removed.
    atype:   forwarded to getNet through mem.rc.

    Returns the (possibly pruned) nx.DiGraph.
    '''
    edges = getNet(**mem.rc(kwargs, atype = atype))
    dg = nx.DiGraph()
    dg.add_weighted_edges_from(edges)
    if control == 'in_degree':
        # In-degrees are counted once, before any node is removed.
        in_degrees = dict((n, 0) for n in dg.nodes())
        for _src, dst in dg.edges():
            in_degrees[dst] += 1
        # Iterate over a snapshot: removing nodes while looping over a
        # live node view raises "dictionary changed size during
        # iteration".  The out-degree check still sees earlier removals.
        for n in list(dg.nodes()):
            if len(dg[n]) == 0 and in_degrees[n] < 4:
                dg.remove_node(n)
    return dg
def peak_distance_histogram(**kwargs):
    '''
    Plot per-tf histograms of peak distances to primary and secondary
    genes and save the figure.

    kwargs:
      atype: used for the plot title and the output file name; defaults
             to wp.default_atype.  All kwargs are also handed to
             wp.get_assay_gprops through mem.rc.

    Distances are binned with width 200 over [-10000, 10000); values
    falling outside are clamped into the first or last bin.  Primary
    histograms are drawn in green, secondary ones in red, one line per
    entry of ``chips``.
    '''
    atype = kwargs.get('atype', wp. default_atype)
    chips = wp.get_assay_gprops(**mem.rc(kwargs))
    chiplist = chips.values()
    # chipkeys, xs and ys are not used further down.
    chipkeys = chips.keys()
    xs = []
    ys = []
    # Largest absolute secondary distance over all assays; the value is
    # not used further down.
    sec_spread = np.max([ np.max([ np.max(np.abs([e['dist'] for e in v2['secondaries']])) for v2 in v.values()]) for v in chips.values() ])
    hist_spread = 10000
    bin_wid = 200
    bin_mids = arange(-1* hist_spread, 1*hist_spread,bin_wid)
    # NOTE(review): ``/`` here and below relies on Python 2 integer
    # division for int operands -- confirm before porting.
    bin_starts = bin_mids - bin_wid/2
    nb = len(bin_starts)
    # One histogram row per entry of ``chips``.
    prim_hists = zeros((len(chips), len(bin_starts)))
    sec_hists = zeros((len(chips), len(bin_starts)))
    for i,e in enumerate(chiplist):
        for k,v in e.iteritems():
            # Bin index relative to distance 0; the result of the
            # division depends on the type of 'dist' (TODO confirm).
            pbins = array([e2['dist']/bin_wid for e2 in v['primaries']],int)
            sbins = array([e2['dist']/bin_wid for e2 in v['secondaries']],int)
            # Shift so that distance 0 falls in the middle bin.
            pbins += nb /2
            sbins += nb /2
            # Clamp out-of-range indices into the edge bins.
            sbins[less(sbins,0)] = 0
            sbins[greater(sbins,nb-1)] = nb-1
            pbins[less(pbins,0)] = 0
            pbins[greater(pbins,nb-1)] = nb-1
            for b in pbins:
                prim_hists[i][b]+=1
            for b in sbins:
                sec_hists[i][b]+=1
    f= myplots.fignum(1,(8,6))
    ax = f.add_subplot(111)
    ax.set_title('chip peak distances to primary/sec tss for {0}'.format(atype))
    for p in prim_hists:
        ax.plot(bin_mids,p, color = 'green')
    for s in sec_hists:
        ax.plot(bin_mids,s, color = 'red')
    f.savefig(myplots.figpath('chip_distance_hists_for{0}.pdf'.format(atype)))
def get_motif_dicts(pad = 2, **kwargs):
    '''
    Group sequence indices by the set of masks their mutations touch.

    pad: passed to ``cre_masks``; also part of the ``register`` string.

    Every numeric sequence is compared with the numeric consensus.  Its
    key is the tuple of mask indices for which at least one mismatching
    position lies under the mask.

    Returns a dict: key tuple -> list of sequence indices.
    '''
    def set_motif_dicts(**kwargs):
        masks = cre_masks(kwargs.get('pad'))
        # Look the letter table up once instead of once per letter.
        ntdict = nt_ids()
        cons = [ntdict[let] for let in get_cons()]
        out = {}
        for j, seq in enumerate(get_num_seqs()):
            # The mismatch vector does not depend on the mask.
            mismatch = not_equal(seq, cons)
            key = tuple([i for i, mask in enumerate(masks)
                         if sum(mismatch * mask) != 0])
            # setdefault replaces the Python-2-only dict.has_key test.
            out.setdefault(key, []).append(j)
        return out

    return mem.getOrSet(set_motif_dicts,
                        **mem.rc(kwargs,
                                 pad = pad,
                                 register = '{0}_{1}'.format(promoter_type, pad),
                                 on_fail = 'compute'))
def last_5(**kwargs):
    """Search tweets for every hashtag over a fixed list of start days.

    Day strings "2011-<month>-<day>" are generated for d = 1..39 with
    month = 9 + d // 30 and day = 1 + d % 30.  For each entry of the
    module-level ``hashtags`` one search "#<tag> since:<day>" is issued
    per day with rpp=100.

    Returns a dict: hashtag -> list of search results, one per day.
    """
    days = []
    for d in range(1, 40):
        month_shift, day_shift = divmod(d, 30)
        days.append("2011-{0}-{1}".format(9 + month_shift, 1 + day_shift))

    def set_l5(**kwargs):
        days = kwargs.get("days")
        all_results = {}
        for h in hashtags:
            queries = ["#{0} since:{1}".format(h, d) for d in days]
            all_results[h] = [tweepy.api.search(q, rpp=100) for q in queries]
        return all_results

    return mem.getOrSet(
        set_l5, **mem.rc(kwargs, days=days, name=",".join(days)[:20])
    )
def get_array_imaps(**kwargs):
    '''
    Build index maps for neuron names and connection types.

    Neuron names are the keys of the synapse dict plus the second column
    of every row stored in it.  Connection types are a fixed list.

    Returns a dict with 'ctypes', 'ctypes_imap', 'nnames' and
    'nnames_imap'; each *_imap maps a value to its list position.
    '''
    def set_array_imaps(**kwargs):
        sdict = get_synapse_dict(**mem.sr(kwargs))

        nameset = set()
        for source, rows in sdict.items():
            nameset.add(source)
            for row in rows:
                nameset.add(row[1])
        nnames = list(nameset)

        ctypes = [u'Rp', u'EJ', u'Sp', u'S', u'R', u'NMJ']

        def index_map(values):
            return dict((val, pos) for pos, val in enumerate(values))

        return {'ctypes': ctypes,
                'ctypes_imap': index_map(ctypes),
                'nnames': nnames,
                'nnames_imap': index_map(nnames)}

    return mem.getOrSet(set_array_imaps, **mem.rc(kwargs))
def get_simple_thr(**kwargs):
    '''
    Filter the simple peak description by score, distance sign and
    distance magnitude.

    kwargs (each filter is skipped when its value is None, the default):
      sthr:  keep entries whose score is below sthr
      dsign: keep entries whose dist * dsign is positive
      dthr:  keep entries whose absolute dist is below dthr
      atype: assay type, defaults to ``default_atype``

    Returns a dict keyed like ``get_simple_description`` whose values
    hold the surviving 'genes', 'gnames', 'dists' and 'scores'.

    The ``name`` passed to mem.rc is
    "<atype>:dsign_<v>_dthr_<v>_sthr_<v>".
    '''
    for key in ('dthr', 'sthr', 'dsign'):
        kwargs.setdefault(key, None)
    kwargs.setdefault('atype', default_atype)

    def set_simple_thr(**kwargs):
        dthr = kwargs['dthr']
        dsign = kwargs['dsign']
        sthr = kwargs['sthr']
        simple = get_simple_description(**mem.sr(kwargs))
        out = {}
        for k, v in simple.items():
            # Boolean mask, narrowed by every active filter.
            keep = ones(len(v['scores']), dtype = bool)
            if sthr is not None:
                keep &= less(v['scores'], sthr)
            if dsign is not None:
                keep &= greater(v['dists'] * dsign, 0)
            if dthr is not None:
                keep &= less(abs(v['dists']), dthr)
            allowed = nonzero(keep)[0]
            out[k] = {'genes': [v['genes'][i] for i in allowed],
                      'gnames': [v['gnames'][i] for i in allowed],
                      'dists': v['dists'][allowed],
                      'scores': v['scores'][allowed]}
        return out

    # key/value pairs in alphabetical key order, joined by underscores
    parts = []
    for key in sorted(['dthr', 'sthr', 'dsign']):
        parts.append(key)
        parts.append(str(kwargs[key]))
    name = '{0}:'.format(kwargs['atype']) + '_'.join(parts)

    return mem.getOrSet(set_simple_thr, **mem.rc(kwargs, name = name))