def set_simple_description(**kwargs):
    """Flatten the per-assay gene properties of each factor into flat lists.

    For every key of ``get_assay_gprops`` the primary hits of all of its
    assays are concatenated, in ``assays.keys()`` order, into parallel
    sequences.

    Returns:
        dict mapping each factor key to a dict with
          'gnames' -- list of ``gene.qualifiers['db_xref'][1][9:]`` for every
                      primary hit,
          'genes'  -- list of the primary hits' gene objects,
          'dists'  -- array of the primary hits' 'dist' values,
          'scores' -- array of the 'score' of every peak stored by
                      ``get_assay_info`` under the same factor and assays.
    """
    props = get_assay_gprops(**mem.sr(kwargs))
    chips = get_assay_info(**mem.sr(kwargs))

    def gene_name(gene):
        # Second db_xref entry with its first nine characters dropped.
        return gene.qualifiers['db_xref'][1][9:]

    simple = {}
    for tf, assays in props.items():
        assay_keys = assays.keys()

        # Flatten once per source, then derive the parallel columns.
        primaries = []
        peaks = []
        for key in assay_keys:
            primaries.extend(assays[key]['primaries'])
        for key in assay_keys:
            peaks.extend(chips[tf][key])

        simple[tf] = {
            'gnames': [gene_name(hit['gene']) for hit in primaries],
            'genes': [hit['gene'] for hit in primaries],
            'dists': array([hit['dist'] for hit in primaries]),
            'scores': array([peak['score'] for peak in peaks]),
        }
    return simple
def set_synapse_array(**kwargs):
    """Accumulate the synapse dict into a dense three-dimensional array.

    Returns:
        Array of shape (len(nnames), len(nnames), len(ctypes)).  Entry
        [source, target, ctype] is the sum of ``row[3]`` over every row
        stored under ``source`` whose ``row[1]`` is ``target`` and whose
        ``row[2]`` is ``ctype``; the index of each name/type comes from
        ``get_array_imaps``.
    """
    imaps = get_array_imaps(**mem.sr(kwargs))
    n_types = len(imaps['ctypes'])
    type_index = imaps['ctypes_imap']
    n_names = len(imaps['nnames'])
    name_index = imaps['nnames_imap']

    out_cxns = get_synapse_dict(**mem.sr(kwargs))

    cxns = zeros((n_names, n_names, n_types))
    for source, rows in out_cxns.items():
        src = name_index[source]
        for row in rows:
            tgt = name_index[row[1]]
            kind = type_index[row[2]]
            cxns[src, tgt, kind] += row[3]
    return cxns
def plot_city_posts(**kwargs):
    """Scatter-plot the entries of ``city_posts`` by longitude and latitude.

    Each marker is sized by the number of items in the matching entry of
    ``cp["posts"]``.  The plot is drawn on figure 3 from ``myplots.fignum``;
    nothing is returned.
    """
    cp = city_posts(**mem.sr(kwargs))
    lons = cp["lons"]
    lats = cp["lats"]
    sizes = [len(posts) for posts in cp["posts"]]

    fig = myplots.fignum(3, (4, 4))
    axes = fig.add_subplot(111)
    axes.scatter(lons, lats, s=sizes)
def set_assay_gprops(**kwargs):
    """Pick the closest and second-closest flanking gene for every peak.

    For each peak from ``get_assay_info`` four candidate genes are looked up
    in ``parse_genes()[peak['chr']]``: 'fdown_gene', 'fup_gene',
    'rdown_gene' and 'rup_gene'.  The offset of a candidate is
    ``peak['mean']`` minus the gene's ``location.start.position`` (forward
    candidates) or ``location.end.position`` (reverse candidates).
    Candidates are ranked by absolute offset.

    Returns:
        dict ``{factor: {assay_key: {'primaries': [...],
        'secondaries': [...]}}}``.  Both lists hold one
        ``{'gene': gene_object, 'dist': value}`` per peak, for the closest
        and the second-closest candidate respectively.  'dist' is the offset
        with its sign flipped for forward candidates.
    """
    chips = get_assay_info(**mem.sr(kwargs))
    genes = parse_genes()

    # Column order of the offset matrix; columns 0-1 are forward-strand
    # candidates, columns 2-3 reverse-strand candidates.
    fields = ('fdown_gene', 'fup_gene', 'rdown_gene', 'rup_gene')

    tf_stats = {}
    for tf, assays in chips.items():
        tf_stats[tf] = {}
        for assay_key, peaks in assays.items():
            # Progress report: number of assays processed so far.
            print('n_exps = {0}'.format(
                np.sum([len(done) for done in tf_stats.values()])))

            chrom_genes = [genes[peak['chr']] for peak in peaks]

            candidates = []   # candidates[col][i]: gene object for peak i
            delta_cols = []   # delta_cols[col][i]: peak mean - gene anchor
            for col, field in enumerate(fields):
                col_genes = [cg[peak[field]]
                             for cg, peak in zip(chrom_genes, peaks)]
                if col < 2:
                    anchors = [gene.location.start.position
                               for gene in col_genes]
                else:
                    anchors = [gene.location.end.position
                               for gene in col_genes]
                candidates.append(col_genes)
                delta_cols.append([peak['mean'] - anchor
                                   for peak, anchor in zip(peaks, anchors)])

            deltas = array(delta_cols).T          # one row per peak
            ranking = argsort(np.abs(deltas), 1)  # closest candidate first

            primaries = []
            secondaries = []
            for i, ranked in enumerate(ranking):
                for rank, col in enumerate(ranked[:2]):
                    hit = {
                        'gene': candidates[col][i],
                        # NOTE(review): the flip for forward candidates
                        # presumably makes a positive dist mean "upstream in
                        # the gene's own orientation" -- confirm.
                        'dist': deltas[i, col] * (-1 if col < 2 else 1),
                    }
                    if rank == 0:
                        primaries.append(hit)
                    else:
                        secondaries.append(hit)

            tf_stats[tf][assay_key] = {
                'primaries': primaries,
                'secondaries': secondaries,
            }
    return tf_stats
def set_graph(**kwargs):
    """Build a weighted directed graph from the connection rows.

    Rows from ``get_rows`` are sorted and grouped by their first field.
    Within each group only rows whose third field is in ``get_edge_set()``
    are kept; each kept row adds the edge ``row[0] -> row[1]`` with weight
    ``row[3]``.

    Returns:
        The resulting ``nx.DiGraph``.
    """
    edge_set = get_edge_set()
    rows = get_rows(**mem.sr(kwargs))

    def source_of(row):
        return row[0]

    # source -> rows of that source whose connection type is allowed
    sub_cxns = {}
    for source, group in it.groupby(sorted(rows, key=source_of),
                                    key=source_of):
        sub_cxns[source] = [row for row in group if row[2] in edge_set]

    g = nx.DiGraph()
    for kept_rows in sub_cxns.values():
        weighted = [(row[0], row[1], row[3]) for row in kept_rows]
        g.add_weighted_edges_from(weighted)
    return g
def set_term_network(**kwargs):
    """Build an undirected graph linking genes that share a term group.

    kwargs['name'] selects the node set: 'bdtnp' uses the keys of
    ``nio.getBDTNP()``, 'kn' uses the nodes of ``graphs['kn']``.  Every
    group returned by ``term_groups`` then connects all pairs of its
    members, where a member's node id is ``member[0]`` for each member in
    ``group[1]``.

    Returns:
        The resulting ``nx.Graph``.

    Raises:
        ValueError: if kwargs['name'] is missing or not 'bdtnp'/'kn'.
            Previously this surfaced as an UnboundLocalError only after
            ``term_groups`` had already run.
    """
    name = kwargs.get('name')
    if name == 'bdtnp':
        gene_list = nio.getBDTNP().keys()
    elif name == 'kn':
        gene_list = graphs['kn'].nodes()
    else:
        raise ValueError(
            "unsupported network name {0!r}; expected 'bdtnp' or 'kn'"
            .format(name))

    grps = term_groups(**mem.sr(kwargs, name=name))

    network = nx.Graph()
    network.add_nodes_from(gene_list)
    for grp in grps:
        members = [member[0] for member in grp[1]]
        # NOTE(review): the full cross product includes (a, a), so every
        # member also receives a self-loop -- confirm this is intended.
        network.add_edges_from([(a, b) for a in members for b in members])
    return network
def set_array_imaps(**kwargs):
    """Build name and connection-type index maps for the synapse dict.

    Names are the keys of ``get_synapse_dict`` plus ``row[1]`` of every row
    stored under them.  Connection types are a fixed list.

    Returns:
        dict with
          'ctypes'      -- the fixed list of connection types,
          'ctypes_imap' -- connection type -> position in 'ctypes',
          'nnames'      -- list of all distinct names,
          'nnames_imap' -- name -> position in 'nnames'.
    """
    sdict = get_synapse_dict(**mem.sr(kwargs))

    nameset = set()
    for source, rows in sdict.items():
        nameset.add(source)
        nameset.update(row[1] for row in rows)
    nnames = list(nameset)

    ctypes = [u'Rp', u'EJ', u'Sp', u'S', u'R', u'NMJ']

    ctypes_imap = dict((ctype, idx) for idx, ctype in enumerate(ctypes))
    nnames_imap = dict((name, idx) for idx, name in enumerate(nnames))

    return {
        'ctypes': ctypes,
        'ctypes_imap': ctypes_imap,
        'nnames': nnames,
        'nnames_imap': nnames_imap,
    }
def plot_ronn(**kwargs):
    """Draw a stacked seismic plot of summed time-of-day seconds.

    For value ``i`` of ``last_5`` and its ``j``-th entry, cell ``[i, j]`` is
    the sum over the entry's items of ``created_at`` expressed as seconds
    since midnight.  Empty entries stay 0.  Only the last 20 columns are
    kept and each row is shifted so that its minimum is 0 before plotting.
    Nothing is returned.
    """
    l5 = last_5(**mem.sr(kwargs))
    histories = list(l5.values())

    ys = zeros((len(l5), len(histories[0])))
    for i, history in enumerate(histories):
        for j, items in enumerate(history):
            if len(items) != 0:
                stamps = [item.created_at for item in items]
                ys[i, j] = np.sum([
                    (((ts.hour) * 60 + ts.minute) * 60) + ts.second
                    for ts in stamps
                ])

    ys = ys[:, -20:]
    row_minima = np.min(ys, 1)
    ys = ys - row_minima[:, newaxis]

    seismic.seismic(ys, stacked=True, colors=mycolors.getct(len(ys)))
def set_simple_thr(**kwargs):
    """Filter the simple per-factor description by score and distance.

    Required kwargs (a value of ``None`` disables that filter):
        sthr:  keep hits whose score is strictly below ``sthr``.
        dsign: keep hits for which ``dist * dsign`` is strictly positive.
        dthr:  keep hits whose absolute distance is strictly below ``dthr``.

    Returns:
        dict mapping each factor of ``get_simple_description`` to a dict
        with 'genes', 'gnames', 'dists' and 'scores', restricted to the hits
        that pass every enabled filter.

    Raises:
        KeyError: if 'dthr', 'dsign' or 'sthr' is missing from kwargs.
    """
    dthr = kwargs['dthr']
    dsign = kwargs['dsign']
    sthr = kwargs['sthr']
    simple = get_simple_description(**mem.sr(kwargs))

    out = {}
    for tf, desc in simple.items():
        scores = desc['scores']
        dists = desc['dists']

        # Identity tests: `!= None` would be evaluated elementwise if a
        # threshold were ever passed as an array.
        keep = ones(len(scores), dtype=bool)
        if sthr is not None:
            keep &= less(scores, sthr)
        if dsign is not None:
            keep &= greater(dists * dsign, 0)
        if dthr is not None:
            keep &= less(abs(dists), dthr)

        allowed = nonzero(keep)[0]
        out[tf] = {
            'genes': [desc['genes'][i] for i in allowed],
            'gnames': [desc['gnames'][i] for i in allowed],
            'dists': dists[allowed],
            'scores': scores[allowed],
        }
    return out
def setAllGenes(**kwargs):
    """Attach nearby peaks to every gene on chromosomes chr1-chr19 and chrX.

    A peak from ``getPeaks()`` is a hit for a gene when, measured relative
    to the gene's strand-aware start (``g['start']`` if ``g['strand'] == 1``
    else ``g['end']``) and multiplied by the strand, either end of the peak
    lies within 2000 of that start or the peak straddles it.

    Returns:
        dict ``{'chrN': {gene_name: gene_object}}`` where gene_object holds
        'dnase_peaks' (hits sorted by their smaller stranded offset, each
        ``{'peak_info': peak, 'peak_stranded_offset': array([lo, hi])}``),
        'name', 'gene_info', 'start', 'end' and 'strand'.
    """
    allPeaks = getPeaks()
    all_results = {}
    for num in list(range(1, 20)) + ['X']:
        chrom = 'chr{0}'.format(num)
        print('Parsing Chromosome: chr{0}'.format(num))

        genes_dict = {}
        all_results[chrom] = genes_dict
        chrgenes = getTrackChrGenes(**mem.sr(kwargs, num=num))
        peaks = allPeaks[chrom]

        for i, g in enumerate(chrgenes):
            name = g['name']
            strand = g['strand']
            startpos = g['start'] if strand == 1 else g['end']

            hits = []
            for p in peaks:
                lo = strand * (p['start'] - startpos)
                hi = strand * (p['end'] - startpos)
                if lo > hi:
                    lo, hi = hi, lo
                # The straddle test compares signs directly instead of
                # testing the sign of lo * hi, which can overflow when the
                # default integer dtype is 32 bits wide.
                if abs(lo) < 2000 or abs(hi) < 2000 or lo < 0 < hi:
                    hits.append({'peak_info': p,
                                 'peak_stranded_offset': array([lo, hi])})
            hits.sort(key=lambda hit: hit['peak_stranded_offset'][0])

            genes_dict[name] = {
                'dnase_peaks': hits,
                'name': name,
                'gene_info': g,
                'start': g['start'],
                'end': g['end'],
                'strand': strand,
            }
            if i % 100 == 0:
                print('Gene {0}: {1}, {2} hits'.format(
                    i, g['name'], len(hits)))
    return all_results
def set_cons(**kwargs):
    """Return ``consensus_seq`` of every 100th sequence from ``get_mutants``.

    ``get_mutants`` must return exactly three values; only the first (the
    sequences) is used.
    """
    seqs, _seqs_rndvals, _keys = get_mutants(**mem.sr(kwargs))
    subsample = seqs[::100]
    return consensus_seq(subsample)
def set_assay_info(**kwargs):
    """Annotate every assay peak with its flanking forward/reverse genes.

    kwargs['atype'] selects the peak source: 'tfchip' (``tf_chip_peaks``) or
    'wormtile' (``tiling_peaks``).  Assay keys are grouped under a name
    extracted from the key itself: the text before the first ':' for
    'tfchip', the ``Tissue=`` field up to the first '(' for 'wormtile'.

    Returns:
        dict ``{name: {assay_key: [peak_info, ...]}}``.  Each peak_info
        holds 'fdown_gene', 'fup_gene', 'rdown_gene' and 'rup_gene' (entries
        of the chromosome's ``tss['fwd_genes']`` / ``tss['rev_genes']``),
        plus 'chr', 'start', 'end', 'mean' and 'score'.  Peaks whose 'chr'
        is not one of I, II, III, IV, V, X are skipped.

    Raises:
        KeyError: if 'atype' is missing from kwargs.
        ValueError: if kwargs['atype'] is not a supported assay type.
    """
    # peaks and keys
    atype = kwargs['atype']
    if atype == 'tfchip':
        assay_peaks = tf_chip_peaks(**mem.sr(kwargs))
    elif atype == 'wormtile':
        assay_peaks = tiling_peaks(**mem.sr(kwargs))
    else:
        raise ValueError(
            "unsupported atype {0!r}; expected 'tfchip' or 'wormtile'"
            .format(atype))

    tissue_re = re.compile(r'Tissue=([^\(]*)')
    keyvalfuns = {
        'tfchip': lambda x: x[:x.index(':')],
        'wormtile': lambda x: tissue_re.search(x).group(1),
    }
    keyval = keyvalfuns[atype]

    # index and uniquify tfs
    pkeyvals = dict((k, keyval(k)) for k in assay_peaks.keys())
    tfnames = set(pkeyvals.values())
    tfkeys = dict((name, []) for name in tfnames)
    for key, name in pkeyvals.items():
        tfkeys[name].append(key)

    chrmap = dict(zip(['I', 'II', 'III', 'IV', 'V', 'X'],
                      chromosome_names()))
    all_tss = get_tss()

    bind_infos = {}
    # loop through tfs, assays, and finally peaks
    for tfname in tfnames:
        print(tfname)
        bind_infos[tfname] = {}
        for ekey in tfkeys[tfname]:
            # Progress report: number of assays registered so far.
            print('n_exps = {0}'.format(
                np.sum([len(done) for done in bind_infos.values()])))
            infos = []
            bind_infos[tfname][ekey] = infos
            for e in assay_peaks[ekey]:
                start = e['start']
                end = e['end']
                c = e['chr']
                # if we have non chromosomal DNA, skip it!
                if c not in chrmap:
                    continue
                crkey = chrmap[c]
                tss = all_tss[crkey]
                fwd_genes = tss['fwd_genes']
                rev_genes = tss['rev_genes']

                # NOTE(review): with integer coordinates this is floor
                # division under Python 2 -- confirm before porting.
                mid = (start + end) / 2

                # Index of the first forward TSS >= mid and of the last
                # reverse TSS <= mid.
                # NOTE(review): assumes tss['fwd_tss'] / tss['rev_tss'] are
                # sorted and parallel to the gene lists -- confirm.
                fdownstream_idx = searchsorted(tss['fwd_tss'], mid)
                rdownstream_idx = searchsorted(tss['rev_tss'], mid,
                                               'right') - 1

                # Peak lies past the last forward TSS: clamp to the last
                # gene and let "up" and "down" coincide.
                fup_ofs = -1
                if fdownstream_idx == len(fwd_genes):
                    fdownstream_idx = fdownstream_idx - 1
                    fup_ofs = 0

                # Mirror case on the reverse strand: the peak lies before
                # the first reverse TSS, so the index is -1.  The original
                # guard re-tested the forward index and never fired, so -1
                # wrapped around to the last reverse gene.
                rup_ofs = 1
                if rdownstream_idx < 0:
                    rdownstream_idx = 0
                    rup_ofs = 0

                # get the flanking genes for upstream and downstream
                fdown_gene = fwd_genes[fdownstream_idx]
                if fdownstream_idx > 0:
                    fup_gene = fwd_genes[fdownstream_idx + fup_ofs]
                else:
                    fup_gene = fdown_gene

                rdown_gene = rev_genes[rdownstream_idx]
                if rdownstream_idx < len(rev_genes) - 1:
                    rup_gene = rev_genes[rdownstream_idx + rup_ofs]
                else:
                    rup_gene = rdown_gene

                infos.append({
                    'rup_gene': rup_gene,
                    'rdown_gene': rdown_gene,
                    'fup_gene': fup_gene,
                    'fdown_gene': fdown_gene,
                    'chr': crkey,
                    'start': start,
                    'end': end,
                    'mean': mid,
                    'score': e['score'],
                })
    return bind_infos
def load(plots = defplots, reset = False):
    """Load the graph, optionally draw it, and plot node-degree distances.

    Args:
        plots: dict of switches read with ``.get``: 'basic_structure'
            (default False), 'feed_forward' (default True) and 'degrees'
            (default False).
        reset: forwarded, via ``mem.sr``, to ``get_graph`` and ``get_pos``.

    Side effects:
        Saves 'basic_structure_edges=...' and/or 'feed_forward_edges=...'
        figures when enabled, and always saves a 'distances_...' figure with
        a scatter of the transformed (out degree, in degree) pairs and their
        distance matrix.

    Returns:
        The graph from ``get_graph``.
    """
    kwargs = dict(reset=reset)
    edge_set = get_edge_set()
    g = get_graph(**mem.sr(kwargs))
    pos = get_pos(**mem.sr(kwargs))

    # For every node k1, collect ordered pairs (k2, k3) of its successors
    # that are themselves joined by an edge k2 -> k3.
    trips = set()
    for k1 in g:
        successors = list(g[k1].keys())
        for k2 in successors:
            for k3 in successors:
                if k3 in g[k2]:
                    trips.add((k2, k3, k1))
    tripoints = dict(((k2, k3), pos[k1]) for k2, k3, k1 in trips)

    f = None
    if plots.get('basic_structure', False):
        f = myplots.fignum(1)
        f.add_subplot(111)
        gd.easy_draw(g, pos)
        f.savefig(myplots.figpath(
            'basic_structure_edges={0}'.format(edge_set)))

    if plots.get('feed_forward', True):
        if f is None:
            # Previously `f` was unbound here unless 'basic_structure' had
            # also run, so saving raised a NameError.
            # NOTE(review): assumes the overlay belongs on figure 1, as in
            # the combined case -- confirm.
            f = myplots.fignum(1)
            f.add_subplot(111)
        gd.overlay(g, pos, g.edges(),
                   tripoints=tripoints,
                   alphas=dict((e, .1) for e in g.edges()))
        f.savefig(myplots.figpath(
            'feed_forward_edges={0}'.format(edge_set)))

    if plots.get('degrees', False):
        make_degree_plots_0()

    # NOTE(review): everything below runs regardless of plots['degrees'];
    # confirm that this matches the intended nesting.

    # NOTE(review): the max-flow result is never used below; the call is
    # kept so that any error it raises still surfaces -- confirm whether it
    # can be dropped.
    maxflow = nx.algorithms.ford_fulkerson(g, 'AVAL', 'PVPL', 'weight')

    imaps = get_array_imaps()
    nnames = imaps['nnames']

    # Count degrees in a single pass over the edges instead of rescanning
    # the whole edge list once per node.
    out_counts = {}
    in_counts = {}
    for e in g.edges():
        out_counts[e[0]] = out_counts.get(e[0], 0) + 1
        in_counts[e[1]] = in_counts.get(e[1], 0) + 1
    node_stats = dict(
        (k, {'out_degree': out_counts.get(k, 0),
             'in_degree': in_counts.get(k, 0)})
        for k in nnames)

    f = myplots.fignum(3, (12, 6))

    outs = [v['out_degree'] for v in node_stats.values()]
    ins = [v['in_degree'] for v in node_stats.values()]
    raw_data = array([outs, ins]).T

    # Called before transform_data, as in the original; its return value is
    # not used here.
    make_data_transform(raw_data)
    data = transform_data(raw_data)

    kd = make_kdtree(data)
    n_neighbors = 5
    nn = compute_nns(kd, n_neighbors)
    knn_dists = nn['dists']
    dists = compute_dists(data)

    # Mean neighbour distance with column 0 excluded, mapped onto the first
    # colour channel.
    mean_dists = np.mean(knn_dists[:, 1:], 1)
    mean_colors = sqrt(mean_dists[:, newaxis]
                       * [1 / np.max(mean_dists), 0, 0])

    ax = f.add_subplot(121)
    ax.scatter(data[:, 0], data[:, 1], s=15,
               facecolor=mean_colors, edgecolor='none')
    ax.set_xlabel('scaled out degree')
    ax.set_ylabel('scaled in degree')

    ax2 = f.add_subplot(122)
    ax2.imshow(dists, interpolation='nearest', aspect='auto')
    ax2.set_title('distance matrix for scaled degrees')

    f.savefig(myplots.figpath('distances_{0}'.format(edge_set)))
    return g
def set_pos(**kwargs):
    """Return ``gd.getpos`` of the graph produced by ``get_graph``."""
    graph = get_graph(**mem.sr(kwargs))
    return gd.getpos(graph)