def add_rs_bars(self, rs, options, sim_rs=None):
    """Draw one result bar per key in ``self.r_key`` with simulation bars beside it.

    Args:
        rs: Sequence of result heights, indexed like ``self.r_key``.
        options: Object exposing ``plotColors`` with 'RESULTS' and 'SIMS' entries.
        sim_rs: Optional sequence of simulated result sequences, each indexed
            like ``rs``. ``None`` or empty draws the result bars only.

    Returns:
        ``self``, so calls can be chained.
    """
    sims = [] if sim_rs is None else list(sim_rs)
    r_width = 0.2
    # The simulation bars share the unit slot left over by the result bar.
    # Without simulations there is nothing to divide; the original divided by
    # len(sim_rs) unconditionally and raised ZeroDivisionError on the default.
    s_width = (1.0 - r_width) / len(sims) if sims else 0.0
    result_color = options.plotColors['RESULTS']
    sim_color = options.plotColors['SIMS']

    ticks, tick_labels = [], []
    for i, k in enumerate(self.r_key):
        ticks.append(i)
        tick_labels.append(' > ' + str(k))
        self.ax.bar(i, rs[i], width=r_width, color=result_color)
        x_loc = i + r_width
        # Tallest simulation first so the group reads as a descending staircase.
        for height in sorted((sim[i] for sim in sims), reverse=True):
            self.ax.bar(x_loc, height, width=s_width, color=sim_color)
            x_loc += s_width

    self.ax.set_xticks(ticks)
    self.ax.set_xticklabels(tick_labels, horizontalalignment='left')
    # Proxy rectangles stand in for the bars in the legend.
    items = [
        Rect((0, 0), 1, 1, fc=result_color),
        Rect((0, 0), 1, 1, fc=sim_color),
    ]
    self.ax.legend(items, ['Results', 'Simulation'], loc='upper right',
                   bbox_to_anchor=(1, 1), fontsize=10)
    return self
def fill_boxes(self, opts, vals, colors, alt_colors, JITTERS=None, SIG_STARS=None):
    """Draw and colour one box per option on the newest axes.

    Args:
        opts: Option names; used as keys into ``self.items`` (legend proxies).
        vals: One sequence of values per option, passed to ``boxplot``.
        colors: Primary colour per option (edges, markers and faces).
        alt_colors: Secondary colour per option; when it differs from the
            primary colour the legend proxy is hatched with it as edge colour.
        JITTERS: Optional iterable of ``(x_values, y_values)`` pairs scattered
            over the boxes, coloured like the box with the same index.
        SIG_STARS: Optional iterable of flags; a gold star is drawn above box
            ``i`` for every truthy entry ``i``.
            NOTE(review): ``make_minor_boxes`` passes a 2-tuple of p-value
            lists here, which this loop treats as two flags -- confirm intent.
    """
    self.bp = self.axes[-1].boxplot(vals, positions=self.pos, widths=self.width,
                                    patch_artist=True, showmeans=True, whis=0.7)
    for i, opt in enumerate(opts):
        clr, ac = colors[i], alt_colors[i]
        box = self.bp['boxes'][i]
        box.set_edgecolor(clr)
        box.set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9, markerfacecolor=clr)
        # boxplot returns two caps and two whiskers per box, stored pairwise.
        for part in ('caps', 'whiskers'):
            for idx in (2 * i, 2 * i + 1):
                plt.setp(self.bp[part][idx], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr, markeredgecolor=clr,
                 marker='s', markersize=2.0)
        if clr == ac:
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr)
        else:
            # The original referenced undefined names ``a``/``b`` here and
            # raised NameError; the face/edge pair is the primary/alt colour.
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr, ec=ac, hatch='*')

    for patch, clevel in zip(self.bp['boxes'], colors):
        patch.set_facecolor(clevel)
        patch.set_alpha(0.5)

    if JITTERS is not None:
        for i, (xJ, yJ) in enumerate(JITTERS):
            plt.scatter(xJ, yJ, c=colors[i], alpha=0.7, s=4, zorder=9)
    if SIG_STARS is not None:
        for i, SIG in enumerate(SIG_STARS):
            if SIG:
                plt.scatter(self.pos[i], max(vals[i]) * 1.2, s=100, marker='*',
                            color='gold')
def finish(self):
    """Build the multi-group legend, save the figure as PNG and optionally show it.

    Each ID in ``self.multi_legend`` becomes one legend row: the ID itself
    (with an invisible white swatch) followed by one swatch per entry of
    ``self.multi_colors[ID]``. Shorter rows are padded with blank entries.
    The figure is written to ``self.mname + '.png'``.
    """
    if self.options.verbose:
        sys.stderr.write('\nAdding Legend...')
    plt.axis('off')

    def blank():
        # Invisible placeholder so every legend row has the same length.
        return Rect((0, 0), 1, 1, fc='white', ec='white')

    leg_ids, items, labels = [], [], []
    for ID in self.multi_legend:
        row_items, row_labels = [], []
        for name, clr in self.multi_colors[ID].items():
            row_items.append(Rect((0, 0), 1, 1, fc=clr))
            row_labels.append(name)
        items.append(row_items)
        labels.append(row_labels)
        leg_ids.append(ID)

    # ``or [0]`` keeps an empty legend from crashing max().
    maxLen = max([len(row) for row in labels] or [0])
    for row_items, row_labels in zip(items, labels):
        missing = maxLen - len(row_items)
        row_items.extend(blank() for _ in range(missing))
        row_labels.extend([''] * missing)

    # The legend fills column by column, so the flat list is laid out as
    # "all IDs, then entry 0 of every ID, then entry 1 ..."; with
    # maxLen + 1 columns each legend row then reads "ID, entry 0, entry 1 ...".
    leg_items = [blank() for _ in leg_ids]
    leg_labels = list(leg_ids)
    for k in range(maxLen):
        leg_items.extend(row[k] for row in items)
        leg_labels.extend(row[k] for row in labels)
    ncols = maxLen + 1

    hp, cs = 0.13, 0.3
    plt.legend(leg_items, leg_labels, ncol=ncols, bbox_to_anchor=(1.2, 1.7),
               loc='upper left', handletextpad=hp, columnspacing=cs, fontsize=12)
    plt.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.82)
    plt.savefig(self.mname + '.png', dpi=500)
    # The original read a bare ``options`` here while every other access in
    # this method goes through ``self.options``.
    if self.options.show:
        plt.show()
    if self.options.verbose:
        sys.stderr.write('\n')
def make_minor_boxes(self, major_id, opts, vals):
    """Plot one group of boxes for ``major_id`` and advance the x cursor.

    Computes t-test p-values (each option against the pooled rest, and every
    pair of options), builds jittered scatter coordinates, registers a legend
    proxy for the major group and delegates the drawing to ``fill_boxes``.
    Updates ``self.pos``, ``self.width``, ``self.yTop`` and ``self.xOffset``.
    """
    n_opts = len(opts)
    major_clrs = [self.get_color(major_id) for _ in opts]
    minor_clrs = [self.get_color(opt) for opt in opts]
    global_sigs = [1] * n_opts
    local_sigs = [1] * n_opts
    jitters = []

    self.pos = [self.xOffset + 0.20 * k for k in range(n_opts)]
    self.width = 0.16

    pooled = [v for group in vals for v in group]
    try:
        min(pooled)  # raises ValueError when there are no observations at all
        valMean = np.mean(pooled)
        valMax = max(pooled) * 1.25
        valMeans = [np.mean(group) for group in vals]
    except ValueError:
        valMean, valMax = 0, 0
        valMeans = [0 for _ in vals]

    for i, group in enumerate(vals):
        # This option against everything else pooled together.
        rest = [v for k, other in enumerate(vals) if k != i for v in other]
        pv = stats.ttest_ind(group, rest)[-1]
        if valMeans[i] > valMean and pv < global_sigs[i]:
            global_sigs[i] = pv

        # x jitter is drawn before y jitter; keep that order so a seeded
        # run consumes the random stream exactly as before.
        x_jit = [self.pos[i] + np.random.normal(0, 0.02) for _ in range(len(group))]
        y_jit = [v * np.random.normal(1, 0.005) for v in group]
        jitters.append([x_jit, y_jit])

        # Pairwise comparisons: the option with the larger mean keeps the
        # smallest p-value seen so far.
        for j in range(i + 1, len(vals)):
            pv = stats.ttest_ind(group, vals[j])[-1]
            if valMeans[i] > valMeans[j] and pv < local_sigs[i]:
                local_sigs[i] = pv
            elif valMeans[i] < valMeans[j] and pv < local_sigs[j]:
                local_sigs[j] = pv

    self.major_legend_items[major_id] = Rect((0, 0), 1, 1, fc=major_clrs[0])
    self.fill_boxes(opts, vals, minor_clrs, major_clrs, JITTERS=jitters,
                    SIG_STARS=(global_sigs, local_sigs))

    if valMax > self.yTop:
        self.yTop = valMax
    self.xOffset = self.pos[-1] + 2 * self.width
    label = major_id.split(';')[-1]
    self.axes[-1].text(np.mean(self.pos), 0 - (self.yTop) * 0.1, label,
                       fontsize=16, fontweight='bold',
                       horizontalalignment='center', verticalalignment='top')
def print_clusters(pp, clusters, tuples=None, title=''):
    """Draw cluster bounding boxes (and optional points) and save the page to ``pp``.

    Args:
        pp: Target handed to ``plt.savefig`` with ``format='pdf'``.
        clusters: List of objects with ``error`` and ``bbox`` attributes.
            NOTE: the list is sorted in place by ``error`` (kept from the
            original behaviour).
        tuples: Optional rows; column 0 and 1 are plotted as y and x, the
            last column is used as the point colour.
        title: Axes title.
    """
    fig = plt.figure(figsize=(8, 8))
    sub = fig.add_subplot(111)
    clusters.sort(key=lambda c: c.error)
    for cluster in clusters:
        # bbox is a (mins, maxs) pair; transpose it to per-axis [lo, hi].
        x, y = tuple(map(list, zip(*cluster.bbox)))[:2]
        # Clip the rectangle to the 0..100 plotting window.
        x[0] = max(0, x[0])
        x[1] = min(100, x[1])
        y[0] = max(0, y[0])
        y[1] = min(100, y[1])
        c = cm.jet(cluster.error)
        r = Rect((x[0], y[0]), x[1] - x[0], y[1] - y[0],
                 alpha=max(0.1, cluster.error), ec=c, fill=False, lw=1.5)
        sub.add_patch(r)
    if tuples:
        # zip() returns an iterator on Python 3, so materialise it before
        # indexing (the original indexed the zip object directly).
        cols = list(zip(*tuples))
        xs, ys, cs = cols[0], cols[1], cols[-1]
        sub.scatter(ys, xs, c=cs, alpha=0.5, lw=0)
    sub.set_ylim(-5, 105)
    sub.set_xlim(-5, 105)
    sub.set_title(title)
    plt.savefig(pp, format='pdf')
def __init__(self, f_data, f_key, options):
    """Load a feature table and a sample-to-group key, then assign group colours.

    Args:
        f_data: Path to a whitespace-separated table. The line starting with
            '---' lists the sample names; every other line is a feature name
            followed by one numeric value per sample.
        f_key: Path to a whitespace-separated file mapping sample name to
            group label; lines starting with '---' are skipped.
        options: Stored on the instance as ``self.options``.
    """
    self.options = options
    self.colors = [
        'darkblue', 'g', 'brown', 'r', 'purple', 'gray', 'orange', 'k',
        'cornflowerblue', 'magenta', 'cyan', 'lime', 'yellow', 'pink', 'blue'
    ]
    self.tFS = 25
    self.log = True
    self.color_key = {
        'PANTHER': 'darkblue',
        'GORILLA': 'green',
        'ELEPHANT': 'brown',
        'HUMAN': 'red',
        'POLAR': 'purple',
        'DOLPHIN': 'gray',
        'TIGER': 'orange',
        'ORCA': 'k'
    }
    self.feats = []
    self.vals = []
    self.log_vals = []
    self.f_name = f_data

    # ``with`` closes the handles; the original left both files open.
    with open(f_data) as handle:
        for raw in handle:
            fields = raw.split()
            if not fields:
                continue  # blank line: the original crashed on fields[0]
            if fields[0] == '---':
                self.samples = fields[1:]
            else:
                row = [float(x) for x in fields[1:]]
                self.feats.append(fields[0])
                self.vals.append(row)
                self.log_vals.append([math.log(v + 1.0) for v in row])

    self.key = {}
    with open(f_key) as handle:
        for raw in handle:
            fields = raw.split()
            if not fields or fields[0] == '---':
                continue
            self.key[fields[0]] = fields[1]

    try:
        self.sample_color = {k: self.color_key[self.key[k]] for k in self.key}
    except KeyError:
        # At least one group is not in the built-in colour table: rebuild the
        # table from the groups actually present. The palette is cycled so
        # that more groups than colours no longer raises IndexError.
        groups = list(set(self.key.values()))
        self.color_key = {
            g: self.colors[i % len(self.colors)] for i, g in enumerate(groups)
        }
        self.sample_color = {k: self.color_key[self.key[k]] for k in self.key}

    self.ncol = len(set(self.key.values()))
    # Legend labels and proxy patches, one per group except 'NA'.
    self.labels, self.items = [], []
    for group, shade in self.color_key.items():
        if group == 'NA':
            continue
        self.labels.append(group)
        self.items.append(Rect((0, 0), 1, 1, fc=shade))
def add_pv_bar(self, pvs, sim_pvs=None):
    """Plot cumulative p-value counts for the results and for each simulation.

    For every cutoff in ``self.pv_key`` (populated by ``self.set_pv_key``)
    the number of p-values less than or equal to that cutoff is counted.
    Cutoffs are drawn from largest to smallest; the result count is a blue
    bar and the simulation counts are red bars sorted tallest first.

    Args:
        pvs: Sequence of observed p-values.
        sim_pvs: Optional list of simulated p-value sequences.

    Returns:
        ``self``, so calls can be chained.
    """
    from bisect import bisect_right

    sims = [] if sim_pvs is None else list(sim_pvs)
    self.set_pv_key(pvs)
    pvk = sorted(self.pv_key.keys())

    bar_cnts = []
    for sp in [pvs] + sims:
        # Sort a copy: the original sorted the caller's lists in place and
        # raised IndexError on an empty list.
        ordered = sorted(sp)
        # Number of values <= cutoff, for each cutoff in ascending order.
        bar_cnts.append([bisect_right(ordered, cutoff) for cutoff in pvk])

    myticks, myticklabels = [], []
    r_width = 0.2
    s_width = (1.0 - r_width) / len(bar_cnts)
    for i, k in enumerate(pvk[::-1]):
        myticks.append(i)
        myticklabels.append(' < ' + str(k))
        idx = -1 - i  # counts are stored ascending, ticks run descending
        self.ax.bar(i, bar_cnts[0][idx], width=r_width, color='blue')
        iLoc = i + r_width
        for b in sorted((cnt[idx] for cnt in bar_cnts[1:]), reverse=True):
            self.ax.bar(iLoc, b, width=s_width, color='red')
            iLoc += s_width

    self.ax.set_xticks(myticks)
    self.ax.set_xticklabels(myticklabels, horizontalalignment='left')
    items = [Rect((0, 0), 1, 1, fc='b'), Rect((0, 0), 1, 1, fc='r')]
    self.ax.legend(items, ['Results', 'Simulation'], loc='upper right',
                   bbox_to_anchor=(1, 1), fontsize=10)
    return self
def add_legend(self, labels, colors):
    """Attach a single-column colour legend to the axes at ``(0, self.yLen - 1)``.

    ``labels`` and ``colors`` are paired positionally; extra entries in the
    longer sequence are ignored.
    """
    pairs = list(zip(labels, colors))
    names = [name for name, _ in pairs]
    # Proxy rectangles carry the colours into the legend.
    swatches = [Rect((0, 0), 1, 1, fc=shade) for _, shade in pairs]
    target = self.ax_key[(0, self.yLen - 1)]
    target.legend(swatches, names, ncol=1, loc='upper right',
                  bbox_to_anchor=(1.2, 1.0), fontsize=10)
def add_legend(self, labels, colors):
    """Add a colour legend above the current axes, roughly three entries per column.

    ``labels`` and ``colors`` are paired positionally; extra entries in the
    longer sequence are ignored.
    """
    labs, items = [], []
    for name, shade in zip(labels, colors):
        labs.append(name)
        items.append(Rect((0, 0), 1, 1, fc=shade))
    # Floor division keeps the column count an integer on Python 3, and the
    # lower bound of 1 covers legends with fewer than three entries (the
    # original passed 0 columns on Python 2 and a float on Python 3).
    n_cols = max(1, len(labs) // 3)
    plt.legend(items, labs, loc='upper left', ncol=n_cols,
               bbox_to_anchor=(-0.1, 1.16), fontsize=10)
def visualize_detection(ax, target):
    """Outline every box in ``target['boxes']`` on ``ax``, coloured by its label.

    Boxes are read as ``(x0, y0, x1, y1)`` corner coordinates. The label
    (modulo 20) selects one of 20 colours sampled from the ``tab20`` colormap.
    Returns ``ax``.
    """
    corners = target['boxes']
    label_ids = target['labels']
    palette = plt.cm.tab20(np.linspace(0, 1, 20))
    per_box = palette[label_ids % 20]
    edge_colors = [mpl.colors.rgb2hex(rgba[:3]) for rgba in per_box]
    for box, edge in zip(corners, edge_colors):
        x0, y0, x1, y1 = box
        outline = Rect((x0, y0), x1 - x0, y1 - y0, ec=edge, fc='none', lw=2)
        ax.add_patch(outline)
    return ax
def add_rs_bars(self, ax, p_key, pvs, sim_pvs=None):
    """Draw a 'Variance Explained' bar group per key in ``p_key`` on ``ax``.

    Args:
        ax: Axes to draw on.
        p_key: Sequence of keys; one bar group and tick label per key.
        pvs: Result heights, indexed like ``p_key``.
        sim_pvs: Optional sequence of simulated height sequences, each indexed
            like ``pvs``. ``None`` or empty draws the result bars only.

    Returns:
        ``self``, so calls can be chained.
    """
    sims = [] if sim_pvs is None else list(sim_pvs)
    r_width = 0.2
    # Guard the division: the original raised ZeroDivisionError whenever no
    # simulations were supplied, which includes the default argument.
    s_width = (1.0 - r_width) / len(sims) if sims else 0.0

    ticks, tick_labels = [], []
    for i, k in enumerate(p_key):
        ticks.append(i)
        tick_labels.append(' >= ' + str(k))
        ax.bar(i, pvs[i], width=r_width, color='blue')
        x_loc = i + r_width
        # Tallest simulation first.
        for height in sorted((sim[i] for sim in sims), reverse=True):
            ax.bar(x_loc, height, width=s_width, color='red')
            x_loc += s_width

    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_labels, horizontalalignment='left')
    ax.set_title('Variance Explained')
    items = [Rect((0, 0), 1, 1, fc='b'), Rect((0, 0), 1, 1, fc='r')]
    ax.legend(items, ['Results', 'Simulation'], loc='upper right',
              bbox_to_anchor=(1, 1), fontsize=10)
    return self
def prepare_labels(self):
    """Assign a colour to every sample and build the legend labels and patches.

    Uses ``self.color_key`` when it covers every group in ``self.sample_key``;
    otherwise the colour table is rebuilt from the groups in ``self.key``
    using ``self.colors``. Sets ``sample_color``, ``ncol``, ``labels`` and
    ``items``; the 'NA' group gets no legend entry.
    """
    try:
        self.sample_color = {
            k: self.color_key[self.sample_key[k]] for k in self.sample_key
        }
    except KeyError:
        # NOTE(review): the fallback table is built from ``self.key`` while the
        # lookup goes through ``self.sample_key`` -- confirm both hold the
        # same group labels.
        groups = list(set(self.key.values()))
        palette = self.colors
        # Cycle the palette so more groups than colours no longer raises
        # IndexError.
        self.color_key = {
            g: palette[i % len(palette)] for i, g in enumerate(groups)
        }
        self.sample_color = {
            k: self.color_key[self.sample_key[k]] for k in self.sample_key
        }

    self.ncol = len(set(self.key.values()))
    self.labels, self.items = [], []
    for group, shade in self.color_key.items():
        if group == 'NA':
            continue
        self.labels.append(group)
        self.items.append(Rect((0, 0), 1, 1, fc=shade))
def sample_data(self, num, randvar=None):
    """Draw simulated creatures for each of the eight species and rebuild the legend.

    With a truthy ``randvar`` the per-species sample size is drawn from a
    normal distribution around ``num`` (standard deviation ``num / 8``,
    floored at 10); otherwise exactly ``num`` creatures are sampled. Stores
    the samples in ``self.key``, rebuilds ``self.labels`` / ``self.items``
    from ``self.color_key`` and finishes with ``self.merge_data()``.
    """
    self.num = num
    species = (
        'HUMANS', 'ELEPHANTS', 'GORILLAS', 'PANTHERS',
        'POLAR_BEARS', 'ORCAS', 'DOLPHINS', 'TIGERS',
    )
    animals = {}
    for name in species:
        if randvar:
            # One size draw per species, taken before any creature is sampled.
            count = max(int(np.random.normal(num, num / 8.0)), 10)
        else:
            count = num
        animals[name] = [
            sample_creature(self.stats[name], self.er, self.dp, self.noise)
            for _ in range(count)
        ]
    self.key = animals

    self.labels, self.items = [], []
    for name, shade in self.color_key.items():
        size = len(self.key[name])
        # Counts below 100 get an extra space before the closing bracket.
        closing = ' )' if size < 100 else ')'
        self.labels.append(name + ' (' + str(size) + closing)
        self.items.append(Rect((0, 0), 1, 1, fc=shade))
    self.merge_data()
def plot_clusters(self, clusters, cols=None, color=None, alpha=None):
    """Draw one rectangle per cluster on ``self.sub`` and grow the plot bounds.

    Args:
        clusters: Iterable of objects with ``error`` and ``bbox`` attributes.
            When ``cols`` holds names, ``clusters.cols`` must list the column
            names in bbox order.
        cols: Two column indices or names selecting the x and y dimensions
            of each bbox. Defaults to the first two.
        color: Fixed edge colour; by default ``cm.jet`` of the z-scored error.
        alpha: Fixed opacity; by default the z-scored error clamped to
            [0.1, 1].
    """
    self.clusters = clusters
    errors = np.array([c.error for c in clusters])
    if errors.size:  # skip the statistics (and NaN warnings) for no clusters
        mean, std = np.mean(errors), np.std(errors)
        if std == 0:
            std = 1  # all errors equal: avoid dividing by zero
        errors = (errors - mean) / std

    if not cols:
        cols = [0, 1]
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3
    if isinstance(cols[0], string_types):
        # A list, not a lazy map object, so cols[0] / cols[1] work on Python 3.
        cols = [clusters.cols.index(name) for name in cols]

    for idx, cluster in enumerate(clusters):
        # bbox is a (mins, maxs) pair; transpose it to per-column [lo, hi].
        tup = tuple(map(list, zip(*cluster.bbox)))
        x, y = tup[cols[0]], tup[cols[1]]
        x, y = self.transform_box(x, y)
        if not self.xbound:
            self.xbound = [x[0], x[1]]
            self.ybound = [y[0], y[1]]
        else:
            self.xbound = r_union(self.xbound, x)
            self.ybound = r_union(self.ybound, y)
        a = alpha or min(1, max(0.1, errors[idx]))
        c = color or cm.jet(errors[idx])
        r = Rect((x[0], y[0]), x[1] - x[0], y[1] - y[0],
                 alpha=a, ec=c, fill=False, lw=1.5)
        self.sub.add_patch(r)
    self.set_lims()
def recurse_labels(ax, mtree, xlims, ymax, all_size, cur_j=0, clr_mtype=False, add_lbls=True, mut_clr=None):
    """Recursively draw the branches of ``mtree`` as labelled boxes on ``ax``.

    Each branch at the current level gets a connector line, an optional text
    label and a rectangle whose width is proportional to its share of the
    leaves; branches that are themselves ``MuTree`` objects are drawn one
    row lower by a recursive call.

    Args:
        ax: Axes to draw on; also the return value.
        mtree: Tree whose ``(label, subtree)`` items are drawn at this level.
        xlims: ``(left, right)`` horizontal extent available to this level.
        ymax: Vertical reference position of the top level.
        all_size: Width that ``lbl_prop`` is compared against when deciding
            on sample counts and label rotation.
        cur_j: Depth of this level; each level is drawn one unit lower.
        clr_mtype: ``False`` or ``None`` colours every branch, an empty
            mutation type colours none, anything else is matched branch by
            branch through ``subtype_iter()``.
        add_lbls: Whether to draw text labels at this level.
        mut_clr: Highlight colour; defaults to ``variant_clrs['Point']``.

    NOTE(review): the indentation of this block was reconstructed from a
    whitespace-collapsed source -- confirm the nesting against the original.
    """
    all_wdth = len(MuType(mtree.allkey()).leaves())
    cur_x = xlims[0]
    muts_dict = dict(mtree)

    if mut_clr is None:
        mut_clr = variant_clrs['Point']

    for lbl in sort_levels(list(muts_dict.keys()), mtree.mut_level):
        muts = muts_dict[lbl]

        # a sub-tree takes as much width as it has leaves, anything else one unit
        if isinstance(muts, MuTree):
            lf_wdth = len(MuType(muts.allkey()).leaves())
        else:
            lf_wdth = 1

        lbl_prop = (lf_wdth / all_wdth) * (xlims[1] - xlims[0])

        # connector from the middle of this branch up to the middle of the parent
        ax.plot([cur_x + lbl_prop / 2, (xlims[0] + xlims[1]) / 2],
                [ymax - 0.18 - cur_j, ymax + 0.19 - cur_j],
                c='black', linewidth=0.9, solid_capstyle='round')

        if add_lbls:
            mut_lbl = clean_label(lbl, mtree.mut_level)

            # only sufficiently wide branches with siblings get a sample count
            if (lbl_prop / all_size) > 1 / 41 and len(tuple(mtree)) > 1:
                if len(muts) == 1:
                    mut_lbl = "{}\n(1 sample)".format(mut_lbl)
                else:
                    mut_lbl = "{}\n({} samps)".format(mut_lbl, len(muts))

            # narrow branches get their label rotated by 90 degrees
            if (lbl_prop / all_size) <= 1 / 19:
                use_rot = 90
            else:
                use_rot = 0

            ax.text(cur_x + lbl_prop / 2, ymax - 0.5 - cur_j, mut_lbl,
                    size=14 - 2.9 * cur_j, ha='center', va='center',
                    rotation=use_rot)

        # colour this branch and all subtypes
        if clr_mtype is None or clr_mtype is False:
            use_clr = mut_clr
            eg_clr = mut_clr
            sub_mtype = clr_mtype

        # do not colour this branch or any of its subtypes
        elif clr_mtype.is_empty():
            use_clr = variant_clrs['WT']
            eg_clr = variant_clrs['WT']
            sub_mtype = clr_mtype

        # otherwise, check to see which subtypes need to be coloured
        else:
            sub_dict = dict(clr_mtype.subtype_iter())

            if lbl not in sub_dict:
                use_clr = variant_clrs['WT']
                eg_clr = variant_clrs['WT']
                sub_mtype = MuType({})

            elif (isinstance(mtree[lbl], dict) or sub_dict[lbl] is None
                  or (sub_dict[lbl] == MuType(mtree[lbl].allkey()))):
                use_clr = mut_clr
                eg_clr = mut_clr
                sub_mtype = None

            # partially selected branch: outline only, children decide for themselves
            else:
                use_clr = 'none'
                eg_clr = mut_clr
                sub_mtype = sub_dict[lbl]

        # text labels are passed down only while clr_mtype is exactly False
        if clr_mtype is False:
            sub_lbls = add_lbls
        else:
            sub_lbls = False

        ax.add_patch(
            Rect((cur_x + lbl_prop * 0.12, ymax - 0.8 - cur_j),
                 lbl_prop * 0.76, 0.6, facecolor=use_clr,
                 edgecolor=eg_clr, alpha=0.41, linewidth=1.3))

        # draw the children one level down, inside a slightly inset x-range
        if isinstance(muts, MuTree):
            ax = recurse_labels(
                ax, muts, (cur_x + lbl_prop * 0.06, cur_x + lbl_prop * 0.94),
                ymax, all_size, cur_j + 1, sub_mtype, sub_lbls, mut_clr)

        cur_x += lbl_prop

    return ax
def plot_custom_pts(self, pts, key, cnts=None, cnt_idx=0, genes=None, neg_genes=None, color=None, VERSION='LOC', TITLE='HI', BAR_MSG=None, AXES=None):
    """Scatter the embedded points on a new grid cell, by cell type or by expression.

    Without ``cnts`` every identified cell is drawn with its own marker and
    type (or firing) colour and a legend is added. With ``cnts`` the points
    are coloured by a gene score: the min-max scaled log2 counts of ``genes``
    summed, minus those of ``neg_genes``; a colour bar and an optional
    caption are added instead of a legend.

    Args:
        pts: Mapping of sample name to ``(x, y)``.
        key: Passed to ``CellKey`` together with ``self.options``.
        cnts: Optional mapping ``gene -> sample -> count``.
        cnt_idx: Unused.
        genes: Genes contributing positively to the score.
        neg_genes: Genes contributing negatively to the score.
        color: Unused.
        VERSION: 'FIRING' colours by firing style, anything else by cell type.
        TITLE: Legend title; replaced by the gene names when genes are given.
        BAR_MSG: Caption selector for the colour bar ('RELN', or a string
            starting with 'CHL', 'GLU' or 'VEN'); any other string is shown
            as is, ``None`` draws no caption.
        AXES: Optional ``(row, col, rowspan, colspan)`` overriding the
            automatic grid position.

    NOTE(review): the nesting of this block was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.
    """
    # Work on copies: the original did ``genes += neg_genes``, which grew the
    # shared default list (and the caller's list) on every call.
    pos_genes = [] if genes is None else list(genes)
    neg_genes = [] if neg_genes is None else list(neg_genes)
    AXES = [] if AXES is None else AXES

    if len(AXES) == 0:
        self.axes.append(
            plt.subplot2grid((self.xLen, self.yLen), (self.xLoc, self.yLoc),
                             rowspan=1, colspan=1))
    else:
        xl, yl, rs, cs = AXES
        self.axes.append(
            plt.subplot2grid((self.xLen, self.yLen), (xl, yl),
                             rowspan=rs, colspan=cs))

    cell_key = CellKey(key, self.options)
    p_data, label_key = [], {}
    if len(pos_genes) > 0:
        TITLE = ",".join(pos_genes)
    if len(neg_genes) > 0:
        TITLE += '\n' + ",".join(neg_genes)
    genes = pos_genes + neg_genes

    for s in pts:
        x, y = pts[s]
        try:
            cell = cell_key.identify(s)
        except KeyError:
            continue  # sample unknown to the cell key
        if cell.type == 'NA':
            continue
        if VERSION == 'FIRING':
            clr, clr_label = cell.fire_color, cell.fs
        else:
            clr, clr_label = cell.type_color, cell.type
        if cell.type == 'MINI':
            cell.type = 'canonical'
        if clr_label not in label_key:
            self.color_key[clr_label] = clr
            label_key[clr_label] = Rect((0, 0), 1, 1, fc=clr)
        if cnts is None:
            if cell.marker == 'o':
                continue
            if VERSION == 'FIRING':
                self.axes[-1].scatter(x, y, marker=cell.marker,
                                      c=cell.fire_color, s=70, alpha=1)
            else:
                self.axes[-1].scatter(x, y, marker=cell.marker, c=clr,
                                      s=70, alpha=1)
        else:
            p_data.append([
                np.array([log(cnts[g][s] + 1, 2) for g in genes]),
                x, y, clr, cell
            ])

    if cnts is not None:
        if len(genes) > 0:
            scaler = MinMaxScaler()
            t_data = scaler.fit_transform(np.array([p[0] for p in p_data]))
            for j in range(len(p_data)):
                pos_cont = sum(t_data[j][g] for g in range(len(genes))
                               if genes[g] not in neg_genes)
                neg_cont = sum(t_data[j][g] for g in range(len(genes))
                               if genes[g] in neg_genes)
                p_data[j][0] = pos_cont - neg_cont
        # Ascending score, so the highest-scoring points are drawn last.
        p_data.sort()
        X = np.array([p[1] for p in p_data])
        Y = np.array([p[2] for p in p_data])
        I = np.array([p[0] for p in p_data])
        # 'none' is the supported spelling for "no edge"; the original passed ''.
        scatterplot = self.axes[-1].scatter(X, Y, c=I, cmap='seismic', s=40,
                                            edgecolor='none')
        plt.colorbar(scatterplot, ax=self.axes[-1], orientation='horizontal',
                     fraction=0.04, pad=0.1, ticks=[])
        self.axes[-1].scatter(X, Y, c=I, cmap='seismic', s=40,
                              edgecolor='none')

        # Caption under the colour bar. ``None`` is skipped; the original
        # crashed on ``BAR_MSG[0:3]`` when no message was given.
        if BAR_MSG is not None:
            if BAR_MSG == 'RELN':
                self.axes[-1].text(0.6, -7.5,
                                   ' CR Markers\n(RELN,CXCR4,PCP4)',
                                   fontsize=11, verticalalignment='bottom')
            elif BAR_MSG[0:3] == 'CHL':
                self.axes[-1].text(0.3, -7.5,
                                   ' Choride Ratio\nSLC12A2:SLC12A5',
                                   fontsize=11, verticalalignment='bottom')
            elif BAR_MSG[0:3] == 'GLU':
                self.axes[-1].text(
                    0.0, -7.5,
                    ' Glutamatergic:GABA \n(SLC17A7,SLC1A2,GRIK3:ABAT,GAD)',
                    fontsize=9, verticalalignment='bottom')
            elif BAR_MSG[0:3] == 'VEN':
                self.axes[-1].text(0.3, -7.5,
                                   ' VEN Markers\n(FEZF2,BCL11B)',
                                   fontsize=11, verticalalignment='bottom')
            else:
                self.axes[-1].text(0.3, -7.5, BAR_MSG, fontsize=11,
                                   verticalalignment='bottom')
    else:
        nc, fs, cs, hp, bb1, bb2 = 2, 9.0, 0.62, 0.62, 0.465, 1.0
        # Square markers act as legend proxies for the colours in use.
        PROX = [
            Line2D([], [], color=self.color_key[k], marker='s', mec='k',
                   markeredgewidth=0.2, linestyle='None', markersize=10,
                   label=k) for k in label_key if k != 'NA'
        ]
        leg = self.axes[-1].legend(handles=PROX + self.loc_labs, title=TITLE,
                                   handletextpad=hp, columnspacing=cs,
                                   fontsize=fs, ncol=nc,
                                   bbox_to_anchor=(bb1, bb2),
                                   loc='upper center')

    self.axes[-1].axis('off')
    # Advance to the next grid cell, wrapping to the next row.
    self.yLoc += 1
    if self.yLoc == self.yLen:
        self.yLoc = 0
        self.xLoc += 1
def add_multi_box(self, h_data, TITLE, BW=False):
    """Draw one jittered box per category of ``h_data`` on ``self.ax``.

    Args:
        h_data: Mapping of category name to a sequence of values; 'NA'
            values are dropped and the rest converted to float.
        TITLE: Optional text; the part after the last ';' is written at the
            top of the axes.
        BW: Draw every box in black instead of the fixed palette.

    Categories flagged as unknown/unavailable, and any name containing an
    underscore other than 'GT_18' and 'SUB_16', are skipped. Registers a
    legend proxy per category in ``self.items`` and advances ``self.xOffset``.
    """
    # Union of the two skip lists in the original. The first of them was
    # never applied because the filter tested ``data[1]`` (a whole row)
    # instead of the row's name.
    skipped = {'NA', 'UNK', 'UNKNOWN', 'UNAVAIL', 'UNCLEAR', 'DOUBLETS', 'NON'}
    renamed = {'GT_18': '>18', 'SUB_16': '<16'}

    opts, vals = [], []
    for name, raw in sorted(h_data.items()):
        if name in skipped:
            continue
        if name in renamed:
            label = renamed[name]
        elif len(name.split('_')) > 1:
            continue
        else:
            label = name
        opts.append(label)
        vals.append([float(dx) for dx in raw if dx != 'NA'])
    if not opts:
        return  # nothing left to draw; the original failed on pos[-1]

    # NOTE(review): the looked-up colours were always overwritten by the fixed
    # palette below; the call is kept only in case get_color records state.
    for label in opts:
        self.get_color(label)
    palette = ['pink', 'orange', 'purple', 'orange']
    if BW:
        colors = ['k' for _ in opts]
    elif len(opts) > len(palette):
        # Cycle instead of raising IndexError on a fifth category.
        colors = [palette[i % len(palette)] for i in range(len(opts))]
    else:
        colors = palette

    if opts == ['SVZ', '*SVZ*', 'IZ', '*IZ*']:
        opts = ['$SVZ$', '$SVZ_{novel}$', '$IZ$', '$IZ_{novel}$']
    if opts == ['CRMZ', 'MEGA', 'MINI']:
        opts = ['CAJAL-RETZIUS', 'NOVEL', 'CANONICAL']

    if isinstance(colors[0], tuple):
        colors, alt_colors = [c[0] for c in colors], [c[1] for c in colors]
    else:
        alt_colors = list(colors)

    pos, width = [self.xOffset + 0.20 * x for x in range(len(opts))], 0.16
    self.bp = self.ax.boxplot(vals, positions=pos, widths=width,
                              patch_artist=True, showmeans=True, whis=1)
    self.xOffset = pos[-1] + 2 * width
    xMid = np.mean(pos)

    for i, opt in enumerate(opts):
        clr, ac = colors[i], alt_colors[i]
        self.bp['boxes'][i].set_edgecolor(clr)
        self.bp['boxes'][i].set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9,
                 markerfacecolor=clr)
        # Two caps and two whiskers per box, stored pairwise.
        for part in ('caps', 'whiskers'):
            for idx in (2 * i, 2 * i + 1):
                plt.setp(self.bp[part][idx], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr,
                 markeredgecolor=clr, marker='s', markersize=2.0)
        if clr == ac:
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr)
        else:
            # The original referenced undefined names ``a``/``b`` here.
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr, ec=ac, hatch='*')

    # Jittered raw observations on top of each box. The draw pattern is kept
    # as in the original so seeded runs stay reproducible.
    xJitters = [
        np.random.normal(pos[i], 0.01, len(vals[i])) for i in range(len(vals))
    ]
    yJitters = [[
        vals[i][j] * np.random.normal(1, 0.005, len(vals[i]))[j]
        for j in range(len(vals[i]))
    ] for i in range(len(vals))]
    for xJ, yJ, clevel in zip(xJitters, yJitters, colors):
        plt.scatter(xJ, yJ, c=clevel, alpha=0.7, s=4, zorder=9)
    for patch, clevel in zip(self.bp['boxes'], colors):
        patch.set_facecolor(clevel)
        patch.set_alpha(0.5)

    yMin, yMax = self.ax.get_ylim()
    if TITLE:
        self.ax.text(xMid, yMax, TITLE.split(';')[-1], fontsize=20,
                     fontweight='bold')
    return
def EEG_Comb(t, data, names, network, t0=None, t1=None, amplitude=0.5, iis=None, **spec):
    """Plot an EEG segment above the network's spike-phase map and show the figure.

    Args:
        t: Sorted sequence of sample times in seconds.
        data: Signal data, segmented with the same indices as ``t``.
        names: Channel names; used for the EEG traces and, reversed, as the
            y tick labels of the phase map.
        network: Object providing ``get_WSN_spike_phases``.
        t0, t1: Window start/end in seconds; default to the first/last
            entry of ``t``.
        amplitude: Passed to ``PlotEEG``.
        iis: Optional iterable of ``(start, end)`` intervals in seconds,
            shaded red on both axes when they overlap the window.
        **spec: Optional overrides: 'font', 'figsize', 'dpi', 'plt_pos',
            'cbar_pos', 'c_ticks'.

    Returns:
        The matplotlib figure.
    """
    ## t0, t1 unit: second if is type float
    import Utility as utl
    from bisect import bisect

    if t0 is None:
        t0i = 0
        t0 = t[0]
    else:
        t0i = bisect(t, float(t0))
    if t1 is None:
        t1i = len(t)
        t1 = t[-1]
    else:
        t1i = bisect(t, float(t1))
    win_start, win_end = float(t0), float(t1)

    t_plot = utl.DataSegment(t, t0i, t1i)
    d_plot = utl.DataSegment(data, t0i, t1i)
    # The network is queried with times relative to the start of the recording.
    t0_nest = utl.Time(seconds=win_start - t[0])
    t1_nest = utl.Time(seconds=win_end - t[0])
    clks, phase = network.get_WSN_spike_phases(t0=t0_nest, t1=t1_nest,
                                               group=True, inverse=False)
    # NOTE(review): the division by 1000 suggests clks is in milliseconds -- confirm.
    clks_sec = clks / 1000.0 + t[0]

    N_ch, N_delay, N_clk = phase.shape
    y_values = np.arange(0, N_ch, 1.0 / N_delay, dtype=float)[::-1]
    y_ticks = np.arange(0, N_ch, dtype=float) + 0.5
    # One row per (channel, delay) pair.
    phase = np.reshape(phase, (N_ch * N_delay, N_clk))

    # default_plot_font is only looked up when no font override is given.
    font = spec['font'] if 'font' in spec else default_plot_font
    figsize = spec.get('figsize', [3.45 * 1.5, 5.5 * 1.5])
    dpi = spec.get('dpi', 100)
    plt_pos = spec.get('plt_pos', {
        'left': 0.1,
        'right': 0.8,
        'bottom': 0.1,
        'top': 0.9,
        'hspace': 0.2
    })
    cbar_pos = spec.get('cbar_pos', [0.85, 0.1, 0.03, 0.35])
    c_ticks = spec.get('c_ticks', np.arange(0, 100.1, 20))

    import matplotlib
    matplotlib.rc('font', **font)
    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=figsize, dpi=dpi)
    PlotEEG(t_plot, d_plot, channels=names, amplitude=amplitude,
            figaxes=axes[0])
    pShapes = pcolor(clks_sec, y_values, phase, x_label='',
                     x_range=[win_start, win_end], y_label='',
                     y_ticks=y_ticks, y_ticklabels=names[::-1], Ax=axes[1])

    if iis is not None:
        from matplotlib.patches import Rectangle as Rect
        for iis_t0, iis_t1 in iis:
            ev_start, ev_end = float(iis_t0), float(iis_t1)
            # Standard interval-overlap test. The original only checked
            # whether an endpoint fell inside the window and so skipped
            # events that span the whole window.
            if ev_start > win_end or ev_end < win_start:
                continue
            rec_width = ev_end - ev_start
            for ax in axes:
                ylim = ax.get_ylim()
                rec_loc = (ev_start, ylim[0])
                rec_height = ylim[1] - ylim[0]
                ax.add_patch(
                    Rect(rec_loc, rec_width, rec_height, edgecolor='none',
                         facecolor='red', alpha=0.3))

    def second_to_timestr(seconds, loc):
        # Tick formatter: render the tick value through the project Time type.
        return repr(utl.Time(seconds))

    from matplotlib.ticker import FuncFormatter
    axes[1].get_xaxis().set_major_formatter(FuncFormatter(second_to_timestr))
    fig.subplots_adjust(**plt_pos)
    cbar_ax = fig.add_axes(cbar_pos)
    cb = fig.colorbar(pShapes, cax=cbar_ax)
    cb.set_ticks(c_ticks)
    plt.show()
    return fig
def add_data(self, gkey, ax, KEY=False):
    """Draw one jittered box per entry of ``gkey`` on ``ax``.

    Args:
        gkey: Mapping of option name to a sequence of values.
        ax: Axes to draw on.
        KEY: Use the fixed key palettes (two or three options) or the
            extended palette instead of ``VWCOLORS``.

    Returns:
        ``(labels, items)``: option names and matching legend proxy patches.

    Exits the program (``sys.exit()``) when there are more options than
    palette colours, as the original did.
    """
    # list(): dict views cannot be indexed or measured reliably on Python 3.
    opts = list(gkey)
    colors = VWCOLORS
    if KEY:
        if len(opts) == 2:
            colors = ['magenta', 'lime']
        elif len(opts) == 3:
            colors = ['green', 'cyan', 'red']
        else:
            colors = [
                'olive', 'DarkSalmon', 'DarkSlateGray', 'Plum', 'Chocolate',
                'FireBrick'
            ] + VWCOLORS

    if len(colors) < len(opts):
        # Same message and exit as the original Python 2 ``print`` statement.
        sys.stdout.write('okay this is error %s %d %d\n' %
                         (opts, len(opts), len(colors)))
        sys.exit()
    color_key = dict(zip(opts, colors))

    vals = [gkey[opt] for opt in opts]
    colors = [color_key[opt] for opt in opts]
    labels, items = [], []
    for name, shade in color_key.items():
        labels.append(name)
        items.append(Rect((0, 0), 1, 1, fc=shade))

    pos, width = [self.xOffset + 0.10 * x for x in range(len(opts))], 0.075
    self.bp = ax.boxplot(vals, positions=pos, widths=width,
                         patch_artist=True, showmeans=True, whis=1)

    for i, opt in enumerate(opts):
        if opt == '':
            continue
        clr = color_key[opt]
        self.bp['boxes'][i].set_edgecolor(clr)
        self.bp['boxes'][i].set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9,
                 markerfacecolor=clr)
        # Two caps and two whiskers per box, stored pairwise.
        for part in ('caps', 'whiskers'):
            for idx in (2 * i, 2 * i + 1):
                plt.setp(self.bp[part][idx], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr,
                 markeredgecolor=clr, marker='s', markersize=2.0)

    # Jittered raw observations. The draw pattern is kept as in the original
    # so seeded runs stay reproducible.
    xJitters = [
        np.random.normal(pos[i], 0.01, len(vals[i])) for i in range(len(vals))
    ]
    yJitters = [[
        vals[i][j] * np.random.normal(1, 0.005, len(vals[i]))[j]
        for j in range(len(vals[i]))
    ] for i in range(len(vals))]
    for xJ, yJ, clevel in zip(xJitters, yJitters, colors):
        ax.scatter(xJ, yJ, c=clevel, alpha=0.7, s=4, zorder=9)
    for patch, clevel in zip(self.bp['boxes'], colors):
        patch.set_facecolor(clevel)
        patch.set_alpha(0.2)

    # Only the part after the last '~' is shown as tick label.
    ax.set_xticklabels([opt.split('~')[-1] for opt in opts], fontsize=10,
                       rotation=-30)
    ax.set_xlim([pos[0] - 0.2, pos[-1] + 0.2])
    return labels, items
def add_hist(self, h_data, TITLE=None, DNAME=None, MINSIZE=30):
    """Draw a histogram (list input) or grouped bars (mapping input) in the next grid cell.

    Args:
        h_data: Either a list of values, drawn as a plain histogram, or a
            mapping of group name to a sequence of values, drawn as one bar
            per group and bin.
        TITLE: Legend title; the legend is only drawn when this is truthy.
        DNAME: 'REL' bins values as integers clamped to [-5, 5]; any other
            value bins them in steps of 50 capped at 1000.
        MINSIZE: Groups with fewer values than this are left out.

    Sets ``self.ax`` and advances ``self.yLoc`` / ``self.xLoc`` to the next
    grid cell.

    NOTE(review): the indentation of this block was reconstructed from a
    whitespace-collapsed source -- in particular confirm that
    ``X_key = Y_key`` sits after, not inside, the ``except`` clause.
    """
    self.ax = plt.subplot2grid((self.xLen, self.yLen), (self.xLoc, self.yLoc), rowspan=1, colspan=1)
    if type(h_data) == list:
        y = sorted(h_data)
        # fall back to at most four bins when automatic binning yields fewer than two
        if len(self.ax.hist(y, bins='auto')[0]) < 2:
            self.ax.clear()
            self.ax.hist(np.hstack(y), bins=min(4, len(cc(y))))
    else:
        x, y = h_data.keys(), sorted(h_data.values())
        # X_key[group][bin] -> number of values of that group in that bin
        X_key = dd(lambda: dd(int))
        R, pv = 'NA', 'NA'
        yOffset = 0
        xOffset = 0
        xLoc = 0
        xticks = []
        if DNAME == 'REL':
            for X, Y in h_data.items():
                if len(Y) < MINSIZE: continue
                for y in Y:
                    # truncate to an integer and clamp to [-5, 5]
                    yAdd = int(y)
                    if yAdd < -5: yAdd = -5
                    if yAdd > 5: yAdd = 5
                    X_key[X][yAdd] += 1
            # Y_key holds the same counts as fractions of each group's total
            Y_key = dd(lambda: dd(int))
            for xx in X_key:
                xsum = sum(X_key[xx].values())
                for yy in X_key[xx]:
                    Y_key[xx][yy] = X_key[xx][yy] / float(xsum)
            try:
                # correlate the (integer) group name with the group's mean bin;
                # int(xx) raises ValueError for non-numeric group names
                my_means = []
                my_x = [int(xx) for xx in X_key.keys()]
                for xx in X_key.keys():
                    x_len = sum(X_key[xx].values())
                    x_total = sum(
                        [int(a) * b for a, b in X_key[xx].items()])
                    my_means.append(x_total / x_len)
                R, pv = stats.pearsonr(my_x, my_means)
            except ValueError:
                my_ex = []
            # plot the normalised fractions rather than the raw counts
            X_key = Y_key
            xJump = len(X_key)
            hRange = [-3, -2, -1, 0, 1, 2, 3, 4, 5]
            items, labels = [], []
            color_idx = 0
            for X in X_key:
                xLoc = 0
                labels.append(X)
                clr = self.get_color(X)
                # a tuple colour is a (face, edge) pair drawn with a star hatch
                if type(clr) == tuple:
                    a, b = clr
                    items.append(Rect((0, 0), 1, 1, fc=a, ec=b, hatch='*'))
                    for s in hRange:
                        self.ax.bar(xLoc + xOffset, X_key[X][s], width=1, bottom=0, color=a, ec=b, hatch='*')
                        # tick positions are collected while drawing the first group only
                        if xOffset == 0: xticks.append((xLoc + (xJump / 2), s))
                        xLoc += xJump
                    xOffset += 1
                else:
                    items.append(Rect((0, 0), 1, 1, fc=clr))
                    for s in hRange:
                        self.ax.bar(xLoc + xOffset, X_key[X][s], width=1, bottom=0, color=clr)
                        if xOffset == 0: xticks.append((xLoc + (xJump / 2), s))
                        xLoc += xJump
                    xOffset += 1
        else:
            for X, Y in h_data.items():
                if len(Y) < MINSIZE: continue
                for y in Y:
                    # round down to the nearest multiple of 50 and cap at 1000
                    yAdd = y % 100
                    if yAdd >= 50: yAdd = 50
                    else: yAdd = 0
                    yRnd = (int(y / 100) * 100) + yAdd
                    if yRnd > 1000: yRnd = 1000
                    X_key[X][yRnd] += 1
            # Y_key holds the same counts as fractions of each group's total
            Y_key = dd(lambda: dd(int))
            for xx in X_key:
                xsum = sum(X_key[xx].values())
                for yy in X_key[xx]:
                    Y_key[xx][yy] = X_key[xx][yy] / float(xsum)
            try:
                # correlate the (integer) group name with the group's mean bin;
                # int(xx) raises ValueError for non-numeric group names
                my_means = []
                my_x = [int(xx) for xx in X_key.keys()]
                for xx in X_key.keys():
                    x_len = sum(X_key[xx].values())
                    x_total = sum(
                        [int(a) * b for a, b in X_key[xx].items()])
                    my_means.append(x_total / x_len)
                R, pv = stats.pearsonr(my_x, my_means)
            except ValueError:
                my_ex = []
            # plot the normalised fractions rather than the raw counts
            X_key = Y_key
            xJump = len(X_key)
            hRange = range(0, 1000, 50)
            items, labels = [], []
            color_idx = 0
            for X in X_key:
                xLoc = 0
                labels.append(X)
                clr = self.get_color(X)
                # a tuple colour is a (face, edge) pair drawn with a star hatch
                if type(clr) == tuple:
                    a, b = clr
                    items.append(Rect((0, 0), 1, 1, fc=a, ec=b, hatch='*'))
                    for s in hRange:
                        self.ax.bar(xLoc + xOffset, X_key[X][s], width=1, bottom=0, color=a, ec=b, hatch='*')
                        # tick positions are collected while drawing the first group only
                        if xOffset == 0: xticks.append((xLoc + (xJump / 2), s))
                        xLoc += xJump
                    xOffset += 1
                else:
                    items.append(Rect((0, 0), 1, 1, fc=clr))
                    for s in hRange:
                        self.ax.bar(xLoc + xOffset, X_key[X][s], width=1, bottom=0, color=clr)
                        if xOffset == 0: xticks.append((xLoc + (xJump / 2), s))
                        xLoc += xJump
                    xOffset += 1
        xt, xl = [xx[0] for xx in xticks], [str(xx[1]) for xx in xticks]
        self.ax.set_xticks(xt)
        self.ax.set_xticklabels(xl)
        # the correlation is only reported when pearsonr succeeded
        if R != 'NA':
            rSS = str(round(R, 5))
            pSS = str(round(pv, 8))
            self.ax.set_title('CORR: ' + rSS + ' pv= ' + pSS)
        if TITLE:
            # more legend entries -> more columns, smaller font, tighter spacing
            nc, fs, cs, hp, bb1, bb2 = 1, 12, 0.5, 0.5, 0.9, 1.1
            if len(items) > 5:
                if len(items) < 10: nc, fs, cs, hp = 2, 10, 0.4, 0.4
                elif len(items) < 15: nc, fs, cs, hp, bb1, bb2 = 3, 10, 0.35, 0.35, 0.85, 1.1
                elif len(items) < 25: nc, fs, cs, hp, bb1, bb2 = 4, 9, 0.25, 0.25, 0.85, 1.1
                elif len(items) < 35: nc, fs, cs, hp, bb1, bb2 = 5, 8, 0.20, 0.20, 0.8, 1.1
                else: nc, fs, cs, hp, bb1, bb2 = 6, 7, 0.15, 0.15, 0.75, 1.1
            leg = self.ax.legend(items, labels, title=TITLE, handletextpad=hp, columnspacing=cs, fontsize=fs, ncol=nc, bbox_to_anchor=(bb1, bb2), loc='upper center')
    # advance to the next grid cell, wrapping to the next row
    self.yLoc += 1
    if self.yLoc == self.yLen:
        self.xLoc += 1
        self.yLoc = 0
def add_iso_boxes(self, box_data, TITLE=None, BW=False):
    """Append one cluster of box plots to ``self.ax``.

    Each entry of ``box_data`` is an ``(obs, avg, name, cnts)`` tuple; only
    ``name`` and ``cnts`` are used.  Counts are transformed with
    ``log(c + 1, 2)`` and drawn as one box per entry, starting at
    ``self.xOffset``, which is then moved past the cluster so that the next
    call continues to the right.  A legend swatch per entry is stored in
    ``self.items`` under the entry name.

    Args:
        box_data: iterable of ``(obs, avg, name, cnts)`` tuples.
        TITLE: optional text; the part after the last ``';'`` is written
            above the cluster.
        BW: draw every box in black instead of using ``self.get_color``.
    """
    opts, vals = [], []
    for _obs, _avg, name, cnts in box_data:
        opts.append(name)
        vals.append([log(c + 1, 2) for c in cnts])

    if not opts:
        # Nothing to draw; previously this crashed on ``colors[0]``.
        return

    colors = [self.get_color(name) for name in opts]
    if BW:
        colors = ['k' for _ in opts]

    # get_color may return (face, edge) pairs; split them into two lists.
    if isinstance(colors[0], tuple):
        colors, alt_colors = [c[0] for c in colors], [c[1] for c in colors]
    else:
        alt_colors = list(colors)

    width = 0.16
    pos = [self.xOffset + 0.20 * x for x in range(len(opts))]
    self.bp = self.ax.boxplot(vals, positions=pos, widths=width,
                              patch_artist=True, showmeans=True, whis=1)
    self.xOffset = pos[-1] + 2 * width
    xMid = np.mean(pos)

    for i, opt in enumerate(opts):
        clr, ac = colors[i], alt_colors[i]
        self.bp['boxes'][i].set_edgecolor(clr)
        self.bp['boxes'][i].set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9,
                 markerfacecolor=clr)
        # Caps and whiskers come in pairs per box.
        plt.setp(self.bp['caps'][(i * 2) + 1], color=clr, linewidth=1)
        plt.setp(self.bp['caps'][(i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][i * 2], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][1 + (i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr,
                 markeredgecolor=clr, marker='s', markersize=2.0)

        if clr == ac:
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr)
        else:
            # Was ``fc=a, ec=b`` with ``a``/``b`` undefined (NameError).
            self.items[opt] = Rect((0, 0), 1, 1, fc=clr, ec=ac, hatch='*')

    # Overlay the individual points with a little jitter, on the same axes
    # as the boxes.  One random vector is drawn per box.
    for x_centre, box_vals, clr in zip(pos, vals, colors):
        x_jit = np.random.normal(x_centre, 0.01, len(box_vals))
        y_jit = np.asarray(box_vals) * np.random.normal(1, 0.005,
                                                        len(box_vals))
        self.ax.scatter(x_jit, y_jit, c=clr, alpha=0.7, s=4, zorder=9)

    for patch, clr in zip(self.bp['boxes'], colors):
        patch.set_facecolor(clr)
        patch.set_alpha(0.5)

    if TITLE:
        _, yMax = self.ax.get_ylim()
        self.ax.text(xMid, yMax, TITLE.split(';')[-1], fontsize=20,
                     fontweight='bold')
    return
def add_boxes(self, h_data, TITLE=None, DNAME=None, MINSIZE=30, BW=True):
    """Draw one box per group of ``h_data`` in the current grid cell.

    Groups are ordered by a fixed rank table (unknown keys rank 0), with
    placeholder groups and keys containing ``'_'`` dropped (except
    ``'GT_18'`` and ``'SUB_16'``, which are relabelled).  Values equal to
    ``'NA'`` are discarded and the rest converted to float.  The axes frame
    is hidden and the group names are written inside the panel.  The grid
    position is advanced afterwards, also when nothing could be drawn.

    Args:
        h_data: mapping of group key (str) -> iterable of values.
        TITLE: optional title; ``'LOC'`` and ``'FS'`` are expanded.
        DNAME: unused.
        MINSIZE: unused.
        BW: draw everything in black and use an axes title; otherwise use
            the fixed palette and a legend.
    """
    self.ax = plt.subplot2grid((self.xLen, self.yLen),
                               (self.xLoc, self.yLoc),
                               rowspan=1, colspan=1)
    self.xOffset = 0

    def advance_grid():
        # Next cell, wrapping to the next row when the row is full.
        self.yLoc += 1
        if self.yLoc == self.yLen:
            self.xLoc += 1
            self.yLoc = 0

    iKey = dd(int)
    iKey.update({
        '16': 1, '17': 2, '18': 3, 'GT_18': 4,
        'IZ': 1, 'SP': 2, 'CP': 3, 'CR': 4, 'MZ': 5,
        'SINGLE': 1, 'BRIEF': 2, 'REPETITIVE': 3,
        '*SVZ*': 0.5, '*IZ*': 2,
    })
    # The original pre-filter tested ``data[1]`` instead of the row itself,
    # so it never removed anything (and raised IndexError for a single
    # group).  Its keys are merged here with the in-loop skip list.
    skipped = {'NA', 'UNK', 'UNKNOWN', 'UNAVAIL', 'UNCLEAR', 'DOUBLETS',
               'NON'}
    renamed = {'GT_18': '>18', 'SUB_16': '<16'}

    opts, vals = [], []
    for _, key, raw in sorted([iKey[k], k, v] for k, v in h_data.items()):
        if key in skipped:
            continue
        if key in renamed:
            label = renamed[key]
        elif len(key.split('_')) > 1:
            continue
        else:
            label = key
        opts.append(label)
        vals.append([float(dx) for dx in raw if dx != 'NA'])

    if not opts:
        # No plottable group: leave an empty, frameless cell.
        self.ax.axis('off')
        advance_grid()
        return

    # NOTE(review): a fixed palette takes precedence over self.get_color()
    # here -- confirm whether per-group colours were intended.  The palette
    # is cycled so that more than four groups no longer raise IndexError.
    palette = ['pink', 'orange', 'purple', 'orange']
    colors = [palette[i % len(palette)] for i in range(len(opts))]
    if BW:
        colors = ['k' for _ in opts]
    if opts == ['SVZ', '*SVZ*', 'IZ', '*IZ*']:
        opts = ['$SVZ$', '$SVZ_{novel}$', '$IZ$', '$IZ_{novel}$']

    width = 0.16
    pos = [self.xOffset + 0.20 * x for x in range(len(opts))]
    self.bp = self.ax.boxplot(vals, positions=pos, widths=width,
                              patch_artist=True, showmeans=True, whis=1)

    items, labels = [], []
    for i, opt in enumerate(opts):
        clr = colors[i]
        self.bp['boxes'][i].set_edgecolor(clr)
        self.bp['boxes'][i].set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9,
                 markerfacecolor=clr)
        # Caps and whiskers come in pairs per box.
        plt.setp(self.bp['caps'][(i * 2) + 1], color=clr, linewidth=1)
        plt.setp(self.bp['caps'][(i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][i * 2], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][1 + (i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr,
                 markeredgecolor=clr, marker='s', markersize=2.0)
        # Colours are plain strings here, so the hatched two-colour swatch
        # (which referenced undefined names) can never apply.
        items.append(Rect((0, 0), 1, 1, fc=clr))
        labels.append(opt)

    # Jittered raw points on top of the boxes; one random vector per box.
    for x_centre, box_vals, clr in zip(pos, vals, colors):
        x_jit = np.random.normal(x_centre, 0.01, len(box_vals))
        y_jit = np.asarray(box_vals) * np.random.normal(1, 0.005,
                                                        len(box_vals))
        self.ax.scatter(x_jit, y_jit, c=clr, alpha=0.7, s=4, zorder=9)

    for patch, clr in zip(self.bp['boxes'], colors):
        patch.set_facecolor(clr)
        patch.set_alpha(0.5)

    # The axes are switched off below, so group names are written as text
    # just above the lower y-limit.
    yMin, yMax = self.ax.get_ylim()
    yStep = (yMax - yMin) / 30.0
    for j, xpos in enumerate(self.ax.get_xticks()):
        self.ax.text(xpos, yMin + yStep, opts[j].split('~')[-1],
                     fontsize=25, fontweight='bold',
                     verticalalignment='bottom',
                     horizontalalignment='center')
    self.ax.set_xlim([pos[0] - 0.2, pos[-1] + 0.2])
    self.ax.axis('off')

    title_key = {'LOC': 'LAYER', 'FS': 'FIRING STYLE'}
    if TITLE:
        if TITLE in title_key:
            TITLE = title_key[TITLE]
        nc, fs, cs, hp, bb1, bb2 = 1, 12, 0.5, 0.5, 0.2, 1.1
        if BW:
            self.ax.set_title(TITLE)
        else:
            # Shrink the legend and add columns as the group count grows.
            if len(items) > 4:
                if len(items) < 10:
                    nc, fs, cs, hp = 2, 10, 0.4, 0.4
                elif len(items) < 15:
                    nc, fs, cs, hp, bb1, bb2 = 3, 10, 0.35, 0.35, 0.85, 1.1
                elif len(items) < 25:
                    nc, fs, cs, hp, bb1, bb2 = 4, 9, 0.25, 0.25, 0.85, 1.1
                elif len(items) < 35:
                    nc, fs, cs, hp, bb1, bb2 = 5, 8, 0.20, 0.20, 0.8, 1.1
                else:
                    nc, fs, cs, hp, bb1, bb2 = 6, 7, 0.15, 0.15, 0.75, 1.1
            self.ax.legend(items, labels, title=TITLE, handletextpad=hp,
                           columnspacing=cs, fontsize=fs, ncol=nc,
                           bbox_to_anchor=(bb1, bb2), loc='upper center')

    advance_grid()
def plot_tree_classif(pred_dict, phn_dict, auc_dict, use_lvls, cdata_dict,
                      args):
    """Plot the mutation tree of a gene next to classifier score violins.

    For every (source, classifier) pair that has predictions at
    ``use_lvls``, the gene-wide point mutation type plus (when at least five
    are available) five randomly sampled subtypes are scored by mutated
    fraction and AUC.  The pair whose scores vary the most is drawn: the
    tree on top, one highlighted tree per mutation type in the middle, and
    one pair of violins (wild-type vs. mutated samples) per mutation type
    at the bottom.  The figure is saved as SVG under
    ``plot_dir/<cohort>/``.

    Args:
        pred_dict: keyed by ``(source, levels, classifier)``; values are
            indexed with ``.loc[mutation type]``.
        phn_dict: same keys; values map mutation type -> boolean phenotype.
        auc_dict: same keys; values are indexed with ``.loc[mutation type]``.
        use_lvls: mutation levels joined by ``'__'``.
        cdata_dict: cohort datasets; only the first value is used.
        args: needs ``gene`` and ``cohort`` attributes.
    """
    base_cdata = tuple(cdata_dict.values())[0]
    base_lvls = 'Exon__Location__Protein'
    lvls_k = tuple(use_lvls.split('__'))
    tree_lvls = ('Gene', ) + lvls_k
    base_cdata.add_mut_lvls(tree_lvls)
    use_mtree = base_cdata.mtrees[tree_lvls][args.gene]
    gene_pnt_mtype = MuType({('Gene', args.gene): pnt_mtype})

    # Every candidate starts with the gene-wide point mutation type, scored
    # at the base levels.
    use_mtypes = {(src, clf): [MuType({('Gene', args.gene): pnt_mtype})]
                  for src, lvls, clf in pred_dict if lvls == use_lvls}
    use_criter = {
        (src, clf): [(np.mean(phn_dict[src, base_lvls, clf][mtypes[0]]),
                      auc_dict[src, base_lvls, clf].loc[mtypes[0]])]
        for (src, clf), mtypes in use_mtypes.items()
    }

    for src, clf in use_mtypes:
        # Subtypes that are real (not random), are not the whole point
        # mutation type, and do not involve copy number alterations.
        cur_mtypes = {
            mtype.subtype_iter()[0][1]
            for mtype, _ in phn_dict[src, use_lvls, clf].items()
            if (not isinstance(mtype, RandomType)
                and mtype.subtype_iter()[0][1] != pnt_mtype
                and (mtype.subtype_iter()[0][1] & copy_mtype).is_empty())
        }

        if len(cur_mtypes) >= 5:
            use_mtypes[src, clf] += [
                MuType({('Gene', args.gene): mtype})
                for mtype in random.sample(sorted(cur_mtypes), k=5)
            ]

        use_criter[src, clf] += [
            (np.mean(phn_dict[src, use_lvls, clf][mtype]),
             auc_dict[src, use_lvls, clf].loc[mtype])
            for mtype in use_mtypes[src, clf][1:]
        ]

    # Keep the pair with the largest product of variances across its
    # (mutated fraction, AUC) scores.
    use_src, use_clf = sorted(
        use_criter.items(),
        key=lambda x: np.prod(np.var(np.array(x[1]), axis=0)))[-1][0]
    plt_mtypes = use_mtypes[use_src, use_clf]

    fig = plt.figure(figsize=(1.1 + 3.1 * len(plt_mtypes), 12))
    gs = gridspec.GridSpec(nrows=3, ncols=len(plt_mtypes) + 1,
                           width_ratios=[1] + [3] * len(plt_mtypes),
                           height_ratios=[2, 1, 4])

    lbl_ax = fig.add_subplot(gs[:, 0])
    lbl_ax.axis('off')
    tree_ax = fig.add_subplot(gs[0, 1:])
    tree_ax.axis('off')
    leaf_count = len(MuType(use_mtree.allkey()).leaves())
    tree_top = len(lvls_k)

    tree_ax.add_patch(
        Rect((leaf_count * 0.03, tree_top + 0.17), leaf_count * 0.23, 0.93,
             facecolor=variant_clrs['WT'], alpha=0.41, clip_on=False,
             linewidth=0))
    tree_ax.text(
        leaf_count * 0.15, tree_top + 0.59,
        "Wild-Type for\n{} Point Mutations\n({} samples)".format(
            args.gene,
            len(set(base_cdata.get_samples())
                - set(use_mtree.get_samples()))),
        size=19, ha='center', va='center', weight='semibold')

    tree_ax.add_patch(
        Rect((leaf_count * 0.31, tree_top + 0.23), leaf_count * 0.43, 0.79,
             facecolor=variant_clrs['Point'], alpha=0.41, clip_on=False,
             linewidth=0))
    tree_ax.text(leaf_count / 2, tree_top + 0.61,
                 "All {} Point Mutations\n({} samples)".format(
                     args.gene, len(use_mtree)),
                 size=19, ha='center', va='center', weight='semibold')

    tree_ax = recurse_labels(tree_ax, use_mtree, (0, leaf_count), tree_top,
                             leaf_count, clr_mtype=False, add_lbls=True)
    tree_ax.set_xlim(0, leaf_count * 1.03)
    tree_ax.set_ylim(0, tree_top + 0.6)

    for i, lvl in enumerate(lvls_k):
        lbl_ax.text(1.31, 0.88 - i / 10.11, clean_level(lvl),
                    size=19, ha='right', va='center')

    for i, plt_mtype in enumerate(plt_mtypes):
        mtype_ax = fig.add_subplot(gs[1, i + 1])
        mtype_ax.axis('off')

        if plt_mtype == gene_pnt_mtype:
            tree_mtype = None
            top_fc = variant_clrs['Point']
            top_ec = 'none'
        else:
            tree_mtype = plt_mtype.subtype_iter()[0][1].subtype_iter()[0][1]
            top_fc = 'none'
            top_ec = variant_clrs['Point']

        mtype_ax.add_patch(
            Rect((leaf_count * 0.19, tree_top - 0.19), leaf_count * 0.67,
                 0.31, clip_on=False, facecolor=top_fc, edgecolor=top_ec,
                 alpha=0.41, linewidth=2.7))
        mtype_ax = recurse_labels(mtype_ax, use_mtree, (0, leaf_count),
                                  tree_top - 0.4, leaf_count,
                                  clr_mtype=tree_mtype, add_lbls=False)
        mtype_ax.set_xlim(0, leaf_count * 1.03)
        mtype_ax.set_ylim(0, tree_top + 0.6)

        # The first panel is the gene-wide type, which lives at the base
        # levels.  Phenotypes and AUCs are read for the *selected* pair;
        # they used to be read through stale loop variables left over from
        # the candidate loop above.
        panel_lvls = base_lvls if i == 0 else use_lvls
        pred_vals = pred_dict[use_src, panel_lvls, use_clf].loc[plt_mtype]
        use_phn = phn_dict[use_src, panel_lvls, use_clf][plt_mtype]
        pred_vals = pred_vals.apply(np.mean)
        use_auc = use_criter[use_src, use_clf][i][1]

        viol_ax = fig.add_subplot(gs[2, i + 1])
        sns.violinplot(x=pred_vals[~use_phn].values, ax=viol_ax,
                       palette=[variant_clrs['WT']], orient='v',
                       linewidth=0, cut=0, width=0.67)
        sns.violinplot(x=pred_vals[use_phn].values, ax=viol_ax,
                       palette=[variant_clrs['Point']], orient='v',
                       linewidth=0, cut=0, width=0.67)

        viol_ax.text(0.5, 1.01,
                     '\n'.join(get_fancy_label(plt_mtype).split('\n')[1:]),
                     size=12, ha='center', va='top',
                     transform=viol_ax.transAxes)
        viol_ax.text(1.07, 0.83,
                     '\n'.join([str(np.sum(use_phn)), "mutated", "samples"]),
                     size=13, ha='right', va='top',
                     c=variant_clrs['Point'], weight='semibold',
                     transform=viol_ax.transAxes)
        viol_ax.text(0.5, -0.05, "AUC: {:.3f}".format(use_auc),
                     size=21, ha='center', va='bottom',
                     transform=viol_ax.transAxes)

        viol_ax.get_children()[0].set_alpha(0.41)
        viol_ax.get_children()[2].set_alpha(0.41)
        viol_ax.set_yticklabels([])

        # Leave head-room above the violins for the label text.
        viol_ylims = viol_ax.get_ylim()
        ylim_gap = (viol_ylims[1] - viol_ylims[0]) / 13
        viol_ax.set_ylim([viol_ylims[0], viol_ylims[1] + ylim_gap])

    # save the plot to file
    fig.tight_layout(w_pad=1.9, h_pad=1.5)
    fig.savefig(os.path.join(
        plot_dir, args.cohort,
        "{}_tree-classif__{}.svg".format(args.gene, use_lvls)),
        bbox_inches='tight', format='svg')
    plt.close()
def plot_lollipop(cdata_dict, domain_dict, args):
    """Plot a lollipop chart of a gene's point mutations by protein position.

    One stem per mutated position and mutation form, an overall pie chart of
    point mutations split by copy number overlap, a label and a small pie
    for every position with at least ten mutated samples, and one row of
    protein domain boxes per entry of ``domain_dict`` below the baseline.
    The figure is saved as ``plot_dir/<cohort>/<gene>_lollipop.svg``.

    Args:
        cdata_dict: cohort datasets; only the first value is used.
        domain_dict: domain source name -> table with ``Gene``,
            ``Transcript``, ``DomainID``, ``DomainStart`` and ``DomainEnd``
            columns.
        args: needs ``gene`` and ``cohort`` attributes.

    Raises:
        AssertionError: if the gene's mutations map to more than one
            transcript.
    """
    fig, ax = plt.subplots(figsize=(9, 5))

    use_cdata = tuple(cdata_dict.values())[0]
    gn_annot = use_cdata.gene_annot[args.gene]

    loc_lvls = 'Gene', 'Scale', 'Copy', 'Consequence', 'Position', 'HGVSp'
    if loc_lvls not in use_cdata.mtrees:
        use_cdata.add_mut_lvls(loc_lvls)

    use_mtree = use_cdata.mtrees[loc_lvls][args.gene]
    mut_count = len(use_mtree.get_samples())
    var_count = len(use_mtree['Point'].get_samples())

    pie_clrs = [
        choose_subtype_colour(pnt_mtype),
        choose_subtype_colour(pnt_mtype | gains_mtype),
        choose_subtype_colour(pnt_mtype | dels_mtype)
    ]

    pie_ax = ax.inset_axes(bounds=(0.05, 0.41, 0.27, 0.27))
    pie_ax.pie(x=get_pie_counts(pnt_mtype, use_mtree), colors=pie_clrs,
               labels=['point muts\nwithout CNAs', 'point & any gain',
                       'point & any loss'],
               labeldistance=1.23, explode=[0.19] * 3, startangle=90,
               autopct=lambda x: format(x * var_count / 100, '.0f'),
               pctdistance=0.37, wedgeprops=dict(alpha=0.67),
               textprops=dict(size=9))

    # Nudge neighbouring pie labels apart when they would overlap.
    for pie_indx1, pie_indx2 in [(3, 5), (5, 7)]:
        txt_arts1 = pie_ax.get_children()[pie_indx1]
        txt_arts2 = pie_ax.get_children()[pie_indx2]
        txt_x1, txt_y1 = txt_arts1.get_position()
        txt_x2, txt_y2 = txt_arts2.get_position()

        if abs(txt_x1 - txt_x2) < 1 and abs(txt_y1 - txt_y2) < 0.37:
            if txt_y1 >= txt_y2:
                txt_arts2.set_position((txt_x2, txt_y2 - 0.41))
            else:
                txt_arts1.set_position((txt_x1, txt_y1 - 0.41))

    # (position, sample count) pairs per mutation form; positions whose
    # leading part is not numeric are left out.
    loc_dict = {
        form: sorted(
            [(int(loc.split('-')[0]), len(loc_muts.get_samples()))
             for loc, loc_muts in form_muts
             if loc.split('-')[0].isnumeric()],
            key=itemgetter(0))
        for form, form_muts in use_mtree['Point']
    }

    # calculate the minimum and maximum amino acid positions for the
    # mutations to be plotted, as well as the most samples at any hotspot
    min_pos = min(pos for loc_counts in loc_dict.values()
                  for pos, _ in loc_counts if pos >= 0)
    max_pos = max(pos for loc_counts in loc_dict.values()
                  for pos, _ in loc_counts)
    min_pos -= (max_pos - min_pos) / 43

    max_count = max(
        max(count for loc_counts in loc_dict.values()
            for _, count in loc_counts),
        13)
    pos_rng = max_pos - min_pos

    lgnd_ptchs = []
    for form, form_muts in use_mtree['Point']:
        if loc_dict[form]:
            form_mtype = MuType({
                ('Scale', 'Point'): {('Consequence', form): None}})
            lgnd_ptchs += [
                Patch(color=form_clrs[form], alpha=0.67,
                      label=get_fancy_label(form_mtype))
            ]

            # NOTE(review): ``use_line_collection`` is tied to specific
            # matplotlib versions -- confirm against the pinned version.
            mrks, stms, basl = ax.stem(*zip(*loc_dict[form]),
                                       use_line_collection=True)
            plt.setp(mrks, markersize=7, markeredgecolor='black',
                     markerfacecolor=form_clrs[form], zorder=5)
            plt.setp(stms, linewidth=0.8, color='black', zorder=1)
            plt.setp(basl, linewidth=1.1, color='black', zorder=2)

        for loc, loc_muts in form_muts:
            loc_size = len(loc_muts.get_samples())

            # Use the same numeric test as ``loc_dict``: positions that
            # were not stemmed cannot be labelled either (previously only
            # '-' was excluded and any other non-numeric position with ten
            # or more samples raised ValueError).
            loc_head = loc.split('-')[0]
            if not loc_head.isnumeric() or loc_size < 10:
                continue
            loc_int = int(loc_head)

            # Most frequent substitution first.
            loc_mtypes = sorted(
                [MuType({('HGVSp', lbl): None}) for lbl, _ in loc_muts],
                key=lambda mtype: len(mtype.get_samples(form_muts))
            )[::-1]
            mut_lbls = [get_fancy_label(loc_mtype)
                        for loc_mtype in loc_mtypes]
            root_strs = {re.match('[A-Z][0-9]+', lbl).group()
                         for lbl in mut_lbls}

            if len(root_strs) > 1:
                loc_lbl = '\n'.join(mut_lbls)
            else:
                # All labels share the residue prefix: print it once.
                lbl_root = tuple(root_strs)[0]
                if max(len(lbl) - len(lbl_root) for lbl in mut_lbls) > 4:
                    loc_lbl = '\n'.join([mut_lbls[0]] + [
                        ''.join([' ' * len(lbl_root) * 2,
                                 lbl.split(lbl_root)[1]])
                        for lbl in mut_lbls[1:]
                    ])
                else:
                    loc_lbl = "/".join([mut_lbls[0]] + [
                        lbl.split(lbl_root)[1] for lbl in mut_lbls[1:]
                    ])

            ax.text(loc_int + pos_rng / 115, loc_size + max_count / 151,
                    loc_lbl, size=8, ha='left', va='center')

            loc_mtype = MuType({
                ('Scale', 'Point'): {
                    ('Consequence', form): {('Position', loc): None}}})
            pie_ax = ax.inset_axes(bounds=(loc_int - pos_rng / 7, loc_size,
                                           max_pos / 5, max_count / 5),
                                   transform=ax.transData)
            pie_ax.pie(x=get_pie_counts(loc_mtype, use_mtree),
                       colors=pie_clrs, explode=[0.09] * 3, startangle=90,
                       wedgeprops=dict(alpha=0.67))

    use_tx = use_cdata._muts.loc[
        (use_cdata._muts.Gene == args.gene)
        & ~use_cdata._muts.Feature.isnull()].Feature.unique()
    assert len(use_tx) == 1, (
        "Multiple transcripts detected in {} for {} !".format(
            args.cohort, args.gene))

    # TODO: do domains need to be included in these plots?
    use_tx = use_tx[0]
    prot_patches = []

    for i, (domn_lbl, domn_data) in enumerate(domain_dict.items()):
        tx_annot = gn_annot['Transcripts'][use_tx]
        gene_domns = domn_data[(domn_data.Gene == gn_annot['Ens'])
                               & (domn_data.Transcript == use_tx)]
        min_pos = min(min_pos, gene_domns.DomainStart.min() - max_pos / 67)

        for domn_id, domn_start, domn_end in zip(gene_domns.DomainID,
                                                 gene_domns.DomainStart,
                                                 gene_domns.DomainEnd):
            prot_patches.append(
                Rect((domn_start, -max_count * (0.15 + i * 0.11)),
                     domn_end - domn_start, max_count * 0.09))
            ax.text((domn_start + domn_end) / 2,
                    -max_count * (0.11 + i * 0.11),
                    domn_id, size=9, ha='center', va='center')

    ax.add_collection(
        PatchCollection(prot_patches, color='#D99100', alpha=0.4,
                        linewidth=0))

    for i, domn_nm in enumerate(domain_dict):
        ax.text(min_pos, -max_count * (0.07 + i * 0.13),
                "{}\nDomains".format(domn_nm), size=8, ha='right', va='top',
                linespacing=0.71, rotation=37)

    ax.text(0.03, 0.98, args.gene, size=14, fontweight='semibold',
            ha='left', va='center', transform=ax.transAxes)
    ax.text(0.03, 0.941,
            "{} point mutants\n{} gain mutants\n{} loss mutants"
            "\n{:.1%} of {} affected".format(
                var_count,
                len(gains_mtype.get_samples(use_mtree)),
                len(dels_mtype.get_samples(use_mtree)),
                mut_count / len(use_cdata.get_samples()),
                get_cohort_label(args.cohort),
            ),
            size=12, ha='left', va='top', transform=ax.transAxes)

    # add the legend for the colour used for each form of mutation
    plt_lgnd = ax.legend(handles=lgnd_ptchs, frameon=False, fontsize=11,
                         ncol=2, loc=1, handletextpad=0.7,
                         bbox_to_anchor=(0.98, 1.02))
    ax.add_artist(plt_lgnd)

    ax.grid(linewidth=0.41, alpha=0.41)
    ax.set_xlabel("Amino Acid Position", size=17, weight='semibold')
    ax.set_ylabel(" # of Mutated Samples", size=17, weight='semibold')

    ax.set_xlim(min_pos, max_pos * 1.02)
    ax.set_ylim(-max_count * (0.05 + len(domain_dict) * 0.13),
                max_count * 17 / 11)
    # Only count ticks at or above the baseline; the space below it holds
    # the domain rows.
    ax.set_yticks([tck for tck in ax.get_yticks() if tck >= 0])

    fig.savefig(os.path.join(plot_dir, args.cohort,
                             "{}_lollipop.svg".format(args.gene)),
                bbox_inches='tight', format='svg')
    plt.close()
def plot_lollipop(cdata_dict, domain_dict, args):
    """Plot a lollipop chart of point mutations with domain and exon tracks.

    One stem per mutated location and mutation form, a label for every
    numeric location with at least ten mutations, one row of protein domain
    boxes per entry of ``domain_dict`` and a row of exon boxes below the
    baseline.  The figure is saved as
    ``plot_dir/<cohort>/<gene>_lollipop.svg``.

    The transcript annotation used for the exon track is taken from the
    last domain tree that is processed, so ``domain_dict`` must contain at
    least one entry.

    Args:
        cdata_dict: cohort datasets; only the first value is used.
        domain_dict: domain source name -> table with ``Gene``,
            ``Transcript``, ``DomainID``, ``DomainStart`` and ``DomainEnd``
            columns.
        args: needs ``gene`` and ``cohort`` attributes.
    """
    fig, main_ax = plt.subplots(figsize=(11, 4))

    base_cdata = tuple(cdata_dict.values())[0]
    gn_annot = base_cdata.gene_annot[args.gene]

    base_lvls = 'Gene', 'Scale', 'Form_base', 'Location', 'Protein',
    domn_lvls = [('Gene', 'Scale', 'Transcript',
                  '_'.join(['Domain', domn_nm]), 'Location')
                 for domn_nm in domain_dict]

    for lvls in [base_lvls] + domn_lvls:
        if lvls not in base_cdata.mtrees:
            base_cdata.add_mut_lvls(lvls)
    loc_mtree = base_cdata.mtrees[base_lvls][args.gene]['Point']

    #TODO: double-check mutations in cohorts such as METABRIC that are not
    # numeric but maybe should be (i.e. in CDH1, MAP3K1)
    # Non-numeric locations are stemmed at position -1.
    loc_dict = {
        form: sorted([(int(loc), len(loc_muts)) if loc.isnumeric()
                      else (-1, len(loc_muts))
                      for loc, loc_muts in form_muts],
                     key=itemgetter(0))
        for form, form_muts in loc_mtree
    }

    # calculate the minimum and maximum amino acid positions for the
    # mutations to be plotted, as well as the most samples at any hotspot
    min_pos = min(pos for loc_counts in loc_dict.values()
                  for pos, _ in loc_counts if pos >= 0)
    max_pos = max(pos for loc_counts in loc_dict.values()
                  for pos, _ in loc_counts)
    max_count = max(count for loc_counts in loc_dict.values()
                    for _, count in loc_counts)

    lgnd_ptchs = []
    for form, form_muts in loc_mtree:
        lgnd_ptchs += [
            Patch(color=form_clrs[form], alpha=0.53,
                  label=clean_label(form, 'Form_base'))
        ]

        # NOTE(review): ``use_line_collection`` is tied to specific
        # matplotlib versions -- confirm against the pinned version.
        mrks, stms, basl = main_ax.stem(*zip(*loc_dict[form]),
                                        use_line_collection=True)
        plt.setp(mrks, markersize=7, markeredgecolor='black',
                 markerfacecolor=form_clrs[form], zorder=5)
        plt.setp(stms, linewidth=0.8, color='black', zorder=1)
        plt.setp(basl, linewidth=1.1, color='black', zorder=2)

        for loc, loc_muts in form_muts:
            # Non-numeric locations have no meaningful x position for a
            # label; ``int(loc)`` used to raise ValueError for them once
            # they reached ten mutations.
            if len(loc_muts) < 10 or not loc.isnumeric():
                continue

            mut_lbls = sorted(lbl for lbl, _ in loc_muts)
            root_indx = re.match('p.[A-Z][0-9]+', mut_lbls[0]).span()[1]
            lbl_root = mut_lbls[0][2:root_indx]

            # The shared residue prefix is printed once; the remaining
            # labels only show what follows it.
            if max(len(lbl) - len(lbl_root) for lbl in mut_lbls) > 4:
                loc_lbl = '\n'.join([mut_lbls[0][2:]] + [
                    ''.join([' ' * len(lbl_root) * 2,
                             lbl.split(lbl_root)[1]])
                    for lbl in mut_lbls[1:]
                ])
            else:
                loc_lbl = "/".join(
                    [mut_lbls[0][2:]]
                    + [lbl.split(lbl_root)[1] for lbl in mut_lbls[1:]])

            main_ax.text(int(loc) + (max_pos - min_pos) / 115,
                         len(loc_muts) + max_count / 151,
                         loc_lbl, size=8, ha='left', va='center')

    prot_patches = []
    for i, lvls in enumerate(domn_lvls):
        tx_mtree = base_cdata.mtrees[lvls][args.gene]['Point']
        tx_id = tuple(tx_mtree)[0][0]
        tx_annot = gn_annot['Transcripts'][tx_id]

        domn_nm = lvls[3].split('_')[1]
        domn_df = domain_dict[domn_nm]
        gene_domns = domn_df[(domn_df.Gene == gn_annot['Ens'])
                             & (domn_df.Transcript == tx_id)]

        for domn_id, domn_start, domn_end in zip(gene_domns.DomainID,
                                                 gene_domns.DomainStart,
                                                 gene_domns.DomainEnd):
            prot_patches.append(
                Rect((domn_start, -max_count * (0.3 + i * 0.13)),
                     domn_end - domn_start, max_count * 0.11))
            main_ax.text((domn_start + domn_end) / 2,
                         -max_count * (0.25 + i * 0.13),
                         domn_id, size=9, ha='center', va='center')

    main_ax.add_collection(
        PatchCollection(prot_patches, alpha=0.4, linewidth=0,
                        color='#D99100'))

    exn_patches = []
    exn_pos = 1
    exn_total = len(tx_annot['Exons'])

    for i, exn_annot in enumerate(tx_annot['Exons']):
        exn_len = exn_annot['End'] - exn_annot['Start'] + 1

        # Remove the part of the exon covered by a UTR.
        if 'UTRs' in tx_annot:
            for utr_annot in tx_annot['UTRs']:
                if (exn_annot['Start'] <= utr_annot['Start']
                        <= exn_annot['End'] <= utr_annot['End']):
                    exn_len -= exn_annot['End'] - utr_annot['Start'] + 1
                elif (exn_annot['Start'] <= utr_annot['Start']
                        <= utr_annot['End'] <= exn_annot['End']):
                    exn_len -= utr_annot['End'] - utr_annot['Start'] + 1

        if exn_len > 0 and exn_pos <= max_pos:
            # Nucleotides -> amino acids.
            exn_len //= 3

            # Trim the last exon so it ends shortly after the last plotted
            # mutation.
            if i == (exn_total - 1):
                if (exn_pos + exn_len) > max_pos:
                    exn_len = max_pos - exn_pos + 10

            if (exn_pos + exn_len) >= min_pos:
                exn_patches.append(
                    Rect((exn_pos, max_count * -0.15), exn_len,
                         max_count * 0.11, color='green'))
                main_ax.text(max(exn_pos + exn_len / 2, min_pos + 5),
                             max_count * -0.1, exn_annot['number'],
                             size=min(11, (531 * exn_len / max_pos)**0.6),
                             ha='center', va='center')

            exn_pos += exn_len

    for i, domn_nm in enumerate(domain_dict):
        main_ax.text(min_pos - exn_pos / 29,
                     -max_count * (0.23 + i * 0.15),
                     "{}\nDomains".format(domn_nm), size=7, ha='right',
                     va='top', linespacing=0.65, rotation=37)

    # add the patches describing the boundaries of each exon and annotate them
    main_ax.add_collection(
        PatchCollection(exn_patches, alpha=0.4, linewidth=1.4,
                        color='#002C91'))
    main_ax.text(min_pos - exn_pos / 29, max_count * -0.08,
                 "{}\nExons".format(tx_annot['transcript_name']),
                 size=7, ha='right', va='top', linespacing=0.65, rotation=37)

    if '_' in args.cohort:
        coh_lbl = "{}({})".format(*args.cohort.split('_'))
    else:
        coh_lbl = str(args.cohort)

    main_ax.text(0.03, 0.97,
                 "{} {}-mutated samples\n{:.1%} of {} affected".format(
                     len(loc_mtree), args.gene,
                     len(loc_mtree) / len(base_cdata.get_samples()),
                     coh_lbl,
                 ),
                 size=13, ha='left', va='top', transform=main_ax.transAxes)

    # add the legend for the colour used for each form of mutation
    plt_lgnd = main_ax.legend(handles=lgnd_ptchs, frameon=False, fontsize=11,
                              ncol=3, loc=1, handletextpad=0.7,
                              bbox_to_anchor=(0.98, 1.02))
    main_ax.add_artist(plt_lgnd)

    main_ax.grid(linewidth=0.31)
    main_ax.set_xlabel("Amino Acid Position", size=17, weight='semibold')
    main_ax.set_ylabel(" # of Mutated Samples", size=17, weight='semibold')

    main_ax.set_xlim(min_pos - exn_pos / 29, exn_pos * 1.02)
    main_ax.set_ylim(-max_count * (0.03 + len(domain_dict) * 0.23),
                     max_count * 24 / 17)
    # Only count ticks at or above the baseline; the space below it holds
    # the domain and exon rows.
    main_ax.set_yticks([tck for tck in main_ax.get_yticks() if tck >= 0])

    # save the plot to file
    fig.savefig(os.path.join(plot_dir, args.cohort,
                             "{}_lollipop.svg".format(args.gene)),
                bbox_inches='tight', format='svg')
    plt.close()
def fill_boxes(self, opts, vals, colors, alt_colors, JITTERS=None,
               SIG_STARS=None):
    """Draw and style one box per option on the most recent axes.

    Boxes are drawn on ``self.axes[-1]`` at ``self.pos`` with width
    ``self.width``.  An option whose colour equals its alternate colour gets
    a solid box and an entry in ``self.major_legend_items``; otherwise the
    box is hatched and the entry goes to ``self.minor_legend_items``.

    Args:
        opts: option names, one per box.
        vals: one sequence of values per box.
        colors: main colour per box.
        alt_colors: alternate colour per box.
        JITTERS: optional sequence of ``(x, y)`` point sets, one per box,
            scattered on top of the boxes.
        SIG_STARS: optional pair of p-value sequences ``(g, l)``.  A silver
            star is drawn above box ``i`` when ``l[i]`` is below 0.05 and a
            gold star above that when ``g[i]`` is below 0.05.
    """
    G_THRES, L_THRES = 0.05, 0.05

    self.bp = self.axes[-1].boxplot(vals, positions=self.pos,
                                    widths=self.width, patch_artist=True,
                                    showmeans=True, whis=0.7)

    for i, opt in enumerate(opts):
        clr, ac = colors[i], alt_colors[i]
        self.bp['boxes'][i].set_edgecolor(clr)
        self.bp['boxes'][i].set_linewidth(1)
        plt.setp(self.bp['medians'][i], color=clr, linewidth=3)
        plt.setp(self.bp['means'][i], marker='h', markersize=9,
                 markerfacecolor=clr, markeredgecolor=ac)
        # Caps and whiskers come in pairs per box.
        plt.setp(self.bp['caps'][(i * 2) + 1], color=clr, linewidth=1)
        plt.setp(self.bp['caps'][(i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][i * 2], color=clr, linewidth=1)
        plt.setp(self.bp['whiskers'][1 + (i * 2)], color=clr, linewidth=1)
        plt.setp(self.bp['fliers'][i], markerfacecolor=clr,
                 markeredgecolor=ac, markeredgewidth=2, marker='s',
                 markersize=2.0)

    for box, opt, clr, ac in zip(self.bp['boxes'], opts, colors,
                                 alt_colors):
        if clr == ac:
            box.set_facecolor(clr)
            self.major_legend_items[opt] = Rect((0, 0), 1, 1, fc=clr)
        else:
            box.set(ec=clr, facecolor=ac, linewidth=4, hatch='+')
            self.minor_legend_items[opt] = Rect((0, 0), 1, 1, fc='white',
                                                ec=clr, linewidth=2,
                                                hatch='+')
        box.set_alpha(0.5)

    # ``is not None`` rather than ``!= None``: the latter is evaluated
    # element-wise (and is ambiguous in an ``if``) for array inputs.
    if JITTERS is not None:
        for i, (xJ, yJ) in enumerate(JITTERS):
            plt.scatter(xJ, yJ, c=colors[i], alpha=0.7, s=4, zorder=9)

    if SIG_STARS is not None:
        for i, (g, l) in enumerate(zip(SIG_STARS[0], SIG_STARS[1])):
            star_y = max(vals[i]) * 1.1
            if l < L_THRES:
                plt.scatter(self.pos[i], star_y, s=pv_2_size(l), marker='*',
                            color='silver', edgecolor='black', linewidth=1)
            if g < G_THRES:
                # Sized by its own p-value; this used to be sized by ``l``.
                plt.scatter(self.pos[i], star_y + 2, s=1.5 * pv_2_size(g),
                            marker='*', color='gold', edgecolor='black',
                            linewidth=1)
def __init__(self, args):
    """Set up the colour tables, axis labels, noise settings and statistics.

    ``args`` is accepted but not read here.
    """
    self.tFS = 25
    self.log = False

    # Insertion order matters: the legend labels and swatches built below
    # follow it.
    self.color_key = {
        'PANTHERS': 'darkblue',
        'GORILLAS': 'green',
        'ELEPHANTS': 'brown',
        'HUMANS': 'red',
        'POLAR_BEARS': 'purple',
        'ORCAS': 'k',
        'DOLPHINS': 'gray',
        'TIGERS': 'orange',
    }
    self.coord_key = dict(weight=0, brain=1, speed=2, shade=3)
    self.label_key = dict(
        weight='Body Mass (g)',
        brain='Brain Mass (g)',
        speed='Top Speed (m/s)',
        shade='Grayscale Shade',
    )

    # Each entry is [number, [parameter tuples]].
    # NOTE(review): presumably a count followed by distribution parameters
    # -- confirm against the code that consumes self.noise.
    self.noise = [
        [5, [(100, 50), (1000, 500)]],
        [5, [('U', 0, 5000)]],
        [10, [(100, 25)]],
        [10, [(1000, 250)]],
    ]
    self.er, self.dp = 0.05, 0.025

    self.ncol = len(self.color_key)
    self.labels = list(self.color_key)
    self.items = [Rect((0, 0), 1, 1, fc=shade)
                  for shade in self.color_key.values()]

    # Eight numbers per species.
    # NOTE(review): these look like (value, spread) pairs for the four
    # traits of coord_key -- confirm.
    self.stats = {
        'HUMANS': [70, 20, 1400, 30, 8.5, 1.0, 50, 20],
        'ELEPHANTS': [3000, 100, 500, 20, 7.0, 1.0, 15, 8],
        'GORILLAS': [200, 50, 500, 20, 9.75, 0.75, 88, 2],
        'PANTHERS': [60, 14, 150, 20, 18.5, 1, 90, 2],
        'POLAR_BEARS': [400, 75, 480, 20, 10.25, 0.75, 8, 2],
        'ORCAS': [10000, 1000, 5000, 20, 0.5, 0.01, 90, 2],
        'DOLPHINS': [250, 25, 1600, 100, 0.5, 0.1, 85, 10],
        'TIGERS': [300, 50, 250, 20, 15.5, 1, 60, 20],
    }

    self.tXLoc, self.tYLoc = 0.94, 1.035
    # Column names; not used within this method.
    ds = ['mass(kg)', '', 'brain_size(g)', '', 'Top_Land_Speed(m/s)', '',
          'Grayscale_Shade(%)']
def plot_mutation_lollipop(cdata, domain_data, args):
    """Draw a lollipop plot of point-mutation counts and save it as an SVG.

    The figure contains a stem ("lollipop") plot of mutated-sample counts per
    amino acid position, a labelled pie chart next to every position with at
    least twenty mutated samples, rectangles for protein domains and exons
    below the x-axis, and a three-set Venn diagram of the variant types.

    Args:
        cdata: object exposing ``train_mut`` (indexed by ``'Point'``),
            ``samples`` and ``gene_annot`` (indexed by ``args.gene``).
        domain_data: table indexed by ``'Gene'`` and ``'Transcript'`` with
            ``DomainID``, ``DomainStart`` and ``DomainEnd`` attributes
            (presumably a pandas DataFrame -- TODO confirm).
        args: object whose ``gene``, ``domains`` and ``cohort`` attributes
            are read.

    Relies on the module-level names ``variant_mtypes``, ``variant_clrs``,
    ``mcomb_clrs`` and ``plot_dir``. The hard-coded three-entry ``explode``
    list and the ``venn3`` call assume ``variant_mtypes`` has exactly three
    entries, with labels ``'Point'``, ``'Gain'`` and ``'Loss'`` among them.

    Raises whatever the underlying calls raise; in particular an empty
    ``mut_counts`` makes ``max()`` and ``mut_counts[-1]`` fail.
    """
    fig, main_ax = plt.subplots(figsize=(10, 4))

    # get tree of point mutations and the samples carrying
    # each of the major mutation types
    pnt_muts = cdata.train_mut['Point']
    samp_dict = {
        lbl: mtype.get_samples(cdata.train_mut)
        for lbl, mtype in variant_mtypes
    }

    # get the number of samples with a point mutation at each amino acid,
    # sorted by position; entries whose location is '.' are skipped
    mut_counts = sorted([(int(loc), len(muts))
                         for loc, muts in pnt_muts if loc != '.'],
                        key=itemgetter(0))

    # create the mutation count lollipops and set the aesthetics of
    # the individual lollipop elements (markers, stems, baseline)
    mrks, stms, basl = main_ax.stem(*zip(*mut_counts))
    plt.setp(mrks, markersize=5, markeredgecolor='black', zorder=5)
    plt.setp(stms, linewidth=0.8, color='black', zorder=1)
    plt.setp(basl, linewidth=1.1, color='black', zorder=2)

    # for each amino acid location with at least twenty mutated samples, get
    # the list of specific amino acid substitutions present at that location
    for loc, mut_count in mut_counts:
        if mut_count >= 20:
            mut_lbls = sorted(lbl for lbl, _ in pnt_muts[str(loc)])

            # drop the first two characters and the last one of the first
            # label (presumably a "p." prefix and the substituted residue
            # -- TODO confirm the label format)
            lbl_root = mut_lbls[0][2:-1]

            # create a label on the plot next to the head of the lollipop for
            # this point mutation location listing the aa substitutions
            main_ax.text(loc + mut_counts[-1][0] / 109, mut_count,
                         lbl_root + "/".join(lbl.split(lbl_root)[-1]
                                             for lbl in mut_lbls),
                         size=11, ha='left', va='bottom')

            # create a plotting space for a pie chart showing the overlap
            # between these point mutations and copy number alterations,
            # anchored in data coordinates at the head of the lollipop
            pie_ax = inset_axes(main_ax, width=0.57, height=0.57,
                                bbox_to_anchor=(loc, mut_count),
                                bbox_transform=main_ax.transData, loc=4,
                                axes_kwargs=dict(aspect='equal'),
                                borderpad=0)

            # get the number of samples with a point mutation at this location
            # that also have a gain or loss alteration, or neither
            loc_samps = pnt_muts[str(loc)].get_samples()
            loc_ovlps = [
                len(loc_samps & samp_dict[lbl]) if lbl != 'Point' else
                len(loc_samps - samp_dict['Gain'] - samp_dict['Loss'])
                for lbl, _ in variant_mtypes
            ]

            # get the 2x2 tables of sample overlap for those with point
            # mutations and those with gain or loss alterations; the 'Point'
            # entry has no table and is stored as None
            loc_croxs = [[[loc_ovlp, len(loc_samps - samp_dict[lbl])], [
                len(samp_dict[lbl] - loc_samps),
                len(cdata.samples - loc_samps - samp_dict[lbl])
            ]] if lbl != 'Point' else None
                         for loc_ovlp, (lbl, _) in zip(loc_ovlps,
                                                       variant_mtypes)]

            # test for statistically significant co-occurrence or mutual
            # exclusivity between these point mutations and alterations;
            # each entry is a (less, greater) pair of p-values
            loc_tests = [(fisher_exact(loc_crox, alternative='less')[1],
                          fisher_exact(loc_crox, alternative='greater')[1])
                         if lbl != 'Point' else None
                         for loc_crox, (lbl, _) in zip(loc_croxs,
                                                       variant_mtypes)]

            # create labels for sample counts and significance for the
            # overlap pie chart: '(-)' marks the 'less' test, '(+)' the
            # 'greater' test, '*' for p < 0.05 and '**' for p < 0.001
            # NOTE(review): the star branches are taken to be nested inside
            # the p < 0.05 checks; the source this was recovered from had
            # lost its indentation -- confirm.
            loc_lbls = [str(loc_ovlp) for loc_ovlp in loc_ovlps]
            for i, (loc_test, (lbl, _)) in enumerate(zip(loc_tests,
                                                         variant_mtypes)):
                if lbl != 'Point':
                    if loc_test[0] < 0.05:
                        loc_lbls[i] += '(-)'
                        if loc_test[0] < 0.001:
                            loc_lbls[i] += '**'
                        else:
                            loc_lbls[i] += '*'

                    if loc_test[1] < 0.05:
                        loc_lbls[i] += '(+)'
                        if loc_test[1] < 0.001:
                            loc_lbls[i] += '**'
                        else:
                            loc_lbls[i] += '*'

            # plot the overlap pie chart next to the head of the lollipop
            pie_ptchs, pie_txts = pie_ax.pie(
                x=loc_ovlps, labels=loc_lbls, explode=[0.13, 0, 0.13],
                colors=[
                    variant_clrs[lbl] if lbl == 'Point' else
                    mcomb_clrs["Point+{}".format(lbl)]
                    for lbl, _ in variant_mtypes
                ],
                labeldistance=0.47, wedgeprops=dict(alpha=0.71))

            # adjust the properties of the pie chart's text annotation
            for i in range(len(pie_txts)):
                pie_txts[i].set_fontsize(7)
                pie_txts[i].set_horizontalalignment('center')

    # pick the transcript(s) named "<gene>-001" from the gene's annotation
    gn_annot = cdata.gene_annot[args.gene]
    main_tx = {
        tx_id for tx_id, tx_annot in gn_annot['Transcripts'].items()
        if tx_annot['transcript_name'] == '{}-001'.format(args.gene)
    }

    prot_patches = []
    max_count = max(count for _, count in mut_counts)

    # keep the domain rows belonging to this gene and the chosen transcript
    gene_doms = domain_data[(domain_data['Gene'] == gn_annot['Ens'])
                            & (domain_data['Transcript'].isin(main_tx))]

    # draw one labelled rectangle per domain in a band below the x-axis
    for dom_id, dom_start, dom_end in zip(gene_doms.DomainID,
                                          gene_doms.DomainStart,
                                          gene_doms.DomainEnd):
        prot_patches.append(
            Rect((dom_start, max_count * -0.12), dom_end - dom_start,
                 max_count * 0.08))
        main_ax.text((dom_start + dom_end) / 2, max_count * -0.086, dom_id,
                     size=9, ha='center', va='center')

    main_ax.add_collection(
        PatchCollection(prot_patches, alpha=0.4, linewidth=0,
                        color='#D99100'))

    # draw the exons in a second band below the domains, laid end to end
    # starting at position 1
    exn_patches = []
    exn_pos = 1
    for i, exn_annot in enumerate(gn_annot['Exons']):
        exn_len = exn_annot['End'] - exn_annot['Start']

        # subtract any UTR stretches from the exon's length
        if 'UTR' in exn_annot:
            for utr_annot in exn_annot['UTR']:
                exn_len -= utr_annot['End'] - utr_annot['Start']

        # only exons with remaining length that start at or before the last
        # mutated position are drawn
        # NOTE(review): `exn_pos += exn_len` is taken to be inside this
        # branch; the source this was recovered from had lost its
        # indentation -- confirm.
        if exn_len > 0 and exn_pos <= mut_counts[-1][0]:
            # presumably converts a nucleotide length to amino acids
            # (three bases per residue) -- TODO confirm
            exn_len /= 3

            exn_patches.append(
                Rect((exn_pos, max_count * -0.23), exn_len, max_count * 0.08,
                     color='green'))

            # "<index>/<total>" label, with the font shrunk for short exons
            main_ax.text(exn_pos + exn_len / 2, max_count * -0.196,
                         "{}/{}".format(i + 1, len(gn_annot['Exons'])),
                         size=min(11,
                                  (531 * exn_len / mut_counts[-1][0])**0.6),
                         ha='center', va='center')
            exn_pos += exn_len

    main_ax.add_collection(
        PatchCollection(exn_patches, alpha=0.4, linewidth=1.4,
                        color='#002C91'))

    # rotated row captions just left of the y-axis for the two bands
    main_ax.text(exn_pos / -391, max_count * -0.05,
                 "{}\nDomains".format(args.domains),
                 size=7, ha='right', va='top', linespacing=0.65, rotation=37)
    main_ax.text(exn_pos / -391, max_count * -0.16,
                 "{}-001\nExons".format(args.gene),
                 size=7, ha='right', va='top', linespacing=0.65, rotation=37)

    # summary caption in axes coordinates
    main_ax.text(0.02, 0.34,
                 "{} {}-mutated samples\n{:.1%} of {} cohort affected".format(
                     len(pnt_muts), args.gene,
                     len(pnt_muts) / len(cdata.samples), args.cohort,
                 ),
                 size=9, va='bottom', transform=main_ax.transAxes)

    main_ax.set_xlabel("Amino Acid Position", size=15, weight='semibold')
    main_ax.set_ylabel("# of Mutated Samples", size=15, weight='semibold')
    main_ax.grid(linewidth=0.31)

    # leave room below zero for the domain and exon bands, then hide the
    # negative y tick marks
    main_ax.set_xlim(exn_pos / -519, exn_pos * 1.01)
    main_ax.set_ylim(max_count / -3.6, max_count * 1.21)
    main_ax.set_yticks([tck for tck in main_ax.get_yticks() if tck >= 0])

    # inset Venn diagram of the sample sets for the three variant types,
    # taken in reverse variant_mtypes order to match the hard-coded labels
    venn_ax = inset_axes(main_ax, width=2.19, height=1.31, loc=3,
                         bbox_to_anchor=(mut_counts[-1][0] / 103,
                                         max_count * 0.67),
                         bbox_transform=main_ax.transData, borderpad=0)

    v_plot = venn3([samp_dict[lbl] for lbl, _ in variant_mtypes[::-1]],
                   ["Gains", "Point\nMutations", "Losses"],
                   [variant_clrs[lbl] for lbl, _ in variant_mtypes[::-1]],
                   alpha=0.71, ax=venn_ax)

    # label entries can be None, so check before resizing
    for i in range(len(v_plot.set_labels)):
        if v_plot.set_labels[i] is not None:
            v_plot.set_labels[i].set_fontsize(11)
    for i in range(len(v_plot.subset_labels)):
        if v_plot.subset_labels[i] is not None:
            v_plot.subset_labels[i].set_fontsize(10)

    # save the plot to file
    fig.savefig(os.path.join(
        plot_dir, "mut-lollipop_{}__{}_domains-{}.svg".format(args.cohort,
                                                              args.gene,
                                                              args.domains)),
                dpi=350, bbox_inches='tight', format='svg')

    plt.close()
def add_data(self, dim_members, dim_run, key=None):
    """Scatter-plot successive pairs of components, one subplot per pair.

    Starting at grid cell ``(self.xLoc, self.yLoc)``, a new subplot is
    appended to ``self.axes`` for each component pair until the grid cursor
    runs off the last row (``self.xLoc == self.xLen``). The first pair is
    ``(self.xLoc, self.yLoc + 1)``; after each subplot the pair advances by
    bumping ``y`` when ``y == x + 1`` and bumping ``x`` otherwise.

    Args:
        dim_members: sequence paired element-wise with the points. Each
            member exposes ``name`` and a ``notes`` mapping; ``notes`` may
            hold ``'color'`` and ``'labels'`` and is indexed with ``'size'``
            when ``key`` has no ``'size'`` entry.
        dim_run: mapping with ``'pts'`` (indexable points) and ``'axes'``
            (axis label per component index).
        key: optional mapping of plot options. Recognised entries:
            ``'size'`` (marker size for every point), ``'zoom'`` (limit each
            subplot to the quartiles +/- 2.5 interquartile ranges),
            ``'title'`` (figure super-title) and ``'out'`` (path passed to
            ``self.fig.savefig``). Defaults to no options.

    Returns:
        ``self``, so calls can be chained.
    """
    # Use a None sentinel instead of a shared mutable ``{}`` default so that
    # no state can leak between calls.
    key = {} if key is None else key

    pts = dim_run['pts']
    axes_labels = dim_run['axes']
    x_comp, y_comp = self.xLoc, self.yLoc + 1
    legend = {}

    while True:
        ax = plt.subplot2grid((self.xLen, self.yLen),
                              (self.xLoc, self.yLoc),
                              rowspan=1, colspan=1)
        self.axes.append(ax)

        # Defaults for members without a 'color' note.
        # NOTE(review): these are reset per subplot, not per point, so an
        # uncoloured member that follows a coloured one reuses the previous
        # member's label and colour -- confirm this is intended.
        pt_id, pt_color, pt_mark = '', 'w', 'o'

        for p, m in zip(pts, dim_members):
            has_color = 'color' in m.notes
            if has_color:
                pt_id = m.notes['labels']
                pt_color = m.notes['color']

            if pt_id != 'NA':
                if 'size' in key:
                    ax.scatter(p[x_comp], p[y_comp], color=pt_color,
                               alpha=0.6, s=key['size'])
                elif m.notes['size']:
                    ax.scatter(p[x_comp], p[y_comp], color=pt_color,
                               s=m.notes['size'], alpha=0.6)
                else:
                    ax.scatter(p[x_comp], p[y_comp], color=pt_color,
                               alpha=0.85)

            # Remember one (colour, marker) pair per first label element.
            if pt_id:
                legend[pt_id[0]] = (pt_color, pt_mark)

            if self.options.plotnames:
                # The name is the text after the last ';' in the member
                # name; coloured members are written in white, others cyan.
                ax.text(p[x_comp], p[y_comp], m.name.split(';')[-1],
                        color='white' if has_color else 'cyan')

        # Quartile-based zoom. Skipped when there are no points, where
        # indexing the quartiles would raise IndexError.
        if 'zoom' in key and len(pts) > 0:
            xs = sorted(p[x_comp] for p in pts)
            ys = sorted(p[y_comp] for p in pts)
            q1, q3 = int(len(xs) * 0.25), int(len(xs) * 0.75)
            iq_x = xs[q3] - xs[q1]
            iq_y = ys[q3] - ys[q1]
            ax.set_xlim(xs[q1] - (iq_x * 2.5), xs[q3] + (iq_x * 2.5))
            ax.set_ylim(ys[q1] - (iq_y * 2.5), ys[q3] + (iq_y * 2.5))

        ax.set_xlabel(axes_labels[x_comp], fontweight='bold')
        ax.set_ylabel(axes_labels[y_comp], fontweight='bold')
        ax.set_xticks([])
        ax.set_yticks([])

        # Move the grid cursor to the next cell, wrapping to the next row.
        if self.yLoc + 1 == self.yLen:
            self.xLoc, self.yLoc = self.xLoc + 1, 0
        else:
            self.yLoc += 1

        # Move on to the next component pair.
        if y_comp == x_comp + 1:
            y_comp += 1
        else:
            x_comp += 1

        if self.xLoc == self.xLen:
            break

    if 'title' in key:
        plt.suptitle(key['title'], fontsize=20, fontweight='bold')

    # Legend entries skip the 'NA' label; the column count still uses the
    # full number of collected labels.
    shown = [(lbl, style) for lbl, style in legend.items() if lbl != 'NA']
    items = [Rect((0, 0), 1, 1, fc=style[0]) for _, style in shown]
    labels = [lbl for lbl, _ in shown]
    self.axes[0].legend(items, labels, loc='upper left', ncol=len(legend),
                        bbox_to_anchor=(1.7, 1.1))

    if 'out' in key:
        self.fig.savefig(key['out'], dpi=300)
    if self.options.show:
        plt.show()
    return self