def main(args):
    """Build document vectors from word statistics, then save and plot them.

    Reads the following attributes from ``args``: ``data``, ``lower``,
    ``num_words``, ``ppmi``, ``dim``, ``outpath``, ``no_tsne`` and
    ``perplexity``.  When ``args.num_words`` is ``None`` it is overwritten
    with the size of the full vocabulary.

    Side effects: prints progress, writes vectors via ``write_vectors``,
    draws a scatter plot via ``emb_scatter`` and writes two heatmaps to
    ``plots/U.pdf`` and ``plots/mat.pdf``.
    """
    print(f'Loading data from `{args.data}`...')
    docs = load_docs(args.data)

    # Fix an error in the conversion. TODO
    # Only non-empty string documents are kept.
    docs = {
        title: text
        for title, text in docs.items()
        if isinstance(text, str) and len(text) > 0
    }
    titles = tuple(docs.keys())

    # Count words document by document instead of concatenating the whole
    # corpus into one string first; the resulting counts (and the insertion
    # order used to break ties in `most_common`) are the same.
    vocab = Counter()
    for title in titles:
        if args.lower:
            docs[title] = docs[title].lower()
        vocab.update(docs[title].split())

    if args.num_words is None:
        args.num_words = len(vocab)
    counts = vocab.most_common(args.num_words)
    vocab = [word for word, _ in counts]
    print(f'Num docs: {len(titles):,}')
    print(f'Num words: {len(vocab):,}')

    print('Collecting unigrams...')
    total = sum(count for _, count in counts)
    unigrams = {word: count / total for word, count in counts}
    w2i = {word: i for i, word in enumerate(unigrams.keys())}

    print('Collecting bigrams...')
    # NOTE: despite the name, `bigrams` maps each title to the relative
    # frequency of every in-vocabulary word within that document.
    bigrams = {}
    for title in tqdm(titles):
        # `w2i` has exactly the vocabulary words as keys, so the membership
        # test is O(1) rather than a scan of the `vocab` list per token.
        word_counts = Counter(
            word for word in docs[title].split() if word in w2i)
        doc_total = sum(word_counts.values())
        bigrams[title] = {
            word: count / doc_total for word, count in word_counts.items()
        }

    print('Making PPMI matrix...')
    pxy, py = make_matrices(unigrams, bigrams, w2i)
    if args.ppmi:
        mat = ppmi(pxy, py)
    else:
        mat = pxy
    U, _, _ = svds(mat, k=args.dim)

    print('Saving results...')
    write_vectors(U, titles, args.outpath)
    # NOTE(review): `tsne` receives `args.no_tsne` unchanged -- confirm the
    # flag polarity against the argument parser.
    emb_scatter(U, titles, model_name='wikitext-2', tsne=args.no_tsne,
                perplexity=args.perplexity)
    heatmap(U, 'plots/U.pdf')
    heatmap(mat, 'plots/mat.pdf')
def plot_mri_cad_factors(cad_factors, out):
    """Plot the factor loadings stored in ``cad_factors`` as an SVG heatmap.

    Args:
        cad_factors: Path (or other object accepted by ``xr.open_dataset``)
            of a dataset with a ``cad_feature`` coordinate and a
            ``loadings`` variable.
        out: Destination passed to ``fig.savefig``; written in SVG format.

    Raises:
        AssertionError: If the dataset contains a ``cad_feature`` value that
            is not listed in ``feature_order``.
    """
    # Load everything into memory inside a context manager so the file
    # handle is released even if a later step fails.
    with xr.open_dataset(cad_factors) as dataset:
        fa_dataset = dataset.load()

    assert all(f in feature_order for f in fa_dataset['cad_feature'].values)
    # Put the features in the canonical display order.
    fa_dataset = fa_dataset.reindex(cad_feature=feature_order)

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.heatmap(
            fa_dataset['loadings'].T,
            aspect='equal',
            xlabel="MRI Factor",
            ylabel="MRI Feature",
            yticklabels=[
                feature_display_names[f]
                for f in fa_dataset.coords['cad_feature'].values
            ],
            zlabel="Loading",
            ax=ax,
        )
        fig.savefig(out, format="svg")
def plotheatmaps(data, title=''):
    """Plot column-, row- and combined-normalised heatmaps for each time bin.

    Args:
        data: Mapping from a time-bin key to a matrix-like value; each value
            is converted with ``np.array`` (assumed 2-D -- TODO confirm).
            The key is concatenated onto ``title``, so it must be a string.
        title: Prefix for the name handed to ``P.heatmap``.

    For every bin three heatmaps are emitted, suffixed ``_col``, ``_row`` and
    ``_combined``.  Axis labels are "count/density" strings taken from
    ``get_global_full()`` and ``get_local_full()``; local densities are
    rescaled so that they sum to the maximum global density.
    """
    local = get_local_full()
    glob = get_global_full()
    gden = [('%4.0f' % float(i)).lstrip('0') for i in glob['density']]
    gcnt = [int(i) for i in glob['count']]
    max_gden = max(float(i) for i in glob['density'])
    # The global labels do not depend on the bin, so build them once.
    glabels = ['%4d/%4s' % i for i in zip(gcnt, gden)]

    for tbin in data.keys():
        c = np.array(data[tbin])
        # Counts come from the precomputed local table, not from sums of `c`.
        lcnt = [int(i) for i in local[tbin]['count']]
        lden = [float(i) for i in local[tbin]['density']]
        # Sum once instead of once per element.
        lden_total = sum(lden)
        lden_scaled = [i / lden_total * max_gden for i in lden]
        denlab = [('%3.0f' % i) for i in lden_scaled]
        print(local[tbin]['volume'])
        llabels = ['%4d/%4s' % i for i in zip(lcnt, denlab)]

        # Normalise each row of `c` to unit L2 norm (all-zero rows become 0
        # via nan_to_num), then transpose.
        norm_c = np.nan_to_num(
            c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
        P.heatmap(norm_c, glabels, llabels, title + tbin + '_col')

        # Same normalisation applied to the rows of the transpose.
        d = c.T
        norm_r = np.nan_to_num(
            d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
        P.heatmap(norm_r, glabels, llabels, title + tbin + '_row')

        combined = (norm_c + norm_r) / 2
        P.heatmap(combined, glabels, llabels, title + tbin + '_combined')
        print(combined)
def plotheatmaps(data, title=''):
    """Plot column-, row- and combined-normalised heatmaps for each time bin.

    Args:
        data: Mapping from a time-bin key to a matrix-like value; each value
            is converted with ``np.array`` (assumed 2-D -- TODO confirm).
            The key is concatenated onto ``title``, so it must be a string.
        title: Prefix for the name handed to ``P.heatmap``.

    For every bin three heatmaps are emitted, suffixed ``_col``, ``_row`` and
    ``_combined``.  Axis labels are "count/density" strings taken from
    ``get_global_full()`` and ``get_local_full()``; local densities are
    rescaled so that they sum to the maximum global density.
    """
    local = get_local_full()
    glob = get_global_full()
    gden = [('%4.0f' % float(i)).lstrip('0') for i in glob['density']]
    gcnt = [int(i) for i in glob['count']]
    max_gden = max(float(i) for i in glob['density'])
    # The global labels do not depend on the bin, so build them once.
    glabels = ['%4d/%4s' % i for i in zip(gcnt, gden)]

    for tbin in data.keys():
        c = np.array(data[tbin])
        # Counts come from the precomputed local table, not from sums of `c`.
        lcnt = [int(i) for i in local[tbin]['count']]
        lden = [float(i) for i in local[tbin]['density']]
        # Sum once instead of once per element.
        lden_total = sum(lden)
        lden_scaled = [i / lden_total * max_gden for i in lden]
        denlab = [('%3.0f' % i) for i in lden_scaled]
        print(local[tbin]['volume'])
        llabels = ['%4d/%4s' % i for i in zip(lcnt, denlab)]

        # Normalise each row of `c` to unit L2 norm (all-zero rows become 0
        # via nan_to_num), then transpose.
        norm_c = np.nan_to_num(
            c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
        P.heatmap(norm_c, glabels, llabels, title + tbin + '_col')

        # Same normalisation applied to the rows of the transpose.
        d = c.T
        norm_r = np.nan_to_num(
            d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
        P.heatmap(norm_r, glabels, llabels, title + tbin + '_row')

        combined = (norm_c + norm_r) / 2
        P.heatmap(combined, glabels, llabels, title + tbin + '_combined')
        print(combined)
def main(**kwargs):
    """Fetch activity data for the chosen API and plot it as a heatmap.

    ``kwargs`` is the docopt-style argument mapping.  It must contain
    ``--level`` (one of ``dy``, ``sg``, ``hr``), the ``created`` /
    ``revised`` / ``comment`` flags, ``--email``, ``--start``, ``--end``,
    ``--tz`` and ``--cached``; the command is selected by a truthy ``drive``
    or ``reports`` entry.

    Raises:
        ValueError: If the level is not recognised or no command is given.
    """
    valid_levels = ('dy', 'sg', 'hr')
    if kwargs['--level'] not in valid_levels:
        raise ValueError(
            'Specified level must be one of {}'.format(valid_levels))

    # Map each CLI flag to the activity kind requested from the API.
    flag_to_kind = (
        ('created', 'created'),
        ('revised', 'revisions'),
        ('comment', 'comments'),
    )
    what = [kind for flag, kind in flag_to_kind if kwargs[flag]]

    commands = (
        ('drive', 'User Activity on Google Drive'),
        ('reports', 'Something Awesome'),
    )
    for command, heading in commands:
        if kwargs.get(command):
            api, title = command, heading
            break
    else:
        # This should never happen since docopt validates commands for us
        raise ValueError('No known command given')

    api_obj = get_api(
        api=api,
        impersonated_user_email=kwargs['--email'],
        start=kwargs['--start'],
        end=kwargs['--end'],
        timezone=kwargs['--tz'],
    )
    data = api_obj.activity(
        use_cached=kwargs['--cached'],
        what=what,
        level=kwargs['--level'],
    )
    fig = heatmap(title=title, **data)

    plot_args = {
        'figure_or_data': fig,
        'filename': '{}-activity-heatmap'.format(api),
    }

    # `__IPYTHON__` only exists when running under IPython.
    try:
        __IPYTHON__
    except NameError:
        inside_ipython = False
    else:
        inside_ipython = True

    if inside_ipython:
        py.iplot(**plot_args)
    else:
        url = py.plot(**plot_args)
        print('The plotted figure is now available at:\n{}\n'.format(url))
def plot_mri_cad_factor_correlation(mri_features, out):
    """Plot the pairwise correlation of MRI features as an SVG heatmap.

    The dataset at ``mri_features`` is opened, its ``Comment`` and
    ``MultiFocal`` variables are dropped, and the remaining variables are
    stacked along a new ``cad_feature`` dimension.  Cases with any missing
    value are discarded before ``np.corrcoef`` is applied across features.
    Only the cells selected by the triangular mask are drawn, with a separate
    colorbar axis; the figure is saved to ``out``.

    Raises:
        AssertionError: If a stacked feature name is not in ``feature_order``.
    """
    dataset = xr.open_dataset(mri_features)
    for unwanted in ('Comment', 'MultiFocal'):
        del dataset[unwanted]
    features = dataset.to_array('cad_feature', 'mri_cad_features')
    dataset.close()

    # Keep only the cases with no missing feature at all.
    missing_per_case = features.isnull().sum('cad_feature')
    complete_cases = np.where(missing_per_case == 0)[0]
    features = features.isel(case=complete_cases)
    features = features.transpose('case', 'cad_feature')

    assert all(
        name.item() in feature_order
        for name in features['cad_feature'].values
    )
    features = features.reindex(cad_feature=feature_order)

    cor = xr.DataArray(
        data=np.corrcoef(features.values, rowvar=False),
        dims=('cad_feature', 'cad_feature'),
        coords={'cad_feature': features.coords['cad_feature']},
    )
    cor.name = 'correlation'

    n_features = cor.shape[0]
    row_names = [
        feature_display_names[name]
        for name in cor.coords['cad_feature'].values
    ]

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        mesh = plot.heatmap(
            cor,
            mask=np.tri(n_features) < 0.5,
            aspect='equal',
            xlabel="MR Feature",
            ylabel="MR Feature",
            xticklabels=[''] * n_features,
            yticklabels=row_names,
            cbar=False,
            ax=ax,
        )

        # Dedicated colorbar axis, positioned in figure coordinates.
        colorbar_axis = fig.add_axes([.7, .5, .05, .25])
        colorbar = fig.colorbar(mesh, cax=colorbar_axis)
        colorbar.set_ticks([-1.0, -0.5, 0.0, 0.5, 1.0])
        colorbar.ax.set_yticklabels(
            colorbar.ax.get_yticklabels(), fontsize=9)
        colorbar.ax.set_title("Pearson Correlation", fontsize=10)

        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)

        fig.savefig(out, format="svg")
def doHeatmap(self, title, DS=True):
    """Plot how clustered elements project onto the K-D grid cells.

    For every key in ``self.hc`` the elements listed under ``'elm'`` are
    mapped to a grid index with ``self.kdg.toindex`` and tallied; the tally
    is stored back on ``self.hc[key]['reweight']``.  The tallies form a
    (grid cell x cluster) matrix that is plotted three times through
    ``P.heatmap``: raw, column-normalised (``_NormCol``) and row-normalised
    (``_NormRow``).

    Args:
        title: Base name handed to ``P.heatmap``.
        DS: Currently unused; kept for interface compatibility.
    """
    # Save grid hcubes which actually have sufficient data points
    nodeA_size = [len(i) for i in self.kdg.grid]
    nodeA = [i for i, size in enumerate(nodeA_size) if size > 10]
    # Companion set so the membership test below is O(1); `nodeA` itself
    # stays a list because its order defines the heatmap rows.
    nodeA_seen = set(nodeA)
    nodeB = list(self.hc.keys())
    idxB = {k: i for i, k in enumerate(nodeB)}

    # For each HCube (among clustered temporal data):
    for k in nodeB:
        reweight = {}
        self.hc[k]['reweight'] = reweight
        for cov in self.hc[k]['elm']:
            probe = self.kdg.toindex(self.fealcov[cov][10:])
            reweight[probe] = reweight.get(probe, 0) + 1
            # Grid cells hit by a projection get a row even if they hold
            # too few points to pass the size filter above.
            if probe not in nodeA_seen:
                nodeA_seen.add(probe)
                nodeA.append(probe)
    idxA = {k: i for i, k in enumerate(nodeA)}

    # Map projection counts into the heatmap matrix
    projmap = np.zeros(shape=(len(nodeA), len(nodeB)))
    for kB in nodeB:
        for kA, proj_cnt in self.hc[kB]['reweight'].items():
            projmap[idxA[kA], idxB[kB]] = proj_cnt

    xlabel = 'SOURCE: Temporal Windows (Covariance -> KPCA -> KDTree)'
    ylabel = 'DEST: ATemporal Data (K-D Grid w/feature Landscape, 0-1..3-4 vals)'

    Asize = [nodeA_size[i] for i in nodeA]
    proj_total = [int(i) for i in np.sum(projmap, axis=1)]
    Bsize = [self.hc[k]['count'] for k in nodeB]
    alabel = ['#%04d/ %5d/ %3d' % x for x in zip(nodeA, Asize, proj_total)]
    blabel = ['%6s/ %d' % x for x in zip(nodeB, Bsize)]

    # Unit-L2-norm rows / columns; all-zero vectors become 0 via nan_to_num.
    pmap_row_norm = np.nan_to_num(
        projmap / np.linalg.norm(projmap, axis=-1)[:, np.newaxis])
    pmap_col_norm = np.nan_to_num(
        projmap.T / np.linalg.norm(projmap.T, axis=-1)[:, np.newaxis]).T

    P.heatmap(projmap, alabel, blabel, title, ylabel=ylabel, xlabel=xlabel)
    P.heatmap(pmap_col_norm, alabel, blabel, title+'_NormCol', ylabel=ylabel, xlabel=xlabel)
    P.heatmap(pmap_row_norm, alabel, blabel, title+'_NormRow', ylabel=ylabel, xlabel=xlabel)
def plot_heatmaps():
    """Draw the reprojection, column-normalised and 'biased' test heatmaps.

    Takes no arguments: every input (``arr``, ``rowlist``, ``lcnt``,
    ``tkey``, ``norm``, ``gcnt``, ``tbin``, ``d``, ``local``) is read from
    names defined outside this function.
    """
    reproj_name = tkey+'_reproj'
    P.heatmap(arr, rowlist, lcnt, reproj_name)

    by_column_name = tbin+'_Norm_by_Col'
    P.heatmap(norm.T, gcnt, lcnt, by_column_name)

    # `d` is reshaped into a single 53-row column before plotting.
    biased_column = d.reshape(53,1)
    biased_counts = local['2_0']['count']
    P.heatmap(biased_column, ['biased'], biased_counts, 'testbiased')
# channels = [*range(24), *range(26, 50)] # channels = [*range(12), *range(28, 36)] csi = csi[[x for x in channels]] # sampling_rate = 100 #Hz lowpass_cutoff = 10 #Hz order = 5 for x in range(csi.shape[0]): csi[x] = filters.lowpass(csi[x], lowpass_cutoff, sampling_rate, order) csi = np.nan_to_num(csi) # plot.heatmap_3d(csi, timestamps) plot.heatmap(csi, timestamps) csi_trans = np.transpose(csi) csi_trans = csi_trans[::10] timestamps = timestamps[::10] prev_frame = None sti_values = [] corr_values = [] for x in range(csi_trans.shape[0]): frame = csi_trans[x] if prev_frame is None:
def plot_gsea_heatmap(gsea, genesets_annot, factor_idx, fig, abs):
    """Draw a GSEA summary figure: waterfalls on top, heatmap, table below.

    Args:
        gsea: Dataset-like object with ``gene_set``, ``nes``, ``fdr``,
            ``max_es_at`` and ``le_prop`` entries and an ``absolute`` attr.
        genesets_annot: Annotation dataset for the gene sets to show; its
            ``gene_set`` values must all occur in ``gsea['gene_set']``.
        factor_idx: Index of the factor shown in the waterfall plots.
        fig: Figure on which all axes are created.
        abs: If truthy, use the one-sided ("FDR") colour scale; otherwise
            the signed one.  (The name shadows the builtin but is part of
            the public signature.)

    Raises:
        AssertionError: If an annotated gene set is missing from ``gsea``.
    """
    plusminus_sign = chr(0x00B1)
    genesets = genesets_annot['gene_set'].values
    assert all(np.isin(genesets, gsea['gene_set']))

    # Layout, in figure-relative units.
    wf_prop = 0.3
    table_prop = 0.3
    hm_prop = 1 - wf_prop - table_prop
    cbar_vmargin = 0.25
    wf_hmargin = 0.05
    wf_vmargin = 0.06

    sel_gsea = gsea.reindex_like(genesets_annot)

    # Heatmap
    if abs:
        cmap = copy(matplotlib.cm.Reds)
    else:
        cmap = copy(matplotlib.cm.RdBu_r)
    cmap.set_bad('0.8')
    # Signed -log10(FDR): the sign comes from the enrichment score.
    sel_gsea['slogfdr'] = np.sign(sel_gsea['nes']) * -np.log10(sel_gsea['fdr'])
    ax = fig.add_axes([wf_hmargin, table_prop, 1 - wf_hmargin, hm_prop])
    if abs:
        zlim = [0, -np.log10(0.05)]
        norm = FDRNormalize(sig_threshold=-np.log10(0.25))
    else:
        zlim = [np.log10(0.05), -np.log10(0.05)]
        norm = SFDRNormalize(sig_threshold=-np.log10(0.25))
    hm = plot.heatmap(
        sel_gsea['slogfdr'][::-1, :],
        mask=sel_gsea['fdr'][::-1, :] > 0.25,
        zlim=zlim,
        norm=norm,
        cmap=cmap,
        method='pcolormesh',
        cbar=False,
        ax=ax,
    )
    # White separators between heatmap columns.
    for i in range(sel_gsea['slogfdr'].shape[1]):
        ax.axvline(i, color='white', linewidth=2)

    ax_cbar = fig.add_axes([
        wf_hmargin + cbar_vmargin,
        0.02,
        1 - wf_hmargin - 2 * cbar_vmargin,
        0.03,
    ])
    cbar = fig.colorbar(hm, cax=ax_cbar, orientation='horizontal')

    # Ticks sit at -log10(FDR) positions but are labelled with the FDR.
    fdr_ticks_at = np.array([0.25, 0.1, 0.05])
    lfdr_ticks_at = -np.log10(fdr_ticks_at)
    if abs:
        cbar_tick_lv = np.append([0.0], lfdr_ticks_at)
        cbar_tick_v = np.append([1.0], fdr_ticks_at)
    else:
        cbar_tick_lv = np.append(np.append(-lfdr_ticks_at[::-1], [0.0]),
                                 lfdr_ticks_at)
        cbar_tick_v = np.append(-np.append(fdr_ticks_at[::-1], [1.0]),
                                fdr_ticks_at)
    cbar.set_ticks(cbar_tick_lv)
    if abs:
        ax_cbar.set_xlabel("FDR")
    else:
        ax_cbar.set_xlabel("signed FDR")
    cbar_tick_labels = [f"{v}" for v in cbar_tick_v]
    if not abs:
        # The centre tick of the signed scale stands for both +1 and -1.
        cbar_tick_labels[len(cbar_tick_labels) // 2] = plusminus_sign + "1.0"
    cbar.ax.set_xticklabels(cbar_tick_labels)

    ax.set_xticklabels([])
    # Use a real boolean: the legacy 'off' string is truthy on current
    # matplotlib and would leave the ticks visible.
    ax.tick_params(bottom=False)
    ax.set_ylabel("MRI Factor")
    ax.set_xlabel("")
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)

    # Top waterfall plots
    def _wf_rect(slot):
        """Return the axes rectangle of the waterfall in third ``slot``."""
        return [(slot / 3) + wf_hmargin, 1 - wf_prop + wf_vmargin,
                (1 / 3) - wf_hmargin, wf_prop - wf_vmargin]

    # NOTE(review): the waterfalls index `gsea`, not `sel_gsea`, while the
    # labels come from `genesets_annot` -- confirm the two are aligned.
    ax_nes = fig.add_axes(_wf_rect(0))
    wf_plot(gsea['nes'][factor_idx, :], genesets, ax_nes, 'NES')

    ax_mesa = fig.add_axes(_wf_rect(1))
    if gsea.attrs['absolute']:
        mesa_mid = 1
    else:
        mesa_mid = int(gsea['max_es_at'].max() / 2)
    wf_plot(gsea['max_es_at'][factor_idx, :], genesets, ax_mesa,
            'Max. ES at', xbaseline=mesa_mid, reverse=True)
    ax_mesa.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))

    ax_le = fig.add_axes(_wf_rect(2))
    wf_plot(gsea['le_prop'][factor_idx, :], genesets, ax_le, 'Leading Edge')

    # Bottom table
    ga = genesets_annot.copy()
    if 'source' in ga and 'source_year' in ga:
        # Fold the year into the source column: "source (year)".
        sy = zip(ga['source'].values, ga['source_year'].values)
        ga['source'] = ('gene_set', [f"{s} ({y})" for s, y in sy])
        del ga['source_year']
    gaa = ga.to_array()
    xlabels = [gs_labels[i] for i in range(gaa.shape[1])]
    table = ax.table(
        cellText=np.vstack((xlabels, gaa.values)),
        cellLoc='center',
        rowLabels=['gene set'] + list(gaa['variable'].values),
        loc='bottom',
    )
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    # matplotlib keys table cells as (row, column); row-label cells have
    # column index -1.
    for (row_idx, col_idx), cell in table.get_celld().items():
        cell.set_linewidth(2)
        cell.set_edgecolor('w')
        if row_idx % 2 == 1:
            cell.set_facecolor('#eeeeee')
        if row_idx == 3:
            cell.set_height(3 * cell.get_height())
        if row_idx == 0 and col_idx >= 0:
            cell.set_text_props(**gs_label_props)
        if col_idx == -1:
            cell.set_text_props(weight='bold')
def plot_heatmaps():
    """Draw the reprojection, column-normalised and 'biased' test heatmaps.

    Takes no arguments: every input (``arr``, ``rowlist``, ``lcnt``,
    ``tkey``, ``norm``, ``gcnt``, ``tbin``, ``d``, ``local``) is read from
    names defined outside this function.
    """
    reproj_name = tkey + '_reproj'
    P.heatmap(arr, rowlist, lcnt, reproj_name)

    by_column_name = tbin + '_Norm_by_Col'
    P.heatmap(norm.T, gcnt, lcnt, by_column_name)

    # `d` is reshaped into a single 53-row column before plotting.
    biased_column = d.reshape(53, 1)
    biased_counts = local['2_0']['count']
    P.heatmap(biased_column, ['biased'], biased_counts, 'testbiased')
#old setup # mean_corr = 0.986 # diff_corr = 0.06 #new setup # mean_corr = 0.978 # diff_corr = 0.33 #Chris setup. mean_corr = 0.9987 diff_corr = 0.0039 # plot.heatmap_3d(csi, timestamps) import numpy as np plot.heatmap(np.transpose(csi), timestamps) #%diff thresholds. moving_threshold = 0.15 # moving_threshold = 0.10 notmoving_threshold = 0.05 # notmoving_threshold = 0.10 #PCC-activation threshold. #Let's try and establish this by calibrating with a "no presence" example. #Then we can use take a multiplier of the average max-min diff as our threshold. # containsmovement_threshold = 0.92 containsmovement_threshold = mean_corr - (diff_corr * 2) prev_frame = None
(len(spots_reduced), 3)) mask_c = mask.copy() mask_reduced = mask_c[:, nuc_pos[0] - 50:nuc_pos[1] + 50] spots_reduced[:, 0] -= nuc_pos[0] - 50 grid_mat = helpers.build_density_by_stripe(spots_reduced, z_lines, mask_reduced, band_n=band_n) # spline graph density by band_n tgt_image_name = constants.analysis_config[ 'FIGURE_NAME_FORMAT_GRAPH_STRIPE'].format( image=str(band_n) + im._path.replace("/", "_") + "_" + str(mask_count)) tgt_fp = pathlib.Path( constants.analysis_config['FIGURE_OUTPUT_PATH'].format( root_dir=global_root_dir), tgt_image_name) plot.spline_graph(grid_mat, tgt_fp, band_n) # heatmap density by band_n tgt_image_name = constants.analysis_config[ 'FIGURE_NAME_FORMAT_HEATMAP'].format( image=str(band_n) + im._path.replace("/", "_") + "_" + str(mask_count)) tgt_fp = pathlib.Path( constants.analysis_config['FIGURE_OUTPUT_PATH'].format( root_dir=global_root_dir), tgt_image_name) plot.heatmap(grid_mat, tgt_fp, band_n) mask_count += 1 image_counter += 1
elif (input=='plot'): print printPlotMenu() input = raw_input() print if (input=='heatmap'): db_path = project_folder+'/dbs/'+raw_input("Enter name of database (leave blank if you want to use all databases available): ") print while (not(os.path.isdir(db_path))): print "What you entered does not exist as a directory." print db_path = project_folder+'/dbs/'+raw_input("Enter name of database (leave blank if you want to use all databases available): ") print plot.heatmap(db_path) elif (input=='cnn'): db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ") print while (not(os.path.isfile(db_path))): print "What you entered does not exist as a file." print db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ") print plot.train_valid_convergence(db_path) elif (input=='svr'): db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ") print while (not(os.path.isfile(db_path))): print "What you entered does not exist as a file." print
for line in provfile.read().strip().split('\n'): if line.startswith('BASIN'): _,bid,targ,actual,labelseq = line.split(',') data.append((bid,targ,actual,labelseq)) h, w, t = np.identity(10), np.identity(5), np.identity(5) for _,a,b,_ in data: i, j = tidx[a], tidx[b] h[i][j] += 1 w[int(a[1])][int(b[1])] += 1 if a[0] == 'T': t[int(a[1])][int(b[1])] += 1 h_norm = (h.T/h.sum()).T w_norm = (w.T/w.sum()).T t_norm = (t.T/t.sum()).T # P.heatmap(np.rot90(h_norm,3).T, TBIN10[::-1], TBIN10, expname+'_accuracy_10bin', ylabel='Start State', xlabel='Output Distribution') P.heatmap(np.rot90(w_norm,3).T, BIN5[::-1], BIN5, fname=expname+'_acc_states', figsize=fsize, latex=True) # P.heatmap(np.rot90(t_norm,3).T, BIN5[::-1], BIN5, fname=expname+'_acc_trans', ylabel='Start State', xlabel='Output Distribution') outbin = {k:defaultdict(list) for k in ['all', 'W', 'T']} exp = 'lattice2' h, w, t = np.identity(10), np.identity(5), np.identity(5) for exp in ['lattice2', 'lattrans']: lab_all = np.load(home+'/work/results/label_{0}.npy'.format(exp)).astype(int) with open(home+'/work/results/{0}/jclist'.format(exp)) as inf: idlist = inf.read().strip().split('\n') for i, tid in enumerate(idlist): for a,b in TS.TimeScape.windows(home+'/work/jc/{0}/{1}/{1}_transitions.log'.format(exp,tid)): outbin['all'][tid].append(LABEL10(lab_all[i][a:b])) if exp=='lattice2': outbin['W'][tid].append(LABEL10(lab_all[i][a:b]))