Beispiel #1
0
def main(args):
    """Embed documents from their word statistics and plot the result.

    Reads the documents named by ``args.data``, keeps the ``args.num_words``
    most frequent words (every word when it is ``None``), builds a matrix via
    ``make_matrices`` (optionally passed through ``ppmi``), factorises it with
    ``svds`` and hands ``U`` to the writer and plotting helpers.

    Args:
        args: Namespace-like object. Attributes read here: ``data``,
            ``lower``, ``num_words``, ``ppmi``, ``dim``, ``outpath``,
            ``no_tsne`` and ``perplexity``. ``num_words`` is overwritten
            with the vocabulary size when it is ``None``.
    """
    print(f'Loading data from `{args.data}`...')
    docs = load_docs(args.data)
    # Fix an error in the conversion. TODO
    # Only non-empty string documents are kept.
    docs = {
        title: text
        for title, text in docs.items()
        if isinstance(text, str) and len(text) > 0
    }
    titles = tuple(docs.keys())

    # Count words one document at a time instead of concatenating every
    # document into a single string first; the resulting counts (and the
    # first-seen order used to break ties in `most_common`) are the same.
    vocab = Counter()
    for title in titles:
        if args.lower:
            docs[title] = docs[title].lower()
        vocab.update(docs[title].split())
    if args.num_words is None:
        args.num_words = len(vocab)
    counts = vocab.most_common(args.num_words)
    vocab = [word for word, _ in counts]
    # Set copy of the vocabulary for O(1) membership tests in the loop below.
    vocab_set = frozenset(vocab)

    print(f'Num docs: {len(titles):,}')
    print(f'Num words: {len(vocab):,}')

    print('Collecting unigrams...')
    total = sum(count for _, count in counts)
    unigrams = {word: count / total for word, count in counts}

    # Word -> column index, in the same order as `unigrams`.
    w2i = {word: i for i, word in enumerate(unigrams)}

    print('Collecting bigrams...')
    # Despite the name, `bigrams` maps each title to the relative frequency
    # of every in-vocabulary word inside that document.
    bigrams = {}
    for title in tqdm(titles):
        word_counts = Counter(
            word for word in docs[title].split() if word in vocab_set)
        doc_total = sum(word_counts.values())
        bigrams[title] = {
            word: count / doc_total for word, count in word_counts.items()
        }

    print('Making PPMI matrix...')
    pxy, py = make_matrices(unigrams, bigrams, w2i)
    mat = ppmi(pxy, py) if args.ppmi else pxy
    U, _, _ = svds(mat, k=args.dim)

    print('Saving results...')
    write_vectors(U, titles, args.outpath)
    # NOTE(review): `tsne` receives `args.no_tsne` unchanged; confirm the
    # flag is not meant to be negated here.
    emb_scatter(U,
                titles,
                model_name='wikitext-2',
                tsne=args.no_tsne,
                perplexity=args.perplexity)
    heatmap(U, 'plots/U.pdf')
    heatmap(mat, 'plots/mat.pdf')
Beispiel #2
0
def plot_mri_cad_factors(cad_factors, out):
    """Draw the ``loadings`` variable of a factor dataset as an SVG heatmap.

    Args:
        cad_factors: Path (or other source accepted by ``xr.open_dataset``)
            of the dataset; it must provide ``loadings`` and a
            ``cad_feature`` coordinate.
        out: Destination handed to ``fig.savefig`` (written as SVG).
    """
    factors = xr.open_dataset(cad_factors).load()

    # Every feature in the file must be known before reordering the
    # dataset to the module-level `feature_order`.
    present_features = factors['cad_feature'].values
    assert all(f in feature_order for f in present_features)
    factors = factors.reindex(cad_feature=feature_order)

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        loadings = factors['loadings'].T
        feature_labels = [
            feature_display_names[name]
            for name in factors.coords['cad_feature'].values
        ]
        plot.heatmap(
            loadings,
            aspect='equal',
            xlabel="MRI Factor",
            ylabel="MRI Feature",
            yticklabels=feature_labels,
            zlabel="Loading",
            ax=ax,
        )

        fig.savefig(out, format="svg")
Beispiel #3
0
def plotheatmaps(data, title=''):
  """Plot three normalised heatmaps for every entry of *data*.

  For each key ``tbin`` the matrix ``data[tbin]`` is plotted three times via
  ``P.heatmap``: with each row scaled by its Euclidean norm (``_col``), with
  each column scaled by its Euclidean norm (``_row``), and as the mean of
  both (``_combined``). All three are passed in transposed orientation.

  Args:
    data: Mapping from bin name to a 2-D array-like of numbers. The bin
      names must also be keys of the mapping returned by
      ``get_local_full()``.
    title: Prefix for the name given to each heatmap.
  """
  local = get_local_full()
  glob = get_global_full()
  gden = [('%4.0f'%float(i)).lstrip('0') for i in glob['density']]
  gcnt = [int(i) for i in glob['count']]
  max_gden = max([float(i) for i in glob['density']])
  # The global labels do not depend on the bin, so build them once.
  glabels = ['%4d/%4s' % i for i in zip(gcnt,gden)]
  for tbin, counts in data.items():
    c = np.array(counts)
    lcnt = [int(i) for i in local[tbin]['count']]
    lden = [float(i) for i in local[tbin]['density']]
    # Sum once instead of once per element (was quadratic in len(lden)).
    lden_total = sum(lden)
    # Local densities as fractions of their sum, rescaled to the largest
    # global density so both label sets share a range.
    lden_scaled = [(i / lden_total) * max_gden for i in lden]
    denlab = [('%3.0f'%i) for i in lden_scaled]
    print(local[tbin]['volume'])
    llabels = ['%4d/%4s' % i for i in zip(lcnt,denlab)]

    # Rows of c scaled to unit norm; all-zero rows give NaN, mapped to 0.
    norm_c = np.nan_to_num(c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
    P.heatmap(norm_c, glabels, llabels, title+tbin+'_col')

    # Same treatment for the columns of c (rows of its transpose).
    d = c.T
    norm_r = np.nan_to_num(d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
    P.heatmap(norm_r, glabels, llabels, title+tbin+'_row')

    combined = (norm_c + norm_r) / 2
    P.heatmap(combined, glabels, llabels, title+tbin+'_combined')
    print(combined)
Beispiel #4
0
def plotheatmaps(data, title=''):
    """Plot three normalised heatmaps for every entry of *data*.

    For each key ``tbin`` the matrix ``data[tbin]`` is plotted three times
    via ``P.heatmap``: with each row scaled by its Euclidean norm
    (``_col``), with each column scaled by its Euclidean norm (``_row``),
    and as the mean of both (``_combined``). All three are passed in
    transposed orientation.

    Args:
        data: Mapping from bin name to a 2-D array-like of numbers. The bin
            names must also be keys of the mapping returned by
            ``get_local_full()``.
        title: Prefix for the name given to each heatmap.
    """
    local = get_local_full()
    glob = get_global_full()
    gden = [('%4.0f' % float(i)).lstrip('0') for i in glob['density']]
    gcnt = [int(i) for i in glob['count']]
    max_gden = max([float(i) for i in glob['density']])
    # The global labels do not depend on the bin, so build them once.
    glabels = ['%4d/%4s' % i for i in zip(gcnt, gden)]
    for tbin, counts in data.items():
        c = np.array(counts)
        lcnt = [int(i) for i in local[tbin]['count']]
        lden = [float(i) for i in local[tbin]['density']]
        # Sum once instead of once per element (was quadratic in len(lden)).
        lden_total = sum(lden)
        # Local densities as fractions of their sum, rescaled to the largest
        # global density so both label sets share a range.
        lden_scaled = [(i / lden_total) * max_gden for i in lden]
        denlab = [('%3.0f' % i) for i in lden_scaled]
        print(local[tbin]['volume'])
        llabels = ['%4d/%4s' % i for i in zip(lcnt, denlab)]

        # Rows of c scaled to unit norm; all-zero rows give NaN, mapped to 0.
        norm_c = np.nan_to_num(c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
        P.heatmap(norm_c, glabels, llabels, title + tbin + '_col')

        # Same treatment for the columns of c (rows of its transpose).
        d = c.T
        norm_r = np.nan_to_num(d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
        P.heatmap(norm_r, glabels, llabels, title + tbin + '_row')

        combined = (norm_c + norm_r) / 2
        P.heatmap(combined, glabels, llabels, title + tbin + '_combined')
        print(combined)
Beispiel #5
0
def main(**kwargs):
    """Fetch activity data for the selected API and plot it as a heatmap.

    The keyword arguments are the parsed command-line entries: the options
    ``--level``, ``--email``, ``--start``, ``--end``, ``--tz`` and
    ``--cached``, the flags ``created``, ``revised`` and ``comment``, and the
    commands ``drive`` / ``reports``.

    Raises:
        ValueError: If ``--level`` is not one of ``dy``, ``sg`` or ``hr``, or
            if neither the ``drive`` nor the ``reports`` command is set.
    """
    allowed_levels = ('dy', 'sg', 'hr')
    level = kwargs['--level']
    if level not in allowed_levels:
        raise ValueError(
            'Specified level must be one of {}'.format(allowed_levels))

    # Command-line flag -> activity name requested from the API.
    flag_to_activity = (
        ('created', 'created'),
        ('revised', 'revisions'),
        ('comment', 'comments'),
    )
    what = [activity for flag, activity in flag_to_activity if kwargs[flag]]

    if kwargs.get('drive'):
        api = 'drive'
        title = 'User Activity on Google Drive'
    elif kwargs.get('reports'):
        api = 'reports'
        title = 'Something Awesome'
    else:
        # This should never happen since docopt validates commands for us
        raise ValueError('No known command given')

    api_obj = get_api(
        api=api,
        impersonated_user_email=kwargs['--email'],
        start=kwargs['--start'],
        end=kwargs['--end'],
        timezone=kwargs['--tz'],
    )
    data = api_obj.activity(
        use_cached=kwargs['--cached'],
        what=what,
        level=level,
    )
    fig = heatmap(title=title, **data)

    plot_args = dict(
        figure_or_data=fig,
        filename='{}-activity-heatmap'.format(api),
    )
    # `__IPYTHON__` only exists when running under IPython; its absence
    # selects the regular plot call, which returns a URL to report.
    try:
        __IPYTHON__
    except NameError:
        url = py.plot(**plot_args)
        print('The plotted figure is now available at:\n{}\n'.format(url))
    else:
        py.iplot(**plot_args)
def plot_mri_cad_factor_correlation(mri_features, out):
    """Plot the pairwise correlation of the MRI features as an SVG heatmap.

    The ``Comment`` and ``MultiFocal`` variables are dropped, cases with any
    missing feature value are excluded, and the correlation coefficients
    between the remaining features (ordered by ``feature_order``) are drawn
    with a mask derived from ``np.tri`` and a separate colorbar axis.

    Args:
        mri_features: Source passed to ``xr.open_dataset``.
        out: Destination handed to ``fig.savefig`` (written as SVG).
    """
    dataset = xr.open_dataset(mri_features)
    for unwanted in ('Comment', 'MultiFocal'):
        del dataset[unwanted]
    features = dataset.to_array('cad_feature', 'mri_cad_features')
    dataset.close()

    # Keep only the cases in which no feature is null.
    missing_per_case = features.isnull().sum('cad_feature')
    complete_cases = np.where(missing_per_case == 0)[0]
    features = features.isel(case=complete_cases)
    features = features.transpose('case', 'cad_feature')

    assert all(
        f.item() in feature_order for f in features['cad_feature'].values)
    features = features.reindex(cad_feature=feature_order)

    # Columns are the variables, hence rowvar=False.
    coefficients = np.corrcoef(features.values, rowvar=False)
    cor = xr.DataArray(
        data=coefficients,
        dims=('cad_feature', 'cad_feature'),
        coords={'cad_feature': features.coords['cad_feature']},
    )
    cor.name = 'correlation'

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        n_features = cor.shape[0]
        mesh = plot.heatmap(
            cor,
            mask=np.tri(n_features) < 0.5,
            aspect='equal',
            xlabel="MR Feature",
            ylabel="MR Feature",
            xticklabels=[''] * cor.shape[0],
            yticklabels=[
                feature_display_names[name]
                for name in cor.coords['cad_feature'].values
            ],
            cbar=False,
            ax=ax,
        )

        # Hand-placed colorbar axis (figure-relative coordinates).
        colorbar_ax = fig.add_axes([.7, .5, .05, .25])
        colorbar = fig.colorbar(mesh, cax=colorbar_ax)
        colorbar.set_ticks([-1.0, -0.5, 0.0, 0.5, 1.0])
        colorbar.ax.set_yticklabels(
            colorbar.ax.get_yticklabels(), fontsize=9)
        colorbar.ax.set_title("Pearson Correlation", fontsize=10)

        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        fig.savefig(out, format="svg")
Beispiel #7
0
  def doHeatmap(self, title, DS=True):
    """Plot how the elements of each ``self.hc`` entry project onto grid cells.

    For every key of ``self.hc`` the grid index of each of its ``'elm'``
    members is looked up via ``self.kdg.toindex`` and counted; the counts
    are stored under ``self.hc[key]['reweight']`` and assembled into a
    (grid cell x hc key) matrix. The matrix is plotted three times via
    ``P.heatmap``: raw, normalised per column (``_NormCol``) and normalised
    per row (``_NormRow``).

    Args:
      title: Base name for the three heatmaps.
      DS: Not used by this method.
    """
    # Save grid hcubes which actually have sufficient data points
    nodeA_size = [len(i) for i in self.kdg.grid]
    nodeA = [i for i, size in enumerate(nodeA_size) if size > 10]
    nodeB = list(self.hc.keys())
    idxB  = {k: i for i, k in enumerate(nodeB)}

    # Set mirror of nodeA so the membership test below is O(1); nodeA itself
    # stays a list because its order defines the matrix rows.
    seenA = set(nodeA)

    # For each HCube (among clustered temporal data):
    for k in nodeB:
      reweight = {}
      self.hc[k]['reweight'] = reweight
      for cov in self.hc[k]['elm']:
        probe = self.kdg.toindex(self.fealcov[cov][10:])
        reweight[probe] = reweight.get(probe, 0) + 1
        # Grid cells below the size threshold still get a row once hit.
        if probe not in seenA:
          seenA.add(probe)
          nodeA.append(probe)

    idxA  = {k: i for i, k in enumerate(nodeA)}

    # Map projection counts into the matrix behind the heatmap charts
    projmap = np.zeros(shape=(len(nodeA), len(nodeB)))
    for kB in nodeB:
      for kA, proj_cnt in self.hc[kB]['reweight'].items():
        projmap[idxA[kA], idxB[kB]] = proj_cnt

    xlabel = 'SOURCE: Temporal Windows (Covariance -> KPCA -> KDTree)'
    ylabel = 'DEST: ATemporal Data (K-D Grid w/feature Landscape, 0-1..3-4 vals)'

    Asize = [nodeA_size[i] for i in nodeA]
    proj_total = [int(i) for i in np.sum(projmap, axis=1)]
    Bsize = [self.hc[k]['count'] for k in nodeB]
    alabel = ['#%04d/ %5d/ %3d' % x for x in zip(nodeA, Asize, proj_total)]
    blabel = ['%6s/ %d' % x for x in zip(nodeB, Bsize)]

    # Unit-norm rows / columns; all-zero vectors produce NaN, mapped to 0.
    # Both results keep projmap's orientation (no transpose round trip).
    row_norms = np.linalg.norm(projmap, axis=-1)[:, np.newaxis]
    pmap_row_norm = np.nan_to_num(projmap / row_norms)
    col_norms = np.linalg.norm(projmap.T, axis=-1)[:, np.newaxis]
    pmap_col_norm = np.nan_to_num(projmap.T / col_norms).T

    P.heatmap(projmap, alabel, blabel, title, ylabel=ylabel, xlabel=xlabel)
    P.heatmap(pmap_col_norm, alabel, blabel, title+'_NormCol', ylabel=ylabel, xlabel=xlabel)
    P.heatmap(pmap_row_norm, alabel, blabel, title+'_NormRow', ylabel=ylabel, xlabel=xlabel)
Beispiel #8
0
def plot_heatmaps():
  """Draw three heatmaps from module-level data through ``P.heatmap``."""
  draw = P.heatmap
  draw(arr, rowlist, lcnt, tkey+'_reproj')
  draw(norm.T, gcnt, lcnt, tbin+'_Norm_by_Col')
  # `d` is reshaped into a single column of 53 rows before plotting.
  biased_column = d.reshape(53,1)
  draw(biased_column, ['biased'], local['2_0']['count'], 'testbiased')
Beispiel #9
0
# Alternative channel selections, currently disabled:
# channels = [*range(24), *range(26, 50)]
# channels = [*range(12), *range(28, 36)]

# Keep only the entries of `csi` selected by `channels`
# (`channels` is defined outside this excerpt).
csi = csi[[x for x in channels]]

# Settings handed to filters.lowpass below.
sampling_rate = 100  # Hz
lowpass_cutoff = 10  # Hz
order = 5

# Filter every first-axis slice of `csi` in place, then replace any NaN
# values with 0 via np.nan_to_num.
for x in range(csi.shape[0]):
    csi[x] = filters.lowpass(csi[x], lowpass_cutoff, sampling_rate, order)
csi = np.nan_to_num(csi)

# plot.heatmap_3d(csi, timestamps)
plot.heatmap(csi, timestamps)

# Transposed copy of the data; each row of `csi_trans` is handled as one
# "frame" by the loop that follows.
csi_trans = np.transpose(csi)

# Keep every 10th frame and the matching timestamps.
csi_trans = csi_trans[::10]
timestamps = timestamps[::10]

# No previous frame exists before the first loop iteration.
prev_frame = None

# Result accumulators, presumably filled by the frame loop below
# (its body is outside this excerpt).
sti_values = []
corr_values = []

for x in range(csi_trans.shape[0]):
    frame = csi_trans[x]

    if prev_frame is None:
Beispiel #10
0
def plot_gsea_heatmap(gsea, genesets_annot, factor_idx, fig, abs):
    """Draw a GSEA overview on *fig*: heatmap, three waterfall plots, table.

    The figure is split vertically into a bottom table (gene-set
    annotations), a heatmap of signed ``-log10(FDR)`` values in the middle
    and a top row of three waterfall plots (NES, position of the maximum ES
    and leading-edge proportion) for the factor selected by *factor_idx*.

    Args:
        gsea: Dataset providing ``gene_set``, ``nes``, ``fdr``,
            ``max_es_at`` and ``le_prop`` plus the attribute ``absolute``.
            It must support ``reindex_like`` (presumably an xarray
            Dataset; confirm against the caller).
        genesets_annot: Annotation dataset with a ``gene_set`` variable;
            optional ``source`` / ``source_year`` variables are merged into
            one ``source`` row of the table.
        factor_idx: Index of the factor shown in the waterfall plots.
        fig: Figure that receives all axes.
        abs: If true, use the one-sided colour map and normalisation
            (``Reds`` / ``FDRNormalize``); otherwise the diverging ones
            (``RdBu_r`` / ``SFDRNormalize``). The name shadows the builtin
            but is part of the public signature.
    """
    plusminus_sign = chr(0x00B1)

    genesets = genesets_annot['gene_set'].values
    assert all(np.isin(genesets, gsea['gene_set']))

    # Layout fractions in figure coordinates.
    wf_prop = 0.3
    table_prop = 0.3
    hm_prop = 1 - wf_prop - table_prop
    cbar_vmargin = 0.25
    wf_hmargin = 0.05
    wf_vmargin = 0.06

    sel_gsea = gsea.reindex_like(genesets_annot)

    # Heatmap
    if abs:
        cmap = copy(matplotlib.cm.Reds)
    else:
        cmap = copy(matplotlib.cm.RdBu_r)
    cmap.set_bad('0.8')
    # -log10(FDR) carrying the sign of the enrichment score.
    sel_gsea['slogfdr'] = np.sign(sel_gsea['nes']) * -np.log10(sel_gsea['fdr'])
    ax = fig.add_axes([wf_hmargin, table_prop, 1 - wf_hmargin, hm_prop])
    if abs:
        zlim = [0, -np.log10(0.05)]
        norm = FDRNormalize(sig_threshold=-np.log10(0.25))
    else:
        zlim = [np.log10(0.05), -np.log10(0.05)]
        norm = SFDRNormalize(sig_threshold=-np.log10(0.25))
    hm = plot.heatmap(
        sel_gsea['slogfdr'][::-1, :],
        # Cells with FDR above 0.25 are masked.
        mask=sel_gsea['fdr'][::-1, :] > 0.25,
        zlim=zlim,
        norm=norm,
        cmap=cmap,
        method='pcolormesh',
        cbar=False,
        ax=ax,
    )
    # White vertical separators between the heatmap columns.
    for i in range(sel_gsea['slogfdr'].shape[1]):
        ax.axvline(i, color='white', linewidth=2)
    ax_cbar = fig.add_axes([
        wf_hmargin + cbar_vmargin, 0.02, 1 - wf_hmargin - 2 * cbar_vmargin,
        0.03
    ], )
    cbar = fig.colorbar(hm, ax_cbar, orientation='horizontal')

    # Ticks sit at -log10(FDR) positions but are labelled with the FDR.
    fdr_ticks_at = np.array([0.25, 0.1, 0.05])
    lfdr_ticks_at = -np.log10(fdr_ticks_at)
    if abs:
        cbar_tick_lv = np.append([0.0], lfdr_ticks_at)
        cbar_tick_v = np.append([1.0], fdr_ticks_at)
    else:
        cbar_tick_lv = np.append(np.append(-lfdr_ticks_at[::-1], [0.0]),
                                 lfdr_ticks_at)
        cbar_tick_v = np.append(-np.append(fdr_ticks_at[::-1], [1.0]),
                                fdr_ticks_at)
    cbar.set_ticks(cbar_tick_lv)
    if abs:
        ax_cbar.set_xlabel("FDR")
    else:
        ax_cbar.set_xlabel("signed FDR")
    cbar_tick_labels = [f"{v}" for v in cbar_tick_v]
    if not abs:
        # The centre tick of the signed scale stands for both +1.0 and -1.0.
        cbar_tick_labels[len(cbar_tick_labels) // 2] = plusminus_sign + "1.0"
    cbar.ax.set_xticklabels(cbar_tick_labels)
    ax.set_xticklabels("")
    # A boolean is required here: the string 'off' is truthy and leaves the
    # bottom ticks visible on current matplotlib versions.
    ax.tick_params(bottom=False)
    ax.set_ylabel("MRI Factor")
    ax.set_xlabel("")
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)

    # Top waterfall plots
    ax_nes = fig.add_axes([(0 / 3) + wf_hmargin, 1 - wf_prop + wf_vmargin,
                           (1 / 3) - wf_hmargin, wf_prop - wf_vmargin])
    wf_plot(gsea['nes'][factor_idx, :], genesets, ax_nes, 'NES')

    ax_mesa = fig.add_axes([(1 / 3) + wf_hmargin, 1 - wf_prop + wf_vmargin,
                            (1 / 3) - wf_hmargin, wf_prop - wf_vmargin])
    if gsea.attrs['absolute']:
        mesa_mid = 1
    else:
        mesa_mid = int(gsea['max_es_at'].max() / 2)
    wf_plot(gsea['max_es_at'][factor_idx, :],
            genesets,
            ax_mesa,
            'Max. ES at',
            xbaseline=mesa_mid,
            reverse=True)
    ax_mesa.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))

    ax_le = fig.add_axes([(2 / 3) + wf_hmargin, 1 - wf_prop + wf_vmargin,
                          (1 / 3) - wf_hmargin, wf_prop - wf_vmargin])
    wf_plot(gsea['le_prop'][factor_idx, :], genesets, ax_le, 'Leading Edge')

    # Bottom table
    ga = genesets_annot.copy()
    if 'source' in ga and 'source_year' in ga:
        # Merge source and year into a single "source (year)" row.
        sy = zip(ga['source'].values, ga['source_year'].values)
        ga['source'] = ('gene_set', [f"{s} ({y})" for s, y in sy])
        del ga['source_year']
    gaa = ga.to_array()
    xlabels = [gs_labels[i] for i in range(gaa.shape[1])]
    table = ax.table(
        cellText=np.vstack((xlabels, gaa.values)),
        cellLoc='center',
        rowLabels=['gene set'] + list(gaa['variable'].values),
        loc='bottom',
    )
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    # Table cells are keyed by (row, column); the row-label column is -1.
    for (row, col), cell in table.get_celld().items():
        cell.set_linewidth(2)
        cell.set_edgecolor('w')
        if row % 2 == 1:
            cell.set_facecolor('#eeeeee')
        if row == 3:
            cell.set_height(3 * cell.get_height())
        if row == 0 and col >= 0:
            cell.set_text_props(**gs_label_props)
        if col == -1:
            cell.set_text_props(weight='bold')
Beispiel #11
0
def plot_heatmaps():
    """Draw three heatmaps from module-level data through ``P.heatmap``."""
    draw = P.heatmap
    draw(arr, rowlist, lcnt, tkey + '_reproj')
    draw(norm.T, gcnt, lcnt, tbin + '_Norm_by_Col')
    # `d` is reshaped into a single column of 53 rows before plotting.
    biased_column = d.reshape(53, 1)
    draw(biased_column, ['biased'], local['2_0']['count'], 'testbiased')
Beispiel #12
0
# Old setup (disabled).
# mean_corr = 0.986
# diff_corr = 0.06

# New setup (disabled).
# mean_corr = 0.978
# diff_corr = 0.33

# Chris setup: the calibration values currently in effect.
mean_corr = 0.9987
diff_corr = 0.0039

# plot.heatmap_3d(csi, timestamps)
import numpy as np
plot.heatmap(np.transpose(csi), timestamps)

# Percent-difference thresholds.
moving_threshold = 0.15
# moving_threshold = 0.10
notmoving_threshold = 0.05
# notmoving_threshold = 0.10

# PCC-activation threshold.
# Let's try to establish this by calibrating with a "no presence" example.
# Then we can take a multiple of the average max-min difference as our
# threshold: here, two `diff_corr` below `mean_corr`.
# containsmovement_threshold = 0.92
containsmovement_threshold = mean_corr - (diff_corr * 2)

# No previous frame exists yet.
prev_frame = None
Beispiel #13
0
                    (len(spots_reduced), 3))
                mask_c = mask.copy()
                mask_reduced = mask_c[:, nuc_pos[0] - 50:nuc_pos[1] + 50]
                spots_reduced[:, 0] -= nuc_pos[0] - 50
                grid_mat = helpers.build_density_by_stripe(spots_reduced,
                                                           z_lines,
                                                           mask_reduced,
                                                           band_n=band_n)

                # spline graph density by band_n
                tgt_image_name = constants.analysis_config[
                    'FIGURE_NAME_FORMAT_GRAPH_STRIPE'].format(
                        image=str(band_n) + im._path.replace("/", "_") + "_" +
                        str(mask_count))
                tgt_fp = pathlib.Path(
                    constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
                        root_dir=global_root_dir), tgt_image_name)
                plot.spline_graph(grid_mat, tgt_fp, band_n)

                # heatmap density by band_n
                tgt_image_name = constants.analysis_config[
                    'FIGURE_NAME_FORMAT_HEATMAP'].format(
                        image=str(band_n) + im._path.replace("/", "_") + "_" +
                        str(mask_count))
                tgt_fp = pathlib.Path(
                    constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
                        root_dir=global_root_dir), tgt_image_name)
                plot.heatmap(grid_mat, tgt_fp, band_n)
                mask_count += 1
        image_counter += 1
Beispiel #14
0
		elif (input=='plot'):
			print
			printPlotMenu()
			input = raw_input()
			print

			if (input=='heatmap'):
				db_path = project_folder+'/dbs/'+raw_input("Enter name of database (leave blank if you want to use all databases available): ")
				print
				while (not(os.path.isdir(db_path))):
					print "What you entered does not exist as a directory."
					print
					db_path = project_folder+'/dbs/'+raw_input("Enter name of database (leave blank if you want to use all databases available): ")
					print
				plot.heatmap(db_path)
			elif (input=='cnn'):
				db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ")
				print
				while (not(os.path.isfile(db_path))):
					print "What you entered does not exist as a file."
					print
					db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ")
					print
				plot.train_valid_convergence(db_path)
			elif (input=='svr'):
				db_path = project_folder+'/logs/'+raw_input("Enter name of log file: ")
				print
				while (not(os.path.isfile(db_path))):
					print "What you entered does not exist as a file."
					print
Beispiel #15
0
    for line in provfile.read().strip().split('\n'):
      if line.startswith('BASIN'):
        _,bid,targ,actual,labelseq = line.split(',')
        data.append((bid,targ,actual,labelseq))
  h, w, t = np.identity(10), np.identity(5), np.identity(5)
  for _,a,b,_ in data:
    i, j = tidx[a], tidx[b]
    h[i][j] += 1
    w[int(a[1])][int(b[1])] += 1
    if a[0] == 'T':
      t[int(a[1])][int(b[1])] += 1
  h_norm = (h.T/h.sum()).T
  w_norm = (w.T/w.sum()).T
  t_norm = (t.T/t.sum()).T
  # P.heatmap(np.rot90(h_norm,3).T, TBIN10[::-1], TBIN10, expname+'_accuracy_10bin', ylabel='Start State', xlabel='Output Distribution')
  P.heatmap(np.rot90(w_norm,3).T, BIN5[::-1], BIN5, fname=expname+'_acc_states', figsize=fsize, latex=True)
  # P.heatmap(np.rot90(t_norm,3).T, BIN5[::-1], BIN5, fname=expname+'_acc_trans', ylabel='Start State', xlabel='Output Distribution')


outbin = {k:defaultdict(list) for k in ['all', 'W', 'T']}
exp = 'lattice2'
h, w, t = np.identity(10), np.identity(5), np.identity(5)
for exp in ['lattice2', 'lattrans']:
  lab_all = np.load(home+'/work/results/label_{0}.npy'.format(exp)).astype(int)
  with open(home+'/work/results/{0}/jclist'.format(exp)) as inf: 
    idlist = inf.read().strip().split('\n')
  for i, tid in enumerate(idlist):
    for a,b in TS.TimeScape.windows(home+'/work/jc/{0}/{1}/{1}_transitions.log'.format(exp,tid)):
      outbin['all'][tid].append(LABEL10(lab_all[i][a:b]))
      if exp=='lattice2': 
        outbin['W'][tid].append(LABEL10(lab_all[i][a:b]))