def errorbar_plot(data, x_spec, y_spec, fname):
    """ Draw an errorbar plot from two value-extraction functions

        `x_spec` and `y_spec` are `(axis label, function)` pairs. Each function
        is called as `func(syst, mat)` for every `(syst, mat, _)` entry of
        `data`; entries where either function returns None are skipped.
        All y-values sharing one x-value are summarized by their mean (marker)
        and standard deviation (error bar). The figure is written to
        `images/<fname>`.
    """
    x_label, x_func = x_spec
    y_label, y_func = y_spec

    # group y-values by x-value
    grouped = collections.defaultdict(list)
    for syst, mat, _ in data:
        x_val, y_val = x_func(syst, mat), y_func(syst, mat)
        if x_val is not None and y_val is not None:
            grouped[x_val].append(y_val)

    # summarize each group
    xs = list(grouped.keys())
    means = [np.mean(ys) for ys in grouped.values()]
    stds = [np.std(ys) for ys in grouped.values()]

    # draw and store figure
    plt.errorbar(xs, means, yerr=stds, fmt='o', clip_on=False)
    plt.title('')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.tight_layout()

    save_figure('images/%s' % fname, bbox_inches='tight')
    plt.close()
def errorbar_plot(data, x_spec, y_spec, fname):
    """ Create an errorbar plot of `y_func` against `x_func`

        Both specs are `(label, func)` tuples; `func(syst, mat)` is evaluated
        for each `(syst, mat, _)` triple in `data`. Triples yielding None for
        either coordinate are ignored. For every distinct x-value the mean and
        standard deviation of the collected y-values are plotted, and the
        result is saved as `images/<fname>`.
    """
    x_label, x_func = x_spec
    y_label, y_func = y_spec

    # collect y-values per x-value
    buckets = collections.defaultdict(list)
    for syst, mat, _ in data:
        x_value = x_func(syst, mat)
        y_value = y_func(syst, mat)
        if None in (x_value, y_value):
            continue
        buckets[x_value].append(y_value)

    # reduce every bucket to (x, mean, std)
    x_positions, centers, spreads = [], [], []
    for x_value, y_values in buckets.items():
        x_positions.append(x_value)
        centers.append(np.mean(y_values))
        spreads.append(np.std(y_values))

    # plot figure
    plt.errorbar(
        x_positions, centers,
        yerr=spreads, fmt='o', clip_on=False)
    plt.title('')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.tight_layout()

    save_figure('images/%s' % fname, bbox_inches='tight')
    plt.close()
def plot_mz_distribution(data, fname='data/peaklist_filtered_assigned.csv'):
    """ Plot MZ values of data and highlight real-life entries

        `data` maps compound names to info dicts; each dict must provide a
        `'mass'` entry and a non-empty `'intensities'` entry. The masses are
        drawn as a histogram and every key of the peak data read from `fname`
        is marked with a faint red vertical line. The figure is saved to
        `images/rl_mz_hist.pdf`.
    """
    # aggregate data
    mzs = []
    for info in data.values():
        mzs.append(info['mass'])
        # every generated compound is expected to carry at least one match
        assert len(info['intensities']) > 0

    peak_data = read_peak_data(fname)

    # plot
    plt.figure()

    plt.hist(mzs, 100, alpha=0.7, linewidth=0)
    for mz in peak_data:
        plt.axvline(mz, color='red', alpha=0.03)

    plt.xlabel('MZ value')
    plt.ylabel('count')
    plt.title('MZ histogram of all generated products')

    plt.tight_layout()
    plotter.save_figure('images/rl_mz_hist.pdf', bbox_inches='tight')
def plot_correlation_histogram(motifs, data):
    """ Overlay histograms of intensity correlations, one per motif group

        `motifs` is an iterable of `(motif list, label)` pairs. For every motif
        each compound is paired with the compounds listed before it, and all
        combinations of their intensity vectors (taken from
        `data[compound]['intensities']`) are correlated with Pearson's r.
        The figure is saved to `images/rl_corr_hist.pdf`.
    """
    palette = itertools.cycle(['b', 'r', 'g', 'c', 'm', 'y', 'k'])

    plt.figure()
    for (motif_group, lbl) in motifs:
        # gather correlation coefficients of this group
        coefficients = []
        for compounds in tqdm(motif_group):
            for first in compounds:
                for second in compounds:
                    if first == second:
                        # only pair `first` with its predecessors
                        break

                    for vec_a in data[first]['intensities']:
                        for vec_b in data[second]['intensities']:
                            coefficients.append(scis.pearsonr(vec_a, vec_b)[0])

        # draw histogram of this group
        plotter.plot_histogram(
            coefficients, plt.gca(),
            facecolor=next(palette), alpha=0.5,
            label=lbl)

    plt.title('Comparison of intensity correlation distributions')
    plt.xlabel('intensity vector correlation')
    plt.legend(loc='best')

    plt.tight_layout()
    plotter.save_figure('images/rl_corr_hist.pdf', bbox_inches='tight')
def single_corr_coeff_hist(reps=200):
    """ Plot distribution of single-run correlation coefficients on a parameter grid

        For each `(k_m, k_23)` combination a basic system is generated and
        analyzed `reps` times. Each grid row shows the system (column 0), the
        distributions of the lower-triangle correlation coefficients
        (column 1) and the evolution of the first run (column 2). The figure
        is saved to `images/correlation_distribution.pdf`.
    """
    def fill_grid(grid, resolution):
        values = np.linspace(.1, 5, resolution)

        row_idx = 0
        for k_m in tqdm(values):
            for k_23 in tqdm(values):
                syst = generate_basic_system(k_m=k_m, k_23=k_23)

                # correlation matrix of every successful run
                matrices = []
                for run in trange(reps):
                    _, mat, sol = analyze_system(syst, repetition_num=1)
                    if mat is None:
                        continue
                    # only keep the last quarter of the time series
                    tail = sol.T[int(len(sol.T)*3/4):]

                    if run == 0:
                        plot_system_evolution(
                            tail.T, plt.subplot(grid[row_idx, 2]),
                            show_legend=False)

                    run_mat = compute_correlation_matrix(np.array([tail]))

                    if run_mat.shape == (4, 4):
                        run_mat = run_mat[:-1, :-1]
                    assert run_mat.shape == (3, 3)

                    matrices.append(run_mat)

                plot_system(syst, plt.subplot(grid[row_idx, 0]))

                # one distribution per entry below the diagonal
                stacked = np.asarray(matrices)
                for i, row in enumerate(stacked.T):
                    for j, series in enumerate(row[:i]):
                        axis = plt.subplot(grid[row_idx, 1])
                        sns.distplot(
                            series, ax=axis,
                            label=r'$c_{{{},{}}}$'.format(i,j))
                        axis.set_xlim((-1, 1))

                row_idx += 1

    # generate plots
    res = 3
    plt.figure(figsize=(20, 30))
    grid = mpl.gridspec.GridSpec(res**2, 3, width_ratios=[1, 1, 2])

    sns.set_style('white')
    plt.style.use('seaborn-poster')

    fill_grid(grid, res)

    plt.tight_layout()
    save_figure('images/correlation_distribution.pdf', bbox_inches='tight')
def network_density(data):
    """ Plot network edge density vs correlation quotient

        For every `(syst, mat, _)` entry the fraction of non-zero Jacobian
        entries is related to the quotients `mat[p1] / mat[p2]` over all
        ordered pairs of upper-triangle index pairs (quotient is 0 where the
        denominator is 0). One errorbar subplot per quotient is written to
        `images/edens_quot.pdf`.
    """
    def ordered_pairs(it):
        """ Yield all pairs `(a, b)` of elements of `it` with `a < b`
        """
        return (pair
                for pair in itertools.product(it, repeat=2)
                if pair[0] < pair[1])

    # quotient spec -> density -> list of quotients
    collected = collections.defaultdict(
        lambda: collections.defaultdict(list))
    for syst, mat, _ in data:
        dim = syst.jacobian.shape[0]
        max_edge_num = dim * (dim+1)
        dens = np.count_nonzero(syst.jacobian) / max_edge_num

        for pair in ordered_pairs(ordered_pairs(range(dim))):
            numer_idx, denom_idx = pair
            if mat[denom_idx] != 0:
                quot = mat[numer_idx] / mat[denom_idx]
            else:
                quot = 0
            collected[pair][dens].append(quot)

    # plot figure
    plt.figure(figsize=(6, 4*len(collected)))
    gs = gridspec.GridSpec(len(collected), 1)

    def draw(ax, per_density, title):
        """ Draw mean and standard deviation of quotients per density
        """
        xs = list(per_density.keys())
        means = [np.mean(quots) for quots in per_density.values()]
        stds = [np.std(quots) for quots in per_density.values()]

        ax.errorbar(
            xs, means,
            yerr=stds,
            fmt='o', clip_on=False)

        ax.set_title(title)
        ax.set_xlabel('motif edge density')
        ax.set_ylabel('correlation quotient')

    for row, (spec, per_density) in enumerate(collected.items()):
        (x1, y1), (x2, y2) = spec
        title = r'Quotient: $C_{%d%d} / C_{%d%d}$' % (x1, y1, x2, y2)
        draw(plt.subplot(gs[row]), per_density, title)

    plt.tight_layout()
    save_figure('images/edens_quot.pdf', bbox_inches='tight')
    plt.close()
def network_density(data):
    """ Relate motif edge density to quotients of correlation entries

        Every `(syst, mat, _)` item contributes its edge density (share of
        non-zero Jacobian entries) and, for each ordered pair of
        upper-triangle index pairs, the quotient `mat[p1] / mat[p2]`
        (0 when `mat[p2]` is 0). Each quotient gets its own errorbar subplot;
        the figure is saved to `images/edens_quot.pdf`.
    """
    def gen(it):
        """ Compute all possible heterogeneous pairs of `it`
        """
        candidates = itertools.product(it, repeat=2)
        return (cand for cand in candidates if cand[0] < cand[1])

    def nested_lists():
        return collections.defaultdict(list)

    points = collections.defaultdict(nested_lists)
    for syst, mat, _ in data:
        node_num = syst.jacobian.shape[0]
        dens = np.count_nonzero(syst.jacobian) / (node_num * (node_num + 1))

        index_pairs = gen(range(node_num))
        for pair in gen(index_pairs):
            p1, p2 = pair
            denominator_is_zero = not (mat[p2] != 0)
            quot = 0 if denominator_is_zero else mat[p1] / mat[p2]
            points[pair][dens].append(quot)

    # plot figure
    plot_num = len(points)
    plt.figure(figsize=(6, 4 * plot_num))
    gs = gridspec.GridSpec(plot_num, 1)

    def plot(ax, dens_to_quots, title):
        """ Plot given data
        """
        densities, quotients, errbars = [], [], []
        for dens, quots in dens_to_quots.items():
            densities.append(dens)
            quotients.append(np.mean(quots))
            errbars.append(np.std(quots))

        ax.errorbar(densities, quotients, yerr=errbars,
                    fmt='o', clip_on=False)

        ax.set_title(title)
        ax.set_xlabel('motif edge density')
        ax.set_ylabel('correlation quotient')

    for i, (spec, dat) in enumerate(points.items()):
        (x1, y1), (x2, y2) = spec
        plot(
            plt.subplot(gs[i]), dat,
            r'Quotient: $C_{%d%d} / C_{%d%d}$' % (x1, y1, x2, y2))

    plt.tight_layout()
    save_figure('images/edens_quot.pdf', bbox_inches='tight')
    plt.close()
def plot_result(motifs, data, fname_app='', sub_num=10):
    """ Create overview and correlation-histogram plots for a motif selection

        At most `sub_num` motifs, evenly spaced over `motifs`, are shown with
        their correlation matrix, the selected intensity series and all
        possible correlations. Output goes to `images/rl_motifs<fname_app>.pdf`
        and `images/rl_corr_hist<fname_app>.pdf`.
    """
    mpl.style.use('default')

    sub_num = min(sub_num, len(motifs))
    print(' > Plotting results ({})'.format(fname_app[1:]))

    # overview plots
    plt.figure(figsize=(30, 4 * sub_num))
    gs = mpl.gridspec.GridSpec(sub_num, 3, width_ratios=[1, 2, 1])

    positions = np.linspace(0, len(motifs), num=sub_num, endpoint=False)
    motif_indices = map(int, positions)

    corrs = []
    for row, motif_idx in tqdm(enumerate(motif_indices), total=sub_num):
        c1, c2, c3 = motifs[motif_idx]
        compounds = [c1, c2, c3]

        # plot all possible correlations and select optimal one
        if row < sub_num:
            corr_ax = plt.subplot(gs[row, 2])
        else:
            corr_ax = None
        sel, all_corrs = plot_all_correlations((c1, c2, c3), data, corr_ax)

        # intensity series chosen for each compound
        sols = [data[comp]['intensities'][sel[comp]] for comp in compounds]

        corr_mat = get_correlation_matrix(sols)
        corrs.extend(all_corrs)

        # plot rest
        plotter.plot_corr_mat(corr_mat, plt.subplot(gs[row, 0]))

        series_ax = plt.subplot(gs[row, 1])
        plotter.plot_system_evolution(sols, series_ax, xlabel='sample')
        series_ax.set_title('{}\n{}\n{}'.format(c1, c2, c3))

    plt.tight_layout()
    plotter.save_figure(
        'images/rl_motifs{}.pdf'.format(fname_app), bbox_inches='tight')

    # correlation histogram
    plt.figure()
    plotter.plot_histogram(corrs, plt.gca())
    plt.tight_layout()
    plotter.save_figure(
        'images/rl_corr_hist{}.pdf'.format(fname_app), bbox_inches='tight')
def single_corr_coeff_hist(reps=5000):
    """ Plot distribution of single correlation coefficients

        The basic system and every tenth system derived from it via
        `add_node_to_system` are each solved `reps` times. For every system
        the distributions of the lower-triangle entries of the single-run
        correlation matrices are drawn as overlaid histograms. The figure is
        saved to `images/correlation_distribution.pdf`.
    """
    def do(syst, ax):
        # data
        single_run_matrices = []
        for _ in trange(reps):
            sol = solve_system(syst)

            # only the last quarter of the time series is used
            sol_extract = sol.T[int(len(sol.T)*3/4):]
            single_run_mat = compute_correlation_matrix(
                np.array([sol_extract]))

            if single_run_mat.shape == (4, 4):
                single_run_mat = single_run_mat[:-1, :-1]
            assert single_run_mat.shape == (3, 3)

            single_run_matrices.append(single_run_mat)
        single_run_matrices = np.asarray(single_run_matrices)

        # plotting
        cols = cycle(['b', 'r', 'g', 'c', 'm', 'y', 'k'])
        for i, row in enumerate(single_run_matrices.T):
            for j, series in enumerate(row):
                if i == j:
                    # only entries below the diagonal
                    break

                plot_histogram(
                    series[series != 1], ax,
                    label=r'$c_{{{},{}}}$'.format(i,j),
                    facecolor=next(cols), alpha=0.5,
                    bins=100)

    # data
    syst = generate_basic_system()
    more = add_node_to_system(syst)[::10]
    print('#more', len(more))

    # plot
    # squeeze=False keeps `axes` two-dimensional even for a single row,
    # so the `axes[i, j]` indexing below always works
    _, axes = plt.subplots(len(more), 2, figsize=(9, 20), squeeze=False)

    do(syst, axes[0, 0])
    print()
    for i, m in tqdm(enumerate(more), total=len(more)):
        # the first left cell already holds the histogram of the base system
        if i > 0:
            plot_system(m, axes[i, 0])
        do(m, axes[i, 1])

    plt.tight_layout()
    save_figure('images/correlation_distribution.pdf', bbox_inches='tight')
def plot_system_overview(data, sample_size=20):
    """ Plot a random sample of systems next to their correlations and solutions

        Up to `sample_size` entries of `data` are drawn without replacement
        and shown in their original order, one row per
        `(system, correlation matrix, solution)` triple. The figure is saved
        to `images/overview.pdf`.
    """
    # extract sample
    amount = min(len(data), sample_size)
    chosen = random.sample(range(len(data)), amount)
    chosen.sort()
    dsample = [data[pos] for pos in chosen]

    # plot sample
    plt.figure(figsize=(13, 4*len(dsample)))
    gs = gridspec.GridSpec(len(dsample), 3, width_ratios=[1, 1, 2])

    for row, entry in enumerate(dsample):
        system, corr_mat, solution = entry
        plot_system(system, plt.subplot(gs[row, 0]))
        plot_corr_mat(corr_mat, plt.subplot(gs[row, 1]))
        plot_system_evolution(solution, plt.subplot(gs[row, 2]))

    plt.tight_layout()
    save_figure('images/overview.pdf', bbox_inches='tight', dpi=300)
    plt.close()
def main(fname, skip_filtered=True):
    """ Main interface

        If `fname` is an existing file, all systems stored in it are analyzed
        in parallel and the results are cached. Results whose correlation
        matrix is None are dropped unless `skip_filtered` is False, in which
        case all results are kept and clustered before caching.

        Otherwise `fname` is parsed as a system description; that single
        system is analyzed and plotted to `images/single_case.pdf`.
    """
    if os.path.isfile(fname):
        systems = load_systems(fname)
        if systems.ndim == 0:
            # a 0-d array wraps a single system; `np.asscalar` is no longer
            # available in current NumPy, `ndarray.item()` is its replacement
            systems = [systems.item()]
        print('Integrating %d systems' % len(systems))

        core_num = int(multiprocessing.cpu_count() * 4 / 5)
        print('Using %d cores' % core_num)

        data = []
        with tqdm(total=len(systems)) as pbar:
            with multiprocessing.Pool(core_num) as p:
                for res in p.imap(analyze_system, systems, chunksize=10):
                    if not skip_filtered or res[1] is not None:
                        data.append(res)
                    pbar.update()
        print('Found result for %d systems' % len(data))

        if not skip_filtered:
            data = cluster_data(data)

        cache_data(data)
    else:
        syst = system_from_string(fname)
        syst, mat, sol = analyze_system(syst, use_ode_sde_diff=False)
        if mat is None:
            print('No sensible steady-state found')
        else:
            print(mat)

        # NOTE(review): plotting is attempted even when `mat` is None --
        # confirm that `plot_corr_mat` copes with that
        plt.figure(figsize=(20, 6))
        gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 2])
        plt.style.use('seaborn-poster')

        plot_system(syst, plt.subplot(gs[0, 0]))
        plot_corr_mat(mat, plt.subplot(gs[0, 1]))
        plot_system_evolution(sol, plt.subplot(gs[0, 2]))

        plt.tight_layout()
        save_figure('images/single_case.pdf', bbox_inches='tight', dpi=300)
        plt.close()
def simulate_graph(graph):
    """ Simulate dynamics on `graph` and plot correlations and time series

        The adjacency matrix of `graph` (diagonal set to -1) acts as Jacobian.
        Nodes whose column sum is zero after compensating the self-inhibition
        receive input (entries of `D` and `E` set to 1) and are excluded from
        the plotted correlation matrix. The figure is saved to
        `images/peak_network_simulation.pdf`.
    """
    # create system
    J = np.copy(nx.to_numpy_matrix(graph))
    np.fill_diagonal(J, -1)

    node_num = J.shape[0]
    D = np.zeros((node_num,))
    E = np.zeros((node_num,))
    I = np.ones((node_num,))

    # add input to nodes of zero in-degree
    # (the added 1 compensates for the self-inhibition on the diagonal)
    zero_indgr = [
        pos for pos, col in enumerate(J.T)
        if np.sum(col) + 1 == 0]

    D[zero_indgr] = 1
    E[zero_indgr] = 1
    print('>', '{}/{} nodes with zero indegree'.format(len(zero_indgr), len(graph.nodes())))

    # simulate system
    syst, mat, sol = analyze_system(
        SDESystem(J, D, E, I), filter_trivial_ss=False)

    # plot results
    plt.figure(figsize=(30, 15))
    gs = mpl.gridspec.GridSpec(1, 2, width_ratios=[1, 2])

    if mat is not None:
        # only keep non-zero indegree node correlations
        mat = extract_sub_matrix(mat, zero_indgr)
        kept = list_diff(range(J.shape[0]), zero_indgr)
        used_nodes = np.array(graph.nodes())[kept]

        plot_corr_mat(
            mat, plt.subplot(gs[0]),
            show_values=False, labels=used_nodes)
    plot_system_evolution(sol, plt.subplot(gs[1]), show_legend=False)

    save_figure('images/peak_network_simulation.pdf', bbox_inches='tight', dpi=300)
def investigate_reactions(
    compounds_level0, intensities_level0,
    reaction_data
):
    """ Check whether reactions leave a signature in intensity correlations

        New compounds are derived from `compounds_level0` by one application
        of `reaction_data` and matched to intensities. For every new compound
        with two educts, all intensity vectors of both educts are correlated
        (Pearson) and the coefficients are grouped by reaction. One histogram
        per reaction is drawn into `images/rl_reaction_patterns.pdf`.
    """
    # combine initial compounds
    intensities_level1 = match_masses(
        iterate_once(compounds_level0, reaction_data))
    print('Found {} new compounds'.format(len(intensities_level1)))

    intensities_all = dict(intensities_level0)
    intensities_all.update(intensities_level1)

    # compute all correlations for all compounds
    records = []
    for compound in tqdm(intensities_level1.keys()):
        educt_a, rea, educt_b = parse_compound_name(compound)
        if educt_b == 'None':
            continue

        records.extend(
            {'reaction': rea,
             'correlation': scis.pearsonr(vec_a, vec_b)[0]}
            for vec_a in intensities_all[educt_a]
            for vec_b in intensities_all[educt_b])

    df = pd.DataFrame.from_dict(records)

    # plot result
    plt.figure()

    bin_edges = np.linspace(-1, 1, 200)
    for rea in df.reaction.unique():
        selection = df[df.reaction==rea]
        sns.distplot(
            selection.correlation,
            label=rea, kde=False, bins=bin_edges)

    plt.legend(loc='best')

    plt.tight_layout()
    plotter.save_figure('images/rl_reaction_patterns.pdf', bbox_inches='tight')
    plt.close()
def plot_system_overview(data, sample_size=20):
    """ Plot systems vs correlations for a random subset of `data`

        At most `sample_size` entries are picked at random (order of `data`
        is retained). Each row of the figure shows the system, its correlation
        matrix and its solution. The result is written to
        `images/overview.pdf`.
    """
    # extract sample
    indices = sorted(
        random.sample(range(len(data)), min(len(data), sample_size)))
    dsample = []
    for idx in indices:
        dsample.append(data[idx])
    row_num = len(dsample)

    # plot sample
    plt.figure(figsize=(13, 4 * row_num))
    gs = gridspec.GridSpec(row_num, 3, width_ratios=[1, 1, 2])

    for i in range(row_num):
        system, corr_mat, solution = dsample[i]

        plot_system(system, plt.subplot(gs[i, 0]))
        plot_corr_mat(corr_mat, plt.subplot(gs[i, 1]))
        plot_system_evolution(solution, plt.subplot(gs[i, 2]))

    plt.tight_layout()
    save_figure('images/overview.pdf', bbox_inches='tight', dpi=300)
    plt.close()
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Compare node degree and average absolute correlation to neighbors

        For every node of every system in `data` its degree and the mean
        absolute correlation to its neighbors (0 without neighbors, the node
        itself excluded) are collected and shown as a 2D histogram. The figure
        is saved to `images/ndegree_corr.pdf`.
    """
    # get data
    degrees = []
    mean_corrs = []
    node_num = -1  # stays negative if `data` is empty
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for node in graph.nodes():
            degrees.append(graph.degree(node))

            neigh_corrs = [
                abs(mat[node, other])
                for other in graph.neighbors(node)
                if node != other]
            if len(neigh_corrs) > 0:
                mean_corrs.append(np.mean(neigh_corrs))
            else:
                mean_corrs.append(0)
        node_num = graph.number_of_nodes()
    assert node_num >= 0, 'Invalid data found'

    # plot data
    heatmap, xedges, yedges = np.histogram2d(
        mean_corrs, degrees,
        bins=(bin_num_x, bin_num_y))
    extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
    x_span = extent[1] - extent[0]
    y_span = extent[3] - extent[2]

    plt.imshow(
        heatmap[::-1],
        extent=extent, interpolation='nearest',
        aspect=abs(x_span / y_span))
    plt.colorbar()

    cc = get_correlation(degrees, mean_corrs)
    plt.title(r'Corr: $%.2f$' % cc)

    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Relate the degree of a node to its correlation with neighboring nodes

        Each `(syst, mat, _)` entry of `data` is turned into a directed graph
        built from the Jacobian. Per node, the degree and the mean of
        `|mat[node, neighbor]|` over all neighbors other than the node itself
        (0 if there are none) are recorded. The joint distribution is drawn as
        a heatmap and saved to `images/ndegree_corr.pdf`.
    """
    def mean_neighbor_correlation(graph, mat, node):
        values = [abs(mat[node, j]) for j in graph.neighbors(node) if node != j]
        return np.mean(values) if len(values) > 0 else 0

    # get data
    ndegs, avg_corrs = [], []
    node_num = -1
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for i in graph.nodes():
            ndegs.append(graph.degree(i))
            avg_corrs.append(mean_neighbor_correlation(graph, mat, i))
        node_num = graph.number_of_nodes()

    # `node_num` is only set once at least one system was processed
    assert node_num >= 0, 'Invalid data found'

    # plot data
    bins = (bin_num_x, bin_num_y)
    heatmap, xedges, yedges = np.histogram2d(avg_corrs, ndegs, bins=bins)

    left, right = yedges[0], yedges[-1]
    bottom, top = xedges[0], xedges[-1]
    extent = [left, right, bottom, top]

    heatmap = heatmap[::-1]
    plt.imshow(heatmap,
               extent=extent,
               interpolation='nearest',
               aspect=abs((right - left) / (top - bottom)))
    plt.colorbar()

    plt.title(r'Corr: $%.2f$' % get_correlation(ndegs, avg_corrs))
    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
def plot_individuals(examples, fname, val_func=None):
    """ Plot a selection of individual results

        `examples` is a non-empty sequence whose layout is derived from its
        first entry:
            * entries of length 2 are pairs of networks (raw, enhanced); each
              row shows both systems with their correlation matrices, the
              evolution of the enhanced system and, if `val_func` is given,
              a marker cell colored by `handle_enh_entry`
            * all other entries are single `(system, matrix, solution)`
              results; the matrix plot is skipped if the matrix is None

        The figure is saved to `<fname without '.pdf'>_zoom.pdf`.
    """
    # without `val_func` there is no marker column
    mod = -1 if val_func is None else 0

    # plot selected networks
    if len(examples[0]) == 2:  # is pair of networks
        col_num = 6 + mod
        # trim the ratios to the actual number of columns; GridSpec rejects
        # a ratio list whose length differs from the column count
        width_ratios = [1, 2, 1, 2, 1 + (-3 * mod), 4][:col_num]

        plt.figure(figsize=(50, 4*len(examples)))
        gs = mpl.gridspec.GridSpec(
            len(examples), col_num,
            width_ratios=width_ratios)
    else:  # each entry is single network
        plt.figure(figsize=(25, 4*len(examples)))
        gs = mpl.gridspec.GridSpec(len(examples), 3, width_ratios=[1, 1, 2])

    for i, net in enumerate(examples):
        if len(net) == 2:  # pair of networks
            raw_p, enh_p = net

            # entries may carry one extra leading element which is dropped
            if len(raw_p) == 2:
                _, raw = raw_p
                _, enh = enh_p
            else:
                raw = raw_p
                enh = enh_p

            plot_system(raw[0], plt.subplot(gs[i, 0]))
            plot_corr_mat(raw[1], plt.subplot(gs[i, 1]))
            plot_system(enh[0], plt.subplot(gs[i, 2]))
            plot_corr_mat(enh[1], plt.subplot(gs[i, 3]))
            plot_system_evolution(enh[2], plt.subplot(gs[i, 5+mod]))

            # plot marker
            if val_func is not None:
                mark_ax = plt.subplot(gs[i, 4])
                mark_ax.imshow(
                    [[handle_enh_entry(raw_p, enh_p, val_func)]],
                    cmap=get_matrix_cmap(), vmin=0, vmax=3)
                mark_ax.axis('off')
            else:
                print('Tried to use `val_func`, but it\'s None')
        else:  # single network
            plot_system(net[0], plt.subplot(gs[i, 0]))
            if net[1] is not None:
                plot_corr_mat(net[1], plt.subplot(gs[i, 1]))
            plot_system_evolution(net[2], plt.subplot(gs[i, 2]))

    plt.tight_layout()
    save_figure('%s_zoom.pdf' % fname.replace('.pdf', ''), bbox_inches='tight', dpi=300)
    plt.close()
def threshold_influence(inp, ax=None, value_func=get_sign_changes, resolution=500):
    """ Investigate influence of threshold

        The `(threshold, value)` pairs, the area and the highlighted threshold
        are obtained from `aggregate_motif_data` applied to `inp['data']`.
        Pairs with positive value are drawn in the default color, all others
        in red; the range up to the highlighted threshold is shaded.

        If `ax` is given, the plot is drawn into it and nothing is saved.
        Otherwise a new figure is created, annotated and saved to
        `images/threshold_influence_<name>.pdf`, where `<name>` is the name of
        `value_func` without its first four characters.

        Returns the area reported by `aggregate_motif_data`.
    """
    # produce data
    pairs, area, imp_thres = aggregate_motif_data(
        np.asarray(inp['data']),
        value_func=value_func, resolution=resolution)

    # plot result
    value_func_name = value_func.__name__[4:]

    # only open a new figure if the caller did not supply an axis,
    # otherwise an empty figure would be left behind
    if ax is None:
        plt.figure()
    else:
        plt.sca(ax)

    nz_vec = [(t, m) for t, m in pairs if m > 0]
    z_vec = [(t, m) for t, m in pairs if m <= 0]

    if len(nz_vec) > 0:
        plt.plot(*zip(*nz_vec), 'o')
    if len(z_vec) > 0:
        plt.plot(*zip(*z_vec), 'o', color='red')

    plt.axvspan(
        xmin=min(t for t, _ in pairs), xmax=imp_thres,
        alpha=0.1, color='blue')
    if ax is None:
        plt.annotate(
            'half the correlation stdev ({:.02})'.format(imp_thres),
            xy=(imp_thres, .025), xycoords='data',
            xytext=(50, 20), textcoords='offset points',
            arrowprops=dict(arrowstyle='->'))

    plt.xscale('log')
    plt.title('Influence of binning threshold on number of {}'.format(
        value_func_name))
    plt.xlabel('binning threshold')
    plt.ylabel('frequency of {}'.format(value_func_name))

    # save result
    if ax is None:
        save_figure(
            'images/threshold_influence_{}.pdf'.format(value_func_name),
            bbox_inches='tight')

    return area
def plot_individuals(examples, fname, val_func=None):
    """ Plot a selection of individual results

        The layout depends on the first entry of the non-empty sequence
        `examples`:
            * length 2: every entry is a pair of networks (raw, enhanced).
              A row holds both systems, both correlation matrices and the
              evolution of the enhanced system. With `val_func`, an additional
              marker cell shows the value of `handle_enh_entry`.
            * otherwise: every entry is one `(system, matrix, solution)`
              result; entries whose matrix is None get no matrix plot.

        Output is written to `<fname without '.pdf'>_zoom.pdf`.
    """
    if val_func is None:
        mod = -1  # no marker column
    else:
        mod = 0

    row_num = len(examples)

    # plot selected networks
    if len(examples[0]) == 2:  # is pair of networks
        ratios = [1, 2, 1, 2, 1 + (-3 * mod), 4]
        # GridSpec requires exactly one ratio per column, so the superfluous
        # entry has to go when the marker column is missing
        ratios = ratios[:6 + mod]

        plt.figure(figsize=(50, 4 * row_num))
        gs = mpl.gridspec.GridSpec(row_num, 6 + mod, width_ratios=ratios)
    else:  # each entry is single network
        plt.figure(figsize=(25, 4 * row_num))
        gs = mpl.gridspec.GridSpec(row_num, 3, width_ratios=[1, 1, 2])

    for i, net in enumerate(examples):
        if len(net) != 2:  # single network
            system, corr_mat, solution = net[0], net[1], net[2]

            plot_system(system, plt.subplot(gs[i, 0]))
            if corr_mat is not None:
                plot_corr_mat(corr_mat, plt.subplot(gs[i, 1]))
            plot_system_evolution(solution, plt.subplot(gs[i, 2]))
            continue

        # pair of networks
        raw_p, enh_p = net

        # two-element entries carry a leading element that is not plotted
        if len(raw_p) == 2:
            _, raw = raw_p
            _, enh = enh_p
        else:
            raw, enh = raw_p, enh_p

        plot_system(raw[0], plt.subplot(gs[i, 0]))
        plot_corr_mat(raw[1], plt.subplot(gs[i, 1]))
        plot_system(enh[0], plt.subplot(gs[i, 2]))
        plot_corr_mat(enh[1], plt.subplot(gs[i, 3]))
        plot_system_evolution(enh[2], plt.subplot(gs[i, 5 + mod]))

        # plot marker
        if val_func is None:
            print('Tried to use `val_func`, but it\'s None')
        else:
            mark_ax = plt.subplot(gs[i, 4])
            mark_ax.imshow([[handle_enh_entry(raw_p, enh_p, val_func)]],
                           cmap=get_matrix_cmap(),
                           vmin=0,
                           vmax=3)
            mark_ax.axis('off')

    plt.tight_layout()
    save_figure('%s_zoom.pdf' % fname.replace('.pdf', ''),
                bbox_inches='tight',
                dpi=300)
    plt.close()
def plot_result(inp, vfunc, sfuncs, title, fname):
    """ Plot generated matrix

        `sfuncs` is either a list of functions or, to request clustering, a
        tuple `(xtick_func, spec)` where `spec` is a string of the form:
            * cluster:euclidean
            * cluster:hamming
        (generally every metric for scipy.spatial.distance.pdist)

        In the clustering case a seaborn clustermap is saved to `fname`.
        Otherwise the matrix is drawn as an image, selected columns and rows
        are highlighted with bold tick labels and the corresponding networks
        are plotted individually via `plot_individuals`.
    """
    print('Plotting "{}"'.format(fname), end='... ', flush=True)

    # preprocess data
    data, xticks, yticks = preprocess_data(inp['data'], vfunc, sfuncs)

    # stop, it's plotting time!
    if isinstance(sfuncs, tuple):  # there will be clustering
        _, spec = sfuncs
        metr = spec.split(':')[1]

        # remove noisy signals for clustering
        data[data < 0] = 0

        # one row per ytick, one column per xtick
        # (`DataFrame.from_items` no longer exists in current pandas)
        df = pd.DataFrame(data, index=yticks, columns=xticks)

        plt.figure()
        cg = sns.clustermap(df, cmap=get_matrix_cmap(), vmin=0, vmax=3,
                            row_cluster=False, metric=metr)
        plt.setp(cg.ax_heatmap.xaxis.get_ticklabels(), rotation=90, size=6)
        plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, size=5)

        save_figure(fname, bbox_inches='tight')
        plt.close()
    else:  # "normal" plot
        mpl.style.use('default')  # possibly reset seaborn styles

        plt.figure()

        # the `np.int` alias is gone from current NumPy, plain `int` is equivalent
        plt.xticks(np.arange(len(data[0]), dtype=int), xticks)
        plt.yticks(np.arange(len(data), dtype=int), yticks)

        plt.setp(plt.gca().get_xticklabels(), fontsize=3, rotation='vertical')
        plt.setp(plt.gca().get_yticklabels(), fontsize=3)

        # real booleans: the strings 'on'/'off' are both truthy
        plt.tick_params(axis='both', which='both', labelleft=True,
                        bottom=False, top=False, labelbottom=True,
                        left=False, right=False)

        plt.title(title)
        plt.xlabel(sfuncs[0].__doc__)
        plt.ylabel('absolute mean of Jacobian')

        plt.imshow(data, interpolation='nearest', cmap=get_matrix_cmap(),
                   vmin=0, vmax=3)
        plt.colorbar(ticks=range(np.max(data) + 1), extend='min')

        # mark "zoomed" columns
        sel_one, netws_one = select_column_by_jacobian(
            inp['data'],
            np.array([[1, 0, 0, 1],
                      [1, 1, 0, 0],
                      [1, 1, 1, 0],
                      [0, 0, 1, 1]]))
        sel_two, netws_two = select_column_by_jacobian(
            inp['data'],
            np.array([[1, 0, 0, 1],
                      [1, 1, 0, 0],
                      [1, 1, 1, 0],
                      [1, 0, 0, 1]]))

        sel_xticks = list(plt.gca().get_xticklabels())
        sel_xticks[sel_one].set_weight('bold')
        sel_xticks[sel_two].set_weight('bold')
        plt.gca().set_xticklabels(sel_xticks)

        # mark "zoomed" rows
        sel_blue, netws_blue = select_row_by_count(inp['data'], data, 1)
        sel_red, netws_red = select_row_by_count(inp['data'], data, 2)

        sel_yticks = list(plt.gca().get_yticklabels())
        sel_yticks[sel_blue].set_weight('bold')
        sel_yticks[sel_red].set_weight('bold')
        plt.gca().set_yticklabels(sel_yticks)

        # save figure
        save_figure(fname, bbox_inches='tight')

        # plot best examples
        plot_individuals(netws_one, '{}_col_one'.format(fname), vfunc)
        plot_individuals(netws_two, '{}_col_two'.format(fname), vfunc)
        plot_individuals(netws_blue, '{}_row_blue'.format(fname))
        plot_individuals(netws_red, '{}_row_red'.format(fname))

    print('Done')
def find_optimal_assignments(motifs, initial_compound_names):
    """ Find optimal compound assignments by (weighted) randomly selecting
        motifs of low initial assignment number and choose assignments which
        maximize intensity correlation coefficients.

        Goal:
            Enhance partially annotated MS peak file

        All steps:
            * Read compound/reaction data
                * note known MZ values
            * generate new compounds using reaction rules
                * compute theoretical MZ values
            * find motifs in all available compounds
                * assume that compounds in motifs have high intensity
                  correlations
            * for each compound receive all possible annotations from peak file
            * use randomized iterative procedure to find "optimal" MZ
              assignments

        Each entry of `motifs` is a tuple `(c1, c2, c3, ints, info)`: three
        compound names, a mapping from compound name to its candidate
        intensity vectors and a mapping from compound name to an info dict
        (with a `'mass'` entry). The assignment procedure is run ten times
        and each result is drawn into its own axis of
        `images/assignments.pdf`.
    """
    # find assignments
    def get_assignment_number(entry):
        """ Number of possible assignment combinations of a motif
        """
        c1, c2, c3, ints, info = entry
        return len(ints[c1]) * len(ints[c2]) * len(ints[c3])

    def assign(motifs):
        """ Run one randomized assignment pass over all motifs
        """
        info_all = {}
        assignments = {}
        single_assignment_names = []

        sorted_motifs = sorted(
            motifs, key=get_assignment_number, reverse=True)

        while len(sorted_motifs) > 0:
            # weighted choice of starting motif:
            # softmax over list positions, i.e. motifs at the end of the list
            # (few possible assignments) are strongly preferred.
            # The exponents are shifted by their maximum so that `exp` cannot
            # overflow for long lists; the probabilities are unaffected.
            size = len(sorted_motifs)
            weights = np.exp(np.arange(size) - (size - 1))
            probs = weights / np.sum(weights)

            idx = np.random.choice(range(size), 1, p=probs)
            # pop by position: no equality search through the list needed
            entry = sorted_motifs.pop(idx[0])
            c1, c2, c3, ints, info = entry

            # process motif
            comps = c1, c2, c3
            info_all.update(info)

            # single matches
            for c in comps:
                if len(ints[c]) == 1:
                    if c in assignments:
                        assert ints[c][0] == assignments[c]
                    else:
                        assignments[c] = ints[c][0]
                        single_assignment_names.append(c)

            # multiple matches
            for c1 in comps:
                for c2 in comps:
                    if c1 == c2:
                        break
                    corrs = {}

                    # skip if compounds are already assigned
                    if c1 in assignments and c2 in assignments:
                        continue

                    # compute correlations
                    if c1 not in assignments and c2 not in assignments:
                        for i, int1 in enumerate(ints[c1]):
                            for j, int2 in enumerate(ints[c2]):
                                cc, _ = scis.pearsonr(int1, int2)
                                corrs[(i,j)] = cc

                        # choose highest correlation
                        c1_idx, c2_idx = max(
                            corrs.keys(), key=lambda k: abs(corrs[k]))

                    if c1 in assignments and c2 not in assignments:
                        int1 = assignments[c1]
                        for j, int2 in enumerate(ints[c2]):
                            cc, _ = scis.pearsonr(int1, int2)
                            corrs[j] = cc

                        c1_idx = None
                        c2_idx = max(
                            corrs.keys(), key=lambda k: abs(corrs[k]))

                    if c1 not in assignments and c2 in assignments:
                        int2 = assignments[c2]
                        for i, int1 in enumerate(ints[c1]):
                            cc, _ = scis.pearsonr(int1, int2)
                            corrs[i] = cc

                        c1_idx = max(
                            corrs.keys(), key=lambda k: abs(corrs[k]))
                        c2_idx = None

                    # store newly chosen assignments
                    for c, idx in zip([c1, c2], [c1_idx, c2_idx]):
                        # an already assigned compound must not get an index
                        if c == c1 and c1 in assignments:
                            assert c1_idx is None
                        if c == c2 and c2 in assignments:
                            assert c2_idx is None

                        if c in assignments:
                            continue
                        else:
                            assignments[c] = ints[c][idx]

        return assignments, info_all, single_assignment_names

    def plot_assignments(assignments, ax):
        """ Scatter MZ values of assigned compounds, colored by origin

            Reads `info_all` and `single_assignment_names` of the most recent
            `assign` call from the enclosing scope.
        """
        values_initial = []
        values_single = []
        values_other = []
        for name in assignments:
            mz = info_all[name]['mass']

            if name in initial_compound_names:
                values_initial.append(mz)
            elif name in single_assignment_names:
                values_single.append(mz)
            else:
                values_other.append(mz)

        ax.scatter(
            values_initial, [0]*len(values_initial),
            color='red', alpha=0.5, label='initial')
        ax.scatter(
            values_single, [0]*len(values_single),
            color='green', alpha=0.5, label='single')
        ax.scatter(
            values_other, [0]*len(values_other),
            color='blue', alpha=0.5, label='other')

        ax.yaxis.set_major_locator(plt.NullLocator())

    # plots
    N = 10
    _, axes = plt.subplots(N, 1, figsize=(9,9))
    for ax in axes:
        assignments, info_all, single_assignment_names = assign(motifs)

        for c, ints in assignments.items():
            if c != 'Hexose':
                continue
            print(c, sum(ints))

        plot_assignments(assignments, ax)

    plt.xlabel('MZ')
    plt.legend(loc='best')

    plt.tight_layout()
    plotter.save_figure('images/assignments.pdf', bbox_inches='tight')
def threshold_influence(inp, value_func=get_sign_changes, resolution=100):
    """ Investigate influence of threshold

        The module-level `THRESHOLD` is set to each of `resolution`
        logarithmically spaced values between 1e-4 and 1. For every value all
        entries of `inp['data']` are evaluated with `handle_enh_entry` and the
        positive results are summed up. These sums, normalized by three times
        the number of non-negative entries of the last result matrix, are
        plotted against the threshold. The standard deviation of the absolute
        differences between reference and embedded correlation matrices is
        highlighted. Small insets show the result matrices of the first and
        last threshold and of the first threshold reaching that standard
        deviation.

        Note that `THRESHOLD` keeps the last tested value afterwards.
        The figure is saved to `images/threshold_influence_<name>.pdf`, where
        `<name>` is the name of `value_func` without its first four
        characters.
    """
    global THRESHOLD

    def plot_matrix(data):
        # real booleans: the string 'off' is truthy
        plt.tick_params(
            axis='both', which='both', labelleft=False,
            bottom=False, top=False, labelbottom=False, left=False, right=False)
        plt.imshow(
            data,
            interpolation='nearest', cmap=get_matrix_cmap(),
            vmin=0, vmax=3)
        plt.colorbar(ticks=range(int(np.max(data))+1), extend='min')

    threshold_list = np.logspace(-4, 0, resolution)

    # compute stdev of difference between reference and embedded 3 node motif
    cur_diffs = []
    for raw, enh_res in inp['data']:
        _, rd = raw
        _, rdm, _ = rd

        for enh in enh_res:
            _, ed = enh
            _, edm, _ = ed

            if edm is not None:
                # compare only the part belonging to the reference motif
                diff = abs(rdm - edm[:-1,:-1])
                cur_diffs.extend(diff.ravel())
    stdev = np.std(cur_diffs)

    # produce data
    first_data, last_data, std_data = None, None, None
    pairs = []
    for thres in tqdm(threshold_list):
        THRESHOLD = thres

        # one row for each parameter configuration
        data = np.array([
            [handle_enh_entry(raw, enh, value_func) for enh in enh_res]
            for raw, enh_res in inp['data']])

        if thres == threshold_list[0]:
            first_data = data
        if thres == threshold_list[-1]:
            last_data = data
        if thres >= stdev and std_data is None:
            std_data = data

        mat_res = np.sum(data[data>0])
        pairs.append((thres, mat_res))

    print('Data shape:', data.shape)
    total_num = data[data>=0].size * 3

    # plot result
    value_func_name = value_func.__name__[4:]

    plt.figure()

    nz_vec = [(t, m/total_num) for t, m in pairs if m > 0]
    z_vec = [(t, m/total_num) for t, m in pairs if m <= 0]

    # unpacking an empty list would pass 'o' as data to `plot`
    if len(nz_vec) > 0:
        plt.plot(*zip(*nz_vec), 'o')
    if len(z_vec) > 0:
        plt.plot(*zip(*z_vec), 'o', color='red')

    plt.axvspan(
        xmin=1e-6, xmax=stdev,
        alpha=0.1, color='blue')
    plt.annotate('correlation stdev ({:.02})'.format(stdev),
        xy=(stdev, .03), xycoords='data',
        xytext=(50, 20), textcoords='offset points',
        arrowprops=dict(arrowstyle='->'))

    plt.xscale('log')
    plt.title('Influence of binning threshold on number of {}'.format(value_func_name))
    plt.xlabel('binning threshold')
    plt.ylabel('frequency of {}'.format(value_func_name))

    # inside plots
    plt.style.use('default')

    insets = [
        ([0.1, 0.5, .2, .2], first_data),
        ([0.7, 0.4, .2, .2], last_data),
        ([0.4, 0.2, .2, .2], std_data),
    ]
    for rect, matrix in insets:
        # `std_data` stays None if no threshold reaches the stdev
        if matrix is None:
            continue
        plt.axes(rect)
        plot_matrix(matrix)

    # save result
    save_figure('images/threshold_influence_{}.pdf'.format(value_func_name), bbox_inches='tight')
def plot_result(inp, vfunc, sfuncs, title, fname):
    """ Plot generated matrix

        `sfuncs` can either be a list of functions or, for a clustered plot,
        a tuple `(xtick_func, spec)` with `spec` being a string of the form:
            * cluster:euclidean
            * cluster:hamming
        (generally every metric for scipy.spatial.distance.pdist)

        With clustering, negative entries are set to zero and a seaborn
        clustermap (columns clustered only) is saved to `fname`. Without it,
        the matrix is shown as an image, two selected columns and two
        selected rows get bold tick labels, and the networks behind them are
        plotted by `plot_individuals`.
    """
    print('Plotting "{}"'.format(fname), end='... ', flush=True)

    # preprocess data
    data, xticks, yticks = preprocess_data(inp['data'], vfunc, sfuncs)

    clustered = isinstance(sfuncs, tuple)
    if clustered:
        _plot_result_clustered = None  # placeholder removed below
        del _plot_result_clustered

        _, spec = sfuncs
        metr = spec.split(':')[1]

        # remove noisy signals for clustering
        data[data < 0] = 0

        # rows are labeled by yticks, columns by xticks; the plain
        # constructor replaces `DataFrame.from_items`, which current pandas
        # no longer provides
        df = pd.DataFrame(data, index=yticks, columns=xticks)

        plt.figure()
        cg = sns.clustermap(
            df, cmap=get_matrix_cmap(), vmin=0, vmax=3,
            row_cluster=False, metric=metr)
        plt.setp(cg.ax_heatmap.xaxis.get_ticklabels(), rotation=90, size=6)
        plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, size=5)

        save_figure(fname, bbox_inches='tight')
        plt.close()

        print('Done')
        return

    # "normal" plot
    mpl.style.use('default')  # possibly reset seaborn styles

    plt.figure()

    # builtin `int` instead of the `np.int` alias missing from current NumPy
    plt.xticks(np.arange(len(data[0]), dtype=int), xticks)
    plt.yticks(np.arange(len(data), dtype=int), yticks)

    plt.setp(plt.gca().get_xticklabels(), fontsize=3, rotation='vertical')
    plt.setp(plt.gca().get_yticklabels(), fontsize=3)

    # booleans instead of 'on'/'off': any non-empty string counts as True
    plt.tick_params(
        axis='both', which='both', labelleft=True,
        bottom=False, top=False, labelbottom=True, left=False, right=False)

    plt.title(title)
    plt.xlabel(sfuncs[0].__doc__)
    plt.ylabel('absolute mean of Jacobian')

    plt.imshow(
        data,
        interpolation='nearest', cmap=get_matrix_cmap(),
        vmin=0, vmax=3)
    plt.colorbar(ticks=range(np.max(data)+1), extend='min')

    # mark "zoomed" columns
    sel_one, netws_one = select_column_by_jacobian(inp['data'], np.array([
        [1,0,0,1],
        [1,1,0,0],
        [1,1,1,0],
        [0,0,1,1]
    ]))
    sel_two, netws_two = select_column_by_jacobian(inp['data'], np.array([
        [1,0,0,1],
        [1,1,0,0],
        [1,1,1,0],
        [1,0,0,1]
    ]))

    sel_xticks = list(plt.gca().get_xticklabels())
    for column in (sel_one, sel_two):
        sel_xticks[column].set_weight('bold')
    plt.gca().set_xticklabels(sel_xticks)

    # mark "zoomed" rows
    sel_blue, netws_blue = select_row_by_count(inp['data'], data, 1)
    sel_red, netws_red = select_row_by_count(inp['data'], data, 2)

    sel_yticks = list(plt.gca().get_yticklabels())
    for row in (sel_blue, sel_red):
        sel_yticks[row].set_weight('bold')
    plt.gca().set_yticklabels(sel_yticks)

    # save figure
    save_figure(fname, bbox_inches='tight')

    # plot best examples
    plot_individuals(netws_one, '{}_col_one'.format(fname), vfunc)
    plot_individuals(netws_two, '{}_col_two'.format(fname), vfunc)
    plot_individuals(netws_blue, '{}_row_blue'.format(fname))
    plot_individuals(netws_red, '{}_row_red'.format(fname))

    print('Done')