Esempio n. 1
0
def errorbar_plot(data, x_spec, y_spec, fname):
    """ Draw mean/stdev errorbars of y-values grouped by their x-value

        `x_spec` and `y_spec` are `(axis label, extractor)` pairs. Each
        extractor is called as `func(syst, mat)` for every `(syst, mat, _)`
        entry of `data`; entries for which either extractor returns `None`
        are ignored. The figure is saved as `images/<fname>` and closed.
    """
    x_label, x_func = x_spec
    y_label, y_func = y_spec

    # group the y-values by the x-value they were observed at
    grouped = collections.defaultdict(list)
    for syst, mat, _ in data:
        x_val, y_val = x_func(syst, mat), y_func(syst, mat)
        if x_val is not None and y_val is not None:
            grouped[x_val].append(y_val)

    # one marker per distinct x-value: mean with stdev as error bar
    xs = list(grouped)
    means = [np.mean(ys) for ys in grouped.values()]
    stdevs = [np.std(ys) for ys in grouped.values()]
    plt.errorbar(xs, means, yerr=stdevs, fmt='o', clip_on=False)

    plt.title('')
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    plt.tight_layout()
    save_figure('images/%s' % fname, bbox_inches='tight')
    plt.close()
Esempio n. 2
0
def errorbar_plot(data, x_spec, y_spec, fname):
    """ Create an errorbar plot from two value extractors

        Both specs are `(label, func)` pairs, where `func(syst, mat)` is
        evaluated for each `(syst, mat, _)` triple in `data`. Triples for
        which one of the functions yields `None` are skipped. All y-values
        sharing an x-value are summarized by their mean (marker) and
        standard deviation (error bar). Output goes to `images/<fname>`.
    """
    x_label, x_func = x_spec
    y_label, y_func = y_spec

    # collect y-values per x-value
    buckets = collections.defaultdict(list)
    for syst, mat, _ in data:
        x_val = x_func(syst, mat)
        y_val = y_func(syst, mat)
        if x_val is None or y_val is None:
            continue
        buckets[x_val].append(y_val)

    # summarize every bucket
    summary = [
        (x_val, np.mean(y_vals), np.std(y_vals))
        for x_val, y_vals in buckets.items()]
    xs = [entry[0] for entry in summary]
    means = [entry[1] for entry in summary]
    spreads = [entry[2] for entry in summary]

    # plot figure
    plt.errorbar(xs, means, yerr=spreads, fmt='o', clip_on=False)

    plt.title('')
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    plt.tight_layout()
    save_figure('images/%s' % fname, bbox_inches='tight')
    plt.close()
Esempio n. 3
0
def plot_mz_distribution(data, fname='data/peaklist_filtered_assigned.csv'):
    """ Plot histogram of all MZ values in `data` and mark peak-file entries

        `data` maps names to dicts providing a `'mass'` and a non-empty
        `'intensities'` entry. The peak data read from `fname` is overlaid
        as faint red vertical lines, one per key. The result is written to
        `images/rl_mz_hist.pdf`.
    """
    # aggregate data
    mzs = []
    single_matches = []  # only needed by the optional overlay below
    for info in data.values():
        mass = info['mass']
        mzs.append(mass)

        intensity_count = len(info['intensities'])
        assert intensity_count > 0
        if intensity_count == 1:
            single_matches.append(mass)

    peak_data = read_peak_data(fname)

    # plot
    plt.figure()
    plt.hist(mzs, bins=100, alpha=0.7, linewidth=0)

    for peak_mz, _ in peak_data.items():
        plt.axvline(peak_mz, color='red', alpha=0.03)
    #for mz in single_matches:
    #    plt.axvline(mz, color='green', alpha=0.06)

    plt.title('MZ histogram of all generated products')
    plt.xlabel('MZ value')
    plt.ylabel('count')

    plt.tight_layout()
    plotter.save_figure('images/rl_mz_hist.pdf', bbox_inches='tight')
Esempio n. 4
0
def plot_correlation_histogram(motifs, data):
    """ Plot one histogram of intensity correlations per motif collection

        `motifs` is an iterable of `(collection, label)` pairs; every
        collection holds groups of compound names. For each unordered pair
        of distinct compounds within a group, all combinations of their
        intensity vectors (`data[name]['intensities']`) are correlated with
        Pearson's r. The combined figure is saved to
        `images/rl_corr_hist.pdf`.
    """
    palette = itertools.cycle(['b', 'r', 'g', 'c', 'm', 'y', 'k'])

    plt.figure()
    for motif_group, label in motifs:
        # gather correlation coefficients of this collection
        coefficients = []
        for compounds in tqdm(motif_group):
            for first in compounds:
                for second in compounds:
                    # only visit each unordered pair once
                    if first == second:
                        break
                    for vec_a in data[first]['intensities']:
                        for vec_b in data[second]['intensities']:
                            coeff, _ = scis.pearsonr(vec_a, vec_b)
                            coefficients.append(coeff)

        # draw them into the shared axes
        plotter.plot_histogram(
            coefficients, plt.gca(),
            facecolor=next(palette), alpha=0.5,
            label=label)

    plt.title('Comparison of intensity correlation distributions')
    plt.xlabel('intensity vector correlation')

    plt.legend(loc='best')
    plt.tight_layout()
    plotter.save_figure('images/rl_corr_hist.pdf', bbox_inches='tight')
Esempio n. 5
0
def single_corr_coeff_hist(reps=200):
    """ Plot distribution of single correlation coefficients for various parameters

        For every combination of `k_m` and `k_23` on a 3x3 parameter grid a
        basic system is generated and analyzed `reps` times. Each
        combination fills one row of the figure: the system itself (column
        0), the distributions of the single-run correlation coefficients
        (column 1) and the solution of the first repetition (column 2).
        The figure is saved to `images/correlation_distribution.pdf`.
    """
    def do(gs, res):
        """ Fill one grid row of `gs` per parameter combination
        """
        # `res` evenly spaced values, used for both parameters
        param_range = np.linspace(.1, 5, res)
        currow = 0
        for k_m in tqdm(param_range):
            for k_23 in tqdm(param_range):
                syst = generate_basic_system(k_m=k_m, k_23=k_23)

                single_run_matrices = []
                for r in trange(reps):
                    _,mat,sol = analyze_system(syst, repetition_num=1)
                    # runs without a correlation matrix are dropped
                    if mat is None:
                        continue

                    # keep the last quarter of the rows of `sol.T`
                    # (presumably the late-time part of the run -- confirm)
                    sol_extract = sol.T[int(len(sol.T)*3/4):]

                    # only the very first repetition is drawn; if that one
                    # was dropped above, column 2 of this row stays empty
                    if r == 0:
                        plot_system_evolution(
                            sol_extract.T,
                            plt.subplot(gs[currow,2]), show_legend=False)

                    single_run_mat = compute_correlation_matrix(np.array([sol_extract]))

                    # a 4x4 result is cut down to 3x3 by dropping the last
                    # row and column; anything else must already be 3x3
                    if single_run_mat.shape == (4, 4):
                        single_run_mat = single_run_mat[:-1,:-1]
                    assert single_run_mat.shape == (3, 3)

                    single_run_matrices.append(single_run_mat)

                plot_system(syst, plt.subplot(gs[currow,0]))

                # transposing the stacked matrices puts the run axis last,
                # so `series` holds one matrix entry across all runs
                single_run_matrices = np.asarray(single_run_matrices)
                for i, row in enumerate(single_run_matrices.T):
                    for j, series in enumerate(row):
                        # only index pairs with j < i are plotted
                        if i == j: break

                        ax = plt.subplot(gs[currow,1])
                        sns.distplot(series, ax=ax, label=r'$c_{{{},{}}}$'.format(i,j))
                        ax.set_xlim((-1,1))

                currow += 1

    # generate plots
    # grid resolution per parameter; yields res**2 figure rows
    res = 3

    plt.figure(figsize=(20,30))
    gs = mpl.gridspec.GridSpec(res**2, 3, width_ratios=[1,1,2])

    sns.set_style('white')
    plt.style.use('seaborn-poster')

    do(gs, res)

    plt.tight_layout()
    save_figure('images/correlation_distribution.pdf', bbox_inches='tight')
Esempio n. 6
0
def network_density(data):
    """ Plot network edge density vs correlation quotient

        For every `(syst, mat, _)` entry of `data` the share of non-zero
        Jacobian entries is related to the quotients of all pairs of
        above-diagonal entries of `mat`. Each quotient gets its own
        errorbar panel (mean and stdev per density); the figure is saved
        to `images/edens_quot.pdf`.
    """
    def ordered_pairs(items):
        """ All pairs `(a, b)` drawn from `items` with `a < b`
        """
        return [
            pair for pair in itertools.product(items, repeat=2)
            if pair[0] < pair[1]]

    # quotient spec -> density -> list of observed quotients
    points = collections.defaultdict(
        lambda: collections.defaultdict(list))
    for syst, mat, _ in data:
        dim = syst.jacobian.shape[0]
        density = np.count_nonzero(syst.jacobian) / (dim * (dim+1))

        # matrix positions above the diagonal, then all pairs of those
        positions = ordered_pairs(range(dim))
        for pair in ordered_pairs(positions):
            numer, denom = pair
            if mat[denom] != 0:
                quotient = mat[numer] / mat[denom]
            else:
                quotient = 0
            points[pair][density].append(quotient)

    # plot figure
    plt.figure(figsize=(6, 4*len(points)))
    grid = gridspec.GridSpec(len(points), 1)

    def draw(ax, series, title):
        """ Draw mean/stdev per density into `ax`
        """
        xs = list(series)
        means = [np.mean(quots) for quots in series.values()]
        stdevs = [np.std(quots) for quots in series.values()]

        ax.errorbar(xs, means, yerr=stdevs, fmt='o', clip_on=False)

        ax.set_title(title)
        ax.set_xlabel('motif edge density')
        ax.set_ylabel('correlation quotient')

    for row, (spec, series) in enumerate(points.items()):
        (x1, y1), (x2, y2) = spec
        draw(
            plt.subplot(grid[row]), series,
            r'Quotient: $C_{%d%d} / C_{%d%d}$' % (x1, y1, x2, y2))

    plt.tight_layout()
    save_figure('images/edens_quot.pdf', bbox_inches='tight')
    plt.close()
Esempio n. 7
0
def network_density(data):
    """ Plot network edge density vs correlation quotient

        Every `(syst, mat, _)` triple contributes its Jacobian density
        (non-zero entries divided by `n * (n + 1)`) together with the
        quotient `mat[p1] / mat[p2]` for each pair of above-diagonal
        positions `p1 < p2` (0 if the denominator is zero). One errorbar
        panel per quotient is drawn and stored in `images/edens_quot.pdf`.
    """
    def increasing_pairs(it):
        """ Pairs `(a, b)` of elements of `it` satisfying `a < b`
        """
        candidates = itertools.product(it, repeat=2)
        return [(a, b) for a, b in candidates if a < b]

    # (position pair) -> density -> quotients
    collected = collections.defaultdict(lambda: collections.defaultdict(list))
    for syst, mat, _ in data:
        size = syst.jacobian.shape[0]
        max_edge_num = size * (size + 1)
        dens = np.count_nonzero(syst.jacobian) / max_edge_num

        upper_positions = increasing_pairs(range(size))
        for spec in increasing_pairs(upper_positions):
            top, bottom = spec
            value = mat[top] / mat[bottom] if mat[bottom] != 0 else 0
            collected[spec][dens].append(value)

    # plot figure
    plt.figure(figsize=(6, 4 * len(collected)))
    layout = gridspec.GridSpec(len(collected), 1)

    def render(ax, per_density, title):
        """ Errorbar plot of the quotients grouped by density
        """
        summary = [
            (dens, np.mean(quots), np.std(quots))
            for dens, quots in per_density.items()]
        ax.errorbar(
            [entry[0] for entry in summary],
            [entry[1] for entry in summary],
            yerr=[entry[2] for entry in summary],
            fmt='o', clip_on=False)

        ax.set_title(title)
        ax.set_xlabel('motif edge density')
        ax.set_ylabel('correlation quotient')

    for panel, (spec, per_density) in enumerate(collected.items()):
        (x1, y1), (x2, y2) = spec
        render(plt.subplot(layout[panel]), per_density,
               r'Quotient: $C_{%d%d} / C_{%d%d}$' % (x1, y1, x2, y2))

    plt.tight_layout()
    save_figure('images/edens_quot.pdf', bbox_inches='tight')
    plt.close()
Esempio n. 8
0
def plot_result(motifs, data, fname_app='', sub_num=10):
    """ Create result plots for a selection of motifs

        At most `sub_num` motifs, evenly spread over `motifs`, are drawn
        with one figure row each: correlation matrix, intensity series and
        all candidate correlations. A second figure shows the histogram of
        every correlation collected on the way. Files are written to
        `images/rl_motifs<fname_app>.pdf` and
        `images/rl_corr_hist<fname_app>.pdf`.
    """
    mpl.style.use('default')
    sub_num = min(sub_num, len(motifs))
    print(' > Plotting results ({})'.format(fname_app[1:]))

    # overview plots
    plt.figure(figsize=(30, 4 * sub_num))
    grid = mpl.gridspec.GridSpec(sub_num, 3, width_ratios=[1, 2, 1])

    # evenly spaced motif positions
    positions = np.linspace(0, len(motifs), num=sub_num, endpoint=False)
    motif_indices = [int(pos) for pos in positions]

    collected_corrs = []
    for row, motif_idx in tqdm(enumerate(motif_indices), total=sub_num):
        c1, c2, c3 = motifs[motif_idx]
        compounds = (c1, c2, c3)

        # plot all possible correlations and select optimal one
        corr_ax = plt.subplot(grid[row, 2]) if row < sub_num else None
        sel, all_corrs = plot_all_correlations(compounds, data, corr_ax)

        # intensities of the selected assignment, one per compound
        sols = [
            data[name]['intensities'][sel[name]]
            for name in [c1, c2, c3]]

        # compute correlation matrix
        corr_mat = get_correlation_matrix(sols)
        collected_corrs.extend(all_corrs)

        # plot rest
        plotter.plot_corr_mat(corr_mat, plt.subplot(grid[row, 0]))

        series_ax = plt.subplot(grid[row, 1])
        plotter.plot_system_evolution(sols, series_ax, xlabel='sample')
        series_ax.set_title('{}\n{}\n{}'.format(c1, c2, c3))

    plt.tight_layout()
    motif_file = 'images/rl_motifs{}.pdf'.format(fname_app)
    plotter.save_figure(motif_file, bbox_inches='tight')

    # correlation histogram
    plt.figure()
    plotter.plot_histogram(collected_corrs, plt.gca())

    plt.tight_layout()
    hist_file = 'images/rl_corr_hist{}.pdf'.format(fname_app)
    plotter.save_figure(hist_file, bbox_inches='tight')
Esempio n. 9
0
def single_corr_coeff_hist(reps=5000):
    """ Plot distribution of single correlation coefficients

        The basic system and every tenth system returned by
        `add_node_to_system` are each solved `reps` times. The
        coefficients of the resulting 3x3 correlation matrices are shown
        as overlaid histograms, and the figure is saved to
        `images/correlation_distribution.pdf`.
    """
    def sample_matrices(syst):
        """ Solve `syst` `reps` times and stack the 3x3 correlation matrices
        """
        matrices = []
        for _ in trange(reps):
            sol = solve_system(syst)

            # only the last quarter of the rows of `sol.T` is used
            tail = sol.T[int(len(sol.T)*3/4):]
            corr = compute_correlation_matrix(np.array([tail]))

            # cut 4x4 results down to the leading 3x3 block
            if corr.shape == (4, 4):
                corr = corr[:-1,:-1]
            assert corr.shape == (3, 3)

            matrices.append(corr)
        return np.asarray(matrices)

    def draw(syst, ax):
        """ Histogram every coefficient with index pair j < i into `ax`
        """
        stacked = sample_matrices(syst)

        palette = cycle(['b', 'r', 'g', 'c', 'm', 'y', 'k'])
        for i, row in enumerate(stacked.T):
            for j, series in enumerate(row):
                if i == j:
                    break
                # values equal to 1 are left out of the histogram
                plot_histogram(
                    series[series!=1], ax,
                    label=r'$c_{{{},{}}}$'.format(i,j),
                    facecolor=next(palette), alpha=0.5,
                    bins=100)

    # data
    base_system = generate_basic_system()
    extended = add_node_to_system(base_system)[::10]
    print('#more', len(extended))

    # plot
    _, axes = plt.subplots(len(extended), 2, figsize=(9,20))

    draw(base_system, axes[0,0])
    print()
    for row, system in tqdm(enumerate(extended), total=len(extended)):
        # the first left-hand cell already holds the basic system's histogram
        if row > 0:
            plot_system(system, axes[row,0])
        draw(system, axes[row,1])

    plt.tight_layout()
    save_figure('images/correlation_distribution.pdf', bbox_inches='tight')
Esempio n. 10
0
def plot_system_overview(data, sample_size=20):
    """ Plot a random sample of systems next to their correlations

        Up to `sample_size` entries of `data` are drawn at random (kept in
        their original order). Each `(system, corr_mat, solution)` entry
        fills one figure row: system, correlation matrix and system
        evolution. The figure is saved to `images/overview.pdf`.
    """
    # extract sample
    count = min(len(data), sample_size)
    chosen = sorted(random.sample(range(len(data)), count))
    dsample = [data[pos] for pos in chosen]

    # plot sample
    plt.figure(figsize=(13, 4*len(dsample)))
    grid = gridspec.GridSpec(len(dsample), 3, width_ratios=[1, 1, 2])

    for row, entry in enumerate(dsample):
        system, corr_mat, solution = entry
        plot_system(system, plt.subplot(grid[row, 0]))
        plot_corr_mat(corr_mat, plt.subplot(grid[row, 1]))
        plot_system_evolution(solution, plt.subplot(grid[row, 2]))

    plt.tight_layout()
    save_figure('images/overview.pdf', bbox_inches='tight', dpi=300)
    plt.close()
Esempio n. 11
0
def main(fname, skip_filtered=True):
    """ Main interface

        If `fname` is an existing file, all systems stored in it are
        analyzed in parallel and the results are cached. With
        `skip_filtered` set, results without a correlation matrix are
        discarded; otherwise all results are kept and clustered before
        caching.

        If `fname` is not a file, it is interpreted as the string
        description of a single system, which is analyzed and plotted to
        `images/single_case.pdf`.
    """
    if os.path.isfile(fname):
        systems = load_systems(fname)
        if systems.ndim == 0:
            # 0-d array wrapping a single object; `ndarray.item()` replaces
            # `np.asscalar`, which no longer exists in current NumPy
            systems = [systems.item()]
        print('Integrating %d systems' % len(systems))

        # use most of the cores, but never fewer than one worker
        # (`Pool(0)` raises a ValueError on single-core machines)
        core_num = max(1, int(multiprocessing.cpu_count() * 4 / 5))
        print('Using %d cores' % core_num)

        data = []
        with tqdm(total=len(systems)) as pbar:
            with multiprocessing.Pool(core_num) as p:
                for res in p.imap(analyze_system, systems, chunksize=10):
                    if not skip_filtered or res[1] is not None:
                        data.append(res)
                    pbar.update()
        print('Found result for %d systems' % len(data))

        if not skip_filtered:
            data = cluster_data(data)
        cache_data(data)
    else:
        syst = system_from_string(fname)
        syst, mat, sol = analyze_system(syst, use_ode_sde_diff=False)
        if mat is None:
            print('No sensible steady-state found')
        else:
            print(mat)

        plt.figure(figsize=(20, 6))
        gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 2])
        plt.style.use('seaborn-poster')

        plot_system(syst, plt.subplot(gs[0, 0]))
        # without a steady state there is no matrix to draw; the middle
        # panel is left out, as the other plotting routines do
        if mat is not None:
            plot_corr_mat(mat, plt.subplot(gs[0, 1]))
        plot_system_evolution(sol, plt.subplot(gs[0, 2]))

        plt.tight_layout()
        save_figure('images/single_case.pdf', bbox_inches='tight', dpi=300)
        plt.close()
Esempio n. 12
0
def simulate_graph(graph):
    """ Generate dynamics on graph

        The adjacency matrix of `graph` (diagonal set to -1) is used as
        Jacobian of an `SDESystem`. Nodes whose column of the Jacobian
        sums to zero once the diagonal entry is compensated are treated as
        having zero in-degree and receive an input (entries of `D` and `E`
        set to 1). The correlation matrix of the remaining nodes and the
        system evolution are plotted to
        `images/peak_network_simulation.pdf`.
    """
    # create system
    # `to_numpy_matrix` was removed in networkx 3.0; prefer `to_numpy_array`
    # and only fall back on installations that predate it
    to_array = getattr(nx, 'to_numpy_array', None)
    if to_array is None:
        to_array = nx.to_numpy_matrix
    J = np.array(to_array(graph))  # plain ndarray copy in both cases
    np.fill_diagonal(J, -1)
    D = np.zeros((J.shape[0],))
    E = np.zeros((J.shape[0],))
    I = np.ones((J.shape[0],))

    # add input to nodes of zero in-degree
    zero_indgr = []
    for i, row in enumerate(J.T):
        inp = np.sum(row)
        inp += 1 # compensate for self-inhibition
        if inp == 0:
            zero_indgr.append(i)

    D[zero_indgr] = 1
    E[zero_indgr] = 1
    print('>', '{}/{} nodes with zero indegree'.format(len(zero_indgr), len(graph.nodes())))

    # simulate system
    syst = SDESystem(J, D, E, I)
    syst, mat, sol = analyze_system(syst, filter_trivial_ss=False)

    # plot results
    plt.figure(figsize=(30, 15))
    gs = mpl.gridspec.GridSpec(1, 2, width_ratios=[1, 2])

    if mat is not None:
        # only keep non-zero indegree node correlations
        mat = extract_sub_matrix(mat, zero_indgr)

        node_inds = list_diff(range(J.shape[0]), zero_indgr)
        used_nodes = np.array(graph.nodes())[node_inds]

        plot_corr_mat(
            mat, plt.subplot(gs[0]),
            show_values=False, labels=used_nodes)
    plot_system_evolution(sol, plt.subplot(gs[1]), show_legend=False)

    save_figure('images/peak_network_simulation.pdf', bbox_inches='tight', dpi=300)
Esempio n. 13
0
def investigate_reactions(
    compounds_level0, intensities_level0,
    reaction_data
):
    """ Find out if reactions induce correlation patterns

        The initial compounds are combined once using `reaction_data` and
        the products are matched against measured masses. For every
        matched product with two educts, all intensity vectors of both
        educts are correlated (Pearson's r). One histogram of these
        correlations per reaction is written to
        `images/rl_reaction_patterns.pdf`.
    """
    # combine initial compounds
    comp_tmp = iterate_once(compounds_level0, reaction_data)

    intensities_level1 = match_masses(comp_tmp)
    print('Found {} new compounds'.format(len(intensities_level1)))

    intensities_all = {}
    intensities_all.update(intensities_level0)
    intensities_all.update(intensities_level1)

    # compute all correlations for all compounds
    rea_corrs = []
    for compound in tqdm(intensities_level1.keys()):
        c1, rea, c2 = parse_compound_name(compound)
        # products with a single educt have nothing to correlate
        if c2 == 'None':
            continue

        for int1 in intensities_all[c1]:
            for int2 in intensities_all[c2]:
                cc, _ = scis.pearsonr(int1, int2)
                rea_corrs.append({'reaction': rea, 'correlation': cc})

    # explicit columns keep `df.reaction` available even when no
    # correlation was found at all (an empty frame has no columns otherwise)
    df = pd.DataFrame(rea_corrs, columns=['reaction', 'correlation'])

    # plot result
    plt.figure()
    for rea in df.reaction.unique():
        corrs = df[df.reaction==rea].correlation
        sns.distplot(
            corrs, label=rea,
            kde=False, bins=np.linspace(-1, 1, 200))

    plt.legend(loc='best')

    plt.tight_layout()
    plotter.save_figure('images/rl_reaction_patterns.pdf', bbox_inches='tight')
    plt.close()
Esempio n. 14
0
def plot_system_overview(data, sample_size=20):
    """ Plot systems vs correlations for a random subset of `data`

        The subset holds `min(len(data), sample_size)` randomly chosen
        entries in their original order. Per entry one row is drawn with
        the system, its correlation matrix and its evolution. The result
        is stored as `images/overview.pdf`.
    """
    # extract sample
    all_positions = range(len(data))
    picked = random.sample(all_positions, min(len(data), sample_size))
    picked.sort()
    dsample = [data[pos] for pos in picked]

    # plot sample
    row_count = len(dsample)
    plt.figure(figsize=(13, 4 * row_count))
    layout = gridspec.GridSpec(row_count, 3, width_ratios=[1, 1, 2])

    for row, (system, corr_mat, solution) in enumerate(dsample):
        system_ax = plt.subplot(layout[row, 0])
        plot_system(system, system_ax)

        matrix_ax = plt.subplot(layout[row, 1])
        plot_corr_mat(corr_mat, matrix_ax)

        evolution_ax = plt.subplot(layout[row, 2])
        plot_system_evolution(solution, evolution_ax)

    plt.tight_layout()
    save_figure('images/overview.pdf', bbox_inches='tight', dpi=300)
    plt.close()
Esempio n. 15
0
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Compare node degree and correlation

        For every node of every system's Jacobian graph, the node degree
        is paired with the mean absolute correlation to its neighbors
        (0 for nodes without neighbors other than themselves). Both
        quantities are shown as a 2D histogram, titled with their
        correlation, and saved to `images/ndegree_corr.pdf`.
    """
    # get data
    ndegs, avg_corrs = [], []
    node_num = -1
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for node in graph.nodes():
            ndegs.append(graph.degree(node))

            neighbor_corrs = [
                abs(mat[node, other])
                for other in graph.neighbors(node) if node != other]
            if len(neighbor_corrs) > 0:
                avg_corrs.append(np.mean(neighbor_corrs))
            else:
                avg_corrs.append(0)
        node_num = graph.number_of_nodes()
    assert node_num >= 0, 'Invalid data found'

    # plot data
    heatmap, xedges, yedges = np.histogram2d(
        avg_corrs, ndegs,
        bins=(bin_num_x, bin_num_y))
    # degrees run along the horizontal, correlations along the vertical axis
    extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
    width = extent[1] - extent[0]
    height = extent[3] - extent[2]
    plt.imshow(
        heatmap[::-1],
        extent=extent, interpolation='nearest',
        aspect=abs(width/height))
    plt.colorbar()

    cc = get_correlation(ndegs, avg_corrs)
    plt.title(r'Corr: $%.2f$' % cc)

    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
Esempio n. 16
0
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Relate the degree of each node to its average neighbor correlation

        Each `(syst, mat, _)` entry is turned into a directed graph built
        from `syst.jacobian`. Per node, the degree and the mean of
        `abs(mat[node, neighbor])` over all neighbors except the node
        itself are recorded (0 if there is none). The joint distribution
        is drawn as heatmap and stored in `images/ndegree_corr.pdf`.
    """
    def mean_neighbor_correlation(graph, mat, node):
        """ Mean absolute correlation of `node` to its other neighbors
        """
        values = [
            abs(mat[node, nb]) for nb in graph.neighbors(node) if node != nb
        ]
        return np.mean(values) if len(values) > 0 else 0

    # get data
    ndegs = []
    avg_corrs = []
    node_num = -1
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for node in graph.nodes():
            ndegs.append(graph.degree(node))
            avg_corrs.append(mean_neighbor_correlation(graph, mat, node))
        node_num = graph.number_of_nodes()
    assert node_num >= 0, 'Invalid data found'

    # plot data
    bins = (bin_num_x, bin_num_y)
    heatmap, corr_edges, deg_edges = np.histogram2d(avg_corrs,
                                                    ndegs,
                                                    bins=bins)
    extent = [deg_edges[0], deg_edges[-1], corr_edges[0], corr_edges[-1]]
    aspect = abs((extent[1] - extent[0]) / (extent[3] - extent[2]))
    flipped = heatmap[::-1]  # reverse the correlation-bin axis before display
    plt.imshow(flipped,
               extent=extent,
               interpolation='nearest',
               aspect=aspect)
    plt.colorbar()

    cc = get_correlation(ndegs, avg_corrs)
    plt.title(r'Corr: $%.2f$' % cc)

    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
Esempio n. 17
0
def plot_individuals(examples, fname, val_func=None):
    """ Plot a selection of individual results

        `examples` is either a list of network pairs `(raw, enhanced)` or
        a list of single networks `(system, corr_mat, solution)`; the
        first entry decides which layout is used. Pairs are drawn as
        system and correlation matrix of both networks, the evolution of
        the enhanced one and, if `val_func` is given, a marker cell
        showing `handle_enh_entry(raw, enhanced, val_func)`. The figure is
        saved as `<fname without .pdf>_zoom.pdf`.
    """
    # without `val_func` the grid has one column less
    mod = -1 if val_func is None else 0

    # plot selected networks
    if len(examples[0]) == 2: # is pair of networks
        col_num = 6+mod
        # matplotlib expects exactly one width ratio per column, so the
        # list is cut down to the actual number of columns
        width_ratios = [1, 2, 1, 2, 1+(-3*mod), 4][:col_num]

        plt.figure(figsize=(50, 4*len(examples)))
        gs = mpl.gridspec.GridSpec(
            len(examples), col_num,
            width_ratios=width_ratios)
    else: # each entry is single network
        plt.figure(figsize=(25, 4*len(examples)))
        gs = mpl.gridspec.GridSpec(len(examples), 3, width_ratios=[1, 1, 2])

    for i, net in enumerate(examples):
        if len(net) == 2: # pair of networks
            raw_p, enh_p = net

            # entries may be wrapped in an additional 2-tuple whose
            # second element is the actual network
            if len(raw_p) == 2:
                _, raw = raw_p
                _, enh = enh_p
            else:
                raw = raw_p
                enh = enh_p

            plot_system(raw[0], plt.subplot(gs[i, 0]))
            plot_corr_mat(raw[1], plt.subplot(gs[i, 1]))
            plot_system(enh[0], plt.subplot(gs[i, 2]))
            plot_corr_mat(enh[1], plt.subplot(gs[i, 3]))
            plot_system_evolution(enh[2], plt.subplot(gs[i, 5+mod]))

            # plot marker
            mark_ax = plt.subplot(gs[i, 4])
            if val_func is not None:
                mark_ax.imshow(
                    [[handle_enh_entry(raw_p, enh_p, val_func)]],
                    cmap=get_matrix_cmap(), vmin=0, vmax=3)
                mark_ax.axis('off')
            else:
                print('Tried to use `val_func`, but it\'s None')
        else: # single network
            plot_system(net[0], plt.subplot(gs[i, 0]))
            # broken results come without correlation matrix
            if net[1] is not None:
                plot_corr_mat(net[1], plt.subplot(gs[i, 1]))
            plot_system_evolution(net[2], plt.subplot(gs[i, 2]))

    plt.tight_layout()
    save_figure('%s_zoom.pdf' % fname.replace('.pdf', ''), bbox_inches='tight', dpi=300)
    plt.close()
Esempio n. 18
0
def threshold_influence(inp,
                        ax=None,
                        value_func=get_sign_changes,
                        resolution=500):
    """ Investigate influence of threshold

        The `(threshold, value)` pairs computed by `aggregate_motif_data`
        from `inp['data']` are plotted on a logarithmic threshold axis;
        pairs with a value of at most zero are drawn in red. The range up
        to the returned threshold `imp_thres` is shaded.

        If `ax` is given, the plot is drawn into it and nothing is saved.
        Otherwise a new figure is created, annotated and saved to
        `images/threshold_influence_<name>.pdf`, where `<name>` is the name
        of `value_func` without its first four characters.

        Returns the `area` value reported by `aggregate_motif_data`.
    """
    def plot_matrix(data):
        """ Draw `data` as heatmap without ticks (used for inset plots)
        """
        # booleans instead of 'off': current Matplotlib treats any
        # non-empty string as true and would show the ticks
        plt.tick_params(axis='both',
                        which='both',
                        labelleft=False,
                        bottom=False,
                        top=False,
                        labelbottom=False,
                        left=False,
                        right=False)

        plt.imshow(data,
                   interpolation='nearest',
                   cmap=get_matrix_cmap(),
                   vmin=0,
                   vmax=3)
        plt.colorbar(ticks=range(np.max(data) + 1), extend='min')

    # produce data
    pairs, area, imp_thres = aggregate_motif_data(np.asarray(inp['data']),
                                                  value_func=value_func,
                                                  resolution=resolution)

    # plot result
    value_func_name = value_func.__name__[4:]

    # only open a new figure when no axes were handed in; otherwise an
    # empty figure would be created and never closed
    if ax is None:
        plt.figure()
    else:
        plt.sca(ax)

    nz_vec = [(t, m) for t, m in pairs if m > 0]
    z_vec = [(t, m) for t, m in pairs if m <= 0]

    if len(nz_vec) > 0:
        plt.plot(*zip(*nz_vec), 'o')
    if len(z_vec) > 0:
        plt.plot(*zip(*z_vec), 'o', color='red')

    plt.axvspan(xmin=min([t for t, m in pairs]),
                xmax=imp_thres,
                alpha=0.1,
                color='blue')
    if ax is None:
        plt.annotate('half the correlation stdev ({:.02})'.format(imp_thres),
                     xy=(imp_thres, .025),
                     xycoords='data',
                     xytext=(50, 20),
                     textcoords='offset points',
                     arrowprops=dict(arrowstyle='->'))

    plt.xscale('log')
    plt.title('Influence of binning threshold on number of {}'.format(
        value_func_name))
    plt.xlabel('binning threshold')
    plt.ylabel('frequency of {}'.format(value_func_name))

    # inside plots
    #plt.style.use('default')

    #ax = plt.axes([0.1, 0.5, .2, .2])
    #plot_matrix(first_data)

    #ax = plt.axes([0.7, 0.4, .2, .2])
    #plot_matrix(last_data)

    #ax = plt.axes([0.4, 0.2, .2, .2])
    #plot_matrix(std_data)

    # save result
    if ax is None:
        save_figure(
            'images/threshold_influence_{}.pdf'.format(value_func_name),
            bbox_inches='tight')

    return area
Esempio n. 19
0
def plot_individuals(examples, fname, val_func=None):
    """ Plot a selection of individual results

        The first entry of `examples` decides the layout: entries of
        length two are treated as `(raw, enhanced)` network pairs, all
        others as single networks `(system, corr_mat, solution)`. For
        pairs, both systems and correlation matrices plus the evolution of
        the enhanced network are drawn; with `val_func` set, an extra
        marker cell shows `handle_enh_entry(raw, enhanced, val_func)`.
        Output is written to `<fname without .pdf>_zoom.pdf`.
    """
    # the marker column only exists when `val_func` is available
    mod = -1 if val_func is None else 0

    # plot selected networks
    if len(examples[0]) == 2:  # is pair of networks
        col_num = 6 + mod
        # one width ratio per column is required by matplotlib, hence the
        # list is truncated to the number of columns actually used
        width_ratios = [1, 2, 1, 2, 1 + (-3 * mod), 4][:col_num]

        plt.figure(figsize=(50, 4 * len(examples)))
        gs = mpl.gridspec.GridSpec(len(examples),
                                   col_num,
                                   width_ratios=width_ratios)
    else:  # each entry is single network
        plt.figure(figsize=(25, 4 * len(examples)))
        gs = mpl.gridspec.GridSpec(len(examples), 3, width_ratios=[1, 1, 2])

    for i, net in enumerate(examples):
        if len(net) == 2:  # pair of networks
            raw_p, enh_p = net

            # unwrap entries that carry the network as second element of
            # an additional 2-tuple
            if len(raw_p) == 2:
                _, raw = raw_p
                _, enh = enh_p
            else:
                raw = raw_p
                enh = enh_p

            plot_system(raw[0], plt.subplot(gs[i, 0]))
            plot_corr_mat(raw[1], plt.subplot(gs[i, 1]))
            plot_system(enh[0], plt.subplot(gs[i, 2]))
            plot_corr_mat(enh[1], plt.subplot(gs[i, 3]))
            plot_system_evolution(enh[2], plt.subplot(gs[i, 5 + mod]))

            # plot marker
            mark_ax = plt.subplot(gs[i, 4])
            if val_func is not None:
                mark_ax.imshow([[handle_enh_entry(raw_p, enh_p, val_func)]],
                               cmap=get_matrix_cmap(),
                               vmin=0,
                               vmax=3)
                mark_ax.axis('off')
            else:
                print('Tried to use `val_func`, but it\'s None')
        else:  # single network
            plot_system(net[0], plt.subplot(gs[i, 0]))
            # results without correlation matrix keep an empty middle cell
            if net[1] is not None:
                plot_corr_mat(net[1], plt.subplot(gs[i, 1]))
            plot_system_evolution(net[2], plt.subplot(gs[i, 2]))

    plt.tight_layout()
    save_figure('%s_zoom.pdf' % fname.replace('.pdf', ''),
                bbox_inches='tight',
                dpi=300)
    plt.close()
Esempio n. 20
0
def plot_result(inp, vfunc, sfuncs, title, fname):
    """ Plot generated matrix

        `sfuncs` can either be a list of functions or a tuple
        `(xtick_func, spec)` where `spec` is a string of the form:
            * cluster:euclidean
            * cluster:hamming
        (generally every metric for scipy.spatial.distance.pdist)

        In the tuple case, the preprocessed matrix is drawn as clustermap
        with clustered columns. Otherwise a plain heatmap is drawn, the
        tick labels of selected columns and rows are set in bold and the
        corresponding networks are plotted via `plot_individuals`. The
        main figure is saved to `fname`.
    """
    print('Plotting "{}"'.format(fname), end='... ', flush=True)

    # preprocess data
    data, xticks, yticks = preprocess_data(inp['data'], vfunc, sfuncs)

    # stop, it's plotting time!
    if isinstance(sfuncs, tuple):  # there will be clustering
        _, spec = sfuncs
        metr = spec.split(':')[1]

        # remove noisy signals for clustering
        data[data < 0] = 0

        # one row per y-tick; built directly because
        # `DataFrame.from_items` no longer exists in current pandas
        df = pd.DataFrame(data,
                          index=list(yticks)[:len(data)],
                          columns=xticks)

        plt.figure()
        cg = sns.clustermap(df,
                            cmap=get_matrix_cmap(),
                            vmin=0,
                            vmax=3,
                            row_cluster=False,
                            metric=metr)

        plt.setp(cg.ax_heatmap.xaxis.get_ticklabels(), rotation=90, size=6)
        plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, size=5)

        save_figure(fname, bbox_inches='tight')
        plt.close()
    else:
        # "normal" plot
        mpl.style.use('default')  # possibly reset seaborn styles

        plt.figure()

        # builtin `int` replaces the removed alias `np.int`
        plt.xticks(np.arange(len(data[0]), dtype=int), xticks)
        plt.yticks(np.arange(len(data), dtype=int), yticks)

        plt.setp(plt.gca().get_xticklabels(), fontsize=3, rotation='vertical')
        plt.setp(plt.gca().get_yticklabels(), fontsize=3)

        # booleans instead of 'on'/'off': current Matplotlib treats any
        # non-empty string as true, which would show all tick marks
        plt.tick_params(axis='both',
                        which='both',
                        labelleft=True,
                        bottom=False,
                        top=False,
                        labelbottom=True,
                        left=False,
                        right=False)

        plt.title(title)
        plt.xlabel(sfuncs[0].__doc__)
        plt.ylabel('absolute mean of Jacobian')

        plt.imshow(data,
                   interpolation='nearest',
                   cmap=get_matrix_cmap(),
                   vmin=0,
                   vmax=3)
        plt.colorbar(ticks=range(np.max(data) + 1), extend='min')

        # mark "zoomed" columns
        sel_one, netws_one = select_column_by_jacobian(
            inp['data'],
            np.array([[1, 0, 0, 1], [1, 1, 0, 0], [1, 1, 1, 0], [0, 0, 1, 1]]))
        sel_two, netws_two = select_column_by_jacobian(
            inp['data'],
            np.array([[1, 0, 0, 1], [1, 1, 0, 0], [1, 1, 1, 0], [1, 0, 0, 1]]))

        sel_xticks = list(plt.gca().get_xticklabels())
        sel_xticks[sel_one].set_weight('bold')
        sel_xticks[sel_two].set_weight('bold')
        plt.gca().set_xticklabels(sel_xticks)

        # mark "zoomed" rows
        sel_blue, netws_blue = select_row_by_count(inp['data'], data, 1)
        sel_red, netws_red = select_row_by_count(inp['data'], data, 2)

        sel_yticks = list(plt.gca().get_yticklabels())
        sel_yticks[sel_blue].set_weight('bold')
        sel_yticks[sel_red].set_weight('bold')
        plt.gca().set_yticklabels(sel_yticks)

        # save figure
        save_figure(fname, bbox_inches='tight')

        # plot best examples
        plot_individuals(netws_one, '{}_col_one'.format(fname), vfunc)
        plot_individuals(netws_two, '{}_col_two'.format(fname), vfunc)

        plot_individuals(netws_blue, '{}_row_blue'.format(fname))
        plot_individuals(netws_red, '{}_row_red'.format(fname))

    print('Done')
Esempio n. 21
0
def find_optimal_assignments(motifs, initial_compound_names):
    """ Find optimal compound assignments by (weighted) randomly selecting
        motifs of low initial assignment number and choose assignments
        which maximize intensity correlation coefficients.

        Goal:
            Enhance partially annotated MS peak file

        All steps:
            * Read compound/reaction data
                * note known MZ values
            * generate new compounds using reaction rules
                * compute theoretical MZ values
            * find motifs in all available compounds
                * assume that compounds in motifs have high intensity correlations

            * for each compound receive all possible annotations from peak file
            * use randomized iterative procedure to find "optimal" MZ assignments

        Arguments:
            motifs: iterable of 5-tuples `(c1, c2, c3, ints, info)`.
                `ints` maps a compound name to the sequence of its candidate
                intensity vectors; `info` is a dict which is merged into one
                lookup table that is later indexed as `[name]['mass']`.
            initial_compound_names: container of compound names; members are
                drawn in red as "initial".

        Side effects:
            Runs the randomized assignment 10 times, draws one scatter row
            per run and stores the figure as 'images/assignments.pdf'.
            For every run the summed intensities of 'Hexose' are printed
            (if that compound got assigned). Nothing is returned.
    """
    def get_assignment_number(entry):
        """ Number of candidate combinations of the three motif compounds
        """
        c1, c2, c3, ints, _ = entry
        return len(ints[c1]) * len(ints[c2]) * len(ints[c3])

    def strongest(corrs):
        """ Key whose correlation coefficient has the largest magnitude
        """
        return max(corrs, key=lambda k: abs(corrs[k]))

    def assign(motifs):
        """ Perform one randomized assignment run over all motifs

            Returns `(assignments, info_all, single_assignment_names)`.
        """
        info_all = {}
        assignments = {}
        single_assignment_names = []

        # descending order: motifs with few candidate combinations end up
        # at high indices, which receive the largest selection weights
        sorted_motifs = sorted(
            motifs, key=get_assignment_number,
            reverse=True)
        while sorted_motifs:
            # weighted choice of starting motif
            size = len(sorted_motifs)

            # shift exponents by the largest one before exponentiating:
            # same probabilities, but no overflow to inf (-> nan) for
            # long motif lists
            weights = np.exp(np.arange(size) - (size - 1))
            probs = weights / np.sum(weights)

            chosen = int(np.random.choice(size, p=probs))
            # pop by position; an equality search could hit an equal
            # earlier entry or choke on comparing intensity vectors
            c1, c2, c3, ints, info = sorted_motifs.pop(chosen)

            # process motif
            comps = c1, c2, c3
            info_all.update(info)

            # single matches
            for c in comps:
                if len(ints[c]) != 1:
                    continue

                if c in assignments:
                    assert ints[c][0] == assignments[c]
                else:
                    assignments[c] = ints[c][0]
                    single_assignment_names.append(c)

            # multiple matches
            # (`break` on equality pairs every compound only with the ones
            # listed before it in `comps`)
            for first in comps:
                for second in comps:
                    if first == second:
                        break

                    first_known = first in assignments
                    second_known = second in assignments

                    # skip if compounds are already assigned
                    if first_known and second_known:
                        continue

                    # compute correlations and choose the highest one;
                    # an index of None marks an already assigned compound
                    if not first_known and not second_known:
                        corrs = {}
                        for i, int1 in enumerate(ints[first]):
                            for j, int2 in enumerate(ints[second]):
                                cc, _ = scis.pearsonr(int1, int2)
                                corrs[(i, j)] = cc
                        first_idx, second_idx = strongest(corrs)
                    elif first_known:
                        int1 = assignments[first]
                        corrs = {}
                        for j, int2 in enumerate(ints[second]):
                            cc, _ = scis.pearsonr(int1, int2)
                            corrs[j] = cc
                        first_idx, second_idx = None, strongest(corrs)
                    else:
                        int2 = assignments[second]
                        corrs = {}
                        for i, int1 in enumerate(ints[first]):
                            cc, _ = scis.pearsonr(int1, int2)
                            corrs[i] = cc
                        first_idx, second_idx = strongest(corrs), None

                    for c, cand in ((first, first_idx), (second, second_idx)):
                        if c not in assignments:
                            assignments[c] = ints[c][cand]
        return assignments, info_all, single_assignment_names

    def plot_assignments(assignments, info_all, single_assignment_names, ax):
        """ Scatter masses of assigned compounds on `ax`, colored by origin
        """
        values_initial = []
        values_single = []
        values_other = []

        for name in assignments:
            mz = info_all[name]['mass']

            if name in initial_compound_names:
                values_initial.append(mz)
            elif name in single_assignment_names:
                values_single.append(mz)
            else:
                values_other.append(mz)

        ax.scatter(
            values_initial, [0]*len(values_initial),
            color='red', alpha=0.5,
            label='initial')
        ax.scatter(
            values_single, [0]*len(values_single),
            color='green', alpha=0.5,
            label='single')
        ax.scatter(
            values_other, [0]*len(values_other),
            color='blue', alpha=0.5,
            label='other')

        ax.yaxis.set_major_locator(plt.NullLocator())

    # plots
    N = 10
    f, axes = plt.subplots(N, 1, figsize=(9,9))

    for ax in axes:
        assignments, info_all, single_assignment_names = assign(motifs)

        if 'Hexose' in assignments:
            print('Hexose', sum(assignments['Hexose']))

        plot_assignments(assignments, info_all, single_assignment_names, ax)

    plt.xlabel('MZ')
    plt.legend(loc='best')

    plt.tight_layout()
    plotter.save_figure('images/assignments.pdf', bbox_inches='tight')
Esempio n. 22
0
def threshold_influence(inp, value_func=get_sign_changes, resolution=100):
    """ Investigate influence of threshold

        For `resolution` logarithmically spaced thresholds between 1e-4
        and 1 the result matrix is recomputed and the sum of its positive
        entries is plotted against the threshold. Matrices for the first
        threshold, the last threshold and the first threshold which is at
        least as large as the reference/embedded difference stdev are
        shown as insets.

        Arguments:
            inp: mapping whose 'data' entry iterates over `(raw, enh_res)`
                pairs (one per parameter configuration)
            value_func: forwarded to `handle_enh_entry`; its name without
                the first four characters is used in titles and file name
            resolution: number of thresholds to test

        Side effects:
            The module-level `THRESHOLD` is overwritten for every tested
            value and keeps the last one after the function returns.
            The figure is stored as
            'images/threshold_influence_<name>.pdf'.
    """
    def plot_matrix(data):
        """ Draw `data` as tick-less heatmap on the current axes
        """
        # booleans instead of 'off': recent matplotlib treats the string
        # as truthy and would leave the ticks visible
        plt.tick_params(
            axis='both', which='both', labelleft=False,
            bottom=False, top=False, labelbottom=False, left=False, right=False)

        plt.imshow(
            data,
            interpolation='nearest', cmap=get_matrix_cmap(),
            vmin=0, vmax=3)
        plt.colorbar(ticks=range(np.max(data)+1), extend='min')

    global THRESHOLD
    threshold_list = np.logspace(-4, 0, resolution)

    # compute stdev of difference between reference and embedded 3 node motif
    cur_diffs = []
    for raw, enh_res in inp['data']:
        _, rd = raw
        _, rdm, _ = rd
        for enh in enh_res:
            _, ed = enh
            _, edm, _ = ed
            if edm is not None:
                # drop last row/column of the embedded matrix before
                # comparing it to the reference
                diff = abs(rdm - edm[:-1,:-1])
                cur_diffs.extend(diff.ravel())
    stdev = np.std(cur_diffs)

    # produce data
    first_data, last_data, std_data = None, None, None
    pairs = []
    for thres in tqdm(threshold_list):
        THRESHOLD = thres

        data = np.array([
            [handle_enh_entry(raw, enh, value_func) for enh in enh_res]
            for raw, enh_res in inp['data'] # for each parameter configuration
        ])

        if thres == threshold_list[0]:
            first_data = data
        if thres == threshold_list[-1]:
            last_data = data
        if thres >= stdev and std_data is None:
            std_data = data

        mat_res = np.sum(data[data>0])
        pairs.append((thres, mat_res))

    print('Data shape:', data.shape)
    total_num = data[data>=0].size * 3

    # plot result
    value_func_name = value_func.__name__[4:]

    plt.figure()

    nz_vec = [(t, m/total_num) for t,m in pairs if m>0]
    z_vec = [(t, m/total_num) for t,m in pairs if m<=0]

    # an empty vector would unpack to `plt.plot('o')`, i.e. the format
    # string would be plotted as data
    if nz_vec:
        plt.plot(*zip(*nz_vec), 'o')
    if z_vec:
        plt.plot(*zip(*z_vec), 'o', color='red')

    plt.axvspan(
        xmin=1e-6, xmax=stdev,
        alpha=0.1, color='blue')
    plt.annotate('correlation stdev ({:.02})'.format(stdev),
        xy=(stdev, .03), xycoords='data',
        xytext=(50, 20), textcoords='offset points',
        arrowprops=dict(arrowstyle='->'))

    plt.xscale('log')
    plt.title('Influence of binning threshold on number of {}'.format(value_func_name))
    plt.xlabel('binning threshold')
    plt.ylabel('frequency of {}'.format(value_func_name))

    # inside plots
    plt.style.use('default')

    insets = (
        ([0.1, 0.5, .2, .2], first_data),
        ([0.7, 0.4, .2, .2], last_data),
        ([0.4, 0.2, .2, .2], std_data),
    )
    for rect, matrix in insets:
        # `std_data` is never set if stdev exceeds the largest threshold
        if matrix is None:
            continue
        plt.axes(rect)
        plot_matrix(matrix)

    # save result
    save_figure('images/threshold_influence_{}.pdf'.format(value_func_name), bbox_inches='tight')
Esempio n. 23
0
def plot_result(inp, vfunc, sfuncs, title, fname):
    """ Plot generated matrix

        `sfuncs` can either be a sequence of functions (the docstring of
        the first one labels the x-axis) or, to request clustering, a
        tuple `(xtick_func, spec)` where `spec` is a string of the form:
            * cluster:euclidean
            * cluster:hamming
        (generally every metric for scipy.spatial.distance.pdist)

        Arguments:
            inp: mapping whose 'data' entry is handed to the
                preprocessing and selection helpers
            vfunc: value function, forwarded to `preprocess_data` and to
                the column example plots
            sfuncs: see above
            title: figure title ("normal" plot only)
            fname: output name; also used as prefix for the example plots

        Progress is printed to stdout; nothing is returned.
    """
    print('Plotting "{}"'.format(fname), end='... ', flush=True)

    # preprocess data
    data, xticks, yticks = preprocess_data(inp['data'], vfunc, sfuncs)

    # stop, it's plotting time!
    if isinstance(sfuncs, tuple): # there will be clustering
        xtick_func, spec = sfuncs
        metr = spec.split(':')[1]

        # remove noisy signals for clustering
        data[data < 0] = 0

        # rows are labeled by yticks, columns by xticks
        # (`DataFrame.from_items` no longer exists in pandas >= 1.0)
        df = pd.DataFrame(data, index=yticks, columns=xticks)

        # clustermap creates its own figure, no need to open one before
        cg = sns.clustermap(
            df, cmap=get_matrix_cmap(), vmin=0, vmax=3,
            row_cluster=False, metric=metr)

        plt.setp(cg.ax_heatmap.xaxis.get_ticklabels(), rotation=90, size=6)
        plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, size=5)

        save_figure(fname, bbox_inches='tight')
        plt.close()
    else:
        # "normal" plot
        mpl.style.use('default') # possibly reset seaborn styles

        plt.figure()

        # builtin `int` replaces the `np.int` alias removed from NumPy
        plt.xticks(np.arange(len(data[0]), dtype=int), xticks)
        plt.yticks(np.arange(len(data), dtype=int), yticks)

        plt.setp(plt.gca().get_xticklabels(), fontsize=3, rotation='vertical')
        plt.setp(plt.gca().get_yticklabels(), fontsize=3)

        # booleans instead of 'on'/'off': recent matplotlib treats both
        # strings as truthy
        plt.tick_params(
            axis='both', which='both', labelleft=True,
            bottom=False, top=False, labelbottom=True, left=False, right=False)

        plt.title(title)
        plt.xlabel(sfuncs[0].__doc__)
        plt.ylabel('absolute mean of Jacobian')

        plt.imshow(
            data,
            interpolation='nearest', cmap=get_matrix_cmap(),
            vmin=0, vmax=3)
        plt.colorbar(ticks=range(np.max(data)+1), extend='min')

        # mark "zoomed" columns
        sel_one, netws_one = select_column_by_jacobian(inp['data'], np.array([
            [1,0,0,1],
            [1,1,0,0],
            [1,1,1,0],
            [0,0,1,1]
        ]))
        sel_two, netws_two = select_column_by_jacobian(inp['data'], np.array([
            [1,0,0,1],
            [1,1,0,0],
            [1,1,1,0],
            [1,0,0,1]
        ]))

        sel_xticks = list(plt.gca().get_xticklabels())
        sel_xticks[sel_one].set_weight('bold')
        sel_xticks[sel_two].set_weight('bold')
        plt.gca().set_xticklabels(sel_xticks)

        # mark "zoomed" rows
        sel_blue, netws_blue = select_row_by_count(inp['data'], data, 1)
        sel_red, netws_red = select_row_by_count(inp['data'], data, 2)

        sel_yticks = list(plt.gca().get_yticklabels())
        sel_yticks[sel_blue].set_weight('bold')
        sel_yticks[sel_red].set_weight('bold')
        plt.gca().set_yticklabels(sel_yticks)

        # save figure
        save_figure(fname, bbox_inches='tight')

        # plot best examples
        plot_individuals(netws_one, '{}_col_one'.format(fname), vfunc)
        plot_individuals(netws_two, '{}_col_two'.format(fname), vfunc)

        plot_individuals(netws_blue, '{}_row_blue'.format(fname))
        plot_individuals(netws_red, '{}_row_red'.format(fname))

    print('Done')