コード例 #1
0
def dist(X, xtitle, xlim, width):
    """
    Draw a histogram of the values in X and return the figure and axes.

    Bins are `width` wide and span `xlim`. When `xlim` is None the range
    is taken from the data, padded by two bin widths on either side.
    Bars are centred on the left bin edge when `xtitle` is
    'purchasability class' and left-aligned on it otherwise.

    """
    fig, ax = plt.subplots(figsize=(8, 5))

    # Fall back to a data-driven range when the caller gives none.
    if xlim is None:
        xlim = (min(X)-2*width, max(X)+2*width)

    counts, edges = np.histogram(
        a=X,
        bins=np.arange(xlim[0], xlim[1], width),
    )
    bar_align = 'center' if xtitle == 'purchasability class' else 'edge'

    ax.bar(
        edges[:-1],
        counts,
        align=bar_align,
        alpha=1.0,
        width=width,
        color='#2980B9',
        edgecolor='k',
    )
    pfn.define_standard_plot(
        ax,
        xtitle=xtitle,
        ytitle='count',
        xlim=xlim,
        ylim=None,
    )

    return fig, ax
コード例 #2
0
def cs_logPvsNHA(logPs, Xs, HlogPs, HXs):
    """
    Plot logP against number of heavy atoms as a 2D histogram.

    `Xs`/`logPs` populate the histogram (with a 'count' colour bar);
    `HXs`/`HlogPs` are drawn on top as large red markers. The figure is
    saved to ``chemical_space_logPNHA.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))
    limits = {'xlim': (0, 40), 'ylim': (-9, 14)}

    # Ten-step colour map running from white to dark blue.
    white = (1.0, 1.0, 1.0)
    dark_blue = (44 / 255, 62 / 255, 80 / 255)
    cmap = colors.LinearSegmentedColormap.from_list(
        'test', [white, dark_blue], N=10
    )

    fig, ax, hist = pfn.twoD_histogram(
        X_data=Xs,
        Y_data=logPs,
        cmap=cmap,
        fig=fig,
        ax=ax,
        **limits,
    )
    # The fourth item returned for the histogram is what the colour bar
    # is built from.
    colour_bar = fig.colorbar(hist[3], ax=ax)
    colour_bar.ax.set_ylabel('count', fontsize=16)
    colour_bar.ax.tick_params(labelsize=16)

    # Highlighted points overlaid on the histogram.
    ax.scatter(
        HXs,
        HlogPs,
        c='#E74C3C',
        edgecolors='k',
        marker='o',
        alpha=1.0,
        s=120,
    )

    pfn.define_standard_plot(
        ax,
        xtitle=r'no. heavy atoms',
        ytitle=r'logP',
        **limits,
    )
    fig.tight_layout()
    fig.savefig('chemical_space_logPNHA.pdf', dpi=720, bbox_inches='tight')
コード例 #3
0
def mid_plots(parameter_sets, molecules, test_mol, full_results, colours,
              markers):
    """
    Plot average intermediate diameter against each tested parameter.

    One figure is drawn per key `t` of `parameter_sets` and saved to
    ``mid_<t>.pdf``. Every molecule in `molecules` that is also in
    `test_mol` contributes one line (the averages) with a shaded band of
    plus/minus the matching standard deviation.

    `full_results[t][name][v]` must unpack into five items; the third
    and fourth are used as the average and the standard deviation.

    Keys without a dedicated axis setup are labelled with the key itself
    and passed ``xlim=None`` (as other callers of
    pfn.define_standard_plot in this file do). Previously such a key
    either raised UnboundLocalError or silently reused the label and
    limits of the previous figure.

    """
    # (x limits, x label) for the parameter sets with a bespoke axis.
    axis_setup = {
        'N_conformers': ((0, 1100), '$N$'),  # 'no. conformers'
        'spacing': ((0.2, 1.1), r'grid spacing [$\mathrm{\AA}$]'),
        'vdw': ((0.4, 1.1), 'vdW scale parameter'),
        'boxMargin': ((3, 9), r'box margin [$\mathrm{\AA}$]'),
    }

    for t in parameter_sets:
        fig, ax = plt.subplots()
        for name in molecules:
            if name not in test_mol:
                continue
            X = []
            Y = []
            Y_err = []
            for v in parameter_sets[t]:
                _, _, mid_diam_avg, mid_diam_std, _ = full_results[t][name][v]
                X.append(float(v))
                Y.append(float(mid_diam_avg))
                Y_err.append(float(mid_diam_std))
            X = np.asarray(X)
            Y = np.asarray(Y)
            Y_err = np.asarray(Y_err)
            ax.plot(X, Y, c=colours[name], marker=markers[name], label=name)
            # Shaded band of one standard deviation around the average.
            ax.fill_between(X,
                            Y - Y_err,
                            Y + Y_err,
                            alpha=0.2,
                            facecolor=colours[name])

        t_lim, t_name = axis_setup.get(t, (None, str(t)))
        pfn.define_standard_plot(
            ax,
            xtitle=t_name,
            ytitle=r'avg. intermediate diameter [$\mathrm{\AA}$]',
            xlim=t_lim,
            ylim=(3.5, 9))
        fig.tight_layout()
        fig.savefig(f"mid_{t}.pdf", dpi=720, bbox_inches='tight')
コード例 #4
0
def cs_purchCT(purch, not_purch):
    """
    Plot overlaid BertzCT histograms for two sets of molecules.

    `purch` is drawn first (labelled 'purchasable'), then `not_purch`
    (labelled 'not purchasable'), both as raw counts in bins of width 50
    between 0 and 2000. The figure is saved to
    ``chemical_space_purchCT.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))

    bin_width = 50
    bins = np.arange(0, 2000, bin_width)

    # (values, bar colour, legend label), in drawing order.
    series = (
        (purch, '#FA7268', 'purchasable'),
        (not_purch, '#DAF7A6', 'not purchasable'),
    )
    for values, colour, label in series:
        counts, edges = np.histogram(a=values, bins=bins, density=False)
        ax.bar(
            edges[:-1],
            counts,
            align='edge',
            alpha=0.8,
            width=bin_width,
            color=colour,
            edgecolor='k',
            label=label,
        )

    ax.legend(fontsize=16)

    pfn.define_standard_plot(
        ax,
        xtitle=r'BertzCT',
        ytitle='frequency',
    )
    fig.tight_layout()
    fig.savefig('chemical_space_purchCT.pdf', dpi=720, bbox_inches='tight')
コード例 #5
0
def shapes_with_known(molecules, known_df, threshold, output_dir):
    """
    Plot molecule shapes considering experimental results.

    For each molecule the results in
    ``<output_dir>/<name>_diam_result.csv`` are read; the point is placed
    at the averages of the 'ratio_1' and 'ratio_2' columns. The minimum
    of the 'diam2' column is compared with `threshold` and with the
    'diffuse' flag of the molecule's row in `known_df`:

    - colour: blue when the flag is 't', red when it is 'f';
    - marker: 'o' when ``diam <= threshold`` agrees with the flag
      ('t' and within threshold, or 'f' and above it), 'X' otherwise.

    Molecules are skipped when the results file is missing or empty,
    when `known_df` has no row for them (this used to raise IndexError),
    or when the flag is neither 't' nor 'f'. The figure is saved to
    ``shape.pdf``.

    """
    flag_colours = {'t': 'b', 'f': 'r'}

    fig, ax = plt.subplots(figsize=(5, 5))
    for name in molecules:
        out_file = (f"{output_dir}/{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if len(results) == 0:
            continue
        mid_diam = min(results['diam2'])

        known = known_df[known_df['molecule'] == name]['diffuse']
        if len(known) == 0:
            # No experimental entry for this molecule.
            continue
        lit_d = known.iloc[0]
        if lit_d not in ('t', 'f'):
            continue

        within_threshold = mid_diam <= threshold
        agrees = (lit_d == 't') == within_threshold
        ax.scatter(np.average(results['ratio_1']),
                   np.average(results['ratio_2']),
                   c=flag_colours[lit_d],
                   edgecolors='k',
                   marker='o' if agrees else 'X',
                   alpha=1.0,
                   s=80)

    # Triangle outline with its three labelled corners.
    ax.plot([0, 0.5, 1, 0], [1, 0.5, 1, 1], c='k', lw=2)
    ax.text(0.75, 1.03, 'sphere', fontsize=20)
    ax.text(0.4, 0.45, 'oblate', fontsize=20)
    ax.text(-0.05, 1.03, 'prolate', fontsize=20)

    pfn.define_standard_plot(ax,
                             xtitle='$I_1$ / $I_3$',
                             ytitle='$I_2$ / $I_3$',
                             xlim=(-0.1, 1.1),
                             ylim=(0.4, 1.1))
    fig.tight_layout()
    fig.savefig("shape.pdf", dpi=720, bbox_inches='tight')
コード例 #6
0
def mol_parity(propx, propy, file, xtitle, ytitle, mol_file=None):
    """
    Plot a parity of two molecular properties.

    Molecules come from `mol_file` (read with IO.read_molecule_list) or,
    when it is None, from every ``*_unopt.mol`` file matched by glob in
    the working directory. For each molecule the values stored under
    the keys `propx` and `propy` in ``<name>_prop.json`` are collected;
    molecules without that file are skipped. The scatter plot is saved
    to ``parity_<file>.pdf``.

    An axis showing 'Synth_score' is limited to (0, 10). The two axes
    are checked independently, so both are limited when both show that
    property (previously only the x axis was).

    """

    if mol_file is None:
        molecule_list = glob.glob('*_unopt.mol')
    else:
        molecule_list = IO.read_molecule_list(mol_file)

    # iterate over molecules
    Xs = []
    Ys = []
    for mol in molecule_list:
        name = mol.replace('_unopt.mol', '')
        prop_file = name + '_prop.json'

        if not exists(prop_file):
            continue

        with open(prop_file, 'r') as f:
            prop_dict = json.load(f)

        Xs.append(prop_dict[propx])
        Ys.append(prop_dict[propy])

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(Xs,
               Ys,
               c='#FA7268',
               edgecolors='k',
               marker='o',
               alpha=1.0,
               s=80)

    synth_lim = (0, 10)
    xlim = synth_lim if propx == 'Synth_score' else None
    ylim = synth_lim if propy == 'Synth_score' else None

    pfn.define_standard_plot(ax,
                             xtitle=xtitle,
                             ytitle=ytitle,
                             xlim=xlim,
                             ylim=ylim)
    fig.tight_layout()
    fig.savefig(f'parity_{file}.pdf', dpi=720, bbox_inches='tight')
コード例 #7
0
def biomin_known(molecules, output_dir, plot_suffix):
    """
    Histogram of the sizes of all molecules in `molecules`.

    For each molecule the minimum of the 'diam2' column in
    ``<output_dir>/<name>_diam_result.csv`` is taken (and printed).
    Molecules whose results file is missing or contains no rows are
    skipped; an empty file previously raised ValueError from min().
    The histogram uses 0.5-wide bins and is saved to
    ``molecule_size_<plot_suffix>.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))
    m_diams = []
    for name in molecules:
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if len(results) == 0:
            continue
        mid_diam = min(results['diam2'])
        print('-----', name, mid_diam, '-----')
        m_diams.append(mid_diam)

    m_diams = np.asarray(m_diams)

    bin_width = 0.5
    X_bins = np.arange(0.1, 21, bin_width)
    hist, bin_edges = np.histogram(a=m_diams, bins=X_bins)
    ax.bar(bin_edges[:-1],
           hist,
           align='edge',
           width=bin_width,
           color='#2C3E50',
           edgecolor='k',
           alpha=0.8)
    # Reference markers: a line at 3.4 and a shaded band from 4.0 to 6.6
    # on the diameter axis.
    ax.axvline(x=3.4, c='k')
    ax.axvspan(
        xmin=4.0,
        xmax=6.6,
        facecolor='k',
        alpha=0.25,
    )
    pfn.define_standard_plot(
        ax,
        xtitle=r'$d$ [$\mathrm{\AA}$]',
        ytitle='count',
        xlim=(0, 15),
        ylim=(0, 15))
    fig.tight_layout()
    fig.savefig(f"molecule_size_{plot_suffix}.pdf",
                dpi=720,
                bbox_inches='tight')
コード例 #8
0
def min_of_mid_plots(parameter_sets, molecules, test_mol, full_results,
                     colours, markers):
    """
    Plot the minimum intermediate diameter against each tested parameter.

    One figure is drawn per key `t` of `parameter_sets` and saved to
    ``min_of_mid_<t>.pdf``. Every molecule in `molecules` that is also
    in `test_mol` contributes one line built from the fifth item of
    ``full_results[t][name][v]``. For each molecule the largest
    deviation of its values from their minimum is printed, and after
    each figure the function waits on ``input()`` so the printed values
    can be read.

    Keys without a dedicated axis setup are labelled with the key itself
    and passed ``xlim=None`` (as other callers of
    pfn.define_standard_plot in this file do). Previously such a key
    either raised UnboundLocalError or silently reused the label and
    limits of the previous figure.

    """
    # (x limits, x label) for the parameter sets with a bespoke axis.
    axis_setup = {
        'N_conformers': ((0, 1100), '$N$'),  # 'no. conformers'
        'spacing': ((0.2, 0.7), r'grid spacing [$\mathrm{\AA}$]'),
        'vdw': ((0.4, 1.1), 'vdW scale parameter'),
        'boxMargin': ((3, 9), r'box margin [$\mathrm{\AA}$]'),
    }

    for t in parameter_sets:
        fig, ax = plt.subplots()
        for name in molecules:
            if name not in test_mol:
                continue
            X = []
            Y = []
            for v in parameter_sets[t]:
                _, _, _, _, min_mid = full_results[t][name][v]
                X.append(float(v))
                Y.append(min_mid)
            X = np.asarray(X)
            Y = np.asarray(Y)
            # Largest deviation from the smallest value; the minimum is
            # computed once instead of once per element.
            lowest = min(Y)
            print(name, '--', max([y - lowest for y in Y]))
            ax.plot(X, Y, c=colours[name], marker=markers[name], label=name)

        t_lim, t_name = axis_setup.get(t, (None, str(t)))
        pfn.define_standard_plot(ax,
                                 xtitle=t_name,
                                 ytitle=r'$d$ [$\mathrm{\AA}$]',
                                 xlim=t_lim,
                                 ylim=(3.5, 8))
        fig.tight_layout()
        fig.savefig(f"min_of_mid_{t}.pdf", dpi=720, bbox_inches='tight')
        # Pause so the deviations printed above can be inspected.
        input(f'^^ is max dev from min_of_mid for {t}')
コード例 #9
0
def cs_NHA(Xs, Ys):
    """
    Plot intermediate diameter against number of heavy atoms.

    `Xs` (x axis) and `Ys` (y axis) are binned into a 2D histogram with
    a 'count' colour bar, and a shaded horizontal band is drawn between
    4.0 and 6.6 on the y axis. The figure is saved to
    ``chemical_space_NHA.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))
    limits = {'xlim': (0, 40), 'ylim': (0, 17)}

    # Ten-step colour map running from white to dark blue.
    white = (1.0, 1.0, 1.0)
    dark_blue = (44 / 255, 62 / 255, 80 / 255)
    cmap = colors.LinearSegmentedColormap.from_list(
        'test', [white, dark_blue], N=10
    )

    fig, ax, hist = pfn.twoD_histogram(
        X_data=Xs,
        Y_data=Ys,
        cmap=cmap,
        fig=fig,
        ax=ax,
        **limits,
    )
    # The fourth item returned for the histogram is what the colour bar
    # is built from.
    colour_bar = fig.colorbar(hist[3], ax=ax)
    colour_bar.ax.set_ylabel('count', fontsize=16)
    colour_bar.ax.tick_params(labelsize=16)

    # Horizontal band for different materials.
    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2)

    pfn.define_standard_plot(
        ax,
        xtitle=r'no. heavy atoms',
        ytitle=r'intermediate diameter [$\mathrm{\AA}$]',
        **limits,
    )
    fig.tight_layout()
    fig.savefig('chemical_space_NHA.pdf', dpi=720, bbox_inches='tight')
コード例 #10
0
def min_plots(parameter_sets, molecules, test_mol, full_results, colours,
              markers):
    """
    Plot average minimum diameter against each tested parameter.

    One figure is drawn per key `t` of `parameter_sets` and saved to
    ``min_<t>.pdf``. Every molecule in `molecules` that is also in
    `test_mol` contributes one line built from the first item of
    ``full_results[t][name][v]``, which must unpack into five items.
    The standard deviation (second item) is not drawn, so it is no
    longer converted or collected.

    Keys without a dedicated axis setup are labelled with the key itself
    and passed ``xlim=None`` (as other callers of
    pfn.define_standard_plot in this file do). Previously such a key
    either raised UnboundLocalError or silently reused the label and
    limits of the previous figure.

    """
    # (x limits, x label) for the parameter sets with a bespoke axis.
    axis_setup = {
        'N_conformers': ((0, 1100), '$N$'),  # 'no. conformers'
        'spacing': ((0, 1.2), r'grid spacing [$\mathrm{\AA}$]'),
        'vdw': ((0.4, 1.2), r'vdW scale parameter'),
        'boxMargin': ((2, 10), r'box margin [$\mathrm{\AA}$]'),
    }

    for t in parameter_sets:
        fig, ax = plt.subplots()
        for name in molecules:
            if name not in test_mol:
                continue
            X = []
            Y = []
            for v in parameter_sets[t]:
                min_diam_avg, _, _, _, _ = full_results[t][name][v]
                X.append(float(v))
                Y.append(float(min_diam_avg))
            ax.plot(X, Y, c=colours[name], marker=markers[name], label=name)

        t_lim, t_name = axis_setup.get(t, (None, str(t)))
        pfn.define_standard_plot(
            ax,
            xtitle=t_name,
            ytitle=r'avg. minimum diameter [$\mathrm{\AA}$]',
            xlim=t_lim,
            ylim=(0, 10))
        ax.legend(loc=1, fontsize=16)
        fig.tight_layout()
        fig.savefig(f"min_{t}.pdf", dpi=720, bbox_inches='tight')
コード例 #11
0
def shapes(molecules, threshold, output_dir, plot_suffix):
    """
    Plot molecule shapes of all molecules in dictionary.

    For each molecule the results in
    ``<output_dir>/<name>_diam_result.csv`` are read; the point is placed
    at the averages of the 'ratio_1' and 'ratio_2' columns and coloured
    blue when the minimum of the 'diam2' column is at or below
    `threshold`, red otherwise.

    Molecules whose results file is missing or contains no rows are
    skipped; an empty file previously raised ValueError from min().
    The figure is saved to ``shape_<plot_suffix>.pdf``.

    """
    fig, ax = plt.subplots(figsize=(5, 5))
    for name in molecules:
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if len(results) == 0:
            continue
        mid_diam = min(results['diam2'])

        ax.scatter(np.average(results['ratio_1']),
                   np.average(results['ratio_2']),
                   c='b' if mid_diam <= threshold else 'r',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=80)

    # Triangle outline with its three labelled corners.
    ax.plot([0, 0.5, 1, 0], [1, 0.5, 1, 1], c='k', lw=2)
    ax.text(0.75, 1.03, 'sphere', fontsize=20)
    ax.text(0.4, 0.45, 'oblate', fontsize=20)
    ax.text(-0.05, 1.03, 'prolate', fontsize=20)

    pfn.define_standard_plot(ax,
                             title='',
                             xtitle='$I_1$ / $I_3$',
                             ytitle='$I_2$ / $I_3$',
                             xlim=(-0.1, 1.1),
                             ylim=(0.4, 1.1))
    fig.tight_layout()
    fig.savefig(f"shape_{plot_suffix}.pdf", dpi=720, bbox_inches='tight')
コード例 #12
0
def parity_with_known_min2(molecules, diameters, output_dir):
    """
    Parity plot of calculated diameters against known diameters.

    The x value of each point is ``float(diameters[name])``; the y value
    is the minimum of the 'diam2' column in
    ``<output_dir>/<name>_diam_result.csv``. A molecule is skipped when
    its known value cannot be converted to float (a message is printed)
    or when its results file is missing or has no rows. The figure is
    saved to ``parity_min2.pdf``.

    """

    fig, ax = plt.subplots(figsize=(5, 5))
    for name in molecules:
        try:
            known_diam = float(diameters[name])
        except ValueError:
            print('no radius given for this molecule - skipped')
            continue

        file_stem = name.replace(' ', '_').replace('/', '__')
        out_file = f"{output_dir}/{file_stem}" + '_diam_result.csv'
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if not len(results):
            continue

        calc_diam = min(results['diam2'])
        print(name, known_diam, calc_diam)
        ax.scatter(known_diam,
                   calc_diam,
                   c='#E74C3C',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=120)

    # y = x guide line.
    diagonal = np.linspace(-1, 12, 2)
    ax.plot(diagonal, diagonal, c='k', alpha=0.4)

    pfn.define_standard_plot(ax,
                             xtitle=r'critical diameter [$\mathrm{\AA}$]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(1, 10),
                             ylim=(1, 10))
    fig.tight_layout()
    fig.savefig("parity_min2.pdf", dpi=720, bbox_inches='tight')
コード例 #13
0
def HOF_examples(output_dir):
    """
    Prepare figure showing the value of d for all molecules used in the
    BioHOFs from: 10.1021/jacs.9b06589

    Each molecule is plotted at (molecular weight, d), where the weight
    is computed from its SMILES with explicit hydrogens added and d is
    the minimum of the 'diam2' column in
    ``<output_dir>/<name>_diam_result.csv``. Molecules whose results
    file is missing or contains no rows are skipped; an empty file
    previously raised ValueError from min(). The figure is saved to
    ``HOF_examples.pdf``.

    """
    # Molecule name -> SMILES, in plotting order.
    mol_smiles = {
        'fluorescein':
            'C1=CC=C2C(=C1)C(=O)OC23C4=C(C=C(C=C4)O)OC5=C3C=CC(=C5)O',
        'hydrogen_peroxide': 'OO',
        'methanol': 'CO',
        'formaldehyde': 'C=O',
        'urea': 'C(=O)(N)N',
    }
    fig, ax = plt.subplots(figsize=(8, 5))
    for name, smiles in mol_smiles.items():
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if len(results) == 0:
            continue
        mid_diam = min(results['diam2'])
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        MW = Descriptors.MolWt(mol)
        # The name is printed twice to keep the log format unchanged.
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='#5499C7',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=140)

    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 500),
                             ylim=(2.5, 15))
    fig.tight_layout()
    fig.savefig("HOF_examples.pdf", dpi=720, bbox_inches='tight')
コード例 #14
0
def cyt_C_perox_assay(output_dir):
    """
    Prepare figure showing the change in intermediate diameter for 3
    peroxide molecules degraded by Cyt-C in ZIF-8 (One-Pot Synthesis of
    Protein-Embedded Metal–Organic Frameworks with Enhanced Biological
    Activities, DOI:10.1021/nl5026419)

    Each molecule is plotted at (molecular weight, d), where the weight
    is computed from its SMILES with explicit hydrogens added and d is
    the minimum of the 'diam2' column in
    ``<output_dir>/<name>_diam_result.csv``. Molecules whose results
    file is missing or contains no rows are skipped; an empty file
    previously raised ValueError from min(). The figure is saved to
    ``cytC_comp.pdf``.

    """
    # Peroxide name -> SMILES, in plotting order.
    mol_smiles = {
        'hydrogen peroxide': 'OO',
        'methyl ethyl ketone peroxide': 'CCC(C)(OO)OOC(C)(CC)OO',
        'tert-butyl hydroperoxide': 'CC(C)(C)OO',
    }
    fig, ax = plt.subplots()
    for name, smiles in mol_smiles.items():
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            continue
        results = pd.read_csv(out_file)
        if len(results) == 0:
            continue
        mid_diam = min(results['diam2'])
        mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
        MW = Descriptors.MolWt(mol)
        # The name is printed twice to keep the log format unchanged.
        print(name, name, MW, mid_diam)
        ax.scatter(MW,
                   mid_diam,
                   c='k',
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=100)

    # Hatched horizontal band between 4.0 and 6.6 on the d axis.
    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2, hatch="/")
    pfn.define_standard_plot(ax,
                             xtitle='molecular weight [g/mol]',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(10, 250),
                             ylim=(2.5, 8))
    fig.tight_layout()
    fig.savefig("cytC_comp.pdf", dpi=720, bbox_inches='tight')
コード例 #15
0
def mol_dist(data_dict):
    """
    Plot the distribution of one molecular property as a histogram.

    `data_dict` supplies everything: 'd' (the values), 'xlim' and
    'width' (bin range and size), 'c' (bar colour), 'xtitle' (x label)
    and 'file' (suffix of the output name). The figure is saved to
    ``hist_<file>.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))

    bin_width = data_dict['width']
    x_range = data_dict['xlim']
    counts, edges = np.histogram(
        a=data_dict['d'],
        bins=np.arange(x_range[0], x_range[1], bin_width),
    )

    ax.bar(
        edges[:-1],
        counts,
        align='edge',
        alpha=1.0,
        width=bin_width,
        color=data_dict['c'],
        edgecolor='k',
    )
    pfn.define_standard_plot(
        ax,
        xtitle=data_dict['xtitle'],
        ytitle='count',
        xlim=data_dict['xlim'],
        ylim=None,
    )
    fig.tight_layout()
    fig.savefig(f"hist_{data_dict['file']}.pdf", dpi=720, bbox_inches='tight')
コード例 #16
0
def no_rxns_vs_size(data, params, plot_suffix):
    """
    Plot number of possible reactions as a function of size threshold.

    ``data['max_mid_diam']`` is binned in 0.5-wide bins from 0 to 20.
    The per-bin counts are drawn as bars on a secondary y axis and their
    running total as a red line on the primary axis. `params` is not
    used. The figure is saved to
    ``<plot_suffix>/size_threshold_<plot_suffix>.pdf``.

    """

    fig, ax = plt.subplots(figsize=(8, 5))

    bin_width = 0.5
    counts, edges = np.histogram(
        a=data['max_mid_diam'],
        bins=np.arange(0, 20.5, bin_width),
    )
    left_edges = edges[:-1]

    # Per-bin counts on the right-hand axis.
    count_ax = ax.twinx()
    count_ax.bar(
        left_edges,
        counts,
        align='edge',
        alpha=0.9,
        width=bin_width,
        color='#2C3E50',
        edgecolor='k',
    )

    # Running total on the left-hand axis.
    # NOTE(review): each total is drawn at the left edge of its bin even
    # though it already includes that bin's count - confirm this matches
    # the 'max component < threshold' label.
    running_total = np.cumsum(counts)
    ax.plot(
        left_edges,
        running_total,
        alpha=1.0,
        label='max component < threshold',
        color='r',
        marker='o',
    )

    # Shaded band between 4.0 and 6.6 on the x axis.
    ax.axvspan(xmin=4.0, xmax=6.6, facecolor='k', alpha=0.2)

    # Leave 10% headroom above the curve and 20% above the bars.
    total_peak = max(running_total)
    count_peak = max(counts)
    pfn.define_standard_plot(
        ax,
        xtitle=r'$d$ of largest component [$\mathrm{\AA}$]',
        ytitle='cumulative # reactions',
        xlim=(0, 17),
        ylim=(0, int(total_peak+total_peak*0.1)),
    )
    count_ax.set_ylim(0, int(count_peak+count_peak*0.2))
    count_ax.set_ylabel('# reactions', fontsize=16)
    for axis in (ax, count_ax):
        axis.yaxis.set_major_locator(MaxNLocator(integer=True))

    # Change left y axis colours.
    for axis in (ax, count_ax):
        axis.spines['left'].set_color('red')

    count_ax.tick_params(axis='both', which='major', labelsize=16)
    fig.tight_layout()
    fig.savefig(
        f"{plot_suffix}/size_threshold_{plot_suffix}.pdf",
        dpi=720,
        bbox_inches='tight',
    )
コード例 #17
0
def n_phenyl_assay(output_dir):
    """
    Prepare figure showing the change in intermediate diameter for
    molecules
    commonly used in n-phenyl ester hydrolysis assays.

    For each chain length the ester (red) and the matching acid (blue)
    are plotted at (number of carbons, d), where d is the minimum of the
    'diam2' column in ``<output_dir>/<name>_diam_result.csv``. The value
    for p-nitrophenol is drawn as a horizontal line. The figure is saved
    to ``ester_comp.pdf``.

    A molecule whose results file is missing or contains no rows is
    skipped on its own: a missing ester no longer hides the matching
    acid, and an empty file no longer raises ValueError from min().
    If the p-nitrophenol results are unavailable the function exits via
    ``sys.exit``.

    """
    import sys

    def _mid_diam(name):
        """Return the minimum 'diam2' for `name`, or None if unavailable."""
        out_file = (f"{output_dir}/"
                    f"{name.replace(' ', '_').replace('/', '__')}"
                    '_diam_result.csv')
        if not os.path.exists(out_file):
            return None
        results = pd.read_csv(out_file)
        if len(results) == 0:
            return None
        return min(results['diam2'])

    # the n-phenyl esters
    mol_list_1 = [
        'p-nitrophenyl acetate', 'p-nitrophenyl butyrate',
        'p-nitrophenyl hexanoate', 'p-nitrophenyl octanoate',
        'p-nitrophenyl decanoate', 'p-nitrophenyl dodecanoate'
    ]

    # the products
    mol_list_2 = [
        'acetic acid', 'butyric acid', 'hexanoic acid', 'octanoic acid',
        'decanoic acid', 'dodecanoic acid'
    ]

    # no Cs
    no_Cs = [2, 4, 6, 8, 10, 12]

    fig, ax = plt.subplots()
    for C, ester, acid in zip(no_Cs, mol_list_1, mol_list_2):
        # (molecule, colour, marker size): ester first, then acid.
        for name, colour, size in ((ester, 'r', 100), (acid, 'b', 120)):
            mid_diam = _mid_diam(name)
            if mid_diam is None:
                continue
            print(C, name, mid_diam)
            ax.scatter(C,
                       mid_diam,
                       c=colour,
                       edgecolors='k',
                       marker='o',
                       alpha=1.0,
                       s=size)

    # Hatched horizontal band between 4.0 and 6.6 on the d axis.
    ax.axhspan(ymin=4.0, ymax=6.6, facecolor='k', alpha=0.2, hatch="/")

    # n-phenol
    mid_diam = _mid_diam('p-nitrophenol')
    if mid_diam is None:
        sys.exit('calc molecule diameters!')
    ax.axhline(y=mid_diam, c='purple', alpha=1)

    pfn.define_standard_plot(ax,
                             xtitle='no. carbons',
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=(1, 14),
                             ylim=(2.5, 8))

    # decoy legend: off-axis points that only provide the legend entries
    for colour, label in (('r', 'ester'), ('b', 'acid')):
        ax.scatter(-100,
                   -100,
                   c=colour,
                   edgecolors='k',
                   marker='o',
                   alpha=1.0,
                   s=100,
                   label=label)
    ax.legend(fontsize=16)
    fig.tight_layout()
    fig.savefig("ester_comp.pdf", dpi=720, bbox_inches='tight')
コード例 #18
0
def rxn_space(data, filename):
    """
    Plot a histogram of the largest-component size for class III rows.

    Rows of `data` whose 'PC_class' equals 3 are selected and their
    'max_mid_diam' values binned in 0.5-wide bins from 0 to 20. Styles
    are defined for classes 1 to 4, but only class 3 is drawn. The
    figure is saved to `filename`.

    """

    # PC_class -> (bar colour, bar alpha, legend label).
    class_styles = {
        1: ('#FA7268', 0.5, 'class I'),
        2: ('#DAF7A6', 0.5, 'class II'),
        3: ('#900C3F', 1.0, 'class III'),
        4: ('#F6D973', 0.5, 'class IV'),
    }
    shown_classes = (3,)

    bin_width = 0.5
    bins = np.arange(0, 20.5, bin_width)
    fig, ax = plt.subplots(figsize=(8, 5))

    for pc_class in shown_classes:
        colour, opacity, label = class_styles[pc_class]
        subset = data[data['PC_class'] == pc_class]

        counts, edges = np.histogram(a=subset['max_mid_diam'], bins=bins)
        ax.bar(
            edges[:-1],
            counts,
            align='edge',
            alpha=opacity,
            width=bin_width,
            color=colour,
            edgecolor='k',
            label=label,
        )

    ax.legend(fontsize=16)
    # Hatched band between 4.0 and 6.6 on the x axis.
    ax.axvspan(xmin=4.0, xmax=6.6, facecolor='k', alpha=0.2, hatch="/")
    # HOF.
    ax.axvline(x=13.1, c='k', lw=2, linestyle='--')

    pfn.define_standard_plot(
        ax,
        xtitle=r'$d$ of largest component [$\mathrm{\AA}$]',
        ytitle='# reactions',
        xlim=(0, 17),
        ylim=None,
    )

    fig.tight_layout()
    fig.savefig(filename, dpi=720, bbox_inches='tight')
コード例 #19
0
def cs_purch(purch, not_purch):
    """
    Plot overlaid intermediate-diameter histograms for two sets.

    `purch` is drawn first (labelled 'purchasable'), then `not_purch`
    (labelled 'not purchasable'), both as density-normalised histograms
    in 0.5-wide bins from 0 to 15. The figure is saved to
    ``chemical_space_purch.pdf``.

    """
    fig, ax = plt.subplots(figsize=(8, 5))

    bin_width = 0.5
    bins = np.arange(0, 15.5, bin_width)

    # (values, bar colour, legend label), in drawing order.
    series = (
        (purch, '#FA7268', 'purchasable'),
        (not_purch, '#DAF7A6', 'not purchasable'),
    )
    for values, colour, label in series:
        density, edges = np.histogram(a=values, bins=bins, density=True)
        ax.bar(
            edges[:-1],
            density,
            align='edge',
            alpha=0.8,
            width=bin_width,
            color=colour,
            edgecolor='k',
            label=label,
        )

    # Vertical band for different materials.
    ax.axvspan(xmin=4.0, xmax=6.6, facecolor='k', alpha=0.2, hatch="/")
    # HOF size limit:
    ax.axvline(x=13.1, c='k', lw=2, linestyle='--')

    ax.legend(fontsize=16)

    pfn.define_standard_plot(
        ax,
        xtitle=r'intermediate diameter [$\mathrm{\AA}$]',
        ytitle='frequency',
    )
    fig.tight_layout()
    fig.savefig('chemical_space_purch.pdf', dpi=720, bbox_inches='tight')
コード例 #20
0
def _parity_read_scale_cache(cache_path):
    """
    Read cached diameters from a ``name__kinetic__calculated`` text file.

    Returns a list of ``(name, kin_diam, mid_diam)`` tuples with both
    diameters converted to float.

    """
    rows = []
    with open(cache_path, 'r') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # Tolerate blank (e.g. trailing) lines in the cache.
                continue
            # Split from the right: the two numeric fields never contain
            # '__', but a molecule name may.
            name, kin_diam, mid_diam = line.rsplit('__', 2)
            rows.append((name, float(kin_diam), float(mid_diam)))
    return rows


def _parity_collect_scale_results(cache_path, scale_dir, molecules,
                                  diameters):
    """
    Collect diameters from the per-molecule CSVs and write the cache.

    Molecules without a numeric known diameter, without a result file in
    `scale_dir`, or with an empty result file are skipped. Returns a list
    of ``(name, kin_diam, mid_diam)`` tuples.

    """
    rows = []
    with open(cache_path, 'w') as f:
        for name in molecules:
            try:
                kin_diam = float(diameters[name])
            except ValueError:
                print('no radius given for this molecule - skipped')
                continue
            out_file = (f"{scale_dir}/"
                        f"{name.replace(' ', '_').replace('/', '__')}"
                        '_diam_result.csv')
            if not os.path.exists(out_file):
                continue
            results = pd.read_csv(out_file)
            if len(results) == 0:
                continue
            # Smallest value of the 'diam2' column over all rows.
            mid_diam = min(results['diam2'])
            rows.append((name, kin_diam, float(mid_diam)))
            f.write(name + '__' + str(kin_diam) + '__' +
                    str(mid_diam) + '\n')
    return rows


def parity_cf_scale_with_known(molecules, diameters, known_df, pars,
                               scale_info):
    """
    Produce a parity plot of calculated diameters and known kinetic
    diameters for multiple input parameters.

    Parameters
    ----------
    molecules
        Iterable of molecule names.
    diameters
        Mapping from molecule name to its known kinetic diameter
        (anything `float()` accepts).
    known_df, pars
        Unused here; kept so the signature stays unchanged.
    scale_info
        Mapping from result directory name to a 5-tuple
        ``(scale, colour, marker, alpha, edgecolour)``.

    Only the ``'scale09_test'`` entry of `scale_info` is plotted. Results
    are cached in ``scale_sc_<dir>.txt`` and read back from there when
    that file exists. The figure is saved to ``parity_scalecf.pdf``.

    """

    S = 120

    fig, ax = plt.subplots(figsize=(5, 5))
    for scale_dir in scale_info:
        if scale_dir != 'scale09_test':
            continue
        sc, C, M, A, E = scale_info[scale_dir]
        scale_output = f'scale_sc_{scale_dir}.txt'
        if os.path.exists(scale_output):
            rows = _parity_read_scale_cache(scale_output)
        else:
            rows = _parity_collect_scale_results(
                scale_output, scale_dir, molecules, diameters
            )
        kin_diams = [row[1] for row in rows]
        mid_diams = [row[2] for row in rows]
        # One scatter call per point, so that `c` is always interpreted
        # as a single colour specification.
        for kin_diam, mid_diam in zip(kin_diams, mid_diams):
            ax.scatter(kin_diam,
                       mid_diam,
                       c=C,
                       edgecolors=E,
                       marker=M,
                       alpha=A,
                       s=S)
        if len(kin_diams) < 2:
            # The statistics below are undefined for fewer than two
            # points; skip them so the figure is still written.
            print(f'{scale_dir}: fewer than 2 data points - '
                  'statistics skipped')
            continue
        # NOTE: `corr` is the full pearsonr result, printed under the
        # existing 'R^2' label.
        corr = pearsonr(kin_diams, mid_diams)
        MAE = mean_absolute_error(kin_diams, mid_diams)
        chi2 = sum(((j - i)**2) / i for i, j in zip(kin_diams, mid_diams))
        print(f'{scale_dir} R^2: {corr}, MAE: {MAE}, chi^2: {chi2}')

    # Parity (y = x) guide line.
    ax.plot(np.linspace(-1, 12, 2), np.linspace(-1, 12, 2), c='k', alpha=0.4)

    pfn.define_standard_plot(
        ax,
        xtitle=r'kinetic diameter [$\mathrm{\AA}$]',
        ytitle=r'$d$ [$\mathrm{\AA}$]',
        xlim=(1, 10),
        ylim=(1, 10))

    # Legend entry: a labelled point placed outside the axis limits.
    for scale_dir in scale_info:
        if scale_dir != 'scale09_test':
            continue
        sc, C, M, A, E = scale_info[scale_dir]
        ax.scatter(-100,
                   -100,
                   c=C,
                   edgecolors=E,
                   marker=M,
                   alpha=A,
                   s=S,
                   label=f'vdW scale = {sc}')
    # ax.legend(loc=2, fontsize=14)
    fig.tight_layout()
    fig.savefig("parity_scalecf.pdf", dpi=720, bbox_inches='tight')
コード例 #21
0
def dist_cf_scale_with_known(molecules, diameters, known_df, pars, scale_info):
    """
    Produce a line plot of distributions of the deviations of calculated
    diameters and known kinetic diameters for multiple input params.

    Parameters
    ----------
    molecules
        Iterable of molecule names.
    diameters
        Mapping from molecule name to its known kinetic diameter
        (anything `float()` accepts).
    known_df, pars
        Unused here; kept so the signature stays unchanged.
    scale_info
        Mapping from result directory name to a 5-tuple
        ``(scale, colour, marker, alpha, edgecolour)``.

    For each directory the results are cached in ``scale_sc_<dir>.txt``
    and read back from there when that file exists. The figure is saved
    to ``dist_scalecf.pdf``.

    """

    # The binning is the same for every parameter set, so define it once.
    # Doing this before the loop also keeps `xlim` defined when
    # `scale_info` is empty.
    width = 0.1
    xlim = (-2, 2)
    X_bins = np.arange(xlim[0], xlim[1], width)
    bin_centres = X_bins[:-1] + width / 2

    fig, ax = plt.subplots(figsize=(8, 5))
    for scale_dir in scale_info:
        kin_diams = []
        mid_diams = []
        sc, C, M, A, E = scale_info[scale_dir]
        scale_output = f'scale_sc_{scale_dir}.txt'
        if os.path.exists(scale_output):
            with open(scale_output, 'r') as f:
                for line in f:
                    line = line.rstrip()
                    if not line:
                        # Tolerate blank (e.g. trailing) lines.
                        continue
                    # Split from the right: the numeric fields never
                    # contain '__', but a molecule name may.
                    _, kin_diam, mid_diam = line.rsplit('__', 2)
                    kin_diams.append(float(kin_diam))
                    mid_diams.append(float(mid_diam))
        else:
            with open(scale_output, 'w') as f:
                for name in molecules:
                    try:
                        kin_diam = float(diameters[name])
                    except ValueError:
                        print('no radius given for this molecule - skipped')
                        continue
                    out_file = (f"{scale_dir}/"
                                f"{name.replace(' ', '_').replace('/', '__')}"
                                '_diam_result.csv')
                    if not os.path.exists(out_file):
                        continue
                    results = pd.read_csv(out_file)
                    if len(results) == 0:
                        continue
                    # Smallest value of the 'diam2' column over all rows.
                    mid_diam = min(results['diam2'])
                    kin_diams.append(float(kin_diam))
                    mid_diams.append(float(mid_diam))
                    f.write(name + '__' + str(kin_diam) + '__' +
                            str(mid_diam) + '\n')

        if len(kin_diams) >= 2:
            # NOTE: `corr` is the full pearsonr result, printed under the
            # existing 'R^2' label.
            corr = pearsonr(kin_diams, mid_diams)
            MAE = mean_absolute_error(kin_diams, mid_diams)
            chi2 = sum(
                ((j - i)**2) / i for i, j in zip(kin_diams, mid_diams)
            )
            print(f'{scale_dir} R^2: {corr}, MAE: {MAE}, chi^2: {chi2}')
        else:
            # The statistics are undefined for fewer than two points;
            # skip them instead of aborting the whole figure.
            print(f'{scale_dir}: fewer than 2 data points - '
                  'statistics skipped')

        # Signed deviation: calculated minus known diameter.
        X_vals = [i - j for i, j in zip(mid_diams, kin_diams)]
        hist, _ = np.histogram(a=X_vals, bins=X_bins)
        # Draw the histogram as a line through the bin centres.
        ax.plot(bin_centres,
                hist,
                c=C,
                lw=1.5,
                marker='o',
                alpha=1.0,
                label=f'vdW scale = {sc}')

    # NOTE(review): the axis label shows an absolute value, but the
    # plotted deviations are signed (xlim spans -2 to 2) - confirm which
    # is intended.
    pfn.define_standard_plot(
        ax,
        xtitle=r'|$d$ - kinetic diameter| [$\mathrm{\AA}$]',
        ytitle='count',
        xlim=xlim,
        ylim=None)

    ax.legend(fontsize=14)
    fig.tight_layout()
    fig.savefig("dist_scalecf.pdf", dpi=720, bbox_inches='tight')
コード例 #22
0
def target_conformer_plot(parameter_sets, molecules, test_mol, full_results,
                          colours, markers, properties):
    """
    Plot the diameter deviation at target conformer counts against
    molecular properties.

    One figure is written per property (``MW``, ``NHA``, ``NRB``) for the
    ``'N_conformers'`` parameter set, to
    ``min_of_mid_N_conformers_v_prop_<PROP>.pdf``. For every molecule in
    `molecules` that is also in `test_mol`, the fifth element of
    ``full_results[set][name][value]`` is taken for each parameter value,
    the entry for the last parameter value is subtracted, and the points
    whose parameter value equals 50 or 200 are plotted against
    ``properties[name][i]``, where ``i`` is the property's position in
    the list above.

    """
    # Conformer counts at which the deviation is sampled.
    conformer_targets = [50, 200]
    # (file-name key, x-axis label, x-axis limits); the position in this
    # tuple is also the index into properties[name].
    property_axes = (
        ('MW', 'MW [g/mol]', (0, 120)),
        ('NHA', 'no. heavy atoms', (0, 9)),
        ('NRB', 'no. rotatable bonds', (0, 6)),
    )

    for prop_index, (prop_key, axis_label, axis_range) in enumerate(
        property_axes
    ):
        for set_name in parameter_sets:
            if set_name != 'N_conformers':
                continue
            fig, ax = plt.subplots()
            for mol in molecules:
                if mol not in test_mol:
                    continue
                set_values = []
                min_mids = []
                prop_values = []
                for value in parameter_sets[set_name]:
                    _, _, _, _, min_mid = full_results[set_name][mol][value]
                    set_values.append(float(value))
                    min_mids.append(min_mid)
                    prop_values.append(properties[mol][prop_index])
                set_values = np.asarray(set_values)
                min_mids = np.asarray(min_mids)
                prop_values = np.asarray(prop_values)
                # Deviation from the result at the last parameter value
                # (presumably 1000 conformers, going by the axis label).
                deviation = min_mids - min_mids[-1]
                for target in conformer_targets:
                    at_target = set_values == target
                    ax.scatter(prop_values[at_target],
                               deviation[at_target],
                               c=colours[mol],
                               marker=markers[mol],
                               label=mol,
                               s=80)

            pfn.define_standard_plot(
                ax,
                xtitle=axis_label,
                ytitle=r'$d-d$(1000) [$\mathrm{\AA}$]',
                xlim=axis_range,
                ylim=(-0.1, 0.5))
            # Zero-deviation reference line.
            ax.axhline(y=0, c='k', linestyle='--')
            fig.tight_layout()
            fig.savefig(f"min_of_mid_{set_name}_v_prop_{prop_key}.pdf",
                        bbox_inches='tight',
                        dpi=720)
コード例 #23
0
def seed_test(seeds):
    """
    Compares the minimum diameter obtained for a set of molecules with
    different random seeds for the ETKDG algorithm.

    Parameters
    ----------
    seeds
        Iterable of seeds. Each must be convertible with `int()` and
        names the result directory ``seeds_<seed>``.

    Results are cached in ``seed_test.pkl`` and read back from there when
    that file exists. Seed/molecule pairs without a result are left out
    of the plot. The figure is saved to ``min_of_mid_seeds.pdf``.

    """

    molecules = {
        'n-hexane': 'CCCCCC',
        'n-heptane': 'CCCCCCC',
        'n-octane': 'CCCCCCCC',
        'toluene': 'CC1=CC=CC=C1',
        'p-nitrophenol': 'C1=CC(=CC=C1[N+](=O)[O-])O',
        'p-nitrophenyl butyrate': 'CCCC(=O)OC1=CC=C(C=C1)[N+](=O)[O-]',
        'butyric acid': 'CCCC(=O)O',
    }
    colours = {
        'n-hexane': 'k',
        'n-heptane': 'r',
        'n-octane': 'b',
        'toluene': 'green',
        'p-nitrophenol': 'purple',
        'p-nitrophenyl butyrate': 'orange',
        'butyric acid': 'darkgray',
    }
    markers = {
        'n-hexane': 'o',
        'n-heptane': 'X',
        'n-octane': 'D',
        'toluene': 'P',
        'p-nitrophenol': '^',
        'p-nitrophenyl butyrate': '>',
        'butyric acid': '<',
    }

    seed_output = "seed_test.pkl"

    if os.path.exists(seed_output):
        # NOTE: unpickling can execute arbitrary code; this file should
        # only ever be the cache written by this function.
        with open(seed_output, "rb") as f:
            full_results = pickle.load(f)
    else:
        # Empty dict placeholders mark seed/molecule pairs with no result.
        full_results = {
            t: {name: {} for name in molecules} for t in seeds
        }

        for name in molecules:
            for t in seeds:
                output_dir = f'seeds_{t}'
                out_file = (f"{output_dir}/"
                            f"{name.replace(' ', '_').replace('/', '__')}"
                            '_diam_result.csv')
                if not os.path.exists(out_file):
                    continue
                results = pd.read_csv(out_file)
                if len(results) == 0:
                    continue
                min_diam_avg = np.average(results['diam1'])
                min_diam_std = np.std(results['diam1'])
                mid_diam_avg = np.average(results['diam2'])
                mid_diam_std = np.std(results['diam2'])
                min_mid = min(results['diam2'])
                result = (min_diam_avg, min_diam_std, mid_diam_avg,
                          mid_diam_std, min_mid)
                full_results[t][name] = result
        # Save the cache, closing the file handle deterministically.
        with open(seed_output, "wb") as f:
            pickle.dump(full_results, f)

    fig, ax = plt.subplots()
    for name in molecules:
        X = []
        Y = []
        for t in seeds:
            RES = full_results.get(t, {}).get(name)
            if not RES:
                # No result for this seed/molecule: either the empty
                # placeholder, or an entry absent from an older cache.
                continue
            _, _, _, _, min_mid = RES
            X.append(int(t))
            Y.append(min_mid)
        if not Y:
            print(name, '--', 'no results found')
            continue
        lowest = min(Y)
        print(name, '--', max(i - lowest for i in Y))
        ax.scatter(X, Y, c=colours[name], marker=markers[name], label=name)
    # Pause so the printed deviations can be read before continuing.
    input('^^ max dev from min_of_mid for seeds')
    t_lim = (0, 850000)
    t_name = 'random seed'
    pfn.define_standard_plot(ax,
                             xtitle=t_name,
                             ytitle=r'$d$ [$\mathrm{\AA}$]',
                             xlim=t_lim,
                             ylim=(4, 8))
    fig.tight_layout()
    fig.savefig("min_of_mid_seeds.pdf", dpi=720, bbox_inches='tight')
コード例 #24
0
def rxn_complexity(data, filename):
    """
    Scatter the change in BertzCT against the change in SAscore for
    every reaction.

    `data` is indexed by the columns ``'deltasa'``, ``'deltabct'`` and
    ``'max_mid_diam'``. All rows are drawn in light grey, and rows with
    ``max_mid_diam`` below 6.6 are drawn on top in a dark colour. The
    figure is saved to `filename`.

    """

    fig, ax = plt.subplots(figsize=(8, 5))
    axis_limits = {'xlim': (-10, 10), 'ylim': (-1000, 1000)}
    # Marker styling shared by both point sets.
    point_style = {
        'edgecolors': 'none',
        'marker': 'o',
        'alpha': 1.0,
        's': 40,
    }

    def draw_points(rows, colour, label):
        ax.scatter(
            rows['deltasa'],
            rows['deltabct'],
            c=colour,
            label=label,
            **point_style
        )

    draw_points(data, '#CCD1D1', 'full dataset')
    # Drawn second so these points sit on top of the full dataset.
    draw_points(
        data[data['max_mid_diam'] < 6.6],
        '#2C3E50',
        'viable reactions',
    )

    pfn.define_standard_plot(
        ax,
        ylim=axis_limits['ylim'],
        xlim=axis_limits['xlim'],
        ytitle=r'$\Delta$ BertzCT',
        xtitle=r'$\Delta$ SAscore',
    )

    ax.legend(fontsize=16)

    fig.tight_layout()
    fig.savefig(filename, dpi=720, bbox_inches='tight')