Ejemplo n.º 1
0
def test_fps_5(data):
    """Check a 100-bit ECFP transform with ``on_errors='nan'``.

    The ``data`` fixture must provide an ``'err_smis'`` entry. The result is
    expected to be a 4 x 100 ``DataFrame`` with NaN at positions (1, 10)
    and (2, 20).
    """
    featurizer = Fingerprints(
        featurizers='ECFP',
        n_jobs=1,
        input_type='any',
        on_errors='nan',
        n_bits=100,
    )
    result = featurizer.transform(data['err_smis'])

    assert isinstance(result, pd.DataFrame)
    assert result.shape == (4, 100)

    # Spot-check one cell in each of the two rows asserted to be NaN.
    matrix = result.values
    for row, col in ((1, 10), (2, 20)):
        assert np.isnan(matrix[row][col])
Ejemplo n.º 2
0
def test_fps_6(data):
    """Check the output shape of the default featurizers with ``counting=True``.

    The ``data`` fixture must provide a ``'smis'`` entry. The transform is
    expected to return a ``DataFrame`` of 4 rows by 16751 columns.
    """
    options = {
        'n_jobs': 1,
        'input_type': 'any',
        'on_errors': 'nan',
        'counting': True,
    }
    featurizer = Fingerprints(**options)
    table = featurizer.transform(data['smis'])

    assert isinstance(table, pd.DataFrame)
    expected_shape = (4, 16751)
    assert table.shape == expected_shape
Ejemplo n.º 3
0
def test_fps_3(data):
    """Transforming mixed input with ``input_type='any'`` must not raise.

    The input is the concatenation of ``data['mols']`` and ``data['smis']``
    (presumably molecule objects and SMILES strings; confirm against the
    fixture).
    """
    fps = Fingerprints(n_jobs=1, input_type='any')
    # Call directly instead of wrapping in ``except BaseException``: any error
    # now fails the test with its original traceback, and KeyboardInterrupt /
    # SystemExit are no longer intercepted and turned into an assertion.
    fps.transform(data['mols'] + data['smis'])
Ejemplo n.º 4
0
def test_fps_4(data):
    """Check both error modes of ``Fingerprints`` on ``data['err_smis']``.

    With default error handling the transform must raise ``ValueError``.
    With ``on_errors='nan'`` it must return a 4 x 10607 ``DataFrame`` with
    NaN at positions (1, 10) and (2, 20).
    """
    strict = Fingerprints(n_jobs=1, input_type='any')
    with pytest.raises(ValueError):
        strict.transform(data['err_smis'])

    lenient = Fingerprints(n_jobs=1, input_type='any', on_errors='nan')
    frame = lenient.transform(data['err_smis'])

    assert isinstance(frame, pd.DataFrame)
    assert frame.shape == (4, 10607)

    # Spot-check one cell in each of the two rows asserted to be NaN.
    matrix = frame.values
    for row, col in ((1, 10), (2, 20)):
        assert np.isnan(matrix[row][col])
Ejemplo n.º 5
0
def test_fps_1(data):
    """Check the default input handling of ``Fingerprints``.

    ``data['mols']`` must transform without error, while ``data['smis']``
    must be rejected with ``TypeError``.
    """
    fps = Fingerprints(n_jobs=1)

    # Call directly: an unexpected exception fails the test with its real
    # traceback instead of being swallowed by a bare ``except``.
    fps.transform(data['mols'])

    try:
        fps.transform(data['smis'])
    except TypeError:
        pass
    else:
        # Raise explicitly rather than ``assert False`` so the check survives
        # ``python -O``; the message now describes the actual failure.
        raise AssertionError('should raise TypeError')
Ejemplo n.º 6
0
def main():
    """Run the iQSPR example: load data, build models, sample and visualize.

    Reads ``./iQSPR_sample_data.csv``, builds forward models on ECFP
    fingerprints, learns an n-gram model from the SMILES column, picks 25
    initial molecules, then runs iQSPR under an annealing schedule and hands
    the results to ``visualize``.
    """
    # Read the in-house data set and keep a 3000-row subsample.
    full_table = pd.read_csv("./iQSPR_sample_data.csv")
    data_ss = full_table.sample(3000).reset_index()

    from xenonpy.descriptor import Fingerprints

    descriptor = Fingerprints(featurizers=['ECFP'], input_type='smiles')

    # Forward models. Alternative entry point kept for reference:
    #   prd_mdls = make_forward_model_with_iqspr_tools(data_ss, descriptor)
    prd_mdls, mdls = make_forward_model(data_ss, descriptor)

    # N-gram model, learned with method 2 (recommended). Alternatives:
    #   learn_n_gram0(data_ss['SMILES'])  -- method 0, not recommended
    #   learn_n_gram1(data_ss['SMILES'])  -- method 1, recommended
    n_gram = learn_n_gram2(data_ss['SMILES'])
    n_gram.set_params(del_range=[1, 20], max_len=500, reorder_prob=0.5)

    # Initial molecules for iQSPR: canonical SMILES of every row whose gap
    # exceeds 4, from which 25 are drawn with a fixed seed.
    # NOTE(review): the subsample above is taken before this seed is set, so
    # the seed presumably only governs the choice below -- confirm.
    # NOTE(review): the 'H**O-LUMO gap' key looks mangled -- verify against
    # the CSV header.
    np.random.seed(201903)
    candidates = []
    for smi, gap in zip(data_ss['SMILES'], data_ss['H**O-LUMO gap']):
        if gap > 4:
            candidates.append(Chem.MolToSmiles(Chem.MolFromSmiles(smi)))
    init_samples = np.random.choice(candidates, 25)

    # Annealing schedule: four consecutive segments joined into one array.
    schedule_segments = [
        np.linspace(0.01, 0.2, 20),
        np.linspace(0.21, 0.4, 10),
        np.linspace(0.4, 1, 10),
        np.linspace(1, 1, 10),
    ]
    beta = np.hstack(schedule_segments)

    iqspr_results = run_iqspr(prd_mdls, n_gram, init_samples, beta)

    visualize(iqspr_results, descriptor, mdls, data_ss, beta)
Ejemplo n.º 7
0
def test_fps_4(data):
    """Check error handling of ``Fingerprints`` and ``ECFP`` on bad input.

    ``Fingerprints`` with default error handling must raise ``ValueError``
    on ``data['err_smis']``. ``ECFP`` with ``on_errors='nan'`` must return a
    result of shape 4 x 2048 with NaN at positions [1][10] and [2][20].
    """
    fps = Fingerprints(n_jobs=1, input_type='any')
    try:
        fps.transform(data['err_smis'])
    except ValueError:
        pass
    else:
        # Raise explicitly rather than ``assert False`` so the check survives
        # ``python -O``; the message now describes the actual failure.
        raise AssertionError('should raise ValueError')

    fps = ECFP(n_jobs=1, input_type='any', on_errors='nan')
    # No surrounding bare ``except``: a failing assertion or an unexpected
    # exception reports its own message and traceback.
    ret = fps.transform(data['err_smis'])
    assert pd.DataFrame(data=ret).shape == (4, 2048)
    assert np.isnan(ret[1][10])
    assert np.isnan(ret[2][20])
Ejemplo n.º 8
0
def property_plot(smile, plot_path):
    """Plot predicted properties of one structure against the reference data.

    The SMILES string is converted to a fingerprint; if that yields a valid
    (NaN-free) row, two pickled models predict its dielectric constant and
    glass transition temperature. The prediction is drawn on top of the
    reference data set together with a dashed rectangle marking the target
    region, and the figure is saved to ``plot_path``.

    Args:
        smile: SMILES string of the structure to evaluate.
        plot_path: Destination passed to ``plt.savefig``.

    Returns:
        tuple: ``(dc_pre, gtt_pre)`` rounded to two decimals, or ``(0, 0)``
        when no valid fingerprint could be computed.
    """
    P_fp = Fingerprints(featurizers=fp_type,
                        input_type='smiles',
                        on_errors='nan').transform([smile])
    # Rows containing NaN (failed featurization) are discarded, so the frame
    # holds either one row or none.
    P_fp = P_fp.dropna()
    has_fingerprint = len(P_fp) == 1

    PolyGeno_property = pd.read_csv(
        "./data/forward_train_Polymer_genome_new_asterisk_R.csv")
    DC = np.ravel(PolyGeno_property['Dielectric.Constant'])
    GTT = np.ravel(PolyGeno_property['Glass.Transition.Temperature'])

    # NOTE: pickle.load executes arbitrary code from the file; these model
    # files must come from a trusted source.
    # Context managers make sure the file handles are closed.
    with open('./models/P_SMILES2DC_ElasticNet.sav', 'rb') as model_file:
        singleFP2dc = pickle.load(model_file)
    with open("./models/P_SMILES2GTT_ElasticNet.sav", 'rb') as model_file:
        singleFP2gtt = pickle.load(model_file)

    property_region = {
        'Dielectric_Constant': [3, 4],
        'Glass_Transition_Temperature': [300, 350]
    }
    property_minmax = {
        'Dielectric_Constant': [min(DC), max(DC)],
        'Glass_Transition_Temperature': [min(GTT), max(GTT)]
    }

    plt.figure(figsize=(10, 10))
    plt.xlim(property_minmax["Dielectric_Constant"][0] - 1,
             property_minmax["Dielectric_Constant"][1] + 1)
    plt.ylim(property_minmax["Glass_Transition_Temperature"][0] - 20,
             property_minmax["Glass_Transition_Temperature"][1] + 20)
    plt.plot(DC, GTT, 'k.', markersize=5, alpha=0.4, label='existing data')

    dc_pre = gtt_pre = None
    if has_fingerprint:
        dc_pre = singleFP2dc.predict(P_fp)
        gtt_pre = singleFP2gtt.predict(P_fp)
        plt.scatter(dc_pre,
                    gtt_pre,
                    s=250,
                    c='g',
                    edgecolor='k',
                    label='product')

    plt.xlabel("Dielectric Constant", fontsize=20)
    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    # The resulting text is unchanged.
    plt.ylabel(r"Glass Transition Temperature ($^\circ$C)", fontsize=20)
    plt.title("Properties of chemical structures", fontsize=20)
    plt.legend(loc='lower right', fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    # Closed dashed rectangle around the target property region.
    dc_low, dc_high = property_region["Dielectric_Constant"]
    gtt_low, gtt_high = property_region["Glass_Transition_Temperature"]
    plt.plot([dc_low, dc_low, dc_high, dc_high, dc_low],
             [gtt_low, gtt_high, gtt_high, gtt_low, gtt_low],
             'r--')
    plt.savefig(plot_path)
    plt.close()

    if not has_fingerprint:
        # Previously the scalar fallback 0 was indexed with [0] and raised
        # TypeError; return the fallback pair directly instead.
        return (0, 0)
    return (round(dc_pre[0], 2), round(gtt_pre[0], 2))
Ejemplo n.º 9
0
def test_fps_3(data):
    """Transforming mixed input with ``input_type='any'`` must not raise.

    The input is ``data['mols']`` concatenated with ``data['smis']``; the
    test passes as long as the transform completes.
    """
    featurizer = Fingerprints(n_jobs=1, input_type='any')
    mixed_inputs = data['mols'] + data['smis']
    featurizer.transform(mixed_inputs)
Ejemplo n.º 10
0
def test_fps_2(data):
    """Check ``input_type='smiles'``: rejects ``data['mols']``, accepts ``data['smis']``.

    Transforming ``data['mols']`` must raise ``TypeError``; transforming
    ``data['smis']`` must complete without error.
    """
    settings = {'n_jobs': 1, 'input_type': 'smiles'}
    featurizer = Fingerprints(**settings)

    with pytest.raises(TypeError):
        featurizer.transform(data['mols'])

    # The same instance must still handle the accepted input afterwards.
    featurizer.transform(data['smis'])
Ejemplo n.º 11
0
def test_fps_1(data):
    """Check the default input type: accepts ``data['mols']``, rejects ``data['smis']``.

    Transforming ``data['mols']`` must complete without error; transforming
    ``data['smis']`` must raise ``TypeError``.
    """
    featurizer = Fingerprints(n_jobs=1)

    # Accepted input: any exception here fails the test.
    featurizer.transform(data['mols'])

    # Rejected input: a TypeError is required.
    with pytest.raises(TypeError):
        featurizer.transform(data['smis'])