Ejemplo n.º 1
0
def graph_conv_training():
    """Train a graph-convolution regression model and report it in the GUI.

    All inputs are read from module-level objects via ``.get()``
    (``t_csv``, ``t_task``, ``t_smiles``, ``t_id``, ``t_epochs``,
    ``t_savename``) -- presumably Tk variables; confirm against the caller.

    Steps:
        1. Featurize the CSV with ``ConvMolFeaturizer`` and split it randomly
           into a train and a test set.
        2. Fit a single-task ``GraphConvTensorGraph`` regressor on the train
           set for at least one epoch and save it with ``model.save()``.
        3. Evaluate R2 on both splits and push the results to ``t_train_r2``
           and ``t_test_r2``.
        4. Save a measured-vs-predicted scatter plot to ``score-tmp.png``,
           resize it to 400x400 and draw it on ``canvas``.
        5. Write the train predictions and measurements to
           ``pred_and_meas_train.csv``.

    Side effects: rebinds the module-level ``image_score``; writes the two
    files above into the current working directory.
    """
    import matplotlib.pyplot as plt
    from PIL import Image

    global image_score

    plot_path = 'score-tmp.png'

    # Seed before anything random happens. The original seeded only after the
    # split, so the train/test partition changed from run to run.
    np.random.seed(1)
    random.seed(1)

    graph_featurizer = dc.feat.graph_features.ConvMolFeaturizer()
    loader = dc.data.data_loader.CSVLoader(tasks=[t_task.get()],
                                           smiles_field=t_smiles.get(),
                                           id_field=t_id.get(),
                                           featurizer=graph_featurizer)
    dataset = loader.featurize(t_csv.get())

    splitter = dc.splits.splitters.RandomSplitter()
    trainset, testset = splitter.train_test_split(dataset)

    # Printed for reference only: the preset values are not fed to the model.
    print(dc.molnet.preset_hyper_parameters.hps['graphconvreg'])

    # Build exactly one model. The original first constructed a throw-away
    # GraphConvModel and immediately overwrote it with this one.
    model = dc.models.GraphConvTensorGraph(1,
                                           batch_size=48,
                                           learning_rate=1e-3,
                                           use_queue=False,
                                           mode='regression',
                                           model_dir=t_savename.get())

    # Fit on the training split only; fitting on the full dataset would leak
    # the test molecules into training and make the test score meaningless.
    model.fit(trainset, nb_epoch=max(1, int(t_epochs.get())))

    metric = dc.metrics.Metric(dc.metrics.r2_score)

    print('epoch: ', t_epochs.get())
    print("Evaluating model")
    train_score = model.evaluate(trainset, [metric])
    test_score = model.evaluate(testset, [metric])

    model.save()

    # Flatten to 1-D: with a single task the arrays carry a trailing task
    # axis, which pandas refuses as a DataFrame column.
    pred_train = np.ravel(model.predict(trainset))
    pred_test = np.ravel(model.predict(testset))
    y_train = np.ravel(np.array(trainset.y, dtype=np.float32))
    y_test = np.ravel(np.array(testset.y, dtype=np.float32))

    # One figure only, closed after saving so repeated runs do not pile up
    # open matplotlib figures.
    fig = plt.figure(figsize=(5, 5))
    plt.scatter(y_train, pred_train, label='Train', c='blue')
    plt.title('Graph Convolution')
    plt.xlabel('Measured value')
    plt.ylabel('Predicted value')
    plt.scatter(y_test, pred_test, c='lightgreen', label='Test', alpha=0.8)
    plt.legend(loc=4)
    plt.savefig(plot_path)
    plt.close(fig)

    # Shrink the plot to the 400x400 size drawn on the canvas and keep the
    # resized file on disk, as before.
    with Image.open(plot_path) as img:
        img_resize = img.resize((400, 400), Image.LANCZOS)
    img_resize.save(plot_path)

    # Stored in a module-level name -- presumably so Tk keeps a live reference
    # to the image after this function returns.
    image_score = ImageTk.PhotoImage(img_resize, master=frame1)
    canvas.create_image(200, 200, image=image_score)

    # R2 scores as returned by model.evaluate()
    print("Train score")
    print(train_score)
    t_train_r2.set(train_score)

    print("Test scores")
    print(test_score)
    t_test_r2.set(test_score)

    # NOTE(review): RMSE is not computed here; the original only held
    # placeholder values and a disabled reporting block.

    df_save = pd.DataFrame({'pred_train': pred_train, 'meas_train': y_train})
    df_save.to_csv('pred_and_meas_train.csv')

    print('finish!')
# Fetch Tox21 featurized for graph convolutions; the datasets come back as a
# (train, valid, test) triple.
tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = tox21_datasets
print(train_dataset.data_dir, valid_dataset.data_dir, sep="\n")

# Score with ROC-AUC, combined across tasks with np.mean.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

# Mini-batch size handed to the model.
batch_size = 50

# One classification output per Tox21 task.
model = GraphConvModel(
    len(tox21_tasks), batch_size=batch_size, mode='classification')
model.fit(train_dataset, nb_epoch=10)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores", train_scores, sep="\n")

print("Validation scores", valid_scores, sep="\n")
Ejemplo n.º 3
0
# Featurize ./test.csv: 'ACTIVITY' is the task column, 'smiles' holds the
# structures. `graph_featurizer` is defined outside this fragment.
loader_test = dc.data.data_loader.CSVLoader(
    tasks=['ACTIVITY'],
    smiles_field="smiles",
    featurizer=graph_featurizer)
dataset_test = loader_test.featurize('./test.csv')

# In[9]:

# Single-task regression model with dropout, fitted on `dataset_train`
# (also defined outside this fragment).
model = GraphConvModel(n_tasks=1, mode='regression', dropout=0.2)
model.fit(dataset_train, nb_epoch=1000)

# In[10]:

# Print the Pearson R^2 score for the training data first, then the test data.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
for _scored_set in (dataset_train, dataset_test):
    print(model.evaluate(_scored_set, [metric]))

# In[11]:

test_preds = model.predict(dataset_test)

# In[12]:

import pandas as pd

# In[13]:

# Dump the raw test predictions.
print(test_preds)