Ejemplo n.º 1
0
class MLP:
    """Fully connected two-class classifier assembled on a TensorGraph.

    The graph takes feature rows of width 1024 and passes them through
    Dense layers of 256, 64, 16 and 2 units (ReLU on the first two, no
    activation on the last two). A SoftMax over the final 2 units is
    registered as the graph output, and the training loss is the mean
    softmax cross-entropy between the one-hot label and the final Dense
    layer.
    """

    def __init__(self, batch_size):
        """Store the batch size and build the computation graph.

        Args:
            batch_size: batch size passed to ``dataset.iterbatches`` by
                ``fit`` and ``predict``.
        """
        # save parameters
        self.batch_size = batch_size

        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)
        self.feature = Feature(shape=(None, 1024))

        # build graph
        self.build_graph()

    def build_graph(self):
        """Create the layers, register the output and set the loss on ``self.tg``."""
        d1 = Dense(out_channels=256, activation_fn=tf.nn.relu, in_layers=[self.feature])
        d2 = Dense(out_channels=64, activation_fn=tf.nn.relu, in_layers=[d1])
        # The keyword is ``activation_fn`` on every Dense layer, matching
        # d1/d2 above; ``None`` leaves these two layers without activation.
        d3 = Dense(out_channels=16, activation_fn=None, in_layers=[d2])
        d4 = Dense(out_channels=2, activation_fn=None, in_layers=[d3])
        softmax = SoftMax(in_layers=[d4])
        self.tg.add_output(softmax)

        # The loss is computed from d4 (pre-softmax values), not from the
        # SoftMax output layer.
        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d4])
        loss = ReduceMean(in_layers=[cost])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs):
        """Train the graph on ``dataset`` for ``epochs`` passes.

        Args:
            dataset: object exposing ``iterbatches`` (see ``data_generator``).
            epochs: number of passes over ``dataset``.
        """
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        """Run the graph on ``dataset`` and return the predictions.

        Returns:
            The result of ``predict_on_generator`` with one extra leading
            axis of length 1 added by ``np.expand_dims``.
        """
        # NOTE(review): batches are requested with pad_batches=True, so the
        # result may include rows for padded samples — confirm with caller.
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size, epochs=1):
        """Yield one feed dict per batch, repeating the dataset ``epochs`` times.

        Args:
            dataset: object exposing ``iterbatches(batch_size, pad_batches,
                deterministic)`` that yields ``(X, y, w, ids)`` tuples.
            batch_size: batch size forwarded to ``iterbatches``.
            epochs: number of passes over ``dataset``.

        Yields:
            dict mapping ``self.label`` to the one-hot encoding of the first
            column of ``y`` and ``self.feature`` to ``X``.
        """
        for _ in range(epochs):
            # Weights and ids are produced by the iterator but not fed.
            for X, y, _w, _ids in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.feature: X}  # data for feed

                yield feed_dict
Ejemplo n.º 2
0
    # Build the test dataset from the `new_test_data` features.
    # NOTE(review): labels, weights and ids are all sliced from
    # `train_dataset` (rows 5000 onward) although the result is named
    # "test" — confirm this is the intended source for the test split.
    new_test_dataset = dc.data.datasets.DiskDataset.from_numpy(
        new_test_data,
        train_dataset.y[5000:],
        train_dataset.w[5000:],
        train_dataset.ids[5000:],
        data_dir=None)
    print("Test Data - added RP - tox21")

# Train the graph on the training dataset for NUM_EPOCHS passes.
tg.fit_generator(data_generator(new_train_dataset, epochs=NUM_EPOCHS))

# ROC-AUC metric in classification mode; np.mean is passed as the second
# argument (presumably the cross-task aggregation function — confirm).
metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                           np.mean,
                           mode="classification")

print("Evaluating model")
# Training split: predict, reshape the predictions with reshape_y_pred
# (helper defined outside this excerpt), then score against the stored
# labels and weights.
train_predictions = tg.predict_on_generator(
    data_generator(new_train_dataset, predict=True))
train_predictions = reshape_y_pred(new_train_dataset.y, train_predictions)
train_scores = metric.compute_metric(new_train_dataset.y, train_predictions,
                                     new_train_dataset.w)
print("Training ROC-AUC Score: %f" % train_scores)

# Validation split: same predict / reshape / score sequence.
valid_predictions = tg.predict_on_generator(
    data_generator(new_valid_dataset, predict=True))
valid_predictions = reshape_y_pred(new_valid_dataset.y, valid_predictions)
valid_scores = metric.compute_metric(new_valid_dataset.y, valid_predictions,
                                     new_valid_dataset.w)
print("Valid ROC-AUC Score: %f" % valid_scores)

# Test split: predictions are produced and reshaped here; no score is
# computed within this excerpt.
test_predictions = tg.predict_on_generator(
    data_generator(new_test_dataset, predict=True))
test_predictions = reshape_y_pred(new_test_dataset.y, test_predictions)
Ejemplo n.º 3
0
# Number of epochs passed to data_generator when training.
n_epoch = 300
# "fixed" validation: one shuffled five-way split, with chunk `cv` held out.
if (validation == "fixed"):
    # Shuffle the nd sample indices in place.
    iii = np.arange(nd)
    dd = nd // 5  # base chunk size
    np.random.shuffle(iii)
    # Split points start at nd % dd and advance by dd. Using entries 1..4
    # gives five chunks: the first holds dd + nd % dd indices, the next
    # three hold dd each, and the last takes whatever remains.
    i_bin = np.arange(nd % dd, nd, dd)
    iii_cv = np.split(iii, i_bin[1:5])

    # Chunk `cv` is the test set; every other index is used for training.
    iii_tst = iii_cv[cv]
    iii_trn = np.setdiff1d(iii, iii_tst)
    dataset_tst = dataset_all.select(iii_tst)
    dataset_trn = dataset_all.select(iii_trn)

    # Train on the training subset, then predict on the held-out subset.
    tg.fit_generator(data_generator(dataset_trn, n_epoch=n_epoch),
                     restore=False)
    pred_cv = tg.predict_on_generator(data_generator(dataset_tst,
                                                     predict=True))
    # accuracy_multi_molecules is defined outside this excerpt; its two
    # results are indexed per task / saved as arrays below.
    conf_cv, accu_cv = accuracy_multi_molecules(pred_cv, dataset_tst.y)

    # Write per-task predictions and conf_cv entries, then the accuracies.
    # NOTE(review): conf_cv is saved under a 'conv_cv...' file name —
    # confirm whether that spelling is intentional before relying on it.
    for ts in range(ntask):
        np.savetxt('output/pred_cv{}_ts{}.txt'.format(cv, ts), pred_cv[ts])
        np.savetxt('output/conv_cv{}_ts{}.txt'.format(cv, ts), conf_cv[ts])
    np.savetxt('output/accu_cv{}.txt'.format(cv), accu_cv)

# "cv5" validation: iterate over all five folds of a seeded, shuffled KFold.
if (validation == "cv5"):
    ## NOTE: option "restore=False" for tg.fit_generator() does not seem to be working...
    kf5 = KFold(n_splits=5, random_state=12345, shuffle=True)
    # Accumulators initialised before the fold loop (presumably filled
    # once per fold — the loop body is outside this excerpt).
    pred_list = []
    conf_list = []
    accu_list = []
    ee = 0
    # Each iteration receives the train and test index arrays of one fold.
    for iii_trn, iii_tst in kf5.split(range(nd)):
Ejemplo n.º 4
0
class GCN:
    """Graph-convolution two-class classifier assembled on a TensorGraph.

    The graph stacks two GraphConv -> BatchNorm -> GraphPool stages, a
    128-unit Dense layer with BatchNorm, a GraphGather layer and a final
    2-unit Dense layer. A SoftMax over that layer is registered as the
    graph output; the loss is a WeightedError over the softmax
    cross-entropy and the per-sample weights.
    """

    def __init__(self, batch_size=50):
        """Store the batch size, declare the input features and build the graph.

        Args:
            batch_size: batch size passed to ``dataset.iterbatches`` and to
                the GraphGather layer.
        """
        # save parameters
        self.batch_size = batch_size

        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)

        # define features
        self.atom_features = Feature(shape=(None, 75))  # feature of atom. ex) atom / degree / is aromatic and so on
        self.indexing = Feature(shape=(None, 2), dtype=tf.int32)  # index of atoms in molecules sorted by degree
        self.membership = Feature(shape=(None,), dtype=tf.int32)  # membership of atoms in molecule
        self.deg_adj_list = [Feature(shape=(None, i), dtype=tf.int32) for i in range(1, 12)]  # adj list with degree

        # build graph
        self.build_graph()

    def build_graph(self):
        """Create the layers, register the output and set the loss on ``self.tg``."""
        # Every graph layer receives the same topology inputs after its
        # own data input, so build that tail once.
        topology = [self.indexing, self.membership] + self.deg_adj_list

        # Layer 1
        gc1 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=[self.atom_features] + topology)
        bn1 = BatchNorm(in_layers=[gc1])
        gp1 = GraphPool(in_layers=[bn1] + topology)

        # Layer 2
        gc2 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=[gp1] + topology)
        bn2 = BatchNorm(in_layers=[gc2])
        gp2 = GraphPool(in_layers=[bn2] + topology)

        # Dense layer 1
        d1 = Dense(out_channels=128, activation_fn=tf.nn.relu, in_layers=[gp2])
        bn3 = BatchNorm(in_layers=[d1])

        # Graph gather layer
        # NOTE: the keyword is ``activation_fn``, as on the Dense/GraphConv
        # layers above; an ``activation=`` keyword is not a GraphGather
        # parameter in DeepChem's TensorGraph API, so tanh would not be
        # applied if it were spelled that way.
        gg1 = GraphGather(batch_size=self.batch_size, activation_fn=tf.nn.tanh, in_layers=[bn3] + topology)

        # Output dense layer
        d2 = Dense(out_channels=2, activation_fn=None, in_layers=[gg1])
        softmax = SoftMax(in_layers=[d2])
        self.tg.add_output(softmax)

        # Set loss function (computed from d2, not from the SoftMax output)
        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d2])
        self.weight = Weights(shape=(None, 1))
        loss = WeightedError(in_layers=[cost, self.weight])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs:int):
        """Train the graph on ``dataset`` for ``epochs`` passes.

        Args:
            dataset: object exposing ``iterbatches`` (see ``data_generator``).
            epochs: number of passes over ``dataset``.
        """
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        """Run the graph on ``dataset`` and return the predictions.

        Returns:
            The result of ``predict_on_generator`` with one extra leading
            axis of length 1 added by ``np.expand_dims``.
        """
        # NOTE(review): batches are requested with pad_batches=True, so the
        # result may include rows for padded samples — confirm with caller.
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size:int, epochs=1):
        """Yield one feed dict per batch, repeating the dataset ``epochs`` times.

        Args:
            dataset: object exposing ``iterbatches(batch_size, pad_batches,
                deterministic)`` that yields ``(X, y, w, ids)`` tuples,
                where ``X`` can be passed to ``ConvMol.agglomerate_mols``.
            batch_size: batch size forwarded to ``iterbatches``.
            epochs: number of passes over ``dataset``.

        Yields:
            dict feeding the label (one-hot of the first column of ``y``),
            the weights, and the atom features, degree slices, membership
            and degree adjacency lists of the agglomerated molecules.
        """
        for _ in range(epochs):
            for X, y, w, _ids in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.weight: w}  # data for feed
                batch_mol = ConvMol.agglomerate_mols(X)
                feed_dict[self.atom_features] = batch_mol.get_atom_features()
                feed_dict[self.indexing] = batch_mol.deg_slice
                feed_dict[self.membership] = batch_mol.membership
                # Entry 0 of the adjacency lists is skipped: entry i feeds
                # the placeholder at position i - 1.
                adjacency = batch_mol.get_deg_adjacency_lists()
                for slot, adj in enumerate(adjacency[1:]):
                    feed_dict[self.deg_adj_list[slot]] = adj

                yield feed_dict