def test_set_optimizer(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(learning_rate=0.01, use_queue=False)
  tg.add_output(output)
  tg.set_loss(loss)
  global_step = tg.get_global_step()
  learning_rate = ExponentialDecay(
      initial_rate=0.1, decay_rate=0.96, decay_steps=100000)
  tg.set_optimizer(GradientDescent(learning_rate=learning_rate))
  tg.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(tg.predict_on_batch(X))
  tg.save()
  tg1 = TensorGraph.load_from_dir(tg.model_dir)
  prediction2 = np.squeeze(tg1.predict_on_batch(X))
  assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
def test_save_load(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(learning_rate=0.01)
  tg.add_output(output)
  tg.set_loss(loss)
  submodel_loss = ReduceSum(in_layers=smce)
  submodel_opt = Adam(learning_rate=0.002)
  submodel = tg.create_submodel(
      layers=[dense], loss=submodel_loss, optimizer=submodel_opt)
  tg.fit(dataset, nb_epoch=1)
  prediction = np.squeeze(tg.predict_on_batch(X))
  tg.save()
  dirpath = tempfile.mkdtemp()
  shutil.rmtree(dirpath)
  shutil.move(tg.model_dir, dirpath)
  tg1 = TensorGraph.load_from_dir(dirpath)
  prediction2 = np.squeeze(tg1.predict_on_batch(X))
  assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
def test_save_load(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(learning_rate=0.01)
  tg.add_output(output)
  tg.set_loss(loss)
  tg.fit(dataset, nb_epoch=1)
  prediction = np.squeeze(tg.predict_on_batch(X))
  tg.save()
  tg1 = TensorGraph.load_from_dir(tg.model_dir)
  prediction2 = np.squeeze(tg1.predict_on_batch(X))
  assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
class MLP:

  def __init__(self, batch_size):
    # save parameters
    self.batch_size = batch_size
    # define tensorgraph
    self.tg = TensorGraph(use_queue=False)
    self.feature = Feature(shape=(None, 1024))
    # build graph
    self.build_graph()

  def build_graph(self):
    # four dense layers narrowing to a two-class softmax output
    d1 = Dense(out_channels=256, activation_fn=tf.nn.relu, in_layers=[self.feature])
    d2 = Dense(out_channels=64, activation_fn=tf.nn.relu, in_layers=[d1])
    d3 = Dense(out_channels=16, activation_fn=None, in_layers=[d2])
    d4 = Dense(out_channels=2, activation_fn=None, in_layers=[d3])
    softmax = SoftMax(in_layers=[d4])
    self.tg.add_output(softmax)
    # cross-entropy loss against one-hot labels
    self.label = Label(shape=(None, 2))
    cost = SoftMaxCrossEntropy(in_layers=[self.label, d4])
    loss = ReduceMean(in_layers=[cost])
    self.tg.set_loss(loss)

  def fit(self, dataset, epochs):
    self.tg.fit_generator(
        self.data_generator(dataset, self.batch_size, epochs=epochs))

  def predict(self, dataset):
    pred = self.tg.predict_on_generator(
        self.data_generator(dataset, self.batch_size))
    return np.expand_dims(pred, axis=0)

  def data_generator(self, dataset, batch_size, epochs=1):
    for e in range(epochs):
      for X, y, w, idx in dataset.iterbatches(
          batch_size, pad_batches=True, deterministic=True):
        # data for feed
        feed_dict = {self.label: to_one_hot(y[:, 0]), self.feature: X}
        yield feed_dict
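# A minimal usage sketch for the MLP class above (not part of the original
# code): it assumes the dataset's X matrix already holds 1024-bit fingerprint
# features (e.g. ECFP); the synthetic data here is purely illustrative.
import numpy as np
import deepchem as dc

X = np.random.randint(0, 2, size=(100, 1024)).astype(np.float32)  # fake fingerprints
y = np.random.randint(0, 2, size=(100, 1))  # fake binary labels
dataset = dc.data.NumpyDataset(X, y)

mlp = MLP(batch_size=50)
mlp.fit(dataset, epochs=10)
probs = mlp.predict(dataset)  # class probabilities from the SoftMax output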
def graph_conv_net(batch_size, prior, num_task):
  """Build a TensorGraph for a multilabel classification task.

  Returns the graph plus its feature, label and weight layers.
  """
  tg = TensorGraph(use_queue=False)
  if prior:
    add_on = num_task
  else:
    add_on = 0
  atom_features = Feature(shape=(None, 75 + 2 * add_on))
  circular_features = Feature(shape=(batch_size, 256), dtype=tf.float32)

  degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
  membership = Feature(shape=(None,), dtype=tf.int32)
  deg_adjs = []
  for i in range(0, 10 + 1):
    deg_adj = Feature(shape=(None, i + 1), dtype=tf.int32)
    deg_adjs.append(deg_adj)

  # two graph convolution blocks, each followed by batch norm and pooling
  gc1 = GraphConv(
      64 + add_on,
      activation_fn=tf.nn.elu,
      in_layers=[atom_features, degree_slice, membership] + deg_adjs)
  batch_norm1 = BatchNorm(in_layers=[gc1])
  gp1 = GraphPool(in_layers=[batch_norm1, degree_slice, membership] + deg_adjs)
  gc2 = GraphConv(
      64 + add_on,
      activation_fn=tf.nn.elu,
      in_layers=[gc1, degree_slice, membership] + deg_adjs)
  batch_norm2 = BatchNorm(in_layers=[gc2])
  gp2 = GraphPool(in_layers=[batch_norm2, degree_slice, membership] + deg_adjs)
  add = Concat(in_layers=[gp1, gp2])
  add = Dropout(0.5, in_layers=[add])
  dense = Dense(out_channels=128, activation_fn=tf.nn.elu, in_layers=[add])
  batch_norm3 = BatchNorm(in_layers=[dense])
  readout = GraphGather(
      batch_size=batch_size,
      activation_fn=tf.nn.tanh,
      in_layers=[batch_norm3, degree_slice, membership] + deg_adjs)
  batch_norm4 = BatchNorm(in_layers=[readout])

  # dense branch over the circular (fingerprint) features; the original
  # repeated this block verbatim, so only one copy is kept here
  dense1 = Dense(
      out_channels=128, activation_fn=tf.nn.elu, in_layers=[circular_features])
  dense1 = BatchNorm(in_layers=[dense1])
  dense1 = Dropout(0.5, in_layers=[dense1])

  # merge the graph readout with the fingerprint branch
  merge_feat = Concat(in_layers=[dense1, batch_norm4])
  merge = Dense(out_channels=256, activation_fn=tf.nn.elu, in_layers=[merge_feat])

  # one two-class softmax head per task, combined into a weighted loss
  costs = []
  labels = []
  for task in range(num_task):
    classification = Dense(out_channels=2, activation_fn=None, in_layers=[merge])
    softmax = SoftMax(in_layers=[classification])
    tg.add_output(softmax)
    label = Label(shape=(None, 2))
    labels.append(label)
    cost = SoftMaxCrossEntropy(in_layers=[label, classification])
    costs.append(cost)
  all_cost = Stack(in_layers=costs, axis=1)
  weights = Weights(shape=(None, num_task))
  loss = WeightedError(in_layers=[all_cost, weights])
  tg.set_loss(loss)
  return tg, atom_features, circular_features, degree_slice, membership, deg_adjs, labels, weights
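# A hypothetical invocation of graph_conv_net (not part of the original code),
# showing how the returned layers unpack; the batch size and task count are
# illustrative. The feature layers would then be fed per batch by a generator
# much like GCN.data_generator further below.
(tg, atom_features, circular_features, degree_slice, membership, deg_adjs,
 labels, weights) = graph_conv_net(batch_size=50, prior=False, num_task=12)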
import sys
from random import shuffle

import numpy as np
import tensorflow as tf
import deepchem as dc
from deepchem.models.tensorgraph.models.graph_models import GraphConvTensorGraph
from deepchem.models.tensorgraph.layers import Feature
from deepchem.models.tensorgraph.layers import Dense, GraphConv, BatchNorm
from deepchem.models.tensorgraph.layers import GraphPool, GraphGather
from deepchem.models.tensorgraph.layers import SoftMax, SoftMaxCrossEntropy, WeightedError, Stack
from deepchem.models.tensorgraph.layers import Label, Weights
from deepchem.metrics import to_one_hot
from deepchem.feat.mol_graphs import ConvMol
from deepchem.models.tensorgraph.tensor_graph import TensorGraph

tg = TensorGraph(use_queue=False)

TASK = sys.argv[1]  # 'tox_21', 'hiv', 'muv'
K = int(sys.argv[2])
technique = 'dfs'
batch_size = 96
NUM_EPOCHS = 100


def randomize_perm(a):
  # return a random permutation of range(a)
  ordering = list(range(a))
  shuffle(ordering)
  return ordering


def depth_first_search(neighbour_list, root_node):
  # The body was truncated in the source; this is a minimal iterative DFS
  # sketch returning nodes in visit order.
  visited, stack, order = set(), [root_node], []
  while stack:
    node = stack.pop()
    if node in visited:
      continue
    visited.add(node)
    order.append(node)
    stack.extend(n for n in neighbour_list[node] if n not in visited)
  return order
# mol = ConvMol.agglomerate_mols(dataset_all.X)
# ndeg = len(mol.get_deg_adjacency_lists())
ndeg = 11
deg_adjs = []
for ii in range(1, 11):
  deg_adj = Feature(shape=(None, ii), dtype=tf.int32)
  deg_adjs.append(deg_adj)

label15 = []
for ts in range(ntask):
  label_t = Label(shape=(None, 2))
  label15.append(label_t)

## Setup Graph Convolution Network
tg = TensorGraph(use_queue=False, learning_rate=0.001, model_dir='ckpt')
gc1 = GraphConv(
    64,
    activation_fn=tf.nn.relu,
    in_layers=[atom_features, degree_slice, membership] + deg_adjs)
bn1 = BatchNorm(in_layers=[gc1])
gp1 = GraphPool(in_layers=[bn1, degree_slice, membership] + deg_adjs)
dp1 = Dropout(0.2, in_layers=[gp1])
gc2 = GraphConv(
    64,
    activation_fn=tf.nn.relu,
    in_layers=[dp1, degree_slice, membership] + deg_adjs)
bn2 = BatchNorm(in_layers=[gc2])
gp2 = GraphPool(in_layers=[bn2, degree_slice, membership] + deg_adjs)
dp2 = Dropout(0.5, in_layers=[gp2])
class GCN:

  def __init__(self, batch_size=50):
    # save parameters
    self.batch_size = batch_size
    # define tensorgraph
    self.tg = TensorGraph(use_queue=False)
    # define features
    self.atom_features = Feature(shape=(None, 75))  # per-atom features, e.g. atom type / degree / aromaticity
    self.indexing = Feature(shape=(None, 2), dtype=tf.int32)  # index of atoms in molecules, sorted by degree
    self.membership = Feature(shape=(None,), dtype=tf.int32)  # membership of atoms in molecules
    self.deg_adj_list = [
        Feature(shape=(None, i), dtype=tf.int32) for i in range(1, 12)
    ]  # adjacency lists, one per degree
    # build graph
    self.build_graph()

  def build_graph(self):
    # Layer 1
    gc1_input = [self.atom_features, self.indexing, self.membership] + self.deg_adj_list
    gc1 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc1_input)
    bn1 = BatchNorm(in_layers=[gc1])
    gp1_input = [bn1, self.indexing, self.membership] + self.deg_adj_list
    gp1 = GraphPool(in_layers=gp1_input)
    # Layer 2
    gc2_input = [gp1, self.indexing, self.membership] + self.deg_adj_list
    gc2 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc2_input)
    bn2 = BatchNorm(in_layers=[gc2])
    gp2_input = [bn2, self.indexing, self.membership] + self.deg_adj_list
    gp2 = GraphPool(in_layers=gp2_input)
    # Dense layer 1
    d1 = Dense(out_channels=128, activation_fn=tf.nn.relu, in_layers=[gp2])
    bn3 = BatchNorm(in_layers=[d1])
    # Graph gather layer
    gg1_input = [bn3, self.indexing, self.membership] + self.deg_adj_list
    gg1 = GraphGather(
        batch_size=self.batch_size, activation_fn=tf.nn.tanh, in_layers=gg1_input)
    # Output dense layer
    d2 = Dense(out_channels=2, activation_fn=None, in_layers=[gg1])
    softmax = SoftMax(in_layers=[d2])
    self.tg.add_output(softmax)
    # Set loss function
    self.label = Label(shape=(None, 2))
    cost = SoftMaxCrossEntropy(in_layers=[self.label, d2])
    self.weight = Weights(shape=(None, 1))
    loss = WeightedError(in_layers=[cost, self.weight])
    self.tg.set_loss(loss)

  def fit(self, dataset, epochs: int):
    self.tg.fit_generator(
        self.data_generator(dataset, self.batch_size, epochs=epochs))

  def predict(self, dataset):
    pred = self.tg.predict_on_generator(
        self.data_generator(dataset, self.batch_size))
    return np.expand_dims(pred, axis=0)

  def data_generator(self, dataset, batch_size: int, epochs=1):
    for e in range(epochs):
      for X, y, w, idx in dataset.iterbatches(
          batch_size, pad_batches=True, deterministic=True):
        # data for feed
        feed_dict = {self.label: to_one_hot(y[:, 0]), self.weight: w}
        ConvMolList = ConvMol.agglomerate_mols(X)
        feed_dict[self.atom_features] = ConvMolList.get_atom_features()
        feed_dict[self.indexing] = ConvMolList.deg_slice
        feed_dict[self.membership] = ConvMolList.membership
        deg_adj_list = ConvMolList.get_deg_adjacency_lists()
        # index 0 holds degree-0 atoms, which have no adjacency entries
        for i in range(1, len(deg_adj_list)):
          feed_dict[self.deg_adj_list[i - 1]] = deg_adj_list[i]
        yield feed_dict
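# A hypothetical end-to-end run of the GCN class above (not part of the
# original code). It assumes a DeepChem version whose ConvMolFeaturizer.featurize
# accepts SMILES strings directly; older releases expect RDKit Mol objects.
import numpy as np
import deepchem as dc

smiles = ['CCO', 'c1ccccc1', 'CC(=O)O', 'CCN']  # toy molecules
X = dc.feat.ConvMolFeaturizer().featurize(smiles)
y = np.array([[0], [1], [1], [0]])  # toy single-task binary labels
w = np.ones_like(y, dtype=float)  # matches Weights(shape=(None, 1))
dataset = dc.data.NumpyDataset(X, y, w)

gcn = GCN(batch_size=50)
gcn.fit(dataset, epochs=5)
preds = gcn.predict(dataset)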
def test_graph_save(self):
  n_samples = 10
  n_features = 11
  n_tasks = 1
  batch_size = 10
  X = np.random.rand(batch_size, n_samples, n_features)
  y = np.ones(shape=(n_samples, n_tasks))
  ids = np.arange(n_samples)
  dataset = dc.data.NumpyDataset(X, y, None, ids)
  g = TensorGraph(model_dir='/tmp/tmpss5_ki5_')
  inLayer = Input(shape=(None, n_samples, n_features))
  g.add_feature(inLayer)
  flatten = Flatten()
  g.add_layer(flatten, parents=[inLayer])
  dense = Dense(out_channels=1)
  g.add_layer(dense, parents=[flatten])
  g.add_output(dense)
  label_out = Input(shape=(None, 1))
  g.add_label(label_out)
  loss = LossLayer()
  g.add_layer(loss, parents=[dense, label_out])
  g.set_loss(loss)
  g.fit(dataset, nb_epoch=100)
  g.save()
  g1 = TensorGraph.load_from_dir('/tmp/tmpss5_ki5_')
  print(g1)
  print(g1.predict_on_batch(X))