# These snippets use the legacy DeepChem TensorGraph API (DeepChem 1.x/2.x);
# the import paths below match those releases and are assumed by all of the
# examples that follow.
import numpy as np
import tensorflow as tf
import deepchem as dc
from deepchem.feat.mol_graphs import ConvMol
from deepchem.metrics import to_one_hot
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import (
    Input, Flatten, Feature, Label, Weights, Dense, SoftMax,
    SoftMaxCrossEntropy, ReduceMean, WeightedError, GraphConv, GraphPool,
    GraphGather, BatchNorm, Concat, Dropout, Stack, LossLayer)


def test_graph_save(self):
    n_samples = 10
    n_features = 11
    n_tasks = 1
    batch_size = 10
    X = np.random.rand(batch_size, n_samples, n_features)
    y = np.ones(shape=(n_samples, n_tasks))
    ids = np.arange(n_samples)
    dataset = dc.data.NumpyDataset(X, y, None, ids)

    # Build a minimal graph: Input -> Flatten -> Dense, with a LossLayer
    # comparing the dense output to a label input.
    g = TensorGraph(model_dir='/tmp/tmpss5_ki5_')
    inLayer = Input(shape=(None, n_samples, n_features))
    g.add_feature(inLayer)
    flatten = Flatten()
    g.add_layer(flatten, parents=[inLayer])
    dense = Dense(out_channels=1)
    g.add_layer(dense, parents=[flatten])
    g.add_output(dense)
    label_out = Input(shape=(None, 1))
    g.add_label(label_out)
    loss = LossLayer()
    g.add_layer(loss, parents=[dense, label_out])
    g.set_loss(loss)

    # Train, save, then reload from the same directory and predict.
    g.fit(dataset, nb_epoch=100)
    g.save()
    g1 = TensorGraph.load_from_dir('/tmp/tmpss5_ki5_')
    print(g1)
    print(g1.predict_on_batch(X))
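# The hard-coded '/tmp/tmpss5_ki5_' model_dir above is fragile: it is not
# portable and collides across runs. A minimal sketch of the same save/load
# round trip using a stdlib temporary directory instead (only model_dir changes):
import tempfile

model_dir = tempfile.mkdtemp()  # fresh, unique directory per run
g = TensorGraph(model_dir=model_dir)
# ... build, fit, and save the graph exactly as above ...
g.save()
g1 = TensorGraph.load_from_dir(model_dir)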
class MLP:

    def __init__(self, batch_size):
        # save parameters
        self.batch_size = batch_size
        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)
        self.feature = Feature(shape=(None, 1024))
        # build graph
        self.build_graph()

    def build_graph(self):
        # four dense layers tapering 256 -> 64 -> 16 -> 2, softmax on top
        d1 = Dense(out_channels=256, activation_fn=tf.nn.relu, in_layers=[self.feature])
        d2 = Dense(out_channels=64, activation_fn=tf.nn.relu, in_layers=[d1])
        d3 = Dense(out_channels=16, activation_fn=None, in_layers=[d2])
        d4 = Dense(out_channels=2, activation_fn=None, in_layers=[d3])
        softmax = SoftMax(in_layers=[d4])
        self.tg.add_output(softmax)

        # cross-entropy on the pre-softmax logits, averaged over the batch
        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d4])
        loss = ReduceMean(in_layers=[cost])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs):
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size, epochs=1):
        for e in range(epochs):
            for X, y, w, idx in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                # data for feed
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.feature: X}
                yield feed_dict
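# A minimal usage sketch for the MLP above on synthetic data; the 1024-wide
# feature matrix and binary labels here are made up for illustration.
X = np.random.rand(100, 1024)
y = np.random.randint(2, size=(100, 1))
dataset = dc.data.NumpyDataset(X, y)

mlp = MLP(batch_size=50)
mlp.fit(dataset, epochs=10)
probs = mlp.predict(dataset)  # class probabilities, with a leading singleton axis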
def graph_conv_net(batch_size, prior, num_task):
    """Build a TensorGraph for a multitask (multilabel) classification task.

    Returns the graph plus the feature, label, and weight layers needed to
    construct feed dicts.
    """
    tg = TensorGraph(use_queue=False)
    if prior:
        add_on = num_task
    else:
        add_on = 0

    # Input layers: per-atom features plus the graph-topology inputs used by
    # the graph convolutions, and a fixed-size circular fingerprint.
    atom_features = Feature(shape=(None, 75 + 2 * add_on))
    circular_features = Feature(shape=(batch_size, 256), dtype=tf.float32)
    degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
    membership = Feature(shape=(None,), dtype=tf.int32)
    deg_adjs = []
    for i in range(11):
        deg_adj = Feature(shape=(None, i + 1), dtype=tf.int32)
        deg_adjs.append(deg_adj)

    # Graph convolution block 1
    gc1 = GraphConv(64 + add_on, activation_fn=tf.nn.elu,
                    in_layers=[atom_features, degree_slice, membership] + deg_adjs)
    batch_norm1 = BatchNorm(in_layers=[gc1])
    gp1 = GraphPool(in_layers=[batch_norm1, degree_slice, membership] + deg_adjs)

    # Graph convolution block 2. Note: this reads from gc1, not gp1, so the
    # first pooling layer is bypassed here (gp1 still feeds the Concat below).
    gc2 = GraphConv(64 + add_on, activation_fn=tf.nn.elu,
                    in_layers=[gc1, degree_slice, membership] + deg_adjs)
    batch_norm2 = BatchNorm(in_layers=[gc2])
    gp2 = GraphPool(in_layers=[batch_norm2, degree_slice, membership] + deg_adjs)

    # Graph readout branch
    add = Concat(in_layers=[gp1, gp2])
    add = Dropout(0.5, in_layers=[add])
    dense = Dense(out_channels=128, activation_fn=tf.nn.elu, in_layers=[add])
    batch_norm3 = BatchNorm(in_layers=[dense])
    readout = GraphGather(batch_size=batch_size, activation_fn=tf.nn.tanh,
                          in_layers=[batch_norm3, degree_slice, membership] + deg_adjs)
    batch_norm4 = BatchNorm(in_layers=[readout])

    # Dense branch over the circular fingerprints
    dense1 = Dense(out_channels=128, activation_fn=tf.nn.elu, in_layers=[circular_features])
    dense1 = BatchNorm(in_layers=[dense1])
    dense1 = Dropout(0.5, in_layers=[dense1])

    # Merge both branches
    merge_feat = Concat(in_layers=[dense1, batch_norm4])
    merge = Dense(out_channels=256, activation_fn=tf.nn.elu, in_layers=[merge_feat])

    # One two-way softmax head per task
    costs = []
    labels = []
    for task in range(num_task):
        classification = Dense(out_channels=2, activation_fn=None, in_layers=[merge])
        softmax = SoftMax(in_layers=[classification])
        tg.add_output(softmax)
        label = Label(shape=(None, 2))
        labels.append(label)
        cost = SoftMaxCrossEntropy(in_layers=[label, classification])
        costs.append(cost)

    all_cost = Stack(in_layers=costs, axis=1)
    weights = Weights(shape=(None, num_task))
    loss = WeightedError(in_layers=[all_cost, weights])
    tg.set_loss(loss)
    return tg, atom_features, circular_features, degree_slice, membership, deg_adjs, labels, weights
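# A minimal sketch of wiring graph_conv_net up to data. ConvMol.agglomerate_mols
# and its accessors are DeepChem API; the helper name make_feed_dict and the
# circular_X fingerprint matrix (shape (batch_size, 256)) are hypothetical.
tg, atom_feats, circ_feats, deg_slice, membership, deg_adjs, labels, weights = \
    graph_conv_net(batch_size=50, prior=False, num_task=12)

def make_feed_dict(X_batch, y_batch, w_batch, circular_X):
    mols = ConvMol.agglomerate_mols(X_batch)
    d = {atom_feats: mols.get_atom_features(),
         deg_slice: mols.deg_slice,
         membership: mols.membership,
         circ_feats: circular_X,
         weights: w_batch}
    # degree-0 adjacency lists are skipped; deg_adjs covers degrees 1..11
    for feat_layer, adj in zip(deg_adjs, mols.get_deg_adjacency_lists()[1:]):
        d[feat_layer] = adj
    for t, label in enumerate(labels):
        d[label] = to_one_hot(y_batch[:, t])
    return d  # yield dicts like this from a generator passed to tg.fit_generator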
# Per-task classification head (in the full script this block is the body of a
# loop over current_tasks):
classification = Dense(out_channels=2, activation_fn=None, in_layers=[readout])
softmax = SoftMax(in_layers=[classification])
tg.add_output(softmax)
label = Label(shape=(None, 2))
labels.append(label)
cost = SoftMaxCrossEntropy(in_layers=[label, classification])
costs.append(cost)

all_cost = Stack(in_layers=costs, axis=1)
weights = Weights(shape=(None, len(current_tasks)))
loss = WeightedError(in_layers=[all_cost, weights])
tg.set_loss(loss)

# Data splits
#
# Tox21 is treated differently: we manually (randomly) split train_dataset.X
# into train, valid, and test sets ourselves, rather than letting DeepChem
# provide the splits directly. Reason: early in development, the valid_dataset
# and test_dataset DeepChem returned for Tox21 were empty, and a comment in
# the DeepChem source led us to believe this was intended; so for tox21 we
# never access valid_dataset.X or test_dataset.X. We only later found that the
# Tox21 validation and test sets could be accessed, but by then the manual
# split was applied to all models, so the comparison remains fair.
if TASK != 'tox_21':
    new_train_data = generate_new_X(train_dataset.X, K, technique)
    new_train_dataset = dc.data.datasets.DiskDataset.from_numpy(
        new_train_data,
class GCN:

    def __init__(self, batch_size=50):
        # save parameters
        self.batch_size = batch_size
        # define tensorgraph
        self.tg = TensorGraph(use_queue=False)
        # define features
        # per-atom features, e.g. atom type / degree / aromaticity
        self.atom_features = Feature(shape=(None, 75))
        # index of atoms in molecules, sorted by degree
        self.indexing = Feature(shape=(None, 2), dtype=tf.int32)
        # membership of atoms in molecules
        self.membership = Feature(shape=(None,), dtype=tf.int32)
        # adjacency lists, one per degree
        self.deg_adj_list = [Feature(shape=(None, i), dtype=tf.int32) for i in range(1, 12)]
        # build graph
        self.build_graph()

    def build_graph(self):
        # Layer 1
        gc1_input = [self.atom_features, self.indexing, self.membership] + self.deg_adj_list
        gc1 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc1_input)
        bn1 = BatchNorm(in_layers=[gc1])
        gp1_input = [bn1, self.indexing, self.membership] + self.deg_adj_list
        gp1 = GraphPool(in_layers=gp1_input)

        # Layer 2
        gc2_input = [gp1, self.indexing, self.membership] + self.deg_adj_list
        gc2 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc2_input)
        bn2 = BatchNorm(in_layers=[gc2])
        gp2_input = [bn2, self.indexing, self.membership] + self.deg_adj_list
        gp2 = GraphPool(in_layers=gp2_input)

        # Dense layer 1
        d1 = Dense(out_channels=128, activation_fn=tf.nn.relu, in_layers=[gp2])
        bn3 = BatchNorm(in_layers=[d1])

        # Graph gather layer
        gg1_input = [bn3, self.indexing, self.membership] + self.deg_adj_list
        gg1 = GraphGather(batch_size=self.batch_size, activation_fn=tf.nn.tanh,
                          in_layers=gg1_input)

        # Output dense layer
        d2 = Dense(out_channels=2, activation_fn=None, in_layers=[gg1])
        softmax = SoftMax(in_layers=[d2])
        self.tg.add_output(softmax)

        # Set loss function
        self.label = Label(shape=(None, 2))
        cost = SoftMaxCrossEntropy(in_layers=[self.label, d2])
        self.weight = Weights(shape=(None, 1))
        loss = WeightedError(in_layers=[cost, self.weight])
        self.tg.set_loss(loss)

    def fit(self, dataset, epochs: int):
        self.tg.fit_generator(self.data_generator(dataset, self.batch_size, epochs=epochs))

    def predict(self, dataset):
        pred = self.tg.predict_on_generator(self.data_generator(dataset, self.batch_size))
        return np.expand_dims(pred, axis=0)

    def data_generator(self, dataset, batch_size: int, epochs=1):
        for e in range(epochs):
            for X, y, w, idx in dataset.iterbatches(batch_size, pad_batches=True, deterministic=True):
                # data for feed
                feed_dict = {self.label: to_one_hot(y[:, 0]), self.weight: w}
                ConvMolList = ConvMol.agglomerate_mols(X)
                feed_dict[self.atom_features] = ConvMolList.get_atom_features()
                feed_dict[self.indexing] = ConvMolList.deg_slice
                feed_dict[self.membership] = ConvMolList.membership
                # degree-0 adjacency lists are skipped: deg_adj_list covers degrees 1..11
                deg_adj_list = ConvMolList.get_deg_adjacency_lists()
                for i in range(1, len(deg_adj_list)):
                    feed_dict[self.deg_adj_list[i - 1]] = deg_adj_list[i]
                yield feed_dict
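# A minimal usage sketch for the GCN above. The SMILES are made-up examples;
# ConvMolFeaturizer is DeepChem's standard graph-convolution featurizer (older
# releases may require RDKit Mol objects instead of raw SMILES strings).
smiles = ['CCO', 'c1ccccc1', 'CC(=O)O']
X = dc.feat.ConvMolFeaturizer().featurize(smiles)
y = np.array([[0], [1], [0]])
dataset = dc.data.NumpyDataset(X, y)

model = GCN(batch_size=2)
model.fit(dataset, epochs=5)
probs = model.predict(dataset)  # per-sample class probabilities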