def build_graph(self):
  # Build placeholders
  self.atom_features = Feature(shape=(None, self.n_atom_feat))
  self.pair_features = Feature(shape=(None, self.n_pair_feat))
  self.atom_split = Feature(shape=(None,), dtype=tf.int32)
  self.atom_to_pair = Feature(shape=(None, 2), dtype=tf.int32)

  message_passing = MessagePassing(
      self.T,
      message_fn='enn',
      update_fn='gru',
      n_hidden=self.n_hidden,
      in_layers=[self.atom_features, self.pair_features, self.atom_to_pair])

  atom_embeddings = Dense(self.n_hidden, in_layers=[message_passing])

  mol_embeddings = SetGather(
      self.M,
      self.batch_size,
      n_hidden=self.n_hidden,
      in_layers=[atom_embeddings, self.atom_split])

  dense1 = Dense(
      out_channels=2 * self.n_hidden,
      activation_fn=tf.nn.relu,
      in_layers=[mol_embeddings])

  costs = []
  self.labels_fd = []
  for task in range(self.n_tasks):
    if self.mode == "classification":
      classification = Dense(
          out_channels=2, activation_fn=None, in_layers=[dense1])
      softmax = SoftMax(in_layers=[classification])
      self.add_output(softmax)

      label = Label(shape=(None, 2))
      self.labels_fd.append(label)
      cost = SoftMaxCrossEntropy(in_layers=[label, classification])
      costs.append(cost)
    if self.mode == "regression":
      regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[dense1])
      self.add_output(regression)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, regression])
      costs.append(cost)
  if self.mode == "classification":
    all_cost = Concat(in_layers=costs, axis=1)
  elif self.mode == "regression":
    all_cost = Stack(in_layers=costs, axis=1)
  self.weights = Weights(shape=(None, self.n_tasks))
  loss = WeightedError(in_layers=[all_cost, self.weights])
  self.set_loss(loss)
def build_graph(self): """Building graph structures: Features => DAGLayer => DAGGather => Classification or Regression """ self.atom_features = Feature(shape=(None, self.n_atom_feat)) self.parents = Feature(shape=(None, self.max_atoms, self.max_atoms), dtype=tf.int32) self.calculation_orders = Feature(shape=(None, self.max_atoms), dtype=tf.int32) self.calculation_masks = Feature(shape=(None, self.max_atoms), dtype=tf.bool) self.membership = Feature(shape=(None, ), dtype=tf.int32) self.n_atoms = Feature(shape=(), dtype=tf.int32) dag_layer1 = DAGLayer(n_graph_feat=self.n_graph_feat, n_atom_feat=self.n_atom_feat, max_atoms=self.max_atoms, batch_size=self.batch_size, in_layers=[ self.atom_features, self.parents, self.calculation_orders, self.calculation_masks, self.n_atoms ]) dag_gather = DAGGather(n_graph_feat=self.n_graph_feat, n_outputs=self.n_outputs, max_atoms=self.max_atoms, in_layers=[dag_layer1, self.membership]) costs = [] self.labels_fd = [] for task in range(self.n_tasks): if self.mode == "classification": classification = Dense(out_channels=2, activation_fn=None, in_layers=[dag_gather]) softmax = SoftMax(in_layers=[classification]) self.add_output(softmax) label = Label(shape=(None, 2)) self.labels_fd.append(label) cost = SoftMaxCrossEntropy(in_layers=[label, classification]) costs.append(cost) if self.mode == "regression": regression = Dense(out_channels=1, activation_fn=None, in_layers=[dag_gather]) self.add_output(regression) label = Label(shape=(None, 1)) self.labels_fd.append(label) cost = L2Loss(in_layers=[label, regression]) costs.append(cost) if self.mode == "classification": all_cost = Concat(in_layers=costs, axis=1) elif self.mode == "regression": all_cost = Stack(in_layers=costs, axis=1) self.weights = Weights(shape=(None, self.n_tasks)) loss = WeightedError(in_layers=[all_cost, self.weights]) self.set_loss(loss)
def build_graph(self):
  self.smiles_seqs = Feature(shape=(None, self.seq_length), dtype=tf.int32)
  # Character embedding
  self.Embedding = DTNNEmbedding(
      n_embedding=self.n_embedding,
      periodic_table_length=len(self.char_dict.keys()) + 1,
      in_layers=[self.smiles_seqs])
  self.pooled_outputs = []
  self.conv_layers = []
  for filter_size, num_filter in zip(self.kernel_sizes, self.num_filters):
    # Multiple convolutional layers with different filter widths
    self.conv_layers.append(
        Conv1D(
            kernel_size=filter_size,
            filters=num_filter,
            padding='valid',
            in_layers=[self.Embedding]))
    # Max-over-time pooling
    self.pooled_outputs.append(
        ReduceMax(axis=1, in_layers=[self.conv_layers[-1]]))
  # Concat features from all filters (one feature per filter)
  concat_outputs = Concat(axis=1, in_layers=self.pooled_outputs)
  dropout = Dropout(dropout_prob=self.dropout, in_layers=[concat_outputs])
  dense = Dense(
      out_channels=200, activation_fn=tf.nn.relu, in_layers=[dropout])
  # Highway layer from https://arxiv.org/pdf/1505.00387.pdf
  self.gather = Highway(in_layers=[dense])

  costs = []
  self.labels_fd = []
  for task in range(self.n_tasks):
    if self.mode == "classification":
      classification = Dense(
          out_channels=2, activation_fn=None, in_layers=[self.gather])
      softmax = SoftMax(in_layers=[classification])
      self.add_output(softmax)

      label = Label(shape=(None, 2))
      self.labels_fd.append(label)
      cost = SoftMaxCrossEntropy(in_layers=[label, classification])
      costs.append(cost)
    if self.mode == "regression":
      regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[self.gather])
      self.add_output(regression)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, regression])
      costs.append(cost)
  # Both modes stack the per-task costs into shape (batch, n_tasks), so the
  # original identical if/elif branches collapse to a single assignment.
  all_cost = Stack(in_layers=costs, axis=1)
  self.weights = Weights(shape=(None, self.n_tasks))
  loss = WeightedError(in_layers=[all_cost, self.weights])
  self.set_loss(loss)
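# Hedged illustration (plain numpy, independent of the DeepChem layers above)
# of the max-over-time pooling used after each Conv1D: a 'valid' convolution
# over an embedded sequence yields activations of shape
# (batch, seq_length - kernel_size + 1, num_filters); taking the max over
# axis 1 keeps exactly one feature per filter. The shapes below are made-up
# example values, not taken from the model.
import numpy as np

conv_out = np.random.rand(4, 98, 100)  # batch=4, 98 positions, 100 filters
pooled = conv_out.max(axis=1)          # (4, 100): one value per filter
assert pooled.shape == (4, 100)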
def build_graph(self):
  self.vertex_features = Feature(shape=(None, self.max_atoms, 75))
  self.adj_matrix = Feature(shape=(None, self.max_atoms, 1, self.max_atoms))
  self.mask = Feature(shape=(None, self.max_atoms, 1))

  gcnn1 = BatchNorm(
      GraphCNN(
          num_filters=64,
          in_layers=[self.vertex_features, self.adj_matrix, self.mask]))
  gcnn1 = Dropout(self.dropout, in_layers=gcnn1)
  gcnn2 = BatchNorm(
      GraphCNN(num_filters=64, in_layers=[gcnn1, self.adj_matrix, self.mask]))
  gcnn2 = Dropout(self.dropout, in_layers=gcnn2)
  gc_pool, adj_matrix = GraphCNNPool(
      num_vertices=32, in_layers=[gcnn2, self.adj_matrix, self.mask])
  gc_pool = BatchNorm(gc_pool)
  gc_pool = Dropout(self.dropout, in_layers=gc_pool)
  gcnn3 = BatchNorm(GraphCNN(num_filters=32, in_layers=[gc_pool, adj_matrix]))
  gcnn3 = Dropout(self.dropout, in_layers=gcnn3)
  gc_pool2, adj_matrix2 = GraphCNNPool(
      num_vertices=8, in_layers=[gcnn3, adj_matrix])
  gc_pool2 = BatchNorm(gc_pool2)
  gc_pool2 = Dropout(self.dropout, in_layers=gc_pool2)
  flattened = Flatten(in_layers=gc_pool2)
  readout = Dense(
      out_channels=256, activation_fn=tf.nn.relu, in_layers=flattened)

  costs = []
  self.my_labels = []
  for task in range(self.n_tasks):
    if self.mode == 'classification':
      classification = Dense(
          out_channels=2, activation_fn=None, in_layers=[readout])
      softmax = SoftMax(in_layers=[classification])
      self.add_output(softmax)

      label = Label(shape=(None, 2))
      self.my_labels.append(label)
      cost = SoftMaxCrossEntropy(in_layers=[label, classification])
      costs.append(cost)
    if self.mode == 'regression':
      regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[readout])
      self.add_output(regression)

      label = Label(shape=(None, 1))
      self.my_labels.append(label)
      cost = L2Loss(in_layers=[label, regression])
      costs.append(cost)
  if self.mode == "classification":
    entropy = Stack(in_layers=costs, axis=-1)
  elif self.mode == "regression":
    entropy = Stack(in_layers=costs, axis=1)
  self.my_task_weights = Weights(shape=(None, self.n_tasks))
  loss = WeightedError(in_layers=[entropy, self.my_task_weights])
  self.set_loss(loss)
def _build_graph(self, tf_graph, scope, model_dir): """Construct a TensorGraph containing the policy and loss calculations.""" state_shape = self._env.state_shape state_dtype = self._env.state_dtype if not self._state_is_list: state_shape = [state_shape] state_dtype = [state_dtype] features = [] for s, d in zip(state_shape, state_dtype): features.append( Feature(shape=[None] + list(s), dtype=tf.as_dtype(d))) policy_layers = self._policy.create_layers(features) value = policy_layers['value'] rewards = Weights(shape=(None, )) advantages = Weights(shape=(None, )) graph = TensorGraph(batch_size=self.max_rollout_length, use_queue=False, graph=tf_graph, model_dir=model_dir) for f in features: graph._add_layer(f) if 'action_prob' in policy_layers: self.continuous = False action_prob = policy_layers['action_prob'] actions = Label(shape=(None, self._env.n_actions)) loss = A3CLossDiscrete( self.value_weight, self.entropy_weight, in_layers=[rewards, actions, action_prob, value, advantages]) graph.add_output(action_prob) else: self.continuous = True action_mean = policy_layers['action_mean'] action_std = policy_layers['action_std'] actions = Label(shape=[None] + list(self._env.action_shape)) loss = A3CLossContinuous(self.value_weight, self.entropy_weight, in_layers=[ rewards, actions, action_mean, action_std, value, advantages ]) graph.add_output(action_mean) graph.add_output(action_std) graph.add_output(value) graph.set_loss(loss) graph.set_optimizer(self._optimizer) with graph._get_tf("Graph").as_default(): with tf.variable_scope(scope): graph.build() if self.continuous: return graph, features, rewards, actions, action_mean, action_std, value, advantages else: return graph, features, rewards, actions, action_prob, value, advantages
def _build_graph(self):
  self.smiles_seqs = Feature(shape=(None, self.seq_length), dtype=tf.int32)
  # Character embedding
  Embedding = DTNNEmbedding(
      n_embedding=self.n_embedding,
      periodic_table_length=len(self.char_dict.keys()) + 1,
      in_layers=[self.smiles_seqs])
  pooled_outputs = []
  conv_layers = []
  for filter_size, num_filter in zip(self.kernel_sizes, self.num_filters):
    # Multiple convolutional layers with different filter widths
    conv_layers.append(
        Conv1D(
            kernel_size=filter_size,
            filters=num_filter,
            padding='valid',
            in_layers=[Embedding]))
    # Max-over-time pooling
    pooled_outputs.append(ReduceMax(axis=1, in_layers=[conv_layers[-1]]))
  # Concat features from all filters (one feature per filter)
  concat_outputs = Concat(axis=1, in_layers=pooled_outputs)
  dropout = Dropout(dropout_prob=self.dropout, in_layers=[concat_outputs])
  dense = Dense(
      out_channels=200, activation_fn=tf.nn.relu, in_layers=[dropout])
  # Highway layer from https://arxiv.org/pdf/1505.00387.pdf
  gather = Highway(in_layers=[dense])

  if self.mode == "classification":
    logits = Dense(
        out_channels=self.n_tasks * 2, activation_fn=None, in_layers=[gather])
    logits = Reshape(shape=(-1, self.n_tasks, 2), in_layers=[logits])
    output = SoftMax(in_layers=[logits])
    self.add_output(output)
    labels = Label(shape=(None, self.n_tasks, 2))
    loss = SoftMaxCrossEntropy(in_layers=[labels, logits])
  else:
    vals = Dense(
        out_channels=self.n_tasks * 1, activation_fn=None, in_layers=[gather])
    vals = Reshape(shape=(-1, self.n_tasks, 1), in_layers=[vals])
    self.add_output(vals)
    labels = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[labels, vals]))
  weights = Weights(shape=(None, self.n_tasks))
  weighted_loss = WeightedError(in_layers=[loss, weights])
  self.set_loss(weighted_loss)
def _build_graph(self, tf_graph, scope, model_dir): """Construct a TensorGraph containing the policy and loss calculations.""" features = [Feature(shape=[None] + list(s)) for s in self._env.state_shape] policy_layers = self._policy.create_layers(features) action_prob = policy_layers['action_prob'] value = policy_layers['value'] rewards = Weights(shape=(None, 1)) actions = Label(shape=(None, self._env.n_actions)) loss = A3CLoss( self.value_weight, self.entropy_weight, in_layers=[rewards, actions, action_prob, value]) graph = TensorGraph( batch_size=self.max_rollout_length, use_queue=False, graph=tf_graph, model_dir=model_dir) for f in features: graph._add_layer(f) graph.add_output(action_prob) graph.add_output(value) graph.set_loss(loss) graph.set_optimizer(self.optimizer) with graph._get_tf("Graph").as_default(): with tf.variable_scope(scope): graph.build() return graph, features, rewards, actions, action_prob, value
def test_saliency_mapping(self):
  """Test computing a saliency map."""
  n_tasks = 3
  n_features = 5
  features = Feature(shape=(None, n_features))
  dense = Dense(
      out_channels=n_tasks, in_layers=[features], activation_fn=tf.tanh)
  label = Label(shape=(None, n_tasks))
  loss = ReduceSquareDifference(in_layers=[dense, label])
  model = dc.models.TensorGraph()
  model.add_output(dense)
  model.set_loss(loss)
  x = np.random.random(n_features)
  # s[task] is the gradient of output[task] with respect to x.
  s = model.compute_saliency(x)
  assert s.shape[0] == n_tasks
  assert s.shape[1] == n_features

  # Take a tiny step in the direction of s and see if the output changes by
  # the expected amount. To first order, stepping by +/- s[task] * step with
  # step = 0.5 * delta / norm changes output[task] by
  # 2 * step * norm**2 = norm * delta.
  delta = 0.01
  for task in range(n_tasks):
    norm = np.sqrt(np.sum(s[task]**2))
    step = 0.5 * delta / norm
    pred1 = model.predict_on_batch(
        (x + s[task] * step).reshape((1, n_features))).flatten()
    pred2 = model.predict_on_batch(
        (x - s[task] * step).reshape((1, n_features))).flatten()
    self.assertAlmostEqual(
        pred1[task], (pred2 + norm * delta)[task], places=4)
def test_compute_model_performance_multitask_regressor(self):
  random_seed = 42
  n_data_points = 20
  n_features = 2
  n_tasks = 2
  np.random.seed(seed=random_seed)

  X = np.random.rand(n_data_points, n_features)
  y1 = np.array([0.5 for x in range(n_data_points)])
  y2 = np.array([-0.5 for x in range(n_data_points)])
  y = np.stack([y1, y2], axis=1)
  dataset = NumpyDataset(X, y)

  features = Feature(shape=(None, n_features))
  label = Label(shape=(None, n_tasks))
  dense = Dense(out_channels=n_tasks, in_layers=[features])
  loss = ReduceSquareDifference(in_layers=[dense, label])

  tg = dc.models.TensorGraph(random_seed=random_seed, learning_rate=0.1)
  tg.add_output(dense)
  tg.set_loss(loss)
  tg.fit(dataset, nb_epoch=1000)

  metric = [
      dc.metrics.Metric(
          dc.metrics.mean_absolute_error, np.mean, mode="regression"),
  ]
  scores = tg.evaluate_generator(
      tg.default_generator(dataset),
      metric,
      labels=[label],
      per_task_metrics=True)
  scores = list(scores[1].values())
  assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))
def test_compute_model_performance_singletask_classifier(self):
  n_data_points = 20
  n_features = 10

  X = np.ones(shape=(int(n_data_points / 2), n_features)) * -1
  X1 = np.ones(shape=(int(n_data_points / 2), n_features))
  X = np.concatenate((X, X1))
  class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))])
  class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
  y = np.concatenate((class_0, class_1))
  dataset = NumpyDataset(X, y)

  features = Feature(shape=(None, n_features))
  label = Label(shape=(None, 2))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  total_loss = ReduceMean(in_layers=smce)

  tg = dc.models.TensorGraph(learning_rate=0.1)
  tg.add_output(output)
  tg.set_loss(total_loss)
  tg.fit(dataset, nb_epoch=1000)

  metric = dc.metrics.Metric(
      dc.metrics.roc_auc_score, np.mean, mode="classification")
  scores = tg.evaluate_generator(
      tg.default_generator(dataset), [metric],
      labels=[label],
      per_task_metrics=True)
  scores = list(scores[1].values())
  assert_true(np.isclose(scores, [1.0], atol=0.05))
def test_neighbor_list_simple(self):
  """Test that neighbor lists can be constructed."""
  N_atoms = 10
  start = 0
  stop = 12
  nbr_cutoff = 3
  ndim = 3
  M = 6
  X = np.random.rand(N_atoms, ndim)
  y = np.random.rand(N_atoms, 1)
  dataset = NumpyDataset(X, y)

  features = Feature(shape=(N_atoms, ndim))
  labels = Label(shape=(N_atoms,))
  nbr_list = NeighborList(
      N_atoms, M, ndim, nbr_cutoff, start, stop, in_layers=[features])
  nbr_list = ToFloat(in_layers=[nbr_list])
  # This isn't a meaningful loss, but just for test
  loss = ReduceSum(in_layers=[nbr_list])
  tg = dc.models.TensorGraph(use_queue=False)
  tg.add_output(nbr_list)
  tg.set_loss(loss)
  tg.build()
def _create_graph(self, feature_shape, label_shape):
  """This is called to create the full TensorGraph from the added layers."""
  if self.built:
    return  # The graph has already been created.

  # Add in features
  features = Feature(shape=feature_shape)
  # Add in labels
  labels = Label(shape=label_shape)

  # Add in all layers
  prev_layer = features
  if len(self._layer_list) == 0:
    raise ValueError("No layers have been added to model.")
  for ind, layer in enumerate(self._layer_list):
    if len(layer.in_layers) > 1:
      raise ValueError("Cannot specify more than one "
                       "in_layer for Sequential.")
    layer.in_layers += [prev_layer]
    prev_layer = layer
  # The last layer is the output of the model
  self.outputs.append(prev_layer)

  if self._loss_function == "binary_crossentropy":
    smce = SoftMaxCrossEntropy(in_layers=[labels, prev_layer])
    self.set_loss(ReduceMean(in_layers=[smce]))
  elif self._loss_function == "mse":
    mse = ReduceSquareDifference(in_layers=[prev_layer, labels])
    self.set_loss(mse)
  else:
    # TODO(rbharath): Add in support for additional losses.
    raise ValueError("Unsupported loss.")
  self.build()
def build_graph(self):
  # Layer 1
  gc1_input = [self.atom_features, self.indexing, self.membership
              ] + self.deg_adj_list
  gc1 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc1_input)
  bn1 = BatchNorm(in_layers=[gc1])
  gp1_input = [bn1, self.indexing, self.membership] + self.deg_adj_list
  gp1 = GraphPool(in_layers=gp1_input)

  # Layer 2
  gc2_input = [gp1, self.indexing, self.membership] + self.deg_adj_list
  gc2 = GraphConv(64, activation_fn=tf.nn.relu, in_layers=gc2_input)
  bn2 = BatchNorm(in_layers=[gc2])
  gp2_input = [bn2, self.indexing, self.membership] + self.deg_adj_list
  gp2 = GraphPool(in_layers=gp2_input)

  # Dense layer 1
  d1 = Dense(out_channels=128, activation_fn=tf.nn.relu, in_layers=[gp2])
  bn3 = BatchNorm(in_layers=[d1])

  # Graph gather layer
  gg1_input = [bn3, self.indexing, self.membership] + self.deg_adj_list
  gg1 = GraphGather(
      batch_size=self.batch_size,
      activation_fn=tf.nn.tanh,
      in_layers=gg1_input)

  # Output dense layer
  d2 = Dense(out_channels=2, activation_fn=None, in_layers=[gg1])
  softmax = SoftMax(in_layers=[d2])
  self.tg.add_output(softmax)

  # Set loss function
  self.label = Label(shape=(None, 2))
  cost = SoftMaxCrossEntropy(in_layers=[self.label, d2])
  self.weight = Weights(shape=(None, 1))
  loss = WeightedError(in_layers=[cost, self.weight])
  self.tg.set_loss(loss)
def _build_graph(self):
  self.one_hot_seq = Feature(
      shape=(None, self.pad_length, self.num_amino_acids), dtype=tf.float32)

  conv1 = Conv1D(kernel_size=2, filters=512, in_layers=[self.one_hot_seq])
  maxpool1 = MaxPool1D(strides=2, padding="VALID", in_layers=[conv1])
  conv2 = Conv1D(kernel_size=3, filters=512, in_layers=[maxpool1])

  flattened = Flatten(in_layers=[conv2])
  dense1 = Dense(
      out_channels=400, in_layers=[flattened], activation_fn=tf.nn.tanh)
  dropout = Dropout(dropout_prob=self.dropout_p, in_layers=[dense1])
  output = Dense(out_channels=1, in_layers=[dropout], activation_fn=None)
  self.add_output(output)

  if self.mode == "regression":
    label = Label(shape=(None, 1))
    loss = L2Loss(in_layers=[label, output])
  else:
    raise NotImplementedError(
        "Classification support not added yet. Missing details in paper.")
  weights = Weights(shape=(None,))
  weighted_loss = WeightedError(in_layers=[loss, weights])
  self.set_loss(weighted_loss)
def test_tensorboard(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(
      tensorboard=True,
      tensorboard_log_frequency=1,
      learning_rate=0.01,
      model_dir='/tmp/tensorgraph')
  tg.add_output(output)
  tg.set_loss(loss)
  tg.fit(dataset, nb_epoch=1000)
  files_in_dir = os.listdir(tg.model_dir)
  event_file = list(filter(lambda x: x.startswith("events"), files_in_dir))
  assert_true(len(event_file) > 0)
  event_file = os.path.join(tg.model_dir, event_file[0])
  file_size = os.stat(event_file).st_size
  assert_true(file_size > 0)
def build_graph(self): """ Building graph structures: """ self.m1_features = Feature(shape=(None, self.n_features)) self.m2_features = Feature(shape=(None, self.n_features)) prev_layer1 = self.m1_features prev_layer2 = self.m2_features for layer_size in self.layer_sizes: prev_layer1 = Dense( out_channels=layer_size, in_layers=[prev_layer1], activation_fn=tf.nn.relu) prev_layer2 = prev_layer1.shared([prev_layer2]) if self.dropout > 0.0: prev_layer1 = Dropout(self.dropout, in_layers=prev_layer1) prev_layer2 = Dropout(self.dropout, in_layers=prev_layer2) readout_m1 = Dense( out_channels=1, in_layers=[prev_layer1], activation_fn=None) readout_m2 = readout_m1.shared([prev_layer2]) self.add_output(Sigmoid(readout_m1) * 4 + 1) self.add_output(Sigmoid(readout_m2) * 4 + 1) self.difference = readout_m1 - readout_m2 label = Label(shape=(None, 1)) loss = HingeLoss(in_layers=[label, self.difference]) self.my_task_weights = Weights(shape=(None, 1)) loss = WeightedError(in_layers=[loss, self.my_task_weights]) self.set_loss(loss)
def test_save_load(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(learning_rate=0.01)
  tg.add_output(output)
  tg.set_loss(loss)
  submodel_loss = ReduceSum(in_layers=smce)
  submodel_opt = Adam(learning_rate=0.002)
  submodel = tg.create_submodel(
      layers=[dense], loss=submodel_loss, optimizer=submodel_opt)
  tg.fit(dataset, nb_epoch=1)
  prediction = np.squeeze(tg.predict_on_batch(X))
  tg.save()

  dirpath = tempfile.mkdtemp()
  shutil.rmtree(dirpath)
  shutil.move(tg.model_dir, dirpath)

  tg1 = TensorGraph.load_from_dir(dirpath)
  prediction2 = np.squeeze(tg1.predict_on_batch(X))
  assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
def test_set_optimizer(self):
  n_data_points = 20
  n_features = 2
  X = np.random.rand(n_data_points, n_features)
  y = [[0, 1] for x in range(n_data_points)]
  dataset = NumpyDataset(X, y)
  features = Feature(shape=(None, n_features))
  dense = Dense(out_channels=2, in_layers=[features])
  output = SoftMax(in_layers=[dense])
  label = Label(shape=(None, 2))
  smce = SoftMaxCrossEntropy(in_layers=[label, dense])
  loss = ReduceMean(in_layers=[smce])
  tg = dc.models.TensorGraph(learning_rate=0.01, use_queue=False)
  tg.add_output(output)
  tg.set_loss(loss)
  global_step = tg.get_global_step()
  learning_rate = ExponentialDecay(
      initial_rate=0.1, decay_rate=0.96, decay_steps=100000)
  tg.set_optimizer(GradientDescent(learning_rate=learning_rate))
  tg.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(tg.predict_on_batch(X))
  tg.save()
  tg1 = TensorGraph.load_from_dir(tg.model_dir)
  prediction2 = np.squeeze(tg1.predict_on_batch(X))
  assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))
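# Hedged sketch of the learning-rate schedule configured above, assuming
# DeepChem's ExponentialDecay follows the usual TensorFlow formula
# (non-staircase): lr(step) = initial_rate * decay_rate ** (step / decay_steps).
def decayed_lr(step, initial_rate=0.1, decay_rate=0.96, decay_steps=100000):
  return initial_rate * decay_rate**(step / decay_steps)

print(decayed_lr(0))       # 0.1
print(decayed_lr(100000))  # 0.096
print(decayed_lr(200000))  # ~0.0922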
def _build_graph(self):
  self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
  self.atom_feats = Feature(shape=(None, self.max_atoms * self.n_feat))

  reshaped_atom_feats = Reshape(
      in_layers=[self.atom_feats], shape=(-1, self.max_atoms, self.n_feat))
  reshaped_atom_flags = Reshape(
      in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))

  previous_layer = reshaped_atom_feats
  Hiddens = []
  for n_hidden in self.layer_structures:
    Hidden = Dense(
        out_channels=n_hidden,
        activation_fn=tf.nn.tanh,
        in_layers=[previous_layer])
    Hiddens.append(Hidden)
    previous_layer = Hiddens[-1]

  regression = Dense(
      out_channels=1 * self.n_tasks,
      activation_fn=None,
      in_layers=[Hiddens[-1]])
  output = BPGather(
      self.max_atoms, in_layers=[regression, reshaped_atom_flags])
  self.add_output(output)

  label = Label(shape=(None, self.n_tasks, 1))
  loss = ReduceSum(L2Loss(in_layers=[label, output]))
  weights = Weights(shape=(None, self.n_tasks))

  weighted_loss = WeightedError(in_layers=[loss, weights])
  self.set_loss(weighted_loss)
def build_graph(self): """Constructs the graph architecture of IRV as described in: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2750043/ """ self.mol_features = Feature(shape=(None, self.n_features)) self._labels = Label(shape=(None, self.n_tasks)) self._weights = Weights(shape=(None, self.n_tasks)) predictions = IRVLayer(self.n_tasks, self.K, in_layers=[self.mol_features]) costs = [] outputs = [] for task in range(self.n_tasks): task_output = Slice(task, 1, in_layers=[predictions]) sigmoid = Sigmoid(in_layers=[task_output]) outputs.append(sigmoid) label = Slice(task, axis=1, in_layers=[self._labels]) cost = SigmoidCrossEntropy(in_layers=[label, task_output]) costs.append(cost) all_cost = Concat(in_layers=costs, axis=1) loss = WeightedError(in_layers=[all_cost, self._weights]) + \ IRVRegularize(predictions, self.penalty, in_layers=[predictions]) self.set_loss(loss) outputs = Stack(axis=1, in_layers=outputs) outputs = Concat(axis=2, in_layers=[1 - outputs, outputs]) self.add_output(outputs)
def build_graph(self):
  self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
  self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
  previous_layer = self.atom_feats

  Hiddens = []
  for n_hidden in self.layer_structures:
    Hidden = Dense(
        out_channels=n_hidden,
        activation_fn=tf.nn.tanh,
        in_layers=[previous_layer])
    Hiddens.append(Hidden)
    previous_layer = Hiddens[-1]

  costs = []
  self.labels_fd = []
  for task in range(self.n_tasks):
    regression = Dense(
        out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
    output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
    self.add_output(output)

    label = Label(shape=(None, 1))
    self.labels_fd.append(label)
    cost = L2Loss(in_layers=[label, output])
    costs.append(cost)

  all_cost = Stack(in_layers=costs, axis=1)
  self.weights = Weights(shape=(None, self.n_tasks))
  loss = WeightedError(in_layers=[all_cost, self.weights])
  self.set_loss(loss)
def build_graph(self):
  self.atom_features = Feature(shape=(None, 75))
  self.degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
  self.membership = Feature(shape=(None,), dtype=tf.int32)

  self.deg_adjs = []
  for i in range(0, 10 + 1):
    deg_adj = Feature(shape=(None, i + 1), dtype=tf.int32)
    self.deg_adjs.append(deg_adj)
  in_layer = self.atom_features
  for layer_size in self.graph_conv_layers:
    gc1_in = [in_layer, self.degree_slice, self.membership] + self.deg_adjs
    gc1 = GraphConv(layer_size, activation_fn=tf.nn.relu, in_layers=gc1_in)
    batch_norm1 = MyBatchNorm(in_layers=[gc1])
    gp_in = [batch_norm1, self.degree_slice, self.membership] + self.deg_adjs
    in_layer = GraphPool(in_layers=gp_in)
  dense = Dense(
      out_channels=self.dense_layer_size[0],
      activation_fn=tf.nn.relu,
      in_layers=[in_layer])
  batch_norm3 = MyBatchNorm(in_layers=[dense])
  batch_norm3 = Dropout(self.dropout, in_layers=[batch_norm3])
  readout = GraphGather(
      batch_size=self.batch_size,
      activation_fn=tf.nn.tanh,
      in_layers=[batch_norm3, self.degree_slice, self.membership] +
      self.deg_adjs)
  curLayer = readout
  for myind in range(1, len(self.dense_layer_size) - 1):
    curLayer = Dense(
        out_channels=self.dense_layer_size[myind],
        activation_fn=tf.nn.relu,
        in_layers=[curLayer])
    curLayer = Dropout(self.dropout, in_layers=[curLayer])
  classification = Dense(
      out_channels=self.n_tasks, activation_fn=None, in_layers=[curLayer])
  sigmoid = MySigmoid(in_layers=[classification])
  self.add_output(sigmoid)

  self.label = Label(shape=(None, self.n_tasks))
  all_cost = MySigmoidCrossEntropy(in_layers=[self.label, classification])
  self.weights = Weights(shape=(None, self.n_tasks))
  loss = WeightedError(in_layers=[all_cost, self.weights])
  self.set_loss(loss)

  self.mydense = dense
  self.myreadout = readout
  self.myclassification = classification
  self.mysigmoid = sigmoid
  self.myall_cost = all_cost
  self.myloss = loss
def test_compute_model_performance_multitask_regressor(self):
  random_seed = 42
  n_data_points = 20
  n_features = 2
  np.random.seed(seed=random_seed)

  X = np.random.rand(n_data_points, n_features)
  y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1)
  y2 = np.expand_dims(np.array([-0.5 for x in range(n_data_points)]), axis=-1)
  X = NumpyDataset(X)
  ys = [NumpyDataset(y1), NumpyDataset(y2)]

  databag = Databag()

  features = Feature(shape=(None, n_features))
  databag.add_dataset(features, X)

  outputs = []
  losses = []
  labels = []
  for i in range(2):
    label = Label(shape=(None, 1))
    dense = Dense(out_channels=1, in_layers=[features])
    loss = ReduceSquareDifference(in_layers=[dense, label])

    outputs.append(dense)
    losses.append(loss)
    labels.append(label)
    databag.add_dataset(label, ys[i])

  total_loss = ReduceMean(in_layers=losses)

  tg = dc.models.TensorGraph(
      mode="regression",
      batch_size=20,
      random_seed=random_seed,
      learning_rate=0.1)
  for output in outputs:
    tg.add_output(output)
  tg.set_loss(total_loss)

  tg.fit_generator(
      databag.iterbatches(
          epochs=1000, batch_size=tg.batch_size, pad_batches=True))

  metric = [
      dc.metrics.Metric(
          dc.metrics.mean_absolute_error, np.mean, mode="regression"),
  ]
  scores = tg.evaluate_generator(
      databag.iterbatches(), metric, labels=labels, per_task_metrics=True)
  scores = list(scores[1].values())
  assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))
def _build(self):
  self.A_tilda_k = list()
  for k in range(1, self.k_max + 1):
    self.A_tilda_k.append(
        Feature(
            name="graph_adjacency_{}".format(k),
            dtype=tf.float32,
            shape=[None, self.max_nodes, self.max_nodes]))
  self.X = Feature(
      name='atom_features',
      dtype=tf.float32,
      shape=[None, self.max_nodes, self.num_node_features])

  graph_layers = list()
  adaptive_filters = list()
  for index, k in enumerate(range(1, self.k_max + 1)):
    in_layers = [self.A_tilda_k[index], self.X]
    adaptive_filters.append(
        AdaptiveFilter(
            batch_size=self.batch_size,
            in_layers=in_layers,
            num_nodes=self.max_nodes,
            num_node_features=self.num_node_features,
            combine_method=self.combine_method))
    graph_layers.append(
        KOrderGraphConv(
            batch_size=self.batch_size,
            in_layers=in_layers + [adaptive_filters[index]],
            num_nodes=self.max_nodes,
            num_node_features=self.num_node_features,
            init='glorot_uniform'))
  graph_features = Concat(in_layers=graph_layers, axis=2)
  graph_features = ReLU(in_layers=[graph_features])
  flattened = Flatten(in_layers=[graph_features])

  dense1 = Dense(
      in_layers=[flattened], out_channels=64, activation_fn=tf.nn.relu)
  dense2 = Dense(
      in_layers=[dense1], out_channels=16, activation_fn=tf.nn.relu)
  dense3 = Dense(
      in_layers=[dense2], out_channels=1 * self.n_tasks, activation_fn=None)
  output = Reshape(in_layers=[dense3], shape=(-1, self.n_tasks, 1))
  self.add_output(output)

  label = Label(shape=(None, self.n_tasks, 1))
  weights = Weights(shape=(None, self.n_tasks))
  loss = ReduceSum(L2Loss(in_layers=[label, output]))
  weighted_loss = WeightedError(in_layers=[loss, weights])
  self.set_loss(weighted_loss)
def __init__(self,
             img_rows=224,
             img_cols=224,
             weights="imagenet",
             classes=1000,
             **kwargs):
  super(ResNet50, self).__init__(use_queue=False, **kwargs)
  self.img_cols = img_cols
  self.img_rows = img_rows
  self.weights = weights
  self.classes = classes

  input = Feature(shape=(None, self.img_rows, self.img_cols, 3))
  labels = Label(shape=(None, self.classes))

  # Stage 1: conv -> batch norm -> ReLU -> max pool. The pool takes the
  # activated tensor ac1 (the original pooled bn1, leaving ac1 dead).
  conv1 = Conv2D(
      num_outputs=64,
      kernel_size=7,
      stride=2,
      activation='linear',
      padding='same',
      in_layers=[input])
  bn1 = BatchNorm(in_layers=[conv1])
  ac1 = ReLU(bn1)
  pool1 = MaxPool2D(ksize=[1, 3, 3, 1], in_layers=[ac1])

  # Stage 2: one conv block followed by two identity blocks.
  cb1 = self.conv_block(pool1, 3, [64, 64, 256], 1)
  id1 = self.identity_block(cb1, 3, [64, 64, 256])
  id1 = self.identity_block(id1, 3, [64, 64, 256])

  # Stage 3: one conv block followed by three identity blocks.
  cb2 = self.conv_block(id1, 3, [128, 128, 512])
  id2 = self.identity_block(cb2, 3, [128, 128, 512])
  id2 = self.identity_block(id2, 3, [128, 128, 512])
  id2 = self.identity_block(id2, 3, [128, 128, 512])

  # Stage 4: one conv block followed by five identity blocks, each chained
  # from the previous (the original restarted the fourth block from cb3).
  cb3 = self.conv_block(id2, 3, [256, 256, 1024])
  id3 = self.identity_block(cb3, 3, [256, 256, 1024])
  id3 = self.identity_block(id3, 3, [256, 256, 1024])
  id3 = self.identity_block(id3, 3, [256, 256, 1024])
  id3 = self.identity_block(id3, 3, [256, 256, 1024])
  id3 = self.identity_block(id3, 3, [256, 256, 1024])

  # Stage 5: one conv block followed by two identity blocks.
  cb4 = self.conv_block(id3, 3, [512, 512, 2048])
  id4 = self.identity_block(cb4, 3, [512, 512, 2048])
  id4 = self.identity_block(id4, 3, [512, 512, 2048])

  pool2 = AvgPool2D(ksize=[1, 7, 7, 1], in_layers=[id4])
  flatten = Flatten(in_layers=[pool2])
  dense = Dense(classes, in_layers=[flatten])
  loss = SoftMaxCrossEntropy(in_layers=[labels, dense])
  loss = ReduceMean(in_layers=[loss])
  self.set_loss(loss)
  self.add_output(dense)
def test_compute_model_performance_multitask_classifier(self):
  n_data_points = 20
  n_features = 2

  X = np.ones(shape=(n_data_points // 2, n_features)) * -1
  X1 = np.ones(shape=(n_data_points // 2, n_features))
  X = np.concatenate((X, X1))
  class_1 = np.array([[0.0, 1.0] for x in range(int(n_data_points / 2))])
  class_0 = np.array([[1.0, 0.0] for x in range(int(n_data_points / 2))])
  y1 = np.concatenate((class_0, class_1))
  y2 = np.concatenate((class_1, class_0))
  X = NumpyDataset(X)
  ys = [NumpyDataset(y1), NumpyDataset(y2)]

  databag = Databag()

  features = Feature(shape=(None, n_features))
  databag.add_dataset(features, X)

  outputs = []
  entropies = []
  labels = []
  for i in range(2):
    label = Label(shape=(None, 2))
    labels.append(label)
    dense = Dense(out_channels=2, in_layers=[features])
    output = SoftMax(in_layers=[dense])
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])

    entropies.append(smce)
    outputs.append(output)
    databag.add_dataset(label, ys[i])

  total_loss = ReduceMean(in_layers=entropies)

  tg = dc.models.TensorGraph(learning_rate=0.1)
  for output in outputs:
    tg.add_output(output)
  tg.set_loss(total_loss)

  tg.fit_generator(
      databag.iterbatches(
          epochs=1000, batch_size=tg.batch_size, pad_batches=True))

  metric = dc.metrics.Metric(
      dc.metrics.roc_auc_score, np.mean, mode="classification")
  scores = tg.evaluate_generator(
      databag.iterbatches(), [metric], labels=labels, per_task_metrics=True)
  scores = list(scores[1].values())
  # Loosening atol to see if tests stop failing sporadically
  assert_true(np.all(np.isclose(scores, [1.0, 1.0], atol=0.20)))
def build_graph(self):
  d1 = Dense(
      out_channels=256, activation_fn=tf.nn.relu, in_layers=[self.feature])
  d2 = Dense(out_channels=64, activation_fn=tf.nn.relu, in_layers=[d1])
  d3 = Dense(out_channels=16, activation_fn=None, in_layers=[d2])
  d4 = Dense(out_channels=2, activation_fn=None, in_layers=[d3])
  softmax = SoftMax(in_layers=[d4])
  self.tg.add_output(softmax)

  self.label = Label(shape=(None, 2))
  cost = SoftMaxCrossEntropy(in_layers=[self.label, d4])
  loss = ReduceMean(in_layers=[cost])
  self.tg.set_loss(loss)
def test_compute_model_performance_singletask_regressor_ordering(self):
  n_data_points = 1000
  n_features = 1

  X = np.array(range(n_data_points))
  X = np.expand_dims(X, axis=-1)
  y1 = X + 1

  X = NumpyDataset(X)
  ys = [NumpyDataset(y1)]

  databag = Databag()

  features = Feature(shape=(None, n_features))
  databag.add_dataset(features, X)

  outputs = []
  losses = []
  labels = []
  for i in range(1):
    label = Label(shape=(None, 1))
    dense = Dense(out_channels=1, in_layers=[features])
    loss = ReduceSquareDifference(in_layers=[dense, label])

    outputs.append(dense)
    losses.append(loss)
    labels.append(label)
    databag.add_dataset(label, ys[i])

  total_loss = ReduceMean(in_layers=losses)

  tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1)
  for output in outputs:
    tg.add_output(output)
  tg.set_loss(total_loss)

  tg.fit_generator(
      databag.iterbatches(
          epochs=1000, batch_size=tg.batch_size, pad_batches=True))

  metric = [
      dc.metrics.Metric(
          dc.metrics.mean_absolute_error, np.mean, mode="regression"),
      dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
  ]
  scores = tg.evaluate_generator(
      databag.iterbatches(batch_size=1),
      metric,
      labels=labels,
      per_task_metrics=True)
  print(scores)
  scores = list(scores[1].values())
  assert_true(np.all(np.isclose(scores, [0.0], atol=0.5)))
def fit(self, dataset, loss, **kwargs):
  """Fits on the specified dataset.

  If called for the first time, constructs the TensorFlow graph for this
  model. Fits this graph on the specified dataset according to the specified
  loss.

  Parameters
  ----------
  dataset: dc.data.Dataset
    Dataset with data
  loss: string
    Only "binary_crossentropy" or "mse" for now.
  """
  X_shape, y_shape, _, _ = dataset.get_shape()
  # Calling fit() for first time
  if not self.built:
    feature_shape = X_shape[1:]
    label_shape = y_shape[1:]

    # Add in features
    features = Feature(shape=(None,) + feature_shape)
    # Add in labels
    labels = Label(shape=(None,) + label_shape)

    # Add in all layers
    prev_layer = features
    if len(self._layer_list) == 0:
      raise ValueError("No layers have been added to model.")
    for ind, layer in enumerate(self._layer_list):
      if len(layer.in_layers) > 1:
        raise ValueError("Cannot specify more than one "
                         "in_layer for Sequential.")
      layer.in_layers += [prev_layer]
      prev_layer = layer
    # The last layer is the output of the model
    self.outputs.append(prev_layer)

    if loss == "binary_crossentropy":
      smce = SoftMaxCrossEntropy(in_layers=[labels, prev_layer])
      self.set_loss(ReduceMean(in_layers=[smce]))
    elif loss == "mse":
      mse = ReduceSquareDifference(in_layers=[prev_layer, labels])
      self.set_loss(mse)
    else:
      # TODO(rbharath): Add in support for additional losses.
      raise ValueError("Unsupported loss.")
  super(Sequential, self).fit(dataset, **kwargs)
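# Hedged usage sketch for the Sequential wrapper above, assuming the
# DeepChem 2.x TensorGraph API. The `add()` method used to populate
# `_layer_list` is an assumption inferred from this class's structure; it is
# not shown in the code above.
import numpy as np
import deepchem as dc
from deepchem.models.tensorgraph.layers import Dense, SoftMax

X = np.random.rand(20, 2)
y = np.array([[0.0, 1.0]] * 20)
dataset = dc.data.NumpyDataset(X, y)

model = Sequential(learning_rate=0.01)
model.add(Dense(out_channels=2))  # fit() wires in_layers to the features
model.add(SoftMax())
model.fit(dataset, loss="binary_crossentropy", nb_epoch=100)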
def build_graph(self): """Building graph structures: Features => DTNNEmbedding => DTNNStep => DTNNStep => DTNNGather => Regression """ self.atom_number = Feature(shape=(None, ), dtype=tf.int32) self.distance = Feature(shape=(None, self.n_distance)) self.atom_membership = Feature(shape=(None, ), dtype=tf.int32) self.distance_membership_i = Feature(shape=(None, ), dtype=tf.int32) self.distance_membership_j = Feature(shape=(None, ), dtype=tf.int32) dtnn_embedding = DTNNEmbedding(n_embedding=self.n_embedding, in_layers=[self.atom_number]) if self.dropout > 0.0: dtnn_embedding = Dropout(self.dropout, in_layers=dtnn_embedding) dtnn_layer1 = DTNNStep(n_embedding=self.n_embedding, n_distance=self.n_distance, in_layers=[ dtnn_embedding, self.distance, self.distance_membership_i, self.distance_membership_j ]) if self.dropout > 0.0: dtnn_layer1 = Dropout(self.dropout, in_layers=dtnn_layer1) dtnn_layer2 = DTNNStep(n_embedding=self.n_embedding, n_distance=self.n_distance, in_layers=[ dtnn_layer1, self.distance, self.distance_membership_i, self.distance_membership_j ]) if self.dropout > 0.0: dtnn_layer2 = Dropout(self.dropout, in_layers=dtnn_layer2) dtnn_gather = DTNNGather(n_embedding=self.n_embedding, layer_sizes=[self.n_hidden], n_outputs=self.n_tasks, output_activation=self.output_activation, in_layers=[dtnn_layer2, self.atom_membership]) if self.dropout > 0.0: dtnn_gather = Dropout(self.dropout, in_layers=dtnn_gather) n_tasks = self.n_tasks weights = Weights(shape=(None, n_tasks)) labels = Label(shape=(None, n_tasks)) output = Reshape( shape=(None, n_tasks), in_layers=[Dense(in_layers=dtnn_gather, out_channels=n_tasks)]) self.add_output(output) weighted_loss = ReduceSum(L2Loss(in_layers=[labels, output, weights])) self.set_loss(weighted_loss)