def add_training_costs(self, graph, name_scopes, output, labels, weights):
    """Add the per-task and aggregated training loss ops to `graph`.

    For each task the weighted cost from `self.cost` is summed and divided
    by `self.batch_size`. The per-task results are added together and, when
    `self.penalty` is non-zero, the penalty returned by
    `model_ops.weight_decay` is added on top.

    Parameters
    ----------
    graph: tf.Graph
      Graph the cost ops are added to.
    name_scopes: dict
      Forwarded to `TensorflowGraph.shared_name_scope`.
    output: list
      Indexed by task; `output[task]` is passed to `self.cost`.
    labels: list
      Indexed by task; `labels[task]` is passed to `self.cost`.
    weights: list
      Indexed by task; `weights[task]` is passed to `self.cost`.

    Returns
    -------
    The aggregated loss: the sum of the per-task costs plus the optional
    weight-decay penalty.
    """
    with graph.as_default():
        # Task indices are zero-padded to a common width in the scope names.
        pad_width = len(str(self.n_tasks))
        gradient_costs = []  # costs used for gradient calculation

        with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
            for task in range(self.n_tasks):
                task_str = str(task).zfill(pad_width)
                with TensorflowGraph.shared_name_scope(
                        'cost_{}'.format(task_str), graph, name_scopes):
                    with tf.name_scope('weighted'):
                        weighted_cost = self.cost(
                            output[task], labels[task], weights[task])

                    with tf.name_scope('gradient'):
                        # Note that we divide by the batch size and not the
                        # number of non-zero weight examples in the batch.
                        # Also, instead of using tf.reduce_mean (which can put
                        # ops on the CPU) we explicitly calculate with div/sum
                        # so it stays on the GPU.
                        gradient_cost = tf.div(
                            tf.reduce_sum(weighted_cost), self.batch_size)
                        gradient_costs.append(gradient_cost)

            # aggregated costs
            with TensorflowGraph.shared_name_scope(
                    'aggregated', graph, name_scopes):
                with tf.name_scope('gradient'):
                    loss = tf.add_n(gradient_costs)

                # weight decay
                if self.penalty != 0.0:
                    penalty = model_ops.weight_decay(
                        self.penalty_type, self.penalty)
                    loss += penalty

        return loss
def add_task_training_costs(self, graph, name_scopes, outputs, labels,
                            weights):
    """Create one training-cost op per task and return them keyed by task.

    Each task is trained separately, so every task gets its own cost
    instead of a single aggregated loss.

    TODO(rbharath): Figure out how to support weight decay for this model.
    Since each task is trained separately, weight decay should only be used
    on weights in column for that task.

    Parameters
    ----------
    graph: tf.Graph
      Graph for the model.
    name_scopes: dict
      Contains all the scopes for model
    outputs: list
      List of output tensors from model.
    labels: list
      Indexed by task; `labels[task]` is passed to `self.cost`.
    weights: list
      List of weight placeholders for model.

    Returns
    -------
    dict
      Maps each task index to that task's summed weighted cost divided by
      `self.batch_size`.
    """
    costs_by_task = {}
    with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
        for task_idx in range(self.n_tasks):
            scope_name = 'cost_%d' % task_idx
            with TensorflowGraph.shared_name_scope(scope_name, graph,
                                                   name_scopes):
                task_weighted_cost = self.cost(
                    outputs[task_idx], labels[task_idx], weights[task_idx])

                # The divisor is the batch size, not the count of examples
                # with non-zero weight. div/sum is spelled out instead of
                # tf.reduce_mean (which can put ops on the CPU) so the
                # computation stays on the GPU.
                summed_cost = tf.reduce_sum(task_weighted_cost)
                costs_by_task[task_idx] = tf.div(summed_cost, self.batch_size)

    return costs_by_task
def add_training_cost(self, graph, name_scopes, output, labels, weights):
    """Add the per-task and aggregated training loss ops to `graph`.

    For each task the weighted cost from `self.cost` is summed and divided
    by `self.batch_size`. The per-task results are added together and, when
    `self.penalty` is non-zero, the penalty returned by `weight_decay` is
    added on top.

    Parameters
    ----------
    graph: tf.Graph
      Graph the cost ops are added to.
    name_scopes: dict
      Forwarded to `TensorflowGraph.shared_name_scope`.
    output: list
      Indexed by task; `output[task]` is passed to `self.cost`.
    labels: list
      Indexed by task; `labels[task]` is passed to `self.cost`.
    weights: list
      Indexed by task; `weights[task]` is passed to `self.cost`.

    Returns
    -------
    The aggregated loss: the sum of the per-task costs plus the optional
    weight-decay penalty.
    """
    with graph.as_default():
        # Task indices are zero-padded to a common width in the scope names.
        pad_width = len(str(self.n_tasks))
        gradient_costs = []  # costs used for gradient calculation

        with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
            for task in range(self.n_tasks):
                task_str = str(task).zfill(pad_width)
                with TensorflowGraph.shared_name_scope(
                        'cost_{}'.format(task_str), graph, name_scopes):
                    with tf.name_scope('weighted'):
                        weighted_cost = self.cost(
                            output[task], labels[task], weights[task])

                    with tf.name_scope('gradient'):
                        # Note that we divide by the batch size and not the
                        # number of non-zero weight examples in the batch.
                        # Also, instead of using tf.reduce_mean (which can put
                        # ops on the CPU) we explicitly calculate with div/sum
                        # so it stays on the GPU.
                        gradient_cost = tf.div(
                            tf.reduce_sum(weighted_cost), self.batch_size)
                        gradient_costs.append(gradient_cost)

            # aggregated costs
            with TensorflowGraph.shared_name_scope(
                    'aggregated', graph, name_scopes):
                with tf.name_scope('gradient'):
                    loss = tf.add_n(gradient_costs)

                # weight decay
                if self.penalty != 0.0:
                    # using self-defined regularization: `weight_decay` is
                    # resolved from the enclosing module, not this block.
                    penalty = weight_decay(self.penalty_type, self.penalty)
                    loss += penalty

        return loss
def add_task_training_costs(self, graph, name_scopes, outputs, labels,
                            weights):
    """Create one training-cost op per task and return them keyed by task.

    Each task is trained separately, so every task gets its own cost
    instead of a single aggregated loss.

    TODO(rbharath): Figure out how to support weight decay for this model.
    Since each task is trained separately, weight decay should only be used
    on weights in column for that task.

    Parameters
    ----------
    graph: tf.Graph
      Graph for the model.
    name_scopes: dict
      Contains all the scopes for model
    outputs: list
      List of output tensors from model.
    labels: list
      Indexed by task; `labels[task]` is passed to `self.cost`.
    weights: list
      List of weight placeholders for model.

    Returns
    -------
    dict
      Maps each task index to that task's summed weighted cost divided by
      `self.batch_size`.
    """
    costs_by_task = {}
    with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
        for task_idx in range(self.n_tasks):
            scope_name = 'cost_%d' % task_idx
            with TensorflowGraph.shared_name_scope(scope_name, graph,
                                                   name_scopes):
                task_weighted_cost = self.cost(
                    outputs[task_idx], labels[task_idx], weights[task_idx])

                # The divisor is the batch size, not the count of examples
                # with non-zero weight. div/sum is spelled out instead of
                # tf.reduce_mean (which can put ops on the CPU) so the
                # computation stays on the GPU.
                summed_cost = tf.reduce_sum(task_weighted_cost)
                costs_by_task[task_idx] = tf.div(summed_cost, self.batch_size)

    return costs_by_task
def build(self, graph, name_scopes, training):
    """Constructs the graph architecture as specified in its config.

    One column of fully connected layers is built per task. For every
    layer after the first and every task after the first, the result of
    `self.add_adapter` is added to the layer before the ReLU. Each task then
    gets a single-unit linear output layer.

    This method creates the following Placeholders:
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x n_features.

    Parameters
    ----------
    graph: tf.Graph
      Graph the ops and variables are added to.
    name_scopes: dict
      Forwarded to `TensorflowGraph.get_placeholder_scope` and
      `TensorflowGraph.shared_name_scope`.
    training:
      Forwarded unchanged to `model_ops.dropout`.

    Returns
    -------
    list
      One output tensor per task, in task order.
    """
    n_features = self.n_features
    placeholder_scope = TensorflowGraph.get_placeholder_scope(
        graph, name_scopes)
    with graph.as_default():
        with placeholder_scope:
            self.mol_features = tf.placeholder(
                tf.float32, shape=[None, n_features], name='mol_features')

        layer_sizes = self.layer_sizes
        weight_init_stddevs = self.weight_init_stddevs
        bias_init_consts = self.bias_init_consts
        dropouts = self.dropouts
        lengths_set = {
            len(layer_sizes),
            len(weight_init_stddevs),
            len(bias_init_consts),
            len(dropouts),
        }
        assert len(lengths_set) == 1, 'All layer params must have same length.'
        n_layers = lengths_set.pop()
        assert n_layers > 0, 'Must have some layers defined.'

        # (layer index, task index) -> output tensor of that hidden layer.
        all_layers = {}
        for i in range(n_layers):
            for task in range(self.n_tasks):
                task_scope = TensorflowGraph.shared_name_scope(
                    "task%d" % task, graph, name_scopes)
                print("Adding weights for task %d, layer %d" % (task, i))
                with task_scope:
                    if i == 0:
                        prev_layer = self.mol_features
                        prev_layer_size = n_features
                    else:
                        prev_layer = all_layers[(i - 1, task)]
                        prev_layer_size = layer_sizes[i - 1]
                        if task > 0:
                            # Created before this layer's own W and b.
                            lateral_contrib = self.add_adapter(
                                all_layers, task, i)
                    print("Creating W_layer_%d_task%d of shape %s" %
                          (i, task, str([prev_layer_size, layer_sizes[i]])))
                    W = tf.Variable(
                        tf.truncated_normal(
                            shape=[prev_layer_size, layer_sizes[i]],
                            stddev=weight_init_stddevs[i]),
                        name='W_layer_%d_task%d' % (i, task),
                        dtype=tf.float32)
                    print("Creating b_layer_%d_task%d of shape %s" %
                          (i, task, str([layer_sizes[i]])))
                    b = tf.Variable(
                        tf.constant(
                            value=bias_init_consts[i],
                            shape=[layer_sizes[i]]),
                        name='b_layer_%d_task%d' % (i, task),
                        dtype=tf.float32)
                    layer = tf.matmul(prev_layer, W) + b
                    # Same condition under which lateral_contrib was built.
                    if i > 0 and task > 0:
                        layer = layer + lateral_contrib
                    layer = tf.nn.relu(layer)
                    layer = model_ops.dropout(layer, dropouts[i], training)
                    all_layers[(i, task)] = layer

        # The output stage reads from the last hidden layer. Named explicitly
        # instead of relying on the loop variable leaking out of the loop.
        last = n_layers - 1
        output = []
        for task in range(self.n_tasks):
            prev_layer = all_layers[(last, task)]
            prev_layer_size = layer_sizes[last]
            task_scope = TensorflowGraph.shared_name_scope(
                "task%d" % task, graph, name_scopes)
            with task_scope:
                if task > 0:
                    lateral_contrib = tf.squeeze(
                        self.add_adapter(all_layers, task, last + 1))
                weight_init = tf.truncated_normal(
                    shape=[prev_layer_size, 1],
                    stddev=weight_init_stddevs[last])
                bias_init = tf.constant(
                    value=bias_init_consts[last], shape=[1])
                print("Creating W_output_task%d of shape %s" %
                      (task, str([prev_layer_size, 1])))
                w = tf.Variable(
                    weight_init, name='W_output_task%d' % task,
                    dtype=tf.float32)
                print("Creating b_output_task%d of shape %s" %
                      (task, str([1])))
                b = tf.Variable(
                    bias_init, name='b_output_task%d' % task,
                    dtype=tf.float32)
                layer = tf.squeeze(tf.matmul(prev_layer, w) + b)
                # NOTE(review): with a single hidden layer (last == 0) the
                # adapter above is still built for task > 0 but is not added
                # here -- confirm this is intended.
                if last > 0 and task > 0:
                    layer = layer + lateral_contrib
                output.append(layer)

        return output
def add_progressive_lattice(self, graph, name_scopes, training):
    """Constructs the graph architecture as specified in its config.

    One column of fully connected layers is built per task. For every
    layer after the first and every task after the first, the result of
    `self.add_adapter` is added to the layer before the ReLU. Each task then
    gets a single-unit linear output layer.

    This method does not create any placeholder itself: it reads the
    existing `self.mol_features` as the input to the first layer of every
    task, so that attribute must be set before calling it.

    Parameters
    ----------
    graph: tf.Graph
      Graph the ops and variables are added to.
    name_scopes: dict
      Forwarded to `TensorflowGraph.get_placeholder_scope` and
      `TensorflowGraph.shared_name_scope`.
    training:
      Forwarded unchanged to `model_ops.dropout`.

    Returns
    -------
    list
      One output tensor per task, in task order.
    """
    # The returned scope is not used in this method. The call is kept in
    # case it registers the scope in name_scopes -- TODO confirm, then drop.
    TensorflowGraph.get_placeholder_scope(graph, name_scopes)
    with graph.as_default():
        layer_sizes = self.layer_sizes
        weight_init_stddevs = self.weight_init_stddevs
        bias_init_consts = self.bias_init_consts
        dropouts = self.dropouts
        lengths_set = {
            len(layer_sizes),
            len(weight_init_stddevs),
            len(bias_init_consts),
            len(dropouts),
        }
        assert len(lengths_set) == 1, 'All layer params must have same length.'
        n_layers = lengths_set.pop()
        assert n_layers > 0, 'Must have some layers defined.'

        # (layer index, task index) -> output tensor of that hidden layer.
        all_layers = {}
        for i in range(n_layers):
            for task in range(self.n_tasks):
                # Hidden layers use the "task%d_ops" scope; the output layer
                # further down uses "task%d".
                task_scope = TensorflowGraph.shared_name_scope(
                    "task%d_ops" % task, graph, name_scopes)
                print("Adding weights for task %d, layer %d" % (task, i))
                with task_scope:
                    if i == 0:
                        prev_layer = self.mol_features
                        prev_layer_size = self.n_features
                    else:
                        prev_layer = all_layers[(i - 1, task)]
                        prev_layer_size = layer_sizes[i - 1]
                        if task > 0:
                            # Created before this layer's own W and b.
                            lateral_contrib = self.add_adapter(
                                all_layers, task, i)
                    print("Creating W_layer_%d_task%d of shape %s" %
                          (i, task, str([prev_layer_size, layer_sizes[i]])))
                    W = tf.Variable(
                        tf.truncated_normal(
                            shape=[prev_layer_size, layer_sizes[i]],
                            stddev=weight_init_stddevs[i]),
                        name='W_layer_%d_task%d' % (i, task),
                        dtype=tf.float32)
                    print("Creating b_layer_%d_task%d of shape %s" %
                          (i, task, str([layer_sizes[i]])))
                    b = tf.Variable(
                        tf.constant(
                            value=bias_init_consts[i],
                            shape=[layer_sizes[i]]),
                        name='b_layer_%d_task%d' % (i, task),
                        dtype=tf.float32)
                    layer = tf.matmul(prev_layer, W) + b
                    # Same condition under which lateral_contrib was built.
                    if i > 0 and task > 0:
                        layer = layer + lateral_contrib
                    layer = tf.nn.relu(layer)
                    layer = model_ops.dropout(layer, dropouts[i], training)
                    all_layers[(i, task)] = layer

        # The output stage reads from the last hidden layer. Named explicitly
        # instead of relying on the loop variable leaking out of the loop.
        last = n_layers - 1
        output = []
        for task in range(self.n_tasks):
            prev_layer = all_layers[(last, task)]
            prev_layer_size = layer_sizes[last]
            task_scope = TensorflowGraph.shared_name_scope(
                "task%d" % task, graph, name_scopes)
            with task_scope:
                if task > 0:
                    lateral_contrib = tf.squeeze(
                        self.add_adapter(all_layers, task, last + 1))
                weight_init = tf.truncated_normal(
                    shape=[prev_layer_size, 1],
                    stddev=weight_init_stddevs[last])
                bias_init = tf.constant(
                    value=bias_init_consts[last], shape=[1])
                print("Creating W_output_task%d of shape %s" %
                      (task, str([prev_layer_size, 1])))
                w = tf.Variable(
                    weight_init, name='W_output_task%d' % task,
                    dtype=tf.float32)
                print("Creating b_output_task%d of shape %s" %
                      (task, str([1])))
                b = tf.Variable(
                    bias_init, name='b_output_task%d' % task,
                    dtype=tf.float32)
                layer = tf.squeeze(tf.matmul(prev_layer, w) + b)
                # NOTE(review): with a single hidden layer (last == 0) the
                # adapter above is still built for task > 0 but is not added
                # here -- confirm this is intended.
                if last > 0 and task > 0:
                    layer = layer + lateral_contrib
                output.append(layer)

        return output