def _init_model_output(self, t):
    """Attach the classifier output head to the graph.

    Projects the current layer ``t`` to logits of the appropriate size,
    then creates (as side effects on ``self``) the target placeholder and
    the activated output layer for the three classification regimes:
    multilabel (per-class sigmoid), multiclass (softmax), and binary
    (single sigmoid unit).  Returns the logits tensor.
    """
    multilabel = self.multilabel_
    multiclass = self.n_classes_ > 2
    # Binary classification needs only one logit; the other regimes need
    # one logit per class.
    output_size = self.n_classes_ if (multilabel or multiclass) else 1

    if self.is_sparse_ and not self.hidden_units:
        # Sparse input flows straight into the output layer (no hidden
        # layers), so the affine must handle a SparseTensor.
        t = affine(t, output_size, input_size=self.input_layer_sz_,
                   scope='output_layer', sparse_input=True)
    else:
        if self.keep_prob != 1.0:
            t = tf.nn.dropout(t, keep_prob=self._keep_prob)
        t = affine(t, output_size, scope='output_layer')

    if multilabel:
        # One {0, 1} target per class per example.
        self.input_targets_ = \
            tf.placeholder(tf.int64, [None, self.n_classes_], "targets")
        self.output_layer_ = tf.nn.sigmoid(t)
        self._zeros = tf.zeros_like(self.output_layer_)
    elif multiclass:
        self.input_targets_ = tf.placeholder(tf.int64, [None], "targets")
        self.output_layer_ = tf.nn.softmax(t)
    else:
        self.input_targets_ = tf.placeholder(tf.int64, [None], "targets")
        t = tf.reshape(t, [-1])  # flatten binary logits to a 1-D tensor
        self.output_layer_ = tf.nn.sigmoid(t)
    return t
def _set_up_graph(self): """Initialize TF objects (needed before fitting or restoring).""" # A placeholder to control dropout for training vs. prediction. self._keep_prob = \ tf.placeholder(dtype=np.float32, shape=(), name="keep_prob") # Input layers. if self.is_sparse_: self._input_indices = \ tf.placeholder(np.int64, [None, 2], "input_indices") self._input_values = \ tf.placeholder(np.float32, [None], "input_values") self._input_shape = \ tf.placeholder(np.int64, [2], "input_shape") # t will be the current layer as we build up the graph below. t = tf.SparseTensor(self._input_indices, self._input_values, self._input_shape) else: self._input_values = \ tf.placeholder(np.float32, [None, self.input_layer_sz_], "input_values") t = self._input_values # Hidden layers. for i, layer_sz in enumerate(self.hidden_units): if self.is_sparse_ and i == 0: t = affine(t, layer_sz, input_size=self.input_layer_sz_, scope='layer_%d' % i, sparse_input=True) else: if self.keep_prob != 1.0: t = tf.nn.dropout(t, keep_prob=self._keep_prob) t = affine(t, layer_sz, scope='layer_%d' % i) t = t if self.activation is None else self.activation(t) # Set transformed layer to hidden layer if self._transform_layer_index == i: self._transform_layer = t # The output layer and objective function depend on the model # (e.g., classification vs regression). t = self._init_model_output(t) # set the transform layer to output logits if we have no hidden layers if self._transform_layer_index == -1: self._transform_layer = t self._sample_weight = \ tf.placeholder(np.float32, [None], "sample_weight") self._init_model_objective_fn(t) self._train_step = self.solver( **self.solver_kwargs if self.solver_kwargs else {}).minimize( self._obj_func)
def _init_model_output(self, t):
    """Attach the single-unit regression output head to the graph.

    Projects ``t`` to one output value per example, creates the float
    targets placeholder, and stores the identity-activated output layer
    on ``self``.  Returns the 1-D output tensor.
    """
    if self.is_sparse_ and not self.hidden_units:
        # No hidden layers: the sparse input feeds the output directly.
        logits = affine(t, 1, input_size=self.input_layer_sz_,
                        scope='output_layer', sparse_input=True)
    else:
        dense = t
        if self.keep_prob != 1.0:
            dense = tf.nn.dropout(dense, keep_prob=self._keep_prob)
        logits = affine(dense, 1, scope='output_layer')

    self.input_targets_ = tf.placeholder(tf.float32, [None], "targets")
    flat = tf.reshape(logits, [-1])  # drop trailing unit dim -> 1-D tensor
    self.output_layer_ = flat
    return flat
def _set_up_graph(self):
    """Initialize TF objects (needed before fitting or restoring).

    Assembles the graph: input placeholders (sparse or dense), the
    hidden-layer stack, the model-specific output and objective, and
    the solver's training op.
    """
    # Scalar placeholder so dropout can be disabled at prediction time.
    self._keep_prob = tf.placeholder(
        dtype=np.float32, shape=(), name="keep_prob")

    # Inputs: sparse data arrives as (indices, values, shape) triples,
    # dense data as a single 2-D matrix.
    if self.is_sparse_:
        self._input_indices = tf.placeholder(
            np.int64, [None, 2], "input_indices")
        self._input_values = tf.placeholder(
            np.float32, [None], "input_values")
        self._input_shape = tf.placeholder(
            np.int64, [2], "input_shape")
        # `net` holds the layer currently being extended.
        net = tf.SparseTensor(self._input_indices, self._input_values,
                              self._input_shape)
    else:
        self._input_values = tf.placeholder(
            np.float32, [None, self.input_layer_sz_], "input_values")
        net = self._input_values

    # Hidden layers.
    for idx, width in enumerate(self.hidden_units):
        if self.is_sparse_ and idx == 0:
            # Only the first layer may consume a SparseTensor.
            net = affine(net, width, input_size=self.input_layer_sz_,
                         scope='layer_%d' % idx, sparse_input=True)
        else:
            if self.keep_prob != 1.0:
                net = tf.nn.dropout(net, keep_prob=self._keep_prob)
            net = affine(net, width, scope='layer_%d' % idx)
        if self.activation is not None:
            net = self.activation(net)

    # The output layer and objective function depend on the model
    # (e.g., classification vs regression).
    net = self._init_model_output(net)
    self._init_model_objective_fn(net)

    solver_kwargs = self.solver_kwargs if self.solver_kwargs else {}
    self._train_step = self.solver(**solver_kwargs).minimize(self._obj_func)
def _set_up_graph(self): """Initialize TF objects (needed before fitting or restoring).""" # A placeholder to control dropout for training vs. prediction. self._keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name="keep_prob") # Input values. self._input_values = tf.placeholder(tf.float32, [None, self.input_layer_size_], "input_values") t = self._input_values # These masks are for construction the mixed loss output layer and # scores. TensorFlow does not support scatter operations into Tesnors # (i.e., the results of TF graph operations). Thus we use masks to # place the right data in the right spot. # The masks are type `tf.bool` to be used with `tf.where`. self._default_msk = tf.placeholder(tf.bool, [None, self.input_layer_size_], "default_msk") self._sigmoid_msk = tf.placeholder(tf.bool, [None, self.input_layer_size_], "sigmoid_msk") self._softmax_msks = tf.placeholder( tf.bool, [None, None, self.input_layer_size_], "softmax_msks") # Fan in layers. for i, layer_sz in enumerate(self.hidden_units): if self.keep_prob != 1.0: t = tf.nn.dropout(t, keep_prob=self._keep_prob) t = affine(t, layer_sz, scope='layer_%d' % i) if (self.hidden_activation is not None and i < len(self.hidden_units) - 1): t = self.hidden_activation(t) if (self.encoding_activation is not None and i == len(self.hidden_units) - 1): t = self.encoding_activation(t) # Encoded values. self._encoded_values = t # Fan out layers. second_layers \ = list(self.hidden_units[::-1][1:]) + [self.input_layer_size_] for i, layer_sz in enumerate(second_layers): if self.keep_prob != 1.0: t = tf.nn.dropout(t, keep_prob=self._keep_prob) t = affine(t, layer_sz, scope='layer_%d' % (i + len(self.hidden_units))) if (i < len(second_layers) - 1 and self.hidden_activation is not None): t = self.hidden_activation(t) # Finally do outputs and objective function. self._output_values, self._scores \ = self._build_output_layer_and_scores(t) self._obj_func = tf.reduce_mean(self._scores) # Training w/ Adam for now. 
# Catching a warning related to TensorFlow sparse to dense conversions # from the graph ops for the scores for mixed losses: # '.../tensorflow/python/ops/gradients.py:90: UserWarning: Converting # sparse IndexedSlices to a dense Tensor of unknown shape. This may # consume a large amount of memory. # "Converting sparse IndexedSlices to a dense Tensor of unknown # shape."' with warnings.catch_warnings(): warnings.filterwarnings( "ignore", message=("Converting sparse IndexedSlices to a dense Tensor " "of unknown shape"), module='tensorflow') self._train_step = tf.train.AdamOptimizer( learning_rate=self.learning_rate).minimize(self._obj_func)
def _set_up_graph(self):
    """Initialize TF objects (needed before fitting or restoring).

    Builds the graph: input placeholders (sparse or dense), hidden
    layers, the model-specific output and objective, prediction
    gradients w.r.t. the inputs (dense input only), and the training op.
    """
    # A placeholder to control dropout for training vs. prediction.
    self._keep_prob = \
        tf.placeholder(dtype=np.float32, shape=(), name="keep_prob")

    # Input layers.  Sparse input is fed as (indices, values, shape)
    # triples; dense input is a single 2-D matrix.
    if self.is_sparse_:
        self._input_indices = \
            tf.placeholder(np.int64, [None, 2], "input_indices")
        self._input_values = \
            tf.placeholder(np.float32, [None], "input_values")
        self._input_shape = \
            tf.placeholder(np.int64, [2], "input_shape")
        # t will be the current layer as we build up the graph below.
        t = tf.SparseTensor(self._input_indices, self._input_values,
                            self._input_shape)
    else:
        self._input_values = \
            tf.placeholder(np.float32, [None, self.input_layer_sz_],
                           "input_values")
        t = self._input_values

    # Hidden layers.
    for i, layer_sz in enumerate(self.hidden_units):
        if self.is_sparse_ and i == 0:
            # Only the first layer can consume a SparseTensor.
            t = affine(t, layer_sz, input_size=self.input_layer_sz_,
                       scope='layer_%d' % i, sparse_input=True)
        else:
            if self.keep_prob != 1.0:
                t = tf.nn.dropout(t, keep_prob=self._keep_prob)
            t = affine(t, layer_sz, scope='layer_%d' % i)
        t = t if self.activation is None else self.activation(t)

        # Set transformed layer to hidden layer
        if self._transform_layer_index == i:
            self._transform_layer = t

    # The output layer and objective function depend on the model
    # (e.g., classification vs regression).
    t = self._init_model_output(t)

    # set the transform layer to output logits if we have no hidden layers
    if self._transform_layer_index == -1:
        self._transform_layer = t

    # Prediction gradients (e.g., for analyzing the importance of features)
    # We use the top layer before the output activation function
    # (e.g., softmax, sigmoid) following
    # https://arxiv.org/pdf/1312.6034.pdf
    if self.is_sparse_:
        # Gradients w.r.t. a SparseTensor input are not supported here.
        self._prediction_gradient = None
    else:
        output_shape = self.output_layer_.get_shape()
        # Note: tf.gradients returns a list of gradients dy/dx, one per
        # input tensor x. In other words,
        # [ tensor(n_features x n_gradients) ].
        if len(output_shape) == 1:
            # Single-output (e.g., binary/regression): one gradient
            # tensor of shape (batch, n_features).
            self._prediction_gradient = tf.gradients(
                t, self._input_values)[0]
        elif len(output_shape) == 2:
            # According to the tf.gradients documentation, it looks like
            # we have to compute gradients separately for each output
            # dimension and then stack them for multiclass/label data.
            self._prediction_gradient = tf.stack([
                tf.gradients(t[:, i], self._input_values)[0]
                for i in range(output_shape[1])
            ], axis=1)
        else:
            # sanity check
            raise ValueError("Unexpected output shape")

    # Per-example weights used by the objective function.
    self._sample_weight = \
        tf.placeholder(np.float32, [None], "sample_weight")

    self._init_model_objective_fn(t)

    # Note the precedence: **(solver_kwargs if solver_kwargs else {}).
    self._train_step = self.solver(
        **self.solver_kwargs if self.solver_kwargs else {}).minimize(
            self._obj_func)
def _set_up_graph(self):
    """Initialize TF objects (needed before fitting or restoring).

    Builds a convolutional classifier graph: input placeholders, a stack
    of conv+max-pool layers, dense layers, a softmax output layer, the
    cross-entropy objective, and the solver's training op.
    """
    # Inputs.  The keep_prob placeholder lets dropout be switched off at
    # prediction time.
    self._keep_prob = tf.placeholder(dtype=np.float32, shape=(),
                                     name="keep_prob")
    self._input_targets = tf.placeholder(np.int32, [None],
                                         "input_targets")
    self._input_values = tf.placeholder(np.float32,
                                        [None, self.input_size_],
                                        "input_values")

    # Reshape flat input rows into NHWC image batches.
    t = tf.reshape(
        self._input_values,
        [-1, self._image_size, self._image_size, self._num_channels])

    # Conv. layers.  Each entry of conv_hidden_units is
    # (kernel_size, n_output_feature_maps).
    prev_feats = self._num_channels
    for i, (cdim, num_feats) in enumerate(self.conv_hidden_units):
        with tf.variable_scope('conv_layer_%d' % i):
            W = tf.get_variable("weights",
                                [cdim, cdim, prev_feats, num_feats])
            b = tf.get_variable("bias", [num_feats],
                                initializer=tf.constant_initializer(0.0))
            t = tf.nn.conv2d(t, W, strides=[1, 1, 1, 1],
                             padding='SAME') + b
            t = t if self.activation is None else self.activation(t)
            t = tf.nn.max_pool(
                t,
                ksize=[1, self.max_pool_size, self.max_pool_size, 1],
                strides=[1, self.max_pool_size, self.max_pool_size, 1],
                padding='SAME')
        prev_feats = num_feats

    # Flatten to final size.  Each max-pool divides the spatial extent by
    # max_pool_size; integer division assumes the image size is divisible
    # by the total pooling factor — TODO confirm for odd sizes ('SAME'
    # pooling rounds up, which would make this undercount).
    final_img_size = (self._image_size //
                      (self.max_pool_size ** len(self.conv_hidden_units)))
    # Use prev_feats (not the loop variable num_feats, which is unbound
    # when conv_hidden_units is empty) for the flattened feature count.
    t = tf.reshape(t,
                   [-1, final_img_size * final_img_size * prev_feats])

    # Dense layers.
    for i, layer_sz in enumerate(self.dense_hidden_units):
        if self.keep_prob != 1.0:
            t = tf.nn.dropout(t, keep_prob=self._keep_prob)
        t = affine(t, layer_sz, scope='dense_layer_%d' % i)
        t = t if self.activation is None else self.activation(t)

    # Final layer.
    if self.keep_prob != 1.0:
        t = tf.nn.dropout(t, keep_prob=self._keep_prob)
    t = affine(t, self.n_classes_, scope='output_layer')

    # Probs for each class.
    self._output_layer = tf.nn.softmax(t)

    # Objective function: mean cross-entropy over the batch, computed
    # from the raw logits for numerical stability.
    self._obj_func = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self._input_targets, logits=t))

    # Training.
    sk = self.solver_kwargs if self.solver_kwargs is not None else {}
    self._train_step = self.solver(**sk).minimize(self._obj_func)