def _build(self):
    # Input image batch, resized to the VGG16 input resolution.
    input_image = ph.placeholder('input_image',
                                 (None, vgg.HEIGHT, vgg.WIDTH, 3),
                                 ph.float)

    # Pre-trained VGG16 encoder; take the 'h7' feature (output of fc7).
    encoder = vgg.VGG16('encoder')
    encoder.setup(input_image)
    h = encoder['h7']

    # Classification head: dropout, a new linear layer and a softmax.
    dropout = ph.Dropout('dropout')
    h = dropout.setup(h)
    dense = ph.Linear('dense', encoder.fc7.output_size, self._num_classes)
    y = dense.setup(h)
    y = tf.nn.softmax(y)
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(inputs=input_image,
                           outputs=(label, y),
                           givens={dropout.keep_prob: 1.0})

    # Cross-entropy loss against the one-hot encoded target label.
    input_label = ph.placeholder('input_label', (None,), ph.int)
    y_target = tf.one_hot(input_label, self._num_classes)
    loss = ph.ops.cross_entropy(y_target, y)
    loss = tf.reduce_mean(loss)

    # Stage 1: train only the new dense layer, with L2 regularization,
    # gradient clipping and an exponentially decaying learning rate.
    var_list = dense.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train',
                                          1e-4,
                                          num_loops=1e4,
                                          min_value=1e-5)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(
        zip(grad_list, var_list))
    self.train = ph.Step(inputs=(input_image, input_label),
                         outputs=loss,
                         updates=(update, lr.update_op),
                         givens={dropout.keep_prob: self._keep_prob})

    # Stage 2: fine-tune all trainable variables (including the encoder)
    # with a smaller learning rate and weaker regularization.
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-7)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_fine_tune',
                                          2e-5,
                                          num_loops=3e4,
                                          min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(
        zip(grad_list, var_list))
    self.fine_tune = ph.Step(inputs=(input_image, input_label),
                             outputs=loss,
                             updates=(update, lr.update_op),
                             givens={dropout.keep_prob: self._keep_prob})
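# The builder above leans on photinia helpers (ph.reg.L2Regularizer,
# ph.train.ExponentialDecayedValue) for weight decay and learning-rate decay.
# As a rough, illustrative sketch of what the clipped, L2-regularized Adam
# update amounts to in plain TensorFlow 1.x, assuming the regularizer simply
# adds a scaled sum of squared weights to the loss (the function name and
# default values below are made up for illustration, not part of photinia):

import tensorflow as tf


def clipped_adam_update(loss, var_list, weight_decay=1e-6, clip=10.0, lr=1e-4):
    # Scaled L2 penalty over the variables being trained.
    l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in var_list])
    grads = tf.gradients(loss + l2_loss, var_list)
    # Element-wise clipping, mirroring tf.clip_by_value(grad, -10, 10) above.
    grads = [tf.clip_by_value(g, -clip, clip) for g in grads]
    return tf.train.AdamOptimizer(lr).apply_gradients(zip(grads, var_list))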
def _build(self):
    # Input image batch, resized to the VGG16 input resolution.
    image = ph.placeholder('input_image',
                           (None, vgg.HEIGHT, vgg.WIDTH, 3),
                           ph.float)

    # Pre-trained VGG16 encoder; take the 'h7' feature (output of fc7).
    encoder = vgg.VGG16('encoder')
    encoder.setup(image)
    h = encoder['h7']

    # Multi-label head: dropout, a new linear layer and a per-class sigmoid.
    dropout = ph.Dropout('dropout')
    h = dropout.setup(h)
    dense = ph.Linear('dense', encoder.fc7.output_size, NUM_CLASSES)
    y = dense.setup(h)
    y = tf.nn.sigmoid(y)
    self.predict = ph.Step(inputs=image,
                           outputs=y,
                           givens={dropout.keep_prob: 1.0})

    # Element-wise cross-entropy against the multi-hot target vector.
    target = ph.placeholder('target', (None, NUM_CLASSES), ph.float)
    loss = ph.ops.cross_entropy(target, y)
    loss = tf.reduce_mean(loss)

    # Stage 1: train only the new dense layer.
    var_list = dense.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train',
                                          1e-4,
                                          num_loops=3e3,
                                          min_value=1e-5)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(
        zip(grad_list, var_list))
    self.train = ph.Step(inputs=(image, target),
                         outputs=loss,
                         updates=update,
                         givens={dropout.keep_prob: self._keep_prob})

    # Stage 2: fine-tune all trainable variables with a smaller learning rate
    # and weaker regularization.
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-7)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_fine_tune',
                                          2e-5,
                                          num_loops=2e4,
                                          min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(
        zip(grad_list, var_list))
    self.fine_tune = ph.Step(inputs=(image, target),
                             outputs=loss,
                             updates=update,
                             givens={dropout.keep_prob: self._keep_prob})
def _build(self):
    # Input feature vector.
    x = ph.placeholder('x', shape=(None, self._input_size), dtype=ph.float)

    # Two-layer MLP classifier: hidden layer with leaky ReLU and dropout,
    # followed by a softmax output layer.
    hidden_layer = ph.Linear('hidden_layer',
                             input_size=self._input_size,
                             output_size=self._hidden_size)
    out_layer = ph.Linear('out_layer',
                          input_size=self._hidden_size,
                          output_size=self._num_classes)
    dropout = ph.Dropout('dropout')
    y = ph.setup(
        x, [hidden_layer, ph.ops.lrelu, dropout, out_layer, tf.nn.softmax])
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(inputs=x,
                           outputs=(label, y),
                           givens={dropout.keep_prob: 1.0})

    # Cross-entropy loss against the one-hot encoded true label.
    true_label = ph.placeholder('true_label', shape=(None,), dtype=ph.int)
    target = tf.one_hot(true_label, self._num_classes)
    loss = ph.ops.cross_entropy(target, y)
    loss = tf.reduce_mean(loss)

    # Single training stage over all trainable variables, with L2
    # regularization, gradient clipping and a decaying learning rate.
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train',
                                          1e-4,
                                          num_loops=2e4,
                                          min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(
        zip(grad_list, var_list))
    self.train = ph.Step(inputs=(x, true_label),
                         outputs=loss,
                         updates=(update, lr.update_op),
                         givens={dropout.keep_prob: self._keep_prob})
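# A hypothetical driver loop for the classifier above. It assumes that
# ph.Step objects can be called like functions with their declared inputs and
# return their declared outputs; that assumption, as well as the names
# batch_iter / log_every, are illustrative only and not taken from photinia's
# documentation:

def train_loop(model, batch_iter, num_loops=20000, log_every=100):
    for i, (x_batch, y_batch) in enumerate(batch_iter()):
        loss = model.train(x_batch, y_batch)        # runs the 'train' step
        if i % log_every == 0:
            label, prob = model.predict(x_batch)    # runs the 'predict' step
            print(i, loss)
        if i + 1 >= num_loops:
            break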
def _build(self):
    # Input image batch, resized to the AlexNet input resolution.
    input_image = ph.placeholder('input_image',
                                 (None, alexnet.HEIGHT, alexnet.WIDTH, 3),
                                 ph.float)

    # AlexNet encoder with swish activations, followed by a new hidden dense
    # layer and a softmax output layer over self._num_classes + 1 classes.
    encoder = alexnet.AlexNet('encoder', ph.ops.swish)
    dropout = ph.Dropout('dropout')
    dense = ph.Linear('dense', encoder['dense_7'].output_size,
                      self._hidden_size)
    output_layer = ph.Linear('output_layer', dense.output_size,
                             self._num_classes + 1)
    encoder.setup(input_image)
    y = ph.setup(
        encoder['feature_7'],
        [dense, ph.ops.swish, dropout, output_layer, tf.nn.softmax])
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(inputs=input_image,
                           outputs=(label, y),
                           givens={dropout.keep_prob: 1.0})

    # Negative log-likelihood loss against the one-hot encoded label.
    input_label = ph.placeholder('input_label', (None,), ph.int)
    y_target = tf.one_hot(input_label, self._num_classes + 1)
    loss = -ph.ops.log_likelihood(y_target, y)
    loss = tf.reduce_mean(loss)

    ################################################################################
    # pre-train: only the newly added dense and output layers
    ################################################################################
    vars_new = [
        *dense.get_trainable_variables(),
        *output_layer.get_trainable_variables()
    ]
    reg = ph.reg.L2Regularizer(self._reg)
    reg.setup(vars_new)
    lr = ph.train.ExponentialDecayedValue('lr_1',
                                          init_value=self._learning_rate_1,
                                          num_loops=self._num_loops_1,
                                          min_value=self._learning_rate_1 / 10)
    # Clip each gradient element-wise and skip variables with no gradient.
    update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
        (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
        for g, v in zip(tf.gradients(loss + reg.get_loss(), vars_new), vars_new)
        if g is not None
    ])
    # with tf.control_dependencies([update_1]):
    #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_new)
    self.train = ph.Step(inputs=(input_image, input_label),
                         outputs=(loss, lr.variable),
                         updates=update_1,
                         givens={dropout.keep_prob: self._keep_prob})

    ################################################################################
    # fine tune: all trainable variables, including the encoder
    ################################################################################
    vars_all = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(self._reg)
    reg.setup(vars_all)
    lr = ph.train.ExponentialDecayedValue('lr_2',
                                          init_value=self._learning_rate_2,
                                          num_loops=self._num_loops_2,
                                          min_value=self._learning_rate_2 / 10)
    update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
        (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
        for g, v in zip(tf.gradients(loss + reg.get_loss(), vars_all), vars_all)
        if g is not None
    ])
    # with tf.control_dependencies([update_1]):
    #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_all)
    self.fine_tune = ph.Step(inputs=(input_image, input_label),
                             outputs=(loss, lr.variable),
                             updates=update_1,
                             givens={dropout.keep_prob: self._keep_prob})
def _build(self):
    # Two recurrent sequence encoders over the same word-embedding input:
    # a 'shared' one and a 'specific' one, mixed by a learned gate.
    # Embedding, VectorStat and MaskGrad are project-specific widgets.
    shared = Embedding('shared', self._wemb_size, 500, act)
    specific = Embedding('specific', self._wemb_size, 500)
    gate = ph.Gate('gate', (500, 500), 500)
    lin = ph.Linear('lin', 500, 1000)
    out = ph.Linear('out', 1000, 2)
    stat = VectorStat('stat')
    drop = ph.Dropout('drop')

    # Encode the input sequence with both encoders and collect running
    # statistics of the inputs and of the shared encoder's states.
    seq = ph.placeholder('seq', (None, None, self._wemb_size))
    h1, states1 = shared.setup(seq)
    stat.setup(tf.reshape(seq, (-1, self._wemb_size), name='flat_seq'))
    stat.setup(tf.reshape(states1, (-1, 500), name='flat_states'))
    h2, _ = specific.setup(seq)

    # Gated mixture of the shared and specific representations.
    g = gate.setup(h1, h2)
    h = g * h1 + (1.0 - g) * h2

    # Main prediction from the mixed representation, plus two auxiliary
    # predictions computed from the shared representation alone.
    y_pred = ph.setup(h, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    y_pred_ = ph.setup(h1, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    y_pred__ = ph.setup(h1, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    label_pred = tf.argmax(y_pred, 1)

    # Mean-squared-error loss summed over the three prediction heads.
    label = ph.placeholder('label', (None, 2))
    loss = tf.reduce_mean((y_pred - label) ** 2, axis=1)
    loss += tf.reduce_mean((y_pred_ - label) ** 2, axis=1)
    loss += tf.reduce_mean((y_pred__ - label) ** 2, axis=1)
    loss_sum = tf.reduce_sum(loss)
    loss_mean = tf.reduce_mean(loss)

    # Evaluation statistics: hits and predictions per class, and error count.
    correct = tf.cast(tf.equal(label_pred, tf.argmax(label, 1)), ph.D_TYPE)
    correct_pos = correct * label[:, 1]
    correct_neg = correct * label[:, 0]
    hit_pos = tf.reduce_sum(correct_pos)
    hit_neg = tf.reduce_sum(correct_neg)
    pred_pos = tf.reduce_sum(label_pred)
    pred_neg = tf.reduce_sum(1 - label_pred)
    error = tf.reduce_sum(1 - correct)

    # L1 regularization over all trainable variables.
    reg = ph.Regularizer()
    reg.add_l1(self.get_trainable_variables())

    # RMSProp optimizer wrapped so that gradients of the shared cell's weight
    # matrices can be masked.
    optimizer = MaskGrad(tf.train.RMSPropOptimizer(1e-4, 0.8, 0.9))
    self._optimizer = optimizer
    optimizer.add_mask(shared.cell.wz)
    optimizer.add_mask(shared.cell.wr)
    optimizer.add_mask(shared.cell.wh)
    optimizer.add_mask(shared.cell.uz)
    optimizer.add_mask(shared.cell.ur)
    optimizer.add_mask(shared.cell.uh)

    # Training and validation slots.
    self._add_train_slot(
        inputs=(seq, label),
        outputs={
            'Loss': loss_mean,
            'Norm': tf.norm(self.specific.cell.uz, 1)
        },
        updates=(optimizer.minimize(loss_mean + reg.get_loss(2e-7)),
                 stat.updates),
        givens={drop.keep_prob: 0.5})
    self._add_validate_slot(
        inputs=(seq, label),
        outputs={
            'Loss': loss_sum,
            'hit_pos': hit_pos * 100,
            'hit_neg': hit_neg * 100,
            'pred_pos': pred_pos * 100,
            'pred_neg': pred_neg * 100,
            'Error': error * 100,
        },
        givens={drop.keep_prob: 1.0})
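# MaskGrad above is a project-specific optimizer wrapper. A minimal sketch of
# the underlying idea (element-wise masks multiplied into selected variables'
# gradients before the wrapped optimizer applies them) might look like the
# class below. The class name and internals are hypothetical and only mirror
# the add_mask / minimize calls used above:

import tensorflow as tf


class GradientMaskOptimizer(object):

    def __init__(self, optimizer):
        self._optimizer = optimizer
        self._masks = {}  # variable -> mask tensor with the same shape

    def add_mask(self, var, mask=None):
        # The default mask of ones leaves the gradient unchanged; the mask
        # variable can later be re-assigned to zero out selected entries.
        if mask is None:
            mask = tf.Variable(tf.ones_like(var), trainable=False)
        self._masks[var] = mask

    def minimize(self, loss):
        grads_and_vars = self._optimizer.compute_gradients(loss)
        masked = [
            (g * self._masks[v] if g is not None and v in self._masks else g, v)
            for g, v in grads_and_vars
        ]
        return self._optimizer.apply_gradients(masked)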