def _build(self):
    self._xav_init = tf.contrib.layers.xavier_initializer
    # per-layer input projections
    self._projA = []
    for idx in range(self._num_layers):
        self._projA.append(ph.Linear('projA' + str(idx), input_size=self._inputs_size, output_size=self._state_size))
    self._projC = []
    for idx in range(self._num_layers):
        self._projC.append(ph.Linear('projC' + str(idx), input_size=self._inputs_size, output_size=self._state_size))
    self._projB = ph.Linear('projB', input_size=self._inputs_size, output_size=self._state_size)
    # recurrent and output weight matrices
    self._w = tf.get_variable('w', shape=[self._state_size, self._state_size], initializer=self._xav_init())
    self._ow = tf.get_variable('ow', shape=[self._state_size, self._output_size], initializer=self._xav_init())
def _build(self):
    height, width, channels = (self._max_len, self._voc_size, 1)
    output_channels = 8
    # block 1: full-width convolution followed by average pooling
    self._conv1 = ph.Conv2D(
        'conv1',
        input_size=(height, width, channels),
        output_channels=output_channels,
        filter_height=3, filter_width=width,
        stride_height=1, stride_width=1
    )
    self._pool1 = ph.Pool2D(
        'pool1',
        input_size=self._conv1.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        pool_type='avg'
    )
    height, width, channels = self._pool1.output_size
    output_channels *= 2
    # block 2
    self._conv2 = ph.Conv2D(
        'conv2',
        input_size=(height, width, channels),
        output_channels=output_channels,
        filter_height=3, filter_width=width,
        stride_height=1, stride_width=1
    )
    self._pool2 = ph.Pool2D(
        'pool2',
        input_size=self._conv2.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        pool_type='avg'
    )
    height, width, channels = self._pool2.output_size
    output_channels *= 2
    # block 3
    self._conv3 = ph.Conv2D(
        'conv3',
        input_size=(height, width, channels),
        output_channels=output_channels,
        filter_height=3, filter_width=width,
        stride_height=1, stride_width=1
    )
    self._pool3 = ph.Pool2D(
        'pool3',
        input_size=self._conv3.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        pool_type='avg'
    )
    # classifier head
    self._dense1 = ph.Linear('dense1', input_size=self._pool3.flat_size, output_size=self._hidden_size)
    self._dense2 = ph.Linear('dense2', input_size=self._hidden_size, output_size=1)
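# A minimal usage sketch for the builder above, not part of the original code: the
# model's actual _setup is not shown, so the method name, the ph.setup chaining of
# conv/pool widgets, and the lrelu activation are assumptions. It flattens the last
# pooled feature map before the two dense layers.
def _setup_sketch(self, x):
    # x: (batch, max_len, voc_size, 1) one-hot sequence treated as an image
    h = ph.setup(x, [
        self._conv1, self._pool1,
        self._conv2, self._pool2,
        self._conv3, self._pool3,
    ])
    h = tf.reshape(h, (-1, self._pool3.flat_size))   # flatten for the dense head
    h = ph.setup(h, [self._dense1, ph.ops.lrelu, self._dense2])
    return h                                          # (batch, 1) logits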
def _build(self):
    self._linear = ph.Linear('linear', input_size=self._inputs_size, output_size=self._output_size)
    self._gate = ph.Linear('gate', input_size=self._inputs_size, output_size=self._output_size)
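# A hedged sketch of how the two layers above could be combined into a gated
# transform; the real _setup is not shown, so the method name and the choice of
# sigmoid/lrelu activations are assumptions.
def _setup_sketch(self, x):
    h = ph.ops.lrelu(self._linear.setup(x))   # candidate output
    g = tf.nn.sigmoid(self._gate.setup(x))    # gate values in (0, 1)
    return g * h                               # gated output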
def _build(self):
    # network modules --- build
    self._emb = photinia.Linear('EMB', self._voc_size, self._emb_size)
    self._cell = photinia.GRUCell('CELL', self._emb_size, self._state_size)
    self._lin = photinia.Linear('LIN', self._state_size, self._voc_size)
    # inputs
    seq = tf.placeholder(
        shape=(None, None, self._voc_size),
        dtype=photinia.dtype
    )
    seq_0 = seq[:, :-1, :]
    seq_1 = seq[:, 1:, :]
    batch_size = tf.shape(seq)[0]
    # RNN structure
    init_state = tf.zeros(
        shape=(batch_size, self._state_size),
        dtype=photinia.dtype
    )
    states = tf.scan(
        fn=self._rnn_step,
        elems=tf.transpose(seq_0, (1, 0, 2)),
        initializer=init_state
    )
    probs = tf.map_fn(
        fn=self._state_to_prob,
        elems=states
    )
    outputs = tf.map_fn(
        fn=self._prob_to_output,
        elems=probs
    )
    probs = tf.transpose(probs, (1, 0, 2))
    outputs = tf.transpose(outputs, (1, 0, 2))
    outputs = tf.concat((seq[:, 0:1, :], outputs), 1)
    loss = tf.reduce_mean(-tf.log(1e-5 + tf.reduce_sum(seq_1 * probs, 2)), 1)
    loss = tf.reduce_mean(loss)
    self._add_slot(
        'train',
        outputs=loss,
        inputs=seq,
        updates=tf.train.AdamOptimizer(1e-3).minimize(loss)
    )
    self._add_slot(
        'evaluate',
        outputs=outputs,
        inputs=seq
    )
    # word embedding slot
    word = tf.placeholder(
        shape=(None, self._voc_size),
        dtype=photinia.dtype
    )
    emb = self._emb.setup(word)
    emb = photinia.lrelu(emb)
    self._add_slot(
        'embedding',
        outputs=emb,
        inputs=word
    )
def _build(self):
    if self._emb_layer is None:
        self._emb_layer = ph.Linear('emb_layer', self._voc_size, self._emb_size)
    else:
        self._emb_size = self._emb_layer.output_size
    self._cell = ph.GRUCell('cell', self._emb_size, self._state_size)
    self._out_layer = ph.Linear('out_layer', self._state_size, self._voc_size)
def _build(self):
    self._input_layer = ph.Linear(
        'input_layer', self._input_size, self._hidden_size,
        w_init=self._w_init, b_init=self._b_init
    )
    self._output_layer = ph.Linear(
        'output_layer', self._hidden_size, self._output_size,
        w_init=self._w_init, b_init=self._b_init
    )
def _build(self):
    input_image = ph.placeholder('input_image', (None, vgg.HEIGHT, vgg.WIDTH, 3), ph.float)
    encoder = vgg.VGG16('encoder')
    encoder.setup(input_image)
    h = encoder['h7']
    dropout = ph.Dropout('dropout')
    h = dropout.setup(h)
    dense = ph.Linear('dense', encoder.fc7.output_size, self._num_classes)
    y = dense.setup(h)
    y = tf.nn.softmax(y)
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(
        inputs=input_image,
        outputs=(label, y),
        givens={dropout.keep_prob: 1.0}
    )

    input_label = ph.placeholder('input_label', (None,), ph.int)
    y_target = tf.one_hot(input_label, self._num_classes)
    loss = ph.ops.cross_entropy(y_target, y)
    loss = tf.reduce_mean(loss)

    # train: update only the new dense layer
    var_list = dense.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train', 1e-4, num_loops=1e4, min_value=1e-5)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(zip(grad_list, var_list))
    self.train = ph.Step(
        inputs=(input_image, input_label),
        outputs=loss,
        updates=(update, lr.update_op),
        givens={dropout.keep_prob: self._keep_prob}
    )

    # fine-tune: update all trainable variables
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-7)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_fine_tune', 2e-5, num_loops=3e4, min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(zip(grad_list, var_list))
    self.fine_tune = ph.Step(
        inputs=(input_image, input_label),
        outputs=loss,
        updates=(update, lr.update_op),
        givens={dropout.keep_prob: self._keep_prob}
    )
def _build(self):
    image = ph.placeholder('input_image', (None, vgg.HEIGHT, vgg.WIDTH, 3), ph.float)
    encoder = vgg.VGG16('encoder')
    encoder.setup(image)
    h = encoder['h7']
    dropout = ph.Dropout('dropout')
    h = dropout.setup(h)
    dense = ph.Linear('dense', encoder.fc7.output_size, NUM_CLASSES)
    y = dense.setup(h)
    y = tf.nn.sigmoid(y)  # sigmoid outputs (multi-label targets)
    self.predict = ph.Step(
        inputs=image,
        outputs=y,
        givens={dropout.keep_prob: 1.0}
    )

    target = ph.placeholder('target', (None, NUM_CLASSES), ph.float)
    loss = ph.ops.cross_entropy(target, y)
    loss = tf.reduce_mean(loss)

    # train: update only the new dense layer
    var_list = dense.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train', 1e-4, num_loops=3e3, min_value=1e-5)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(zip(grad_list, var_list))
    self.train = ph.Step(
        inputs=(image, target),
        outputs=loss,
        updates=update,
        givens={dropout.keep_prob: self._keep_prob}
    )

    # fine-tune: update all trainable variables
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-7)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_fine_tune', 2e-5, num_loops=2e4, min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(zip(grad_list, var_list))
    self.fine_tune = ph.Step(
        inputs=(image, target),
        outputs=loss,
        updates=update,
        givens={dropout.keep_prob: self._keep_prob}
    )
def _build(self):
    self._key_layer = ph.Linear(
        'key_layer',
        input_size=self._key_size,
        output_size=self._attention_size,
        with_bias=self._with_bias
    )
    self._att_layer = ph.Linear(
        'att_layer',
        input_size=self._attention_size,
        output_size=1,
        with_bias=self._with_bias
    )
    if self._query_vec_size is not None:
        self._query_vec_layer = ph.Linear(
            'query_vec_layer',
            input_size=self._query_vec_size,
            output_size=self._attention_size,
            with_bias=self._with_bias
        )
    if self._query_seq_size is not None:
        self._query_seq_layer = ph.Linear(
            'query_seq_layer',
            input_size=self._query_seq_size,
            output_size=self._attention_size,
            with_bias=self._with_bias
        )
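# A hedged sketch of additive attention using the layers built above. The real
# scoring code is not shown; the tanh nonlinearity, the method name, and the shapes
# assumed here (keys: (batch, seq_len, key_size), query: (batch, query_vec_size))
# are assumptions, not part of the original.
def _attend_sketch(self, keys, query):
    a = self._key_layer.setup(keys)                        # (batch, seq_len, attention_size)
    if self._query_vec_size is not None:
        a += tf.expand_dims(self._query_vec_layer.setup(query), 1)
    scores = self._att_layer.setup(tf.tanh(a))             # (batch, seq_len, 1)
    weights = tf.nn.softmax(scores, axis=1)                # attention weights over the sequence
    return tf.reduce_sum(weights * keys, axis=1)           # (batch, key_size) context vector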
def _build(self):
    self._input_layer = ph.Linear(
        'input_layer', self._input_size, self._hidden_size,
        w_init=self._w_init, b_init=self._b_init
    )
    res_layers = self._res_layers = list()
    for i in range(self._num_layers):
        res_layer = ph.ResidualLayer(
            f'res_{i}', self._hidden_size,
            w_init=self._w_init, b_init=self._b_init
        )
        res_layers.append(res_layer)
    self._output_layer = ph.Linear(
        'output_layer', self._hidden_size, self._output_size,
        w_init=self._w_init, b_init=self._b_init
    )
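# A hedged sketch of a forward pass for the residual MLP above; the model's actual
# setup is not shown, so the method name and the ph.ops.lrelu activation are
# assumptions. Each ResidualLayer is assumed to preserve the hidden size.
def _setup_sketch(self, x):
    h = ph.ops.lrelu(self._input_layer.setup(x))
    for res_layer in self._res_layers:
        h = res_layer.setup(h)
    return self._output_layer.setup(h)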
def _build(self):
    x = ph.placeholder('x', shape=(None, self._input_size), dtype=ph.float)
    hidden_layer = ph.Linear('hidden_layer', input_size=self._input_size, output_size=self._hidden_size)
    out_layer = ph.Linear('out_layer', input_size=self._hidden_size, output_size=self._num_classes)
    dropout = ph.Dropout('dropout')
    y = ph.setup(x, [hidden_layer, ph.ops.lrelu, dropout, out_layer, tf.nn.softmax])
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(
        inputs=x,
        outputs=(label, y),
        givens={dropout.keep_prob: 1.0}
    )

    true_label = ph.placeholder('true_label', shape=(None,), dtype=ph.int)
    target = tf.one_hot(true_label, self._num_classes)
    loss = ph.ops.cross_entropy(target, y)
    loss = tf.reduce_mean(loss)
    var_list = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(1e-6)
    reg.setup(var_list)
    grad_list = [
        tf.clip_by_value(grad, -10, 10)
        for grad in tf.gradients(loss + reg.get_loss(), var_list)
    ]
    lr = ph.train.ExponentialDecayedValue('lr_train', 1e-4, num_loops=2e4, min_value=1e-6)
    update = tf.train.AdamOptimizer(lr.value).apply_gradients(zip(grad_list, var_list))
    self.train = ph.Step(
        inputs=(x, true_label),
        outputs=loss,
        updates=(update, lr.update_op),
        givens={dropout.keep_prob: self._keep_prob}
    )
def _build(self):
    input_image = tf.placeholder(shape=(None, 784), dtype=tf.float32, name='input_image')
    hidden_layer = ph.Linear('hidden_layer', 784, self._hidden_size)
    output_layer = ph.Linear('output_layer', self._hidden_size, 10)
    y = ph.setup(input_image, [hidden_layer, ph.ops.lrelu, output_layer, tf.nn.softmax])
    label = tf.argmax(y, 1)

    input_label = tf.placeholder(shape=(None,), dtype=tf.int64, name='input_label')
    y_ = tf.one_hot(input_label, 10, dtype=tf.float32)
    loss = ph.ops.cross_entropy(y_, y)
    loss = tf.reduce_mean(loss)

    self.train = ph.Step(
        inputs=(input_image, input_label),
        outputs=loss,
        updates=tf.train.RMSPropOptimizer(1e-4, 0.9, 0.9).minimize(loss)
    )
    self.predict = ph.Step(inputs=input_image, outputs=label)
def _build(self):
    # network modules --- build
    self._cnn = photinia.CNN(
        'CNN',
        input_height=self._height,
        input_width=self._width,
        input_depth=1,
        layer_shapes=[(5, 5, 32, 2, 2), (5, 5, 64, 2, 2)],
        activation=tf.nn.relu,
        with_batch_norm=False
    ).build()
    self._lin1 = photinia.Linear('LINEAR1', self._cnn.flat_size, self._feature_size)
    self._lin2 = photinia.Linear('LINEAR2', self._feature_size, self._num_classes)
    # dropout parameter
    keep_prob = tf.placeholder(dtype=photinia.D_TYPE)
    # inputs
    x = tf.placeholder(dtype=photinia.D_TYPE, shape=[None, self._height, self._width, self._depth])
    y_ = tf.placeholder(dtype=photinia.D_TYPE, shape=[None, self._num_classes])
    # network structure --- setup
    y = self._cnn.setup(x)
    y = self._lin1.setup(y)
    y = tf.nn.dropout(y, keep_prob)
    y = self._lin2.setup(y)
    # loss: softmax cross-entropy
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    # accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, photinia.D_TYPE))
    # train and predict slots
    self._add_slot(
        'train',
        outputs=(loss, accuracy),
        inputs=(x, y_, keep_prob),
        updates=tf.train.AdamOptimizer(1e-4).minimize(loss)
    )
    self._add_slot(
        'predict',
        outputs=accuracy,
        inputs=(x, y_, keep_prob)
    )
def _build(self):
    # block 1
    self._c1 = ph.Conv2D('c1', self._input_size, 64, 3, 3)
    self._c2 = ph.Conv2D('c2', self._c1.output_size, 64, 3, 3)
    self._p1 = ph.Pool2D('p1', self._c2.output_size, 2, 2)
    # block 2
    self._c3 = ph.Conv2D('c3', self._p1.output_size, 128, 3, 3)
    self._c4 = ph.Conv2D('c4', self._c3.output_size, 128, 3, 3)
    self._p2 = ph.Pool2D('p2', self._c4.output_size, 2, 2)
    # block 3
    self._c5 = ph.Conv2D('c5', self._p2.output_size, 256, 3, 3)
    self._c6 = ph.Conv2D('c6', self._c5.output_size, 256, 3, 3)
    self._c7 = ph.Conv2D('c7', self._c6.output_size, 256, 3, 3)
    self._p3 = ph.Pool2D('p3', self._c7.output_size, 2, 2)
    # block 4
    self._c8 = ph.Conv2D('c8', self._p3.output_size, 512, 3, 3)
    self._c9 = ph.Conv2D('c9', self._c8.output_size, 512, 3, 3)
    self._c10 = ph.Conv2D('c10', self._c9.output_size, 512, 3, 3)
    self._p4 = ph.Pool2D('p4', self._c10.output_size, 2, 2)
    # block 5
    self._c11 = ph.Conv2D('c11', self._p4.output_size, 512, 3, 3)
    self._c12 = ph.Conv2D('c12', self._c11.output_size, 512, 3, 3)
    self._c13 = ph.Conv2D('c13', self._c12.output_size, 512, 3, 3)
    self._p5 = ph.Pool2D('p5', self._c13.output_size, 2, 2)
    # fully connected head
    self._h1 = ph.Linear('h1', self._p5.flat_size, 4096, weight_initializer=ph.RandomNormal(stddev=1e-4))
    self._h2 = ph.Linear('h2', self._h1.output_size, 4096, weight_initializer=ph.RandomNormal(stddev=1e-4))
    self._h3 = ph.Linear('h3', self._h2.output_size, self._output_size, weight_initializer=ph.RandomNormal(stddev=1e-4))
def _build(self):
    self._layers = list()
    input_size = (self._height, self._width, self._channels)
    output_channels = self._output_channels1
    for i in range(self._num_layers):
        layer = ph.Conv2D(
            'conv%d' % (i + 1),
            input_size, output_channels,
            self._kernel_size, self._kernel_size,
            2, 2
        )
        self._layers.append(layer)
        input_size = layer.output_size
        output_channels *= 2
    self._fc = ph.Linear('fc', self._layers[-1].flat_size, self._output_size)
def _build(self):
    # the transposed-conv layers are declared backwards from the output image:
    # _layers[0] produces the final (height, width, channels) output, while
    # _layers[-1] is the first layer applied after the fully connected input.
    self._layers = list()
    output_size = (self._height, self._width, self._channels)
    input_channels = self._input_channels1
    for i in range(self._num_layers):
        layer = ph.Conv2DTrans(
            'tconv%d' % (self._num_layers - i),
            output_size, input_channels,
            self._kernel_size, self._kernel_size,
            2, 2,
            w_init=self._w_init, b_init=self._b_init
        )
        self._layers.append(layer)
        output_size = layer.input_size
        input_channels *= 2
    self._fc = ph.Linear('fc', self._input_size, self._layers[-1].flat_size)
def _build(self):
    # network module: a single linear layer --- build
    self._lin = photinia.Linear('LINEAR', self._input_size, self._num_classes)
    # inputs
    x = tf.placeholder(dtype=photinia.dtype, shape=[None, self._input_size])
    y_ = tf.placeholder(dtype=photinia.dtype, shape=[None, self._num_classes])
    # network structure --- setup
    y = self._lin.setup(x)
    # loss: softmax cross-entropy
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    # accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, photinia.dtype))
    # train and predict slots
    self._add_slot(
        'train',
        outputs=loss,
        inputs=(x, y_),
        updates=tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    )
    self._add_slot('predict', outputs=accuracy, inputs=(x, y_))
def _build(self):
    input_x = tf.tile(self._input_x, [self._num_mc_samples] + [1] * (len(self._input_x.shape) - 1))
    g_net = self._glimpse_network
    l_net = self._location_network
    input_stddev = tf.placeholder(
        shape=(),
        dtype=ph.dtype,
        name='input_stddev'
    )
    cell = self._cell = ph.GRUCell(
        'cell',
        g_net.output_size,
        self._state_size,
        w_init=ph.init.GlorotUniform()
    )
    batch_size = tf.shape(input_x)[0]
    init_state = tf.zeros(shape=(batch_size, self._state_size), dtype=ph.dtype)
    init_loc = tf.random_uniform((batch_size, 2), minval=-1, maxval=1)

    def _loop(acc, _):
        prev_state, loc, _ = acc
        g = g_net.setup(input_x, loc)
        state = cell.setup(g, prev_state)
        next_loc, next_mean = l_net.setup(state, input_stddev)
        return state, next_loc, next_mean

    states, locs, means = tf.scan(
        fn=_loop,
        elems=tf.zeros(shape=(self._num_steps,), dtype=tf.int8),
        initializer=(init_state, init_loc, init_loc)
    )  # (num_steps, batch_size, *)

    baseline_layer = self._baseline_layer = ph.Linear('baseline_layer', self._state_size, 1)

    def _make_baseline(state):
        baseline = baseline_layer.setup(state)  # (batch_size, 1)
        baseline = tf.reshape(baseline, (-1,))  # (batch_size,)
        return baseline

    baselines = tf.map_fn(_make_baseline, states)  # (num_steps, batch_size)
    baselines = tf.transpose(baselines)  # (batch_size, num_steps)

    predict_layer = self._predict_layer = ph.Linear('predict_layer', self._state_size, self._num_classes)
    last_state = states[-1]  # (batch_size, state_size)
    prob = predict_layer.setup(last_state)
    prob = tf.nn.softmax(prob)  # (batch_size, num_classes)
    label = tf.argmax(prob, 1)  # (batch_size,)
    self._step_predict = ph.Step(
        inputs=input_x,
        outputs=label,
        givens={input_stddev: 1e-3}
    )

    self._input_label = ph.placeholder('input_label', (None,), tf.int64)
    input_label = tf.tile(self._input_label, (self._num_mc_samples,))
    prob_ = tf.one_hot(input_label, self._num_classes)  # (batch_size, num_classes)
    predict_loss = self._predict_loss = -tf.reduce_mean(ph.ops.log_likelihood(prob_, prob))

    reward = tf.cast(tf.equal(label, input_label), tf.float32)  # (batch_size,)
    rewards = tf.reshape(reward, (-1, 1))  # (batch_size, 1)
    rewards = tf.tile(rewards, (1, self._num_steps))  # (batch_size, num_steps)
    rewards = tf.stop_gradient(rewards)
    baseline_loss = self._baseline_loss = tf.reduce_mean(ph.ops.mean_square_error(rewards, baselines))

    advantages = rewards - tf.stop_gradient(baselines)
    logll = self._log_gaussian(locs, means, input_stddev)
    logll = tf.reduce_sum(logll, 2)  # (num_steps, batch_size)
    logll = tf.transpose(logll)  # (batch_size, num_steps)
    logll_ratio = self._logll_ratio = tf.reduce_mean(logll * advantages)

    loss = self._loss = predict_loss - logll_ratio + baseline_loss
    if self._reg is not None:
        self._reg.setup(self.get_trainable_variables())
        update = self._optimizer.minimize(loss + self._reg.get_loss())
    else:
        update = self._optimizer.minimize(loss)
    self._step_train = ph.Step(
        inputs=(self._input_x, self._input_label),
        outputs=(loss, tf.reduce_mean(rewards)),
        updates=update,
        givens={input_stddev: self._stddev}
    )
def _build(self):
    self._layer = ph.Linear('layer', self._input_size, self._output_size)
def _build(self):
    input_image = ph.placeholder('input_image', (None, alexnet.HEIGHT, alexnet.WIDTH, 3), ph.float)
    encoder = alexnet.AlexNet('encoder', ph.ops.swish)
    dropout = ph.Dropout('dropout')
    dense = ph.Linear('dense', encoder['dense_7'].output_size, self._hidden_size)
    output_layer = ph.Linear('output_layer', dense.output_size, self._num_classes + 1)

    encoder.setup(input_image)
    y = ph.setup(
        encoder['feature_7'],
        [dense, ph.ops.swish, dropout, output_layer, tf.nn.softmax]
    )
    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(
        inputs=input_image,
        outputs=(label, y),
        givens={dropout.keep_prob: 1.0}
    )

    input_label = ph.placeholder('input_label', (None,), ph.int)
    y_target = tf.one_hot(input_label, self._num_classes + 1)
    loss = -ph.ops.log_likelihood(y_target, y)
    loss = tf.reduce_mean(loss)

    ################################################################################
    # pre-train
    ################################################################################
    vars_new = [
        *dense.get_trainable_variables(),
        *output_layer.get_trainable_variables()
    ]
    reg = ph.reg.L2Regularizer(self._reg)
    reg.setup(vars_new)
    lr = ph.train.ExponentialDecayedValue(
        'lr_1',
        init_value=self._learning_rate_1,
        num_loops=self._num_loops_1,
        min_value=self._learning_rate_1 / 10
    )
    update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
        (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
        for g, v in zip(tf.gradients(loss + reg.get_loss(), vars_new), vars_new)
        if g is not None
    ])
    # with tf.control_dependencies([update_1]):
    #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_new)
    self.train = ph.Step(
        inputs=(input_image, input_label),
        outputs=(loss, lr.variable),
        updates=update_1,
        givens={dropout.keep_prob: self._keep_prob}
    )

    ################################################################################
    # fine tune
    ################################################################################
    vars_all = self.get_trainable_variables()
    reg = ph.reg.L2Regularizer(self._reg)
    reg.setup(vars_all)
    lr = ph.train.ExponentialDecayedValue(
        'lr_2',
        init_value=self._learning_rate_2,
        num_loops=self._num_loops_2,
        min_value=self._learning_rate_2 / 10
    )
    update_1 = tf.train.AdamOptimizer(lr.value).apply_gradients([
        (tf.clip_by_value(g, -self._grad_clip, self._grad_clip), v)
        for g, v in zip(tf.gradients(loss + reg.get_loss(), vars_all), vars_all)
        if g is not None
    ])
    # with tf.control_dependencies([update_1]):
    #     update_2 = ph.train.L2Regularizer(self._reg).apply(vars_all)
    self.fine_tune = ph.Step(
        inputs=(input_image, input_label),
        outputs=(loss, lr.variable),
        updates=update_1,
        givens={dropout.keep_prob: self._keep_prob}
    )
def _build(self):
    shared = Embedding('shared', self._wemb_size, 500, act)
    specific = Embedding('specific', self._wemb_size, 500)
    gate = ph.Gate('gate', (500, 500), 500)
    lin = ph.Linear('lin', 500, 1000)
    out = ph.Linear('out', 1000, 2)
    stat = VectorStat('stat')
    drop = ph.Dropout('drop')
    #
    seq = ph.placeholder('seq', (None, None, self._wemb_size))
    h1, states1 = shared.setup(seq)
    stat.setup(tf.reshape(seq, (-1, self._wemb_size), name='flat_seq'))
    stat.setup(tf.reshape(states1, (-1, 500), name='flat_states'))
    h2, _ = specific.setup(seq)
    g = gate.setup(h1, h2)
    h = g * h1 + (1.0 - g) * h2
    y_pred = ph.setup(h, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    y_pred_ = ph.setup(h1, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    y_pred__ = ph.setup(h1, [drop, lin, ph.lrelu, drop, out, tf.nn.sigmoid])
    label_pred = tf.argmax(y_pred, 1)
    label = ph.placeholder('label', (None, 2))
    loss = tf.reduce_mean((y_pred - label) ** 2, axis=1)
    loss += tf.reduce_mean((y_pred_ - label) ** 2, axis=1)
    loss += tf.reduce_mean((y_pred__ - label) ** 2, axis=1)
    loss_sum = tf.reduce_sum(loss)
    loss_mean = tf.reduce_mean(loss)
    #
    correct = tf.cast(tf.equal(label_pred, tf.argmax(label, 1)), ph.D_TYPE)
    correct_pos = correct * label[:, 1]
    correct_neg = correct * label[:, 0]
    hit_pos = tf.reduce_sum(correct_pos)
    hit_neg = tf.reduce_sum(correct_neg)
    pred_pos = tf.reduce_sum(label_pred)
    pred_neg = tf.reduce_sum(1 - label_pred)
    error = tf.reduce_sum(1 - correct)
    #
    reg = ph.Regularizer()
    reg.add_l1(self.get_trainable_variables())
    #
    optimizer = MaskGrad(tf.train.RMSPropOptimizer(1e-4, 0.8, 0.9))
    self._optimizer = optimizer
    optimizer.add_mask(shared.cell.wz)
    optimizer.add_mask(shared.cell.wr)
    optimizer.add_mask(shared.cell.wh)
    optimizer.add_mask(shared.cell.uz)
    optimizer.add_mask(shared.cell.ur)
    optimizer.add_mask(shared.cell.uh)
    #
    self._add_train_slot(
        inputs=(seq, label),
        outputs={
            'Loss': loss_mean,
            'Norm': tf.norm(self.specific.cell.uz, 1)
        },
        updates=(optimizer.minimize(loss_mean + reg.get_loss(2e-7)), stat.updates),
        givens={drop.keep_prob: 0.5}
    )
    self._add_validate_slot(
        inputs=(seq, label),
        outputs={
            'Loss': loss_sum,
            'hit_pos': hit_pos * 100,
            'hit_neg': hit_neg * 100,
            'pred_pos': pred_pos * 100,
            'pred_neg': pred_neg * 100,
            'Error': error * 100,
        },
        givens={drop.keep_prob: 1.0}
    )
def _build(self):
    ################################################################################
    # -> (55, 55, 96)
    # -> (27, 27, 96)
    ################################################################################
    self._conv_1 = ph.Conv2D(
        'conv_1',
        input_size=[self._height, self._width, 3],
        output_channels=96,
        filter_height=11, filter_width=11,
        stride_width=4, stride_height=4,
        padding='VALID'
    )
    self._pool_1 = ph.Pool2D(
        'pool_1',
        input_size=self._conv_1.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    ################################################################################
    # -> (27, 27, 256)
    # -> (13, 13, 256)
    ################################################################################
    self._conv_2 = ph.GroupConv2D(
        'conv_2',
        input_size=self._pool_1.output_size,
        output_channels=256,
        num_groups=2,
        filter_height=5, filter_width=5,
        stride_height=1, stride_width=1,
        padding='SAME'
    )
    self._pool_2 = ph.Pool2D(
        'pool_2',
        input_size=self._conv_2.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    ################################################################################
    # -> (13, 13, 384)
    ################################################################################
    self._conv_3 = ph.Conv2D(
        'conv_3',
        input_size=self._pool_2.output_size,
        output_channels=384,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    ################################################################################
    # -> (13, 13, 384)
    ################################################################################
    self._conv_4 = ph.GroupConv2D(
        'conv_4',
        input_size=self._conv_3.output_size,
        output_channels=384,
        num_groups=2,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    ################################################################################
    # -> (13, 13, 256)
    # -> (6, 6, 256)
    ################################################################################
    self._conv_5 = ph.GroupConv2D(
        'conv_5',
        input_size=self._conv_4.output_size,
        output_channels=256,
        num_groups=2,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool_5 = ph.Pool2D(
        'pool_5',
        input_size=self._conv_5.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    # fc layers
    self._dense_6 = ph.Linear('dense_6', input_size=self._pool_5.flat_size, output_size=4096)
    self._dense_7 = ph.Linear('dense_7', input_size=self._dense_6.output_size, output_size=4096)
    self._dense_8 = ph.Linear('dense_8', input_size=self._dense_7.output_size, output_size=1000)
def _build(self):
    self._emb_layer = ph.Linear('emb_layer', self._voc_size, self._emb_size)
    self._cell = ph.GRUCell('cell', self._emb_size, self._state_size)
def _build(self):
    # the embedding layer is optional; without it the cell consumes the raw one-hot input
    if self._emb_size is not None:
        self._emb_layer = ph.Linear('emb_layer', self._voc_size, self._emb_size)
        cell_input_size = self._emb_size
    else:
        self._emb_layer = None
        cell_input_size = self._voc_size
    self._cell = ph.GRUCell('cell', cell_input_size, self._state_size)
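# A hedged sketch of running the encoder above over a (batch, seq_len, voc_size)
# sequence with tf.scan, mirroring the scan-based RNN pattern used in the language
# model earlier in this file. The method name and the lrelu on the embedding are
# assumptions; only _emb_layer and _cell come from the builder.
def _encode_sketch(self, seq):
    batch_size = tf.shape(seq)[0]
    init_state = tf.zeros(shape=(batch_size, self._state_size), dtype=ph.dtype)

    def _step(prev_state, x_t):
        if self._emb_layer is not None:
            x_t = ph.ops.lrelu(self._emb_layer.setup(x_t))
        return self._cell.setup(x_t, prev_state)

    states = tf.scan(
        fn=_step,
        elems=tf.transpose(seq, (1, 0, 2)),   # time-major for scan
        initializer=init_state
    )
    return states[-1]                          # final state: (batch, state_size)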
def _build(self):
    encoder = vgg.VGG16('encoder')
    dense1 = ph.Linear('dense1', encoder.fc7.output_size, 4096, w_init=ph.init.TruncatedNormal(0, 1e-3))
    dense2 = ph.Linear('dense2', 4096, self._num_classes, w_init=ph.init.TruncatedNormal(0, 1e-3))
    input_image = ph.placeholder('input_image', (None, vgg.HEIGHT, vgg.WIDTH, 3), ph.float)
    input_label = ph.placeholder('input_label', (None,), ph.int)

    # device 0 is skipped; the batch is split across gpu:1 .. gpu:num_gpus
    self._num_gpus -= 1
    batch_size = tf.shape(input_image)[0]
    num_per_device = tf.cast(tf.ceil(batch_size / self._num_gpus), tf.int32)

    var_list1 = [
        *dense1.get_trainable_variables(),
        *dense2.get_trainable_variables()
    ]
    var_list2 = self.get_trainable_variables()
    y_list = []
    loss_list = []
    grad_list_list1 = []
    grad_list_list2 = []
    for i in range(self._num_gpus):
        with tf.device(f'/gpu:{i + 1}'):
            input_image_i = input_image[i * num_per_device:(i + 1) * num_per_device]
            encoder.setup(input_image_i)
            h = encoder['h7'] if i == 0 else encoder[f'h7_{i}']
            y = ph.ops.lrelu(dense1.setup(h) + h)
            y = tf.nn.softmax(dense2.setup(y))
            y_list.append(y)

            input_label_i = input_label[i * num_per_device:(i + 1) * num_per_device]
            y_target = tf.one_hot(input_label_i, self._num_classes)
            loss = ph.ops.cross_entropy(y_target, y)
            loss = tf.reduce_mean(loss)
            loss_list.append(loss)

            reg1 = ph.reg.L2Regularizer(1e-6)
            reg1.setup(var_list1)
            grad_list1 = tf.gradients(loss + reg1.get_loss(), var_list1)
            grad_list_list1.append(grad_list1)

            reg2 = ph.reg.L2Regularizer(1e-6)
            reg2.setup(var_list2)
            grad_list2 = tf.gradients(loss + reg2.get_loss(), var_list2)
            grad_list_list2.append(grad_list2)

    y = tf.concat(y_list, axis=0)
    loss = tf.reduce_mean(loss_list)

    # average the per-device gradients before applying them
    grad_list1 = [
        tf.reduce_mean(grads, axis=0)
        for grads in zip(*grad_list_list1)
    ]
    self.train = ph.Step(
        inputs=(input_image, input_label),
        outputs=loss,
        updates=tf.train.RMSPropOptimizer(1e-5, 0.9, 0.9).apply_gradients(zip(grad_list1, var_list1))
    )
    grad_list2 = [
        tf.reduce_mean(grads, axis=0)
        for grads in zip(*grad_list_list2)
    ]
    self.fine_tune = ph.Step(
        inputs=(input_image, input_label),
        outputs=loss,
        updates=tf.train.RMSPropOptimizer(1e-6, 0.9, 0.9).apply_gradients(zip(grad_list2, var_list2))
    )

    label = tf.argmax(y, axis=1)
    self.predict = ph.Step(inputs=input_image, outputs=(label, y))
def _build(self):
    input_size = self._input_size = self._retina_height * self._retina_width * self._num_channels
    self._input_layer = ph.Linear('input_layer', input_size * 3, self._h_input_size)
    self._loc_layer = ph.Linear('loc_layer', 2, self._h_loc_size)
    self._output_layer = ph.Linear('output_layer', self._h_input_size + self._h_loc_size, self._output_size)
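# A hedged sketch of how this glimpse network might combine its two inputs: the
# three flattened retina patches go through input_layer, the 2-d location through
# loc_layer, and the concatenation through output_layer. The method name, the
# concatenation, and the lrelu activations are assumptions; only the layer sizes
# come from the builder above.
def _setup_sketch(self, glimpse, loc):
    # glimpse: (batch, input_size * 3), loc: (batch, 2)
    h_g = ph.ops.lrelu(self._input_layer.setup(glimpse))
    h_l = ph.ops.lrelu(self._loc_layer.setup(loc))
    return ph.ops.lrelu(self._output_layer.setup(tf.concat([h_g, h_l], axis=1)))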
def _build(self):
    # conv1 padding=SAME
    self._conv1_1 = ph.Conv2D(
        'conv1_1',
        input_size=[self._height, self._width, 3],
        output_channels=64,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv1_2 = ph.Conv2D(
        'conv1_2',
        input_size=self._conv1_1.output_size,
        output_channels=64,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool1 = ph.Pool2D(
        'pool1',
        input_size=self._conv1_2.output_size,
        filter_height=2, filter_width=2,
        stride_height=2, stride_width=2,
        padding='SAME',
        pool_type='max'
    )
    # conv2 padding=SAME
    self._conv2_1 = ph.Conv2D(
        'conv2_1',
        input_size=self._pool1.output_size,
        output_channels=128,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv2_2 = ph.Conv2D(
        'conv2_2',
        input_size=self._conv2_1.output_size,
        output_channels=128,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool2 = ph.Pool2D(
        'pool2',
        input_size=self._conv2_2.output_size,
        filter_height=2, filter_width=2,
        stride_height=2, stride_width=2,
        padding='SAME',
        pool_type='max'
    )
    # conv3 padding=SAME
    self._conv3_1 = ph.Conv2D(
        'conv3_1',
        input_size=self._pool2.output_size,
        output_channels=256,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv3_2 = ph.Conv2D(
        'conv3_2',
        input_size=self._conv3_1.output_size,
        output_channels=256,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv3_3 = ph.Conv2D(
        'conv3_3',
        input_size=self._conv3_2.output_size,
        output_channels=256,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool3 = ph.Pool2D(
        'pool3',
        input_size=self._conv3_3.output_size,
        filter_height=2, filter_width=2,
        stride_height=2, stride_width=2,
        padding='SAME',
        pool_type='max'
    )
    # conv4 padding=SAME
    self._conv4_1 = ph.Conv2D(
        'conv4_1',
        input_size=self._pool3.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv4_2 = ph.Conv2D(
        'conv4_2',
        input_size=self._conv4_1.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv4_3 = ph.Conv2D(
        'conv4_3',
        input_size=self._conv4_2.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool4 = ph.Pool2D(
        'pool4',
        input_size=self._conv4_3.output_size,
        filter_height=2, filter_width=2,
        stride_height=2, stride_width=2,
        padding='SAME',
        pool_type='max'
    )
    # conv5 padding=SAME
    self._conv5_1 = ph.Conv2D(
        'conv5_1',
        input_size=self._pool4.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv5_2 = ph.Conv2D(
        'conv5_2',
        input_size=self._conv5_1.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._conv5_3 = ph.Conv2D(
        'conv5_3',
        input_size=self._conv5_2.output_size,
        output_channels=512,
        filter_height=3, filter_width=3,
        stride_width=1, stride_height=1,
        padding='SAME'
    )
    self._pool5 = ph.Pool2D(
        'pool5',
        input_size=self._conv5_3.output_size,
        filter_height=2, filter_width=2,
        stride_height=2, stride_width=2,
        padding='SAME',
        pool_type='max'
    )
    # fc layers
    self._fc6 = ph.Linear('fc6', input_size=self._pool5.flat_size, output_size=4096)
    self._fc7 = ph.Linear('fc7', input_size=self._fc6.output_size, output_size=4096)
    self._fc8 = ph.Linear('fc8', input_size=self._fc7.output_size,
                          output_size=1000, w_init=ph.init.RandomNormal(stddev=1e-4))
def _build(self):
    # conv1 padding=VALID
    self._conv1 = ph.Conv2D(
        'conv1',
        input_size=[self._height, self._width, 3],
        output_channels=96,
        filter_height=11, filter_width=11,
        stride_width=4, stride_height=4,
        padding='VALID'
    )
    self._pool1 = ph.Pool2D(
        'pool1',
        input_size=self._conv1.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    # conv2, split into two groups (grouped convolution)
    self._conv2 = ph.GroupConv2D(
        'conv2',
        input_size=self._pool1.output_size,
        output_channels=256,
        num_groups=2,
        filter_height=5, filter_width=5,
        stride_height=1, stride_width=1
    )
    self._pool2 = ph.Pool2D(
        'pool2',
        input_size=self._conv2.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    # conv3
    self._conv3 = ph.Conv2D(
        'conv3',
        input_size=self._pool2.output_size,
        output_channels=384,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1
    )
    # conv4, split into two groups (grouped convolution)
    self._conv4 = ph.GroupConv2D(
        'conv4',
        input_size=self._conv3.output_size,
        output_channels=384,
        num_groups=2,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1
    )
    # conv5, split into two groups (grouped convolution)
    self._conv5 = ph.GroupConv2D(
        'conv5',
        input_size=self._conv4.output_size,
        output_channels=256,
        num_groups=2,
        filter_width=3, filter_height=3,
        stride_width=1, stride_height=1
    )
    self._pool5 = ph.Pool2D(
        'pool5',
        input_size=self._conv5.output_size,
        filter_height=3, filter_width=3,
        stride_height=2, stride_width=2,
        padding='VALID',
        pool_type='max'
    )
    # fc layers
    self._fc6 = ph.Linear('fc6', input_size=self._pool5.flat_size, output_size=4096)
    self._fc7 = ph.Linear('fc7', input_size=self._fc6.output_size, output_size=4096)
    self._fc8 = ph.Linear('fc8', input_size=self._fc7.output_size, output_size=1000,
                          w_init=ph.RandomNormal(stddev=1e-4))