def encoder(x):
    """Encode `x` with three strided convolutions followed by four dense layers.

    Every layer is batch-normalised by slim, kept linear inside the slim call
    (``activation_fn=tf.identity``) and then passed through the external
    ``lrelu`` helper.  Each resulting tensor is printed for debugging.

    Args:
        x: input tensor fed to the first convolution.

    Returns:
        The activated output of the last dense layer (8 units).

    Note:
        Flattening relies on the module-level ``batch_size``.
    """

    def _report(tag, tensor):
        # One pre-formatted argument prints the same text under the Python 2
        # print statement and the Python 3 print function (the double space
        # reproduces the separator the old `print a, b` form inserted).
        print('%s:  %s' % (tag, tensor))

    net = x

    # Convolutional layers, each followed by a leaky ReLU activation.
    for index, depth in enumerate((32, 64, 128), start=1):
        tag = 'conv%d' % index
        net = slim.convolution(net, depth, 2,
                               stride=2,
                               activation_fn=tf.identity,
                               normalizer_fn=slim.batch_norm,
                               scope='e_' + tag)
        net = lrelu(net)
        _report(tag, net)

    net = tf.reshape(net, [batch_size, -1])

    # Dense layers narrowing down to the 8-unit code.
    for index, units in enumerate((256, 64, 32, 8), start=1):
        tag = 'fc%d' % index
        net = slim.fully_connected(net, units,
                                   normalizer_fn=slim.batch_norm,
                                   activation_fn=tf.identity,
                                   scope='e_' + tag)
        net = lrelu(net)
        _report(tag, net)

    return net
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a frame-level model built from two strided 1-D convolutions.

    Only the first 120 frames of every video are used.  They go through two
    'VALID' convolutions along the frame axis (kernel 9 / stride 5, then
    kernel 5 / stride 3, 512 filters each), are flattened, and are classified
    by a sigmoid-activated dense layer.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features. The fixed slice assumes at least 120 frames.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' with the number of frames of each
        video (before padding). Not used by this model.
      **unused_params: Ignored.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    first_frames = tf.slice(model_input, [0, 0, 0], [-1, 120, -1])

    net = first_frames
    for kernel, stride in ((9, 5), (5, 3)):
        net = slim.convolution(net, 512, [kernel],
                               stride=stride,
                               padding="VALID",
                               data_format="NWC")

    predictions = slim.fully_connected(
        slim.flatten(net),
        vocab_size,
        activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": predictions}
def resnet_v22(inputs, blocks, num_classes=None, is_training=True, reuse=None, scope=None):
    """Builds a residual network: a three-conv residual stem followed by `blocks`.

    Args:
        inputs: input tensor for the first convolution.
        blocks: block description handed to ``stack_blocks_dense``.
        num_classes: when not None, a flatten + dense head with this many
            units is appended and a softmax is stored under 'predictions'.
        is_training: forwarded to every ``slim.dropout`` via arg_scope.
        reuse: forwarded to ``tf.variable_scope``.
        scope: optional variable scope name (default name 'resnet_v22').

    Returns:
        A ``(net, end_points)`` tuple: the final tensor and the dict built
        from the outputs collection of this scope.
    """
    with tf.variable_scope(scope, 'resnet_v22', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        collectors = [slim.convolution, bottleneck, stack_blocks_dense]
        with slim.arg_scope(collectors, outputs_collections=end_points_collection), \
                slim.arg_scope([slim.dropout], is_training=is_training):
            # Stem: conv1 feeds both a subsampled shortcut and a strided
            # conv2 -> conv3 branch; the two paths are summed.
            stem = slim.convolution(inputs, 64, 16, stride=1, padding='SAME',
                                    scope='conv1')
            shortcut = subsample(stem, factor=2, scope='shortcut')
            branch = conv2d_same(stem, 64, 16, stride=2, scope='conv2')
            branch = slim.convolution(branch, 64, 16, stride=1, padding='SAME',
                                      scope='conv3')
            net = stack_blocks_dense(branch + shortcut, blocks)
            print('last:', net)

            with_head = num_classes is not None
            if with_head:
                net = slim.flatten(net, scope='flatten')
                net = slim.fully_connected(net, num_classes,
                                           activation_fn=tf.nn.relu,
                                           normalizer_fn=None,
                                           scope='fc')

            end_points = utils.convert_collection_to_dict(end_points_collection)
            if with_head:
                end_points['predictions'] = slim.softmax(net, scope='predictions')
            return net, end_points
def vggm1234(x, TRAIN_COVN=True):
    """Applies the four convolution layers conv1..conv4 and flattens the result.

    conv1 and conv2 are each followed by local response normalisation and a
    3x3 / stride-2 max pool.  All convolutions use 'VALID' padding, ReLU and
    ``reuse=tf.AUTO_REUSE``.

    Args:
        x: input tensor.
        TRAIN_COVN: forwarded as ``trainable`` to every convolution.

    Returns:
        ``U.flattenallbut0`` applied to the conv4 output.
    """

    def conv(net, depth, kernel, stride, name):
        return slim.convolution(net, depth, [kernel, kernel], stride,
                                padding='VALID',
                                scope=name,
                                activation_fn=tf.nn.relu,
                                reuse=tf.AUTO_REUSE,
                                trainable=TRAIN_COVN)

    def normalize_and_pool(net, name):
        net = tf.nn.lrn(net, depth_radius=5, bias=2, alpha=1e-4 * 1, beta=0.75)
        return slim.pool(net, [3, 3], 'MAX', stride=2, padding='VALID', scope=name)

    features = normalize_and_pool(conv(x, 96, 7, 2, 'conv1'), 'pool1')
    features = normalize_and_pool(conv(features, 256, 5, 2, 'conv2'), 'pool2')
    features = conv(features, 512, 3, 1, 'conv3')
    features = conv(features, 512, 3, 1, 'conv4')
    return U.flattenallbut0(features)
def bottleneck2(inputs, depth, depth_bottleneck, stride, outputs_collections=None, scope=None):
    """Residual unit with two kernel-16 convolutions and no pre-activation.

    Args:
        inputs: input tensor (rank >= 3 is enforced by ``utils.last_dimension``).
        depth: number of output channels of the unit.
        depth_bottleneck: number of channels after the first convolution.
        stride: stride of the unit, applied in the first convolution and in
            the shortcut.
        outputs_collections: collection the output is registered in.
        scope: optional variable scope name (default name 'bottleneck_v2').

    Returns:
        The result of ``utils.collect_named_outputs`` for shortcut + residual.
    """
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        in_depth = utils.last_dimension(inputs.get_shape(), min_rank=3)

        # A 1-wide linear projection is only needed when the channel count
        # changes; otherwise the input is merely subsampled.
        if in_depth != depth:
            shortcut = slim.convolution(inputs, depth, 1,
                                        stride=stride,
                                        padding='SAME',
                                        normalizer_fn=None,
                                        activation_fn=None,
                                        scope='shortcut')
        else:
            shortcut = subsample(inputs, stride, 'shortcut')

        # The residual branch reads the raw inputs (pre-activation disabled).
        branch = conv2d_same(inputs, depth_bottleneck, 16, stride, scope='conv1')
        branch = slim.convolution(branch, depth, 16,
                                  stride=1,
                                  padding='SAME',
                                  scope='conv2')

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           shortcut + branch)
def create_model(self, model_input, vocab_size, labels, scope='default', is_training=True, **unused_params):
    """Builds a residual CNN over a learned projection, classified by an MoE head.

    The input is projected to ``FLAGS.residualcnn_x`` units, given two
    trailing singleton axes, and sent through four convolution +
    ``residual_module`` stages with batch normalisation in between.  The
    pooled features are added back onto the projection and handed to
    ``MoeModel``.

    Args:
      model_input: input features; presumably 'batch_size' x 'num_features'
        -- TODO confirm against the caller.
      vocab_size: number of classes, forwarded to ``MoeModel``.
      labels: ground-truth labels, or None to skip the loss.
      scope: name of the variable scope holding the model.
      is_training: forwarded to every ``slim.batch_norm``.
      **unused_params: ignored.

    Returns:
      The dict returned by ``MoeModel().create_model`` extended with
      'features' and, when `labels` is given, 'loss'.
    """
    X = FLAGS.residualcnn_x
    # `reuse` must be passed by keyword: the second positional argument of
    # tf.variable_scope is `default_name`, so the former positional
    # tf.AUTO_REUSE never enabled variable sharing.
    # NOTE(review): the original indentation was lost; everything up to the
    # return is assumed to live inside this scope -- confirm.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        fc = slim.fully_connected(
            model_input, X,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.01))
        reshaped_input = tf.expand_dims(fc, -1)
        reshaped_input = tf.expand_dims(reshaped_input, -1)

        conv1 = slim.convolution(reshaped_input, 64, [49, 1])
        conv1_norm = slim.batch_norm(conv1, is_training=is_training)
        module1 = self.residual_module([128, 192, 64], conv1_norm, 'module1')
        module1_norm = slim.batch_norm(module1, is_training=is_training)

        conv2 = slim.convolution(module1_norm, 128, 1)
        conv2_norm = slim.batch_norm(conv2, is_training=is_training)
        module2 = self.residual_module([256, 512, 128], conv2_norm, 'module2')
        module2_norm = slim.batch_norm(module2, is_training=is_training)

        conv3 = slim.convolution(module2_norm, 256, 1)
        conv3_norm = slim.batch_norm(conv3, is_training=is_training)
        module3 = self.residual_module([512, 256], conv3_norm, 'module3')
        module3_norm = slim.batch_norm(module3, is_training=is_training)

        conv4 = slim.convolution(module3_norm, X, 1)
        conv4_norm = slim.batch_norm(conv4, is_training=is_training)
        module4 = self.residual_module([512, X], conv4_norm, 'module4')

        features = tf.squeeze(module4, [2])
        # Skip connection from the input projection to the pooled features.
        features = model_utils.FramePooling(features, FLAGS.residualcnn_pooling) + fc

        results = MoeModel().create_model(features, vocab_size)
        results['features'] = features
        # Identity check: `!=` on a tensor is not a reliable None test.
        if labels is not None:
            results['loss'] = losses.CrossEntropyLoss().calculate_loss(
                results['predictions'], labels)
        return results
def res_block(net, nb_filter, scope):
    """Adds the input to a 1x1 -> 3x3 -> 1x1 stack of stride-1 convolutions.

    Args:
        net: input tensor; it is added to the stack output, so `nb_filter`
            presumably has to match its channel count -- verify at call sites.
        nb_filter: number of filters of each convolution.
        scope: prefix for the three layer scopes ('<scope>_res_1' .. '_res_3').

    Returns:
        The sum of the convolution stack output and the original input.
    """
    shortcut = net
    for index, kernel in enumerate(([1, 1], [3, 3], [1, 1]), start=1):
        net = slim.convolution(net, nb_filter, kernel, 1,
                               scope='%s_res_%d' % (scope, index))
    return net + shortcut
def build_predict_op(self, input_tensor, is_training=False):
    """
    Builds the graph from input tensor to model prediction.

    Three 1-D convolutions (512, 256 and 60 filters) are each followed by
    batch normalisation and a non-linearity (ReLU, ReLU, tanh).  The result
    is transposed and given a trailing channel axis.

    Args:
        input_tensor (tf tensor): input, with dimensions
            [batch_size, time, nr_channels=1].
        is_training (bool): forwarded to every batch-norm layer; True in
            training mode, False in evaluation mode.

    Returns:
        (tf operation): model predictions with dimensions
            [batch_size, 60, time, nr_channels=1] (the second axis is
            presumably the mel-band axis of the labels).
    """
    # (scope, filters, kernel, stride, non-linearity applied after batch norm)
    layer_specs = (
        ('cnn_raw_1', 512, [1024], [512], tf.nn.relu),
        ('cnn_raw_2', 256, [3], [1], tf.nn.relu),
        ('cnn_raw_3', 60, [3], [1], tf.nn.tanh),
    )

    net = input_tensor
    with tf.variable_scope('MSTmodel'):
        for name, filters, kernel, stride, nonlinearity in layer_specs:
            net = slim.convolution(net, filters, kernel,
                                   stride=stride,
                                   padding='SAME',
                                   activation_fn=None,
                                   weights_initializer=self.W_init,
                                   biases_initializer=self.b_init,
                                   weights_regularizer=self.W_reg,
                                   biases_regularizer=self.b_reg,
                                   scope=name)
            # updates_collections=None: moving averages are updated in place.
            net = slim.batch_norm(net,
                                  updates_collections=None,
                                  scope=name,
                                  is_training=is_training)
            net = nonlinearity(net)

        # Transpose and add a channel dimension to match the label shape.
        net = tf.expand_dims(tf.transpose(net, [0, 2, 1]), 3)
    return net
def QNet(inputs, width, is_training=True, reuse=False, scope="QNet"):
    """Builds a network producing one value per cell of a `width` x `width` grid.

    Two 5-wide 'VALID' convolutions and two 512-unit dense layers (each
    followed by batch normalisation) feed a linear layer of ``width**2``
    units that is reshaped to ``(-1, width, width)``.

    Args:
        inputs: input tensor.
        width: side length of the output grid.
        is_training: forwarded to every ``slim.batch_norm``.
        reuse: forwarded to ``tf.variable_scope``.
        scope: variable scope name.

    Returns:
        Tensor of shape ``(-1, width, width)``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        layer_defaults = dict(
            weights_initializer=slim.initializers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(0.01),
            biases_initializer=tf.zeros_initializer(),
            biases_regularizer=slim.l2_regularizer(0.01),
        )
        with slim.arg_scope([slim.convolution, slim.fully_connected], **layer_defaults):
            hidden = inputs
            for name, depth in (("conv1", 32), ("conv2", 64)):
                hidden = slim.convolution(inputs=hidden,
                                          num_outputs=depth,
                                          kernel_size=5,
                                          stride=1,
                                          activation_fn=tf.nn.relu,
                                          padding="VALID",
                                          scope=name)
                hidden = slim.batch_norm(hidden, is_training=is_training)

            hidden = slim.flatten(hidden)
            for name in ("fc1", "fc2"):
                hidden = slim.fully_connected(inputs=hidden,
                                              num_outputs=512,
                                              activation_fn=tf.nn.relu,
                                              scope=name)
                hidden = slim.batch_norm(hidden, is_training=is_training)

            flat_q = slim.fully_connected(inputs=hidden,
                                          num_outputs=width**2,
                                          activation_fn=None,
                                          scope="fc3")
            return tf.reshape(flat_q, (-1, width, width))
def conv2d_same(inputs, num_outs, kernal_size, stride, scope=None):
    """Strided convolution with explicit, input-size-independent padding.

    For ``stride == 1`` this is a plain 'SAME' convolution.  Otherwise the
    second axis of the rank-3 input is zero-padded by ``kernal_size - 1``
    in total (split between both ends) and a 'VALID' convolution is applied.

    Args:
        inputs: rank-3 tensor (the explicit padding lists three axes).
        num_outs: number of output filters.
        kernal_size: integer kernel size.
        stride: integer stride.
        scope: optional scope for the convolution.

    Returns:
        The convolution output tensor.
    """
    if stride != 1:
        total_padding = kernal_size - 1
        leading = total_padding // 2
        trailing = total_padding - leading
        padded = tf.pad(inputs, [[0, 0], [leading, trailing], [0, 0]])
        return slim.convolution(padded, num_outs, kernal_size,
                                stride=stride,
                                padding='VALID',
                                scope=scope)

    return slim.convolution(inputs, num_outs, kernal_size,
                            stride=1,
                            padding='SAME',
                            scope=scope)
def residual_module(self, params, inp, scope='default'):
    """Stack of dilated (9, 1) convolutions with an identity skip connection.

    Layer ``i`` has ``params[i]`` filters and dilation rate ``(2 * i + 1, 1)``.

    Args:
        params: sequence with the number of filters of each layer. The last
            entry presumably has to equal the channel count of `inp` so that
            the sum is valid -- verify at call sites.
        inp: input tensor.
        scope: name of the variable scope holding the layers.

    Returns:
        ``inp`` plus the output of the last convolution.
    """
    # `reuse` must be a keyword argument: the second positional parameter of
    # tf.variable_scope is `default_name`, so the former positional
    # tf.AUTO_REUSE never enabled variable sharing.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        out = inp
        for i, num_filters in enumerate(params):
            out = slim.convolution(out, num_filters, (9, 1), rate=(2 * i + 1, 1))
        return inp + out
def conv3d(*args, **kwargs):
    """Thin wrapper around ``slim.convolution`` that logs tensor shapes.

    All arguments are forwarded unchanged.  After the layer is built, one
    line ``<scope> -> <output shape> before: <input shape>`` is printed,
    using the external ``shape`` helper.

    Returns:
        The output tensor of ``slim.convolution``.
    """
    out = slim.convolution(*args, **kwargs)

    # Tolerate calls without `scope=` or with `inputs=` given by keyword; the
    # old direct indexing raised KeyError/IndexError after building the layer.
    scope = kwargs.get('scope')
    inputs = args[0] if args else kwargs.get('inputs')

    # One pre-formatted argument prints the same text on Python 2 and 3.
    print('%s -> %s before: %s' % (scope, shape(out), shape(inputs)))

    # NOTE: a permanently disabled (`if 0:`) tf.Print of the output mean and
    # moments used to live here; wrap `out` in tf.Print again if needed.
    return out
def build_architecture(self, inputs_dict):
    """Builds the convolutional + GRU text encoder.

    The embedded caption goes through two pairs of width-3 convolutions
    (128 and 256 filters), each pair followed by batch normalisation, then a
    256-unit GRU.  The output at the last valid time step feeds two dense
    layers (256 with ReLU, 128 linear).

    Args:
        inputs_dict: dict providing 'caption_batch' (used only to compute the
            sequence lengths) and 'embedding_batch' (the network input).

    Returns:
        A dict with the single key 'encoder_output' holding the 128-unit
        encoding.
    """
    captions = inputs_dict['caption_batch']
    net = inputs_dict['embedding_batch']
    seq_length = compute_sequence_length(captions)

    with slim.arg_scope([slim.convolution, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # No pooling between the conv pairs: it would change the sequence
        # length that the RNN relies on below.
        for scope_names, depth in ((('conv1', 'conv2'), 128),
                                   (('conv3', 'conv4'), 256)):
            for name in scope_names:
                net = slim.convolution(net, depth, 3, scope=name)
            net = tf.layers.batch_normalization(net, training=self.is_training)

        gru_cell = tf.contrib.rnn.GRUCell(num_units=256)
        rnn_outputs, _ = tf.nn.dynamic_rnn(cell=gru_cell,
                                           inputs=net,
                                           sequence_length=seq_length,
                                           dtype=tf.float32,
                                           scope='rnn')

        encoding = extract_last_output(rnn_outputs, seq_length)
        encoding = slim.fully_connected(encoding, 256, scope='fc5')
        encoding = slim.fully_connected(encoding, 128, activation_fn=None, scope='fc6')
        return {'encoder_output': encoding}
def G(self, training=True):
    """Builds the generator: random noise -> two dense layers -> two convolutions.

    Normal noise of shape ``[config_.BATCH_SIZE, config_.num_fc_1]`` is
    expanded by two dense layers, reshaped to
    ``[-1, FEATURE_LEN, num_filt_2]`` and refined by two convolutions.
    Both convolutions are batch-normalised through arg_scope; the last one
    has no activation and a single output channel.

    Args:
        training: forwarded as ``is_training`` to every ``slim.batch_norm``.

    Returns:
        The output tensor of the final convolution ('deconv4').
    """

    def layer_options():
        # Fresh initializer/regularizer objects for every layer.
        return dict(biases_initializer=PointInitializer(.1),
                    weights_regularizer=slim.l2_regularizer(0.001))

    with slim.arg_scope([slim.batch_norm], is_training=training), \
            slim.arg_scope([slim.convolution], normalizer_fn=slim.batch_norm), \
            tf.variable_scope("G"):
        noise = tf.random_normal(
            shape=[config_.BATCH_SIZE, config_.num_fc_1])

        hidden = slim.fully_connected(noise,
                                      num_outputs=config_.FEATURE_LEN,
                                      scope="fc1",
                                      **layer_options())
        hidden = slim.fully_connected(
            hidden,
            num_outputs=config_.FEATURE_LEN * config_.num_filt_2,
            scope="fc2",
            **layer_options())
        sequence = tf.reshape(
            hidden, [-1, config_.FEATURE_LEN, config_.num_filt_2])

        sequence = slim.convolution(sequence,
                                    num_outputs=config_.num_filt_1,
                                    kernel_size=4,
                                    scope="deconv3",
                                    **layer_options())
        generated = slim.convolution(sequence,
                                     num_outputs=1,
                                     kernel_size=5,
                                     activation_fn=None,
                                     scope="deconv4",
                                     **layer_options())
        return generated
def atrousconv(self, x, num_out_layers, kernel_size, stride=1, rate=1, activation_fn=tf.nn.elu):
    """Applies ``slim.convolution`` with an optional dilation rate.

    Args:
        x: input tensor.
        num_out_layers: number of output filters.
        kernel_size: kernel size passed to slim.
        stride: convolution stride.
        rate: dilation rate.
        activation_fn: activation applied by slim (ELU by default).

    Returns:
        The convolution output tensor.
    """
    options = {
        'stride': stride,
        'rate': rate,
        'activation_fn': activation_fn,
    }
    return slim.convolution(x, num_out_layers, kernel_size, **options)
def conv2d(self, outChannels=20, kernel=3, pool=True, dropout=False, norm=True):
    """Appends a convolution layer (plus optional extras) to the network.

    The layer is built on ``self.last_layer`` and registered via
    ``self.add``.

    Args:
        outChannels: number of output channels of the convolution.
        kernel: kernel size.
        pool: when true, add a 3x3 ``slim.max_pool2d``.
        dropout: when true, apply ``tf.nn.dropout`` with ``self.keep_prob``.
        norm: when true, apply local response normalisation.
    """
    with tf.name_scope('conv'):
        print("input shape ", self.last_shape)
        print("conv outChannels ", outChannels)

        # The current layer count makes scope and summary names unique.
        layer_id = str(len(self.layers))
        net = slim.convolution(self.last_layer, outChannels, kernel,
                               scope="conv_" + layer_id)
        if pool:
            net = slim.max_pool2d(net, [3, 3], scope='pool')
        if dropout:
            net = tf.nn.dropout(net, self.keep_prob)
        if norm:
            net = tf.nn.lrn(net,
                            depth_radius=4,
                            bias=1.0,
                            alpha=0.001 / 9.0,
                            beta=0.75)
        if debug:
            tf.summary.histogram('norm_' + layer_id, net)

        print("output shape ", net.get_shape())
        self.add(net)
def G(self, training=True):
    """Builds the generator: random noise -> two dense layers -> two convolutions.

    Normal noise of shape ``[BATCH_SIZE, num_fc_1]`` (from ``self.config``)
    is expanded by two dense layers, reshaped to
    ``[-1, FEATURE_LEN, num_filt_2]`` and refined by two convolutions.  The
    first convolution is batch-normalised (with scale) through arg_scope;
    the last one is linear, un-normalised and has a single output channel.

    Args:
        training: forwarded as ``is_training`` to ``slim.batch_norm``.

    Returns:
        The output tensor of the final convolution ('deconv4').
    """
    cfg = self.config
    with slim.arg_scope([slim.batch_norm], is_training=training, scale=True), \
            slim.arg_scope([slim.convolution], normalizer_fn=slim.batch_norm), \
            tf.variable_scope("G"):
        noise = tf.random_normal(shape=[cfg.BATCH_SIZE, cfg.num_fc_1])

        hidden = slim.fully_connected(noise,
                                      num_outputs=cfg.FEATURE_LEN,
                                      biases_initializer=PointInitializer(.1),
                                      scope="fc1")
        hidden = slim.fully_connected(hidden,
                                      num_outputs=cfg.FEATURE_LEN * cfg.num_filt_2,
                                      biases_initializer=PointInitializer(.1),
                                      scope="fc2")
        sequence = tf.reshape(hidden, [-1, cfg.FEATURE_LEN, cfg.num_filt_2])

        sequence = slim.convolution(sequence,
                                    num_outputs=cfg.num_filt_1,
                                    kernel_size=4,
                                    biases_initializer=PointInitializer(.1),
                                    scope="deconv3")
        # The output layer opts out of the batch norm installed above.
        generated = slim.convolution(sequence,
                                     num_outputs=1,
                                     kernel_size=5,
                                     activation_fn=None,
                                     biases_initializer=PointInitializer(.1),
                                     scope="deconv4",
                                     normalizer_fn=None)
        return generated
def build_conv_layer(is_training, in_feats_mod, param_dict):
    """Builds the optional convolution stage and returns flattened features.

    Reads these `param_dict` keys: data_batch_size, conv_depth,
    conv_actv_str, conv_gene_pair, conv_batch_norm, data_input_size and
    reg_do_keep_prob.

    Args:
        is_training: forwarded to ``slim.dropout``.
        in_feats_mod: input features, assumed to be
            [batch_size, 2 * gene_count].
        param_dict: configuration dictionary (see keys above).

    Returns:
        A flattened tensor: the processed convolution output, or simply the
        flattened input when ``conv_depth`` is not positive.
    """
    if not param_dict['conv_depth'] > 0:
        # No convolution requested: flatten while keeping the batch axis.
        return tf.contrib.layers.flatten(in_feats_mod)

    batch_size = param_dict['data_batch_size']
    half_width = int(int(in_feats_mod.shape[1]) / 2)

    # [batch, 2 * genes] -> [batch, 2, genes] -> [batch, genes, 2], the
    # channels-last layout the convolution below is configured for ('NWC').
    paired = tf.reshape(in_feats_mod, [batch_size, 2, half_width])
    paired = tf.transpose(paired, [0, 2, 1])
    print("model conv reshape: " + str(paired))

    activation = get_act_fn(param_dict['conv_actv_str'])
    if param_dict['conv_gene_pair']:
        kernel_shape = [
            int(param_dict['data_input_size'] / 2), 2,
            param_dict['conv_depth']
        ]
        features = gene_pair_convolution(paired, batch_size, kernel_shape,
                                         activation)
    else:
        features = slim.convolution(inputs=paired,
                                    num_outputs=param_dict['conv_depth'],
                                    kernel_size=1,
                                    stride=1,
                                    data_format='NWC',
                                    activation_fn=activation)

    features = tf.contrib.layers.flatten(features)
    if param_dict['conv_batch_norm']:
        features = batch_normalize(features)

    keep_prob = param_dict['reg_do_keep_prob']
    if keep_prob < 1:
        features = slim.dropout(features,
                                keep_prob=keep_prob,
                                is_training=is_training)
    return features
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a stack of LSTMs to represent the video.

    The final (concatenated, ``state_is_tuple=False``) LSTM state gets two
    extra axes, is projected by a size-1 convolution with
    ``FLAGS.num_filters`` outputs and is then passed to the video-level
    classifier named by ``FLAGS.video_level_classifier_model``.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
        frames for each video (before padding).
      **unused_params: Forwarded to the video-level classifier.

    Returns:
      Whatever the selected video-level model's ``create_model`` returns.
    """
    cell_size = FLAGS.lstm_cells
    depth = FLAGS.lstm_layers

    cells = [
        tf.contrib.rnn.BasicLSTMCell(cell_size, forget_bias=1.0)
        for _ in range(depth)
    ]
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=False)

    _, final_state = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                       sequence_length=num_frames,
                                       dtype=tf.float32)

    # Insert two singleton axes after the batch axis before the convolution.
    expanded = tf.expand_dims(tf.expand_dims(final_state, axis=1), axis=1)
    projected = slim.convolution(expanded, FLAGS.num_filters, 1,
                                 stride=1, padding="SAME")

    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    return classifier_cls().create_model(model_input=projected,
                                         vocab_size=vocab_size,
                                         **unused_params)
def inception_module(self, inp, param, scope):
    """Inception-style module with four parallel branches along the first axis.

    Args:
        inp: rank-4 input tensor (the branches are concatenated on axis 3).
        param: sequence of six filter counts:
            param[0]          size-1 convolution branch,
            param[1], [2]     size-1 reduction, then (9, 1) convolution,
            param[3], [4]     size-1 reduction, then (25, 1) convolution,
            param[5]          size-1 convolution after the max pool.
        scope: name of the variable scope holding the module.

    Returns:
        The four branch outputs concatenated along the channel axis.
    """
    # `reuse` must be a keyword argument: the second positional parameter of
    # tf.variable_scope is `default_name`, so the former positional
    # tf.AUTO_REUSE never enabled variable sharing.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # Branch 1: size-1 convolution.
        out1 = slim.convolution(inp, param[0], 1, 1)

        # Branch 2: size-1 reduction followed by a (9, 1) convolution.
        out2 = slim.convolution(inp, param[1], 1, 1)
        out2 = slim.convolution(out2, param[2], (9, 1), 1)

        # Branch 3: size-1 reduction followed by a (25, 1) convolution.
        out3 = slim.convolution(inp, param[3], 1, 1)
        out3 = slim.convolution(out3, param[4], (25, 1), 1)

        # Branch 4: (9, 1) max pool followed by a size-1 convolution.
        out4 = slim.max_pool2d(inp, (9, 1), 1, padding='SAME')
        out4 = slim.convolution(out4, param[5], 1, 1)

        return tf.concat([out1, out2, out3, out4], 3)
def create_model(self, model_input, vocab_size, num_frames, is_training=True, **unused_params):
    """Creates a deep residual 1-D CNN over the frame axis.

    Layout: a kernel-8 / stride-2 stem, then residual blocks of two kernel-3
    convolutions each, arranged as 2 identity blocks, downsample, 3 identity,
    downsample, 3 identity, downsample, 2 identity.  A downsampling block
    strides its first convolution and average-pools the shortcut.  The
    result is average-pooled, flattened and classified by two 2048-unit
    dense layers with dropout and a sigmoid output layer.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' with the number of frames of each
        video (before padding). Not used by this model.
      is_training: Whether dropout is active. Defaults to True, which matches
        the previous behaviour where dropout was always applied -- including
        at evaluation time, because this flag used to be swallowed by
        `unused_params`.
      **unused_params: Ignored.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    hidden_size = 1024

    def conv(net, kernel, stride):
        return slim.convolution(net, hidden_size, [kernel],
                                stride=stride, padding='SAME')

    def identity_block(net):
        # Two kernel-3 convolutions with an identity shortcut.
        shortcut = net
        net = conv(net, 3, 1)
        net = conv(net, 3, 1)
        return net + shortcut

    def downsample_block(net):
        # Halves the length: strided first convolution, pooled shortcut.
        shortcut = net
        net = conv(net, 3, 2)
        net = conv(net, 3, 1)
        shortcut = slim.pool(shortcut, [2], "AVG", stride=2, padding="SAME")
        return net + shortcut

    output = conv(model_input, 8, 2)

    # (number of identity blocks, whether a downsampling block follows).
    # Layers are created in exactly the original order so that the
    # auto-generated variable names stay the same.
    for num_identity, downsample in ((2, True), (3, True), (3, True), (2, False)):
        for _ in range(num_identity):
            output = identity_block(output)
        if downsample:
            output = downsample_block(output)

    output = slim.pool(output, [2], "AVG", stride=2, padding="SAME")
    output = slim.flatten(output)

    for _ in range(2):
        output = slim.fully_connected(output, 2048)
        # Dropout has to be switched off outside training.
        output = slim.dropout(output, is_training=is_training)

    output = slim.fully_connected(output, vocab_size, activation_fn=tf.nn.sigmoid)
    return {"predictions": output}
def resnetb_block(x, numFmOut, bottleneck_size, stride):
    """Defines a single bottleneck residual block (1 -> 3 -> 1 convolutions).

    The shortcut is parameter-free: it is average-pooled when the block
    strides by 2 and zero-padded along the channel axis when the block
    increases the number of feature maps.

    Args:
        x: block input; rank 5, 4 or 3 tensor (3D, 2D or 1D data).
        numFmOut: number of output feature maps of the block.
        bottleneck_size: number of feature maps inside the bottleneck.
        stride: stride of the middle convolution.

    Returns:
        output: tensor of the same rank as `x`, ReLU(shortcut + residual).
    """
    input_shape = x.get_shape().as_list()
    numFmIn = input_shape[-1]
    rank = len(input_shape)
    # Number of spatial axes: rank 5 -> 3D, rank 4 -> 2D, anything else -> 1D.
    if rank == 5:
        spatial_dims = 3
    elif rank == 4:
        spatial_dims = 2
    else:
        spatial_dims = 1

    # Residual branch: reduce, convolve (strided), restore.
    with tf.variable_scope('conv1x1x1_1'):
        residual = slim.convolution(x, bottleneck_size, 1, stride=1)
    with tf.variable_scope('conv3x3x3_2'):
        residual = slim.convolution(residual, bottleneck_size, 3, stride=stride)
    with tf.variable_scope('conv1x1x1_3'):
        residual = slim.convolution(residual, numFmOut, 1, stride=1,
                                    activation_fn=None)

    shortcut = x
    if stride == 2:
        # Match the spatial reduction of the residual branch.
        window = [2] * spatial_dims
        shortcut = tf.nn.pool(shortcut, window, "AVG",
                              padding='SAME', strides=window)

    if numFmOut > numFmIn:
        # Zero-pad the channel axis up to the new feature-map count.
        missing = numFmOut - numFmIn
        lower_pad = math.ceil(missing / 2)
        upper_pad = missing - lower_pad
        paddings = [[0, 0]] * (spatial_dims + 1) + [[lower_pad, upper_pad]]
        shortcut = tf.pad(shortcut, paddings)

    # Residual connection + activation.
    return tf.nn.relu(shortcut + residual)
def Inception3D(x, mdlParams, placeholders=None):
    """Defines the Inception3D architecture from the paper
    "A Deep Learning Approach for Pose Estimation from Volumetric OCT Data"

    Args:
        x: 5D input tensor, usually a placeholder of shape
            [batchSize, width, height, depth, channel]
        mdlParams: dictionary, contains model configuration. Keys read here:
            'num_inception_blocks', 'inception_dims_reduction',
            'inception_dims', 'inception_block_scale',
            'long_range_connection', 'module_scale' and 'tar_range'.
        placeholders: dictionary; ``placeholders['train_state']`` is used as
            the batch-norm ``is_training`` flag, so it must be provided
            despite the ``None`` default.
    Returns:
        output: 2D tensor of shape [batchSize, numberOfOutputs]
    """
    with tf.variable_scope('Inception3D'):
        with slim.arg_scope(
            [slim.convolution],
                padding='SAME',
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                normalizer_fn=slim.batch_norm,
                normalizer_params={
                    'is_training': placeholders['train_state'],
                    'epsilon': 0.0001,
                    'decay': 0.9,
                    'center': True,
                    'scale': True,
                    'activation_fn': None,
                    'updates_collections': tf.GraphKeys.UPDATE_OPS,
                    'fused': False
                }):
            # Initial part
            with tf.variable_scope('Initial'):
                layer = slim.convolution(x, 48, 3, stride=1, scope='conv1')
                layer = slim.convolution(layer, 64, 3, stride=2, scope='conv2')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv3')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv4')
                layer = slim.convolution(layer, 64, 3, stride=1, scope='conv5')

            # Inception modules
            with tf.variable_scope('Inception_Modules'):
                # Iterate through all modules
                for i in range(len(mdlParams['num_inception_blocks'])):
                    with tf.variable_scope('Module_%d' % (i)):
                        # Save for long-range connections
                        module_input = layer
                        # Input feature map size of the module, needed for
                        # long range connections
                        input_size = module_input.get_shape().as_list()[-1]

                        # First, apply reduction block
                        with tf.variable_scope('Reduction_Block'):
                            layer = inception_block(
                                layer,
                                mdlParams['inception_dims_reduction'][i, :],
                                stride=2,
                                scale=mdlParams['inception_block_scale'])

                        # Feature map size after the reduction block (sum of
                        # its three concatenated branches), needed for long
                        # range connections
                        red_fm_size = (
                            mdlParams['inception_dims_reduction'][i, 0] +
                            mdlParams['inception_dims_reduction'][i, 2] +
                            mdlParams['inception_dims_reduction'][i, 4])

                        # Then, add normal inception blocks
                        num_blocks = mdlParams['num_inception_blocks'][i]
                        for j in range(num_blocks):
                            with tf.variable_scope('Normal_Block_%d' % (j)):
                                layer = inception_block(
                                    layer,
                                    mdlParams['inception_dims'][i, :],
                                    stride=1,
                                    scale=mdlParams['inception_block_scale'],
                                    last=(j == num_blocks - 1))

                        # If desired, add long range connection from the input
                        if mdlParams['long_range_connection'][i] > 0:
                            # Long-range residual connection
                            if mdlParams['long_range_connection'][i] == 1:
                                with tf.variable_scope('resize_module'):
                                    adjusted_input = slim.convolution(
                                        module_input, red_fm_size, 1, stride=2)
                                # Add scaled residual connection
                                layer = (mdlParams['module_scale'] * layer +
                                         adjusted_input)
                            # Long-range dense connection
                            elif mdlParams['long_range_connection'][i] == 2:
                                pooled_input = slim.layers.avg_pool3d(
                                    module_input, 2)
                                # Fixed: this used to read the undefined name
                                # `input_dim` (NameError); the module's input
                                # depth is stored in `input_size`.
                                missing = red_fm_size - input_size
                                # int(): tf.pad needs integer paddings and
                                # math.ceil returns a float on Python 2.
                                lower_pad = int(math.ceil(missing / 2))
                                upper_pad = int(missing - lower_pad)
                                # Pad
                                adjusted_input = tf.pad(
                                    pooled_input,
                                    [[0, 0], [0, 0], [0, 0], [0, 0],
                                     [lower_pad, upper_pad]])
                                # Concat and adjust size with conv
                                target_size = layer.get_shape().as_list()[-1]
                                layer = tf.concat([layer, adjusted_input], 4)
                                layer = slim.convolution(
                                    layer, target_size, 1,
                                    scope='long_range_resize')

            # GAP for 1D,2D,3D
            if len(layer.get_shape().as_list()) == 5:
                layer = math_ops.reduce_mean(layer, axis=[1, 2, 3],
                                             keep_dims=False,
                                             name='global_pool')
            elif len(layer.get_shape().as_list()) == 4:
                layer = math_ops.reduce_mean(layer, axis=[1, 2],
                                             keep_dims=False,
                                             name='global_pool')
            else:
                layer = math_ops.reduce_mean(layer, axis=[1],
                                             keep_dims=False,
                                             name='global_pool')

            # Dense output layer
            output = slim.layers.fully_connected(layer,
                                                 len(mdlParams['tar_range']),
                                                 activation_fn=None)
    return output
def ResNeXt3D(x, mdlParams, placeholders=None):
    """Defines the ResNeXt3D architecture: a five-convolution stem followed by
    modules of ``resnext_block`` units, global average pooling and a linear
    output layer.

    Args:
        x: 5D input tensor, usually a placeholder of shape
            [batchSize, width, height, depth, channel]
        mdlParams: dictionary, contains model configuration. Keys read here:
            'ResNeXt3D_FM', 'ResNeXt3D_Red_FM', 'ResNeXt3D_Size',
            'ResNeXt3D_Stride', 'cardinality' and 'tar_range'.
        placeholders: dictionary; ``placeholders['train_state']`` is used as
            the batch-norm ``is_training`` flag.
    Returns:
        output: 2D tensor of shape [batchSize, numberOfOutputs]
    """
    with tf.variable_scope('ResNetA3D'):
        batch_norm_params = {
            'is_training': placeholders['train_state'],
            'epsilon': 0.0001,
            'decay': 0.9,
            'center': True,
            'scale': True,
            'activation_fn': None,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
            'fused': False
        }
        with slim.arg_scope(
            [slim.convolution],
                padding='SAME',
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params):
            # Stem: (filters, stride, scope) of the five initial convolutions.
            with tf.variable_scope('Initial'):
                layer = x
                for filters, conv_stride, name in ((48, 1, 'conv1'),
                                                   (64, 2, 'conv2'),
                                                   (64, 1, 'conv3'),
                                                   (64, 1, 'conv4'),
                                                   (64, 1, 'conv5')):
                    layer = slim.convolution(layer, filters, 3,
                                             stride=conv_stride, scope=name)

            with tf.variable_scope('Resnet_modules'):
                # Running feature map sizes (block output / bottleneck);
                # both are doubled at the first block of every module.
                output_fm = mdlParams['ResNeXt3D_FM']
                reduced_fm = mdlParams['ResNeXt3D_Red_FM']
                for i, num_blocks in enumerate(mdlParams['ResNeXt3D_Size']):
                    with tf.variable_scope('Module_%d' % (i)):
                        for j in range(num_blocks):
                            with tf.variable_scope('Block_%d' % (j)):
                                first_block = (j == 0)
                                if first_block:
                                    output_fm = 2 * output_fm
                                    reduced_fm = 2 * reduced_fm
                                    # Only the first block of a module
                                    # applies the configured stride.
                                    block_stride = mdlParams['ResNeXt3D_Stride'][i]
                                else:
                                    block_stride = 1
                                layer = resnext_block(layer, output_fm,
                                                      reduced_fm, block_stride,
                                                      mdlParams['cardinality'])

            # Global average pooling over all spatial axes (3D, 2D or 1D).
            rank = len(layer.get_shape().as_list())
            if rank == 5:
                spatial_axes = [1, 2, 3]
            elif rank == 4:
                spatial_axes = [1, 2]
            else:
                spatial_axes = [1]
            layer = math_ops.reduce_mean(layer, axis=spatial_axes,
                                         keep_dims=False, name='global_pool')

            # Dense output layer
            num_targets = len(mdlParams['tar_range'])
            output = slim.layers.fully_connected(layer, num_targets,
                                                 activation_fn=None)
    return output
def inception_block(x, inception_dims, stride, scale, last=False):
    """Defines a single inception block with a scaled residual connection.

    Args:
        x: block input, 5D tensor (channels on the last axis).
        inception_dims: sequence of at least five ints with the number of
            feature maps per unit: [0] the lone 1x1x1 branch, [1]/[2] the
            1x1x1 and 3x3x3 units of the second branch, [3] the 1x1x1 unit
            of the third branch and [4] both of its 3x3x3 units.
        stride: int, stride of the last convolution of every branch. With
            stride 1 the block keeps the input size; any other value makes
            it a reduction block.
        scale: scale applied to the residual before it is added.
        last: boolean, indicates whether this is the last block in a chain;
            the final ReLU of a stride-1 block is skipped when it is True.

    Returns:
        output: 5D tensor, output of the block.
    """
    # First: 1x1 layer
    with tf.variable_scope('conv1x1x1_1'):
        conv1x1x1_1 = slim.convolution(x, inception_dims[0], 1, stride=stride)
    # Second: 1x1 followed by 3x3
    with tf.variable_scope('conv1x1x1_2'):
        conv1x1x1_2 = slim.convolution(x, inception_dims[1], 1)
    with tf.variable_scope('conv3x3x3_2'):
        conv3x3x3_2 = slim.convolution(conv1x1x1_2, inception_dims[2], 3,
                                       stride=stride)
    # Third: 1x1 followed by 3x3 and 3x3
    with tf.variable_scope('conv1x1x1_3'):
        conv1x1x1_3 = slim.convolution(x, inception_dims[3], 1)
    with tf.variable_scope('conv3x3x3_3_1'):
        conv3x3x3_3_1 = slim.convolution(conv1x1x1_3, inception_dims[4], 3)
    with tf.variable_scope('conv3x3x3_3_2'):
        conv3x3x3_3_2 = slim.convolution(conv3x3x3_3_1, inception_dims[4], 3,
                                         stride=stride)
    # Concat along the channel axis
    output = tf.concat([conv1x1x1_1, conv3x3x3_2, conv3x3x3_3_2], 4)

    in_channels = x.get_shape().as_list()[-1]
    if stride == 1:
        # Project the concatenated branches back to the input channel count
        # so that they can be added to x.
        with tf.variable_scope('expand_output'):
            expanded_output = slim.convolution(output, in_channels, 1,
                                               activation_fn=None)
        # Residual connection with scale; the original spelled this sum out
        # identically in both branches of `if last`.
        output = scale * expanded_output + x
        # NOTE(review): the ReLU is read as belonging to the non-last branch
        # only (otherwise `last` would have no effect) - confirm.
        if not last:
            output = tf.nn.relu(output)
    else:
        # This is a reduction block, therefore adjust the input instead.
        # NOTE(review): pooling uses a fixed window of 2 with avg_pool3d's
        # default stride/padding - presumably stride is 2 here; confirm the
        # spatial shapes match the strided convolutions.
        pooled_input = slim.layers.avg_pool3d(x, 2)
        # Split the missing channels between both sides, the larger half
        # first. Pure integer arithmetic keeps the paddings integral
        # regardless of the interpreter's division semantics.
        channel_gap = output.get_shape().as_list()[-1] - in_channels
        lower_pad = -(-channel_gap // 2)
        upper_pad = channel_gap - lower_pad
        # Zero-pad the channel axis only
        adjusted_input = tf.pad(
            pooled_input,
            [[0, 0], [0, 0], [0, 0], [0, 0], [lower_pad, upper_pad]])
        # Residual connection with scale
        output = scale * output + adjusted_input
    return output
def resnext_block(x, numFmOut, bottleneck_size, stride, cardinality):
    """Defines a single resnext block, according to paper.

    Works for inputs with one, two or three spatial axes (rank 3, 4 or 5)
    and channels on the last axis.

    Args:
        x: block input tensor, e.g. 5D for volumes.
        numFmOut: int, number of feature maps to be outputted.
        bottleneck_size: int, number of feature maps for every path.
        stride: int, stride for the 3x3x3 convolutions. The shortcut is only
            downsampled when this equals 2.
        cardinality: int, number of paths.

    Returns:
        output: tensor of the same rank as x, output of the block.
    """
    input_shape = x.get_shape().as_list()
    # Number of input fms
    numFmIn = input_shape[-1]
    # Number of spatial axes; anything that is not 4D/5D is treated as 1D,
    # exactly like the original if/elif/else chains.
    num_spatial = {5: 3, 4: 2}.get(len(input_shape), 1)
    # The shortcut needs extra channels when the block widens the tensor
    increase_dim = numFmOut > numFmIn

    # Split into paths
    all_paths = []
    for i in range(cardinality):
        # First, 1x1 to bring FMs down to bottleneck size
        with tf.variable_scope('conv1x1x1_%d' % (i)):
            path = slim.convolution(x, bottleneck_size, 1, stride=1)
        # Then, 3x3, apply stride
        with tf.variable_scope('conv3x3x3_%d' % (i)):
            path = slim.convolution(path, bottleneck_size, 3, stride=stride)
        all_paths.append(path)
    # Concat all paths along the channel axis. The axis was hard-coded to 4,
    # which only exists for 5D input although the rest of the block handles
    # 1D/2D as well; -1 is the same axis for 5D and valid for every rank.
    layer = tf.concat(all_paths, axis=-1, name='concat_paths')
    # Restore FM size from concatenated paths
    with tf.variable_scope('conv1x1x1_restore'):
        layer = slim.convolution(layer, numFmOut, 1, stride=1,
                                 activation_fn=None)

    # Bring the shortcut to the shape of the residual branch
    adjusted_input = x
    if stride == 2:
        # Average-pool every spatial axis by 2 (1D<->2D<->3D)
        adjusted_input = tf.nn.pool(adjusted_input, [2] * num_spatial, "AVG",
                                    padding='SAME',
                                    strides=[2] * num_spatial)
    if increase_dim:
        # Zero-pad the channel axis, larger half first. Integer arithmetic
        # keeps the paddings integral irrespective of division semantics.
        channel_gap = numFmOut - numFmIn
        lower_pad = -(-channel_gap // 2)
        upper_pad = channel_gap - lower_pad
        paddings = [[0, 0]] * (num_spatial + 1) + [[lower_pad, upper_pad]]
        adjusted_input = tf.pad(adjusted_input, paddings)
    # Residual connection + activation
    output = tf.nn.relu(adjusted_input + layer)
    return output
def build(self):
    """Assembles the segmentation graph and its loss.

    Reads `self.is_training`, `self.input_batch`, `self.mask_batch`,
    `self.unet_weight` and (when the latter is truthy) `self.weight_batch`.
    Sets `self.logit` (single-channel map resized to 224x224),
    `self.output` (sigmoid of the logits), `self.loss` and `self.loss_opt`.

    Returns:
        The logit tensor (same object as `self.logit`).
    """
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/layers.py#L429
    batch_norm_params = {
        'is_training': self.is_training,
        'center': True,
        'scale': False,
        'decay': 0.9,
        'epsilon': 0.001,
        'fused': True,
        'zero_debias_moving_mean': True
    }
    activation = tf.nn.relu6

    def conv_bn_act(inputs, channels, kernel, scope):
        # Stride-1 SAME convolution + batch norm + relu6; every layer gets
        # its own truncated-normal initializer.
        return slim.convolution(
            inputs, channels, kernel, 1,
            padding='SAME',
            scope=scope,
            weights_initializer=tf.truncated_normal_initializer(
                mean=0.0, stddev=0.01),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            activation_fn=activation)

    net = self.input_batch
    features = []

    # Full-resolution stem (single pass; the index only feeds scope names).
    for stem in range(1):
        net = conv_bn_act(net, 128, [3, 3], 'preconv%d' % stem)
        features.append(net)
        net = conv_bn_act(net, 256, [3, 3], 'preconv%d-2' % stem)
        features.append(net)
        net = conv_bn_act(net, 32, [1, 1], 'preconv%d-b' % stem)

    # Conv + max-pool pyramid, doubling the width at every level.
    # NOTE(review): the flattened source does not show whether each pooled
    # map or only the last one is collected; per-level collection is assumed.
    conv_pool_size = 4
    for level in range(1, conv_pool_size + 1):
        net = conv_bn_act(net, int(64 * (2 ** (level - 1))), [3, 3],
                          'conv%d' % level)
        net = slim.max_pool2d(net, [3, 3], 2, padding='SAME',
                              scope='pool%d' % level)
        features.append(net)

    net = conv_bn_act(net, 256, [3, 3], 'conv%d' % (conv_pool_size + 1))
    features.append(net)

    # upsample every collected map to a common size and stack the channels
    features_up = []
    for feature in features:
        features_up.append(tf.image.resize_bilinear(feature, (112, 112)))
    net = tf.concat(axis=3, values=features_up, name='concat_features')

    net = conv_bn_act(net, 256, [1, 1], 'bottleneck')
    # Linear single-channel head: no normalizer, no activation
    net = slim.convolution(
        net, 1, [5, 5], 1,
        padding='SAME',
        scope='conv_last',
        weights_initializer=tf.truncated_normal_initializer(
            mean=0.0, stddev=0.01),
        normalizer_fn=None,
        activation_fn=None)
    net = tf.image.resize_bilinear(net, (224, 224))

    self.logit = net
    self.output = tf.nn.sigmoid(net, 'visualization')

    # Per-pixel weights when requested, otherwise uniform weighting
    w = self.weight_batch if self.unet_weight else 1.0
    self.loss = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=self.mask_batch,
        logits=self.logit,
        weights=w,
        reduction=Reduction.SUM_BY_NONZERO_WEIGHTS)
    self.loss_opt = self.loss
    return net
def create_model(self, model_input, vocab_size, num_frames,
                 is_training=True, **unused_params):
    """Creates a deep 1-D convolutional classifier over frame features.

    Five stages of stacked width-3 convolutions (1024 filters), each followed
    by batch normalization and max pooling, feed two 4096-unit fully
    connected layers (batch norm + dropout) and a sigmoid output layer.

    Args:
        model_input: rank-3 input tensor for the 1-D convolutions;
            presumably 'batch_size' x 'max_frames' x 'num_features'.
        vocab_size: number of output units (classes).
        num_frames: not used by this model.
        is_training: bool (or boolean tensor) forwarded to batch
            normalization and dropout. Defaults to True, which reproduces
            the previous hard-coded behaviour; pass False when building an
            evaluation/inference graph so that moving statistics are used
            and dropout is disabled.
        **unused_params: ignored.

    Returns:
        A dictionary whose 'predictions' entry holds the output of the
        sigmoid layer with `vocab_size` units.
    """
    # (number of stacked convolutions, max-pool window) for every stage;
    # all stages pool with stride 2.
    conv_stages = [(2, 2), (2, 2), (3, 3), (3, 3), (3, 2)]

    output = model_input
    for num_convs, pool_window in conv_stages:
        for _ in range(num_convs):
            output = slim.convolution(output, 1024, [3], stride=1,
                                      padding="SAME")
        output = tf.contrib.layers.batch_norm(
            output, center=True, scale=True, is_training=is_training,
            scope=None)
        output = slim.pool(output, [pool_window], "MAX", stride=2)

    output = slim.flatten(output)
    for _ in range(2):
        output = slim.fully_connected(output, 4096)
        output = tf.contrib.layers.batch_norm(
            output, center=True, scale=True, is_training=is_training,
            scope=None)
        output = slim.dropout(output, is_training=is_training)

    output = slim.fully_connected(output, vocab_size,
                                  activation_fn=tf.nn.sigmoid)
    return {"predictions": output}
def visual_features(self):
    """Builds the convolutional visual feature extractor.

    The uint8-style input images are rescaled to [-1, 1], reshaped to
    [batch, lookback + sequence, height, width, channels] and passed through
    four convolutions with dropout. After every convolution a linear
    128-unit projection is taken as a shortcut; the shortcuts are added to
    the output of the fully connected stack before ELU and layer norm.

    Returns:
        Tensor of shape [batch size, sequence length, feature size].
    """
    raw_images = self.input_images
    tf.add_to_collection("input_images", raw_images)
    # Map pixel values from [0, 255] to [-1, 1]
    images = -1.0 + 2.0 * tf.cast(raw_images, tf.float32) / 255.0

    bsize = self._bsize  # Batch size
    lblen = self._args.lookback_length  # Lookback length
    sqlen = self._sqlen  # Sequence length

    def conv_with_dropout(inputs, kernel, strides):
        # VALID convolution with 64 filters followed by dropout
        conv = slim.convolution(inputs, num_outputs=64, kernel_size=kernel,
                                stride=strides, padding="VALID")
        return tf.nn.dropout(x=conv, keep_prob=self._keep_prob)

    net = tf.reshape(
        images,
        shape=[bsize, lblen + sqlen, ds.HEIGHT, ds.WIDTH, ds.CHANNELS])

    # aux[1-4] are residual connections (shortcuts)
    shortcuts = []
    leading_convs = (
        ([3, 12, 12], [1, 6, 6]),
        ([2, 5, 5], [1, 2, 2]),
        ([2, 5, 5], [1, 1, 1]),
    )
    for kernel, strides in leading_convs:
        net = conv_with_dropout(net, kernel, strides)
        # Height x Width x Channel
        hwc = np.prod(net.get_shape().as_list()[2:])
        # Only the last `sqlen` time steps feed the shortcut
        recent = tf.reshape(net[:, -sqlen:, :, :, :], [bsize, sqlen, hwc])
        shortcuts.append(
            slim.fully_connected(recent, 128, activation_fn=None))

    net = conv_with_dropout(net, [2, 5, 5], [1, 1, 1])
    # At this point the tensor 'net' is of shape
    # batch_size x seq_len x Height x Width x Channel
    # Height x Width x Channel
    hwc = np.prod(net.get_shape().as_list()[2:])
    flat_shape = [bsize, sqlen, hwc]
    shortcuts.append(
        slim.fully_connected(tf.reshape(net, flat_shape), 128,
                             activation_fn=None))

    # Fully connected stack: 1024 -> 512 -> 256 (ReLU + dropout) -> 128
    net = tf.reshape(net, flat_shape)
    for width in (1024, 512, 256):
        net = slim.fully_connected(net, width, activation_fn=tf.nn.relu)
        net = tf.nn.dropout(x=net, keep_prob=self._keep_prob)
    net = slim.fully_connected(net, 128, activation_fn=None)

    # Add the shortcuts one after another, in the order they were created
    merged = net
    for shortcut in shortcuts:
        merged = merged + shortcut
    features = _layer_norm(tf.nn.elu(merged))

    num_outputs = features.get_shape().as_list()[-1]
    features = tf.reshape(features, [bsize, sqlen, num_outputs])
    features = tf.nn.dropout(x=features, keep_prob=self._keep_prob)
    return features
def create_model(self, model_input, vocab_size, num_frames,
                 is_training=True, **unused_params):
    """Creates a stacked-LSTM model followed by a deep 1-D CNN classifier.

    The frame features run through `FLAGS.lstm_layers` LSTM layers of
    `FLAGS.lstm_cells` units each. The per-frame LSTM outputs then pass
    through nine stages of width-3 convolutions with batch normalization and
    max pooling, two 4096-unit fully connected layers (batch norm + dropout)
    and a sigmoid output layer.

    Args:
        model_input: rank-3 input tensor for the RNN; presumably
            'batch_size' x 'max_frames' x 'num_features'.
        vocab_size: number of output units (classes).
        num_frames: passed to `tf.nn.dynamic_rnn` as `sequence_length`.
        is_training: bool (or boolean tensor) forwarded to batch
            normalization and dropout. Defaults to True, which reproduces
            the previous hard-coded behaviour; pass False when building an
            evaluation/inference graph so that moving statistics are used
            and dropout is disabled.
        **unused_params: ignored.

    Returns:
        A dictionary whose 'predictions' entry holds the output of the
        sigmoid layer with `vocab_size` units.
    """
    lstm_size = FLAGS.lstm_cells
    number_of_layers = FLAGS.lstm_layers

    stacked_lstm = tf.contrib.rnn.MultiRNNCell([
        tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=1.0)
        for _ in range(number_of_layers)
    ])
    # Only the per-frame outputs are used; the final state is discarded.
    output, _ = tf.nn.dynamic_rnn(stacked_lstm, model_input,
                                  sequence_length=num_frames,
                                  dtype=tf.float32)

    # (filters, stacked convolutions, pool window, pool stride) per stage
    conv_stages = [
        (1024, 2, 2, 2),
        (1024, 2, 2, 2),
        (1024, 3, 3, 2),
        (1024, 3, 3, 2),
        (1024, 3, 2, 2),
        (1024, 3, 2, 1),
        (1024, 3, 2, 1),
        (1024, 3, 2, 1),
        (512, 3, 2, 1),
    ]
    for filters, num_convs, pool_window, pool_stride in conv_stages:
        for _ in range(num_convs):
            output = slim.convolution(output, filters, [3], stride=1,
                                      padding="SAME")
        output = tf.contrib.layers.batch_norm(
            output, center=True, scale=True, is_training=is_training,
            scope=None)
        output = slim.pool(output, [pool_window], "MAX", stride=pool_stride)

    output = slim.flatten(output)
    for _ in range(2):
        output = slim.fully_connected(output, 4096)
        output = tf.contrib.layers.batch_norm(
            output, center=True, scale=True, is_training=is_training,
            scope=None)
        output = slim.dropout(output, is_training=is_training)

    output = slim.fully_connected(output, vocab_size,
                                  activation_fn=tf.nn.sigmoid)
    return {"predictions": output}
activation_fn=tf.identity, scope='g_conv6') conv6 = tf.nn.relu(conv6) conv7 = slim.convolution2d_transpose(conv6, 3, 3, stride=1, activation_fn=tf.identity, scope='g_conv7') G = tf.nn.tanh(conv7) with tf.variable_scope("D_net"): conv1 = slim.convolution(x, 64, 5, stride=2, activation_fn=tf.identity, scope='d_conv1') conv1 = leaky_relu(conv1) conv2 = slim.convolution(conv1, 128, 5, stride=2, normalizer_fn=slim.batch_norm, activation_fn=tf.identity, scope='d_conv2') conv2 = leaky_relu(conv2) conv3 = slim.convolution(conv2, 256,