def network_fn(X):
    """Small 1-D CNN encoder: three conv layers, then a 16-unit tanh head."""
    buffer_size = X.shape[1]
    net = X
    net = layers.conv1d(net, 5, 3, scope='cnn1d_c1')
    net = layers.conv1d(net, 5, 3, scope='cnn1d_c2')
    net = layers.conv1d(net, 1, 3, scope='cnn1d_c3')
    # Collapse the single output channel back to a flat (batch, buffer_size).
    net = tf.reshape(net, [-1, buffer_size])
    net = fc(net, 'cnn1d_fc1', nh=16, init_scale=np.sqrt(2))
    net = tf.tanh(net)
    return net
def network_fn(X, action):
    """Deeper variant that also conditions on an action vector (critic-style)."""
    buffer_size = X.shape[1]
    net = X
    net = layers.conv1d(net, 20, 5, scope='cnn1d_c1')
    net = layers.conv1d(net, 15, 3, scope='cnn1d_c2')
    net = layers.conv1d(net, 10, 3, scope='cnn1d_c3')
    net = layers.conv1d(net, 5, 3, scope='cnn1d_c4')
    net = layers.conv1d(net, 1, 3, scope='cnn1d_c5')
    net = tf.reshape(net, [-1, buffer_size])
    # Concatenate the action features before the fully connected stack.
    net = tf.concat([net, action], 1)
    net = fc(net, 'cnn1d_fc1', nh=32, init_scale=np.sqrt(2))
    net = fc(net, 'cnn1d_fc2', nh=24, init_scale=np.sqrt(2))
    net = fc(net, 'cnn1d_fc3', nh=16, init_scale=np.sqrt(2))
    net = tf.tanh(net)
    return net
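# --- Usage sketch (not from the source) ---
# Illustrative shapes for the critic-style variant above; `buffer_size` and
# `action_dim` are assumed placeholders, and contrib-style layers.conv1d plus
# a baselines-style fc helper are assumed to be in scope as in the snippets.
buffer_size, action_dim = 64, 4
X = tf.placeholder(tf.float32, shape=[None, buffer_size, 1])
action = tf.placeholder(tf.float32, shape=[None, action_dim])
net_out = network_fn(X, action)  # -> (batch, 16), tanh-squashed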
def build_sequence(self, char_seq_set_size, embed_dim, filter_num,
                   seq_window_len):
    seq_embed = tf.Variable(
        tf.random_normal([char_seq_set_size + 1, embed_dim]))
    enc_seq = tf.nn.embedding_lookup(seq_embed, self.seq)
    enc_seq = layers.conv1d(enc_seq, filter_num, seq_window_len,
                            padding='VALID')
    enc_seq = layers.conv1d(enc_seq, filter_num * 2, seq_window_len,
                            padding='VALID')
    enc_seq = layers.conv1d(enc_seq, filter_num * 3, seq_window_len,
                            padding='VALID')
    enc_seq = tf.keras.layers.GlobalAveragePooling1D()(enc_seq)
    return enc_seq
def __init__(self, filter_num, smi_window_len, seq_window_len, max_smi_len,
             max_seq_len, char_smi_set_size, char_seq_set_size, embed_dim):
    self.smi = tf.placeholder(shape=[None, max_smi_len], dtype=tf.int32)
    self.seq = tf.placeholder(shape=[None, max_seq_len], dtype=tf.int32)
    self.labels = tf.placeholder(shape=[None, 1], dtype=tf.int32)
    self.training = tf.placeholder(dtype=tf.bool)

    # Separate embedding tables for the SMILES and sequence alphabets.
    self.smi_embed = tf.Variable(
        tf.random_normal([char_smi_set_size + 1, embed_dim]))
    self.seq_embed = tf.Variable(
        tf.random_normal([char_seq_set_size + 1, embed_dim]))

    # SMILES branch: three widening conv layers, then global average pooling.
    enc_smi = tf.nn.embedding_lookup(self.smi_embed, self.smi)
    enc_smi = layers.conv1d(enc_smi, filter_num, smi_window_len,
                            padding='VALID')
    enc_smi = layers.conv1d(enc_smi, filter_num * 2, smi_window_len,
                            padding='VALID')
    enc_smi = layers.conv1d(enc_smi, filter_num * 3, smi_window_len,
                            padding='VALID')
    enc_smi = tf.keras.layers.GlobalAveragePooling1D()(enc_smi)

    # Protein-sequence branch, mirroring the SMILES branch.
    enc_seq = tf.nn.embedding_lookup(self.seq_embed, self.seq)
    enc_seq = layers.conv1d(enc_seq, filter_num, seq_window_len,
                            padding='VALID')
    enc_seq = layers.conv1d(enc_seq, filter_num * 2, seq_window_len,
                            padding='VALID')
    enc_seq = layers.conv1d(enc_seq, filter_num * 3, seq_window_len,
                            padding='VALID')
    enc_seq = tf.keras.layers.GlobalAveragePooling1D()(enc_seq)

    # Fuse both encodings and run the fully connected head. Note that
    # contrib layers.dropout interprets the second argument as keep_prob.
    flatten = tf.concat([enc_smi, enc_seq], -1)
    fc1 = layers.fully_connected(flatten, 1024)
    drop1 = layers.dropout(fc1, 0.1, is_training=self.training)
    fc2 = layers.fully_connected(drop1, 1024)
    drop2 = layers.dropout(fc2, 0.1, is_training=self.training)
    self.fc3 = layers.fully_connected(drop2, 512)

    # The original stored the initializer function uncalled, which
    # tf.Session.run cannot execute; store the op instead.
    self.init = tf.global_variables_initializer()
    self.saver = tf.train.Saver()
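# --- Usage sketch (not from the source) ---
# The __init__ above presumably belongs to a model class; `DTAModel` and all
# hyperparameter values below are hypothetical placeholders.
model = DTAModel(filter_num=32, smi_window_len=4, seq_window_len=8,
                 max_smi_len=100, max_seq_len=1000,
                 char_smi_set_size=64, char_seq_set_size=25, embed_dim=128)
with tf.Session() as sess:
    sess.run(model.init)  # the initializer op created in __init__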
def CausalConv(x, dilation_rate, filters, kernel_size=2, scope=""):
    """Performs causal dilated 1D convolutions.

    Args:
      x: Tensor of shape (batch_size, steps, input_dim).
      dilation_rate: Dilation rate of convolution.
      filters: Number of convolution filters.
      kernel_size: Width of convolution kernel. SNAIL paper uses 2 for all
        experiments.
      scope: Variable scope for this layer.

    Returns:
      y: Tensor of shape (batch_size, steps, filters); the causal padding
        keeps the step count unchanged.
    """
    with tf.variable_scope(scope):
        causal_pad_size = (kernel_size - 1) * dilation_rate
        # Pad only the left of the sequence dimension so no output step can
        # see future inputs; the VALID conv then preserves sequence length.
        x = tf.pad(x, [[0, 0], [causal_pad_size, 0], [0, 0]])
        return layers.conv1d(x, filters, kernel_size=kernel_size,
                             padding="VALID", rate=dilation_rate)
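# --- Stacking sketch (not from the source) ---
# The SNAIL reference in the docstring suggests stacking these layers with
# dilation doubling per layer, so the receptive field grows exponentially
# with depth. `tc_block` is a hypothetical helper built only from the
# CausalConv function above.
def tc_block(x, filters, num_layers, scope='tc_block'):
    with tf.variable_scope(scope):
        for i in range(num_layers):
            x = CausalConv(x, dilation_rate=2 ** i, filters=filters,
                           scope='causal_conv_%d' % i)
    return x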
def mnet_v6d6_embed(inputs: MNetV6Inputs,
                    embed_sc: MNetV6EmbedScope,
                    consts: MNetV6Consts,
                    nc: MNetV6Config,
                    coord_sys,
                    scope=None):
    inputs_obs = inputs.X
    with tf.variable_scope(scope, default_name='mnet_v6_embed'):
        # unit list embeddings
        # low-level unit embeddings
        u_embed, u_coor, u_is_selected_mask, u_was_tar_mask = _units_embed_block(
            inputs=inputs_obs, embed_sc=embed_sc, nc=nc)
        # higher-level unit embeddings
        if nc.trans_version == 'v1':
            enhanced_units_embed, embedded_unit = _transformer_block(
                units_embed=u_embed,
                units_mask=inputs_obs['MASK_LEN'],
                nc=nc)
        elif nc.trans_version == 'v2':
            enhanced_units_embed, embedded_unit = _transformer_block_v2(
                units_embed=u_embed,
                units_mask=inputs_obs['MASK_LEN'],
                nc=nc)
        elif nc.trans_version == 'v3':
            enhanced_units_embed, embedded_unit = _transformer_block_v3(
                units_embed=u_embed,
                units_mask=inputs_obs['MASK_LEN'],
                nc=nc)
        elif nc.trans_version == 'v4':
            enhanced_units_embed, embedded_unit = _transformer_block_v4(
                units_embed=u_embed,
                units_mask=inputs_obs['MASK_LEN'],
                enc_dim=nc.enc_dim,
                out_fc_dim=nc.enc_dim,
                nc=nc)
        elif nc.trans_version == 'v5':
            enhanced_units_embed, embedded_unit = _transformer_block_v5(
                units_embed=u_embed,
                units_mask=inputs_obs['MASK_LEN'],
                enc_dim=nc.enc_dim,
                out_fc_dim=nc.enc_dim,
                nc=nc)
        else:
            raise NotImplementedError

        # scatter units to img
        # (bs, 600, dim)
        lowdim_units_embed = tfc_layers.conv1d(enhanced_units_embed, 32, 1)
        # (bs, 600, 32)
        scattered_embed = _scatter_units_block(
            inputs_units_embed=lowdim_units_embed,
            inputs_xy=u_coor,
            coord_sys=coord_sys,
            nc=nc)  # (bs, 128, 128, 32)

        # joint unit-map spatial embeddings
        map_skip, spa_vec_embed = _spa_embed_block_v2(
            inputs_img=inputs_obs['X_IMAGE'],
            inputs_additonal_img=scattered_embed,
            nc=nc)

        # global feature embeddings
        ab_mask_embed = None  # aka "available_actions"
        # vector embeddings
        if nc.vec_embed_version == 'v2':
            vec_embed = _vec_embed_block_v2(inputs=inputs_obs,
                                            enc_dim=nc.enc_dim)
        elif nc.vec_embed_version in ('v2d1', 'v2.1'):
            vec_embed, ab_mask_embed = _vec_embed_block_v2d1(
                inputs=inputs_obs, enc_dim=nc.enc_dim)
        elif nc.vec_embed_version == 'v3':
            vec_embed = _vec_embed_block_v3(inputs=inputs_obs,
                                            enc_dim=nc.enc_dim)
        elif nc.vec_embed_version in ('v3d1', 'v3.1'):
            vec_embed, ab_mask_embed = _vec_embed_block_v3d1(
                inputs=inputs_obs, enc_dim=nc.enc_dim)
        else:
            raise NotImplementedError('unknown vec_embed_version: {}'.format(
                nc.vec_embed_version))

        # last actions embeddings
        if nc.last_act_embed_version == 'v1':
            last_actions_embed = _last_action_embed_block_mnet_v6(
                inputs=inputs_obs,
                inputs_arg_mask=consts.arg_mask,
                ab_embed_sc=embed_sc.ab_embed_sc,
                nc=nc)
        elif nc.last_act_embed_version == 'v2':
            last_actions_embed = _last_action_embed_block_mnet_v6_v2(
                inputs=inputs_obs,
                inputs_arg_mask=consts.arg_mask,
                ab_embed_sc=embed_sc.ab_embed_sc,
                nc=nc)
        else:
            raise NotImplementedError(
                'unknown last_act_embed_version: {}'.format(
                    nc.last_act_embed_version))

        # zstat embeddings
        zstat_embed = _zstat_embed(inputs_obs, nc)

        # integrate the features
        int_embed = tf.concat([
            embedded_unit, spa_vec_embed, vec_embed, last_actions_embed,
            zstat_embed
        ], axis=-1)

        # lstm embeddings
        hs_new = None
        lstm_embed = None
        if nc.use_lstm:
            lstm_embed, hs_new = _lstm_embed_block(inputs_x=int_embed,
                                                   inputs_hs=inputs.S,
                                                   inputs_mask=inputs.M,
                                                   nc=nc)
            int_embed = tf.concat([int_embed, lstm_embed], axis=-1)

        # used for burn-in
        if nc.fix_all_embed:
            int_embed = tf.stop_gradient(int_embed)
            enhanced_units_embed = tf.stop_gradient(enhanced_units_embed)
            embedded_unit = tf.stop_gradient(embedded_unit)
            spa_vec_embed = tf.stop_gradient(spa_vec_embed)
            vec_embed = tf.stop_gradient(vec_embed)
            if ab_mask_embed is not None:
                ab_mask_embed = tf.stop_gradient(ab_mask_embed)
            zstat_embed = tf.stop_gradient(zstat_embed)
            map_skip = [tf.stop_gradient(m) for m in map_skip]
            if nc.use_lstm:
                lstm_embed = tf.stop_gradient(lstm_embed)

    return MNetV6Embed(
        units_embed=MNetV6UnitEmbed(units_embed=enhanced_units_embed,
                                    embedded_unit=embedded_unit),
        spa_embed=MNetV6SpaEmbed(map_skip=map_skip,
                                 spa_vec_embed=spa_vec_embed),
        vec_embed=MNetV6VecEmbed(vec_embed=vec_embed,
                                 ab_mask_embed=ab_mask_embed),
        int_embed=int_embed,
        zstat_embed=zstat_embed,
        lstm_embed=lstm_embed,
    ), hs_new
def create_model(albert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, aspechts_char):
    model = modeling.AlbertModel(config=albert_config,
                                 is_training=is_training,
                                 input_ids=input_ids,
                                 input_mask=input_mask,
                                 token_type_ids=segment_ids,
                                 use_one_hot_embeddings=False)
    context_embedding = model.get_sequence_output()

    aspects_ch = tf.expand_dims(tf.get_variable(initializer=aspechts_char,
                                                name='aspect_char',
                                                dtype=tf.int64,
                                                trainable=False),
                                axis=0)
    with tf.variable_scope("aspect_layer"):
        embedding_matrix = tf.get_variable(
            name='embedding_ch',
            shape=[len(params.char2id.keys()), params.embedding_dim],
            trainable=True,
            dtype=tf.float32)
        aspects_ch = tf.reshape(aspects_ch, shape=[-1, params.max_char_len])
        aspect_inputs = tf.nn.embedding_lookup(embedding_matrix, aspects_ch)
        aspects_ch_lens = get_setence_length(aspects_ch, "aspects_ch_lens")
        aspects_ch_lens = tf.reshape(aspects_ch_lens, [-1])
        cell_fw = tf.contrib.rnn.GRUCell(params.hiden_sizes)
        cell_bw = tf.contrib.rnn.GRUCell(params.hiden_sizes)
        _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, aspect_inputs, aspects_ch_lens,
            dtype=tf.float32)
        aspect_emb = tf.concat([state_fw, state_bw], axis=1)
        aspect_emb = tf.reshape(aspect_emb, [
            -1, params.n_class, 2 * params.hiden_sizes * params.max_aspect_len
        ])
        new_aspects = fully_connected(aspect_emb, params.kernel_num,
                                      activation_fn=None)
        all_aspects = tf.split(new_aspects, [1] * params.n_class, 1)

    with tf.variable_scope('context_layer'):
        content_reps = conv1d(context_embedding, params.kernel_num,
                              params.kernel_sizes)
        if is_training:
            content_reps = tf.nn.dropout(content_reps,
                                         keep_prob=params.dropout_keep)

    with tf.variable_scope('gate_cnn'):
        represent_reps = []
        for idx, a_aspect in enumerate(all_aspects):
            with tf.variable_scope("context_conv_" + str(idx),
                                   reuse=tf.AUTO_REUSE):
                aspect_rel_reps = conv1d(context_embedding, params.kernel_num,
                                         params.kernel_sizes)
                x = tf.multiply(tf.nn.relu(a_aspect + aspect_rel_reps),
                                content_reps)
            with tf.variable_scope('represent_conv_' + str(idx)):
                repre = conv1d(x, params.kernel_num, params.kernel_sizes)
                max_pool_repre = tf.layers.max_pooling1d(
                    repre,
                    repre.get_shape().as_list()[1],
                    repre.get_shape().as_list()[1])
                repre_last = tf.squeeze(max_pool_repre, axis=1)
            with tf.variable_scope('full_connect_' + str(idx)):
                if is_training:
                    repre_last = tf.nn.dropout(repre_last, keep_prob=0.9)
                output_repre = fully_connected(
                    repre_last,
                    params.n_sub_class,
                    activation_fn=None,
                    weights_initializer=tf.random_uniform_initializer(
                        -0.1, 0.1),
                    weights_regularizer=tf.contrib.layers.l2_regularizer(
                        params.l2_reg))
                represent_reps.append(output_repre)

    with tf.variable_scope("output_layer"):
        logit = tf.concat(represent_reps, 1)
        logit = tf.reshape(logit, [-1, params.n_class, params.n_sub_class])
        predictions = tf.argmax(logit, axis=-1, output_type=tf.int32)
        probabilities = tf.nn.softmax(logit, axis=-1)
        log_probs = tf.nn.log_softmax(logit, axis=-1)
        per_example_loss = -tf.reduce_sum(
            tf.cast(labels, dtype=tf.float32) * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss, name='loss')

    return (loss, probabilities, predictions)
def pointnet(points,
             output_feature_count,
             apply_learned_ortho_tx=False,
             apply_learned_64d_tx=True,
             use_bad_reduce=False,
             nerfify=False,
             maxpool_feature_count=1024,
             use_gpu=True):
    """Applies pointnet to an input set of point features.

    Args:
      points: Tensor with shape [batch_size, point_count, feature_count].
      output_feature_count: The number of features in the final linear layer.
      apply_learned_ortho_tx: Whether to apply the learned transformation to
        the input points.
      apply_learned_64d_tx: Whether to apply the 64x64 learned orthogonal
        transform.
      use_bad_reduce: Whether to use the original slow 'maxpool2d' global max
        reduce. Only still an option for compatibility with existing trained
        networks.
      nerfify: Whether to apply the math_util.nerfify function to the features
        (all of them, not just the points) after the initial transform step.
      maxpool_feature_count: Integer. The number of features in the vector
        before doing a global maxpool. This is the main computational
        bottleneck, so reducing it is good for training time.
      use_gpu: Whether to assume a GPU is available.

    Returns:
      embedding: Tensor with shape [batch_size, embedding_length].
    """
    batch_size, point_count, feature_count = points.get_shape().as_list()
    point_positions = points[..., 0:3]
    point_features = points[..., 3:]
    feature_count = points.get_shape().as_list()[-1] - 3
    with tf.variable_scope('pointnet', reuse=tf.AUTO_REUSE):
        if apply_learned_ortho_tx:
            with tf.variable_scope('learned_transformation'):
                transformation, translation = point_set_to_transformation(
                    points)
                transformed_points = tf.matmul(point_positions + translation,
                                               transformation)
                if feature_count > 0:
                    transformed_points = tf.concat(
                        [transformed_points, point_features], axis=2)
                points = transformed_points
        # Go from NWC to NCW so that the final reduce can be faster.
        assert len(points.shape) == 3
        net = points
        if nerfify:
            net = math_util.nerfify(net, 10, flatten=True, interleave=False)
        # On the GPU, NCW is substantially faster, but there is no NCW CPU
        # kernel, so in CPU mode we have to do NWC convolutions.
        if use_gpu:
            net = tf.transpose(net, perm=[0, 2, 1])
            data_format = 'NCW'
            reduce_dim = 2
        else:
            data_format = 'NWC'
            reduce_dim = 1
        # Apply the 'mlp 64, 64' layers:
        with tf.variable_scope('mlp_block_1'):
            net = contrib_layers.conv1d(net,
                                        num_outputs=64,
                                        kernel_size=1,
                                        padding='VALID',
                                        stride=1,
                                        data_format=data_format,
                                        scope='conv1')
            net = contrib_layers.conv1d(net,
                                        num_outputs=64,
                                        kernel_size=1,
                                        padding='VALID',
                                        stride=1,
                                        data_format=data_format,
                                        scope='conv2')
        if apply_learned_64d_tx:
            if use_gpu:
                net = tf.transpose(net, perm=[0, 2, 1])
            with tf.variable_scope('learned_feature_transformation'):
                feature_transformation = point_set_to_feature_transformation(
                    net, output_dimensionality=64)
                net = tf.matmul(
                    tf.reshape(net, [batch_size, point_count, 64]),
                    feature_transformation)
                # NOTE: this leaves `net` rank-4 while the conv1d calls below
                # expect rank-3; it looks like a leftover from a conv2d
                # variant of this network.
                net = tf.expand_dims(net, axis=2)
            if use_gpu:
                net = tf.transpose(net, perm=[0, 2, 1])
        # Second MLP block
        with tf.variable_scope('mlp_block_2'):
            net = contrib_layers.conv1d(net,
                                        num_outputs=64,
                                        kernel_size=1,
                                        padding='VALID',
                                        stride=1,
                                        data_format=data_format,
                                        scope='conv1')
            net = contrib_layers.conv1d(net,
                                        num_outputs=128,
                                        kernel_size=1,
                                        padding='VALID',
                                        stride=1,
                                        data_format=data_format,
                                        scope='conv2')
            net = contrib_layers.conv1d(
                net,
                num_outputs=maxpool_feature_count,  # TODO(kgenova) A bottleneck.
                kernel_size=1,
                padding='VALID',
                stride=1,
                data_format=data_format,
                scope='conv3')
        assert len(net.get_shape().as_list()) == 3
        if use_bad_reduce:
            raise ValueError('Bad Reduce is not supported with pointnet1d.')
        # Global max pool over the point dimension.
        net = tf.reduce_max(net, axis=reduce_dim)
        # Final MLP
        with tf.variable_scope('final_mlp'):
            net = contrib_layers.fully_connected(net,
                                                 num_outputs=512,
                                                 activation_fn=tf.nn.relu,
                                                 scope='fc1')
            net = contrib_layers.fully_connected(net,
                                                 num_outputs=256,
                                                 activation_fn=tf.nn.relu,
                                                 scope='fc2')
            net = contrib_layers.fully_connected(
                net,
                num_outputs=output_feature_count,
                activation_fn=None,
                scope='final_fc')
    return net
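# --- Usage sketch (not from the source) ---
# Static shapes are required because pointnet reads them via
# get_shape().as_list(); the flag values below are assumptions chosen so no
# helper sub-networks (transforms, nerfify) are needed.
points = tf.placeholder(tf.float32, shape=[8, 1024, 3])
embedding = pointnet(points,
                     output_feature_count=128,
                     apply_learned_ortho_tx=False,
                     apply_learned_64d_tx=False,
                     use_gpu=False)  # -> shape [8, 128]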
# 2. weights
w_s1_0 = tf.get_variable(name='w_s1_0',
                         shape=(1, 128, 64),
                         dtype=tf.float32,
                         initializer=tf.truncated_normal_initializer(),
                         trainable=True)
w_s2_0 = tf.get_variable(name='w_s2_0',
                         shape=(1, 64, d1),
                         dtype=tf.float32,
                         initializer=tf.truncated_normal_initializer(),
                         trainable=True)
# Tile the attention weights across the batch so batched tf.matmul works.
w_s1 = tf.tile(w_s1_0, multiples=[batch_size, 1, 1], name='w_s1')
w_s2 = tf.tile(w_s2_0, multiples=[batch_size, 1, 1], name='w_s2')

# 3. build the model
conv_output = conv1d(inputs=x_batch, num_outputs=128, kernel_size=1,
                     stride=1)  # (batch_size, 300, 128)
conv_output = batch_norm(conv_output, is_training=is_training)

# Attention model
tmp = tf.tanh(tf.matmul(conv_output, w_s1))
A = tf.sigmoid(tf.matmul(tmp, w_s2))
M = tf.matmul(A, conv_output, transpose_a=True)
# Penalize deviation of A A^T from the identity to discourage redundant
# attention weights.
p = tf.norm(tf.matmul(A, A, transpose_b=True) - tf.eye(num_rows=d2), ord=2)
penalty = p * p / batch_size

# S-LSTM
with tf.variable_scope("S-LSTM", reuse=tf.AUTO_REUSE):
    cell = tf.nn.rnn_cell.LSTMCell(num_units=256, reuse=tf.AUTO_REUSE)
    initial_state = cell.zero_state(batch_size, dtype=tf.float32)
    # rnn_output, _ = tf.nn.dynamic_rnn(cell, M, dtype=tf.float32,
    #                                   initial_state=initial_state)
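# Shape walk-through for the attention block above (d2 is the sequence
# length, 300 per the inline comment; d1 is the number of attention columns;
# both are assumed from the surrounding code):
#   conv_output:                  (batch, d2, 128)
#   tmp = tanh(conv_output@w_s1): (batch, d2, 64)
#   A   = sigmoid(tmp@w_s2):      (batch, d2, d1)
#   M   = A^T @ conv_output:      (batch, d1, 128)
#   penalty ~ ||A A^T - I_{d2}||^2 / batch_size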
def net(self, inputs, num_classes, is_training, reuse, scope):
    with tf.variable_scope(scope, 'cnn_v1', [inputs], reuse=reuse) as sc:
        with arg_scope(
                [layers.batch_norm],
                is_training=is_training,
                decay=0.9,
                epsilon=1e-3,
                scale=True,
                param_initializers={
                    'beta': tf.constant_initializer(value=0),
                    'gamma': tf.random_normal_initializer(mean=1,
                                                          stddev=0.045),
                    'moving_mean': tf.constant_initializer(value=0),
                    'moving_variance': tf.constant_initializer(value=1)
                }):
            with arg_scope(
                    [layers_lib.conv1d, layers_lib.fully_connected],
                    activation_fn=None,
                    normalizer_fn=None,
                    weights_regularizer=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.constant_initializer(0.001)):
                end_points = {}
                conv1 = layers_lib.conv1d(inputs, 1000, [5], stride=1,
                                          padding='SAME', scope='conv1')
                conv1r = layers.batch_norm(conv1, activation_fn=tf.nn.relu,
                                           scope='bn1')
                conv2 = layers_lib.conv1d(conv1r, 1000, [7], stride=2,
                                          padding='SAME', scope='conv2')
                conv2r = layers.batch_norm(conv2, activation_fn=tf.nn.relu,
                                           scope='bn2')
                conv3 = layers_lib.conv1d(conv2r, 1000, [1], stride=1,
                                          padding='SAME', scope='conv3')
                conv3r = layers.batch_norm(conv3, activation_fn=tf.nn.relu,
                                           scope='bn3')
                conv4 = layers_lib.conv1d(conv3r, 1500, [1], stride=1,
                                          padding='SAME', scope='conv4')
                conv4r = layers.batch_norm(conv4, activation_fn=tf.nn.relu,
                                           scope='bn4')
                # Global average pooling over the time dimension.
                mean = tf.reduce_mean(conv4r, 1, keep_dims=True)
                res1 = tf.squeeze(mean, axis=1)
                fc1 = layers_lib.fully_connected(res1, 1500, scope='fc1')
                end_points[sc.name + '/fc1'] = fc1
                fc1_bn = layers.batch_norm(fc1, activation_fn=None,
                                           scope='bn5')
                fc2 = layers_lib.fully_connected(fc1_bn, 600, scope='fc2')
                end_points[sc.name + '/fc2'] = fc2
                fc2_bn = layers.batch_norm(fc2, activation_fn=tf.nn.relu,
                                           scope='bn6')
                fc3 = layers_lib.fully_connected(fc2_bn, num_classes,
                                                 scope='fc3')
                end_points['predictions'] = fc3
                return end_points
    model.add(Dense(units=32))
    return model


model = build_model()
output_s = model(x_s_lstm)

# M-LSTM (1)
with tf.variable_scope("M-LSTM-1", reuse=tf.AUTO_REUSE):
    cell_1 = SkipLSTMCell(num_units=64)
    initial_state_1 = cell_1.trainable_initial_state(batch_size=batch_size)
    # Project the multi-feature input down to one channel before the RNN.
    hidden_1 = conv1d(x_m_lstm, num_outputs=1, kernel_size=1,
                      padding='VALID', stride=1,
                      weights_regularizer=l2_regularizer(scale=1.0e-3))
    rnn_outputs_1, _ = tf.nn.dynamic_rnn(cell_1, hidden_1, dtype=tf.float32,
                                         initial_state=initial_state_1)
    rnn_outputs_1 = rnn_outputs_1.h[:, -1, :]
    hidden_2 = dropout(inputs=rnn_outputs_1, keep_prob=0.7)
    output_1 = fully_connected(hidden_2, num_outputs=32)

# M-LSTM (2)
with tf.variable_scope("M-LSTM-2", reuse=tf.AUTO_REUSE):
    cell_2 = SkipLSTMCell(num_units=64)
    initial_state_2 = cell_2.trainable_initial_state(batch_size=batch_size)
x = tf.placeholder(dtype=tf.float32, shape=(None, d2, d), name='x_train')
y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='y_true')

# 2. weights
# 3. build the model
# M-LSTM (1)
with tf.variable_scope("M-LSTM-1", reuse=tf.AUTO_REUSE):
    cell_1 = SkipLSTMCell(num_units=64)
    initial_state_1 = cell_1.trainable_initial_state(batch_size=batch_size)
    hidden_1 = conv1d(x, num_outputs=1, kernel_size=1, padding='VALID',
                      stride=1)
    rnn_outputs_1, _ = tf.nn.dynamic_rnn(cell_1, hidden_1, dtype=tf.float32,
                                         initial_state=initial_state_1)
    rnn_outputs_1 = rnn_outputs_1.h[:, -1, :]
    hidden_2 = dropout(inputs=rnn_outputs_1, keep_prob=0.7)
    output_1 = fully_connected(hidden_2, num_outputs=32,
                               weights_regularizer=l2_regularizer(0.01))

# M-LSTM (2)
with tf.variable_scope("M-LSTM-2", reuse=tf.AUTO_REUSE):
    cell_2 = SkipLSTMCell(num_units=64)