def decoder(self, dec_inputs, pre_state, encoder_outputs, encoder_len, decode=False):
    '''
    inputs:
        dec_inputs: A list of 2D Tensors [batch_size, embedding_size]
        encoder_outputs: [batch_size, seq_len, hidden_size]
        pre_state: [batch_size, hidden_size]
    returns:
        outputs: A list of 2D Tensors, the same length as dec_inputs
        state: the final state of the decoder
    '''
    batch_size, _, hidden_size = encoder_outputs.get_shape().as_list()
    with tf.variable_scope('decoder'):
        # context_vector = tf.zeros([batch_size, hidden_size], dtype=tf.float32)
        # if decode:
        #     context_vector, _ = self.attention(pre_state, encoder_outputs, encoder_len)
        state = pre_state
        outputs = []
        for i, dec_input in enumerate(dec_inputs):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            context_vector, _ = self.attention(state, encoder_outputs, encoder_len)
            dec_size = dec_input.get_shape()[-1]
            x = linear([dec_input] + [context_vector], dec_size, True)  # merge input and context
            output, state = self.cell(x, state)
            with tf.variable_scope('decoder_output'):
                output = linear([output] + [context_vector], self.cell.output_size, True)
            outputs.append(output)
            # once we have the output y_i, we can compute the attention for the next time step
            # in decode mode, we decode only one step for each run
            # if not decode:
            #     context_vector, _ = self.attention(state, encoder_outputs, encoder_len)
        return outputs, state
def discriminator(self, inputs_logits, num_blocks=3, use_bias=False, num_classes=1):
    '''
    The discriminator scores the distribution of time and event.
    If the time is consistent with the history times, give a high score.
    If it is not, give a low score.
    Implementation: CNN
    '''
    with tf.variable_scope('Discriminator'):
        inputs = tf.transpose(inputs_logits, [0, 2, 1])
        output = utils.conv1d('D.Input', 1, self.filter_output_dim, self.filter_size, inputs)
        output = self.res_block('D.1', output)
        output = self.res_block('D.2', output)
        output = self.res_block('D.3', output)
        output = self.res_block('D.4', output)
        output = self.res_block('D.5', output)
        output = tf.reshape(output, [-1, self.num_steps * self.filter_output_dim])
        output = utils.linear('D.Output', self.num_steps * self.filter_output_dim, 1, output)
        return output
def attention(self, decoder_state, encoder_outputs, encoder_len, reuse=None):
    hidden_size = encoder_outputs.get_shape().as_list()[-1]
    max_len = tf.shape(encoder_outputs)[1]
    with tf.variable_scope('Attention', reuse=reuse):
        encoder_outputs_ = tf.expand_dims(encoder_outputs, axis=2)  # [batch_size, seq_len, 1, hidden_size]
        w = tf.get_variable('w', [1, 1, hidden_size, hidden_size])
        encoder_features = tf.nn.conv2d(encoder_outputs_, w, [1, 1, 1, 1], 'SAME')
        decoder_feature = linear(decoder_state, hidden_size, True)  # [batch_size, hidden_size]
        decoder_feature = tf.expand_dims(tf.expand_dims(decoder_feature, 1), 1)
        v = tf.get_variable('v', [hidden_size])
        e = tf.reduce_sum(v * tf.tanh(encoder_features + decoder_feature), [2, 3])  # [batch_size, seq_len]
        mask = tf.sequence_mask(encoder_len, maxlen=max_len, dtype=tf.float32)
        att = e * mask + (1 - mask) * (-1e6)
        att = tf.nn.softmax(att, axis=-1)
        context_vector = tf.matmul(tf.transpose(encoder_outputs, [0, 2, 1]),
                                   tf.expand_dims(att, 2))
        context_vector = tf.reshape(context_vector, [-1, hidden_size])
    return context_vector, att
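# NOTE: the `linear(...)` helper used by the decoder/attention snippets above is not part of
# this collection, and its signature differs between the projects sampled here. A minimal
# sketch of the list-concatenating TF1-style variant those two snippets appear to assume
# (hypothetical, for illustration only):
import tensorflow as tf

def linear(args, output_size, bias=True, scope=None):
    """Concatenate `args` along the last axis and apply a learned affine map W x + b."""
    if not isinstance(args, (list, tuple)):
        args = [args]
    x = tf.concat(args, axis=-1)
    input_size = x.get_shape().as_list()[-1]
    with tf.variable_scope(scope or 'linear'):
        w = tf.get_variable('w', [input_size, output_size])
        out = tf.matmul(x, w)
        if bias:
            b = tf.get_variable('b', [output_size], initializer=tf.zeros_initializer())
            out = out + b
    return out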
def _decoder_step(time, prev_output, prev_rnn_states, outputs_array):
    new_output, new_rnn_states = seq_cell(prev_output, prev_rnn_states)
    new_output = utils.linear(new_output, 3)
    new_output = tf.where(tf.greater(lens_tensor, time),
                          new_output,
                          tf.zeros_like(new_output))
    outputs_array = outputs_array.write(time, new_output)
    return time + 1, new_output, new_rnn_states, outputs_array
def add_dense_layers(self, architecture, input_layer, parameters, name_aux):
    """
    Creates an MLP.
    params:
        architecture: list of ints (hidden layer sizes of the MLP)
        parameters: dictionary with weights
    """
    # TODO delete config parameter
    last_layer = input_layer
    # print(last_layer, "as input")
    # prefix = prefix + "_" if prefix != '' else prefix
    # parameters = getattr(self, prefix + 'w')
    histograms = []
    for i, neurons in enumerate(architecture):
        number = 'l' + str(i + 1)
        layer_name = name_aux + number
        layer, weights, biases = \
            utils.linear(input_=last_layer,
                         output_size=neurons,
                         activation_fn=tf.nn.relu,
                         name=layer_name)
        # histograms += [tf.summary.histogram("w_" + layer_name, weights),
        #                tf.summary.histogram("b_" + layer_name, biases),
        #                tf.summary.histogram("o_" + layer_name, layer)]
        # setattr(self, layer_name, layer)
        parameters[layer_name + "_w"] = weights
        parameters[layer_name + "_b"] = biases
        last_layer = layer
        # print(layer_name, layer.get_shape().as_list(), 'added')
        # print(layer, 'added', layer_name)
    return last_layer, histograms
def inference(self, x):
    with tf.variable_scope("conv0"):
        conv1 = utils.relu(utils.Bn(utils.conv2d(x, 64, 7, 7, 2, 2, bias=True),
                                    training=self.is_training))
    with tf.name_scope("pool1"):
        pool1 = utils.max_pool(conv1, 3, 3, 2, 2)
    with tf.variable_scope("group0"):
        res2a = self.residual(pool1, 256, name='block0')
        res2b = self.residual(res2a, 256, name='block1')
        res2c = self.residual(res2b, 256, name='block2')
    with tf.variable_scope("group1"):
        res3a = self.residual(res2c, 512, 2, name='block0')
        res3b = self.residual(res3a, 512, name='block1')
        res3c = self.residual(res3b, 512, name='block2')
        res3d = self.residual(res3c, 512, name='block3')
    with tf.variable_scope("group2"):
        res4a = self.residual(res3d, 1024, 2, name='block0')
        res4b = self.residual(res4a, 1024, name='block1')
        res4c = self.residual(res4b, 1024, name='block2')
        res4d = self.residual(res4c, 1024, name='block3')
        res4e = self.residual(res4d, 1024, name='block4')
        res4f = self.residual(res4e, 1024, name='block5')
    with tf.variable_scope("group3"):
        res5a = self.residual(res4f, 2048, 2, name='block0')
        res5b = self.residual(res5a, 2048, name='block1')
        res5c = self.residual(res5b, 2048, name='block2')
    with tf.name_scope("pool5"):
        pool5 = utils.global_pool(res5c)
    with tf.variable_scope("linear"):
        dropout = tf.nn.dropout(pool5, keep_prob=self.keep_prob)
        out = utils.linear(dropout, 1000)
    return out
def __init__(self, deep_columns_idx, embedding_columns_dict, hidden_layers, dropouts, output_dim):
    """
    Init parameters.
    :param deep_columns_idx: dict mapping column name to its index,
        e.g. {'age': 0, 'career': 1, ...}
    :param embedding_columns_dict: dict mapping categorical column name to
        (number of unique values, embedding dimension), e.g. {'age': (10, 32), ...}
    :param hidden_layers: list of hidden layer sizes
    :param dropouts: list of floats, one dropout per hidden layer,
        len(dropouts) == len(hidden_layers) - 1
    """
    super(DeepModel, self).__init__()
    self.embedding_columns_dict = embedding_columns_dict
    self.deep_columns_idx = deep_columns_idx
    for key, val in embedding_columns_dict.items():
        setattr(self, 'dense_col_' + key, nn.Embedding(val[0], val[1]))
    embedding_layer = 0
    for col in self.deep_columns_idx.keys():
        if col in embedding_columns_dict:
            embedding_layer += embedding_columns_dict[col][1]
        else:
            embedding_layer += 1
    self.layers = nn.Sequential()
    hidden_layers = [embedding_layer] + hidden_layers
    dropouts = [0.0] + dropouts
    for i in range(1, len(hidden_layers)):
        self.layers.add_module(
            'hidden_layer_{}'.format(i - 1),
            linear(hidden_layers[i - 1], hidden_layers[i], dropouts[i - 1]))
    self.layers.add_module('last_linear', nn.Linear(hidden_layers[-1], output_dim))
def _discriminator_conv(self, states):
    '''Convolve output of bidirectional RNN and predict the discriminator label.'''
    with tf.variable_scope("Discriminator"):
        W_conv = tf.get_variable('W_conv',
                                 [cfg.d_conv_window, 1, states.get_shape()[2],
                                  cfg.hidden_size // cfg.d_conv_window],
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_conv = tf.get_variable('b_conv', [cfg.hidden_size // cfg.d_conv_window],
                                 initializer=tf.constant_initializer(0.0))
        states = tf.expand_dims(states, 2)
        conv = tf.nn.conv2d(states, W_conv, strides=[1, 1, 1, 1], padding='SAME')
        conv_out = tf.reshape(conv, [2 * cfg.batch_size, -1,
                                     cfg.hidden_size // cfg.d_conv_window])
        conv_out = tf.nn.bias_add(conv_out, b_conv)
        reduced = tf.nn.elu(tf.reduce_sum(conv_out, [1])) * 1e-1
        output = utils.linear(reduced, 1, True, 0.0, scope='discriminator_output')
    return output
def generator(self, z):
    """
    :param z: the noise vector
    :return: the generated image
    """
    with tf.variable_scope("generator") as scope:
        self.z_, self.h0_w, self.h0_b = linear(z, self.gf_dim * 8 * 4 * 4,
                                               'g_h0_lin', with_w=True)
        hs = [None]
        hs[0] = tf.reshape(self.z_, [-1, 4, 4, self.gf_dim * 8])
        hs[0] = tf.nn.relu(self.g_bns[0](hs[0], self.is_training))
        i = 1
        depth_mul = 8
        size = 8
        while size < self.image_size:
            hs.append(None)
            name = 'g_h{}'.format(i)
            hs[i], _, _ = conv2d_transpose(hs[i - 1],
                                           [self.batch_size, size, size, self.gf_dim * depth_mul],
                                           name=name, with_w=True)
            hs[i] = tf.nn.relu(self.g_bns[i](hs[i], self.is_training))
            i += 1
            depth_mul //= 2
            size *= 2
        hs.append(None)
        name = 'g_h{}'.format(i)
        hs[i], _, _ = conv2d_transpose(hs[i - 1], [self.batch_size, size, size, 3],
                                       name=name, with_w=True)
        return tf.nn.tanh(hs[i])
def discriminator(self, inputs_logits, num_blocks=3, use_bias=False, num_classes=1):
    """
    The discriminator scores the distribution of time and event.
    If the time is consistent with the history times, give a high score.
    If it is not, give a low score.
    Implementation: CNN
    """
    with tf.variable_scope('Discriminator'):
        # inputs = tf.transpose(inputs_logits, [0, 2, 1])
        inputs = inputs_logits
        output = utils.conv1d('D.Input', 1, self.filter_output_dim, self.filter_size, inputs)
        output = self.res_block('D.1', output)
        output = self.res_block('D.2', output)
        output = self.res_block('D.3', output)
        output = self.res_block('D.4', output)
        output = self.res_block('D.5', output)
        output = tf.reshape(output,
                            [-1, (self.length + self.num_steps) * self.filter_output_dim])
        # if the output size is 1, it is the discriminator score of D
        # if the output size is 2, it is a bi-classification result of D
        output = tf.nn.sigmoid(
            utils.linear('D.Output',
                         (self.length + self.num_steps) * self.filter_output_dim,
                         1, output))
        logging.info('The shape of output from D {}'.format(output.get_shape()))
        return output
def build_model(xs, ys, n_neurons, n_layers, activation_fn, final_activation_fn, cost_type):
    xs = np.asarray(xs)
    ys = np.asarray(ys)
    if xs.ndim != 2:
        raise ValueError('xs should be n_observations x n_features, '
                         'i.e. a 2-dimensional array.')
    if ys.ndim != 2:
        raise ValueError('ys should be n_observations x n_features, '
                         'i.e. a 2-dimensional array.')
    n_xs = xs.shape[1]
    n_ys = ys.shape[1]
    X = tf.compat.v1.placeholder(name='X', shape=[None, n_xs], dtype=tf.float32)
    Y = tf.compat.v1.placeholder(name='Y', shape=[None, n_ys], dtype=tf.float32)
    current_input = X
    for layer_i in range(n_layers):
        current_input = utils.linear(current_input, n_neurons,
                                     activation=activation_fn,
                                     name='layer{}'.format(layer_i))[0]
    Y_pred = utils.linear(current_input, n_ys,
                          activation=final_activation_fn,
                          name='pred')[0]
    if cost_type == 'l1_norm':
        cost = tf.reduce_mean(
            input_tensor=tf.reduce_sum(input_tensor=tf.abs(Y - Y_pred), axis=1))
    elif cost_type == 'l2_norm':
        cost = tf.reduce_mean(input_tensor=tf.reduce_sum(
            input_tensor=tf.math.squared_difference(Y, Y_pred), axis=1))
    else:
        raise ValueError('Unknown cost_type: {}. '.format(cost_type) +
                         'Use only "l1_norm" or "l2_norm"')
    return {'X': X, 'Y': Y, 'Y_pred': Y_pred, 'cost': cost}
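# NOTE: a hypothetical usage sketch for build_model above, on toy 1-D regression data.
# The optimizer, learning rate, and iteration count are illustrative, not from the source,
# and `utils.linear` is assumed to accept activation=None for a plain affine output layer.
import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
xs = np.linspace(-1.0, 1.0, 200).reshape(-1, 1).astype(np.float32)
ys = 2.0 * xs + 0.1 * np.random.randn(*xs.shape).astype(np.float32)
model = build_model(xs, ys, n_neurons=16, n_layers=2,
                    activation_fn=tf.nn.relu, final_activation_fn=None,
                    cost_type='l2_norm')
train_op = tf.compat.v1.train.AdamOptimizer(1e-2).minimize(model['cost'])
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for _ in range(200):
        _, cost = sess.run([train_op, model['cost']],
                           feed_dict={model['X']: xs, model['Y']: ys})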
def add_dueling(self, prefix, input_layer):
    """Extends the module with the Dueling architecture."""
    if prefix in ['', 'target']:
        # DQN
        architecture = self.ag.architecture_duel
        output_length = self.environment.action_size
    else:
        # HDQN
        if prefix in ['mc', 'mc_target']:
            architecture = self.mc_ag.architecture_duel
            output_length = self.goal_size
        elif prefix in ['c', 'c_target']:
            architecture = self.c_ag.architecture_duel
            output_length = self.environment.action_size
        else:
            assert 0
    parameters = self.get(prefix, 'w')
    # prefix = prefix.replace("target_", "")
    last_layer = input_layer
    # print("adding dense into ", prefix + 'w')
    value_hid, histograms_v = self.add_dense_layers(architecture=architecture,
                                                    input_layer=last_layer,
                                                    parameters=parameters,
                                                    name_aux='value_hid_')
    adv_hid, histograms_a = self.add_dense_layers(architecture=architecture,
                                                  input_layer=last_layer,
                                                  parameters=parameters,
                                                  name_aux='adv_hid_')
    aux1 = 'value_out'
    aux2 = 'adv_out'
    value, w_val, b_val = utils.linear(value_hid, 1, name=aux1)
    adv, w_adv, b_adv = utils.linear(adv_hid, output_length, name=aux2)
    parameters[aux1 + "_w"] = w_val
    parameters[aux1 + "_b"] = b_val
    parameters[aux2 + "_w"] = w_adv
    parameters[aux2 + "_b"] = b_adv
    q = value + (adv - tf.reduce_mean(adv, reduction_indices=1, keepdims=True))
    # print(q)
    return q
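# NOTE: the dueling head above combines the two streams as
# Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')); a small NumPy check of that aggregation:
import numpy as np

value = np.array([[1.0]])           # V(s), shape [batch, 1]
adv = np.array([[0.5, -0.5, 1.0]])  # A(s, a), shape [batch, n_actions]
q = value + (adv - adv.mean(axis=1, keepdims=True))
print(q)  # approx [[1.1667 0.1667 1.6667]] -- advantages re-centred around the state value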
def __init__(self, input_dim, output_dim, dropout=0):
    """
    Wide model using LR.
    :param input_dim: int, the dimension of the wide model input
    :param output_dim: int, the dimension of the wide model output
    """
    super(WideModel, self).__init__()
    self.linear = linear(input_dim, output_dim, dropout)
def discriminator_energy(self, states):  # FIXME
    '''An energy-based discriminator that tries to reconstruct the input states.'''
    with tf.variable_scope("Discriminator"):
        _, state = tf.nn.dynamic_rnn(self.rnn_cell(cfg.d_num_layers, cfg.hidden_size),
                                     states, swap_memory=True, dtype=tf.float32,
                                     scope='discriminator_encoder')
        # XXX use BiRNN+convnet for the encoder
        # this latent is of size cfg.hidden_size since it needs a lot more capacity than
        # cfg.latent_size to reproduce the hidden states
        # TODO use all states instead of just the final state
        latent = utils.highway(state, layer_size=1)
        latent = utils.linear(latent, cfg.hidden_size, True,
                              scope='discriminator_latent_transform')
        # TODO make initial state from latent, don't just use zeros
        decoder_input = tf.concat(1, [tf.zeros([2 * cfg.batch_size, 1, cfg.hidden_size]),
                                      states])
        output, _ = tf.nn.dynamic_rnn(self.rnn_cell(cfg.d_num_layers, cfg.hidden_size, latent),
                                      decoder_input, swap_memory=True, dtype=tf.float32,
                                      scope='discriminator_decoder')
        output = tf.reshape(output, [-1, cfg.hidden_size])
        reconstructed = utils.linear(output, cfg.hidden_size, True, 0.0,
                                     scope='discriminator_reconst')
        reconstructed = tf.reshape(reconstructed, [2 * cfg.batch_size, -1, cfg.hidden_size])
        # don't train this projection, since the model can learn to zero out ret_latent to
        # minimize the reconstruction error
        ret_latent = tf.nn.tanh(utils.linear(self.latent, cfg.hidden_size, False,
                                             scope='discriminator_ret_latent', train=False))
    return reconstructed, ret_latent
def __call__(self, inputs, state, scope=None): with vs.variable_scope("gates"): value = tf.nn.sigmoid( linear([state, inputs], 2 * self._num_units, True, normalize=self._normalize)) i, f = array_ops.split(value=value, num_or_size_splits=2, axis=1) with vs.variable_scope("candidate"): c = linear([inputs], self._num_units, True, normalize=self._normalize) new_c = i * c + f * state new_h = self._activation(c) return new_h, new_c
def __call__(self, inputs, state, scope=None): with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse): h, n, d = state with vs.variable_scope("u"): u = linear(inputs, self._num_units, True, normalize=self._normalize) with vs.variable_scope("g"): g = linear([inputs, h], self._num_units, True, normalize=self._normalize) with vs.variable_scope( "a" ): # The bias term when factored out of the numerator and denominator cancels and is unnecessary a = tf.exp( linear([inputs, h], self._num_units, True, normalize=self._normalize)) with vs.variable_scope("discount_factor"): discount_factor = tf.nn.sigmoid( linear([inputs, h], self._num_units, True, normalize=self._normalize)) z = tf.multiply(u, tanh(g)) n = tf.multiply(n, discount_factor) + tf.multiply( z, a) # Numerically stable update of numerator d = tf.multiply(d, discount_factor ) + a # Numerically stable update of denominator h_new = self._activation(tf.div(n, d)) new_state = RDACellTuple(h_new, n, d) return h_new, new_state
def discriminator(self, img, cond, reuse):
    dim = len(img.get_shape())
    with tf.variable_scope("disc", reuse=reuse):
        image = tf.concat([img, cond], dim - 1)
        feature = conf.conv_channel_base
        h0 = lrelu(conv2d(image, feature, name="h0"))
        h1 = lrelu(batch_norm(conv2d(h0, feature * 2, name="h1"), "h1"))
        h2 = lrelu(batch_norm(conv2d(h1, feature * 4, name="h2"), "h2"))
        h3 = lrelu(batch_norm(conv2d(h2, feature * 8, name="h3"), "h3"))
        h4 = linear(tf.reshape(h3, [1, -1]), 1, "linear")
    return h4
def __atm_chisq__(self, pars, apertures):
    """Computes combined chi (not squared) for every aperture."""
    k, *c = pars
    diff = np.array([])
    for i, data in enumerate(apertures):
        # compute chi residuals for each aperture
        mag, mag_err = utils.flux_to_mag(data[:, 2], data[:, 3])
        airmass = data[:, 0]
        diff_new = np.abs((mag - utils.linear(airmass, k, c[i])) / mag_err)
        diff = np.hstack((diff, diff_new))
    return diff
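# NOTE: the residuals above assume the standard atmospheric-extinction model
# m(X) = k * X + c_i (instrumental magnitude vs. airmass X, with one zero point c_i per
# aperture and a shared extinction coefficient k), so each chi term is
# |m_obs - (k * X + c_i)| / sigma_m; utils.linear(airmass, k, c[i]) is assumed to evaluate
# that straight line.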
def g_time(self, hidden_r):
    '''The generator model for time and event.'''
    with tf.variable_scope('Generator_T'):
        outputs = utils.build_rnn_graph(hidden_r, self.num_layers, self.hidden_size,
                                        self.batch_size, self.length, "G_T.RNN")
        output = tf.reshape(tf.concat(outputs, 1), [-1, self.g_size])
        output = utils.linear('G_T.Output', self.g_size, 1, output)
        logits = tf.reshape(output, [self.batch_size, self.length, 1])
        return logits
def discriminator(self, image, reuse=False):
    with tf.variable_scope('discriminator') as scope:
        if reuse:
            scope.reuse_variables()
        h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
        h1 = lrelu(self.d_bns[0](conv2d(h0, self.df_dim * 2, name='d_h1_conv'), self.is_training))
        h2 = lrelu(self.d_bns[1](conv2d(h1, self.df_dim * 4, name='d_h2_conv'), self.is_training))
        h3 = lrelu(self.d_bns[2](conv2d(h2, self.df_dim * 8, name='d_h3_conv'), self.is_training))
        h4 = linear(tf.reshape(h3, [-1, 8192]), 1, 'd_h4_lin')
        return tf.nn.sigmoid(h4), h4
def build_cnn_model(self):
    self.imgs = tf.placeholder('float32', [self.batch_size, self.input_dims])
    self.img_reshape = tf.reshape(self.imgs, [self.batch_size, self.w, self.h, self.channel])
    if self.synthetic:
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'], self.synthetic_grad['l1'] = \
            conv2d(self.img_reshape, 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')
        self.layer_out['l1_pool'] = pooling(self.layer_out['l1'],
                                            kernel_size=[3, 3], stride=[1, 1], type='max')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'], self.synthetic_grad['l2'] = \
            conv2d(self.layer_out['l1_pool'], 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
        self.layer_out['l2_pool'] = pooling(self.layer_out['l2'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'], self.synthetic_grad['l3'] = \
            conv2d(self.layer_out['l2_pool'], 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
        self.layer_out['l3_pool'] = pooling(self.layer_out['l3'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'], [self.batch_size, -1])
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'], self.synthetic_grad['l4'] = \
            linear(self.layer_out['l3_reshape'], self.output_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=True, activation_fn=tf.nn.relu, name='l4_linear')
    else:
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'] = \
            conv2d(self.img_reshape, 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')
        self.layer_out['l1_pool'] = pooling(self.layer_out['l1'],
                                            kernel_size=[3, 3], stride=[1, 1], type='max')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'] = \
            conv2d(self.layer_out['l1_pool'], 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
        self.layer_out['l2_pool'] = pooling(self.layer_out['l2'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'] = \
            conv2d(self.layer_out['l2_pool'], 128, [5, 5], [1, 1],
                   self.weight_initializer, self.bias_initializer,
                   batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
        self.layer_out['l3_pool'] = pooling(self.layer_out['l3'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'], [self.batch_size, -1])
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'] = \
            linear(self.layer_out['l3_reshape'], self.output_size,
                   self.weight_initializer, self.bias_initializer,
                   activation_fn=tf.nn.relu, name='l4_linear')
    self.out_logit = tf.nn.softmax(self.layer_out['l4'])
    self.out_argmax = tf.argmax(self.out_logit, 1)
    self.labels = tf.placeholder('int32', [self.batch_size])
    self.loss_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.layer_out['l4'], labels=self.labels)
    self.loss = tf.reduce_sum(self.loss_entropy) / self.batch_size
    if self.synthetic:
        self.grad_output['l1'] = tf.gradients(self.loss, self.layer_out['l1'])
        self.grad_output['l2'] = tf.gradients(self.loss, self.layer_out['l2'])
        self.grad_output['l3'] = tf.gradients(self.loss, self.layer_out['l3'])
        self.grad_output['l4'] = tf.gradients(self.loss, self.layer_out['l4'])
        for k in self.grad_output.keys():
            self.grad_loss.append(
                tf.reduce_sum(tf.square(self.synthetic_grad[k] - self.grad_output[k])))
        self.grad_total_loss = sum(self.grad_loss)
def __init__(self, x_bxu, z_size, name, var_min=0.0): """Create an input dependent diagonal Gaussian distribution. Args: x: The input tensor from which the mean and variance are computed, via a linear transformation of x. I.e. mu = Wx + b, log(var) = Mx + c z_size: The size of the distribution. name: The name to prefix to learned variables. var_min (optional): Minimal variance allowed. This is an additional way to control the amount of information getting through the stochastic layer. """ size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size]) # [batch, size] self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name + "/mean")) logvar_bxn = linear(x_bxu, z_size, name=(name + "/logvar")) if var_min > 0.0: logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min) self.logvar_bxn = logvar_bxn self.noise_bxn = noise_bxn = tf.random_normal(size_bxn) # [batch, size] self.noise_bxn.set_shape([None, z_size]) # [batch, size] self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn # [batch, size]
def build_mlp_model(self):
    self.imgs = tf.placeholder('float32', [self.batch_size, self.input_dims])
    if self.synthetic:
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'], self.synthetic_grad['l1'] = \
            linear(self.imgs, self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l1_linear')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'], self.synthetic_grad['l2'] = \
            linear(self.layer_out['l1'], self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l2_linear')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'], self.synthetic_grad['l3'] = \
            linear(self.layer_out['l2'], self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l3_linear')
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'], self.synthetic_grad['l4'] = \
            linear(self.layer_out['l3'], self.output_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l4_linear')
    else:
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'] = \
            linear(self.imgs, self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l1_linear')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'] = \
            linear(self.layer_out['l1'], self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l2_linear')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'] = \
            linear(self.layer_out['l2'], self.hidden_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l3_linear')
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'] = \
            linear(self.layer_out['l3'], self.output_size,
                   self.weight_initializer, self.bias_initializer,
                   synthetic=self.synthetic, activation_fn=tf.nn.relu, name='l4_linear')
    self.out_logit = tf.nn.softmax(self.layer_out['l4'])
    self.out_argmax = tf.argmax(self.out_logit, 1)
    self.labels = tf.placeholder('int32', [self.batch_size])
    self.loss_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.layer_out['l4'], labels=self.labels)
    self.loss = tf.reduce_mean(self.loss_entropy)
    if self.synthetic:
        self.grad_output['l1'] = tf.gradients(self.loss, self.layer_out['l1'])
        self.grad_output['l2'] = tf.gradients(self.loss, self.layer_out['l2'])
        self.grad_output['l3'] = tf.gradients(self.loss, self.layer_out['l3'])
        self.grad_output['l4'] = tf.gradients(self.loss, self.layer_out['l4'])
        for k in self.grad_output.keys():
            self.grad_loss.append(
                tf.reduce_sum(tf.square(self.synthetic_grad[k] - self.grad_output[k])))
        self.grad_total_loss = sum(self.grad_loss)
def __call__(self, inputs, state, scope=None): """Gated recurrent unit (GRU) with nunits cells.""" with tf.variable_scope(scope or type(self).__name__): # "GRUCell" if self.pretanh: state = state[:, :self.num_units] with tf.variable_scope("Gates"): # Reset gate and update gate. # We start with bias of 1.0 to not reset and not update. r, u = tf.split( 1, 2, utils.linear([inputs, state], 2 * self.num_units, True, 1.0)) r, u = tf.nn.sigmoid(r), tf.nn.sigmoid(u) with tf.variable_scope("Candidate"): preact = utils.linear([inputs, r * state], self.num_units, True) c = self.activation(preact) new_h = u * state + (1 - u) * c if self.pretanh: new_state = tf.concat(1, [new_h, preact]) else: new_state = new_h return new_h, new_state
def discriminator(self, image, caption, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    h0 = utils.lrelu(utils.conv2d(image, self.channel_dim, name='d_h0_conv'))
    h1 = utils.lrelu(self.d_bn1(utils.conv2d(h0, self.channel_dim * 2, name='d_h1_conv')))
    h2 = utils.lrelu(self.d_bn2(utils.conv2d(h1, self.channel_dim * 4, name='d_h2_conv')))
    h3 = utils.lrelu(self.d_bn3(utils.conv2d(h2, self.channel_dim * 8, name='d_h3_conv')))
    reduced_caption = utils.lrelu(utils.linear(caption, self.reduced_text_dim, 'd_embedding'))
    reduced_caption = tf.expand_dims(reduced_caption, 1)
    reduced_caption = tf.expand_dims(reduced_caption, 2)
    tiled_caption = tf.tile(reduced_caption, [1, 4, 4, 1], name='tiled_embedding')
    h3_concat = tf.concat([h3, tiled_caption], 3, name='h3_concat')
    h3_new = utils.lrelu(
        self.d_bn4(utils.conv2d(h3_concat, self.channel_dim * 8, 1, 1, 1, 1,
                                name='d_h3_conv_new')))
    h4 = utils.linear(tf.reshape(h3_new, [self.batch_size, -1]), 1, 'd_h3_lin')
    return tf.nn.sigmoid(h4), h4
def __init__(self, x_bxu, z_size, name, var_min=0.0): """Create an input dependent diagonal Gaussian distribution. Args: x: The input tensor from which the mean and variance are computed, via a linear transformation of x. I.e. mu = Wx + b, log(var) = Mx + c z_size: The size of the distribution. name: The name to prefix to learned variables. var_min (optional): Minimal variance allowed. This is an additional way to control the amount of information getting through the stochastic layer. """ size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size]) self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean")) logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar")) if var_min > 0.0: logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min) self.logvar_bxn = logvar_bxn self.noise_bxn = noise_bxn = tf.random_normal(size_bxn) self.noise_bxn.set_shape([None, z_size]) self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn
def build_controller(self):
    self.c_w = {}
    self.c_target_w = {}
    with tf.variable_scope('c_prediction'):
        # input_size = self.environment.state_size + self.goal_size
        self.c_s_t = tf.placeholder("float", [None, 1, self.environment.state_size],
                                    name='c_s_t')
        shape = self.c_s_t.get_shape().as_list()
        self.c_s_t_flat = tf.reshape(self.c_s_t,
                                     [-1, reduce(lambda x, y: x * y, shape[1:])])
        self.c_g_t = tf.placeholder("float", [None, self.goal_size], name='c_g_t')
        self.c_gs_t = tf.concat([self.c_g_t, self.c_s_t_flat], axis=1, name='c_gs_concat')
        last_layer = self.c_gs_t
        last_layer, histograms = self.add_dense_layers(architecture=self.c_ag.architecture,
                                                       input_layer=last_layer,
                                                       parameters=self.c_w,
                                                       name_aux='')
        if self.c_ag.dueling:
            self.c_q = self.add_dueling(prefix='c', input_layer=last_layer)
        else:
            self.c_q, self.c_w['q_w'], self.c_w['q_b'] = \
                utils.linear(last_layer, self.environment.action_size, name='c_q')
        self.c_q_action = tf.argmax(self.c_q, axis=1)
        q_summary = histograms
        avg_q = tf.reduce_mean(self.c_q, 0)
        for idx in range(self.c_ag.q_output_length):
            q_summary.append(tf.summary.histogram('c_q/%s' % idx, avg_q[idx]))
        self.c_q_summary = tf.summary.merge(q_summary, 'c_q_summary')
    # target network
    self.create_target(prefix='c')
    # Controller optimizer
    self.build_optimizer(prefix='c')
def encode(self):
    encoded_output, encoded_state = utils.encode_seq(input_seq=self.q1,
                                                     seq_len=self.len1,
                                                     word_embeddings=self.word_embeddings,
                                                     num_neurons=self.num_neurons)  # [batch_size, 2*num_neurons]
    with tf.variable_scope("variational_inference"):  # Variational inference
        mean = utils.linear(encoded_state, self.hidden_size, scope='mean')  # [batch_size, n_hidden]
        logsigm = utils.linear(encoded_state, self.hidden_size, scope='logsigm')  # [batch_size, n_hidden]
        self.mean, self.logsigm = mean, logsigm
        # Gaussian Multivariate kld(z, N(0,1)) = -0.5 * [sum_d(logsigma) + d - sum_d(sigma) - mu_T*mu]
        klds = -0.5 * (tf.reduce_sum(logsigm, 1)
                       + tf.cast(tf.shape(mean)[1], tf.float32)
                       - tf.reduce_sum(tf.exp(logsigm), 1)
                       - tf.reduce_sum(tf.square(mean), 1))  # KLD(q(z|x), N(0,1)) tensor [batch_size]
        utils.variable_summaries('klds', klds)  # posterior distribution close to prior N(0,1)
        self.kld = tf.reduce_mean(klds, 0)  # mean over batches: scalar
        h_ = tf.get_variable("GO", [1, self.hidden_size], initializer=self.initializer)
        h_ = tf.tile(h_, [self.batch_size, 1])  # trainable tensor: decoder init_state[1]
        eps = tf.random_normal((self.batch_size, self.hidden_size), 0, 1)
        self.doc_vec = tf.multiply(tf.exp(logsigm), eps) + mean  # sample from latent intent space: decoder init_state[0]
        self.doc_vec = self.doc_vec, h_  # tuple state (z, h)
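# NOTE: the `klds` term above is the closed-form KL divergence between the diagonal Gaussian
# posterior q(z|x) = N(mu, sigma^2) and the standard normal prior, with logsigm = log sigma^2
# and d the latent dimensionality:
#   KL(q || N(0, I)) = -0.5 * sum_d (1 + log sigma_d^2 - sigma_d^2 - mu_d^2)
#                    = -0.5 * (sum_d log sigma_d^2 + d - sum_d sigma_d^2 - sum_d mu_d^2)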
def encode(self, x, weights=None):
    if weights is None:
        conv = self.encoder(x)
        mu, logvar = self.fc21(conv.view(-1, 512)), self.fc22(conv.view(-1, 512))
        z = self.reparameterize(mu, logvar)
    else:
        x = utils.conv2d(x, weights[0], weights[1], stride=2, padding=1)
        x = utils.batch_norm(x, weights[2], weights[3], momentum=1)
        x = F.relu(x)
        x = utils.conv2d(x, weights[4], weights[5], stride=2, padding=1)
        x = utils.batch_norm(x, weights[6], weights[7], momentum=1)
        x = F.relu(x)
        x = utils.conv2d(x, weights[8], weights[9], stride=2, padding=1)
        x = utils.batch_norm(x, weights[10], weights[11], momentum=1)
        x = F.relu(x)
        x = utils.conv2d(x, weights[12], weights[13], stride=1, padding=0)
        x = utils.batch_norm(x, weights[14], weights[15], momentum=1)
        x = F.relu(x)
        x = x.view(-1, 512)
        mu = utils.linear(x, weights[16], weights[17])
        logvar = utils.linear(x, weights[18], weights[19])
        z = self.reparameterize(mu, logvar)
    return z, mu, logvar
def __call__(self, inputs, state, scope=None): with _checked_scope(self, scope or "ran_cell", reuse=self._reuse): with vs.variable_scope("gates"): c, h = state gates = tf.nn.sigmoid( linear([inputs, h], 2 * self._num_units, True, normalize=self._normalize)) i, f = array_ops.split(value=gates, num_or_size_splits=2, axis=1) with vs.variable_scope("candidate"): content = linear([inputs], self._num_units, True, normalize=self._normalize) new_c = i * content + f * c new_h = self._activation(c) new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h) output = new_h return output, new_state
def g_time(self, hidden_r, name=''):
    """
    The generative model for time and event.
    mode:
        1. use the concatenated hidden representation for each time step
        2. use the unfolded hidden representation separately for each time step
    """
    with tf.variable_scope('Generator/Time-g' + name):
        outputs = utils.build_rnn_graph_decoder1(hidden_r, self.num_layers, self.hidden_size,
                                                 self.batch_size, self.length, "G_T.RNN")
        output = tf.reshape(tf.concat(outputs, 1), [-1, self.g_size])
        output = utils.linear('G_T.Output', self.g_size, 1, output)
        logits = tf.reshape(output, [self.batch_size, self.length, 1])
        return logits
def build_dqn(self):
    self.w = {}
    with tf.variable_scope('step'):
        self.step_op = tf.Variable(0, trainable=False, name='step')
        self.step_input = tf.placeholder('int32', None, name='step_input')
        self.step_assign_op = self.step_op.assign(self.step_input)
    # training network
    with tf.variable_scope('prediction'):
        # tf Graph input
        self.s_t = tf.placeholder("float",
                                  [None, self.ag.history_length, self.environment.state_size],
                                  name='s_t')
        shape = self.s_t.get_shape().as_list()
        self.s_t_flat = tf.reshape(self.s_t, [-1, reduce(lambda x, y: x * y, shape[1:])])
        last_layer = self.s_t_flat
        last_layer, histograms = self.add_dense_layers(architecture=self.ag.architecture,
                                                       input_layer=last_layer,
                                                       parameters=self.w,
                                                       name_aux='')
        if self.ag.dueling:
            self.q = self.add_dueling(prefix='', input_layer=last_layer)
        else:
            self.q, self.w['q_w'], self.w['q_b'] = utils.linear(
                last_layer, self.environment.action_size, name='q')
        self.avg_q = tf.reduce_max(self.q, axis=1)
        self.q_action = tf.argmax(self.q, axis=1)
    self.create_target(prefix='')
    # optimizer
    self.build_optimizer(prefix='')
    self.setup_summary(self.m.scalar_tags, self.m.histogram_tags)
    tf.global_variables_initializer().run()
    vars_ = list(self.w.values()) + [self.step_op]
    self._saver = tf.train.Saver(vars_, max_to_keep=30)
    self.load_model()
    self.update_target_q_network(prefix='')
def decoder(self, inputs, z):
    '''Use the latent representation and word inputs to predict next words.'''
    with tf.variable_scope("Decoder"):
        initial = []
        for i in range(cfg.num_layers):
            initial.append(tf.nn.tanh(utils.linear(z, cfg.hidden_size, True, 0.0,
                                                   scope='decoder_initial%d' % i)))
        self.decode_initial = tuple(initial)
        self.decode_cell = self.rnn_cell(cfg.num_layers)
        output, _ = tf.nn.dynamic_rnn(self.decode_cell, inputs,
                                      initial_state=self.decode_initial,
                                      sequence_length=self.lengths - 1,
                                      swap_memory=True, dtype=tf.float32)
    return output