def _build_layers_v2(self, input_dict, num_outputs, options):
    # Weights shared with CustomModel1
    with tf.variable_scope(
            tf.VariableScope(tf.AUTO_REUSE, "shared"),
            reuse=tf.AUTO_REUSE,
            auxiliary_name_scope=False):
        last_layer = slim.fully_connected(
            input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
    last_layer = slim.fully_connected(
        last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
    output = slim.fully_connected(
        last_layer, num_outputs, activation_fn=None, scope="fc_out")
    return output, last_layer
def __init__(self, conf, trainlabels, outputs, constraint, name=None):
    '''EDDecoder constructor

    Args:
        conf: the decoder configuration as a ConfigParser
        trainlabels: the number of extra labels required by the trainer
        outputs: the names of the outputs of the model
        constraint: the constraint for the variables
        name: the decoder name
    '''

    # save the parameters
    self.conf = dict(conf.items('decoder'))
    self.outputs = outputs
    self.output_dims = self.get_output_dims(trainlabels)

    self.scope = tf.VariableScope(
        tf.AUTO_REUSE, name or type(self).__name__, constraint=constraint)
def conv(self, name, inputData, outChannel):
    inChannel = inputData.get_shape()[-1]
    # tf.variable_scope (not the tf.VariableScope class, which is not a
    # context manager) is what actually opens the scope.
    with tf.variable_scope(name):
        kernel = tf.get_variable("weights",
                                 shape=[3, 3, inChannel, outChannel],
                                 dtype=tf.float32, trainable=False)
        biases = tf.get_variable("biases", shape=[outChannel],
                                 dtype=tf.float32, trainable=False)
        convRes = tf.nn.conv2d(input=inputData, filter=kernel,
                               strides=[1, 1, 1, 1], padding="SAME")
        res = tf.nn.bias_add(convRes, bias=biases)
        # relu takes a single input tensor; the biases were already added
        # above.
        out = tf.nn.relu(res)
        self.parameters += [kernel, biases]
    return out
def __init__(self, conf, constraint, name=None):
    '''EDEncoder constructor

    Args:
        conf: the encoder configuration
        constraint: the constraint for the variables
        name: the encoder name
    '''

    # save the configuration
    self.conf = dict(conf.items('encoder'))

    # apply the default configuration
    default = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'defaults',
        type(self).__name__.lower() + '.cfg')
    apply_defaults(self.conf, default)

    self.scope = tf.VariableScope(
        tf.AUTO_REUSE, name or type(self).__name__, constraint=constraint)
def __init__(self, conf, name=None):
    '''Model constructor

    Args:
        conf: the model configuration as a ConfigParser object
        name: the model name
    '''

    self.input_names = conf.get('io', 'inputs').split(' ')
    if self.input_names == ['']:
        self.input_names = []
    self.output_names = conf.get('io', 'outputs').split(' ')
    if self.output_names == ['']:
        self.output_names = []
    self.conf = dict(conf.items('model'))

    self.output_dims = {}
    for i, d in enumerate(self.conf['output_dims'].split(' ')):
        self.output_dims[self.output_names[i]] = int(d)

    self.scope = tf.VariableScope(False, name or type(self).__name__)
def _build_layers_v2(self, input_dict, num_outputs, options):
    # Example of (optional) weight sharing between two different policies.
    # Here, we share the variables defined in the 'shared' variable scope
    # by entering it explicitly with tf.AUTO_REUSE. This creates the
    # variables for the 'fc1' layer in a global scope called 'shared'
    # outside of the policy's normal variable scope.
    with tf.variable_scope(
            tf.VariableScope(tf.AUTO_REUSE, "shared"),
            reuse=tf.AUTO_REUSE,
            auxiliary_name_scope=False):
        last_layer = slim.fully_connected(
            input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
    last_layer = slim.fully_connected(
        last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
    output = slim.fully_connected(
        last_layer, num_outputs, activation_fn=None, scope="fc_out")
    return output, last_layer
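# A minimal sketch (assumed shapes and names, not from the original repo)
# showing what the AUTO_REUSE trick above buys: two networks that enter the
# "shared" scope end up backed by the same underlying variables.
import tensorflow as tf

def tiny_net(x):
    with tf.variable_scope(
            tf.VariableScope(tf.AUTO_REUSE, "shared"),
            reuse=tf.AUTO_REUSE,
            auxiliary_name_scope=False):
        return tf.layers.dense(x, 64, name="fc1")

x = tf.placeholder(tf.float32, [None, 8])
a = tiny_net(x)  # creates shared/fc1/kernel and shared/fc1/bias
b = tiny_net(x)  # reuses them instead of creating an fc1_1
shared = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="shared")
print([v.name for v in shared])  # each shared variable appears exactly once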
def test_load_weight_from_ckpt():
    tf.logging.set_verbosity(tf.logging.INFO)
    x_response_value = [
        [10378, 119, 119, 151, 8815, 8281, 8211, 10425, 8154, 0, 0, 0, 0, 0]
        + [0 for i in range(512 - 14)],
        [165, 8991, 8181, 8184, 131, 120, 120, 8134, 11300, 10540, 8735,
         8207, 0, 0]
        + [0 for i in range(512 - 14)]
    ]
    poly_encoder_config = PolyEncoderConfig.from_json_file(
        '../../cfg/poly_encoder.json')
    encoder_inst = PolyEncoder(config=poly_encoder_config,
                               mode=tf.estimator.ModeKeys.TRAIN)
    bert_scope = tf.VariableScope(name="bert", reuse=tf.AUTO_REUSE)
    bert_config = BertConfig.from_json_file(poly_encoder_config.bert_config)
    x_response = tf.convert_to_tensor(value=x_response_value, dtype=tf.int32)
    x_response_emb, x_response_mask = encoder_inst.encode_candidate(
        x_response=x_response, bert_config=bert_config,
        bert_scope=bert_scope)
    load_weight_from_ckpt(init_checkpoint="../../ckpt/albert/")
def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    # tf.variable_scope is the context manager; tf.VariableScope is not.
    with tf.variable_scope('generator'):
        x1 = tf.layers.dense(z, 4 * 4 * 512)
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = tf.maximum(0.2 * x1, x1)  # leaky ReLU
        # 4x4x512 now
        x2 = tf.layers.conv2d_transpose(x1, 256, 5, strides=2,
                                        padding='same')
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = tf.maximum(0.2 * x2, x2)
        # 8x8x256 now
        x3 = tf.layers.conv2d_transpose(x2, 128, 5, strides=2,
                                        padding='same')
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = tf.maximum(0.2 * x3, x3)
        # 16x16x128 now
        # Output layer
        x4 = tf.layers.conv2d_transpose(x3, out_channel_dim, 5, strides=2,
                                        padding='same')
        # 32x32x3 now
        logits = tf.image.resize_images(
            x4, [28, 28], method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True)
        out = tf.tanh(logits)
        return out
def __init__(self, conf, output_dim, name=None):
    '''classifier constructor

    Args:
        conf: the classifier configuration
        output_dim: the classifier output dimension. This is a tuple, each
            element representing the output_dim for one kind of targets
        name: the classifier name
    '''

    self.conf = conf

    # if there is only an add_labels in the config, we assume that only the
    # first element of this tuple is important
    if 'add_labels' in conf:
        self.output_dim = output_dim[0] + int(conf['add_labels'])
    # if there is an add_labels_reconstruction but not an
    # add_labels_prediction in the config, assume only the second element
    # to be of importance
    elif 'add_labels_reconstruction' in conf and \
            'add_labels_prediction' not in conf:
        self.output_dim = output_dim[1] + int(
            conf['add_labels_reconstruction'])
    # if both are present, both elements of the tuple are of importance
    elif 'add_labels_reconstruction' in conf and \
            'add_labels_prediction' in conf:
        outdim1 = output_dim[0] + int(conf['add_labels_prediction'])
        outdim2 = output_dim[1] + int(conf['add_labels_reconstruction'])
        self.output_dim = (outdim1, outdim2)
    else:
        raise Exception(
            'Wrong kind of add_labels information in the config')

    # create the variable scope for the classifier
    self.scope = tf.VariableScope(False, name or type(self).__name__)
def build(self, hparams, is_training=True):
    self._total_length = hparams.max_seq_len
    if self._total_length != np.prod(self._level_lengths):
        raise ValueError(
            'The product of the HierarchicalLstmEncoder level lengths (%d) '
            'must equal the padded input sequence length (%d).' % (
                np.prod(self._level_lengths), self._total_length))
    tf.logging.info('\nHierarchical Encoder:\n'
                    '  input length: %d\n'
                    '  level lengths: %s\n',
                    self._total_length,
                    self._level_lengths)
    self._hierarchical_encoders = []
    num_splits = np.prod(self._level_lengths)
    for i, l in enumerate(self._level_lengths):
        num_splits //= l
        tf.logging.info('Level %d splits: %d', i, num_splits)
        h_encoder = self._core_encoder_cls()
        h_encoder.build(
            hparams, is_training,
            name_or_scope=tf.VariableScope(
                tf.AUTO_REUSE, 'encoder/hierarchical_level_%d' % i))
        self._hierarchical_encoders.append((num_splits, h_encoder))
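# A hedged aside (the scope name below is illustrative) on why build()
# passes a VariableScope constructed with tf.AUTO_REUSE: any call that
# enters the same absolute scope name resolves to one set of weights,
# wherever in the graph the call happens.
scope = tf.VariableScope(tf.AUTO_REUSE, 'encoder/hierarchical_level_0')
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    w1 = tf.get_variable('w', shape=[4, 4])
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    w2 = tf.get_variable('w', shape=[4, 4])
assert w1 is w2  # same underlying variable: created once, reused after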
def _build_layers_v2(self, input_dict, num_outputs, options):
    convs = options.get("conv_filters")
    if convs is None:
        convs = filters_mnih15
    activation = tf.nn.relu
    conv_output = input_dict["obs"]
    with tf.name_scope("mnih15_convs"):
        for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
            # feed each layer the previous layer's output, not the raw obs
            conv_output = slim.conv2d(
                conv_output,
                out_size,
                kernel,
                stride,
                activation_fn=activation,
                padding="SAME",
                scope="conv{}".format(i))
        out_size, kernel, stride = convs[-1]
        conv_output = slim.conv2d(
            conv_output,
            out_size,
            kernel,
            stride,
            activation_fn=activation,
            padding="VALID",
            scope="conv_out")
    action_out = slim.flatten(conv_output)
    with tf.name_scope("mnih15_FC"):
        # Share weights of the following layer with other instances of this
        # model (usually by other macad_agents in a Multi-Agent setting)
        with tf.variable_scope(
                tf.VariableScope(tf.AUTO_REUSE, "shared"),
                reuse=tf.AUTO_REUSE):
            shared_layer = slim.fully_connected(
                action_out, 128, activation_fn=activation)
        action_logits = slim.fully_connected(
            action_out, num_outputs=num_outputs, activation_fn=None)
    return action_logits, shared_layer
def __init__(self, input_size, layers, hidden_units, max_length,
             learning_rate):
    with tf.variable_scope("BiLSTM", reuse=tf.AUTO_REUSE):
        self.input_X = tf.placeholder(dtype=tf.float32,
                                      shape=[None, max_length, input_size],
                                      name="input_X")
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 1],
                                      name="input_y")
        self.sequence_length = tf.placeholder(dtype=tf.int32,
                                              shape=[None],
                                              name="sequence_length")
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                shape=(),
                                                name="dropout_keep_prob")

        self.output = self.build_bilstm(self.input_X, layers, hidden_units,
                                        self.dropout_keep_prob)

        # mean binary cross-entropy, reduced to a scalar so it can be
        # minimized and logged as a scalar summary
        self.loss = tf.reduce_mean(
            -(self.input_y * tf.log(self.output)
              + (1 - self.input_y) * tf.log(1 - self.output)))
        self.train = tf.train.AdamOptimizer(learning_rate).minimize(
            self.loss)

        # threshold the network output (not the loss) to get predictions
        self.prediction = tf.cast(tf.to_int32(self.output >= 0.5),
                                  dtype=tf.float32, name="prediction")
        self.accuracy = tf.multiply(
            tf.reduce_mean(tf.cast(tf.equal(self.input_y, self.prediction),
                                   dtype=tf.float32)),
            100, name="accuracy")

        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("accuracy", self.accuracy)
        self.merge_graph = tf.summary.merge_all()
def build_graph(self, graph):
    self.env.seed(self.random_seed)
    np.random.seed(self.random_seed)

    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        # Graph of the LSTM model of the world
        input_scope = tf.VariableScope(reuse=False, name="inputs")
        with tf.variable_scope(input_scope):
            self.state_input_plh = tf.placeholder(
                tf.float32,
                shape=[None, None, self.m_params['env_state_size']],
                name='state_input_plh')
            self.action_input_plh = tf.placeholder(
                tf.int32, shape=[None, None, 1], name='action_input_plh')
            self.mask_plh = tf.placeholder(
                tf.float32, shape=[None, None, 1], name="mask_plh")

            input_shape = tf.shape(self.state_input_plh)
            dynamic_batch_size, dynamic_num_steps = (input_shape[0],
                                                     input_shape[1])
            action_input = tf.one_hot(
                indices=tf.squeeze(self.action_input_plh, 2),
                depth=self.m_params['nb_actions'])
            m_inputs = tf.concat([self.state_input_plh, action_input], 2,
                                 name="m_inputs")

        m_scope = tf.VariableScope(reuse=False, name="m")
        with tf.variable_scope(m_scope):
            self.state_reward_preds, self.m_final_state, \
                self.m_initial_state = capacities.predictive_model(
                    self.m_params, m_inputs, dynamic_batch_size, None,
                    summary_collections=[self.M_SUMMARIES])

        fixed_m_scope = tf.VariableScope(reuse=False, name='FixedM')
        with tf.variable_scope(fixed_m_scope):
            self.update_m_fixed_vars_op = capacities.fix_scope(m_scope)

        m_training_scope = tf.VariableScope(reuse=False, name='m_training')
        with tf.variable_scope(m_training_scope):
            self.m_next_states = tf.placeholder(
                tf.float32,
                shape=[None, None, self.m_params['env_state_size']],
                name="m_next_states")
            self.m_rewards = tf.placeholder(
                tf.float32, shape=[None, None, 1], name="m_rewards")
            y_true = tf.concat([self.m_rewards, self.m_next_states], 2)
            with tf.control_dependencies([self.state_reward_preds]):
                self.m_loss = 1 / 2 * tf.reduce_mean(
                    tf.square(self.state_reward_preds - y_true)
                    * self.mask_plh)
            tf.summary.scalar('m_loss', self.m_loss,
                              collections=[self.M_SUMMARIES])

            m_adam = tf.train.AdamOptimizer(self.m_params['lr'])
            self.m_global_step = tf.Variable(0, trainable=False,
                                             name="m_global_step")
            tf.summary.scalar('m_global_step', self.m_global_step,
                              collections=[self.M_SUMMARIES])
            self.m_train_op = m_adam.minimize(
                self.m_loss, global_step=self.m_global_step)

        self.all_m_summary_t = tf.summary.merge_all(key=self.M_SUMMARIES)

        # Graph of the controller
        c_scope = tf.VariableScope(reuse=False, name="c")
        c_summary_collection = [self.C_SUMMARIES]
        with tf.variable_scope(c_scope):
            # c_cell = LSTMCell(
            #     num_units=self.c_params['nb_units'],
            #     initializer=tf.truncated_normal_initializer(
            #         mean=self.c_params['initial_mean'],
            #         stddev=self.c_params['initial_stddev']))
            # self.c_initial_state = c_cell.zero_state(
            #     dynamic_batch_size, dtype=tf.float32)
            # c_c_h_states, self.c_final_state = tf.nn.dynamic_rnn(
            #     c_cell, self.state_input_plh,
            #     initial_state=self.c_initial_state)
            # c_c_states, c_h_states = tf.split(
            #     value=c_c_h_states,
            #     num_or_size_splits=[self.c_params['nb_units'],
            #                         self.c_params['nb_units']],
            #     axis=2)
            # # Compute the Controller projection
            # self.probs_t, self.actions_t = projection_func(c_h_states)

            m_params = self.m_params
            model_func = lambda m_inputs, m_state: \
                capacities.predictive_model(m_params, m_inputs,
                                            dynamic_batch_size, m_state)
            c_params = self.c_params
            projection_func = lambda inputs: capacities.projection(
                c_params, inputs)
            cm_cell = CMCell(
                num_units=self.c_params['nb_units'],
                m_units=self.m_params['nb_units'],
                fixed_model_scope=fixed_m_scope,
                model_func=model_func,
                projection_func=projection_func,
                num_proj=self.c_params['nb_actions'],
                initializer=tf.truncated_normal_initializer(
                    mean=self.c_params['initial_mean'],
                    stddev=self.c_params['initial_stddev']))

            self.cm_initial_state = cm_cell.zero_state(
                dynamic_batch_size, dtype=tf.float32)
            probs_and_actions_t, self.cm_final_state = tf.nn.dynamic_rnn(
                cm_cell, self.state_input_plh,
                initial_state=self.cm_initial_state)
            self.probs_t, actions_t = tf.split(
                value=probs_and_actions_t,
                num_or_size_splits=[self.c_params['nb_actions'], 1],
                axis=2)
            self.actions_t = tf.cast(actions_t, tf.int32)
            # helper tensor used for inference
            self.action_t = self.actions_t[0, 0, 0]

        c_training_scope = tf.VariableScope(reuse=False, name='c_training')
        with tf.variable_scope(c_training_scope):
            self.c_rewards_plh = tf.placeholder(
                tf.float32, shape=[None, None, 1], name="c_rewards_plh")

            baseline = tf.reduce_mean(self.c_rewards_plh)

            batch_size, num_steps = (tf.shape(self.actions_t)[0],
                                     tf.shape(self.actions_t)[1])
            line_indices = tf.matmul(  # line indices
                tf.reshape(tf.range(0, batch_size), [-1, 1]),
                tf.ones([1, num_steps], dtype=tf.int32))
            column_indices = tf.matmul(  # column indices
                tf.ones([batch_size, 1], dtype=tf.int32),
                tf.reshape(tf.range(0, num_steps), [1, -1]))
            depth_indices = tf.squeeze(self.actions_t, 2)
            stacked_actions = tf.stack(
                [line_indices, column_indices, depth_indices], 2)

            with tf.control_dependencies([self.probs_t]):
                log_probs = tf.expand_dims(
                    tf.log(tf.gather_nd(self.probs_t, stacked_actions)), 2)
                masked_log_probs = log_probs * self.mask_plh
                self.c_loss = tf.reduce_mean(-tf.reduce_sum(
                    masked_log_probs * (self.c_rewards_plh - baseline), 1))
            tf.summary.scalar('c_loss', self.c_loss,
                              collections=c_summary_collection)

            c_adam = tf.train.AdamOptimizer(self.c_params['lr'])
            self.c_global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[tf.GraphKeys.GLOBAL_STEP,
                             tf.GraphKeys.GLOBAL_VARIABLES],
                dtype=tf.int32)
            tf.summary.scalar('c_global_step', self.c_global_step,
                              collections=c_summary_collection)
            self.c_train_op = c_adam.minimize(
                self.c_loss, global_step=self.c_global_step)

        self.all_c_summary_t = tf.summary.merge_all(key=self.C_SUMMARIES)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.episode_id, self.inc_ep_id_op = capacities.counter(
            "episode_id")
        self.episode_id_sum = tf.summary.scalar('episode_id',
                                                self.episode_id)
        self.time, self.inc_time_op = capacities.counter("time")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score',
                                              self.pscore_plh)

    return graph
import os
import sys

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tflearn

from genotypes import PRIMITIVES
from genotypes import Genotype
from operations import *
import utils

# entering this scope escapes to the root variable scope
null_scope = tf.VariableScope("")


def MixedOp(x, C_out, stride, index, reduction):
    ops = []
    with tf.variable_scope(null_scope):
        with tf.variable_scope("arch_params", reuse=tf.AUTO_REUSE):
            weight = tf.get_variable(
                "weight{}_{}".format(2 if reduction else 1, index),
                [len(PRIMITIVES)],
                initializer=tf.random_normal_initializer(0, 1e-3),
                regularizer=slim.l2_regularizer(0.0001))
    weight = tf.nn.softmax(weight)
    weight = tf.reshape(weight, [-1, 1, 1, 1])

    index = 0
    for primitive in PRIMITIVES:
        op = OPS[primitive](x, C_out, stride)
        if 'pool' in primitive:
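# Hedged illustration (assumed, not from the original file) of what
# null_scope buys: entering it jumps back to the root variable scope, so
# the "arch_params" variables are created globally and shared by every
# MixedOp call, regardless of the cell scope the caller is in.
with tf.variable_scope("cell_0"):
    with tf.variable_scope(null_scope):
        with tf.variable_scope("arch_params", reuse=tf.AUTO_REUSE):
            w = tf.get_variable("weight1_0", [len(PRIMITIVES)])
assert w.name == "arch_params/weight1_0:0"  # no "cell_0/" prefix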
def __init__(self, config, env, sess, writer, name='haggle',
             trainable=True):
    super(Model, self).__init__()

    self.config = {
        'pre': [],
        'lstm': 128,
        'value_scale': 0.5,
        'lr': 0.001,
        'grad_clip': 0.5,
        'ppo': 0.1,
        'ppo_epochs': 10,
    }
    self.max_rounds = env.max_rounds
    self.config.update(config)

    self.original_name = name
    self.version = 0
    self.name = name

    self.observation_space = env.observation_space
    self.action_space = env.action_space
    self.context_space = env.context_space
    self.reward_space = env.reward_space
    self.empty_reward = np.zeros(self.reward_space, dtype='float32')

    # Used for getting offers by index
    self.env = env

    self.scope = tf.VariableScope(reuse=False, name=name)
    self.sess = sess
    self.writer = writer
    self.writer_step = 0

    with tf.variable_scope(self.scope):
        self.input = tf.placeholder(
            tf.int32, shape=(None, self.observation_space,), name='input')
        self.context = tf.placeholder(
            tf.int32, shape=(None, self.context_space,),
            name='input_context')

        # Init layers
        self.layers = {
            'embedding': tf.get_variable(
                'embedding', dtype=tf.float32,
                initializer=tf.initializers.random_normal,
                shape=(self.action_space, self.config['lstm'])),
            'pre': [],
            'action': tf.layers.Dense(self.action_space, name='action'),
            'value': tf.layers.Dense(self.reward_space, name='value'),
            'context': tf.layers.Dense(self.config['lstm'] * 2,
                                       activation=tf.nn.relu,
                                       name='context'),
            'lstm': tf.contrib.rnn.LSTMBlockCell(
                name='lstm', num_units=self.config['lstm']),
        }

        self.build_context = self.layers['context'](
            tf.cast(self.context, dtype=tf.float32))

        for i, width in enumerate(self.config['pre']):
            pre = tf.layers.Dense(width, activation=tf.nn.relu,
                                  name='preprocess_{}'.format(i))
            self.layers['pre'].append(pre)

        state_size = self.layers['lstm'].state_size
        self.rnn_state = tf.placeholder(
            tf.float32, shape=(None, state_size.c + state_size.h),
            name='rnn_state')
        state = tf.contrib.rnn.LSTMStateTuple(
            c=self.rnn_state[:, :state_size.c],
            h=self.rnn_state[:, state_size.c:])

        new_state, action, action_probs, value = \
            self._network(state, self.input)

        self.action = action
        self.action_probs = action_probs
        self.value = value
        self.new_state = tf.concat([new_state.c, new_state.h], axis=-1,
                                   name='new_state')

        # Losses
        if trainable:
            self._losses()

        # Weight loading
        self.trainable_variables = self.scope.trainable_variables()
        self.weight_placeholders = {}
        self.load_ops = []
        for var in self.trainable_variables:
            name = var.name.split(':', 1)[0]
            name = name.split('/', 1)[1]
            placeholder = tf.placeholder(
                var.dtype, shape=var.shape,
                name='{}/placeholder'.format(name))
            self.weight_placeholders[name] = placeholder
            self.load_ops.append(var.assign(placeholder))
def __init__(self, cell, name):
    '''ScopeRNNCellWrapper constructor'''
    self._cell = cell
    self.scope = tf.VariableScope(None, name)
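# Hedged sketch of how such a wrapper is typically completed; this
# __call__ method is an assumption, not part of the original snippet.
# Applying the wrapped cell inside the stored VariableScope makes every
# invocation create or reuse one shared set of weights.
def __call__(self, inputs, state, scope=None):
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
        return self._cell(inputs, state)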
def _get_input_scope(self, default_name=""): if self.share_embeddings == EmbeddingsSharingLevel.SOURCE_TARGET_INPUT: name = "shared_embeddings" else: name = default_name return tf.VariableScope(None, name=tf.get_variable_scope().name + "/" + name)
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs = tf.placeholder(
            tf.float32, shape=[None, self.observation_space.shape[0] + 1],
            name='inputs')

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            self.probs, self.actions = capacities.policy(
                self.policy_params, self.inputs)
        self.action_t = tf.squeeze(self.actions, 1)[0]

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.q_values = capacities.value_f(self.q_params, self.inputs)
        self.q = self.q_values[0, tf.stop_gradient(self.action_t)]

        with tf.variable_scope('Training'):
            stacked_actions = tf.stack([
                tf.range(0, tf.shape(self.actions)[0]),
                tf.squeeze(self.actions, 1)
            ], 1)
            qs = tf.gather_nd(self.q_values, stacked_actions)
            log_probs = tf.log(tf.gather_nd(self.probs, stacked_actions))
            self.policy_loss = -tf.reduce_sum(
                log_probs * tf.stop_gradient(qs))

            self.rewards = tf.placeholder(tf.float32, shape=[None],
                                          name="rewards")
            self.next_states = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name="next_states")
            self.next_actions = tf.placeholder(tf.int32, shape=[None],
                                               name="next_actions")
            with tf.variable_scope(q_scope, reuse=True):
                next_q_values = capacities.value_f(self.q_params,
                                                   self.next_states)
            next_stacked_actions = tf.stack([
                tf.range(0, tf.shape(self.next_actions)[0]),
                self.next_actions
            ], 1)
            next_qs = tf.gather_nd(next_q_values, next_stacked_actions)

            target_qs1 = tf.stop_gradient(
                self.rewards + self.discount * next_qs)
            target_qs2 = self.rewards
            stacked_targets = tf.stack([target_qs1, target_qs2], 1)
            select_targets = tf.stack([
                tf.range(0, tf.shape(self.next_states)[0]),
                tf.cast(self.next_states[:, -1], tf.int32)
            ], 1)
            target_qs = tf.gather_nd(stacked_targets, select_targets)
            self.q_loss = 1 / 2 * tf.reduce_sum(tf.square(target_qs - qs))

            self.loss = self.policy_loss + self.q_scale_lr * self.q_loss

            adam = tf.train.AdamOptimizer(self.lr)
            self.global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[tf.GraphKeys.GLOBAL_STEP,
                             tf.GraphKeys.GLOBAL_VARIABLES])
            self.train_op = adam.minimize(self.loss,
                                          global_step=self.global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
        self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                   self.policy_loss_plh)
        self.q_loss_plh = tf.placeholder(tf.float32, shape=[])
        self.q_loss_sum_t = tf.summary.scalar('q_loss', self.q_loss_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter(
            "episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score',
                                              self.pscore_plh)

    return graph
def neural_net(self):
    ###############
    # Input Module
    ###############

    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent
    # layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into
    # numeric instability or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these
    # probabilities are, see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store
    # data passed from feed-forward layers.
    ff_hidden_size = 256
    # The strength of our regularization. Increase to encourage sparsity
    # in episodic memory, but it makes training slower. Don't make this
    # larger than learning_rate.
    weight_decay = 0.00000001

    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.
    training_iterations_count = 400000

    # How many iterations of training occur before each validation check.
    display_step = 100

    # Context: A [batch_size, maximum_context_length,
    # word_vectorization_dimensions] tensor that contains all the context
    # information.
    # context = tf.placeholder(tf.float64, [None, None, D], "context")
    # context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2]
    # tensor that contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2],
                                            "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input
    # unit. Likewise, output_p is the probability of maintaining a
    # specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need
    # that, and can ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(
        gru_drop, self._story, dtype=tf.float64, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence
    # contexts
    s = input_module_outputs

    # Question Module

    # query: A [batch_size, maximum_question_length,
    # word_vectorization_dimensions] tensor that contains all of the
    # questions.
    query = tf.placeholder(tf.float64, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question
    # length information. input_query_lengths[:,1] has the actual lengths;
    # input_query_lengths[:,0] is a simple range() so that it plays nice
    # with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2],
                                         "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(
        gru_drop, query, dtype=tf.float64,
        scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)

    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is
    # broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable(
        "attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
        tf.float64, initializer=attend_init)
    w_2 = tf.get_variable(
        "attend_w2", [1, recurrent_cell_size, output_size],
        tf.float64, initializer=attend_init)
    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float64, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float64, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.nn.l2_loss(b_2))

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.

        c: A [batch_size, maximum_sentence_count, recurrent_cell_size]
            tensor that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size]
            tensor that contains the current memory. It should be the
            same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor
            that acts as a binary mask for which facts exist and which do
            not.
        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem,
                                   (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward
            # network. We tile the weights in order to manually broadcast,
            # since tf.matmul does not automatically broadcast batch
            # matrix multiplication as of TensorFlow 1.2.
            m1 = tf.matmul(
                attending * existing_facts,
                tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) \
                * existing_facts
            # bias_1: A masked version of the first feed-forward layer's
            # bias over only existing facts.
            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a
            # tanh nonlinearity; choosing relu was a design choice
            # intended to avoid issues with low gradient magnitude when
            # the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward
            # network. Still tiling weights for the same reason described
            # in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(
                w_2, tf.stack([tf.shape(attending)[0], 1, 1])))
            # bias_2: A masked version of the second feed-forward layer's
            # bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of
            # weights, which is used to help make sure the softmax
            # nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an
            # otherwise dense tensor. We make norm_m2 a sparse tensor,
            # then make it dense again after the operation.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather,
                                          softmax_shape)
            return tf.expand_dims(
                tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)),
                -1)

    # facts_0s: a [batch_size, max_facts_length, 1] tensor whose values
    # are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(
        tf.count_nonzero(input_sentence_endings[:, :, -1:], -1,
                         keep_dims=True),
        tf.float64)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past)
        # memory state of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network
        # attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(
                cs,
                tf.tile(tf.reshape(memory[-1],
                                   [-1, 1, recurrent_cell_size]), size),
                facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index:
                                 index < tf.shape(cs)[1])
            update_state = (lambda state, index:
                            (attend_to[:, index, :]
                             * attention_gru(cs[:, index, :], state)[0]
                             + retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(
                while_valid_index,
                (lambda state, index: (update_state(state, index),
                                       index + 1)),
                loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables
            # every pass.
            scope.reuse_variables()

    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float64, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet; we still
        # have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(
                tf.scatter_nd(ends_indices, all_ends[:, 1],
                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(
                tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                              tf.ones_like(range_ind),
                              [tf.reduce_max(ind) + 1,
                               tf.shape(ind)[0]]),
                bool)
            # A bit of a trick. With the locations of the ends of the
            # mask (the last periods in each of the contexts) as 1 and
            # the rest as 0, we can scan with exclusive or (starting from
            # all 1). For each context in the batch, this will result in
            # 1s up until the marker (the location of that last period)
            # and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends,
                           tf.ones_like(range_ind, dtype=bool))

        # We score each possible word inversely with their Euclidean
        # distance to the regressed word. The highest score (lowest
        # distance) will correspond to the selected word.
        logits = -tf.reduce_sum(
            tf.square(self._story * tf.transpose(
                tf.expand_dims(tf.cast(mask, tf.float64), -1),
                [1, 0, 2]) - logit),
            axis=-1)
    return logits
_seqlens = tf.placeholder(tf.int32, shape=[batch_size])

if pre_trained:
    embeddings = tf.Variable(
        tf.constant(0.0, shape=[vocabulary_size, glove_size]),
        trainable=True)
    embedding_init = embeddings.assign(embedding_placeholder)
    embed = tf.nn.embedding_lookup(embeddings, _inputs)
else:
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_dimension],
                          -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, _inputs)

with tf.name_scope('biGRU'):
    # tf.variable_scope (not tf.VariableScope) is the context manager
    with tf.variable_scope('forward'):
        gru_fw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
        gru_fw_cell = tf.contrib.rnn.DropoutWrapper(gru_fw_cell)
    with tf.variable_scope('backward'):
        gru_bw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
        gru_bw_cell = tf.contrib.rnn.DropoutWrapper(gru_bw_cell)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=gru_fw_cell,
        cell_bw=gru_bw_cell,
        inputs=embed,
        sequence_length=_seqlens,
        dtype=tf.float32,
        scope='BiGRU')
states = tf.concat(values=states, axis=1)
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
    """Forcefully enter the specified variable scope, ignoring any
    surrounding scopes."""
    return tf.variable_scope(
        tf.VariableScope(name=scope, **kwargs),
        auxiliary_name_scope=False)
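# Hypothetical usage of absolute_variable_scope (the names here are
# illustrative): even from inside a nested scope, the variable lands in a
# top-level scope.
with tf.variable_scope("outer"):
    with absolute_variable_scope("globals", reuse=tf.AUTO_REUSE):
        step = tf.get_variable("step", shape=[], dtype=tf.int32,
                               initializer=tf.zeros_initializer())
print(step.name)  # "globals/step:0", not "outer/globals/step:0"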
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None],
                                         name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs', shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = \
                    capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        et_scope = tf.VariableScope(reuse=False, name='EligibilityTraces')
        with tf.variable_scope(et_scope):
            et, update_et_op, self.reset_et_op = \
                capacities.eligibility_traces(
                    self.Qs, self.inputs_plh, self.actions_t,
                    self.discount, self.lambda_value)

        with tf.variable_scope('Learning'):
            self.rewards_plh = tf.placeholder(tf.float32, shape=[None],
                                              name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None],
                                                  name="next_states_plh")

            self.targets_t = capacities.get_q_learning_target(
                self.Qs, self.rewards_plh, self.next_states_plh,
                self.discount)
            target = self.targets_t[0]
            state_action_pairs = tf.stack(
                [self.inputs_plh, self.actions_t], 1)
            estimate = tf.gather_nd(self.Qs, state_action_pairs)[0]
            err_estimate = target - estimate

            global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[tf.GraphKeys.GLOBAL_STEP,
                             tf.GraphKeys.GLOBAL_VARIABLES])
            lr = tf.train.exponential_decay(
                tf.constant(self.lr, dtype=tf.float32), global_step,
                self.lr_decay_steps, 0.5, staircase=True)
            tf.summary.scalar('lr', lr)
            inc_global_step = global_step.assign_add(1)
            with tf.control_dependencies([update_et_op, inc_global_step]):
                self.loss = tf.reduce_sum(err_estimate * et)
                self.train_op = tf.assign_add(self.Qs,
                                              lr * err_estimate * et)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter(
            "episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score',
                                              self.pscore_plh)

    return graph
def __init__(self):
    # tf.VariableScope is not a context manager on its own; wrap it in
    # tf.variable_scope to actually enter it (here with reuse=True).
    with tf.variable_scope(tf.VariableScope(True, 'd')):
        pass
def __init__(self, name, env_spec):
    self._name = name
    self._env_spec = env_spec
    self._variable_scope = tf.VariableScope(reuse=False, name=name)
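# Assumed follow-up (not in the original snippet): keeping the
# VariableScope object around makes later variable lookups for this
# object's subgraph one-liners.
def get_trainable_vars(self):
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                             scope=self._variable_scope.name)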
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs_plh = tf.placeholder(tf.int32, shape=[None],
                                         name="inputs_plh")

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.Qs = tf.get_variable(
                'Qs', shape=[self.nb_state, self.action_space.n],
                initializer=tf.constant_initializer(self.initial_q_value),
                dtype=tf.float32)
            tf.summary.histogram('Qarray', self.Qs)
            self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

        fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
        with tf.variable_scope(fixed_q_scope):
            self.update_fixed_vars_op = capacities.fix_scope(q_scope)

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            if 'UCB' in self.config and self.config['UCB']:
                self.actions_t, self.probs_t = capacities.tabular_UCB(
                    self.Qs, self.inputs_plh)
            else:
                self.actions_t, self.probs_t = \
                    capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
            self.action_t = self.actions_t[0]
            self.q_value_t = self.q_preds_t[0][self.action_t]

        # Experienced replay part
        with tf.variable_scope('Learning'):
            with tf.variable_scope(fixed_q_scope, reuse=True):
                fixed_Qs = tf.get_variable('Qs')

            self.rewards_plh = tf.placeholder(tf.float32, shape=[None],
                                              name="rewards_plh")
            self.next_states_plh = tf.placeholder(tf.int32, shape=[None],
                                                  name="next_states_plh")

            # Note that we use the fixed Qs to create the targets
            self.targets_t = capacities.get_q_learning_target(
                fixed_Qs, self.rewards_plh, self.next_states_plh,
                self.discount)
            self.loss, self.train_op = \
                capacities.tabular_learning_with_lr(
                    self.lr, self.lr_decay_steps, self.Qs,
                    self.inputs_plh, self.actions_t, self.targets_t)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter(
            "episode_id")
        self.event_count, self.inc_event_count_op = capacities.counter(
            "event_count")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score',
                                              self.pscore_plh)

    return graph
def virtual_assistant(server_message): import itertools # import matplotlib.pyplot as plt import numpy as np import tensorflow as tf # print(os.getcwd()) def fill_unk(unk): global glove_wordmap glove_wordmap[unk] = RS.multivariate_normal(m, np.diag(v)) return glove_wordmap[unk] '''-----------------------------------------------------------''' def sentence2sequence(sentence): tokens = sentence.strip('"(),-').lower().split(" ") rows = [] words = [] # Greedy search for tokens for token in tokens: i = len(token) while len(token) > 0: word = token[:i] # print(word) if word in glove_wordmap: rows.append(glove_wordmap[word]) words.append(word) token = token[i:] i = len(token) continue else: i = i - 1 if i == 0: # word OOV # https://arxiv.org/pdf/1611.01436.pdf rows.append(fill_unk(token)) words.append(token) break return np.array(rows), words # import sys # text = " ".join(sys.argv[1:]).replace('_', '\n') + " a 1" # print("-----------------", text) '''-----------------------------------------------------------''' def contextualize(category, server_message): data = [] context = [] server_message = server_message + "\tt\t3" from pprint import pprint pprint(server_message) print(server_message) for line in server_message.split('\n'): l, ine = tuple(line.split(" ", 1)) # Split the line numbers from the sentences they refer to. if l is "1": # New contexts always start with 1, # so this is a signal to reset the context. context = [] if "\t" in ine: # Tabs are the separator between questions and answers, # and are not present in context statements. # print(tuple(ine.split("\t"))) question, answer, support = tuple(ine.split("\t")) # print("old", question, answer, support.replace("\n",'')) # print("old", "-------------------------------------------") # print(*context) # print("-------------------------------------------") data.append((tuple(zip(*context)) + sentence2sequence(question) + sentence2sequence(answer) + ([int(s) for s in support.replace("\n", '')],))) # Multiple questions may refer to the same context, so we don't reset it. else: # Context sentence. # print(ine.replace("\n", '')) context.append(sentence2sequence(ine.replace("\n", ''))) # print("-------------------------------------------") # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") # print(data[0]) # print(data) return data def finalize(data): """ Prepares data generated by contextualize() for use in the network. """ final_data = [] for cqas in data: contextvs, contextws, qvs, qws, avs, aws, spt = cqas lengths = itertools.accumulate(len(cvec) for cvec in contextvs) context_vec = np.concatenate(contextvs) context_words = sum(contextws, []) # Location markers for the beginnings of new sentences. sentence_ends = np.array(list(lengths)) final_data.append((context_vec, sentence_ends, qvs, spt, context_words, cqas, avs, aws)) return np.array(final_data) def attention(c, mem, existing_facts): """ Custom attention mechanism. c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains all the facts from the contexts. mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains the current memory. It should be the same memory for all facts for accurate results. existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that acts as a binary mask for which facts exist and which do not. """ with tf.variable_scope("attending") as scope: # attending: The metrics by which we decide what to attend to. 
attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2) # m1: First layer of multiplied weights for the feed-forward network. # We tile the weights in order to manually broadcast, since tf.matmul does not # automatically broadcast batch matrix multiplication as of TensorFlow 1.2. m1 = tf.matmul(attending * existing_facts, tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts # bias_1: A masked version of the first feed-forward layer's bias # over only existing facts. bias_1 = b_1 * existing_facts # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity; # choosing relu was a design choice intended to avoid issues with # low gradient magnitude when the tanh returned values close to 1 or -1. tnhan = tf.nn.relu(m1 + bias_1) # m2: Second layer of multiplied weights for the feed-forward network. # Still tiling weights for the same reason described in m1's comments. m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1]))) # bias_2: A masked version of the second feed-forward layer's bias. bias_2 = b_2 * existing_facts # norm_m2: A normalized version of the second layer of weights, which is used # to help make sure the softmax nonlinearity doesn't saturate. norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1) # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor. # We make norm_m2 a sparse tensor, then make it dense again after the operation. softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1] softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx) softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1] softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape) return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1) def prep_batch(batch_data, more_data=False): """ Prepare all the preproccessing that needs to be done on a batch-by-batch basis. 
""" context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data) ends = list(sentence_ends) maxend = max(map(len, ends)) aends = np.zeros((len(ends), maxend)) for index, i in enumerate(ends): for indexj, x in enumerate(i): aends[index, indexj] = x - 1 new_ends = np.zeros(aends.shape + (2,)) for index, x in np.ndenumerate(aends): new_ends[index + (0,)] = index[0] new_ends[index + (1,)] = x contexts = list(context_vec) max_context_length = max([len(x) for x in contexts]) contextsize = list(np.array(contexts[0]).shape) contextsize[0] = max_context_length final_contexts = np.zeros([len(contexts)] + contextsize) contexts = [np.array(x) for x in contexts] for i, context in enumerate(contexts): final_contexts[i, 0:len(context), :] = context max_query_length = max(len(x) for x in questionvs) querysize = list(np.array(questionvs[0]).shape) querysize[:1] = [len(questionvs), max_query_length] queries = np.zeros(querysize) querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs]))) questions = [np.array(q) for q in questionvs] for i, question in enumerate(questions): queries[i, 0:len(question), :] = question data = {context_placeholder: final_contexts, input_sentence_endings: new_ends, query: queries, input_query_lengths: querylengths, gold_standard: answervs} return (data, context_words, cqas) if more_data else data def restore_sess(location): saver = tf.train.Saver() session = tf.Session() saver.restore(session, location) return session def session_manage(location): full_location = location + "model.ckpt" return restore_sess(full_location) '''-----------------------------------------------------------''' # Deserialize GloVe vectors # print(os.getcwd()) # os.chdir("..") # print(os.getcwd()) from entity.embeddingrepo import EmbeddingDbRepo glove_wordmap = {} embrepo = EmbeddingDbRepo() gloves = embrepo.get() for glove in gloves: name, vector = list(glove)[1], list(glove)[2] glove_wordmap[name] = np.fromstring(vector, sep=" ") # glove_wordmap = {} # with open(glove_vectors_file, "r", encoding='utf-8-sig') as glove: # for line in glove: # name, vector = tuple(line.split(" ", 1)) # glove_wordmap[name] = np.fromstring(vector, sep=" ") wvecs = [] for item in glove_wordmap.items(): wvecs.append(item[1]) s = np.vstack(wvecs) # Gather the distribution hyperparameters v = np.var(s, 0) m = np.mean(s, 0) RS = np.random.RandomState() # final_train_data = finalize(train_data) final_test_data = finalize(contextualize(1, server_message)) # finalize(test_data) '''-----------------------------------------------------------''' tf.reset_default_graph() '''-----------------------------------------------------------''' # Hyperparameters # The number of dimensions used to store data passed between recurrent layers in the network. recurrent_cell_size = 128 # The number of dimensions in our word vectorizations. D = 50 # How quickly the network learns. Too high, and we may run into numeric instability # or other issues. learning_rate = 0.005 # Dropout probabilities. For a description of dropout and what these probabilities are, # see Entailment with TensorFlow. input_p, output_p = 0.5, 0.5 # How many questions we train on at a time. batch_size = 128 # Number of passes in episodic memory. We'll get to this later. passes = 4 # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers. ff_hidden_size = 256 weight_decay = 0.00000001 # The strength of our regularization. 
Increase to encourage sparsity in episodic memory, # but makes training slower. Don't make this larger than leraning_rate. training_iterations_count = 400000 # How many questions the network trains on each time it is trained. # Some questions are counted multiple times. display_step = 100 # How many iterations of training occur before each validation check. '''-----------------------------------------------------------''' # Input Module # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor # that contains all the context information. context = tf.placeholder(tf.float32, [None, None, D], "context") context_placeholder = context # I use context as a variable name later on # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that # contains the locations of the ends of sentences. input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence") # recurrent_cell_size: the number of hidden units in recurrent layers. input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size) # input_p: The probability of maintaining a specific hidden input unit. # Likewise, output_p is the probability of maintaining a specific hidden output unit. gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p) # dynamic_rnn also returns the final internal state. We don't need that, and can # ignore the corresponding output (_). input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module") # cs: the facts gathered from the context. cs = tf.gather_nd(input_module_outputs, input_sentence_endings) # to use every word as a fact, useful for tasks with one-sentence contexts s = input_module_outputs '''-----------------------------------------------------------''' # Question Module # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor # that contains all of the questions. query = tf.placeholder(tf.float32, [None, None, D], "query") # input_query_lengths: A [batch_size, 2] tensor that contains question length information. # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range() # so that it plays nice with gather_nd. input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths") question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32, scope=tf.VariableScope(True, "input_module")) # q: the question states. A [batch_size, recurrent_cell_size] tensor. q = tf.gather_nd(question_module_outputs, input_query_lengths) '''-----------------------------------------------------------''' # Episodic Memory # make sure the current memory (i.e. 
the question vector) is broadcasted along the facts dimension size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)]) re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size) # Final output for attention, needs to be 1 in order to create a mask output_size = 1 # Weights and biases attend_init = tf.random_normal_initializer(stddev=0.1) w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size], tf.float32, initializer=attend_init) w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size], tf.float32, initializer=attend_init) b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size], tf.float32, initializer=attend_init) b_2 = tf.get_variable("attend_b2", [1, output_size], tf.float32, initializer=attend_init) # Regulate all the weights and biases tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2)) # facts_0s: a [batch_size, max_facts_length, 1] tensor # whose values are 1 if the corresponding fact exists and 0 if not. facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32) with tf.variable_scope("Episodes") as scope: attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size) # memory: A list of all tensors that are the (current or past) memory state # of the attention mechanism. memory = [q] # attends: A list of all tensors that represent what the network attends to. attends = [] for a in range(passes): # attention mask attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size), facts_0s) # Inverse attention mask, for what's retained in the state. retain = 1 - attend_to # GRU pass over the facts, according to the attention mask. while_valid_index = (lambda state, index: index < tf.shape(cs)[1]) update_state = (lambda state, index: (attend_to[:, index, :] * attention_gru(cs[:, index, :], state)[0] + retain[:, index, :] * state)) # start loop with most recent memory and at the first index memory.append(tuple(tf.while_loop(while_valid_index, (lambda state, index: (update_state(state, index), index + 1)), loop_vars=[memory[-1], 0]))[0]) attends.append(attend_to) # Reuse variables so the GRU pass uses the same variables every pass. scope.reuse_variables() '''-----------------------------------------------------------''' # Answer Module # a0: Final memory state. (Input to answer module) a0 = tf.concat([memory[-1], q], -1) # fc_init: Initializer for the final fully connected layer's weights. fc_init = tf.random_normal_initializer(stddev=0.1) with tf.variable_scope("answer"): # w_answer: The final fully connected layer's weights. w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D], tf.float32, initializer=fc_init) # Regulate the fully connected layer's weights tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_answer)) # The regressed word. This isn't an actual word yet; # we still have to find the closest match. logit = tf.expand_dims(tf.matmul(a0, w_answer), 1) # Make a mask over which words exist. 
with tf.variable_scope("ending"): all_ends = tf.reshape(input_sentence_endings, [-1, 2]) range_ends = tf.range(tf.shape(all_ends)[0]) ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1) ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1], [tf.shape(q)[0], tf.shape(all_ends)[0]]), axis=-1) range_ind = tf.range(tf.shape(ind)[0]) mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1), tf.ones_like(range_ind), [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]), bool) # A bit of a trick. With the locations of the ends of the mask (the last periods in # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or # (starting from all 1). For each context in the batch, this will result in 1s # up until the marker (the location of that last period) and 0s afterwards. mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool)) # We score each possible word inversely with their Euclidean distance to the regressed word. # The highest score (lowest distance) will correspond to the selected word. logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims( tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1) '''-----------------------------------------------------------''' # Training # gold_standard: The real answers. gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer") with tf.variable_scope('accuracy'): eq = tf.equal(context, gold_standard) corrbool = tf.reduce_all(eq, -1) logloc = tf.reduce_max(logits, -1, keepdims=True) # locs: A boolean tensor that indicates where the score # matches the minimum score. This happens on multiple dimensions, # so in the off chance there's one or two indexes that match # we make sure it matches in all indexes. locs = tf.equal(logits, logloc) # correctsbool: A boolean tensor that indicates for which # words in the context the score always matches the minimum score. correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1) # corrects: A tensor that is simply correctsbool cast to floats. corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32), tf.zeros_like(correctsbool, dtype=tf.float32)) # corr: corrects, but for the right answer instead of our selected answer. corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32), tf.zeros_like(corrbool, dtype=tf.float32)) with tf.variable_scope("loss"): # Use sigmoid cross entropy as the base loss, # with our distances as the relative probabilities. There are # multiple correct labels, for each location of the answer word within the context. loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1), labels=corr) # Add regularization losses, weighted by weight_decay. total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than # just the learning rate, but it's not necessary to find a very good optimum. optimizer = tf.train.AdamOptimizer(learning_rate) # Once we have an optimizer, we ask it to minimize the loss # in order to work towards the proper training. 
opt_op = optimizer.minimize(total_loss)

'''-----------------------------------------------------------'''
# Initialize variables
init = tf.global_variables_initializer()

# Launch the TensorFlow session
sess = tf.Session()
sess.run(init)

'''-----------------------------------------------------------'''
# Prepare validation set
batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
batch_data = final_test_data[batch]
validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)

# Restore the pre-trained model into the session.
# (A sketch of a typical restore helper appears after this program.)
train_location = "./neural/pre_trained_model/"
sess = session_manage(train_location)

'''-----------------------------------------------------------'''
ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                [query, cs, question_module_outputs],
                feed_dict=validation_set)
a = ancr[0]
n = ancr[1]
cr = ancr[2]
attenders = np.array(ancr[6:-3])
faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

'''-----------------------------------------------------------'''
# Locations of responses within contexts
indices = np.argmax(n, axis=1)
# Locations of actual answers within contexts
indicesc = np.argmax(a, axis=1)
limit = 1
for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
    ccc = " ".join(cw)
    print("TEXT: ", ccc)
    print("QUESTION: ", " ".join(cqa[3]))
    print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
    print("EXPECTED: ", cw[e])
    print()
return "RESPONSE: " + cw[i]

'''-----------------------------------------------------------'''
# train_location = "./max_train_model/"
# sess = session_manage(train_location, rewrite=True, iter=training_iterations_count,
#                       batch_size=batch_size)
'''-----------------------------------------------------------'''
# Final testing accuracy
print(np.mean(sess.run([corrects], feed_dict=prep_batch(final_test_data))[0]))

'''-----------------------------------------------------------'''
sess.close()
'''-----------------------------------------------------------'''
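'''-----------------------------------------------------------'''
# A standalone toy version (not from the original code; all names, shapes, and
# values are made up) of the attention-gated update loop in the "Episodes"
# scope above. To stay dependency-free, the raw fact vector stands in for the
# attention GRU's output; the structure of the tf.while_loop update is the same.
import tensorflow as tf

batch, steps, cell_size = 2, 5, 8
facts = tf.random_normal([batch, steps, cell_size])
gate = tf.sigmoid(tf.random_normal([batch, steps, 1]))  # stand-in attention mask
state0 = tf.zeros([batch, cell_size])

def while_valid_index(state, index):
    return index < steps

def update_state(state, index):
    g = gate[:, index, :]
    candidate = facts[:, index, :]  # stand-in for attention_gru(...)[0]
    # Blend the candidate into the state according to the attention gate.
    return g * candidate + (1.0 - g) * state, index + 1

final_state, _ = tf.while_loop(while_valid_index, update_state,
                               loop_vars=[state0, 0])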
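'''-----------------------------------------------------------'''
# Toy illustration (again not from the original code; values are made up) of
# the scatter_nd + scan(logical_xor) trick from the "ending" scope: given the
# position of the end marker in each context, build a per-position boolean mask.
import tensorflow as tf

ind = tf.constant([2, 4])  # position of the end marker in each context
range_ind = tf.range(tf.shape(ind)[0])
mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                  tf.ones_like(range_ind),
                                  [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]),
                    tf.bool)
mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=tf.bool))

with tf.Session() as sess:
    print(sess.run(tf.transpose(mask)))
    # [[ True  True False False False]
    #  [ True  True  True  True False]]
    # i.e. True before each context's marker, False from the marker onward.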
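'''-----------------------------------------------------------'''
# session_manage is a project-specific helper that is not defined in this
# excerpt. A minimal sketch, assuming it simply restores the latest checkpoint
# from train_location into a fresh session (the name restore_session and the
# exact behavior are illustrative assumptions):
import tensorflow as tf

def restore_session(train_location):
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.latest_checkpoint(train_location)
    if ckpt is not None:
        # Overwrite the freshly initialized variables with the saved weights.
        tf.train.Saver().restore(sess, ckpt)
    return sess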
query = tf.placeholder(tf.float32, [None, None, D], "query")

# input_query_lengths: A [batch_size, 2] tensor that contains question length
# information. input_query_lengths[:, 1] has the actual lengths;
# input_query_lengths[:, 0] is a simple range() so that it plays nice with gather_nd.
input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

# Unidirectional alternative:
# question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
#                                                scope=tf.VariableScope(True, "input_module"))
question_module_outputs, question_module_outputs_bw = tf.nn.bidirectional_dynamic_rnn(
    lstm_fw_cell, lstm_bw_cell, query, dtype=tf.float32,
    scope=tf.VariableScope(True, "input_module"))
question_module_outputs = tf.concat(question_module_outputs, 2)

# q: the question states. A [batch_size, recurrent_cell_size] tensor.
# q = tf.squeeze(tf.gather_nd(question_module_outputs, input_query_lengths), axis=1)
q = tf.gather_nd(question_module_outputs, input_query_lengths)

# Episodic Memory
# make sure the current memory (i.e. the question vector) is broadcasted along
# the facts dimension
size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)
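# Tiny standalone illustration (made-up numbers, not from the original code) of
# the gather_nd pattern used above to pick one time step per example from a
# [batch, time, features] tensor with [row, length] index pairs:
import tensorflow as tf

outputs = tf.reshape(tf.range(24, dtype=tf.float32), [2, 3, 4])  # [batch, time, feat]
lengths = tf.constant([[0, 2],   # example 0: take time step 2
                       [1, 0]])  # example 1: take time step 0
last_states = tf.gather_nd(outputs, lengths)  # shape [2, 4]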
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs = tf.placeholder(
            tf.float32, shape=[None, self.observation_space.shape[0] + 1],
            name='inputs')

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.q_values = tf.squeeze(
                capacities.value_f(self.q_params, self.inputs))

        self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                              self.env.action_space.n,
                                              self.N0, self.min_eps)
        self.q_t = self.q_values[self.action_t]

        fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
        with tf.variable_scope(fixed_q_scope):
            self.update_fixed_vars_op = capacities.fix_scope(q_scope)

        with tf.variable_scope('ExperienceReplay'):
            self.er_inputs = tf.placeholder(
                tf.float32, shape=[None, self.observation_space.shape[0] + 1],
                name="ERInputs")
            self.er_actions = tf.placeholder(tf.int32, shape=[None],
                                             name="ERActions")
            self.er_rewards = tf.placeholder(tf.float32, shape=[None],
                                             name="ERReward")
            self.er_next_states = tf.placeholder(
                tf.float32, shape=[None, self.observation_space.shape[0] + 1],
                name="ERNextState")

            with tf.variable_scope(q_scope, reuse=True):
                er_q_values = capacities.value_f(self.q_params, self.er_inputs)
            er_stacked_actions = tf.stack([
                tf.range(0, tf.shape(self.er_actions)[0]), self.er_actions
            ], 1)
            er_qs = tf.gather_nd(er_q_values, er_stacked_actions)

            with tf.variable_scope(fixed_q_scope, reuse=True):
                er_fixed_next_q_values = capacities.value_f(
                    self.q_params, self.er_next_states)
            with tf.variable_scope(q_scope, reuse=True):
                er_next_q_values = capacities.value_f(
                    self.q_params, self.er_next_states)
            er_next_max_action_t = tf.cast(tf.argmax(er_next_q_values, 1),
                                           tf.int32)
            er_next_stacked_actions = tf.stack([
                tf.range(0, tf.shape(self.er_next_states)[0]),
                er_next_max_action_t
            ], 1)
            er_next_qs = tf.gather_nd(er_fixed_next_q_values,
                                      er_next_stacked_actions)

            er_target_qs1 = tf.stop_gradient(self.er_rewards +
                                             self.discount * er_next_qs)
            er_target_qs2 = self.er_rewards
            er_stacked_targets = tf.stack([er_target_qs1, er_target_qs2], 1)
            select_targets = tf.stack([
                tf.range(0, tf.shape(self.er_next_states)[0]),
                tf.cast(self.er_next_states[:, -1], tf.int32)
            ], 1)
            er_target_qs = tf.gather_nd(er_stacked_targets, select_targets)

            self.er_loss = 1 / 2 * tf.reduce_sum(
                tf.square(er_target_qs - er_qs))
            er_adam = tf.train.AdamOptimizer(self.lr)
            self.global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[
                    tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                ])
            self.er_train_op = er_adam.minimize(
                self.er_loss, global_step=self.global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")
        self.timestep, self.inc_timestep_op = capacities.counter("timestep")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
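# capacities.fix_scope is project-specific and not shown in this excerpt. A
# plausible sketch (an assumption, not the library's actual implementation) of
# the usual fixed-target-network pattern it appears to follow: mirror each
# variable of the source scope under the current scope ("FixedQValues") and
# return an op that copies over the current values.
import tensorflow as tf

def fix_scope(source_scope):
    source_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope=source_scope.name)
    assign_ops = []
    for var in source_vars:
        # Strip the source scope prefix so the mirror nests under the current scope.
        name = var.op.name[len(source_scope.name) + 1:]
        mirror = tf.get_variable(name, initializer=var.initialized_value(),
                                 trainable=False)
        assign_ops.append(tf.assign(mirror, var))
    # Running the returned op refreshes the fixed copies.
    return tf.group(*assign_ops)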
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.inputs = tf.placeholder(
            tf.float32, shape=[None, self.observation_space.shape[0] + 1],
            name='inputs')

        policy_scope = tf.VariableScope(reuse=False, name='Policy')
        with tf.variable_scope(policy_scope):
            self.probs, self.actions = capacities.policy(
                self.policy_params, self.inputs)
        self.action_t = tf.squeeze(self.actions, 1)[0]
        # self.action_t = tf.Print(self.action_t, data=[self.probs, self.action_t],
        #                          message="self.probs, self.action_t:")

        v_scope = tf.VariableScope(reuse=False, name='VValues')
        with tf.variable_scope(v_scope):
            vs = capacities.value_f(self.v_params, self.inputs)

        with tf.control_dependencies([self.probs, vs]):
            with tf.variable_scope('Training'):
                stacked_actions = tf.stack([
                    tf.range(0, tf.shape(self.actions)[0]),
                    tf.squeeze(self.actions, 1)
                ], 1)

                self.rewards = tf.placeholder(tf.float32, shape=[None],
                                              name="rewards")
                self.next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="next_states")
                self.next_actions = tf.placeholder(tf.int32, shape=[None],
                                                   name="next_actions")

                with tf.variable_scope(v_scope, reuse=True):
                    next_vs = tf.squeeze(
                        capacities.value_f(self.v_params, self.next_states), 1)

                with tf.variable_scope('TargetVs'):
                    target_vs1 = tf.stop_gradient(self.rewards +
                                                  self.discount * next_vs)
                    target_vs2 = self.rewards
                    stacked_targets = tf.stack([target_vs1, target_vs2], 1)
                    select_targets = tf.stack([
                        tf.range(0, tf.shape(self.next_states)[0]),
                        tf.cast(self.next_states[:, -1], tf.int32)
                    ], 1)
                    target_vs = tf.gather_nd(stacked_targets, select_targets)

                log_probs = tf.log(tf.gather_nd(self.probs, stacked_actions))
                with tf.control_dependencies([log_probs, target_vs]):
                    self.v_loss = 1 / 2 * tf.reduce_sum(
                        tf.square(target_vs - vs))
                    v_adam = tf.train.AdamOptimizer(self.v_lr)
                    self.v_global_step = tf.Variable(0, trainable=False,
                                                     name="v_global_step")
                    self.v_train_op = v_adam.minimize(
                        self.v_loss, global_step=self.v_global_step)

                    td = target_vs - vs
                    self.policy_loss = -tf.reduce_sum(
                        log_probs * tf.stop_gradient(td))
                    policy_adam = tf.train.AdamOptimizer(self.policy_lr)
                    self.policy_global_step = tf.Variable(
                        0, trainable=False, name="policy_global_step",
                        collections=[
                            tf.GraphKeys.GLOBAL_STEP,
                            tf.GraphKeys.GLOBAL_VARIABLES
                        ])
                    self.policy_train_op = policy_adam.minimize(
                        self.policy_loss, global_step=self.policy_global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
        self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                   self.policy_loss_plh)
        self.v_loss_plh = tf.placeholder(tf.float32, shape=[])
        self.v_loss_sum_t = tf.summary.scalar('v_loss', self.v_loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
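# Minimal standalone sketch (stand-in numbers, not from the original code) of
# the actor-critic policy-loss pattern above: the TD error from the critic
# scales the log-probability of the taken action, while stop_gradient keeps the
# critic out of the policy's gradient.
import tensorflow as tf

probs = tf.constant([[0.7, 0.3], [0.2, 0.8]])  # pi(a|s), one row per state
actions = tf.constant([0, 1])                  # actions actually taken
td = tf.constant([0.5, -1.0])                  # TD error from the critic

stacked_actions = tf.stack([tf.range(tf.shape(actions)[0]), actions], 1)
log_probs = tf.log(tf.gather_nd(probs, stacked_actions))
policy_loss = -tf.reduce_sum(log_probs * tf.stop_gradient(td))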
def build_graph(self, graph):
    with graph.as_default():
        tf.set_random_seed(self.random_seed)

        self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
        self.N = tf.Variable(0., dtype=tf.float32, name='N', trainable=False)
        self.min_eps_t = tf.constant(self.min_eps, tf.float32, name='min_eps')

        self.inputs = tf.placeholder(
            tf.float32, shape=[None, self.observation_space.shape[0] + 1],
            name='inputs')

        q_scope = tf.VariableScope(reuse=False, name='QValues')
        with tf.variable_scope(q_scope):
            self.q_values = tf.squeeze(
                capacities.value_f(self.q_params, self.inputs))

        self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                              self.env.action_space.n,
                                              self.N0, self.min_eps)
        self.q_t = self.q_values[self.action_t]

        with tf.variable_scope('Training'):
            self.reward = tf.placeholder(tf.float32, shape=[], name="reward")
            self.next_state = tf.placeholder(
                tf.float32, shape=[1, self.observation_space.shape[0] + 1],
                name="nextState")
            self.next_action = tf.placeholder(tf.int32, shape=[],
                                              name="nextAction")

            with tf.variable_scope(q_scope, reuse=True):
                next_q_values = tf.squeeze(
                    capacities.value_f(self.q_params, self.next_state))
            target_q1 = tf.stop_gradient(
                self.reward + self.discount * next_q_values[self.next_action])
            target_q2 = self.reward
            is_done = tf.cast(self.next_state[0, 4], tf.bool)
            target_q = tf.where(is_done, target_q2, target_q1)
            with tf.control_dependencies([target_q]):
                self.loss = 1 / 2 * tf.square(target_q - self.q_t)

            adam = tf.train.AdamOptimizer(self.lr)
            self.global_step = tf.Variable(
                0, trainable=False, name="global_step",
                collections=[
                    tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                ])
            self.train_op = adam.minimize(self.loss,
                                          global_step=self.global_step)

        self.score_plh = tf.placeholder(tf.float32, shape=[])
        self.score_sum_t = tf.summary.scalar('score', self.score_plh)
        self.loss_plh = tf.placeholder(tf.float32, shape=[])
        self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
        self.all_summary_t = tf.summary.merge_all()

        self.episode_id, self.inc_ep_id_op = capacities.counter("episode_id")

        # Playing part
        self.pscore_plh = tf.placeholder(tf.float32, shape=[])
        self.pscore_sum_t = tf.summary.scalar('play_score', self.pscore_plh)

    return graph
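# capacities.eps_greedy is project-specific; a simplified sketch (an assumption
# with a reduced signature, not the library's actual code) of an epsilon-greedy
# pick whose epsilon decays as N0 / (N0 + N) down to a floor of min_eps, which
# matches the N0 / min_eps constants declared above:
import tensorflow as tf

def eps_greedy(q_values, num_actions, N, N0, min_eps):
    eps = tf.maximum(N0 / (N0 + N), min_eps)
    explore = tf.random_uniform([]) < eps
    random_action = tf.random_uniform([], maxval=num_actions, dtype=tf.int64)
    greedy_action = tf.argmax(q_values)
    # Scalar tf.where: take the random action with probability eps.
    return tf.where(explore, random_action, greedy_action)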