Example #1
 def _build_layers_v2(self, input_dict, num_outputs, options):
     # Weights shared with CustomModel1
     with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, "shared"),
                            reuse=tf.AUTO_REUSE,
                            auxiliary_name_scope=False):
         last_layer = slim.fully_connected(input_dict["obs"],
                                           64,
                                           activation_fn=tf.nn.relu,
                                           scope="fc1")
     last_layer = slim.fully_connected(last_layer,
                                       64,
                                       activation_fn=tf.nn.relu,
                                       scope="fc2")
     output = slim.fully_connected(last_layer,
                                   num_outputs,
                                   activation_fn=None,
                                   scope="fc_out")
     return output, last_layer
Example #2
    def __init__(self, conf, trainlabels, outputs, constraint, name=None):
        '''EDDecoder constructor

        Args:
            conf: the decoder configuration as a ConfigParser
            trainlabels: the number of extra labels required by the trainer
            outputs: the name of the outputs of the model
            constraint: the constraint for the variables
        '''

        #save the parameters
        self.conf = dict(conf.items('decoder'))
        self.outputs = outputs

        self.output_dims = self.get_output_dims(trainlabels)

        self.scope = tf.VariableScope(tf.AUTO_REUSE,
                                      name or type(self).__name__,
                                      constraint=constraint)
Example #3
 def conv(self, name, inputData, outChannel):
     inChannel = inputData.get_shape()[-1]
     with tf.variable_scope(name):
         kernel = tf.get_variable("weights",
                                  shape=[3, 3, inChannel, outChannel],
                                  dtype=tf.float32,
                                  trainable=False)
         biases = tf.get_variable("biases",
                                  shape=[outChannel],
                                  dtype=tf.float32,
                                  trainable=False)
         convRes = tf.nn.conv2d(input=inputData,
                                filter=kernel,
                                strides=[1, 1, 1, 1],
                                padding="SAME")
         res = tf.nn.bias_add(convRes, bias=biases)
         out = tf.nn.relu(res)
     self.parameters += [kernel, biases]
     return out
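
The conv helper above builds its kernel and biases with trainable=False and appends them to self.parameters, the usual pattern when weights are meant to be overwritten from a pretrained checkpoint rather than learned. A minimal sketch of how such frozen variables might be filled in, assuming a hypothetical weight_dict mapping variable names to NumPy arrays (not part of the original snippet):

 def load_weights(self, weight_dict, sess):
     # Hypothetical helper: copy pretrained NumPy arrays into the frozen
     # variables collected by conv(). weight_dict is assumed to map each
     # variable name (e.g. "conv1/weights:0") to an array of matching shape.
     for var in self.parameters:
         if var.name in weight_dict:
             sess.run(var.assign(weight_dict[var.name]))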
Example #4
    def __init__(self, conf, constraint, name=None):
        '''EDEncoder constructor

        Args:
            conf: the encoder configuration
            name: the encoder name
            constraint: the constraint for the variables
        '''

        #save the configuration
        self.conf = dict(conf.items('encoder'))

        #apply default configuration
        default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'defaults',
                               type(self).__name__.lower() + '.cfg')
        apply_defaults(self.conf, default)

        self.scope = tf.VariableScope(tf.AUTO_REUSE,
                                      name or type(self).__name__,
                                      constraint=constraint)
Example #5
    def __init__(self, conf, name=None):
        '''Model constructor

        Args:
            conf: The model configuration as a configparser object
        '''

        self.input_names = conf.get('io', 'inputs').split(' ')
        if self.input_names == ['']:
            self.input_names = []
        self.output_names = conf.get('io', 'outputs').split(' ')
        if self.output_names == ['']:
            self.output_names = []

        self.conf = dict(conf.items('model'))

        self.output_dims = {}
        for i, d in enumerate(self.conf['output_dims'].split(' ')):
            self.output_dims[self.output_names[i]] = int(d)

        self.scope = tf.VariableScope(False, name or type(self).__name__)
Example #6
 def _build_layers_v2(self, input_dict, num_outputs, options):
     # Example of (optional) weight sharing between two different policies.
     # Here, we share the variables defined in the 'shared' variable scope
     # by entering it explicitly with tf.AUTO_REUSE. This creates the
     # variables for the 'fc1' layer in a global scope called 'shared'
     # outside of the policy's normal variable scope.
     with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, "shared"),
                            reuse=tf.AUTO_REUSE,
                            auxiliary_name_scope=False):
         last_layer = slim.fully_connected(input_dict["obs"],
                                           64,
                                           activation_fn=tf.nn.relu,
                                           scope="fc1")
     last_layer = slim.fully_connected(last_layer,
                                       64,
                                       activation_fn=tf.nn.relu,
                                       scope="fc2")
     output = slim.fully_connected(last_layer,
                                   num_outputs,
                                   activation_fn=None,
                                   scope="fc_out")
     return output, last_layer
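
The pattern above (also used in Example #1) enters a global "shared" variable scope with tf.AUTO_REUSE so that every policy resolves the "fc1" weights to the same variables. A self-contained sketch of the same mechanism with plain tf.get_variable instead of slim, assuming TensorFlow 1.x; the scope and variable names below are illustrative only:

import tensorflow as tf

def shared_fc1(x, size=64):
    # Jump to the top-level "shared" scope regardless of the caller's scope;
    # AUTO_REUSE creates the variables on the first call and reuses them after.
    with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, "shared"),
                           reuse=tf.AUTO_REUSE,
                           auxiliary_name_scope=False):
        w = tf.get_variable("fc1_weights", [int(x.shape[-1]), size])
        b = tf.get_variable("fc1_biases", [size],
                            initializer=tf.zeros_initializer())
    return tf.nn.relu(tf.matmul(x, w) + b)

with tf.variable_scope("policy_1"):
    out1 = shared_fc1(tf.zeros([1, 32]))
with tf.variable_scope("policy_2"):
    out2 = shared_fc1(tf.zeros([1, 32]))
# Both calls resolve to the same variables: shared/fc1_weights and shared/fc1_biases.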
Example #7
def test_load_weight_from_ckpt():

    tf.logging.set_verbosity(tf.logging.INFO)
    x_response_value = [
        [10378, 119, 119, 151, 8815, 8281, 8211, 10425, 8154, 0, 0, 0, 0, 0] +
        [0 for i in range(512 - 14)],
        [
            165, 8991, 8181, 8184, 131, 120, 120, 8134, 11300, 10540, 8735,
            8207, 0, 0
        ] + [0 for i in range(512 - 14)]
    ]
    poly_encoder_config = PolyEncoderConfig.from_json_file(
        '../../cfg/poly_encoder.json')
    encoder_inst = PolyEncoder(config=poly_encoder_config,
                               mode=tf.estimator.ModeKeys.TRAIN)

    bert_scope = tf.VariableScope(name="bert", reuse=tf.AUTO_REUSE)
    bert_config = BertConfig.from_json_file(poly_encoder_config.bert_config)
    x_response = tf.convert_to_tensor(value=x_response_value, dtype=tf.int32)
    x_response_emb, x_response_mask = encoder_inst.encode_candidate(
        x_response=x_response, bert_config=bert_config, bert_scope=bert_scope)
    load_weight_from_ckpt(init_checkpoint="../../ckpt/albert/")
Example #8
def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    # TODO: Implement Function
    with tf.variable_scope('generator'):
        x1 = tf.layers.dense(z, 4 * 4 * 512)
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = tf.maximum(0.2 * x1, x1)
        # 4x4x512 now

        x2 = tf.layers.conv2d_transpose(x1, 256, 5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = tf.maximum(0.2 * x2, x2)
        # 8x8x256 now

        x3 = tf.layers.conv2d_transpose(x2, 128, 5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = tf.maximum(0.2 * x3, x3)
        # 16x16x128 now

        # Output layer
        x4 = tf.layers.conv2d_transpose(x3,
                                        out_channel_dim,
                                        5,
                                        strides=2,
                                        padding='same')
        # 32x32x3 now

        logits = tf.image.resize_images(x4, [28, 28],
                                        method=tf.image.ResizeMethod.BILINEAR,
                                        align_corners=True)
        out = tf.tanh(logits)

        return out
Example #9
    def __init__(self, conf, output_dim, name=None):
        '''classifier constructor

        Args:
            conf: The classifier configuration
            output_dim: the classifier output dimension
                    This is a tuple, each element representing the output_dim
                    for one kind of targets
            name: the classifier name
        '''

        self.conf = conf

        # if there is only an add_labels in the config, we assume that only the
        # first element of this tuple is important
        if 'add_labels' in conf:
            self.output_dim = output_dim[0] + int(conf['add_labels'])

        # if there is only an add_labels_reconstruction but not an
        # add_labels_prediction in config, assume only second element to be of
        # importance
        elif 'add_labels_reconstruction' in conf and \
            not 'add_labels_prediction' in conf:
            self.output_dim = output_dim[1] + int(
                conf['add_labels_reconstruction'])

        # if both present, both elements of the tuple will be of importance
        elif 'add_labels_reconstruction' in conf and \
            'add_labels_prediction' in conf:
            outdim1 = output_dim[0] + int(conf['add_labels_prediction'])
            outdim2 = output_dim[1] + int(conf['add_labels_reconstruction'])
            self.output_dim = (outdim1, outdim2)

        else:
            raise Exception(
                'Wrong kind of add_labels information in the config')

        #create the variable scope for the classifier
        self.scope = tf.VariableScope(False, name or type(self).__name__)
Example #10
 def build(self, hparams, is_training=True):
   self._total_length = hparams.max_seq_len
   if self._total_length != np.prod(self._level_lengths):
     raise ValueError(
         'The product of the HierarchicalLstmEncoder level lengths (%d) must '
         'equal the padded input sequence length (%d).' % (
             np.prod(self._level_lengths), self._total_length))
   tf.logging.info('\nHierarchical Encoder:\n'
                   '  input length: %d\n'
                   '  level lengths: %s\n',
                   self._total_length,
                   self._level_lengths)
   self._hierarchical_encoders = []
   num_splits = np.prod(self._level_lengths)
   for i, l in enumerate(self._level_lengths):
     num_splits //= l
     tf.logging.info('Level %d splits: %d', i, num_splits)
     h_encoder = self._core_encoder_cls()
     h_encoder.build(
         hparams, is_training,
         name_or_scope=tf.VariableScope(
             tf.AUTO_REUSE, 'encoder/hierarchical_level_%d' % i))
     self._hierarchical_encoders.append((num_splits, h_encoder))
Example #11
 def _build_layers_v2(self, input_dict, num_outputs, options):
     convs = options.get("conv_filters")
     if convs is None:
         convs = filters_mnih15
     activation = tf.nn.relu
     conv_output = input_dict["obs"]
     with tf.name_scope("mnih15_convs"):
         for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
             conv_output = slim.conv2d(
                 input_dict["obs"],
                 out_size,
                 kernel,
                 stride,
                 activation_fn=activation,
                 padding="SAME",
                 scope="conv{}".format(i))
         out_size, kernel, stride = convs[-1]
         conv_output = slim.conv2d(
             conv_output,
             out_size,
             kernel,
             stride,
             activation_fn=activation,
             padding="VALID",
             scope="conv_out")
     action_out = slim.flatten(conv_output)
     with tf.name_scope("mnih15_FC"):
         # Share weights of the following layer with other instances of this
         # model (usually by other macad_agents in a Multi-Agent setting)
         with tf.variable_scope(
                 tf.VariableScope(tf.AUTO_REUSE, "shared"),
                 reuse=tf.AUTO_REUSE):
             shared_layer = slim.fully_connected(
                 action_out, 128, activation_fn=activation)
         action_logits = slim.fully_connected(
             action_out, num_outputs=num_outputs, activation_fn=None)
     return action_logits, shared_layer
Example #12
    def __init__(self, input_size, layers, hidden_units, max_length,
                 learning_rate):
        with tf.VariableScope(name="BiLSTM", reuse=tf.AUTO_REUSE):
            self.input_X = tf.placeholder(dtype=tf.float32,
                                          shape=[None, max_length, input_size],
                                          name="input_X")
            self.input_y = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 1],
                                          name="input_y")
            self.sequence_length = tf.placeholder(dtype=tf.int32,
                                                  shape=[None],
                                                  name="sequence_length")
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name="dropout_keep_prob")

            self.output = self.build_bilstm(self.input_X, layers, hidden_units,
                                            self.dropout_keep_prob)

            self.loss = tf.reduce_mean(
                -(self.input_y * tf.log(self.output) +
                  (1 - self.input_y) * tf.log(1 - self.output)))
            self.train = tf.train.AdamOptimizer(learning_rate).minimize(
                self.loss)

            self.prediction = tf.cast(tf.to_int32(self.output >= 0.5),
                                      dtype=tf.float32,
                                      name="prediction")
            self.accuracy = tf.multiply(tf.reduce_mean(
                tf.cast(tf.equal(self.input_y, self.prediction),
                        dtype=tf.float32)),
                                        100,
                                        name="accuracy")

            tf.summary.scalar("loss", self.loss)
            tf.summary.scalar("accuracy", self.accuracy)
            self.merge_graph = tf.summary.merge_all()
Example #13
    def build_graph(self, graph):
        self.env.seed(self.random_seed)
        np.random.seed(self.random_seed)
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            # Graph of the  LSTM model of the world
            input_scope = tf.VariableScope(reuse=False, name="inputs")
            with tf.variable_scope(input_scope):
                self.state_input_plh = tf.placeholder(
                    tf.float32,
                    shape=[None, None, self.m_params['env_state_size']],
                    name='state_input_plh')
                self.action_input_plh = tf.placeholder(tf.int32,
                                                       shape=[None, None, 1],
                                                       name='action_input_plh')
                self.mask_plh = tf.placeholder(tf.float32,
                                               shape=[None, None, 1],
                                               name="mask_plh")

                input_shape = tf.shape(self.state_input_plh)
                dynamic_batch_size, dynamic_num_steps = input_shape[
                    0], input_shape[1]

                action_input = tf.one_hot(indices=tf.squeeze(
                    self.action_input_plh, 2),
                                          depth=self.m_params['nb_actions'])
                m_inputs = tf.concat([self.state_input_plh, action_input],
                                     2,
                                     name="m_inputs")

            m_scope = tf.VariableScope(reuse=False, name="m")
            with tf.variable_scope(m_scope):
                self.state_reward_preds, self.m_final_state, self.m_initial_state = capacities.predictive_model(
                    self.m_params,
                    m_inputs,
                    dynamic_batch_size,
                    None,
                    summary_collections=[self.M_SUMMARIES])

            fixed_m_scope = tf.VariableScope(reuse=False, name='FixedM')
            with tf.variable_scope(fixed_m_scope):
                self.update_m_fixed_vars_op = capacities.fix_scope(m_scope)

            m_training_scope = tf.VariableScope(reuse=False, name='m_training')
            with tf.variable_scope(m_training_scope):
                self.m_next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, None, self.m_params['env_state_size']],
                    name="m_next_states")
                self.m_rewards = tf.placeholder(tf.float32,
                                                shape=[None, None, 1],
                                                name="m_rewards")
                y_true = tf.concat([self.m_rewards, self.m_next_states], 2)

                with tf.control_dependencies([self.state_reward_preds]):
                    self.m_loss = 1 / 2 * tf.reduce_mean(
                        tf.square(self.state_reward_preds - y_true) *
                        self.mask_plh)
                    tf.summary.scalar('m_loss',
                                      self.m_loss,
                                      collections=[self.M_SUMMARIES])

                m_adam = tf.train.AdamOptimizer(self.m_params['lr'])
                self.m_global_step = tf.Variable(0,
                                                 trainable=False,
                                                 name="m_global_step")
                tf.summary.scalar('m_global_step',
                                  self.m_global_step,
                                  collections=[self.M_SUMMARIES])
                self.m_train_op = m_adam.minimize(
                    self.m_loss, global_step=self.m_global_step)

            self.all_m_summary_t = tf.summary.merge_all(key=self.M_SUMMARIES)

            # Graph of the controller
            c_scope = tf.VariableScope(reuse=False, name="c")
            c_summary_collection = [self.C_SUMMARIES]
            with tf.variable_scope(c_scope):
                # c_cell = LSTMCell(
                #     num_units=self.c_params['nb_units']
                #     , initializer=tf.truncated_normal_initializer(
                #         mean=self.c_params['initial_mean']
                #         , stddev=self.c_params['initial_stddev']
                #     )
                # )
                # self.c_initial_state = c_cell.zero_state(dynamic_batch_size, dtype=tf.float32)
                # c_c_h_states, self.c_final_state = tf.nn.dynamic_rnn(c_cell, self.state_input_plh, initial_state=self.c_initial_state)
                # c_c_states, c_h_states = tf.split(value=c_c_h_states, num_or_size_splits=[self.c_params['nb_units'], self.c_params['nb_units']], axis=2)
                # # Compute the Controller projection
                # self.probs_t, self.actions_t = projection_func(c_h_states)
                m_params = self.m_params
                model_func = lambda m_inputs, m_state: capacities.predictive_model(
                    m_params, m_inputs, dynamic_batch_size, m_state)
                c_params = self.c_params
                projection_func = lambda inputs: capacities.projection(
                    c_params, inputs)
                cm_cell = CMCell(num_units=self.c_params['nb_units'],
                                 m_units=self.m_params['nb_units'],
                                 fixed_model_scope=fixed_m_scope,
                                 model_func=model_func,
                                 projection_func=projection_func,
                                 num_proj=self.c_params['nb_actions'],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=self.c_params['initial_mean'],
                                     stddev=self.c_params['initial_stddev']))

                self.cm_initial_state = cm_cell.zero_state(dynamic_batch_size,
                                                           dtype=tf.float32)
                probs_and_actions_t, self.cm_final_state = tf.nn.dynamic_rnn(
                    cm_cell,
                    self.state_input_plh,
                    initial_state=self.cm_initial_state)
                self.probs_t, actions_t = tf.split(
                    value=probs_and_actions_t,
                    num_or_size_splits=[self.c_params['nb_actions'], 1],
                    axis=2)
                self.actions_t = tf.cast(actions_t, tf.int32)
                # helper tensor used for inference
                self.action_t = self.actions_t[0, 0, 0]

            c_training_scope = tf.VariableScope(reuse=False, name='c_training')
            with tf.variable_scope(c_training_scope):
                self.c_rewards_plh = tf.placeholder(tf.float32,
                                                    shape=[None, None, 1],
                                                    name="c_rewards_plh")

                baseline = tf.reduce_mean(self.c_rewards_plh)

                batch_size, num_steps = tf.shape(self.actions_t)[0], tf.shape(
                    self.actions_t)[1]
                line_indices = tf.matmul(  # Line indice
                    tf.reshape(tf.range(0, batch_size), [-1, 1]),
                    tf.ones([1, num_steps], dtype=tf.int32))
                column_indices = tf.matmul(  # Column indice
                    tf.ones([batch_size, 1], dtype=tf.int32),
                    tf.reshape(tf.range(0, num_steps), [1, -1]))
                depth_indices = tf.squeeze(self.actions_t, 2)
                stacked_actions = tf.stack(
                    [line_indices, column_indices, depth_indices], 2)

                with tf.control_dependencies([self.probs_t]):
                    log_probs = tf.expand_dims(
                        tf.log(tf.gather_nd(self.probs_t, stacked_actions)), 2)
                    masked_log_probs = log_probs * self.mask_plh
                    self.c_loss = tf.reduce_mean(-tf.reduce_sum(
                        masked_log_probs * (self.c_rewards_plh - baseline), 1))
                    tf.summary.scalar('c_loss',
                                      self.c_loss,
                                      collections=c_summary_collection)

                c_adam = tf.train.AdamOptimizer(self.c_params['lr'])
                self.c_global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ],
                    dtype=tf.int32)
                tf.summary.scalar('c_global_step',
                                  self.c_global_step,
                                  collections=c_summary_collection)
                self.c_train_op = c_adam.minimize(
                    self.c_loss, global_step=self.c_global_step)

            self.all_c_summary_t = tf.summary.merge_all(key=self.C_SUMMARIES)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.episode_id_sum = tf.summary.scalar('episode_id',
                                                    self.episode_id)
            self.time, self.inc_time_op = capacities.counter("time")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #14
import os
import sys

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tflearn
from genotypes import PRIMITIVES
from genotypes import Genotype
from operations import *
import utils
null_scope = tf.VariableScope("")


def MixedOp(x, C_out, stride, index, reduction):
    ops = []

    with tf.variable_scope(null_scope):
        with tf.variable_scope("arch_params", reuse=tf.AUTO_REUSE):
            weight = tf.get_variable(
                "weight{}_{}".format(2 if reduction else 1,
                                     index), [len(PRIMITIVES)],
                initializer=tf.random_normal_initializer(0, 1e-3),
                regularizer=slim.l2_regularizer(0.0001))
    weight = tf.nn.softmax(weight)
    weight = tf.reshape(weight, [-1, 1, 1, 1])
    index = 0
    for primitive in PRIMITIVES:

        op = OPS[primitive](x, C_out, stride)
        if 'pool' in primitive:
Example #15
    def __init__(self,
                 config,
                 env,
                 sess,
                 writer,
                 name='haggle',
                 trainable=True):
        super(Model, self).__init__()
        self.config = {
            'pre': [],
            'lstm': 128,
            'value_scale': 0.5,
            'lr': 0.001,
            'grad_clip': 0.5,
            'ppo': 0.1,
            'ppo_epochs': 10,
        }

        self.max_rounds = env.max_rounds

        self.config.update(config)

        self.original_name = name
        self.version = 0

        self.name = name

        self.observation_space = env.observation_space
        self.action_space = env.action_space
        self.context_space = env.context_space
        self.reward_space = env.reward_space

        self.empty_reward = np.zeros(self.reward_space, dtype='float32')

        # Used for getting offers by index
        self.env = env

        self.scope = tf.VariableScope(reuse=False, name=name)
        self.sess = sess
        self.writer = writer
        self.writer_step = 0

        with tf.variable_scope(self.scope):
            self.input = tf.placeholder(tf.int32,
                                        shape=(
                                            None,
                                            self.observation_space,
                                        ),
                                        name='input')
            self.context = tf.placeholder(tf.int32,
                                          shape=(
                                              None,
                                              self.context_space,
                                          ),
                                          name='input_context')

            # Init layers

            self.layers = {
                'embedding':
                tf.get_variable('embedding',
                                dtype=tf.float32,
                                initializer=tf.initializers.random_normal,
                                shape=(self.action_space,
                                       self.config['lstm'])),
                'pre': [],
                'action':
                tf.layers.Dense(self.action_space, name='action'),
                'value':
                tf.layers.Dense(self.reward_space, name='value'),
                'context':
                tf.layers.Dense(self.config['lstm'] * 2,
                                activation=tf.nn.relu,
                                name='context'),
                'lstm':
                tf.contrib.rnn.LSTMBlockCell(name='lstm',
                                             num_units=self.config['lstm']),
            }

            self.build_context = self.layers['context'](tf.cast(
                self.context, dtype=tf.float32))

            for i, width in enumerate(self.config['pre']):
                pre = tf.layers.Dense(width,
                                      activation=tf.nn.relu,
                                      name='preprocess_{}'.format(i))
                self.layers['pre'].append(pre)

            state_size = self.layers['lstm'].state_size
            self.rnn_state = tf.placeholder(tf.float32,
                                            shape=(None, state_size.c +
                                                   state_size.h),
                                            name='rnn_state')

            state = tf.contrib.rnn.LSTMStateTuple(
                c=self.rnn_state[:, :state_size.c],
                h=self.rnn_state[:, state_size.c:])

            new_state, action, action_probs, value = \
                self._network(state, self.input)

            self.action = action
            self.action_probs = action_probs
            self.value = value
            self.new_state = tf.concat([ new_state.c, new_state.h ], axis=-1, \
                name='new_state')

            # Losses
            if trainable:
                self._losses()

            # Weight loading
            self.trainable_variables = self.scope.trainable_variables()

            self.weight_placeholders = {}
            self.load_ops = []
            for var in self.trainable_variables:
                name = var.name.split(':', 1)[0]
                name = name.split('/', 1)[1]
                placeholder = tf.placeholder(
                    var.dtype,
                    shape=var.shape,
                    name='{}/placeholder'.format(name))
                self.weight_placeholders[name] = placeholder
                self.load_ops.append(var.assign(placeholder))
Example #16
    def __init__(self, cell, name):
        '''ScopeRNNCellWrapper constructor'''

        self._cell = cell
        self.scope = tf.VariableScope(None, name)
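
Only the constructor of ScopeRNNCellWrapper is shown here; presumably the wrapper enters self.scope whenever the wrapped cell is called so that every call shares one set of weights. A hedged sketch of what such a __call__ could look like (illustrative, not the original project's code):

    def __call__(self, inputs, state, scope=None):
        # Enter the wrapper's scope and reuse variables after the first call,
        # so repeated invocations of the cell share the same parameters.
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            return self._cell(inputs, state)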
Example #17
 def _get_input_scope(self, default_name=""):
   if self.share_embeddings == EmbeddingsSharingLevel.SOURCE_TARGET_INPUT:
     name = "shared_embeddings"
   else:
     name = default_name
   return tf.VariableScope(None, name=tf.get_variable_scope().name + "/" + name)
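
This helper only constructs a VariableScope whose name is anchored under the current scope; the caller would then hand it to tf.variable_scope when creating the embedding variables. A small self-contained sketch of that behaviour, assuming TensorFlow 1.x; scope and variable names are illustrative:

import tensorflow as tf

with tf.variable_scope("transformer"):
    # Equivalent of _get_input_scope when embeddings are shared:
    scope = tf.VariableScope(None,
                             name=tf.get_variable_scope().name + "/shared_embeddings")

# Entering the returned scope later lands under "transformer/shared_embeddings",
# no matter where the caller currently stands.
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    embeddings = tf.get_variable("w_embs", shape=[100, 64])
print(embeddings.name)  # transformer/shared_embeddings/w_embs:0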
Example #18
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                self.probs, self.actions = capacities.policy(
                    self.policy_params, self.inputs)
            self.action_t = tf.squeeze(self.actions, 1)[0]

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = capacities.value_f(self.q_params, self.inputs)
            self.q = self.q_values[0, tf.stop_gradient(self.action_t)]

            with tf.variable_scope('Training'):
                stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.actions)[0]),
                    tf.squeeze(self.actions, 1)
                ], 1)
                qs = tf.gather_nd(self.q_values, stacked_actions)
                log_probs = tf.log(tf.gather_nd(self.probs, stacked_actions))
                self.policy_loss = -tf.reduce_sum(
                    log_probs * tf.stop_gradient(qs))

                self.rewards = tf.placeholder(tf.float32,
                                              shape=[None],
                                              name="rewards")
                self.next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="next_states")
                self.next_actions = tf.placeholder(tf.int32,
                                                   shape=[None],
                                                   name="next_actions")
                with tf.variable_scope(q_scope, reuse=True):
                    next_q_values = capacities.value_f(self.q_params,
                                                       self.next_states)
                next_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.next_actions)[0]), self.next_actions
                ], 1)
                next_qs = tf.gather_nd(next_q_values, next_stacked_actions)
                target_qs1 = tf.stop_gradient(self.rewards +
                                              self.discount * next_qs)
                target_qs2 = self.rewards
                stacked_targets = tf.stack([target_qs1, target_qs2], 1)
                select_targets = tf.stack([
                    tf.range(0,
                             tf.shape(self.next_states)[0]),
                    tf.cast(self.next_states[:, -1], tf.int32)
                ], 1)
                target_qs = tf.gather_nd(stacked_targets, select_targets)
                self.q_loss = 1 / 2 * tf.reduce_sum(tf.square(target_qs - qs))

                self.loss = self.policy_loss + self.q_scale_lr * self.q_loss

                adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.train_op = adam.minimize(self.loss,
                                              global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                       self.policy_loss_plh)
            self.q_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.q_loss_sum_t = tf.summary.scalar('q_loss', self.q_loss_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #19
File: model_dmn.py Project: gcouti/pypagAI
    def neural_net(self):
        ###############
        # Input Module
        ###############

        # Hyperparameters
        # The number of dimensions used to store data passed between recurrent layers in the network.
        recurrent_cell_size = 128

        # The number of dimensions in our word vectorizations.
        D = 50

        # How quickly the network learns. Too high, and we may run into numeric instability
        # or other issues.
        learning_rate = 0.005

        # Dropout probabilities. For a description of dropout and what these probabilities are,
        # see Entailment with TensorFlow.
        input_p, output_p = 0.5, 0.5

        # How many questions we train on at a time.
        batch_size = 128

        # Number of passes in episodic memory. We'll get to this later.
        passes = 4

        # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
        ff_hidden_size = 256

        weight_decay = 0.00000001
        # The strength of our regularization. Increase to encourage sparsity in episodic memory,
        # but makes training slower. Don't make this larger than learning_rate.

        training_iterations_count = 400000
        # How many questions the network trains on each time it is trained.
        # Some questions are counted multiple times.

        display_step = 100
        # How many iterations of training occur before each validation check.

        # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
        # that contains all the context information.
        # context = tf.placeholder(tf.float64, [None, None, D], "context")
        # context_placeholder = context  # I use context as a variable name later on
        # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
        # contains the locations of the ends of sentences.
        input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

        # recurrent_cell_size: the number of hidden units in recurrent layers.
        input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # input_p: The probability of maintaining a specific hidden input unit.
        # Likewise, output_p is the probability of maintaining a specific hidden output unit.
        gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

        # dynamic_rnn also returns the final internal state. We don't need that, and can
        # ignore the corresponding output (_).
        input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, self._story, dtype=tf.float64, scope="input_module")

        # cs: the facts gathered from the context.
        cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
        # to use every word as a fact, useful for tasks with one-sentence contexts
        s = input_module_outputs

        # Question Module

        # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
        #  that contains all of the questions.

        query = tf.placeholder(tf.float64, [None, None, D], "query")

        # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
        # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
        # so that it plays nice with gather_nd.
        input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

        question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float64, scope=tf.VariableScope(True, "input_module"))

        # q: the question states. A [batch_size, recurrent_cell_size] tensor.
        q = tf.gather_nd(question_module_outputs, input_query_lengths)

        # Episodic Memory

        # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
        size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
        re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

        # Final output for attention, needs to be 1 in order to create a mask
        output_size = 1

        # Weights and biases
        attend_init = tf.random_normal_initializer(stddev=0.1)
        w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size], tf.float64, initializer=attend_init)
        w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size], tf.float64, initializer=attend_init)

        b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size], tf.float64, initializer=attend_init)
        b_2 = tf.get_variable("attend_b2", [1, output_size], tf.float64, initializer=attend_init)

        # Regulate all the weights and biases
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

        def attention(c, mem, existing_facts):
            """
            Custom attention mechanism.
            c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
                that contains all the facts from the contexts.
            mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
                contains the current memory. It should be the same memory for all facts for accurate results.
            existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
                acts as a binary mask for which facts exist and which do not.

            """
            with tf.variable_scope("attending") as scope:
                # attending: The metrics by which we decide what to attend to.
                attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

                # m1: First layer of multiplied weights for the feed-forward network.
                #     We tile the weights in order to manually broadcast, since tf.matmul does not
                #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
                m1 = tf.matmul(attending * existing_facts, tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
                # bias_1: A masked version of the first feed-forward layer's bias
                #     over only existing facts.

                bias_1 = b_1 * existing_facts

                # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
                #        choosing relu was a design choice intended to avoid issues with
                #        low gradient magnitude when the tanh returned values close to 1 or -1.
                tnhan = tf.nn.relu(m1 + bias_1)

                # m2: Second layer of multiplied weights for the feed-forward network.
                #     Still tiling weights for the same reason described in m1's comments.
                m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

                # bias_2: A masked version of the second feed-forward layer's bias.
                bias_2 = b_2 * existing_facts

                # norm_m2: A normalized version of the second layer of weights, which is used
                #     to help make sure the softmax nonlinearity doesn't saturate.
                norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

                # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
                #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
                softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
                softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
                softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
                softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)

                return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

        # facts_0s: a [batch_size, max_facts_length, 1] tensor
        #     whose values are 1 if the corresponding fact exists and 0 if not.
        facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keep_dims=True), tf.float64)

        with tf.variable_scope("Episodes") as scope:
            attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

            # memory: A list of all tensors that are the (current or past) memory state
            #   of the attention mechanism.
            memory = [q]

            # attends: A list of all tensors that represent what the network attends to.
            attends = []
            for a in range(passes):
                # attention mask
                attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size), facts_0s)

                # Inverse attention mask, for what's retained in the state.
                retain = 1 - attend_to

                # GRU pass over the facts, according to the attention mask.
                while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
                update_state = (lambda state, index: (attend_to[:, index, :] * attention_gru(cs[:, index, :], state)[0] + retain[:, index, :] * state))
                # start loop with most recent memory and at the first index
                memory.append(tuple(tf.while_loop(while_valid_index, (lambda state, index: (update_state(state, index), index + 1)), loop_vars=[memory[-1], 0]))[0])

                attends.append(attend_to)

                # Reuse variables so the GRU pass uses the same variables every pass.
                scope.reuse_variables()

        # Answer Module

        # a0: Final memory state. (Input to answer module)
        a0 = tf.concat([memory[-1], q], -1)

        # fc_init: Initializer for the final fully connected layer's weights.
        fc_init = tf.random_normal_initializer(stddev=0.1)

        with tf.variable_scope("answer"):
            # w_answer: The final fully connected layer's weights.
            w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D], tf.float64, initializer=fc_init)
            # Regulate the fully connected layer's weights
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_answer))

            # The regressed word. This isn't an actual word yet;
            #    we still have to find the closest match.
            logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

            # Make a mask over which words exist.
            with tf.variable_scope("ending"):
                all_ends = tf.reshape(input_sentence_endings, [-1, 2])
                range_ends = tf.range(tf.shape(all_ends)[0])
                ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
                ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1], [tf.shape(q)[0], tf.shape(all_ends)[0]]), axis=-1)
                range_ind = tf.range(tf.shape(ind)[0])
                mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1), tf.ones_like(range_ind), [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]), bool)
                # A bit of a trick. With the locations of the ends of the mask (the last periods in
                # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
                # (starting from all 1). For each context in the batch, this will result in 1s
                # up until the marker (the location of that last period) and 0s afterwards.
                mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

            # We score each possible word inversely with their Euclidean distance to the regressed word.
            #  The highest score (lowest distance) will correspond to the selected word.
            logits = -tf.reduce_sum(tf.square(self._story * tf.transpose(tf.expand_dims(tf.cast(mask, tf.float64), -1), [1, 0, 2]) - logit), axis=-1)

            return logits
Example #20
    _seqlens = tf.placeholder(tf.int32, shape=[batch_size])

    if pre_trained:
        embeddings = tf.Variable(tf.constant(
            0.0, shape=[vocabulary_size, glove_size]),
                                 trainable=True)
        embedding_init = embeddings.assign(embedding_placeholder)
        embed = tf.nn.embedding_lookup(embeddings, _inputs)
    else:
        embeddings = tf.Variable(
            tf.random_uniform([vocabulary_size, embedding_dimension], -1.0,
                              1.0))
        embed = tf.nn.embedding_lookup(embeddings, _inputs)

    with tf.name_scope('biGRU'):
        with tf.variable_scope('forward'):
            gru_fw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
            gru_fw_cell = tf.contrib.rnn.DropoutWrapper(gru_fw_cell)
        with tf.variable_scope('backward'):
            gru_bw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
            gru_bw_cell = tf.contrib.rnn.DropoutWrapper(gru_bw_cell)

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=gru_bw_cell,
            cell_fw=gru_fw_cell,
            inputs=embed,
            sequence_length=_seqlens,
            dtype=tf.float32,
            scope='BiGRU')
    states = tf.concat(values=states, axis=1)
Example #21
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
    """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
    return tf.variable_scope(tf.VariableScope(name=scope, **kwargs),
                             auxiliary_name_scope=False)
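
A short usage sketch: because the helper enters a pre-built tf.VariableScope with auxiliary_name_scope=False, the variables land directly under the given name even when the call site is nested inside other scopes (names below are illustrative):

with tf.variable_scope("agent"):
    with absolute_variable_scope("globals", reuse=tf.AUTO_REUSE):
        step = tf.get_variable("step", shape=[], dtype=tf.int32,
                               initializer=tf.zeros_initializer())
print(step.name)  # globals/step:0, not agent/globals/step:0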
Example #22
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs_plh = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="inputs_plh")

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.Qs = tf.get_variable(
                    'Qs',
                    shape=[self.nb_state, self.action_space.n],
                    initializer=tf.constant_initializer(self.initial_q_value),
                    dtype=tf.float32)
                tf.summary.histogram('Qarray', self.Qs)
                self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                if 'UCB' in self.config and self.config['UCB']:
                    self.actions_t, self.probs_t = capacities.tabular_UCB(
                        self.Qs, self.inputs_plh)
                else:
                    self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
                self.action_t = self.actions_t[0]
                self.q_value_t = self.q_preds_t[0][self.action_t]

            et_scope = tf.VariableScope(reuse=False, name='EligibilityTraces')
            with tf.variable_scope(et_scope):
                et, update_et_op, self.reset_et_op = capacities.eligibility_traces(
                    self.Qs, self.inputs_plh, self.actions_t, self.discount,
                    self.lambda_value)

            with tf.variable_scope('Learning'):
                self.rewards_plh = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards_plh")
                self.next_states_plh = tf.placeholder(tf.int32,
                                                      shape=[None],
                                                      name="next_states_plh")

                self.targets_t = capacities.get_q_learning_target(
                    self.Qs, self.rewards_plh, self.next_states_plh,
                    self.discount)
                target = self.targets_t[0]
                state_action_pairs = tf.stack(
                    [self.inputs_plh, self.actions_t], 1)
                estimate = tf.gather_nd(self.Qs, state_action_pairs)[0]
                err_estimate = target - estimate

                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step",
                                          collections=[
                                              tf.GraphKeys.GLOBAL_STEP,
                                              tf.GraphKeys.GLOBAL_VARIABLES
                                          ])
                lr = tf.train.exponential_decay(tf.constant(self.lr,
                                                            dtype=tf.float32),
                                                global_step,
                                                self.lr_decay_steps,
                                                0.5,
                                                staircase=True)
                tf.summary.scalar('lr', lr)
                inc_global_step = global_step.assign_add(1)
                with tf.control_dependencies([update_et_op, inc_global_step]):
                    self.loss = tf.reduce_sum(err_estimate * et)
                    self.train_op = tf.assign_add(self.Qs,
                                                  lr * err_estimate * et)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #23
 def __init__(self):
     with tf.variable_scope(tf.VariableScope(True, 'd')):
         pass
Example #24
 def __init__(self, name, env_spec):
     self._name = name
     self._env_spec = env_spec
     self._variable_scope = tf.VariableScope(reuse=False, name=name)
Example #25
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs_plh = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="inputs_plh")

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.Qs = tf.get_variable(
                    'Qs',
                    shape=[self.nb_state, self.action_space.n],
                    initializer=tf.constant_initializer(self.initial_q_value),
                    dtype=tf.float32)
                tf.summary.histogram('Qarray', self.Qs)
                self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

            fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
            with tf.variable_scope(fixed_q_scope):
                self.update_fixed_vars_op = capacities.fix_scope(q_scope)

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                if 'UCB' in self.config and self.config['UCB']:
                    self.actions_t, self.probs_t = capacities.tabular_UCB(
                        self.Qs, self.inputs_plh)
                else:
                    self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
                self.action_t = self.actions_t[0]
                self.q_value_t = self.q_preds_t[0][self.action_t]

            # Experienced replay part
            with tf.variable_scope('Learning'):
                with tf.variable_scope(fixed_q_scope, reuse=True):
                    fixed_Qs = tf.get_variable('Qs')

                self.rewards_plh = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards_plh")
                self.next_states_plh = tf.placeholder(tf.int32,
                                                      shape=[None],
                                                      name="next_states_plh")

                # Note that we use the fixed Qs to create the targets
                self.targets_t = capacities.get_q_learning_target(
                    fixed_Qs, self.rewards_plh, self.next_states_plh,
                    self.discount)
                self.loss, self.train_op = capacities.tabular_learning_with_lr(
                    self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                    self.actions_t, self.targets_t)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.event_count, self.inc_event_count_op = capacities.counter(
                "event_count")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #26
def virtual_assistant(server_message):
    import itertools

    # import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf

    # print(os.getcwd())
    def fill_unk(unk):
        global glove_wordmap
        glove_wordmap[unk] = RS.multivariate_normal(m, np.diag(v))
        return glove_wordmap[unk]

    '''-----------------------------------------------------------'''

    def sentence2sequence(sentence):
        tokens = sentence.strip('"(),-').lower().split(" ")
        rows = []
        words = []
        # Greedy search for tokens
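        # For each token, try progressively shorter prefixes until one matches the
        # GloVe vocabulary; whatever cannot be matched is treated as OOV and given
        # a random vector drawn from the corpus statistics (see fill_unk above).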
        for token in tokens:
            i = len(token)
            while len(token) > 0:
                word = token[:i]
                # print(word)
                if word in glove_wordmap:
                    rows.append(glove_wordmap[word])
                    words.append(word)
                    token = token[i:]
                    i = len(token)
                    continue
                else:
                    i = i - 1
                if i == 0:
                    # word OOV
                    # https://arxiv.org/pdf/1611.01436.pdf
                    rows.append(fill_unk(token))
                    words.append(token)
                    break
        return np.array(rows), words

    # import sys
    # text = " ".join(sys.argv[1:]).replace('_', '\n') + "   a   1"

    # print("-----------------", text)
    '''-----------------------------------------------------------'''

    def contextualize(category, server_message):
        data = []
        context = []

        server_message = server_message + "\tt\t3"
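        # Assumption: the dummy answer ("t") and supporting-fact index ("3") appended
        # here make the incoming message parse as a bAbI-style question line below.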
        from pprint import pprint
        pprint(server_message)
        print(server_message)
        for line in server_message.split('\n'):
            l, ine = tuple(line.split(" ", 1))
            # Split the line numbers from the sentences they refer to.
            if l is "1":
                # New contexts always start with 1,
                # so this is a signal to reset the context.
                context = []
            if "\t" in ine:
                # Tabs are the separator between questions and answers,
                # and are not present in context statements.
                # print(tuple(ine.split("\t")))
                question, answer, support = tuple(ine.split("\t"))

                # print("old", question, answer, support.replace("\n",''))
                # print("old", "-------------------------------------------")
                # print(*context)
                # print("-------------------------------------------")
                data.append((tuple(zip(*context)) +
                             sentence2sequence(question) +
                             sentence2sequence(answer) +
                             ([int(s) for s in support.replace("\n", '')],)))
                # Multiple questions may refer to the same context, so we don't reset it.
            else:
                # Context sentence.
                # print(ine.replace("\n", ''))
                context.append(sentence2sequence(ine.replace("\n", '')))
            # print("-------------------------------------------")

        # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
        # print(data[0])
        # print(data)
        return data

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Location markers for the beginnings of new sentences.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, spt, context_words, cqas, avs, aws))
        return np.array(final_data)

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.
        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
            contains the current memory. It should be the same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.

        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)
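            # Seven feature tensors of width recurrent_cell_size are concatenated,
            # which is why w_1 below expects an input of recurrent_cell_size * 7.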

            # m1: First layer of multiplied weights for the feed-forward network.
            #     We tile the weights in order to manually broadcast, since tf.matmul does not
            #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            #     over only existing facts.

            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            #        choosing relu was a design choice intended to avoid issues with
            #        low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            #     Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            #     to help make sure the softmax nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    def prep_batch(batch_data, more_data=False):
        """
            Prepare all the preprocessing that needs to be done on a batch-by-batch basis.
        """
        context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data)
        ends = list(sentence_ends)
        maxend = max(map(len, ends))
        aends = np.zeros((len(ends), maxend))
        for index, i in enumerate(ends):
            for indexj, x in enumerate(i):
                aends[index, indexj] = x - 1
        new_ends = np.zeros(aends.shape + (2,))

        for index, x in np.ndenumerate(aends):
            new_ends[index + (0,)] = index[0]
            new_ends[index + (1,)] = x

        contexts = list(context_vec)
        max_context_length = max([len(x) for x in contexts])
        contextsize = list(np.array(contexts[0]).shape)
        contextsize[0] = max_context_length
        final_contexts = np.zeros([len(contexts)] + contextsize)

        contexts = [np.array(x) for x in contexts]
        for i, context in enumerate(contexts):
            final_contexts[i, 0:len(context), :] = context
        max_query_length = max(len(x) for x in questionvs)
        querysize = list(np.array(questionvs[0]).shape)
        querysize[:1] = [len(questionvs), max_query_length]
        queries = np.zeros(querysize)
        querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs])))
        questions = [np.array(q) for q in questionvs]
        for i, question in enumerate(questions):
            queries[i, 0:len(question), :] = question
        data = {context_placeholder: final_contexts, input_sentence_endings: new_ends,
                query: queries, input_query_lengths: querylengths, gold_standard: answervs}
        return (data, context_words, cqas) if more_data else data

    def restore_sess(location):
        saver = tf.train.Saver()
        session = tf.Session()
        saver.restore(session, location)
        return session

    def session_manage(location):
        full_location = location + "model.ckpt"
        return restore_sess(full_location)

    '''-----------------------------------------------------------'''
    # Deserialize GloVe vectors
    # print(os.getcwd())
    # os.chdir("..")
    # print(os.getcwd())
    from entity.embeddingrepo import EmbeddingDbRepo

    glove_wordmap = {}
    embrepo = EmbeddingDbRepo()
    gloves = embrepo.get()
    for glove in gloves:
        name, vector = list(glove)[1], list(glove)[2]
        glove_wordmap[name] = np.fromstring(vector, sep=" ")
    # glove_wordmap = {}
    # with open(glove_vectors_file, "r", encoding='utf-8-sig') as glove:
    #     for line in glove:
    #         name, vector = tuple(line.split(" ", 1))
    #         glove_wordmap[name] = np.fromstring(vector, sep=" ")
    wvecs = []
    for item in glove_wordmap.items():
        wvecs.append(item[1])
    s = np.vstack(wvecs)

    # Gather the distribution hyperparameters
    v = np.var(s, 0)
    m = np.mean(s, 0)
    RS = np.random.RandomState()

    # final_train_data = finalize(train_data)
    final_test_data = finalize(contextualize(1, server_message))  # finalize(test_data)
    '''-----------------------------------------------------------'''
    tf.reset_default_graph()
    '''-----------------------------------------------------------'''
    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes over the facts in episodic memory.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    weight_decay = 0.00000001
    # The strength of our regularization. Increase it to encourage sparsity in episodic
    # memory, but this makes training slower. Don't make it larger than learning_rate.

    training_iterations_count = 400000
    # The total number of questions the network trains on over the course of training.
    # Some questions are seen multiple times.

    display_step = 100
    # How many iterations of training occur before each validation check.
    '''-----------------------------------------------------------'''
    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs
    '''-----------------------------------------------------------'''
    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    #  that contains all of the questions.

    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
    # so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)
    '''-----------------------------------------------------------'''
    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    #     whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        #   of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
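            # update_state blends, per fact, the attention-GRU output with the previous
            # state: strongly attended facts update the memory, the rest is retained.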
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index: (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()
    '''-----------------------------------------------------------'''
    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        #    we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind), [tf.reduce_max(ind) + 1,
                                                                        tf.shape(ind)[0]]), bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            #  (starting from all 1). For each context in the batch, this will result in 1s
            #  up until the marker (the location of that last period) and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

        # We score each possible word inversely with their Euclidean distance to the regressed word.
        #  The highest score (lowest distance) will correspond to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims(
            tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1)
    '''-----------------------------------------------------------'''
    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        corrbool = tf.reduce_all(eq, -1)
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score
        #  matches the maximum score (i.e. the minimum distance to the regressed
        #  word). Several positions can tie, so the check below accepts the
        #  prediction if any of them coincides with the true answer.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor that indicates for which
        #   words in the context the score always matches the minimum score.
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        #  with our distances as the relative probabilities. There are
        #  multiple correct labels, for each location of the answer word within the context.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

        # Add regularization losses, weighted by weight_decay.
        total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than
    #  just the learning rate, but it's not necessary to find a very good optimum.
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Once we have an optimizer, we ask it to minimize the loss
    #   in order to work towards the proper training.
    opt_op = optimizer.minimize(total_loss)
    '''-----------------------------------------------------------'''
    # Initialize variables
    init = tf.global_variables_initializer()

    # Launch the TensorFlow session
    sess = tf.Session()
    sess.run(init)
    '''-----------------------------------------------------------'''

    '''-----------------------------------------------------------'''
    # Prepare validation set
    # print(final_test_data.shape[0])
    batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
    batch_data = final_test_data[batch]

    validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)

    train_location = "./neural/pre_trained_model/"
    sess = session_manage(train_location)
    '''-----------------------------------------------------------'''
    ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                    [query, cs, question_module_outputs], feed_dict=validation_set)
    a = ancr[0]
    n = ancr[1]
    cr = ancr[2]
    attenders = np.array(ancr[6:-3])
    faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

    # print(list(map((lambda x: x.shape),ancr[3:])), new_ends.shape)
    '''-----------------------------------------------------------'''
    # Locations of responses within contexts
    indices = np.argmax(n, axis=1)

    # Locations of actual answers within contexts
    indicesc = np.argmax(a, axis=1)
    limit = 1

    for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
        ccc = " ".join(cw)
        print("TEXT: ", ccc)
        print("QUESTION: ", " ".join(cqa[3]))
        print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
        print("EXPECTED: ", cw[e])
        print()
        return "RESPONSE: " + cw[i]
    '''-----------------------------------------------------------'''
    # train_location = "./max_train_model/"
    # sess = session_manage(train_location, rewrite=True, iter=training_iterations_count, batch_size=batch_size)
    '''-----------------------------------------------------------'''

    # Final testing accuracy
    print(np.mean(sess.run([corrects], feed_dict=prep_batch(final_test_data))[0]))
    '''-----------------------------------------------------------'''
    sess.close()
    '''-----------------------------------------------------------'''
Example #27
0
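# Note: this fragment assumes D, recurrent_cell_size, cs, lstm_fw_cell and
# lstm_bw_cell are defined elsewhere (e.g. as in the previous example). With
# recurrent_cell_size-unit cells the concatenated bidirectional output is
# 2 * recurrent_cell_size wide, so the later reshape of q assumes the cell
# sizes are chosen to keep q at recurrent_cell_size.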
query = tf.placeholder(tf.float32, [None, None, D], "query")

# input_query_lengths: A [batch_size, 2] tensor that contains question length information.
# input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
# so that it plays nice with gather_nd.
input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

# question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32, scope = tf.VariableScope(True, "input_module"))

question_module_outputs, question_module_states = tf.nn.bidirectional_dynamic_rnn(
    lstm_fw_cell,
    lstm_bw_cell,
    query,
    dtype=tf.float32,
    scope=tf.VariableScope(True, "input_module"))
question_module_outputs = tf.concat(question_module_outputs, 2)
# q: the question states. A [batch_size, recurrent_cell_size] tensor.
# q = tf.squeeze(tf.gather_nd(question_module_outputs, input_query_lengths),axis=1)
q = tf.gather_nd(question_module_outputs, input_query_lengths)
# q=tf.squeeze(q,)
#print("q:",q)

# Episodic Memory

# make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
# print("size:",size)
re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)
# print("re_q:",re_q)
Example #28
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = tf.squeeze(
                    capacities.value_f(self.q_params, self.inputs))

            self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                                  self.env.action_space.n,
                                                  self.N0, self.min_eps)
            self.q_t = self.q_values[self.action_t]

            fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
            with tf.variable_scope(fixed_q_scope):
                self.update_fixed_vars_op = capacities.fix_scope(q_scope)

            with tf.variable_scope('ExperienceReplay'):
                self.er_inputs = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="ERInputs")
                self.er_actions = tf.placeholder(tf.int32,
                                                 shape=[None],
                                                 name="ERInputs")
                self.er_rewards = tf.placeholder(tf.float32,
                                                 shape=[None],
                                                 name="ERReward")
                self.er_next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="ERNextState")

                with tf.variable_scope(q_scope, reuse=True):
                    er_q_values = capacities.value_f(self.q_params,
                                                     self.er_inputs)
                er_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_actions)[0]), self.er_actions
                ], 1)
                er_qs = tf.gather_nd(er_q_values, er_stacked_actions)

                with tf.variable_scope(fixed_q_scope, reuse=True):
                    er_fixed_next_q_values = capacities.value_f(
                        self.q_params, self.er_next_states)
                with tf.variable_scope(q_scope, reuse=True):
                    er_next_q_values = capacities.value_f(
                        self.q_params, self.er_next_states)
                er_next_max_action_t = tf.cast(tf.argmax(er_next_q_values, 1),
                                               tf.int32)
                er_next_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_next_states)[0]),
                    er_next_max_action_t
                ], 1)
                er_next_qs = tf.gather_nd(er_fixed_next_q_values,
                                          er_next_stacked_actions)
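                # Double-DQN-style target: the greedy next action comes from the online
                # network (argmax above) but is evaluated with the fixed copy.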

                er_target_qs1 = tf.stop_gradient(self.er_rewards +
                                                 self.discount * er_next_qs)
                er_target_qs2 = self.er_rewards
                er_stacked_targets = tf.stack([er_target_qs1, er_target_qs2],
                                              1)
                select_targets = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_next_states)[0]),
                    tf.cast(self.er_next_states[:, -1], tf.int32)
                ], 1)
                er_target_qs = tf.gather_nd(er_stacked_targets, select_targets)
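                # The last feature of er_next_states appears to act as a done flag,
                # selecting the reward-only target for terminal transitions.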

                self.er_loss = 1 / 2 * tf.reduce_sum(
                    tf.square(er_target_qs - er_qs))
                er_adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.er_train_op = er_adam.minimize(
                    self.er_loss, global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.timestep, self.inc_timestep_op = capacities.counter(
                "timestep")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #29
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                self.probs, self.actions = capacities.policy(
                    self.policy_params, self.inputs)
            self.action_t = tf.squeeze(self.actions, 1)[0]
            # self.action_t = tf.Print(self.action_t, data=[self.probs, self.action_t], message="self.probs, self.action_t:")

            v_scope = tf.VariableScope(reuse=False, name='VValues')
            with tf.variable_scope(v_scope):
                vs = capacities.value_f(self.v_params, self.inputs)

            with tf.control_dependencies([self.probs, vs]):
                with tf.variable_scope('Training'):
                    stacked_actions = tf.stack([
                        tf.range(0,
                                 tf.shape(self.actions)[0]),
                        tf.squeeze(self.actions, 1)
                    ], 1)

                    self.rewards = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards")
                    self.next_states = tf.placeholder(
                        tf.float32,
                        shape=[None, self.observation_space.shape[0] + 1],
                        name="next_states")
                    self.next_actions = tf.placeholder(tf.int32,
                                                       shape=[None],
                                                       name="next_actions")

                    with tf.variable_scope(v_scope, reuse=True):
                        next_vs = tf.squeeze(
                            capacities.value_f(self.v_params,
                                               self.next_states), 1)

                    with tf.variable_scope('TargetVs'):
                        target_vs1 = tf.stop_gradient(self.rewards +
                                                      self.discount * next_vs)
                        target_vs2 = self.rewards
                        stacked_targets = tf.stack([target_vs1, target_vs2], 1)
                        select_targets = tf.stack([
                            tf.range(0,
                                     tf.shape(self.next_states)[0]),
                            tf.cast(self.next_states[:, -1], tf.int32)
                        ], 1)
                        target_vs = tf.gather_nd(stacked_targets,
                                                 select_targets)

                    log_probs = tf.log(
                        tf.gather_nd(self.probs, stacked_actions))

                    with tf.control_dependencies([log_probs, target_vs]):
                        self.v_loss = 1 / 2 * tf.reduce_sum(
                            tf.square(target_vs - vs))
                        v_adam = tf.train.AdamOptimizer(self.v_lr)
                        self.v_global_step = tf.Variable(0,
                                                         trainable=False,
                                                         name="v_global_step")
                        self.v_train_op = v_adam.minimize(
                            self.v_loss, global_step=self.v_global_step)

                        td = target_vs - vs
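                        # The TD error serves as the advantage estimate: the actor is
                        # pushed towards actions with positive TD error while the critic
                        # above regresses V towards the bootstrapped target.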
                        self.policy_loss = -tf.reduce_sum(
                            log_probs * tf.stop_gradient(td))
                        policy_adam = tf.train.AdamOptimizer(self.policy_lr)
                        self.policy_global_step = tf.Variable(
                            0,
                            trainable=False,
                            name="policy_global_step",
                            collections=[
                                tf.GraphKeys.GLOBAL_STEP,
                                tf.GraphKeys.GLOBAL_VARIABLES
                            ])
                        self.policy_train_op = policy_adam.minimize(
                            self.policy_loss,
                            global_step=self.policy_global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                       self.policy_loss_plh)
            self.v_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.v_loss_sum_t = tf.summary.scalar('v_loss', self.v_loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #30
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
            self.N = tf.Variable(0.,
                                 dtype=tf.float32,
                                 name='N',
                                 trainable=False)
            self.min_eps_t = tf.constant(self.min_eps,
                                         tf.float32,
                                         name='min_eps')

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = tf.squeeze(
                    capacities.value_f(self.q_params, self.inputs))

            self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                                  self.env.action_space.n,
                                                  self.N0, self.min_eps)
            self.q_t = self.q_values[self.action_t]

            with tf.variable_scope('Training'):
                self.reward = tf.placeholder(tf.float32,
                                             shape=[],
                                             name="reward")
                self.next_state = tf.placeholder(
                    tf.float32,
                    shape=[1, self.observation_space.shape[0] + 1],
                    name="nextState")
                self.next_action = tf.placeholder(tf.int32,
                                                  shape=[],
                                                  name="nextAction")

                with tf.variable_scope(q_scope, reuse=True):
                    next_q_values = tf.squeeze(
                        capacities.value_f(self.q_params, self.next_state))
                target_q1 = tf.stop_gradient(self.reward + self.discount *
                                             next_q_values[self.next_action])
                target_q2 = self.reward
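                # The observation appears to be augmented with a done flag (index 4 for
                # a 4-dimensional observation such as CartPole); terminal transitions
                # fall back to the reward-only target.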
                is_done = tf.cast(self.next_state[0, 4], tf.bool)
                target_q = tf.where(is_done, target_q2, target_q1)
                with tf.control_dependencies([target_q]):
                    self.loss = 1 / 2 * tf.square(target_q - self.q_t)

                adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.train_op = adam.minimize(self.loss,
                                              global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph