Example #1
 def _build_layers_v2(self, input_dict, num_outputs, options):
     # Weights shared with CustomModel1
     with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, "shared"),
                            reuse=tf.AUTO_REUSE,
                            auxiliary_name_scope=False):
         last_layer = slim.fully_connected(input_dict["obs"],
                                           64,
                                           activation_fn=tf.nn.relu,
                                           scope="fc1")
     last_layer = slim.fully_connected(last_layer,
                                       64,
                                       activation_fn=tf.nn.relu,
                                       scope="fc2")
     output = slim.fully_connected(last_layer,
                                   num_outputs,
                                   activation_fn=None,
                                   scope="fc_out")
     return output, last_layer
Example #2
    def __init__(self, conf, trainlabels, outputs, constraint, name=None):
        '''EDDecoder constructor

        Args:
            conf: the decoder configuration as a ConfigParser
            trainlabels: the number of extra labels required by the trainer
            outputs: the name of the outputs of the model
            constraint: the constraint for the variables
            name: the decoder name
        '''

        #save the parameters
        self.conf = dict(conf.items('decoder'))
        self.outputs = outputs

        self.output_dims = self.get_output_dims(trainlabels)

        self.scope = tf.VariableScope(tf.AUTO_REUSE,
                                      name or type(self).__name__,
                                      constraint=constraint)
Example #3
 def conv(self, name, inputData, outChannel):
     inChannel = inputData.get_shape()[-1]
     with tf.variable_scope(name):
         kernel = tf.get_variable("weights",
                                  shape=[3, 3, inChannel, outChannel],
                                  dtype=tf.float32,
                                  trainable=False)
         biases = tf.get_variable("biases",
                                  shape=[outChannel],
                                  dtype=tf.float32,
                                  trainable=False)
         convRes = tf.nn.conv2d(input=inputData,
                                filter=kernel,
                                strides=[1, 1, 1, 1],
                                padding="SAME")
         res = tf.nn.bias_add(convRes, bias=biases)
         out = tf.nn.relu(res, name=name)  # the second positional arg of relu is the op name, not a tensor
     self.parameters += [kernel, biases]
     return out
Example #4
    def __init__(self, conf, constraint, name=None):
        '''EDEncoder constructor

        Args:
            conf: the encoder configuration
            name: the encoder name
            constraint: the constraint for the variables
        '''

        #save the configuration
        self.conf = dict(conf.items('encoder'))

        #apply default configuration
        default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'defaults',
                               type(self).__name__.lower() + '.cfg')
        apply_defaults(self.conf, default)

        self.scope = tf.VariableScope(tf.AUTO_REUSE,
                                      name or type(self).__name__,
                                      constraint=constraint)
Example #5
    def __init__(self, conf, name=None):
        '''Model constructor

        Args:
            conf: The model configuration as a configparser object
            name: the model name
        '''

        self.input_names = conf.get('io', 'inputs').split(' ')
        if self.input_names == ['']:
            self.input_names = []
        self.output_names = conf.get('io', 'outputs').split(' ')
        if self.output_names == ['']:
            self.output_names = []

        self.conf = dict(conf.items('model'))

        self.output_dims = {}
        for i, d in enumerate(self.conf['output_dims'].split(' ')):
            self.output_dims[self.output_names[i]] = int(d)

        self.scope = tf.VariableScope(False, name or type(self).__name__)
Example #6
 def _build_layers_v2(self, input_dict, num_outputs, options):
     # Example of (optional) weight sharing between two different policies.
     # Here, we share the variables defined in the 'shared' variable scope
     # by entering it explicitly with tf.AUTO_REUSE. This creates the
     # variables for the 'fc1' layer in a global scope called 'shared'
     # outside of the policy's normal variable scope.
     with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, "shared"),
                            reuse=tf.AUTO_REUSE,
                            auxiliary_name_scope=False):
         last_layer = slim.fully_connected(input_dict["obs"],
                                           64,
                                           activation_fn=tf.nn.relu,
                                           scope="fc1")
     last_layer = slim.fully_connected(last_layer,
                                       64,
                                       activation_fn=tf.nn.relu,
                                       scope="fc2")
     output = slim.fully_connected(last_layer,
                                   num_outputs,
                                   activation_fn=None,
                                   scope="fc_out")
     return output, last_layer
Example #7
def test_load_weight_from_ckpt():

    tf.logging.set_verbosity(tf.logging.INFO)
    x_response_value = [
        [10378, 119, 119, 151, 8815, 8281, 8211, 10425, 8154, 0, 0, 0, 0, 0] +
        [0 for i in range(512 - 14)],
        [
            165, 8991, 8181, 8184, 131, 120, 120, 8134, 11300, 10540, 8735,
            8207, 0, 0
        ] + [0 for i in range(512 - 14)]
    ]
    poly_encoder_config = PolyEncoderConfig.from_json_file(
        '../../cfg/poly_encoder.json')
    encoder_inst = PolyEncoder(config=poly_encoder_config,
                               mode=tf.estimator.ModeKeys.TRAIN)

    bert_scope = tf.VariableScope(name="bert", reuse=tf.AUTO_REUSE)
    bert_config = BertConfig.from_json_file(poly_encoder_config.bert_config)
    x_response = tf.convert_to_tensor(value=x_response_value, dtype=tf.int32)
    x_response_emb, x_response_mask = encoder_inst.encode_candidate(
        x_response=x_response, bert_config=bert_config, bert_scope=bert_scope)
    load_weight_from_ckpt(init_checkpoint="../../ckpt/albert/")
Example #8
def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    # TODO: Implement Function
    with tf.variable_scope('generator'):
        x1 = tf.layers.dense(z, 4 * 4 * 512)
        # reshape the dense output into a 4x4x512 feature map for the transposed convs
        x1 = tf.reshape(x1, (-1, 4, 4, 512))
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = tf.maximum(0.2 * x1, x1)
        # 4x4x512 now

        x2 = tf.layers.conv2d_transpose(x1, 256, 5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = tf.maximum(0.2 * x2, x2)
        # 8x8x256 now

        x3 = tf.layers.conv2d_transpose(x2, 128, 5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = tf.maximum(0.2 * x3, x3)
        # 16x16x128 now

        # Output layer
        x4 = tf.layers.conv2d_transpose(x3,
                                        out_channel_dim,
                                        5,
                                        strides=2,
                                        padding='same')
        # 32x32x3 now

        logits = tf.image.resize_images(x4, [28, 28],
                                        method=tf.image.ResizeMethod.BILINEAR,
                                        align_corners=True)
        out = tf.tanh(logits)

        return out
Example #9
    def __init__(self, conf, output_dim, name=None):
        '''classifier constructor

        Args:
            conf: The classifier configuration
            output_dim: the classifier output dimension
                    This is a tuple, each element representing the output_dim
                    for one kind of targets
            name: the classifier name
        '''

        self.conf = conf

        # if there is only an add_labels in the config, we assume that only the
        # first element of this tuple is important
        if 'add_labels' in conf:
            self.output_dim = output_dim[0] + int(conf['add_labels'])

        # if there is only an add_labels_reconstruction but not an
        # add_labels_prediction in config, assume only second element to be of
        # importance
        elif 'add_labels_reconstruction' in conf and \
            not 'add_labels_prediction' in conf:
            self.output_dim = output_dim[1] + int(
                conf['add_labels_reconstruction'])

        # if both present, both elements of the tuple will be of importance
        elif 'add_labels_reconstruction' in conf and \
            'add_labels_prediction' in conf:
            outdim1 = output_dim[0] + int(conf['add_labels_prediction'])
            outdim2 = output_dim[1] + int(conf['add_labels_reconstruction'])
            self.output_dim = (outdim1, outdim2)

        else:
            raise Exception(
                'Wrong kind of add_labels information in the config')

        #create the variable scope for the classifier
        self.scope = tf.VariableScope(False, name or type(self).__name__)
Example #10
 def build(self, hparams, is_training=True):
   self._total_length = hparams.max_seq_len
   if self._total_length != np.prod(self._level_lengths):
     raise ValueError(
         'The product of the HierarchicalLstmEncoder level lengths (%d) must '
         'equal the padded input sequence length (%d).' % (
             np.prod(self._level_lengths), self._total_length))
   tf.logging.info('\nHierarchical Encoder:\n'
                   '  input length: %d\n'
                   '  level lengths: %s\n',
                   self._total_length,
                   self._level_lengths)
   self._hierarchical_encoders = []
   num_splits = np.prod(self._level_lengths)
   for i, l in enumerate(self._level_lengths):
     num_splits //= l
     tf.logging.info('Level %d splits: %d', i, num_splits)
     h_encoder = self._core_encoder_cls()
     h_encoder.build(
         hparams, is_training,
         name_or_scope=tf.VariableScope(
             tf.AUTO_REUSE, 'encoder/hierarchical_level_%d' % i))
     self._hierarchical_encoders.append((num_splits, h_encoder))
Example #11
 def _build_layers_v2(self, input_dict, num_outputs, options):
     convs = options.get("conv_filters")
     if convs is None:
         convs = filters_mnih15
     activation = tf.nn.relu
     conv_output = input_dict["obs"]
     with tf.name_scope("mnih15_convs"):
         for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
             conv_output = slim.conv2d(
                 input_dict["obs"],
                 out_size,
                 kernel,
                 stride,
                 activation_fn=activation,
                 padding="SAME",
                 scope="conv{}".format(i))
         out_size, kernel, stride = convs[-1]
         conv_output = slim.conv2d(
             conv_output,
             out_size,
             kernel,
             stride,
             activation_fn=activation,
             padding="VALID",
             scope="conv_out")
     action_out = slim.flatten(conv_output)
     with tf.name_scope("mnih15_FC"):
         # Share weights of the following layer with other instances of this
         # model (usually by other macad_agents in a Multi-Agent setting)
         with tf.variable_scope(
                 tf.VariableScope(tf.AUTO_REUSE, "shared"),
                 reuse=tf.AUTO_REUSE):
             shared_layer = slim.fully_connected(
                 action_out, 128, activation_fn=activation)
         action_logits = slim.fully_connected(
             action_out, num_outputs=num_outputs, activation_fn=None)
     return action_logits, shared_layer
Example #12
    def __init__(self, input_size, layers, hidden_units, max_length,
                 learning_rate):
        with tf.VariableScope(name="BiLSTM", reuse=tf.AUTO_REUSE):
            self.input_X = tf.placeholder(dtype=tf.float32,
                                          shape=[None, max_length, input_size],
                                          name="input_X")
            self.input_y = tf.placeholder(dtype=tf.float32,
                                          shape=[None, 1],
                                          name="input_y")
            self.sequence_length = tf.placeholder(dtype=tf.int32,
                                                  shape=[None],
                                                  name="sequence_length")
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name="dropout_keep_prob")

            self.output = self.build_bilstm(self.input_X, layers, hidden_units,
                                            self.dropout_keep_prob)

            # average the per-example cross-entropy so the loss is a scalar
            self.loss = tf.reduce_mean(
                -(self.input_y * tf.log(self.output) +
                  (1 - self.input_y) * tf.log(1 - self.output)))
            self.train = tf.train.AdamOptimizer(learning_rate).minimize(
                self.loss)

            # threshold the sigmoid output for the binary prediction
            self.prediction = tf.cast(tf.to_int32(self.output >= 0.5),
                                      dtype=tf.float32,
                                      name="prediction")
            self.accuracy = tf.multiply(tf.reduce_mean(
                tf.cast(tf.equal(self.input_y, self.prediction),
                        dtype=tf.float32)),
                                        100,
                                        name="accuracy")

            tf.summary.scalar("loss", self.loss)
            tf.summary.scalar("accuracy", self.accuracy)
            self.merge_graph = tf.summary.merge_all()
Example #13
    def build_graph(self, graph):
        self.env.seed(self.random_seed)
        np.random.seed(self.random_seed)
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            # Graph of the LSTM model of the world
            input_scope = tf.VariableScope(reuse=False, name="inputs")
            with tf.variable_scope(input_scope):
                self.state_input_plh = tf.placeholder(
                    tf.float32,
                    shape=[None, None, self.m_params['env_state_size']],
                    name='state_input_plh')
                self.action_input_plh = tf.placeholder(tf.int32,
                                                       shape=[None, None, 1],
                                                       name='action_input_plh')
                self.mask_plh = tf.placeholder(tf.float32,
                                               shape=[None, None, 1],
                                               name="mask_plh")

                input_shape = tf.shape(self.state_input_plh)
                dynamic_batch_size, dynamic_num_steps = input_shape[
                    0], input_shape[1]

                action_input = tf.one_hot(indices=tf.squeeze(
                    self.action_input_plh, 2),
                                          depth=self.m_params['nb_actions'])
                m_inputs = tf.concat([self.state_input_plh, action_input],
                                     2,
                                     name="m_inputs")

            m_scope = tf.VariableScope(reuse=False, name="m")
            with tf.variable_scope(m_scope):
                self.state_reward_preds, self.m_final_state, self.m_initial_state = capacities.predictive_model(
                    self.m_params,
                    m_inputs,
                    dynamic_batch_size,
                    None,
                    summary_collections=[self.M_SUMMARIES])

            fixed_m_scope = tf.VariableScope(reuse=False, name='FixedM')
            with tf.variable_scope(fixed_m_scope):
                self.update_m_fixed_vars_op = capacities.fix_scope(m_scope)

            m_training_scope = tf.VariableScope(reuse=False, name='m_training')
            with tf.variable_scope(m_training_scope):
                self.m_next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, None, self.m_params['env_state_size']],
                    name="m_next_states")
                self.m_rewards = tf.placeholder(tf.float32,
                                                shape=[None, None, 1],
                                                name="m_rewards")
                y_true = tf.concat([self.m_rewards, self.m_next_states], 2)

                with tf.control_dependencies([self.state_reward_preds]):
                    self.m_loss = 1 / 2 * tf.reduce_mean(
                        tf.square(self.state_reward_preds - y_true) *
                        self.mask_plh)
                    tf.summary.scalar('m_loss',
                                      self.m_loss,
                                      collections=[self.M_SUMMARIES])

                m_adam = tf.train.AdamOptimizer(self.m_params['lr'])
                self.m_global_step = tf.Variable(0,
                                                 trainable=False,
                                                 name="m_global_step")
                tf.summary.scalar('m_global_step',
                                  self.m_global_step,
                                  collections=[self.M_SUMMARIES])
                self.m_train_op = m_adam.minimize(
                    self.m_loss, global_step=self.m_global_step)

            self.all_m_summary_t = tf.summary.merge_all(key=self.M_SUMMARIES)

            # Graph of the controller
            c_scope = tf.VariableScope(reuse=False, name="c")
            c_summary_collection = [self.C_SUMMARIES]
            with tf.variable_scope(c_scope):
                # c_cell = LSTMCell(
                #     num_units=self.c_params['nb_units']
                #     , initializer=tf.truncated_normal_initializer(
                #         mean=self.c_params['initial_mean']
                #         , stddev=self.c_params['initial_stddev']
                #     )
                # )
                # self.c_initial_state = c_cell.zero_state(dynamic_batch_size, dtype=tf.float32)
                # c_c_h_states, self.c_final_state = tf.nn.dynamic_rnn(c_cell, self.state_input_plh, initial_state=self.c_initial_state)
                # c_c_states, c_h_states = tf.split(value=c_c_h_states, num_or_size_splits=[self.c_params['nb_units'], self.c_params['nb_units']], axis=2)
                # # Compute the Controller projection
                # self.probs_t, self.actions_t = projection_func(c_h_states)
                m_params = self.m_params
                model_func = lambda m_inputs, m_state: capacities.predictive_model(
                    m_params, m_inputs, dynamic_batch_size, m_state)
                c_params = self.c_params
                projection_func = lambda inputs: capacities.projection(
                    c_params, inputs)
                cm_cell = CMCell(num_units=self.c_params['nb_units'],
                                 m_units=self.m_params['nb_units'],
                                 fixed_model_scope=fixed_m_scope,
                                 model_func=model_func,
                                 projection_func=projection_func,
                                 num_proj=self.c_params['nb_actions'],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=self.c_params['initial_mean'],
                                     stddev=self.c_params['initial_stddev']))

                self.cm_initial_state = cm_cell.zero_state(dynamic_batch_size,
                                                           dtype=tf.float32)
                probs_and_actions_t, self.cm_final_state = tf.nn.dynamic_rnn(
                    cm_cell,
                    self.state_input_plh,
                    initial_state=self.cm_initial_state)
                self.probs_t, actions_t = tf.split(
                    value=probs_and_actions_t,
                    num_or_size_splits=[self.c_params['nb_actions'], 1],
                    axis=2)
                self.actions_t = tf.cast(actions_t, tf.int32)
                # helper tensor used for inference
                self.action_t = self.actions_t[0, 0, 0]

            c_training_scope = tf.VariableScope(reuse=False, name='c_training')
            with tf.variable_scope(c_training_scope):
                self.c_rewards_plh = tf.placeholder(tf.float32,
                                                    shape=[None, None, 1],
                                                    name="c_rewards_plh")

                baseline = tf.reduce_mean(self.c_rewards_plh)

                batch_size, num_steps = tf.shape(self.actions_t)[0], tf.shape(
                    self.actions_t)[1]
                line_indices = tf.matmul(  # Line indices
                    tf.reshape(tf.range(0, batch_size), [-1, 1]),
                    tf.ones([1, num_steps], dtype=tf.int32))
                column_indices = tf.matmul(  # Column indices
                    tf.ones([batch_size, 1], dtype=tf.int32),
                    tf.reshape(tf.range(0, num_steps), [1, -1]))
                depth_indices = tf.squeeze(self.actions_t, 2)
                stacked_actions = tf.stack(
                    [line_indices, column_indices, depth_indices], 2)

                with tf.control_dependencies([self.probs_t]):
                    log_probs = tf.expand_dims(
                        tf.log(tf.gather_nd(self.probs_t, stacked_actions)), 2)
                    masked_log_probs = log_probs * self.mask_plh
                    self.c_loss = tf.reduce_mean(-tf.reduce_sum(
                        masked_log_probs * (self.c_rewards_plh - baseline), 1))
                    tf.summary.scalar('c_loss',
                                      self.c_loss,
                                      collections=c_summary_collection)

                c_adam = tf.train.AdamOptimizer(self.c_params['lr'])
                self.c_global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ],
                    dtype=tf.int32)
                tf.summary.scalar('c_global_step',
                                  self.c_global_step,
                                  collections=c_summary_collection)
                self.c_train_op = c_adam.minimize(
                    self.c_loss, global_step=self.c_global_step)

            self.all_c_summary_t = tf.summary.merge_all(key=self.C_SUMMARIES)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.episode_id_sum = tf.summary.scalar('episode_id',
                                                    self.episode_id)
            self.time, self.inc_time_op = capacities.counter("time")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #14
import os
import sys

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tflearn
from genotypes import PRIMITIVES
from genotypes import Genotype
from operations import *
import utils
null_scope = tf.VariableScope("")


def MixedOp(x, C_out, stride, index, reduction):
    ops = []

    with tf.variable_scope(null_scope):
        with tf.variable_scope("arch_params", reuse=tf.AUTO_REUSE):
            weight = tf.get_variable(
                "weight{}_{}".format(2 if reduction else 1,
                                     index), [len(PRIMITIVES)],
                initializer=tf.random_normal_initializer(0, 1e-3),
                regularizer=slim.l2_regularizer(0.0001))
    weight = tf.nn.softmax(weight)
    weight = tf.reshape(weight, [-1, 1, 1, 1])
    index = 0
    for primitive in PRIMITIVES:

        op = OPS[primitive](x, C_out, stride)
        if 'pool' in primitive:
Example #15
    def __init__(self,
                 config,
                 env,
                 sess,
                 writer,
                 name='haggle',
                 trainable=True):
        super(Model, self).__init__()
        self.config = {
            'pre': [],
            'lstm': 128,
            'value_scale': 0.5,
            'lr': 0.001,
            'grad_clip': 0.5,
            'ppo': 0.1,
            'ppo_epochs': 10,
        }

        self.max_rounds = env.max_rounds

        self.config.update(config)

        self.original_name = name
        self.version = 0

        self.name = name

        self.observation_space = env.observation_space
        self.action_space = env.action_space
        self.context_space = env.context_space
        self.reward_space = env.reward_space

        self.empty_reward = np.zeros(self.reward_space, dtype='float32')

        # Used for getting offers by index
        self.env = env

        self.scope = tf.VariableScope(reuse=False, name=name)
        self.sess = sess
        self.writer = writer
        self.writer_step = 0

        with tf.variable_scope(self.scope):
            self.input = tf.placeholder(tf.int32,
                                        shape=(
                                            None,
                                            self.observation_space,
                                        ),
                                        name='input')
            self.context = tf.placeholder(tf.int32,
                                          shape=(
                                              None,
                                              self.context_space,
                                          ),
                                          name='input_context')

            # Init layers

            self.layers = {
                'embedding':
                tf.get_variable('embedding',
                                dtype=tf.float32,
                                initializer=tf.initializers.random_normal,
                                shape=(self.action_space,
                                       self.config['lstm'])),
                'pre': [],
                'action':
                tf.layers.Dense(self.action_space, name='action'),
                'value':
                tf.layers.Dense(self.reward_space, name='value'),
                'context':
                tf.layers.Dense(self.config['lstm'] * 2,
                                activation=tf.nn.relu,
                                name='context'),
                'lstm':
                tf.contrib.rnn.LSTMBlockCell(name='lstm',
                                             num_units=self.config['lstm']),
            }

            self.build_context = self.layers['context'](tf.cast(
                self.context, dtype=tf.float32))

            for i, width in enumerate(self.config['pre']):
                pre = tf.layers.Dense(width,
                                      activation=tf.nn.relu,
                                      name='preprocess_{}'.format(i))
                self.layers['pre'].append(pre)

            state_size = self.layers['lstm'].state_size
            self.rnn_state = tf.placeholder(tf.float32,
                                            shape=(None, state_size.c +
                                                   state_size.h),
                                            name='rnn_state')

            state = tf.contrib.rnn.LSTMStateTuple(
                c=self.rnn_state[:, :state_size.c],
                h=self.rnn_state[:, state_size.c:])

            new_state, action, action_probs, value = \
                self._network(state, self.input)

            self.action = action
            self.action_probs = action_probs
            self.value = value
            self.new_state = tf.concat([ new_state.c, new_state.h ], axis=-1, \
                name='new_state')

            # Losses
            if trainable:
                self._losses()

            # Weight loading
            self.trainable_variables = self.scope.trainable_variables()

            self.weight_placeholders = {}
            self.load_ops = []
            for var in self.trainable_variables:
                name = var.name.split(':', 1)[0]
                name = name.split('/', 1)[1]
                placeholder = tf.placeholder(
                    var.dtype,
                    shape=var.shape,
                    name='{}/placeholder'.format(name))
                self.weight_placeholders[name] = placeholder
                self.load_ops.append(var.assign(placeholder))
Example #16
    def __init__(self, cell, name):
        '''ScopeRNNCellWrapper constructor'''

        self._cell = cell
        self.scope = tf.VariableScope(None, name)
Example #17
 def _get_input_scope(self, default_name=""):
   if self.share_embeddings == EmbeddingsSharingLevel.SOURCE_TARGET_INPUT:
     name = "shared_embeddings"
   else:
     name = default_name
   return tf.VariableScope(None, name=tf.get_variable_scope().name + "/" + name)
Example #18
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                self.probs, self.actions = capacities.policy(
                    self.policy_params, self.inputs)
            self.action_t = tf.squeeze(self.actions, 1)[0]

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = capacities.value_f(self.q_params, self.inputs)
            self.q = self.q_values[0, tf.stop_gradient(self.action_t)]

            with tf.variable_scope('Training'):
                stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.actions)[0]),
                    tf.squeeze(self.actions, 1)
                ], 1)
                qs = tf.gather_nd(self.q_values, stacked_actions)
                log_probs = tf.log(tf.gather_nd(self.probs, stacked_actions))
                self.policy_loss = -tf.reduce_sum(
                    log_probs * tf.stop_gradient(qs))

                self.rewards = tf.placeholder(tf.float32,
                                              shape=[None],
                                              name="rewards")
                self.next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="next_states")
                self.next_actions = tf.placeholder(tf.int32,
                                                   shape=[None],
                                                   name="next_actions")
                with tf.variable_scope(q_scope, reuse=True):
                    next_q_values = capacities.value_f(self.q_params,
                                                       self.next_states)
                next_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.next_actions)[0]), self.next_actions
                ], 1)
                next_qs = tf.gather_nd(next_q_values, next_stacked_actions)
                target_qs1 = tf.stop_gradient(self.rewards +
                                              self.discount * next_qs)
                target_qs2 = self.rewards
                stacked_targets = tf.stack([target_qs1, target_qs2], 1)
                select_targets = tf.stack([
                    tf.range(0,
                             tf.shape(self.next_states)[0]),
                    tf.cast(self.next_states[:, -1], tf.int32)
                ], 1)
                target_qs = tf.gather_nd(stacked_targets, select_targets)
                self.q_loss = 1 / 2 * tf.reduce_sum(tf.square(target_qs - qs))

                self.loss = self.policy_loss + self.q_scale_lr * self.q_loss

                adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.train_op = adam.minimize(self.loss,
                                              global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                       self.policy_loss_plh)
            self.q_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.q_loss_sum_t = tf.summary.scalar('q_loss', self.q_loss_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #19
    def neural_net(self):
        ###############
        # Input Module
        ###############

        # Hyperparameters
        # The number of dimensions used to store data passed between recurrent layers in the network.
        recurrent_cell_size = 128

        # The number of dimensions in our word vectorizations.
        D = 50

        # How quickly the network learns. Too high, and we may run into numeric instability
        # or other issues.
        learning_rate = 0.005

        # Dropout probabilities. For a description of dropout and what these probabilities are,
        # see Entailment with TensorFlow.
        input_p, output_p = 0.5, 0.5

        # How many questions we train on at a time.
        batch_size = 128

        # Number of passes in episodic memory. We'll get to this later.
        passes = 4

        # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
        ff_hidden_size = 256

        weight_decay = 0.00000001
        # The strength of our regularization. Increase to encourage sparsity in episodic memory,
        # but makes training slower. Don't make this larger than learning_rate.

        training_iterations_count = 400000
        # How many questions the network trains on each time it is trained.
        # Some questions are counted multiple times.

        display_step = 100
        # How many iterations of training occur before each validation check.






        # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
        # that contains all the context information.
        # context = tf.placeholder(tf.float64, [None, None, D], "context")
        # context_placeholder = context  # I use context as a variable name later on
        # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
        # contains the locations of the ends of sentences.
        input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

        # recurrent_cell_size: the number of hidden units in recurrent layers.
        input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # input_p: The probability of maintaining a specific hidden input unit.
        # Likewise, output_p is the probability of maintaining a specific hidden output unit.
        gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

        # dynamic_rnn also returns the final internal state. We don't need that, and can
        # ignore the corresponding output (_).
        input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, self._story, dtype=tf.float64, scope="input_module")

        # cs: the facts gathered from the context.
        cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
        # to use every word as a fact, useful for tasks with one-sentence contexts
        s = input_module_outputs

        # Question Module

        # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
        #  that contains all of the questions.

        query = tf.placeholder(tf.float64, [None, None, D], "query")

        # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
        # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
        # so that it plays nice with gather_nd.
        input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

        question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float64, scope=tf.VariableScope(True, "input_module"))

        # q: the question states. A [batch_size, recurrent_cell_size] tensor.
        q = tf.gather_nd(question_module_outputs, input_query_lengths)

        # Episodic Memory

        # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
        size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
        re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

        # Final output for attention, needs to be 1 in order to create a mask
        output_size = 1

        # Weights and biases
        attend_init = tf.random_normal_initializer(stddev=0.1)
        w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size], tf.float64, initializer=attend_init)
        w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size], tf.float64, initializer=attend_init)

        b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size], tf.float64, initializer=attend_init)
        b_2 = tf.get_variable("attend_b2", [1, output_size], tf.float64, initializer=attend_init)

        # Regulate all the weights and biases
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

        def attention(c, mem, existing_facts):
            """
            Custom attention mechanism.
            c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
                that contains all the facts from the contexts.
            mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
                contains the current memory. It should be the same memory for all facts for accurate results.
            existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
                acts as a binary mask for which facts exist and which do not.

            """
            with tf.variable_scope("attending") as scope:
                # attending: The metrics by which we decide what to attend to.
                attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

                # m1: First layer of multiplied weights for the feed-forward network.
                #     We tile the weights in order to manually broadcast, since tf.matmul does not
                #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
                m1 = tf.matmul(attending * existing_facts, tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
                # bias_1: A masked version of the first feed-forward layer's bias
                #     over only existing facts.

                bias_1 = b_1 * existing_facts

                # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
                #        choosing relu was a design choice intended to avoid issues with
                #        low gradient magnitude when the tanh returned values close to 1 or -1.
                tnhan = tf.nn.relu(m1 + bias_1)

                # m2: Second layer of multiplied weights for the feed-forward network.
                #     Still tiling weights for the same reason described in m1's comments.
                m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

                # bias_2: A masked version of the second feed-forward layer's bias.
                bias_2 = b_2 * existing_facts

                # norm_m2: A normalized version of the second layer of weights, which is used
                #     to help make sure the softmax nonlinearity doesn't saturate.
                norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

                # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
                #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
                softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
                softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
                softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
                softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)

                return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

        # facts_0s: a [batch_size, max_facts_length, 1] tensor
        #     whose values are 1 if the corresponding fact exists and 0 if not.
        facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keep_dims=True), tf.float64)

        with tf.variable_scope("Episodes") as scope:
            attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

            # memory: A list of all tensors that are the (current or past) memory state
            #   of the attention mechanism.
            memory = [q]

            # attends: A list of all tensors that represent what the network attends to.
            attends = []
            for a in range(passes):
                # attention mask
                attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size), facts_0s)

                # Inverse attention mask, for what's retained in the state.
                retain = 1 - attend_to

                # GRU pass over the facts, according to the attention mask.
                while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
                update_state = (lambda state, index: (attend_to[:, index, :] * attention_gru(cs[:, index, :], state)[0] + retain[:, index, :] * state))
                # start loop with most recent memory and at the first index
                memory.append(tuple(tf.while_loop(while_valid_index, (lambda state, index: (update_state(state, index), index + 1)), loop_vars=[memory[-1], 0]))[0])

                attends.append(attend_to)

                # Reuse variables so the GRU pass uses the same variables every pass.
                scope.reuse_variables()

        # Answer Module

        # a0: Final memory state. (Input to answer module)
        a0 = tf.concat([memory[-1], q], -1)

        # fc_init: Initializer for the final fully connected layer's weights.
        fc_init = tf.random_normal_initializer(stddev=0.1)

        with tf.variable_scope("answer"):
            # w_answer: The final fully connected layer's weights.
            w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D], tf.float64, initializer=fc_init)
            # Regulate the fully connected layer's weights
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_answer))

            # The regressed word. This isn't an actual word yet;
            #    we still have to find the closest match.
            logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

            # Make a mask over which words exist.
            with tf.variable_scope("ending"):
                all_ends = tf.reshape(input_sentence_endings, [-1, 2])
                range_ends = tf.range(tf.shape(all_ends)[0])
                ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
                ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1], [tf.shape(q)[0], tf.shape(all_ends)[0]]), axis=-1)
                range_ind = tf.range(tf.shape(ind)[0])
                mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1), tf.ones_like(range_ind), [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]), bool)
                # A bit of a trick. With the locations of the ends of the mask (the last periods in
                # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
                # (starting from all 1). For each context in the batch, this will result in 1s
                # up until the marker (the location of that last period) and 0s afterwards.
                mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

            # We score each possible word inversely with their Euclidean distance to the regressed word.
            #  The highest score (lowest distance) will correspond to the selected word.
            logits = -tf.reduce_sum(tf.square(self._story * tf.transpose(tf.expand_dims(tf.cast(mask, tf.float64), -1), [1, 0, 2]) - logit), axis=-1)

            return logits
Example #20
    _seqlens = tf.placeholder(tf.int32, shape=[batch_size])

    if pre_trained:
        embeddings = tf.Variable(tf.constant(
            0.0, shape=[vocabulary_size, glove_size]),
                                 trainable=True)
        embedding_init = embeddings.assign(embedding_placeholder)
        embed = tf.nn.embedding_lookup(embeddings, _inputs)
    else:
        embeddings = tf.Variable(
            tf.random_uniform([vocabulary_size, embedding_dimension], -1.0,
                              1.0))
        embed = tf.nn.embedding_lookup(embeddings, _inputs)

    with tf.name_scope('biGRU'):
        with tf.variable_scope('forward'):
            gru_fw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
            gru_fw_cell = tf.contrib.rnn.DropoutWrapper(gru_fw_cell)
        with tf.variable_scope('backward'):
            gru_bw_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
            gru_bw_cell = tf.contrib.rnn.DropoutWrapper(gru_bw_cell)

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=gru_bw_cell,
            cell_fw=gru_fw_cell,
            inputs=embed,
            sequence_length=_seqlens,
            dtype=tf.float32,
            scope='BiGRU')
    states = tf.concat(values=states, axis=1)
Example #21
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
    """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
    return tf.variable_scope(tf.VariableScope(name=scope, **kwargs),
                             auxiliary_name_scope=False)
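A minimal usage sketch of the helper above (illustrative only; the scope and variable names are made up, and it assumes TF 1.x semantics with reuse forwarded through **kwargs): entering the same absolute scope from inside an unrelated scope resolves to the same variable.

# Hypothetical usage of absolute_variable_scope: "shared", "policy" and "w" are
# illustrative names, not part of the original example.
import tensorflow as tf

with absolute_variable_scope("shared", reuse=tf.AUTO_REUSE):
    w = tf.get_variable("w", shape=[4])            # created as "shared/w"

with tf.variable_scope("policy"):
    # the surrounding "policy" scope is ignored, so this resolves to "shared/w"
    with absolute_variable_scope("shared", reuse=tf.AUTO_REUSE):
        w_again = tf.get_variable("w", shape=[4])

assert w is w_again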
Example #22
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs_plh = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="inputs_plh")

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.Qs = tf.get_variable(
                    'Qs',
                    shape=[self.nb_state, self.action_space.n],
                    initializer=tf.constant_initializer(self.initial_q_value),
                    dtype=tf.float32)
                tf.summary.histogram('Qarray', self.Qs)
                self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                if 'UCB' in self.config and self.config['UCB']:
                    self.actions_t, self.probs_t = capacities.tabular_UCB(
                        self.Qs, self.inputs_plh)
                else:
                    self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
                self.action_t = self.actions_t[0]
                self.q_value_t = self.q_preds_t[0][self.action_t]

            et_scope = tf.VariableScope(reuse=False, name='EligibilityTraces')
            with tf.variable_scope(et_scope):
                et, update_et_op, self.reset_et_op = capacities.eligibility_traces(
                    self.Qs, self.inputs_plh, self.actions_t, self.discount,
                    self.lambda_value)

            with tf.variable_scope('Learning'):
                self.rewards_plh = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards_plh")
                self.next_states_plh = tf.placeholder(tf.int32,
                                                      shape=[None],
                                                      name="next_states_plh")

                self.targets_t = capacities.get_q_learning_target(
                    self.Qs, self.rewards_plh, self.next_states_plh,
                    self.discount)
                target = self.targets_t[0]
                state_action_pairs = tf.stack(
                    [self.inputs_plh, self.actions_t], 1)
                estimate = tf.gather_nd(self.Qs, state_action_pairs)[0]
                err_estimate = target - estimate

                global_step = tf.Variable(0,
                                          trainable=False,
                                          name="global_step",
                                          collections=[
                                              tf.GraphKeys.GLOBAL_STEP,
                                              tf.GraphKeys.GLOBAL_VARIABLES
                                          ])
                lr = tf.train.exponential_decay(tf.constant(self.lr,
                                                            dtype=tf.float32),
                                                global_step,
                                                self.lr_decay_steps,
                                                0.5,
                                                staircase=True)
                tf.summary.scalar('lr', lr)
                inc_global_step = global_step.assign_add(1)
                with tf.control_dependencies([update_et_op, inc_global_step]):
                    self.loss = tf.reduce_sum(err_estimate * et)
                    self.train_op = tf.assign_add(self.Qs,
                                                  lr * err_estimate * et)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #23
 def __init__(self):
     with tf.variable_scope(tf.VariableScope(True, 'd')):  # a VariableScope object is entered via tf.variable_scope
         pass
Example #24
 def __init__(self, name, env_spec):
     self._name = name
     self._env_spec = env_spec
     self._variable_scope = tf.VariableScope(reuse=False, name=name)
Example #25
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs_plh = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="inputs_plh")

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.Qs = tf.get_variable(
                    'Qs',
                    shape=[self.nb_state, self.action_space.n],
                    initializer=tf.constant_initializer(self.initial_q_value),
                    dtype=tf.float32)
                tf.summary.histogram('Qarray', self.Qs)
                self.q_preds_t = tf.gather(self.Qs, self.inputs_plh)

            fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
            with tf.variable_scope(fixed_q_scope):
                self.update_fixed_vars_op = capacities.fix_scope(q_scope)

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                if 'UCB' in self.config and self.config['UCB']:
                    self.actions_t, self.probs_t = capacities.tabular_UCB(
                        self.Qs, self.inputs_plh)
                else:
                    self.actions_t, self.probs_t = capacities.tabular_eps_greedy(
                        self.inputs_plh, self.q_preds_t, self.nb_state,
                        self.env.action_space.n, self.N0, self.min_eps)
                self.action_t = self.actions_t[0]
                self.q_value_t = self.q_preds_t[0][self.action_t]

            # Experience replay part
            with tf.variable_scope('Learning'):
                with tf.variable_scope(fixed_q_scope, reuse=True):
                    fixed_Qs = tf.get_variable('Qs')

                self.rewards_plh = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards_plh")
                self.next_states_plh = tf.placeholder(tf.int32,
                                                      shape=[None],
                                                      name="next_states_plh")

                # Note that we use the fixed Qs to create the targets
                self.targets_t = capacities.get_q_learning_target(
                    fixed_Qs, self.rewards_plh, self.next_states_plh,
                    self.discount)
                self.loss, self.train_op = capacities.tabular_learning_with_lr(
                    self.lr, self.lr_decay_steps, self.Qs, self.inputs_plh,
                    self.actions_t, self.targets_t)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.event_count, self.inc_event_count_op = capacities.counter(
                "event_count")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #26
def virtual_assistant(server_message):
    import itertools

    # import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf

    # print(os.getcwd())
    def fill_unk(unk):
        global glove_wordmap
        glove_wordmap[unk] = RS.multivariate_normal(m, np.diag(v))
        return glove_wordmap[unk]

    '''-----------------------------------------------------------'''

    def sentence2sequence(sentence):
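        # Turns a sentence into a sequence of word vectors by greedily matching the
        # longest prefix of each token found in the GloVe vocabulary; anything that
        # cannot be matched falls back to fill_unk().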
        tokens = sentence.strip('"(),-').lower().split(" ")
        rows = []
        words = []
        # Greedy search for tokens
        for token in tokens:
            i = len(token)
            while len(token) > 0:
                word = token[:i]
                # print(word)
                if word in glove_wordmap:
                    rows.append(glove_wordmap[word])
                    words.append(word)
                    token = token[i:]
                    i = len(token)
                    continue
                else:
                    i = i - 1
                if i == 0:
                    # word OOV
                    # https://arxiv.org/pdf/1611.01436.pdf
                    rows.append(fill_unk(token))
                    words.append(token)
                    break
        return np.array(rows), words

    # import sys
    # text = " ".join(sys.argv[1:]).replace('_', '\n') + "   a   1"

    # print("-----------------", text)
    '''-----------------------------------------------------------'''

    def contextualize(category, server_message):
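        # The input appears to follow the bAbI task format: every line starts with a
        # line number, context lines are plain sentences, and question lines hold
        # question, answer and supporting-fact id separated by tabs.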
        data = []
        context = []

        server_message = server_message + "\tt\t3"
        from pprint import pprint
        pprint(server_message)
        print(server_message)
        for line in server_message.split('\n'):
            l, ine = tuple(line.split(" ", 1))
            # Split the line numbers from the sentences they refer to.
            if l == "1":
                # New contexts always start with 1,
                # so this is a signal to reset the context.
                context = []
            if "\t" in ine:
                # Tabs are the separator between questions and answers,
                # and are not present in context statements.
                # print(tuple(ine.split("\t")))
                question, answer, support = tuple(ine.split("\t"))

                # print("old", question, answer, support.replace("\n",''))
                # print("old", "-------------------------------------------")
                # print(*context)
                # print("-------------------------------------------")
                data.append((tuple(zip(*context)) +
                             sentence2sequence(question) +
                             sentence2sequence(answer) +
                             ([int(s) for s in support.replace("\n", '')],)))
                # Multiple questions may refer to the same context, so we don't reset it.
            else:
                # Context sentence.
                # print(ine.replace("\n", ''))
                context.append(sentence2sequence(ine.replace("\n", '')))
            # print("-------------------------------------------")

        # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
        # print(data[0])
        # print(data)
        return data

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Cumulative word counts: each value marks where one sentence ends and the next begins.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, spt, context_words, cqas, avs, aws))
        return np.array(final_data)

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.
        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
            contains the current memory. It should be the same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.

        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward network.
            #     We tile the weights in order to manually broadcast, since tf.matmul does not
            #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
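            #     For reference, with the shapes defined above: attending is
            #     [batch_size, max_facts, 7 * recurrent_cell_size], the tiled w_1 is
            #     [batch_size, 7 * recurrent_cell_size, recurrent_cell_size], so m1 is
            #     [batch_size, max_facts, recurrent_cell_size].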
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            #     over only existing facts.

            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            #        choosing relu was a design choice intended to avoid issues with
            #        low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            #     Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            #     to help make sure the softmax nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
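            #     (A dense softmax would give the padded, non-existent facts non-zero
            #     probability, since exp(0) = 1; limiting the softmax to the non-zero
            #     entries avoids that.)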
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    def prep_batch(batch_data, more_data=False):
        """
            Prepare all the preprocessing that needs to be done on a batch-by-batch basis.
        """
        context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data)
        ends = list(sentence_ends)
        maxend = max(map(len, ends))
        aends = np.zeros((len(ends), maxend))
        for index, i in enumerate(ends):
            for indexj, x in enumerate(i):
                aends[index, indexj] = x - 1
        new_ends = np.zeros(aends.shape + (2,))

        for index, x in np.ndenumerate(aends):
            new_ends[index + (0,)] = index[0]
            new_ends[index + (1,)] = x

        contexts = list(context_vec)
        max_context_length = max([len(x) for x in contexts])
        contextsize = list(np.array(contexts[0]).shape)
        contextsize[0] = max_context_length
        final_contexts = np.zeros([len(contexts)] + contextsize)

        contexts = [np.array(x) for x in contexts]
        for i, context in enumerate(contexts):
            final_contexts[i, 0:len(context), :] = context
        max_query_length = max(len(x) for x in questionvs)
        querysize = list(np.array(questionvs[0]).shape)
        querysize[:1] = [len(questionvs), max_query_length]
        queries = np.zeros(querysize)
        querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs])))
        questions = [np.array(q) for q in questionvs]
        for i, question in enumerate(questions):
            queries[i, 0:len(question), :] = question
        data = {context_placeholder: final_contexts, input_sentence_endings: new_ends,
                query: queries, input_query_lengths: querylengths, gold_standard: answervs}
        return (data, context_words, cqas) if more_data else data

    def restore_sess(location):
        saver = tf.train.Saver()
        session = tf.Session()
        saver.restore(session, location)
        return session

    def session_manage(location):
        full_location = location + "model.ckpt"
        return restore_sess(full_location)

    '''-----------------------------------------------------------'''
    # Deserialize GloVe vectors
    # print(os.getcwd())
    # os.chdir("..")
    # print(os.getcwd())
    from entity.embeddingrepo import EmbeddingDbRepo

    glove_wordmap = {}
    embrepo = EmbeddingDbRepo()
    gloves = embrepo.get()
    for glove in gloves:
        name, vector = list(glove)[1], list(glove)[2]
        glove_wordmap[name] = np.fromstring(vector, sep=" ")
    # glove_wordmap = {}
    # with open(glove_vectors_file, "r", encoding='utf-8-sig') as glove:
    #     for line in glove:
    #         name, vector = tuple(line.split(" ", 1))
    #         glove_wordmap[name] = np.fromstring(vector, sep=" ")
    wvecs = []
    for item in glove_wordmap.items():
        wvecs.append(item[1])
    s = np.vstack(wvecs)

    # Gather the distribution hyperparameters
    v = np.var(s, 0)
    m = np.mean(s, 0)
    RS = np.random.RandomState()

    # final_train_data = finalize(train_data)
    final_test_data = finalize(contextualize(1, server_message))  # finalize(test_data)
    '''-----------------------------------------------------------'''
    tf.reset_default_graph()
    '''-----------------------------------------------------------'''
    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    weight_decay = 0.00000001
    # The strength of our regularization. Increase to encourage sparsity in episodic memory,
    # but it makes training slower. Don't make this larger than learning_rate.

    training_iterations_count = 400000
    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.

    display_step = 100
    # How many iterations of training occur before each validation check.
    '''-----------------------------------------------------------'''
    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs
    '''-----------------------------------------------------------'''
    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    #  that contains all of the questions.

    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] holds the index of each question's last word (length - 1);
    # input_query_lengths[:,0] is a simple range() so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)
    '''-----------------------------------------------------------'''
    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    #     whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        #   of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index: (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()
    '''-----------------------------------------------------------'''
    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        #    we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind), [tf.reduce_max(ind) + 1,
                                                                        tf.shape(ind)[0]]), bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            #  (starting from all 1). For each context in the batch, this will result in 1s
            #  up until the marker (the location of that last period) and 0s afterwards.
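            #  A small worked example for one context column: if mask_ends is
            #  [0, 0, 1, 0], scanning xor with an initial value of 1 yields [1, 1, 0, 0].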
            mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

        # We score each possible word by the negative of its squared Euclidean distance
        #  to the regressed word. The highest score (smallest distance) corresponds to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims(
            tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1)
    '''-----------------------------------------------------------'''
    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        corrbool = tf.reduce_all(eq, -1)
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score
        #  matches the maximum score (i.e. the minimum distance). Ties can
        #  occur at several indexes, so below we check whether any of them
        #  lines up with the real answer.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor that indicates whether any of the
        #   top-scoring locations coincides with the location of the real answer.
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        #  with our distances as the relative probabilities. There are
        #  multiple correct labels, for each location of the answer word within the context.
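        #  Per position this is the usual -[y * log(sigmoid(x)) + (1 - y) * log(1 - sigmoid(x))],
        #  applied to the L2-normalized scores.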
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

        # Add regularization losses, weighted by weight_decay.
        total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than
    #  just the learning rate, but it's not necessary to find a very good optimum.
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Once we have an optimizer, we ask it to minimize the loss
    #   in order to work towards the proper training.
    opt_op = optimizer.minimize(total_loss)
    '''-----------------------------------------------------------'''
    # Initialize variables
    init = tf.global_variables_initializer()

    # Launch the TensorFlow session
    sess = tf.Session()
    sess.run(init)
    '''-----------------------------------------------------------'''

    '''-----------------------------------------------------------'''
    # Prepare validation set
    # print(final_test_data.shape[0])
    batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
    batch_data = final_test_data[batch]

    validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)

    train_location = "./neural/pre_trained_model/"
    sess = session_manage(train_location)
    '''-----------------------------------------------------------'''
    ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                    [query, cs, question_module_outputs], feed_dict=validation_set)
    a = ancr[0]
    n = ancr[1]
    cr = ancr[2]
    attenders = np.array(ancr[6:-3])
    faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

    # print(list(map((lambda x: x.shape),ancr[3:])), new_ends.shape)
    '''-----------------------------------------------------------'''
    # Locations of responses within contexts
    indices = np.argmax(n, axis=1)

    # Locations of actual answers within contexts
    indicesc = np.argmax(a, axis=1)
    limit = 1

    for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
        ccc = " ".join(cw)
        print("TEXT: ", ccc)
        print("QUESTION: ", " ".join(cqa[3]))
        print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
        print("EXPECTED: ", cw[e])
        print()
        return "RESPONSE: " + cw[i]
    '''-----------------------------------------------------------'''
    # train_location = "./max_train_model/"
    # sess = session_manage(train_location, rewrite=True, iter=training_iterations_count, batch_size=batch_size)
    '''-----------------------------------------------------------'''

    # Final testing accuracy
    print(np.mean(sess.run([corrects], feed_dict=prep_batch(final_test_data))[0]))
    '''-----------------------------------------------------------'''
    sess.close()
    '''-----------------------------------------------------------'''
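# The module-level fragment below swaps the question module's GRU for a bidirectional
# LSTM; it assumes lstm_fw_cell, lstm_bw_cell, cs, D and recurrent_cell_size are
# defined elsewhere.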
query = tf.placeholder(tf.float32, [None, None, D], "query")

# input_query_lengths: A [batch_size, 2] tensor that contains question length information.
# input_query_lengths[:,1] holds the index of each question's last word (length - 1);
# input_query_lengths[:,0] is a simple range() so that it plays nice with gather_nd.
input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

# question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32, scope = tf.VariableScope(True, "input_module"))

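# bidirectional_dynamic_rnn returns ((fw_outputs, bw_outputs), final_states); the
# forward/backward outputs are concatenated along the feature axis just below.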
question_module_outputs, question_module_outputs_bw = tf.nn.bidirectional_dynamic_rnn(
    lstm_fw_cell,
    lstm_bw_cell,
    query,
    dtype=tf.float32,
    scope=tf.VariableScope(True, "input_module"))
question_module_outputs = tf.concat(question_module_outputs, 2)
# q: the question states. A [batch_size, recurrent_cell_size] tensor.
# q = tf.squeeze(tf.gather_nd(question_module_outputs, input_query_lengths),axis=1)
q = tf.gather_nd(question_module_outputs, input_query_lengths)
# q=tf.squeeze(q,)
#print("q:",q)

# Episodic Memory

# make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
# print("size:",size)
re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)
# print("re_q:",re_q)
Example #28
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = tf.squeeze(
                    capacities.value_f(self.q_params, self.inputs))

            self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                                  self.env.action_space.n,
                                                  self.N0, self.min_eps)
            self.q_t = self.q_values[self.action_t]

            fixed_q_scope = tf.VariableScope(reuse=False, name='FixedQValues')
            with tf.variable_scope(fixed_q_scope):
                self.update_fixed_vars_op = capacities.fix_scope(q_scope)

            with tf.variable_scope('ExperienceReplay'):
                self.er_inputs = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="ERInputs")
                self.er_actions = tf.placeholder(tf.int32,
                                                 shape=[None],
                                                 name="ERInputs")
                self.er_rewards = tf.placeholder(tf.float32,
                                                 shape=[None],
                                                 name="ERReward")
                self.er_next_states = tf.placeholder(
                    tf.float32,
                    shape=[None, self.observation_space.shape[0] + 1],
                    name="ERNextState")

                with tf.variable_scope(q_scope, reuse=True):
                    er_q_values = capacities.value_f(self.q_params,
                                                     self.er_inputs)
                er_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_actions)[0]), self.er_actions
                ], 1)
                er_qs = tf.gather_nd(er_q_values, er_stacked_actions)

                with tf.variable_scope(fixed_q_scope, reuse=True):
                    er_fixed_next_q_values = capacities.value_f(
                        self.q_params, self.er_next_states)
                with tf.variable_scope(q_scope, reuse=True):
                    er_next_q_values = capacities.value_f(
                        self.q_params, self.er_next_states)
                er_next_max_action_t = tf.cast(tf.argmax(er_next_q_values, 1),
                                               tf.int32)
                er_next_stacked_actions = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_next_states)[0]),
                    er_next_max_action_t
                ], 1)
                er_next_qs = tf.gather_nd(er_fixed_next_q_values,
                                          er_next_stacked_actions)

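                # Target construction (as read from the ops above): the online network
                # picks the argmax action for the next state and the fixed network
                # scores it, giving r + discount * Q_fixed(s', argmax_a Q(s', a)); when
                # the last feature of the next state marks a terminal transition, the
                # bare reward is used instead.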
                er_target_qs1 = tf.stop_gradient(self.er_rewards +
                                                 self.discount * er_next_qs)
                er_target_qs2 = self.er_rewards
                er_stacked_targets = tf.stack([er_target_qs1, er_target_qs2],
                                              1)
                select_targets = tf.stack([
                    tf.range(0,
                             tf.shape(self.er_next_states)[0]),
                    tf.cast(self.er_next_states[:, -1], tf.int32)
                ], 1)
                er_target_qs = tf.gather_nd(er_stacked_targets, select_targets)

                self.er_loss = 1 / 2 * tf.reduce_sum(
                    tf.square(er_target_qs - er_qs))
                er_adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.er_train_op = er_adam.minimize(
                    self.er_loss, global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")
            self.timestep, self.inc_timestep_op = capacities.counter(
                "timestep")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #29
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            policy_scope = tf.VariableScope(reuse=False, name='Policy')
            with tf.variable_scope(policy_scope):
                self.probs, self.actions = capacities.policy(
                    self.policy_params, self.inputs)
            self.action_t = tf.squeeze(self.actions, 1)[0]
            # self.action_t = tf.Print(self.action_t, data=[self.probs, self.action_t], message="self.probs, self.action_t:")

            v_scope = tf.VariableScope(reuse=False, name='VValues')
            with tf.variable_scope(v_scope):
                vs = capacities.value_f(self.v_params, self.inputs)

            with tf.control_dependencies([self.probs, vs]):
                with tf.variable_scope('Training'):
                    stacked_actions = tf.stack([
                        tf.range(0,
                                 tf.shape(self.actions)[0]),
                        tf.squeeze(self.actions, 1)
                    ], 1)

                    self.rewards = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="rewards")
                    self.next_states = tf.placeholder(
                        tf.float32,
                        shape=[None, self.observation_space.shape[0] + 1],
                        name="next_states")
                    self.next_actions = tf.placeholder(tf.int32,
                                                       shape=[None],
                                                       name="next_actions")

                    with tf.variable_scope(v_scope, reuse=True):
                        next_vs = tf.squeeze(
                            capacities.value_f(self.v_params,
                                               self.next_states), 1)

                    with tf.variable_scope('TargetVs'):
                        target_vs1 = tf.stop_gradient(self.rewards +
                                                      self.discount * next_vs)
                        target_vs2 = self.rewards
                        stacked_targets = tf.stack([target_vs1, target_vs2], 1)
                        select_targets = tf.stack([
                            tf.range(0,
                                     tf.shape(self.next_states)[0]),
                            tf.cast(self.next_states[:, -1], tf.int32)
                        ], 1)
                        target_vs = tf.gather_nd(stacked_targets,
                                                 select_targets)

                    log_probs = tf.log(
                        tf.gather_nd(self.probs, stacked_actions))

                    with tf.control_dependencies([log_probs, target_vs]):
                        self.v_loss = 1 / 2 * tf.reduce_sum(
                            tf.square(target_vs - vs))
                        v_adam = tf.train.AdamOptimizer(self.v_lr)
                        self.v_global_step = tf.Variable(0,
                                                         trainable=False,
                                                         name="v_global_step")
                        self.v_train_op = v_adam.minimize(
                            self.v_loss, global_step=self.v_global_step)

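                        # Policy update: the TD error serves as the advantage estimate,
                        # and stop_gradient keeps the policy loss from back-propagating
                        # into the value function.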
                        td = target_vs - vs
                        self.policy_loss = -tf.reduce_sum(
                            log_probs * tf.stop_gradient(td))
                        policy_adam = tf.train.AdamOptimizer(self.policy_lr)
                        self.policy_global_step = tf.Variable(
                            0,
                            trainable=False,
                            name="policy_global_step",
                            collections=[
                                tf.GraphKeys.GLOBAL_STEP,
                                tf.GraphKeys.GLOBAL_VARIABLES
                            ])
                        self.policy_train_op = policy_adam.minimize(
                            self.policy_loss,
                            global_step=self.policy_global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.policy_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.policy_loss_sum_t = tf.summary.scalar('policy_loss',
                                                       self.policy_loss_plh)
            self.v_loss_plh = tf.placeholder(tf.float32, shape=[])
            self.v_loss_sum_t = tf.summary.scalar('v_loss', self.v_loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph
Example #30
0
    def build_graph(self, graph):
        with graph.as_default():
            tf.set_random_seed(self.random_seed)

            self.N0_t = tf.constant(self.N0, tf.float32, name='N_0')
            self.N = tf.Variable(0.,
                                 dtype=tf.float32,
                                 name='N',
                                 trainable=False)
            self.min_eps_t = tf.constant(self.min_eps,
                                         tf.float32,
                                         name='min_eps')

            self.inputs = tf.placeholder(
                tf.float32,
                shape=[None, self.observation_space.shape[0] + 1],
                name='inputs')

            q_scope = tf.VariableScope(reuse=False, name='QValues')
            with tf.variable_scope(q_scope):
                self.q_values = tf.squeeze(
                    capacities.value_f(self.q_params, self.inputs))

            self.action_t = capacities.eps_greedy(self.inputs, self.q_values,
                                                  self.env.action_space.n,
                                                  self.N0, self.min_eps)
            self.q_t = self.q_values[self.action_t]

            with tf.variable_scope('Training'):
                self.reward = tf.placeholder(tf.float32,
                                             shape=[],
                                             name="reward")
                self.next_state = tf.placeholder(
                    tf.float32,
                    shape=[1, self.observation_space.shape[0] + 1],
                    name="nextState")
                self.next_action = tf.placeholder(tf.int32,
                                                  shape=[],
                                                  name="nextAction")

                with tf.variable_scope(q_scope, reuse=True):
                    next_q_values = tf.squeeze(
                        capacities.value_f(self.q_params, self.next_state))
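                # Bootstrapped target: r + discount * Q(s', a') with the action that
                # will actually be taken next (SARSA-style); the bare reward is used
                # instead when the done flag stored in state feature 4 is set.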
                target_q1 = tf.stop_gradient(self.reward + self.discount *
                                             next_q_values[self.next_action])
                target_q2 = self.reward
                is_done = tf.cast(self.next_state[0, 4], tf.bool)
                target_q = tf.where(is_done, target_q2, target_q1)
                with tf.control_dependencies([target_q]):
                    self.loss = 1 / 2 * tf.square(target_q - self.q_t)

                adam = tf.train.AdamOptimizer(self.lr)
                self.global_step = tf.Variable(
                    0,
                    trainable=False,
                    name="global_step",
                    collections=[
                        tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES
                    ])
                self.train_op = adam.minimize(self.loss,
                                              global_step=self.global_step)

            self.score_plh = tf.placeholder(tf.float32, shape=[])
            self.score_sum_t = tf.summary.scalar('score', self.score_plh)
            self.loss_plh = tf.placeholder(tf.float32, shape=[])
            self.loss_sum_t = tf.summary.scalar('loss', self.loss_plh)
            self.all_summary_t = tf.summary.merge_all()

            self.episode_id, self.inc_ep_id_op = capacities.counter(
                "episode_id")

            # Playing part
            self.pscore_plh = tf.placeholder(tf.float32, shape=[])
            self.pscore_sum_t = tf.summary.scalar('play_score',
                                                  self.pscore_plh)

        return graph