Beispiel #1
0
    def __init__(self, config, infer=False):
        """Assemble the non-Bayesian encoder/decoder training graph.

        Args:
            config: model configuration object (batch size, decoder depth,
                learning rate, ...); mutated in-place when `infer` is set.
            infer: when True, shrink the graph to decode a single example
                one AST node at a time.
        """
        self.config = config
        if infer:
            # Inference decodes one program at a time, step by step.
            config.batch_size = 1
            config.decoder.max_ast_depth = 1

        # Encoder; its encoding seeds the decoder's initial state.
        self.encoder = NonBayesianEncoder(config)
        self.decoder = NonBayesianDecoder(config, initial_state=self.encoder.encoding, infer=infer)

        # Flatten the per-timestep decoder outputs and project onto the vocabulary.
        flat_out = tf.reshape(tf.concat(self.decoder.outputs, 1),
                              [-1, self.decoder.cell1.output_size])
        logits = tf.matmul(flat_out, self.decoder.projection_w) + self.decoder.projection_b
        self.probs = tf.nn.softmax(logits)

        # Generation loss: log P(X | \Psi), uniform weight on every step.
        self.targets = tf.placeholder(tf.int32, [config.batch_size, config.decoder.max_ast_depth])
        flat_targets = tf.reshape(self.targets, [-1])
        step_weights = tf.ones([config.batch_size * config.decoder.max_ast_depth])
        self.gen_loss = seq2seq.sequence_loss([logits], [flat_targets], [step_weights])

        # Single-term objective, optimized with Adam.
        self.loss = self.gen_loss
        self.train_op = tf.train.AdamOptimizer(config.learning_rate).minimize(self.loss)

        # Trainable-parameter count, reported during training runs only.
        var_params = [np.prod([dim.value for dim in var.get_shape()])
                      for var in tf.trainable_variables()]
        if not infer:
            print('Model parameters: {}'.format(np.sum(var_params)))
Beispiel #2
0
    def __init__(self, config, infer=False):
        """Build the Bayesian ('lle') encoder/decoder training graph.

        Args:
            config: model configuration (must have config.model == 'lle');
                mutated in-place when `infer` is set.
            infer: when True, decode a single example one step at a time.
        """
        assert config.model == 'lle', 'Trying to load different model implementation: ' + config.model
        self.config = config
        if infer:
            config.batch_size = 1
            config.decoder.max_seq_length = 1

        # Encoder; reparameterization trick samples psi ~ N(mean, covariance).
        self.encoder = BayesianEncoder(config)
        noise = tf.random_normal([config.batch_size, config.latent_size],
                                 mean=0., stddev=1., dtype=tf.float32)
        self.psi = self.encoder.psi_mean + tf.sqrt(self.encoder.psi_covariance) * noise

        # Lift psi into the decoder's state space and decode from it.
        lift_w = tf.get_variable('lift_w', [config.latent_size, config.decoder.units])
        lift_b = tf.get_variable('lift_b', [config.decoder.units])
        self.initial_state = tf.nn.xw_plus_b(self.psi, lift_w, lift_b)
        self.decoder = BayesianDecoder(config, initial_state=self.initial_state, infer=infer)

        # Project the flattened decoder outputs onto the vocabulary.
        flat_out = tf.reshape(tf.concat(self.decoder.outputs, 1),
                              [-1, self.decoder.cell1.output_size])
        logits = tf.matmul(flat_out, self.decoder.projection_w) + self.decoder.projection_b
        self.probs = tf.nn.softmax(logits)

        # 1. generation loss: log P(X | \Psi)
        self.targets = tf.placeholder(tf.int32, [config.batch_size, config.decoder.max_seq_length])
        flat_targets = tf.reshape(self.targets, [-1])
        step_weights = tf.ones([config.batch_size * config.decoder.max_seq_length])
        self.gen_loss = seq2seq.sequence_loss([logits], [flat_targets], [step_weights])

        # 2. latent loss: KL-divergence between P(\Psi | f(\Theta)) and the
        # standard-normal prior P(\Psi), scaled by alpha.
        kl = 0.5 * tf.reduce_sum(- tf.log(self.encoder.psi_covariance)
                                 - 1 + self.encoder.psi_covariance
                                 + tf.square(self.encoder.psi_mean), axis=1)
        self.latent_loss = config.alpha * kl

        # 3. evidence loss: log P(f(\theta) | \Psi; \sigma), one term per
        # evidence type, summed and scaled by beta.
        per_evidence = [ev.evidence_loss(self.psi, enc, config)
                        for ev, enc in zip(config.evidence, self.encoder.encodings)]
        per_evidence = [tf.reduce_sum(l, axis=1) for l in per_evidence]
        self.evidence_loss = config.beta * tf.reduce_sum(tf.stack(per_evidence), axis=0)

        # Combined objective, trained with Adam.
        self.loss = self.gen_loss + self.latent_loss + self.evidence_loss
        self.train_op = tf.train.AdamOptimizer(config.learning_rate).minimize(self.loss)

        # Trainable-parameter count, reported during training runs only.
        var_params = [np.prod([dim.value for dim in var.get_shape()])
                      for var in tf.trainable_variables()]
        if not infer:
            print('Model parameters: {}'.format(np.sum(var_params)))
Beispiel #3
0
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs,
                             encoder_state,
                             targets,
                             sequence_length,
                             num_decoder_symbols,
                             weights,
                             buckets,
                             softmax_loss_function=None,
                             per_example_loss=False,
                             name=None,
                             use_attention=False):
  """Run the attention RNN decoder and compute its sequence loss.

  Args:
    num_encoder_symbols: size of the encoder vocabulary (unused here but
      kept for interface compatibility).
    encoder_outputs: list of encoder output tensors, one per timestep.
    encoder_state: final encoder state fed to the decoder.
    targets: list of target tensors, one per timestep; must be at least as
      long as the last bucket's decoder length.
    sequence_length: lengths of the input sequences.
    num_decoder_symbols: size of the decoder vocabulary.
    weights: list of per-timestep loss weights.
    buckets: list of (encoder_length, decoder_length) pairs.
    softmax_loss_function: optional custom loss (labels, logits) -> loss.
    per_example_loss: if True, return an unaveraged per-example loss
      (sequence_loss_by_example); otherwise a scalar batch-averaged loss.
    name: optional name scope.
    use_attention: whether the decoder attends over encoder outputs.

  Returns:
    A (logits, crossent) pair.

  Raises:
    ValueError: if `targets` is shorter than the last bucket requires.
  """
  if len(targets) < buckets[-1][1]:
    raise ValueError("Length of targets (%d) must be at least that of last "
                     "bucket (%d)." % (len(targets), buckets[-1][1]))

  all_inputs = encoder_outputs + targets + weights
  with tf.name_scope(name, "model_with_buckets", all_inputs):
    with tf.variable_scope("decoder_sequence_output", reuse=None):
      logits, attention_weights = attention_RNN(encoder_outputs,
                                                encoder_state,
                                                num_decoder_symbols,
                                                sequence_length,
                                                use_attention=use_attention)
      assert len(logits) == len(targets)
      # We need to make target an int64-tensor and set its shape.
      bucket_target = [tf.reshape(tf.to_int64(x), [-1]) for x in targets]
      # BUG FIX: the original tested `per_example_loss is None`, which is
      # always False for the documented True/False values, making the
      # per-example branch unreachable. Test truthiness instead; the
      # default (False) path is unchanged.
      if per_example_loss:
        crossent = sequence_loss_by_example(
              logits, bucket_target, weights,
              softmax_loss_function=softmax_loss_function)
      else:
        crossent = sequence_loss(
              logits, bucket_target, weights,
              softmax_loss_function=softmax_loss_function)

  return logits, crossent
Beispiel #4
0
    def __init__(self, config, infer=False):
        """Build the Bayesian encoder/decoder graph over token sequences.

        Args:
            config: model configuration (batch size, latent size, decoder
                units, alpha/beta loss weights, evidence list, ...);
                mutated in-place when `infer` is set.
            infer: when True, decode a single example one token at a time.
        """
        self.config = config
        if infer:
            config.batch_size = 1
            config.decoder.max_tokens = 1

        # Encoder; reparameterization trick gives psi ~ N(mean, covariance).
        self.encoder = BayesianEncoder(config)
        noise = tf.random_normal([config.batch_size, config.latent_size],
                                 mean=0., stddev=1., dtype=tf.float32)
        self.psi = self.encoder.psi_mean + tf.sqrt(self.encoder.psi_covariance) * noise

        # Lift psi into the decoder's state space and decode from it.
        lift_w = tf.get_variable('lift_w', [config.latent_size, config.decoder.units])
        lift_b = tf.get_variable('lift_b', [config.decoder.units])
        self.initial_state = tf.nn.xw_plus_b(self.psi, lift_w, lift_b)
        self.decoder = BayesianDecoder(config, initial_state=self.initial_state, infer=infer)

        # Project the flattened decoder outputs onto the vocabulary.
        flat_out = tf.reshape(tf.concat(self.decoder.outputs, 1),
                              [-1, self.decoder.cell.output_size])
        logits = tf.matmul(flat_out, self.decoder.projection_w) + self.decoder.projection_b
        self.probs = tf.nn.softmax(logits)

        # 1. generation loss: log P(X | \Psi)
        self.targets = tf.placeholder(tf.int32, [config.batch_size, config.decoder.max_tokens])
        flat_targets = tf.reshape(self.targets, [-1])
        step_weights = tf.ones([config.batch_size * config.decoder.max_tokens])
        self.gen_loss = seq2seq.sequence_loss([logits], [flat_targets], [step_weights])

        # 2. latent loss: KL-divergence to the standard-normal prior,
        # scaled by alpha.
        kl = 0.5 * tf.reduce_sum(
            -tf.log(self.encoder.psi_covariance) - 1 +
            self.encoder.psi_covariance + tf.square(self.encoder.psi_mean),
            axis=1)
        self.latent_loss = config.alpha * kl

        # 3. evidence loss: log P(f(\theta) | \Psi; \sigma), one term per
        # evidence type, summed and scaled by beta.
        per_evidence = [ev.evidence_loss(self.psi, enc, config)
                        for ev, enc in zip(config.evidence, self.encoder.encodings)]
        per_evidence = [tf.reduce_sum(l, axis=1) for l in per_evidence]
        self.evidence_loss = config.beta * tf.reduce_sum(tf.stack(per_evidence), axis=0)

        # Combined objective, trained with Adam.
        self.loss = self.gen_loss + self.latent_loss + self.evidence_loss
        self.train_op = tf.train.AdamOptimizer(config.learning_rate).minimize(self.loss)

        # Trainable-parameter count, reported during training runs only.
        var_params = [np.prod([dim.value for dim in var.get_shape()])
                      for var in tf.trainable_variables()]
        if not infer:
            print('Model parameters: {}'.format(np.sum(var_params)))
Beispiel #5
0
        name="GO")] + encode_input[:-1])

# Initial decoder memory (defined for reference; embedding_rnn_seq2seq
# builds its own zero state internally).
previous_memory = tf.zeros(shape=(batch_size, memory_dim))

cell = core_rnn_cell.GRUCell(num_units=memory_dim)

# Embedding seq2seq; encoder and decoder share one vocabulary.
decode_outputs, decode_memory = legacy_seq2seq.embedding_rnn_seq2seq(
    encoder_inputs=encode_input,
    decoder_inputs=decode_input,
    cell=cell,
    num_encoder_symbols=vocab_size,
    num_decoder_symbols=vocab_size,
    embedding_size=embedding_dim)

# Cross-entropy sequence loss over the decoder outputs.
loss = legacy_seq2seq.sequence_loss(
    logits=decode_outputs,
    targets=labels,
    weights=weights)

tf.summary.scalar("loss", loss)

# BUG FIX: 'manitude' -> 'magnitude' (typo in both the variable name and
# the TensorBoard summary tag).
magnitude = tf.sqrt(tf.reduce_sum(tf.square(decode_memory[1])))
tf.summary.scalar("magnitude at t=1", magnitude)
manitude = magnitude  # backward-compatibility alias for the old misspelled name

summary_op = tf.summary.merge_all()

# Plain SGD with momentum.
learning_rate = 0.05
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)

summary_writer = tf.summary.FileWriter("./sources/seq2seq/log/", sess.graph)
Beispiel #6
0
    def build_model(self, forward_only):
        """Build the PTRModel math graph.

        Creates input/target placeholders, unrolls the pointer controller
        over every sequence length up to ``self.max_length``, collects one
        candidate output per controller step, and — unless ``forward_only``
        — builds per-length losses, clipped gradients and (in a separate
        scope) the optimizer ops.

        Args:
            forward_only: when True, skip loss/gradient/optimizer
                construction (inference-only graph).
        """
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            # Learned scalars for the adaptive-computation stopping rule.
            self.a = weight('a', [1, 1])
            # self.c = weight('c', [1, 1])
            # self.d = weight('d', [1, 1])
            self.b = weight('b', [1, 1], init='constant')
            # softplus keeps beta strictly above 1 (sharpness of the stop
            # distribution D below).
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()

            tf.get_variable_scope().reuse_variables()

            # One target placeholder per output position, up to
            # max_length^2 positions in total.
            for seq_length in range(1, self.max_length * self.max_length + 1):
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)
                self.true_outputs.append(true_output)

            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                input_2 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_2_%s' % seq_length)

                self.inputs_1.append(input_1)
                self.inputs_2.append(input_2)

                # present inputs
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_2, [1, -1]),
                        tf.zeros((1, self.W))
                    ])
                # State history for this length: the previous length's
                # history plus a snapshot taken after presenting this pair.
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                self.debug[seq_length] = []

                state = prev_state
                self.prev_states[seq_length] = state

                # Run the controller MAX_STEP times; each step yields a
                # candidate answer read from the top of memory M, plus
                # pointer snapshots for debugging.
                candidate_outputs = []
                for j in range(0, self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    new_state = self.copy_state(state)
                    self.collect_states[seq_length].append(new_state)
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:(seq_length *
                                                     seq_length)]))
                    self.debug[seq_length].append(
                        (new_state['ptr'], new_state['dptr']))

                self.outputs[seq_length] = candidate_outputs

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    print(len(self.outputs[seq_length]),
                          len(self.true_outputs[0:seq_length * seq_length]),
                          len([1] * (seq_length * seq_length)))
                    # print(self.outputs[seq_length][0].shape,self.true_outputs[0:2*seq_length][0].shape,len([1] * (2*seq_length)))
                    # One L2 sequence loss per controller step (unaveraged,
                    # via the custom l2_loss softmax_loss_function).
                    all_losses = []
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length *
                                                      seq_length],
                            weights=[1] * (seq_length * seq_length),
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    # Learned stop position cn = seq_length^a + b, clipped to
                    # a valid step index; D presumably turns it into a
                    # distribution over the MAX_STEP steps — confirm against
                    # D's definition elsewhere in the project.
                    cn = tf.pow(tf.to_float(seq_length), self.a) + self.b
                    max_pos = tf.clip_by_value(cn, 0, self.MAX_STEP - 1)
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    # Expected loss under the stop distribution plus a small
                    # penalty on computation length.
                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.0001 * tf.reduce_sum(cn)

                    self.losses[seq_length] = loss1

                    # Capture the trainable set once, on the first length.
                    if not self.params:
                        self.params = tf.trainable_variables()

                    # Clip gradients elementwise; keep None entries so the
                    # list stays aligned with self.params.
                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        # Optimizer ops live in their own scope, one per sequence length.
        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
Beispiel #7
0
def chat(input_text):
    """Generate a chatbot reply for ``input_text``.

    Rebuilds the attention-based seq2seq graph, restores trained weights
    from MODEL_FILE, and decodes a response via generate_response.

    Args:
        input_text: user utterance (unicode string).

    Returns:
        The generated reply as a unicode string.
    """
    word_cnt, train_dict, train_reverse_dict = load_dict(DICT_FILE)

    # Special-token vocabulary entries.
    UNK_WORD = u'<UNK>'
    PADDING_WORD = u'<PAD>'
    START_WORD = u'<GO>'
    END_WORD = u'<EOS>'

    START_ID = train_dict[START_WORD]
    END_ID = train_dict[END_WORD]
    PAD_ID = train_dict[PADDING_WORD]
    UNK_ID = train_dict[UNK_WORD]

    # Attention model: start from a clean graph so repeated calls do not
    # collide on variable names.
    tf.reset_default_graph()

    learning_rate = 1.0

    encoder_length = 15
    decoder_length = 20
    embed_dim = 128

    cell = tf.contrib.rnn.LSTMCell(embed_dim)
    num_encoder_symbols = VOCAB_SIZE
    num_decoder_symbols = VOCAB_SIZE
    embedding_size = embed_dim

    # One int32 placeholder per timestep, as the legacy seq2seq API expects.
    encoder_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="encoder_%d" % i)
        for i in range(encoder_length)
    ]
    decoder_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="decoder_%d" % i)
        for i in range(decoder_length)
    ]
    target_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="target_%d" % i)
        for i in range(decoder_length)
    ]
    target_weights_placeholders = [
        tf.placeholder(tf.float32, shape=[None], name="decoder_weight_%d" % i)
        for i in range(decoder_length)
    ]
    outputs, states = embedding_attention_seq2seq(encoder_placeholders,
                                                  decoder_placeholders,
                                                  cell,
                                                  num_encoder_symbols,
                                                  num_decoder_symbols,
                                                  embedding_size,
                                                  output_projection=None,
                                                  feed_previous=False)

    # The training op is rebuilt even though chat() never runs it, so the
    # Adagrad slot variables exist in the graph and the checkpoint restores
    # without missing-variable errors.
    loss = sequence_loss(outputs, target_placeholders,
                         target_weights_placeholders)
    train_step = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

    saver = tf.train.Saver()
    sess = tf.Session()

    # Restore trained weights; no separate variable initialization needed.
    saver.restore(sess, MODEL_FILE)

    output_text = generate_response(sess, input_text, train_dict,
                                    train_reverse_dict, encoder_length,
                                    decoder_length, PAD_ID, UNK_ID, START_ID,
                                    END_ID, cell, embed_dim, VOCAB_SIZE,
                                    encoder_placeholders, decoder_placeholders,
                                    target_weights_placeholders)
    return output_text
               enc_inp[:-1])
    # Per-timestep loss weights and integer labels, one placeholder each.
    weights = [
        tf.placeholder(tf.float32, shape=(None, ), name="weight%i" % t)
        for t in range(seq_length)
    ]
    labels = [
        tf.placeholder(tf.int32, shape=(None, ), name="labels%i" % t)
        for t in range(seq_length)
    ]
    # weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]
    prev_mem = tf.zeros((batch_size, memory_dim))
    # NOTE(review): the same BasicLSTMCell instance is repeated 3 times;
    # newer TF versions require distinct cell objects per layer — confirm
    # this runs on the TF version the project pins.
    cell = MultiRNNCell([BasicLSTMCell(memory_dim)] * 3)

    # Embedding seq2seq autoencoder-style setup: one shared vocabulary.
    dec_outputs, dec_memory = legacy_seq2seq.embedding_rnn_seq2seq(
        enc_inp, dec_inp, cell, vocab_size, vocab_size, embedding_dim)
    loss = legacy_seq2seq.sequence_loss(dec_outputs, labels, weights,
                                        vocab_size)
    optimizer = tf.train.AdamOptimizer(starter_learning_rate).minimize(loss)

    # TensorBoard logging for the scalar loss.
    tf.summary.scalar('loss', loss)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter('./logs', sess.graph)
    sess.run(tf.global_variables_initializer())

    # Training loop: print progress every 100 steps, write summaries every
    # 500. NOTE(review): "itrations" in the log message is a typo for
    # "iterations" (runtime string, left unchanged here).
    for step in range(iterations):
        loss_t, summary = train_batch(batch_size)
        if step % 100 == 0:
            print("itrations: %d, train_loss: %.5f." % (step, loss_t),
                  end='\r')
        if step % 500 == 0:
            summary_writer.add_summary(summary, step)
            summary_writer.flush()
Beispiel #9
0
    def __init__(self, config, iterator, infer=False, bayou_mode=True):
        """Build the full 'lle' training/inference graph.

        Wires together: a Bayesian evidence encoder, a Bayesian reverse
        encoder over the AST, the main AST decoder, two auxiliary evidence
        decoders (return type 'RE' and formal parameters 'FS'), and the
        combined losses. In infer mode also builds importance-sampling
        quantities (probY, EncA/EncB, RevEncA/RevEncB).

        Args:
            config: model configuration; config.model must be 'lle'.
            iterator: tf.data iterator yielding (nodes, edges, targets,
                *evidence) batches.
            infer: build the inference-time quantities; no train op.
            bayou_mode: when True (or at infer time) the decoders are seeded
                from the evidence encoder's psi; otherwise from the reverse
                encoder's psi, and only reverse-encoder vars are trained.
        """
        assert config.model == 'lle', 'Trying to load different model implementation: ' + config.model
        self.config = config

        # Pull one batch from the iterator: AST (nodes, edges, targets)
        # followed by the per-evidence inputs.
        newBatch = iterator.get_next()
        nodes, edges, targets = newBatch[:3]
        ev_data = newBatch[3:]

        # Time-major layout for the decoder.
        nodes = tf.transpose(nodes)
        edges = tf.transpose(edges)

        with tf.variable_scope("Embedding"):
            emb = tf.get_variable(
                'emb', [config.decoder.vocab_size, config.decoder.units])

        with tf.variable_scope("Encoder"):

            # Evidence encoder; reparameterized sample of psi.
            self.encoder = BayesianEncoder(config, ev_data, infer)
            samples_1 = tf.random_normal(
                [config.batch_size, config.latent_size],
                mean=0.,
                stddev=1.,
                dtype=tf.float32)

            self.psi_encoder = self.encoder.psi_mean + tf.sqrt(
                self.encoder.psi_covariance) * samples_1

        # setup the reverse encoder.
        with tf.variable_scope("Reverse_Encoder"):
            # Embeddings for API nodes, return types (evidence 4) and
            # formal parameters (evidence 5).
            embAPI = tf.get_variable('embAPI', [
                config.reverse_encoder.vocab_size, config.reverse_encoder.units
            ])
            embRT = tf.get_variable(
                'embRT',
                [config.evidence[4].vocab_size, config.reverse_encoder.units])
            embFS = tf.get_variable(
                'embFS',
                [config.evidence[5].vocab_size, config.reverse_encoder.units])
            self.reverse_encoder = BayesianReverseEncoder(
                config, embAPI, nodes, edges, ev_data[4], embRT, ev_data[5],
                embFS)
            samples_2 = tf.random_normal(
                [config.batch_size, config.latent_size],
                mean=0.,
                stddev=1.,
                dtype=tf.float32)

            self.psi_reverse_encoder = self.reverse_encoder.psi_mean + tf.sqrt(
                self.reverse_encoder.psi_covariance) * samples_2

        # setup the decoder with psi as the initial state
        with tf.variable_scope("Decoder"):

            lift_w = tf.get_variable(
                'lift_w', [config.latent_size, config.decoder.units])
            lift_b = tf.get_variable('lift_b', [config.decoder.units])
            # Seed the decoder from the encoder's psi in bayou/infer mode,
            # otherwise from the reverse encoder's psi.
            if bayou_mode or infer:
                initial_state = tf.nn.xw_plus_b(self.psi_encoder,
                                                lift_w,
                                                lift_b,
                                                name="Initial_State")
            else:
                initial_state = tf.nn.xw_plus_b(self.psi_reverse_encoder,
                                                lift_w,
                                                lift_b,
                                                name="Initial_State")
            self.decoder = BayesianDecoder(config, emb, initial_state, nodes,
                                           edges)

        with tf.variable_scope("RE_Decoder"):
            ## RE

            # Zeroed embedding: the RE decoder sees no input content, only
            # the lifted psi state.
            emb_RE = config.evidence[
                4].emb * 0.0  #tf.get_variable('emb_RE', [config.evidence[4].vocab_size, config.evidence[4].units])

            lift_w_RE = tf.get_variable(
                'lift_w_RE', [config.latent_size, config.evidence[4].units])
            lift_b_RE = tf.get_variable('lift_b_RE',
                                        [config.evidence[4].units])

            if bayou_mode or infer:
                initial_state_RE = tf.nn.xw_plus_b(self.psi_encoder,
                                                   lift_w_RE,
                                                   lift_b_RE,
                                                   name="Initial_State_RE")
            else:
                initial_state_RE = tf.nn.xw_plus_b(self.psi_reverse_encoder,
                                                   lift_w_RE,
                                                   lift_b_RE,
                                                   name="Initial_State_RE")

            # All-zero input sequence of the same shape as the RE evidence.
            input_RE = tf.transpose(
                tf.reverse_v2(tf.zeros_like(ev_data[4]), axis=[1]))
            output = SimpleDecoder(config, emb_RE, initial_state_RE, input_RE,
                                   config.evidence[4])

            projection_w_RE = tf.get_variable(
                'projection_w_RE',
                [config.evidence[4].units, config.evidence[4].vocab_size])
            projection_b_RE = tf.get_variable('projection_b_RE',
                                              [config.evidence[4].vocab_size])
            # Predict the return type from the last decoder output.
            logits_RE = tf.nn.xw_plus_b(output.outputs[-1], projection_w_RE,
                                        projection_b_RE)

            # NOTE(review): one-hot labels are built with dtype=tf.int32;
            # softmax_cross_entropy_with_logits_v2 normally expects float
            # labels — confirm this is cast/accepted on the pinned TF version.
            labels_RE = tf.one_hot(tf.squeeze(ev_data[4]),
                                   config.evidence[4].vocab_size,
                                   dtype=tf.int32)
            loss_RE = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels_RE, logits=logits_RE)

            # Mask out batch rows whose encoder psi_mean sums to zero
            # (presumably padded/empty entries — confirm upstream).
            cond = tf.not_equal(tf.reduce_sum(self.encoder.psi_mean, axis=1),
                                0)
            # cond = tf.reshape( tf.tile(tf.expand_dims(cond, axis=1) , [1,config.evidence[5].max_depth]) , [-1] )
            self.loss_RE = tf.reduce_mean(
                tf.where(cond, loss_RE, tf.zeros(cond.shape)))

        with tf.variable_scope("FS_Decoder"):
            #FS
            emb_FS = config.evidence[
                5].emb  #tf.get_variable('emb_FS', [config.evidence[5].vocab_size, config.evidence[5].units])
            lift_w_FS = tf.get_variable(
                'lift_w_FS', [config.latent_size, config.evidence[5].units])
            lift_b_FS = tf.get_variable('lift_b_FS',
                                        [config.evidence[5].units])

            if bayou_mode or infer:
                initial_state_FS = tf.nn.xw_plus_b(self.psi_encoder,
                                                   lift_w_FS,
                                                   lift_b_FS,
                                                   name="Initial_State_FS")
            else:
                initial_state_FS = tf.nn.xw_plus_b(self.psi_reverse_encoder,
                                                   lift_w_FS,
                                                   lift_b_FS,
                                                   name="Initial_State_FS")

            # Feed the FS evidence reversed in time.
            input_FS = tf.transpose(tf.reverse_v2(ev_data[5], axis=[1]))
            self.decoder_FS = SimpleDecoder(config, emb_FS, initial_state_FS,
                                            input_FS, config.evidence[5])

            output = tf.reshape(tf.concat(self.decoder_FS.outputs, 1),
                                [-1, self.decoder_FS.cell1.output_size])
            logits_FS = tf.matmul(output, self.decoder_FS.projection_w_FS
                                  ) + self.decoder_FS.projection_b_FS

            # logits_FS = output
            # Targets: the FS sequence shifted right by one, then reversed
            # to match the reversed input ordering.
            targets_FS = tf.reverse_v2(tf.concat(
                [tf.zeros_like(ev_data[5][:, -1:]), ev_data[5][:, :-1]],
                axis=1),
                                       axis=[1])

            # self.gen_loss_FS = tf.contrib.seq2seq.sequence_loss(logits_FS, target_FS,
            #                                       tf.ones_like(target_FS, dtype=tf.float32))
            # Same zero-psi_mean mask as above, expanded per timestep and
            # used as the per-step loss weight.
            cond = tf.not_equal(tf.reduce_sum(self.encoder.psi_mean, axis=1),
                                0)
            cond = tf.reshape(
                tf.tile(tf.expand_dims(cond, axis=1),
                        [1, config.evidence[5].max_depth]), [-1])
            cond = tf.where(cond, tf.ones(cond.shape), tf.zeros(cond.shape))

            self.gen_loss_FS = seq2seq.sequence_loss(
                [logits_FS], [tf.reshape(targets_FS, [-1])], [cond])

        # get the decoder outputs
        with tf.name_scope("Loss"):
            output = tf.reshape(tf.concat(self.decoder.outputs, 1),
                                [-1, self.decoder.cell1.output_size])
            logits = tf.matmul(
                output, self.decoder.projection_w) + self.decoder.projection_b
            ln_probs = tf.nn.log_softmax(logits)

            # 1. generation loss: log P(Y | Z)
            # Mask padded rows, expanded over AST depth, as loss weights.
            cond = tf.not_equal(tf.reduce_sum(self.encoder.psi_mean, axis=1),
                                0)
            cond = tf.reshape(
                tf.tile(tf.expand_dims(cond, axis=1),
                        [1, config.decoder.max_ast_depth]), [-1])
            cond = tf.where(cond, tf.ones(cond.shape), tf.zeros(cond.shape))

            self.gen_loss = seq2seq.sequence_loss([logits],
                                                  [tf.reshape(targets, [-1])],
                                                  [cond])

            # 2. latent loss: negative of the KL-divergence between P(\Psi | f(\Theta)) and P(\Psi)
            # KL between the two diagonal Gaussians (encoder vs reverse
            # encoder), averaged over the latent dimensions.
            KL_loss = 0.5 * tf.reduce_mean(
                tf.log(self.encoder.psi_covariance) -
                tf.log(self.reverse_encoder.psi_covariance) - 1 +
                self.reverse_encoder.psi_covariance /
                self.encoder.psi_covariance +
                tf.square(self.encoder.psi_mean - self.reverse_encoder.psi_mean
                          ) / self.encoder.psi_covariance,
                axis=1)

            #KL_cond = tf.not_equal(tf.reduce_sum(self.encoder.psi_mean, axis=1) , 0)
            self.KL_loss = KL_loss  #tf.reduce_mean( tf.where( KL_cond  , KL_loss, tf.zeros_like(KL_loss)) , axis = 0 )

            # Bayou mode trains the decoders only; otherwise the KL term
            # ties the reverse encoder to the evidence encoder.
            if bayou_mode or infer:
                self.loss = self.gen_loss + 1 / 32 * self.loss_RE + 8 / 32 * self.gen_loss_FS
            else:
                self.loss = self.KL_loss + 1 * (self.gen_loss +
                                                1 / 32 * self.loss_RE +
                                                8 / 32 * self.gen_loss_FS)

            if infer:
                # self.gen_loss is  P(Y|Z) where Z~P(Z|X)
                # P(Y) = int_Z P(YZ) = int_Z P(Y|Z)P(Z) = int_Z P(Y|Z)P(Z|X)P(Z)/P(Z|X) = sum_Z P(Y|Z)P(Z)/P(Z|X) where Z~P(Z|X)
                # last step by importace_sampling
                # this self.prob_Y is approximate and you need to introduce one more tensor dimension to do this efficiently over multiple trials
                # P(Y) = P(Y|Z)P(Z)/P(Z|X) where Z~P(Z|X)
                self.probY = -1 * self.loss + self.get_multinormal_lnprob(self.psi_encoder) \
                                            - self.get_multinormal_lnprob(self.psi_encoder,self.encoder.psi_mean,self.encoder.psi_covariance)
                self.EncA, self.EncB = self.calculate_ab(
                    self.encoder.psi_mean, self.encoder.psi_covariance)
                self.RevEncA, self.RevEncB = self.calculate_ab(
                    self.reverse_encoder.psi_mean,
                    self.reverse_encoder.psi_covariance)

            self.allEvSigmas = [ev.sigma for ev in self.config.evidence]
            #unused if MultiGPU is being used
            with tf.name_scope("train"):
                if bayou_mode:
                    train_ops = get_var_list()['decoder_vars']
                else:
                    train_ops = get_var_list()['rev_encoder_vars']

        if not infer:
            # Train only the variable subset selected above.
            opt = tf.train.AdamOptimizer(config.learning_rate)
            self.train_op = opt.minimize(self.loss, var_list=train_ops)

            var_params = [
                np.prod([dim.value for dim in var.get_shape()])
                for var in tf.trainable_variables()
            ]
            print('Model parameters: {}'.format(np.sum(var_params)))
Beispiel #10
0
# Decoder input: prepend a "GO" token (all zeros) and drop the final
# token of the encoder input (standard shifted-by-one decoder feed).
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.int32, name="GO")] +
           enc_inp[:-1])

cell = LSTMCell(memory_dim)

# Embedding seq2seq with teacher forcing (feed_previous=False); encoder
# and decoder share one vocabulary.
dec_outputs, dec_memory = embedding_rnn_seq2seq(enc_inp,
                                                dec_inp,
                                                cell,
                                                vocab_size,
                                                vocab_size,
                                                embedding_dim,
                                                feed_previous=False)

loss = sequence_loss(dec_outputs, labels, weights)

# Piecewise-constant schedule: 0.01 until step 10000, then 0.001.
global_step = tf.Variable(0, trainable=False)
boundaries = [10000]
values = [0.01, 0.001]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
optimizer = tf.train.AdamOptimizer(learning_rate)
# BUG FIX: pass global_step so minimize() increments it each training step;
# without it the piecewise schedule never advances past its first value.
train_op = optimizer.minimize(loss, global_step=global_step)

sess.run(tf.global_variables_initializer())


def train_batch(batch_size):
Beispiel #11
0
    def build_model(self, forward_only):
        """Build the PTRModel computation graph.

        Unrolls the controller for every sequence length from 1 to
        ``self.max_length`` and, unless ``forward_only`` is set, also builds a
        per-length sequence loss with value-clipped gradients and the
        corresponding optimizer ops.

        Args:
            forward_only: if True, skip loss/gradient/optimizer construction
                (inference-only graph).
        """
        print("[*] Building a PTRModel math model")

        with tf.variable_scope(self.scope):
            #embedding_matrix = tf.eye(self.input_dim, self.W)
            #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
            # a, b, c parametrize a quadratic (in seq_length) estimate of the
            # stopping step; see the max_pos computation below.
            self.a = weight('a', [1, 1], init='constant', value=0.45)
            self.b = weight('b', [1, 1], init='constant', value=-0.55)
            self.c = weight('c', [1, 1], init='constant', value=0.0)
            # softplus keeps beta strictly greater than 1.
            self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

            prev_state = self.controller.init_state()

            # All per-length unrolls share the same variables.
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                         name='input_1_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.output_dim],
                                             name='true_output_%s' %
                                             seq_length)

                self.inputs_1.append(input_1)
                self.true_outputs.append(true_output)

                # present inputs
                # NOTE(review): the same reshaped input is passed twice to
                # update_memory — presumably the controller expects a pair;
                # confirm against the controller implementation.
                prev_state = self.controller.update_memory(
                    prev_state, [
                        tf.reshape(input_1, [1, -1]),
                        tf.reshape(input_1, [1, -1])
                    ])
                # States for length L extend the truncated state list of
                # length L-1 with a copy of the current state.
                self.collect_states[seq_length] = self.collect_states[
                    seq_length - 1][0:(seq_length -
                                       1)] + [self.copy_state(prev_state)]

                state = prev_state
                self.prev_states[seq_length] = state

                stops = []
                candidate_outputs = []
                # Run the controller for a fixed number of steps, recording a
                # candidate output after each step.
                for j in range(self.MAX_STEP):
                    state, _ = self.controller(state, j)
                    self.collect_states[seq_length].append(
                        self.copy_state(state))
                    candidate_outputs.append(
                        tf.unstack(state['M'][-1][0:seq_length]))
                    # stops.append(state['stop'])

                self.outputs[seq_length] = candidate_outputs
                # stops stays empty: the append above is commented out.
                self.stops[seq_length] = stops

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)
                    all_losses = []
                    # One unaveraged loss per candidate step, so the stop
                    # distribution below can weight them.
                    for index in range(self.MAX_STEP):
                        loss = sequence_loss(
                            logits=self.outputs[seq_length][index],
                            targets=self.true_outputs[0:seq_length],
                            weights=[1] * seq_length,
                            average_across_timesteps=False,
                            average_across_batch=False,
                            softmax_loss_function=l2_loss)
                        all_losses.append(loss)

                    all_losses = tf.stack(all_losses)

                    #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                    #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                    #max_pos = tf.to_float(tf.argmax(step_dist))
                    # Quadratic estimate of the stopping step, clipped to the
                    # valid step range [0, MAX_STEP - 1].
                    max_pos = tf.clip_by_value(
                        self.a * (tf.to_float(seq_length)**2) +
                        self.b * tf.to_float(seq_length) + self.c, 0,
                        self.MAX_STEP - 1)
                    stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                    # Expected loss under the stop distribution, plus a small
                    # penalty on max_pos encouraging earlier stopping.
                    loss1 = tf.reduce_sum(
                        tf.expand_dims(all_losses, 0) *
                        stop_pos) + 0.001 * tf.reduce_sum(max_pos)

                    self.losses[seq_length] = loss1

                    if not self.params:
                        self.params = tf.trainable_variables()

                    # Value-clip each gradient; None entries are kept so the
                    # list stays aligned with self.params.
                    grads = []
                    for grad in tf.gradients(
                            loss1, self.params
                    ):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)
                    self.grads[seq_length] = grads

        with tf.variable_scope("opt", reuse=None):
            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(self.grads[seq_length], self.params),
                        global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a PTRModel math model finished")
Beispiel #12
0
    def __init__(self,
                 vocab_size,
                 buckets,
                 size,
                 num_layers,
                 batch_size,
                 num_softmax_samples,
                 do_decode,
                 num_gpus=2,
                 train_and_test=False):
        """Build a bucketed attention seq2seq graph.

        :param vocab_size: size of the shared source/target vocabulary
        :param buckets: list of (encoder_length, decoder_length) pairs
        :param size: number of units in each LSTM cell
        :param num_layers: number of stacked encoder layers
        :param batch_size: batch size used to shape all placeholders
        :param num_softmax_samples: if non-zero while training, a sampled
            softmax loss is used instead of the full softmax
        :param do_decode: training vs. inference; controls the seq2seq
            decoding loop (argmax feedback, attention initialization)
        :param num_gpus: number of GPUs to round-robin ops across
        :param train_and_test: reuse variables so training and inference
            graphs can be built together
        """
        self._cur_gpu = 0  # cursor for round-robin gpu/cpu selection
        self._num_gpus = num_gpus  # number of gpus available
        self.sess = None  # tf session; a new one is created later if None
        self.buckets = buckets
        self.global_step = tf.Variable(
            0, trainable=False)  # counts completed training steps

        encoder_inputs = []  # encoder inputs
        decoder_inputs = []
        target_inputs = []
        loss_weight_inputs = []

        # One placeholder per position of the longest bucket; shorter buckets
        # reuse prefixes of these lists.
        for i in range(buckets[-1][0]):
            encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[batch_size],
                               name="encoder{}".format(i)))
        squence_length = tf.placeholder(tf.int32, [batch_size],
                                        name='squence_length')
        self.squence_length = squence_length
        # Decoder-side placeholders: inputs, targets and per-step loss weights.
        for i in range(buckets[-1][1]):
            decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[batch_size],
                               name="decoder{}".format(i)))
            target_inputs.append(
                tf.placeholder(tf.int64,
                               shape=[batch_size],
                               name="target{}".format(i)))
            loss_weight_inputs.append(
                tf.placeholder(tf.float32,
                               shape=[batch_size],
                               name="loss_weight{}".format(i)))
        encoder_inputs_buckets = {}
        decoder_inputs_buckets = {}
        target_inputs_buckets = {}
        loss_weight_inputs_buckets = {}
        # Slice the shared placeholder lists down to each bucket's
        # encoder/decoder lengths.
        for bucket_id, bucket in enumerate(buckets):
            encoder_inputs_buckets[bucket_id] = encoder_inputs[0:bucket[0]]
            decoder_inputs_buckets[bucket_id] = decoder_inputs[0:bucket[1]]
            target_inputs_buckets[bucket_id] = target_inputs[0:bucket[1]]
            loss_weight_inputs_buckets[bucket_id] = loss_weight_inputs[
                0:bucket[1]]

        self.encoder_inputs_buckets = encoder_inputs_buckets
        self.decoder_inputs_buckets = decoder_inputs_buckets
        self.target_inputs_buckets = target_inputs_buckets
        self.loss_weight_inputs_buckets = loss_weight_inputs_buckets

        # Shared embedding for both encoder and decoder, pinned to the CPU.
        with tf.variable_scope(
                'embedding',
                reuse=True if train_and_test else None), tf.device('/cpu:0'):
            embedding = tf.get_variable(
                'embedding', [vocab_size, size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            # every word look up a word vector.
            emb_encoder_inputs = [
                tf.nn.embedding_lookup(embedding, x) for x in encoder_inputs
            ]
            emb_decoder_inputs = [
                tf.nn.embedding_lookup(embedding, x) for x in decoder_inputs
            ]
        encoder_embedding_buckets = {}
        decoder_embedding_buckets = {}
        # Per-bucket slices of the embedded encoder/decoder inputs.
        for i, bucket in enumerate(buckets):
            encoder_embedding_buckets[i] = emb_encoder_inputs[0:bucket[0]]
            decoder_embedding_buckets[i] = emb_decoder_inputs[0:bucket[1]]
        # Stacked RNN encoder, built per bucket; variables are reused across
        # buckets (and across train/test when train_and_test is set).
        encoder_output_buckets = {}
        encoder_state_buckets = {}
        device = self._next_device()
        for bucket_id, bucket in enumerate(buckets):
            encoder_input_embedding = encoder_embedding_buckets[bucket_id]
            for layer_id in range(num_layers):
                with tf.variable_scope(
                        "encoder%d" % layer_id,
                        reuse=(True if bucket_id > 0 else None) or
                    (True if train_and_test else None)), tf.device(device):
                    cell = LSTMCell(num_units=size,
                                    initializer=tf.random_uniform_initializer(
                                        -0.1, 0.1, seed=123),
                                    state_is_tuple=True)
                    encoder_input_embedding, state = static_rnn(
                        cell=cell,
                        inputs=encoder_input_embedding,
                        sequence_length=squence_length,
                        dtype=tf.float32)
                output = encoder_input_embedding
                encoder_output_buckets[bucket_id] = output
                encoder_state_buckets[bucket_id] = state
        with tf.variable_scope('output_projection',
                               reuse=True if train_and_test else None):
            w = tf.get_variable(
                'w', [size, vocab_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            w_t = tf.transpose(w)
            v = tf.get_variable(
                'v', [vocab_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))

        # During inference, feed back the embedded argmax of the previous
        # projected output; during training, feed the ground-truth inputs.
        loop_function = _extract_argmax_and_embed(embedding,
                                                  (w,
                                                   v)) if do_decode else None
        cell = LSTMCell(size,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=True)
        decoder_output_buckets = {}
        decoder_state_buckets = {}
        device = self._next_device()
        for bucket_id, bucket in enumerate(buckets):
            with tf.variable_scope(
                    "decoder",
                    reuse=(True if bucket_id > 0 else None)
                    or (True if train_and_test else None)), tf.device(device):
                # Pack the per-step encoder outputs into a single
                # [batch, time, size] tensor of attention states.
                t = tf.concat(values=[
                    tf.reshape(x, [-1, 1, size])
                    for x in encoder_output_buckets[bucket_id]
                ],
                              axis=1)
                decoder_output, decoder_state = attention_decoder(
                    decoder_inputs=decoder_embedding_buckets[bucket_id],
                    initial_state=encoder_state_buckets[bucket_id],
                    attention_states=t,
                    cell=cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=do_decode)
                decoder_output_buckets[bucket_id] = decoder_output
                decoder_state_buckets[bucket_id] = decoder_state
        model_output_buckets = {}  # projected output logits
        model_output_predict_buckets = {}
        model_output_predict_merger_buckets = {}
        model_output_accuracy = {}
        device = self._next_device()
        for bucket_id, bucket in enumerate(buckets):
            model_output = []
            model_output_predict = []
            model_accuracy = []
            with tf.variable_scope(
                    "output",
                    reuse=(True if bucket_id > 0 else None)
                    or (True if train_and_test else None)), tf.device(device):
                # Project each decoder timestep to the vocabulary, take the
                # argmax as the prediction and track per-step accuracy.
                for j in range(len(decoder_output_buckets[bucket_id])):
                    output = tf.nn.xw_plus_b(
                        decoder_output_buckets[bucket_id][j], w, v)
                    predict = tf.argmax(input=output,
                                        axis=1,
                                        name="predict_{}_{}".format(
                                            bucket_id, j))
                    accuracy_bool = tf.equal(
                        x=target_inputs_buckets[bucket_id][j], y=predict)
                    model_accuracy.append(
                        tf.reduce_mean(
                            tf.cast(x=accuracy_bool, dtype=tf.float32)))
                    model_output.append(output)
                    model_output_predict.append(
                        tf.reshape(tensor=predict, shape=[-1, 1]))
            model_output_buckets[bucket_id] = model_output
            model_output_predict_buckets[bucket_id] = model_output_predict
            model_output_predict_merger_buckets[bucket_id] = tf.concat(
                values=model_output_predict, axis=1)
            # Average the per-step accuracies over the bucket's decoder length.
            model_output_accuracy[bucket_id] = tf.add_n(inputs=model_accuracy, name="bucket_id_{}".format(bucket_id)) / \
                                               buckets[bucket_id][1]
        self.model_output_buckets = model_output_buckets
        self.model_output_predict_buckets = model_output_predict_buckets
        self.model_output_predict_merger_buckets = model_output_predict_merger_buckets
        self.model_output_accuracy = model_output_accuracy

        def sampled_loss_func(labels, logits):  # tf1.0 requires the (labels, logits) signature
            # Sampled softmax over the transposed projection weights.
            with tf.device('/cpu:0'):  # Try gpu.
                labels = tf.reshape(labels, [-1, 1])
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(v, tf.float32)
                local_inputs = tf.cast(logits, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(weights=local_w_t,
                                               biases=local_b,
                                               labels=labels,
                                               inputs=local_inputs,
                                               num_sampled=num_softmax_samples,
                                               num_classes=vocab_size),
                    tf.float32)

        device = self._next_device()
        loss_buckets = {}
        for bucket_id, bucket in enumerate(buckets):
            with tf.variable_scope(
                    'loss',
                    reuse=(True if bucket_id > 0 else None)
                    or (True if train_and_test else None)), tf.device(device):
                if num_softmax_samples != 0 and not do_decode:
                    # The two branches take different inputs because the
                    # sampled loss consumes the raw decoder outputs — the
                    # output projection is folded into the loss function.
                    loss = sequence_loss_by_example(
                        logits=decoder_output_buckets[bucket_id],
                        targets=target_inputs_buckets[bucket_id],
                        weights=loss_weight_inputs_buckets[bucket_id],
                        average_across_timesteps=True,
                        softmax_loss_function=sampled_loss_func)
                    # loss = sequence_loss(logits=model_output_buckets[bucket_id],
                    #                      targets=target_inputs_buckets[bucket_id],
                    #                      weights=loss_weight_inputs_buckets[bucket_id]
                    #                      )
                else:
                    loss = sequence_loss(
                        logits=model_output_buckets[bucket_id],
                        targets=target_inputs_buckets[bucket_id],
                        weights=loss_weight_inputs_buckets[bucket_id])
                loss_buckets[bucket_id] = tf.reduce_mean(loss)  # mean loss per bucket
        self.loss_buckets = loss_buckets
        feed_previous=False)

# Second (inference) decoder: shares variables with the training decoder
# (reuse=True), but feed_previous=True so each step consumes the previous
# prediction instead of the ground-truth input.
with tf.variable_scope('decoder', reuse=True):
    cell = tf.contrib.rnn.GRUCell(num_units)
    decode_outputs_t, decode_states_t = seq2seq.embedding_rnn_seq2seq(
        enc_in2,
        dec_in2,
        cell,
        vocab_size,
        vocab_size,
        embed_dim,
        output_projection=None,
        feed_previous=True)

# Uniform per-timestep loss weights; the loss is computed on the
# training-path outputs (decode_outputs, built elsewhere in this file).
loss_weights = [tf.ones(l.shape, dtype=tf.float32) for l in labels2]
# NOTE(review): the 4th positional argument to sequence_loss is
# num_decoder_symbols in very old TF seq2seq APIs but
# average_across_timesteps in later ones — confirm which TF version this
# snippet targets before relying on this loss.
loss = seq2seq.sequence_loss(decode_outputs, labels2, loss_weights, vocab_size)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# </Model>


def train(training_data, testing_data):
    in_data_train, la_data_train, out_data_train = training_data
    in_data_test, la_data_test, out_data_test = testing_data

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        total_num = len(in_data_train)
        num_per_epoch = total_num // batch_size
        for epoch in range(epochs):
            global global_num
Beispiel #14
0
 def add_loss_op(self, output):
     """Return the cross-entropy sequence loss for the given output logits.

     The loss is averaged over both the batch and the sentence (time)
     dimension by ``sequence_loss``.
     """
     with tf.name_scope('losses'):
         flat_targets = tf.reshape(self.labels_placeholder, [-1])
         # Uniform weights: every (batch, step) position counts equally.
         step_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
         return sequence_loss([output], [flat_targets], step_weights, len(self.vocab))
Beispiel #15
0
    def build_model(self, forward_only, is_copy=True):
        """Build the NTM graph for the copy task.

        Unrolls the NTM cell once per position for every sequence length in
        [1, max_length]; unless ``forward_only`` is set, it also builds a loss,
        value-clipped gradients and an RMSProp optimizer per sequence length.

        Args:
            forward_only: if True, build only the forward graph (no losses,
                gradients or optimizer ops).
            is_copy: if True, bracket each sequence with the start/end symbols
                of the copy-task protocol.
        """
        print(" [*] Building a NTM model")

        with tf.variable_scope(self.scope):
            # present start symbol
            # NOTE(review): when is_copy is False, prev_state (and later
            # state) is never initialized before use — this path appears
            # unused; confirm before calling with is_copy=False.
            if is_copy:
                _, _, prev_state = self.cell(self.start_symbol, state=None)
                self.save_state(prev_state, 0, self.max_length)

            # Zero vector fed to the cell while it emits its outputs.
            zeros = np.zeros(self.cell.input_dim, dtype=np.float32)

            # All per-length unrolls share the same weights.
            tf.get_variable_scope().reuse_variables()
            for seq_length in range(1, self.max_length + 1):
                progress(seq_length / float(self.max_length))

                input_ = tf.placeholder(tf.float32, [self.cell.input_dim],
                                        name='input_%s' % seq_length)
                true_output = tf.placeholder(
                    tf.float32, [self.cell.output_dim],
                    name='true_output_%s' % seq_length)

                self.inputs.append(input_)
                self.true_outputs.append(true_output)

                # present inputs
                _, _, prev_state = self.cell(input_, prev_state)
                self.save_state(prev_state, seq_length, self.max_length)

                # present end symbol
                if is_copy:
                    _, _, state = self.cell(self.end_symbol, prev_state)
                    self.save_state(state, seq_length)

                self.prev_states[seq_length] = state

                if not forward_only:
                    # present targets: run the cell on zero input and collect
                    # one output per position of the target sequence.
                    outputs, output_logits = [], []
                    for _ in range(seq_length):
                        output, output_logit, state = self.cell(zeros, state)
                        self.save_state(state, seq_length, is_output=True)
                        outputs.append(output)
                        output_logits.append(output_logit)

                    self.outputs[seq_length] = outputs
                    self.output_logits[seq_length] = output_logits

            if not forward_only:
                for seq_length in range(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)

                    # Unaveraged loss: every timestep contributes equally.
                    loss = sequence_loss(
                        logits=self.output_logits[seq_length],
                        targets=self.true_outputs[0:seq_length],
                        weights=[1] * seq_length,
                        average_across_timesteps=False,
                        average_across_batch=False,
                        softmax_loss_function=softmax_loss_function)

                    self.losses[seq_length] = loss

                    if not self.params:
                        self.params = tf.trainable_variables()

                    # grads, norm = tf.clip_by_global_norm(
                    #                  tf.gradients(loss, self.params), 5)

                    # Value-clip each gradient; None entries are kept so the
                    # list stays aligned with self.params.
                    grads = []
                    for grad in tf.gradients(loss, self.params):
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)

                    self.grads[seq_length] = grads
                    opt = tf.train.RMSPropOptimizer(self.lr,
                                                    decay=self.decay,
                                                    momentum=self.momentum)

                    # Reuse optimizer slot variables after the first length.
                    reuse = seq_length != 1
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=reuse):
                        self.optims[seq_length] = opt.apply_gradients(
                            zip(grads, self.params),
                            global_step=self.global_step)

                        if not reuse:
                            tf.get_variable_scope().reuse_variables()

        # Save only this model's variables (those under self.scope).
        model_vars = \
            [v for v in tf.global_variables() if v.name.startswith(self.scope)]
        self.saver = tf.train.Saver(model_vars)
        print(" [*] Build a NTM model finished")