Code Example #1
File: graph.py  Project: rekriz11/sockeye-recipes
 def teacherforce_loss():
     # Use sampled softmax when a sample count is configured; otherwise the
     # full softmax inside sequence_loss is used.
     if self.model_config.number_samples > 0:
         loss_fn = tf.nn.sampled_softmax_loss
     else:
         loss_fn = None
     loss = sequence_loss(
         logits=tf.stack(output.decoder_logit_list, axis=1),
         targets=gt_target,
         weights=decode_word_weight,
         # The sampled-softmax arguments are disabled in this variant, so
         # loss_fn above is unused and the loss falls back to the full softmax:
         # softmax_loss_function=loss_fn,
         # w=w,
         # b=b,
         # decoder_outputs=decoder_outputs,
         # number_samples=self.model_config.number_samples
     )
     return loss
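
In this variant the sampled-softmax arguments are commented out, so sequence_loss reduces to a padding-weighted cross-entropy over the stacked decoder logits. The standalone NumPy sketch below illustrates what that weighted loss computes; masked_sequence_loss and all shapes are illustrative, not the project's sequence_loss implementation.

import numpy as np

def masked_sequence_loss(logits, targets, weights):
    # logits:  [batch, time, vocab] unnormalized scores
    # targets: [batch, time] integer token ids
    # weights: [batch, time] 1.0 for real tokens, 0.0 for padding
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    batch_idx, time_idx = np.indices(targets.shape)
    token_nll = -log_probs[batch_idx, time_idx, targets]   # [batch, time]
    # Average over non-padded positions only.
    return (token_nll * weights).sum() / max(weights.sum(), 1e-9)

logits = np.random.randn(2, 5, 10)
targets = np.random.randint(0, 10, size=(2, 5))
weights = np.array([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]], dtype=float)
print(masked_sequence_loss(logits, targets, weights))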
Code Example #2
File: graph.py  Project: afcarl/keyphrase_generation
    def create_model(self):
        with tf.variable_scope('variables'):
            abstr_ph = []
            for _ in range(self.model_config.max_abstr_len):
                abstr_ph.append(
                    tf.zeros(self.model_config.batch_size,
                             tf.int32,
                             name='abstract_input'))

            kwords_ph = []
            for _ in range(self.model_config.max_cnt_kword):
                kword = []
                for _ in range(self.model_config.max_kword_len):
                    kword.append(
                        tf.zeros(self.model_config.batch_size,
                                 tf.int32,
                                 name='kword_input'))
                kwords_ph.append(kword)

            emb_abstr, emb_kword, proj_w, proj_b = self.get_embedding()
            abstr = tf.stack(self.embedding_fn(abstr_ph, emb_abstr), axis=1)
            kwords = []
            for kword_idx in range(self.model_config.max_cnt_kword):
                kwords.append(
                    self.embedding_fn(kwords_ph[kword_idx], emb_kword))

        with tf.variable_scope('model_encoder'):
            if self.hparams.pos == 'timing':
                abstr = common_attention.add_timing_signal_1d(abstr)
            encoder_embed_inputs = tf.nn.dropout(
                abstr, 1.0 - self.hparams.layer_prepostprocess_dropout)
            abstr_bias = common_attention.attention_bias_ignore_padding(
                tf.to_float(
                    tf.equal(tf.stack(abstr_ph, axis=1),
                             self.voc_kword.encode(constant.SYMBOL_PAD))))
            abstr_outputs = transformer.transformer_encoder(
                encoder_embed_inputs, abstr_bias, self.hparams)

            if 'tuzhaopeng' in self.model_config.cov_mode:
                # Coverage memory: one vector per attention head (integer
                # division so the shape stays an int under Python 3).
                attn_stick = tf.ones([
                    self.model_config.batch_size, self.model_config.num_heads,
                    1,
                    self.model_config.dimension // self.model_config.num_heads
                ], tf.float32, 'attn_memory')

        losses = []
        targets = []
        obj = {}
        with tf.variable_scope('model_decoder'):
            # Decode each keyphrase slot in turn; variables are shared across
            # slots via reuse_variables() at the end of the loop body.
            for kword_idx in range(self.model_config.max_cnt_kword):
                if self.is_train:
                    # Teacher forcing: feed the ground-truth keyword tokens
                    # (minus the last one) as decoder input.
                    kword = kwords[kword_idx][:-1]
                    kword_ph = kwords_ph[kword_idx]
                    kword_output_list, new_attn_stick = self.decode_step(
                        kword, abstr_outputs, abstr_bias, attn_stick)
                    kword_logit_list = [
                        self.output_to_logit(o, proj_w, proj_b)
                        for o in kword_output_list
                    ]
                    kword_target_list = [
                        tf.argmax(o, output_type=tf.int32, axis=-1)
                        for o in kword_logit_list
                    ]
                    attn_stick = new_attn_stick

                    if self.model_config.number_samples > 0:
                        loss_fn = tf.nn.sampled_softmax_loss
                    else:
                        loss_fn = None
                    kword_lossbias = [
                        tf.to_float(
                            tf.not_equal(
                                d, self.voc_kword.encode(constant.SYMBOL_PAD)))
                        for d in kword_ph
                    ]
                    kword_lossbias = tf.stack(kword_lossbias, axis=1)
                    loss = sequence_loss(
                        logits=tf.stack(kword_logit_list, axis=1),
                        targets=tf.stack(kword_ph, axis=1),
                        weights=kword_lossbias,
                        softmax_loss_function=loss_fn,
                        w=proj_w,
                        b=proj_b,
                        decoder_outputs=tf.stack(kword_output_list, axis=1),
                        number_samples=self.model_config.number_samples)
                    targets.append(tf.stack(kword_target_list, axis=1))

                    if 'tuzhaopeng' in self.model_config.cov_mode and 'kp_attn' in self.model_config.cov_mode:
                        target_emb = tf.stack(self.embedding_fn(
                            kword_target_list, emb_kword),
                                              axis=1)
                        target_emb = common_attention.split_heads(
                            target_emb, self.model_config.num_heads)
                        target_emb = tf.reduce_mean(target_emb, axis=2)
                        target_emb_trans = tf.get_variable(
                            'dim_weight_trans',
                            shape=[
                                1,
                                target_emb.get_shape()[-1].value,
                                target_emb.get_shape()[-1].value
                            ],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
                        target_emb = tf.nn.conv1d(target_emb, target_emb_trans,
                                                  1, 'SAME')
                        target_emb = tf.expand_dims(target_emb, axis=2)
                        attn_stick += target_emb
                    losses.append(loss)
                else:
                    if self.model_config.beam_search_size > 0:
                        loss, target, new_attn_stick = self.transformer_beam_search(
                            abstr_outputs,
                            abstr_bias,
                            emb_kword,
                            proj_w,
                            proj_b,
                            attn_stick=attn_stick)
                    else:
                        loss, target, new_attn_stick = self.greed_search(
                            kword_idx,
                            abstr_outputs,
                            abstr_bias,
                            emb_kword,
                            proj_w,
                            proj_b,
                            attn_stick=attn_stick)
                    targets.append(target)
                    losses = loss
                    attn_stick = new_attn_stick
                    if 'tuzhaopeng' in self.model_config.cov_mode and 'kp_attn' in self.model_config.cov_mode:
                        target.set_shape([
                            self.model_config.batch_size,
                            self.model_config.max_kword_len
                        ])
                        target_list = tf.unstack(target, axis=1)
                        target_emb = tf.stack(self.embedding_fn(
                            target_list, emb_kword),
                                              axis=1)
                        target_emb = common_attention.split_heads(
                            target_emb, self.model_config.num_heads)
                        target_emb = tf.reduce_mean(target_emb, axis=2)
                        target_emb_trans = tf.get_variable(
                            'dim_weight_trans',
                            shape=[
                                1,
                                target_emb.get_shape()[-1].value,
                                target_emb.get_shape()[-1].value
                            ],
                            dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
                        target_emb = tf.nn.conv1d(target_emb, target_emb_trans,
                                                  1, 'SAME')
                        target_emb = tf.expand_dims(target_emb, axis=2)
                        attn_stick += target_emb
                tf.get_variable_scope().reuse_variables()
        if targets:
            obj['targets'] = tf.stack(targets, axis=1)
        obj['abstr_ph'] = abstr_ph
        obj['kwords_ph'] = kwords_ph
        obj['attn_stick'] = attn_stick
        if type(losses) is list:
            losses = tf.add_n(losses)
        return losses, obj
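
This example passes the output projection (proj_w, proj_b), the raw decoder outputs, and number_samples into its custom sequence_loss so that tf.nn.sampled_softmax_loss can be used during training instead of a full softmax over the keyword vocabulary. The sketch below shows the underlying call for a single decoder step in eager TensorFlow; the shapes and variable names are illustrative assumptions, not the project's wrapper.

import tensorflow as tf

batch, dim, vocab, num_sampled = 4, 128, 50000, 256
proj_w = tf.Variable(tf.random.normal([vocab, dim]))  # output projection
proj_b = tf.Variable(tf.zeros([vocab]))
decoder_out = tf.random.normal([batch, dim])          # decoder state for one step
labels = tf.constant([[3], [17], [42], [7]], dtype=tf.int64)  # true token ids

# Cross-entropy against the true class plus a handful of sampled negatives,
# avoiding a full softmax over the 50k-entry vocabulary.
step_loss = tf.nn.sampled_softmax_loss(
    weights=proj_w, biases=proj_b,
    labels=labels, inputs=decoder_out,
    num_sampled=num_sampled, num_classes=vocab)
print(step_loss.shape)  # (4,) one loss value per example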
Code Example #3
    def create_model(self):
        with tf.variable_scope('variables'):
            abstr_ph = []
            for _ in range(self.model_config.max_abstr_len):
                abstr_ph.append(tf.zeros(self.model_config.batch_size, tf.int32, name='abstract_input'))

            kwords_ph = []
            for _ in range(self.model_config.max_cnt_kword):
                kword = []
                for _ in range(self.model_config.max_kword_len):
                    kword.append(tf.zeros(self.model_config.batch_size, tf.int32, name='kword_input'))
                kwords_ph.append(kword)

            # Train for length control
            if self.is_train:
                kword_occupies_ph = []
                for _ in range(self.model_config.max_cnt_kword):
                    kword_occupies_ph.append(
                        tf.zeros(self.model_config.batch_size, tf.float32, name='kword_occupy_input'))

            emb_abstr, emb_kword, proj_w, proj_b = self.get_embedding()
            abstr = tf.stack(self.embedding_fn(abstr_ph, emb_abstr), axis=1)
            kwords = []
            for kword_idx in range(self.model_config.max_cnt_kword):
                kwords.append(self.embedding_fn(kwords_ph[kword_idx], emb_kword))

        with tf.variable_scope('model_encoder'):
            if self.hparams.pos == 'timing':
                abstr = common_attention.add_timing_signal_1d(abstr)
            encoder_embed_inputs = tf.nn.dropout(abstr,
                                                 1.0 - self.hparams.layer_prepostprocess_dropout)
            abstr_bias = common_attention.attention_bias_ignore_padding(
                tf.to_float(tf.equal(tf.stack(abstr_ph, axis=1),
                                     self.voc_kword.encode(constant.SYMBOL_PAD))))
            abstr_outputs = transformer.transformer_encoder(
                encoder_embed_inputs, abstr_bias, self.hparams)

        losses = []
        targets = []
        pred_occupies = []
        obj = {}

        hist_vector = None
        if 'kp_attn' in self.model_config.cov_mode:
            hist_vector = tf.zeros(
                [self.model_config.batch_size, 1, self.model_config.dimension,])

        with tf.variable_scope('model_decoder'):
            if self.model_config.subword_vocab_size:
                go_id = self.voc_kword.encode(constant.SYMBOL_GO)[0]
            else:
                go_id = self.voc_kword.encode(constant.SYMBOL_GO)
            batch_go = tf.tile(
                tf.expand_dims(self.embedding_fn(go_id, emb_kword), axis=0),
                [self.model_config.batch_size, 1])

            for kword_idx in range(self.model_config.max_cnt_kword):
                if self.is_train:
                    kword = kwords[kword_idx][:-1]
                    kword_ph = kwords_ph[kword_idx]
                    kword_output, kword_output_list = self.decode_step(
                        kword, abstr_outputs, abstr_bias, batch_go, hist_vector=hist_vector)
                    kword_logit_list = [self.output_to_logit(o, proj_w, proj_b) for o in kword_output_list]
                    kword_target_list = [tf.argmax(o, output_type=tf.int32, axis=-1)
                                         for o in kword_logit_list]

                    kword_lossbias = [
                        tf.to_float(tf.not_equal(d, self.voc_kword.encode(constant.SYMBOL_PAD)))
                        for d in kword_ph]
                    kword_lossbias = tf.stack(kword_lossbias, axis=1)
                    if self.model_config.number_samples > 0:
                        loss_fn = tf.nn.sampled_softmax_loss
                    else:
                        loss_fn = None
                    loss = sequence_loss(logits=tf.stack(kword_logit_list, axis=1),
                                         targets=tf.stack(kword_ph, axis=1),
                                         weights=kword_lossbias,
                                         softmax_loss_function=loss_fn,
                                         w=proj_w,
                                         b=proj_b,
                                         decoder_outputs=tf.stack(kword_output_list, axis=1),
                                         number_samples=self.model_config.number_samples
                                         )
                    kword_target = tf.stack(kword_target_list, axis=1)
                    targets.append(kword_target)

                    if 'kp_attn' in self.model_config.cov_mode:
                        kword_embed = self.embedding_fn(kword_ph, emb_kword)
                        hist_vector += tf.expand_dims(tf.reduce_mean(
                            tf.stack(kword_embed, axis=1), axis=1), axis=1)

                    # Train for length control
                    pred_occupy = self.get_pred_occupy_logit(hist_vector, abstr_outputs)
                    occupy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=pred_occupy, labels=kword_occupies_ph[kword_idx])
                    loss += tf.reduce_mean(occupy_loss)
                    pred_occupies.append(pred_occupy)

                    losses.append(loss)
                else:
                    loss, kword_target = self.transformer_beam_search(
                        abstr_outputs, abstr_bias, emb_kword, proj_w, proj_b, hist_vector=hist_vector)

                    targets.append(kword_target)
                    losses = loss

                    if 'kp_attn' in self.model_config.cov_mode:
                        kword_embed = self.embedding_fn(kword_target, emb_kword)
                        hist_vector += tf.expand_dims(tf.reduce_mean(kword_embed, axis=1), axis=1)

                    pred_occupy = tf.round(tf.sigmoid(self.get_pred_occupy_logit(hist_vector, abstr_outputs)))
                    pred_occupies.append(pred_occupy)

                tf.get_variable_scope().reuse_variables()
        if targets:
            obj['targets'] = tf.stack(targets, axis=1)
        obj['abstr_ph'] = abstr_ph
        obj['kwords_ph'] = kwords_ph
        if self.is_train:
            obj['kword_occupies_ph'] = kword_occupies_ph
        pred_occupies = tf.stack(pred_occupies, axis=1)
        obj['pred_occupies'] = pred_occupies

        if type(losses) is list:
            losses = tf.add_n(losses)
        return losses, obj
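
Compared with Example #2, this variant adds a length-control ("occupancy") head: for each keyphrase slot it predicts a logit for whether the slot should be filled, trains it with sigmoid cross-entropy, and at inference rounds the sigmoid to decide whether to keep the slot. A minimal sketch of that per-slot objective follows; the tensors are toy values and get_pred_occupy_logit is not reproduced here.

import tensorflow as tf

# One occupancy logit per example for the current keyphrase slot.
occupy_logits = tf.constant([2.1, -0.7, 0.3])
occupy_labels = tf.constant([1.0, 0.0, 1.0])   # 1.0 = slot holds a real keyphrase

# Training: sigmoid cross-entropy, averaged over the batch and added to the
# keyword sequence loss for this slot.
occupy_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=occupy_labels,
                                            logits=occupy_logits))

# Inference: threshold the probability to decide whether to emit the slot.
keep_slot = tf.round(tf.sigmoid(occupy_logits))
print(float(occupy_loss), keep_slot.numpy())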
Code Example #4
File: graph.py  Project: afcarl/seq_pred
    def create_model(self):
        # Input
        with tf.variable_scope('embedding'):
            self.emb = tf.get_variable(
                'embedding',
                [args.event_size + constant.NUM_SPEC_MARK, args.dimension],
                tf.float32,
                initializer=xavier_initializer())

        with tf.variable_scope('inputs'):
            self.inputs_ph = []
            for step in range(args.max_len):
                self.inputs_ph.append(
                    tf.zeros(args.batch_size, tf.int32, name='event'))

            self.inpt_events = [
                tf.zeros(args.batch_size, tf.int32, name='go')
            ] + self.inputs_ph[:-1]
            self.inpt_events_emb = tf.stack(self.get_embedding(
                self.inpt_events),
                                            axis=1)

            self.pred_events = self.inputs_ph

            self_attention_bias = (
                common_attention.attention_bias_lower_triangle(args.max_len))

        with tf.variable_scope('model'):
            outputs = self.attention_lm_decoder(self.inpt_events_emb,
                                                self_attention_bias,
                                                self.hparams, 'trans')
            self.w = tf.get_variable(
                'output_w',
                [args.dimension, args.event_size + constant.NUM_SPEC_MARK],
                tf.float32,
                initializer=xavier_initializer())
            self.b = tf.get_variable(
                'output_b', [args.event_size + constant.NUM_SPEC_MARK],
                tf.float32,
                initializer=xavier_initializer())
            # A width-1 conv1d applies the [dimension -> vocab] output
            # projection at every time step of the decoder outputs.
            # logits = tf.nn.xw_plus_b(outputs, tf.transpose(self.w), self.b)
            logits = tf.nn.conv1d(outputs, tf.expand_dims(self.w, 0), 1,
                                  'SAME')

        with tf.variable_scope('loss'):
            self.loss = sequence_loss(logits=logits,
                                      targets=tf.stack(self.pred_events,
                                                       axis=1))

        with tf.variable_scope('optim'):
            self.global_step = tf.get_variable('global_step',
                                               initializer=tf.constant(
                                                   0, dtype=tf.int64),
                                               trainable=False)

            if self.is_train:
                self.increment_global_step = tf.assign_add(self.global_step, 1)
                opt = tf.train.AdagradOptimizer(args.learning_rate)
                grads_and_vars = opt.compute_gradients(
                    self.loss, var_list=tf.trainable_variables())
                grads = [g for (g, v) in grads_and_vars]
                clipped_grads, _ = tf.clip_by_global_norm(grads, 5.0)
                self.train_op = opt.apply_gradients(
                    zip(clipped_grads, tf.trainable_variables()),
                    global_step=self.global_step)
            else:
                self.last_event = tf.argmax(logits[:, -1, :], axis=-1)

            self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
        print('Graph Built.')
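
The training branch above clips all gradients to a global L2 norm of 5.0 before applying them with Adagrad. The eager-mode sketch below reproduces that pattern on a toy variable and loss; it is an illustration of tf.clip_by_global_norm, not the project's training loop.

import tensorflow as tf

w = tf.Variable([2.0, -3.0])
opt = tf.keras.optimizers.Adagrad(learning_rate=0.1)

with tf.GradientTape() as tape:
    loss = tf.reduce_sum(w * w)          # toy loss
grads = tape.gradient(loss, [w])

# Rescale the whole gradient list so its combined L2 norm is at most 5.0,
# mirroring tf.clip_by_global_norm(grads, 5.0) in the example.
clipped, global_norm = tf.clip_by_global_norm(grads, 5.0)
opt.apply_gradients(zip(clipped, [w]))
print(float(global_norm), w.numpy())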