Code Example #1
def gradient_clip(gradients, max_gradient_norm, safe_clip):
    """Clipping gradients of a model."""
    if safe_clip:
        utils.print_out('Enable Safe Clip')
        safe_value = max_gradient_norm
        gradients = [
            tf.clip_by_value(x, -safe_value, safe_value) for x in gradients
        ]
        # Note: in safe-clip mode the reported "norm" is just the mean of the
        # first gradient tensor, a cheap stand-in rather than a true global norm.
        gradient_norm = tf.reduce_mean(gradients[0])
        # clipped_gradients, gradient_norm = tf.clip_by_global_norm(
        #     gradients, max_gradient_norm)
        gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
        gradient_norm_summary.append(
            tf.summary.scalar("clipped_gradient", gradient_norm))
        return gradients, gradient_norm_summary, gradient_norm

    else:
        clipped_gradients, gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        gradient_norm_summary = [tf.summary.scalar("grad_norm", gradient_norm)]
        gradient_norm_summary.append(
            tf.summary.scalar("clipped_gradient",
                              tf.global_norm(clipped_gradients)))

        return clipped_gradients, gradient_norm_summary, gradient_norm
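For context, a minimal sketch (not from the original project) of how gradient_clip would typically be wired into a TF1-style training step; loss, learning_rate, and global_step are assumed to exist elsewhere in the graph:

# Hypothetical wiring, assuming `loss`, `learning_rate`, and `global_step`
# are defined elsewhere in the training graph.
params = tf.trainable_variables()
gradients = tf.gradients(loss, params)
clipped_grads, grad_norm_summary, grad_norm = gradient_clip(
    gradients, max_gradient_norm=5.0, safe_clip=False)
opt = tf.train.GradientDescentOptimizer(learning_rate)
update_op = opt.apply_gradients(zip(clipped_grads, params),
                                global_step=global_step)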
Code Example #2
def eval_std_metrics(hparams, ref_tgt_file, ref_src_file, generated_file):
    metrics = ('embed,rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2,'
               'distinct_c-1,distinct_c-2,accuracy,len,entropy').split(',')
    metric_num = len(metrics)
    pool = Pool(metric_num)
    jobs = []

    for metric in metrics:
        job = pool.apply_async(
            evaluation_utils.evaluate,
            (ref_tgt_file, ref_src_file, generated_file,
             hparams['pre_embed_file'], metric, hparams['tgt_vocab_size'],
             None, None, hparams['pre_embed_dim']))
        jobs.append(job)
    pool.close()
    pool.join()

    res = dict()
    for metric, job in zip(metrics, jobs):
        score = job.get()
        if isinstance(score, (list, tuple)):
            score = '-'.join(str(x) for x in score)
        else:
            score = str(score)
        utils.print_out('%s->%s\n' % (metric, score))
        res[metric] = score

    return res
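Note that eval_std_metrics relies on a process pool that is not imported in the snippet; a minimal call sketch, assuming Pool comes from the standard library and the hparams keys used above are set (values illustrative only):

from multiprocessing import Pool  # assumed source of the Pool used above

hparams = {'pre_embed_file': 'glove.txt', 'pre_embed_dim': 300,
           'tgt_vocab_size': 40000}  # illustrative values only
res = eval_std_metrics(hparams, 'test.tgt', 'test.src', 'decoded.txt')
print(res['bleu-1'])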
Code Example #3
def print_variables_in_ckpt(ckpt_path):
    """Print a list of variables in a checkpoint together with their shapes."""
    utils.print_out("# Variables in ckpt %s" % ckpt_path)
    reader = tf.train.NewCheckpointReader(ckpt_path)
    variable_map = reader.get_variable_to_shape_map()
    for key in sorted(variable_map.keys()):
        utils.print_out("  %s: %s" % (key, variable_map[key]))
Code Example #4
def eval_std_metrics_st(hparams, ref_tgt_file, ref_src_file, generated_file):
    metrics = ('embed,rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2,'
               'distinct_c-1,distinct_c-2,accuracy,len').split(',')
    scores = []
    metric_num = len(metrics)

    for metric in metrics:
        score = evaluation_utils.evaluate(ref_tgt_file,
                                          ref_src_file,
                                          generated_file,
                                          hparams['pre_embed_file'],
                                          metric,
                                          dim=hparams['pre_embed_dim'])
        utils.print_out('%s->%s\n' % (metric, score))
        if isinstance(score, (list, tuple)):
            for x in score:
                scores.append(str(x))
        else:
            scores.append(str(score))
    metrics = ['entropy']
    for metric in metrics:
        score = evaluation_utils.evaluate(hparams['tgt_file'],
                                          hparams['src_file'],
                                          generated_file,
                                          hparams['pre_embed_file'],
                                          metric,
                                          vocab_size=hparams['tgt_vocab_size'])
        utils.print_out('%s->%s\n' % (metric, score))
        if isinstance(score, (list, tuple)):
            for x in score:
                scores.append(str(x))
        else:
            scores.append(str(score))
    return scores
Code Example #5
def check_vocab(vocab_file,
                out_dir,
                check_special_token=True,
                sos=None,
                eos=None,
                unk=None):
    """Check if vocab_file doesn't exist, create from corpus_file."""
    if tf.gfile.Exists(vocab_file):
        utils.print_out("# Vocab file %s exists" % vocab_file)
        vocab, vocab_size = load_vocab(vocab_file)
        if check_special_token:
            # Verify if the vocab starts with unk, sos, eos
            # If not, prepend those tokens & generate a new vocab file
            if not unk: unk = UNK
            if not sos: sos = SOS
            if not eos: eos = EOS
            assert len(vocab) >= 3
            if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
                utils.print_out("The first 3 vocab words [%s, %s, %s]"
                                " are not [%s, %s, %s]" %
                                (vocab[0], vocab[1], vocab[2], unk, sos, eos))
                vocab = [unk, sos, eos] + vocab
                vocab_size += 3
                new_vocab_file = os.path.join(out_dir,
                                              os.path.basename(vocab_file))
                with codecs.getwriter("utf-8")(tf.gfile.GFile(
                        new_vocab_file, "wb")) as f:
                    for word in vocab:
                        f.write("%s\n" % word)
                vocab_file = new_vocab_file
    else:
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    vocab_size = len(vocab)
    return vocab_size, vocab_file
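check_vocab calls a load_vocab helper that is not shown here; a minimal sketch consistent with how it is used above (one token per line) might look like this:

def load_vocab(vocab_file):
    # One token per line; returns the token list and its size.
    vocab = []
    with codecs.getreader("utf-8")(tf.gfile.GFile(vocab_file, "rb")) as f:
        for word in f:
            vocab.append(word.strip())
    return vocab, len(vocab)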
Code Example #6
    def create_encoder(self,
                       seq_inputs,
                       entity_inputs,
                       lengths,
                       name='encoder'):
        """

        :param inputs:  [batch,time,dimension]
        :param lengths:  [batch]
        :param hparams: hparams
        :return:
        """
        hparams = self.hparams
        mode = self.mode
        num_layers = hparams['encoder_num_layers']
        cell_type = hparams['cell_type']
        num_units = hparams['num_units']
        forget_bias = hparams['forget_bias']
        embed_dim = hparams['embed_dim']
        dropout = self.dropout

        with tf.variable_scope(name) as scope:
            inputs_for_std = seq_inputs
            inputs_for_fact = entity_inputs
            inputs = tf.concat([inputs_for_std, inputs_for_fact], axis=-1)

            # Create KEFU Encoder RNN Cells
            def create_kefu_cell(name):
                cell_list = [
                    model_helper.create_cell(cell_type, num_units, forget_bias,
                                             dropout, mode) for x in range(2)
                ]
                cell_fw = tf.contrib.rnn.MultiRNNCell(cell_list)
                return cell_fw

            with tf.variable_scope('Knowledge_RNN'):
                cell_fw = create_kefu_cell('KEFU_FW')
                cell_bw = create_kefu_cell('KEFU_BW')

                utils.print_out(
                    'Creating bidirectional RNN Encoder, num_layers=%s, cell_type=%s, num_units=%d'
                    % (num_layers, cell_type, num_units))

                bi_encoder_outputs, bi_encoder_state = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    inputs,
                    dtype=tf.float32,
                    sequence_length=lengths,
                    time_major=False,
                    swap_memory=True)
                encoder_outputs = tf.concat(bi_encoder_outputs, -1)
                # Concatenate the forward/backward states of the last layer
                encoder_state = [tf.concat(x, -1) for x in bi_encoder_state]

            return encoder_outputs, encoder_state
Code Example #7
def create_or_restore_a_model(out_dir, model, sess):
    latest_ckpt = tf.train.latest_checkpoint(out_dir)
    if latest_ckpt:
        try:
            print('Try to load from %s' % latest_ckpt)
            model.saver.restore(sess, latest_ckpt)
        except tf.errors.NotFoundError as e:
            utils.print_out("Can't load checkpoint")
            print_variables_in_ckpt(latest_ckpt)
            utils.print_out("%s" % str(e))
            raise e

        sess.run(tf.tables_initializer())
        utils.print_out("  loaded model parameters from %s" % (latest_ckpt))

        step, epoch = sess.run([model.global_step, model.epoch_step])
    else:
        init_op = tf.random_uniform_initializer(
            -0.08,
            0.08,
        )
        tf.get_variable_scope().set_initializer(init_op)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        utils.print_out(" created model with fresh parameters")
        step, epoch = 0, 0
    return step, epoch
Code Example #8
def get_learning_rate_decay(learning_rate, global_step, hparams):
    """Get learning rate decay."""
    decay_scheme = hparams['decay_scheme']
    start_decay_step, decay_steps, decay_factor = get_decay_info(hparams)
    utils.print_out(
        "  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
        "decay_factor %g" %
        (decay_scheme, start_decay_step, decay_steps, decay_factor))

    return tf.cond(global_step < start_decay_step,
                   lambda: learning_rate,
                   lambda: tf.compat.v1.train.exponential_decay(
                       learning_rate, (global_step - start_decay_step),
                       decay_steps,
                       decay_factor,
                       staircase=True),
                   name="learning_rate_decay_cond")
Code Example #9
def load_and_restore_config(config_path, verbose=False):
    hparams = load_config(config_path, verbose=verbose)
    out_dir = hparams['model_path']
    utils.default_path = os.path.join(out_dir, 'log.txt')
    model_config_path = os.path.join(out_dir, 'config.json')
    eval_file = os.path.join(out_dir, 'eval_out.txt')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if os.path.exists(model_config_path):
        utils.print_out('reload the parameters from %s' % model_config_path)
        loaded_hparams = load_config(model_config_path, verbose=True)
        for key in hparams.keys():
            if key not in loaded_hparams:
                utils.print_out('ADD HParam Key : %s' % key)
                loaded_hparams[key] = hparams[key]
        hparams = loaded_hparams
    return hparams
Code Example #10
def load_config(config_path, verbose=False):
    """

    :param config_path:
    :return:  hparams
    """
    utils.print_out('load json config file from %s' % config_path)
    with open(config_path, encoding='utf-8') as fin:
        config = json.load(fin)
        if verbose:
            pprint.pprint(config)

        if 'loss' not in config:
            config['loss'] = []
            config['loss_r'] = []
            config['loss_c'] = []
            config['epochs'] = []
        if 'loss_c' not in config:
            config['loss_c'] = []
        return config
Code Example #11
def load_embed_txt(embed_file):
    """Load embed_file into a python dictionary.

  Note: the embed_file should be a GloVe/word2vec formatted txt file. Here is
  an example assuming embed_size=5:

  the -0.071549 0.093459 0.023738 -0.090339 0.056123
  to 0.57346 0.5417 -0.23477 -0.3624 0.4037
  and 0.20327 0.47348 0.050877 0.002103 0.060547

  For word2vec format, the first line will be: <num_words> <emb_size>.

  Args:
    embed_file: file path to the embedding file.
  Returns:
    a dictionary that maps word to vector, and the size of embedding dimensions.
  """
    emb_dict = dict()
    emb_size = None

    is_first_line = True
    with codecs.getreader("utf-8")(tf.gfile.GFile(embed_file, "rb")) as f:
        for line in f:
            tokens = line.rstrip().split(" ")
            if is_first_line:
                is_first_line = False
                if len(tokens) == 2:  # header line
                    emb_size = int(tokens[1])
                    continue
            word = tokens[0]
            vec = list(map(float, tokens[1:]))
            emb_dict[word] = vec
            if emb_size:
                if emb_size != len(vec):
                    utils.print_out(
                        "Ignoring %s since embedding size is inconsistent." %
                        word)
                    del emb_dict[word]
            else:
                emb_size = len(vec)
    return emb_dict, emb_size
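A common follow-up (a sketch, assuming numpy is available and vocab is a token list defined elsewhere) is to pack the returned dict into an embedding matrix, falling back to zeros for out-of-vocabulary words:

import numpy as np  # assumed available

emb_dict, emb_size = load_embed_txt('glove.txt')
emb_matrix = np.array(
    [emb_dict.get(w, [0.0] * emb_size) for w in vocab],
    dtype=np.float32)  # shape: [len(vocab), emb_size]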
Code Example #12
def prepare_copynet_vocab(vocab_file,
                          out_dir,
                          src_len,
                          pattern='<src_#>',
                          sos=None,
                          eos=None,
                          unk=None):
    """Check if vocab_file doesn't exist, create from corpus_file."""
    if tf.gfile.Exists(vocab_file):
        utils.print_out("# Vocab file %s exists" % vocab_file)
        vocab, vocab_size = load_vocab(vocab_file)
        # Verify if the vocab starts with unk, sos, eos
        # If not, prepend those tokens & generate a new vocab file
        if not unk: unk = UNK
        if not sos: sos = SOS
        if not eos: eos = EOS
        assert len(vocab) >= 3
        if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
            utils.print_out("The first 3 vocab words [%s, %s, %s]"
                            " are not [%s, %s, %s]" %
                            (vocab[0], vocab[1], vocab[2], unk, sos, eos))
            vocab = [unk, sos, eos] + vocab
            vocab_size += 3
        for i in range(src_len):
            vocab.append(pattern.replace('#', str(i)))
        new_vocab_file = vocab_file + '.copy'
        print('Output CopyNet Vocab -> %s' % new_vocab_file)
        with codecs.getwriter("utf-8")(tf.gfile.GFile(
                new_vocab_file, "wb")) as f:
            for word in vocab:
                f.write("%s\n" % word)
        vocab_file = new_vocab_file
    else:
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    vocab_size = len(vocab)
    return vocab_size, vocab_file
Code Example #13
def get_learning_rate_warmup(learning_rate, global_step, hparams):
    """Get learning rate warmup."""
    warmup_steps = hparams['warmup_steps']
    warmup_scheme = hparams['warmup_scheme']
    utils.print_out("  learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
                    (hparams['learning_rate'], warmup_steps, warmup_scheme))

    # Apply inverse decay if global steps less than warmup steps.
    # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3)
    # When step < warmup_steps,
    #   learning_rate *= warmup_factor ** (warmup_steps - step)
    if warmup_scheme == "t2t":
        # 0.01^(1/warmup_steps): start with an lr 100 times smaller than the base
        warmup_factor = tf.exp(tf.math.log(0.01) / warmup_steps)
        inv_decay = warmup_factor**(tf.cast(warmup_steps - global_step,
                                            tf.float32))
    else:
        raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

    return tf.cond(global_step < warmup_steps,
                   lambda: inv_decay * learning_rate,
                   lambda: learning_rate,
                   name="learning_rate_warump_cond")
Code Example #14
    def add_record(self, report_dict, step, epoch):
        if self.start_time == -1:  # First report
            self.start_time = time.time()
            self.last_report_time = time.time()
            self.current_time = time.time()
            self.last_report_step = step - 1
            self.current_step = step
        else:
            self.current_time = time.time()
            self.current_step = step

        # update
        for key in report_dict:
            self.value_dict[key] += report_dict[key]

        if self.current_step - self.last_report_step >= self.report_per_steps:
            num_steps = self.current_step - self.last_report_step
            num_time = self.current_time - self.last_report_time
            step_time = num_time / num_steps

            summary = []
            for key in report_dict:
                if key == 'lr' or key == 'learning_rate':
                    summary.append('%s=%f' %
                                   (key, self.value_dict[key] / num_steps))
                else:
                    summary.append('%s=%.2f' %
                                   (key, self.value_dict[key] / num_steps))
            utils.print_out(
                '#[E%d/Step%d]  Training Summary: interval steps: %d, step_time=%.2fs'
                % (epoch, self.current_step, num_steps, step_time))
            utils.print_out('\t'.join(summary))

            self.last_report_step = step
            self.last_report_time = time.time()
            self.value_dict = defaultdict(float)
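add_record is a method of a training reporter whose class is not shown; a hypothetical call pattern inside a TF1 training loop (class, session, and tensor names are all assumptions):

# Hypothetical: `reporter` exposes add_record plus the state it updates
# above (start_time, value_dict, report_per_steps, ...).
for step in range(1, num_steps + 1):
    loss_val, lr_val = sess.run([train_loss, learning_rate])
    reporter.add_record({'loss': loss_val, 'lr': lr_val}, step, epoch)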
Code Example #15
def create_cell(unit_type,
                num_units,
                forget_bias,
                dropout,
                mode,
                residual_connection=False,
                device_str=None,
                residual_fn=None):

    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        single_cell = tf.contrib.rnn.BasicLSTMCell(num_units,
                                                   forget_bias=forget_bias)
    elif unit_type == "gru":
        single_cell = tf.contrib.rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        utils.print_out("  Layer Normalized LSTM, forget_bias=%g" %
                        forget_bias)
        single_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
            num_units, forget_bias=forget_bias, layer_norm=True)
    elif unit_type == "nas":
        single_cell = tf.contrib.rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    single_cell = tf.contrib.rnn.DropoutWrapper(cell=single_cell,
                                                input_keep_prob=(1.0 -
                                                                 dropout))

    # Residual
    if residual_connection:
        single_cell = tf.contrib.rnn.ResidualWrapper(single_cell,
                                                     residual_fn=residual_fn)
        utils.print_out("  %s" % type(single_cell).__name__)

    # Device Wrapper
    if device_str:
        single_cell = tf.contrib.rnn.DeviceWrapper(single_cell, device_str)
        utils.print_out("  %s, device=%s" %
                        (type(single_cell).__name__, device_str))

    return single_cell
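This mirrors how the encoder in Code Example #6 stacks cells; a short sketch building a 2-layer LSTM with create_cell (parameter values are illustrative):

cells = [
    create_cell(unit_type='lstm', num_units=512, forget_bias=1.0,
                dropout=0.2, mode=tf.contrib.learn.ModeKeys.TRAIN)
    for _ in range(2)
]
stacked_cell = tf.contrib.rnn.MultiRNNCell(cells)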
Code Example #16
def restore_a_model(out_dir, model, sess):
    latest_ckpt = tf.train.latest_checkpoint(out_dir)
    if latest_ckpt:
        try:
            print('Try to load from %s' % latest_ckpt)
            model.saver.restore(sess, latest_ckpt)
        except tf.errors.NotFoundError as e:
            utils.print_out("Can't load checkpoint")
            print_variables_in_ckpt(latest_ckpt)
            utils.print_out("%s" % str(e))
            raise e

        sess.run(tf.tables_initializer())
        utils.print_out("  loaded model parameters from %s" % (latest_ckpt))

        step, epoch = sess.run([model.global_step, model.epoch_step])
    else:
        raise Exception('No checkpoint found in %s' % out_dir)
    return step, epoch
Code Example #17
def test():
    # Dataset

    hparams = config_parser.load_and_restore_config(args.config_path, verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)
    if args.beam > 10:
        hparams['batch_size'] = hparams['batch_size'] * 30 // args.beam

    hparams['length_penalty_weight'] = args.length_penalty_weight
    hparams['diverse_decoding_rate'] = args.diverse_decoding_rate
    hparams['coverage_penalty_weight'] = args.coverage_penalty_weight

    # Dataset
    dataset = dataset_utils.create_flexka3_iterator(hparams, is_eval=True)
    if hparams.get('rank_based', False):
        model = RModel(dataset, hparams, model_helper.INFER)
    else:
        model = Model(dataset, hparams, model_helper.INFER)

    dropout = dataset['dropout']
    fact_vocab = []
    with open(hparams['fact_path'], encoding='utf-8') as fin:
        for line in fin.readlines():
            items = line.strip('\n').split()
            #entity_in_post, ent
            items[0] = 'P:'+items[0]
            items[1] = 'E:'+items[1]
            fact_vocab.append(','.join(items))

    out_dir = os.path.join(hparams['model_path'], 'min_ppl')
    if not os.path.exists(os.path.join(hparams['model_path'], 'decoded')):
        os.mkdir(os.path.join(hparams['model_path'], 'decoded'))

    top1_position_path = os.path.join(hparams['model_path'], 'decoded', 'test.predicted_golden_fact_position_top1.txt')
    topk_position_path = os.path.join(hparams['model_path'], 'decoded', 'test.predicted_golden_fact_position_top10.txt')
    top1_output_path = os.path.join(hparams['model_path'], 'decoded', 'predicted_top1.fh0')
    top10_output_path = os.path.join(hparams['model_path'], 'decoded', 'predicted_top10.fh0')
    meta_output_path = os.path.join(hparams['model_path'], 'decoded', 'fact_prediction.txt')

    test_query_file = hparams['test_src_file']
    test_response_file = hparams['test_tgt_file']

    with open(test_query_file, 'r+', encoding='utf-8') as fin:
        queries = [x.strip('\n') for x in fin.readlines()]
    with open(test_response_file, 'r+', encoding='utf-8') as fin:
        responses = [x.strip('\n') for x in fin.readlines()]

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(out_dir, model, sess)
        dataset['init_fn'](sess, 'test_')

        MRs = []
        MRRs = []
        hit1s = []
        hit5s = []
        hit10s = []
        hit20s = []

        utils.print_out('Current Epoch,Step : %s/%s, Max Epoch,Step : %s/%s' % (epoch, step, hparams['num_train_epochs'], hparams['num_train_steps']))
        case_id = 0
        with open(meta_output_path, 'w+', encoding='utf-8') as fout:
            with open(top1_position_path, 'w+', encoding='utf-8') as ftop1:
                with open(topk_position_path, 'w+', encoding='utf-8') as ftopk:
                    with open(top1_output_path, 'w+', encoding='utf-8') as fout1:
                        with open(top10_output_path, 'w+', encoding='utf-8') as foutk:
                            while True:
                                try:
                                    cue_fact, facts, probs = sess.run(
                                        [dataset['cue_fact'],
                                         dataset['inputs_for_facts'],
                                         model.classifier_scores],
                                        feed_dict={dropout: 0.0})
                                    topk_index, topk_labels = batch_top_k(probs, facts)

                                    ranks, reversed_ranks, hits = batch_rank_eval(
                                        cue_fact, probs, hitAT=(1, 5, 10, 20))
                                    MRs = MRs + ranks
                                    MRRs = MRRs + reversed_ranks
                                    hit1s = hit1s + hits[0]
                                    hit5s = hit5s + hits[1]
                                    hit10s = hit10s + hits[2]
                                    hit20s = hit20s + hits[3]

                                    for my_index, my_label in zip(topk_index, topk_labels):
                                        ftop1.write('%s\n' % my_index[0])
                                        fout1.write('%s\n' % fact_vocab[my_label[0]].split(',')[1][2:])
                                        for index in my_index:
                                            ftopk.write('%s\n' % index)
                                            foutk.write('%s\n' % fact_vocab[index])
                                        case_id += 1
                                except tf.errors.OutOfRangeError:
                                    break
                            MR = np.average(MRs)
                            MRR = np.average(MRRs)
                            hit1 = np.average(hit1s) * 100
                            hit5 = np.average(hit5s) * 100
                            hit10 = np.average(hit10s) * 100
                            hit20 = np.average(hit20s) * 100
                            utils.print_out('MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' %
                                            (MR, MRR, hit1, hit5, hit10, hit20))
Code Example #18
File: eval.py  Project: pku-sixing/IJCAI2020-TopicKA
def main(args):
    hparams = config_parser.load_config(args.config_path, verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)

    res_suffix = 'res'
    if args.pre_embed_file != '':
        hparams['pre_embed_file'] = args.pre_embed_file
        utils.print_out("Reset pre_embed_file to %s" % args.pre_embed_file)
        res_suffix = 'ores'

    if args.pre_embed_dim != -1:
        hparams['pre_embed_dim'] = args.pre_embed_dim
        utils.print_out("Reset pre_embed_dim to %s" % args.pre_embed_file)

    if args.rerank == 0:
        config_id = 'B%s_L%.1f_D%.1f_C%.1f' % (
            hparams['beam_width'], args.length_penalty_weight, args.diverse_decoding_rate, args.coverage_penalty_weight)
    else:
        config_id = 'R%s_B%s_L%.1f_D%.1f_C%.1f' % (
            args.rerank, hparams['beam_width'], args.length_penalty_weight, args.diverse_decoding_rate,
            args.coverage_penalty_weight)

    if not os.path.exists(os.path.join(hparams['model_path'], 'decoded')):
        os.mkdir(os.path.join(hparams['model_path'], 'decoded'))

    if args.binary:
        top1_out_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.txt.bi' % config_id)
        topk_out_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_topk.txt.bi' % config_id)

    else:
        top1_out_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.txt' % config_id)
        topk_out_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_topk.txt' % config_id)

        if args.rerank > 0:
            top1_out_file_path += '.mmi'
            topk_out_file_path += '.mmi'

    # Evaluation
    if args.binary:
        score_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.%s.bi' % (config_id, 'eres'))
    else:
        score_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.%s' % (config_id, 'eres'))

    # Load the entity vocabulary
    entity_list = []
    entity_dict = dict()
    entity_dict_path = hparams['entity_path']
    with open(entity_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            entity_list.append(e)
            entity_dict[e] = i

    # load generations
    generations = []
    with open(top1_out_file_path, 'r', encoding='utf-8') as fin:
        for line in fin:
            line = line.replace('#', '')
            line = line.replace('$C:', '')
            line = line.replace('$R:', '')
            line = line.replace('$E:', '')
            generations.append(line.strip('\n').split())

    # load refs
    refs = []
    with open(hparams['test_tgt_file'], 'r', encoding='utf-8') as fin:
        for line in fin:
            line = line.replace('#', '')
            line = line.replace('$C:', '')
            line = line.replace('$R:', '')
            line = line.replace('$E:', '')
            refs.append(line.strip('\n').split())

    # load facts
    facts = []
    with open(hparams['fact_path'], 'r', encoding='utf-8') as fin:
        for line in fin:
            facts.append([x.replace('#', '') for x in line.strip('\n').split()])

    # load fact idx
    fact_idx = []
    with open(hparams['test_fact_file'], 'r', encoding='utf-8') as fin:
        for line in fin:
            fact_idx.append([int(x) for x in line.strip('\n').split()])

    entity_scores = []
    entity_rates = []
    entity_recalls = []
    entity_precisions = []
    entity_distincts = []
    entity_targets = []
    with open(score_file_path, 'w+', encoding='utf-8') as fout:
        for generation, ref, idx in zip(generations, refs, fact_idx):
            # print(generation)
            # print(ref)
            entity_set = set()
            target_entity_set = set()
            for i in idx:
                if len(facts[i]) == 4:
                    if facts[i][0] not in stopwords:
                        target_entity_set.add(facts[i][0])
                    if facts[i][1] not in stopwords:
                        entity_set.add(facts[i][1])
                    if facts[i][3] not in stopwords:
                        entity_set.add(facts[i][3])
                elif len(facts[i]) == 5:
                    if facts[i][1] not in stopwords:
                        target_entity_set.add(facts[i][1])
                    if facts[i][1] not in stopwords:
                        entity_set.add(facts[i][1])
                    if facts[i][0] not in stopwords:
                        entity_set.add(facts[i][0])

            entity_score = 0.0
            target_entity_score = 0.0
            generation_entities = set()
            matched_entity = set()
            for word in generation:
                if word in entity_set:
                    generation_entities.add(word)
                    entity_score += 1
                if word in target_entity_set:
                    matched_entity.add(word)

            target_entity_score = len(matched_entity)
            entity_distincts.append(len(generation_entities))
            if len(generation) != 0:
                entity_rate = entity_score / len(generation)
            else:
                entity_rate = 0
            entity_scores.append(entity_score)
            entity_rates.append(entity_rate)
            entity_targets.append(target_entity_score)
            fout.write('%s\n' % ' '.join(matched_entity))
            ref_entities = set()
            for word in ref:
                if word in entity_set:
                    ref_entities.add(word)

            if len(ref_entities) != 0:
                entity_recalls.append(len(ref_entities & generation_entities) / (0.0+len(ref_entities)))
            else:
                entity_recalls.append(1.0)

            if len(generation_entities) != 0:
                entity_precisions.append(len(ref_entities & generation_entities) / (0.0+len(generation_entities)))
            else:
                if len(ref_entities) != 0:
                    entity_precisions.append(0.0)
                else:
                    entity_precisions.append(1.0)

        fout.write('%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n' % (
            sum(entity_targets) / len(generations),     # Matched Entity Score
            sum(entity_distincts) / len(generations),   # Used Entity Score
            sum(entity_rates) / len(generations),       # Used Entity ratio
            sum(entity_recalls) / len(generations),     # Recall
            sum(entity_precisions) / len(generations),  # Precision
        ))

    if not args.only_entity:
        if args.binary:
            score_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.%s.bi' % (config_id, res_suffix))
        else:
            score_file_path = os.path.join(hparams['model_path'], 'decoded', '%s_top1.%s' % (config_id, res_suffix))

        scores = []
        metrics = 'rouge,bleu-1,bleu-2,distinct-1,distinct-2'.split(',')
        thread_pool = Pool(args.thread)
        jobs = []
        for metric in metrics:
            job = thread_pool.apply_async(evaluation_utils.evaluate, (
                hparams['test_tgt_file'], hparams['test_src_file'], top1_out_file_path,
                hparams['pre_embed_file'], metric, hparams['pre_embed_dim'], None, None, hparams['beam_width']))
            jobs.append(job)

        # entropy
        metrics.append('entropy')
        job = thread_pool.apply_async(evaluation_utils.evaluate, (
            hparams['tgt_file'], hparams['src_file'], top1_out_file_path, hparams['pre_embed_file'], 'entropy',
            hparams['pre_embed_dim'], hparams['tgt_vocab_size']))
        jobs.append(job)
        thread_pool.close()
        thread_pool.join()

        # Embedding-based
        complex_score = evaluation_utils.evaluate(hparams['test_tgt_file'], hparams['test_src_file'],
                                                  top1_out_file_path,
                                                  hparams['pre_embed_file'], 'embed', dim=hparams['pre_embed_dim'])
        score = complex_score[0:len(complex_score) // 2]
        if len(score) == 1:
            score = score[0]

        utils.print_out('%s->%s\n' % ('embed', score))
        if isinstance(score, (list, tuple)):
            for x in score:
                scores.append(str(x))
        else:
            scores.append(str(score))

        for job, metric in zip(jobs, metrics):
            complex_score = job.get()
            score = complex_score[0:len(complex_score) // 2]
            if len(score) == 1:
                score = score[0]
            utils.print_out('%s->%s\n' % (metric, score))

            if isinstance(score, (list, tuple)):
                for x in score:
                    scores.append(str(x))
            else:
                scores.append(str(score))

        with open(score_file_path, 'w+', encoding='utf-8') as fout:
            fout.write('\t'.join(scores))
Code Example #19
    def create_decoder(self, encoder_outputs, encoder_states, name='decoder'):
        hparams = self.hparams
        mode = self.mode
        sim_dim = self.hparams.get("sim_dim", 64)

        lengths = self._lengths_for_decoder

        copy_embedding_transform_fn = tf.layers.Dense(
            units=hparams['embed_dim'], name='copy_embedding_transformation')
        copy_fn_var_scope = tf.get_variable_scope()

        if self.mode == model_helper.TRAIN and hparams.get(
                "multi_decoder_input", False):
            # Common Words
            embedding_list = []
            common_word_embedding = self._input_embeddings_for_decoder
            embedding_list.append(common_word_embedding)

            if hparams.get("copy_predict_mode", False):

                decoder_input_idx = self._inputs_for_decoder
                not_common_words = tf.greater_equal(decoder_input_idx,
                                                    hparams['tgt_vocab_size'])
                not_entity_words = tf.less(
                    decoder_input_idx,
                    hparams['tgt_vocab_size'] + hparams['copy_token_nums'])
                is_copy_words = not_common_words & not_entity_words
                is_copy_mask = tf.cast(is_copy_words, tf.float32)

                copy_idx = decoder_input_idx - hparams['tgt_vocab_size']
                copy_idx = tf.maximum(copy_idx, 0)
                copy_idx = tf.minimum(copy_idx, hparams['copy_token_nums'] - 1)

                src_idx = self._inputs_for_encoder
                batch_size = tf.shape(src_idx)[0]
                max_src_len = tf.shape(src_idx)[1]
                max_tgt_len = tf.shape(copy_idx)[1]

                offset = tf.range(batch_size) * max_src_len
                offset = tf.expand_dims(offset, -1)
                offset = tf.tile(offset, [1, max_tgt_len])
                offset_copy_idx = copy_idx + offset

                flatten_encoder_outputs = tf.reshape(
                    encoder_outputs, [-1, tf.shape(encoder_outputs)[-1]])
                copy_embedding = tf.nn.embedding_lookup(
                    flatten_encoder_outputs, offset_copy_idx)
                copy_embedding = tf.reshape(
                    copy_embedding,
                    [batch_size, max_tgt_len,
                     hparams.get("num_units") * 2])
                copy_embedding = copy_embedding_transform_fn(copy_embedding)

                # common_word_idx = tf.where(is_copy_mask, copy_to_word_idx, common_word_idx)
                is_copy_mask = tf.expand_dims(is_copy_mask, -1)
                copy_embedding = copy_embedding * is_copy_mask

                embedding_list.append(copy_embedding)

            if hparams.get("entity_predict_mode", False):
                embedding_list.append(
                    self._input_entity_embeddings_for_decoder)

            targets_in_embedding = tf.concat(embedding_list, -1)
        else:
            targets_in_embedding = self._input_embeddings_for_decoder

        with tf.variable_scope(name) as scope:
            num_layers = hparams['decoder_num_layers']
            cell_type = hparams['cell_type']
            num_units = hparams['num_units']
            forget_bias = hparams['forget_bias']
            dropout = self.dropout
            maximum_iterations = tf.reduce_max(self._lengths_for_encoder) * 2

            # Create RNN Cell
            with tf.variable_scope('std_rnn'):
                cell_list = [
                    model_helper.create_cell(
                        unit_type=cell_type,
                        num_units=num_units,
                        forget_bias=forget_bias,
                        dropout=dropout,
                        mode=mode,
                    ) for x in range(num_layers)
                ]

            if num_layers > 1:
                cell_std = tf.contrib.rnn.MultiRNNCell(cell_list)
            else:
                cell_std = cell_list[0]

            if hparams.get("decoder_num_layers") == hparams.get(
                    "encoder_num_layers") and hparams.get(
                        "pass_raw_encoder_state", False):

                decoder_initial_state = []
                for i in range(num_layers):
                    decoder_initial_state.append(encoder_states[i])

                if num_layers > 1:
                    decoder_initial_state = tuple(decoder_initial_state)
                else:
                    decoder_initial_state = decoder_initial_state[0]
            else:
                decoder_initial_state = []
                if self.knowledge_fusion is None:
                    concatenated_encoder_states = tf.nn.dropout(
                        tf.concat(encoder_states, -1), keep_prob=1.0 - dropout)
                else:
                    concatenated_encoder_states = tf.nn.dropout(
                        tf.concat(encoder_states + [self.knowledge_fusion],
                                  -1),
                        keep_prob=1.0 - dropout)

                if self.hparams.get('word_bow_loss', 0.0) > 0.0:

                    def safe_log(y):
                        return tf.log(
                            tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

                    if self.hparams.get('word_bow_loss_type_2',
                                        False) is False:
                        common_word_inputs = tf.layers.dense(
                            concatenated_encoder_states,
                            self.hparams.get("mid_projection_dim"),
                            tf.nn.elu,
                            name='word_bow_predictor_1')
                        word_logits = tf.layers.dense(
                            common_word_inputs,
                            self.hparams.get("tgt_vocab_size"),
                            use_bias=False,
                            name='word_bow_predictor_2')
                    else:
                        word_logits = tf.layers.dense(
                            self.knowledge_fusion,
                            self.hparams.get("tgt_vocab_size"),
                            use_bias=False,
                            name='word_bow_predictor_2')
                    word_probs = tf.nn.softmax(word_logits)
                    word_bow_loss = -tf.reduce_sum(
                        self._golden_word_bow * safe_log(word_probs),
                        -1) / tf.maximum(
                            tf.reduce_sum(self._golden_word_bow, -1), 1)
                    self.word_bow_loss = tf.reduce_sum(
                        word_bow_loss) / self.batch_size
                else:
                    self.word_bow_loss = tf.constant(0.0)

                for i in range(num_layers):
                    init_out = tf.layers.dense(concatenated_encoder_states,
                                               num_units,
                                               activation=tf.nn.tanh,
                                               use_bias=False,
                                               name='std_transformer_%d' % i)
                    decoder_initial_state.append(init_out)

                if num_layers > 1:
                    decoder_initial_state = tuple(decoder_initial_state)
                else:
                    decoder_initial_state = decoder_initial_state[0]

            with tf.variable_scope('cue_rnn'):
                cell_list = [
                    model_helper.create_cell(
                        unit_type=cell_type,
                        num_units=num_units,
                        forget_bias=forget_bias,
                        dropout=dropout,
                        mode=mode,
                    ) for x in range(num_layers)
                ]

            _batch_size = tf.shape(self._fact_candidate)[0]
            _fact_num = tf.shape(self._fact_candidate)[1]
            fact_projection = self.fact_projection

            if num_layers > 1:
                cell_cue = tf.contrib.rnn.MultiRNNCell(cell_list)
            else:
                cell_cue = cell_list[0]

            # Attention
            assert hparams['attention'] is not None
            memory = encoder_outputs
            if (self.mode == model_helper.INFER
                    and hparams['infer_mode'] == "beam_search"):
                memory, source_sequence_length, decoder_initial_state, batch_size = (
                    self._prepare_beam_search_decoder_inputs(
                        hparams["beam_width"], memory,
                        self._lengths_for_encoder, decoder_initial_state))

                if hparams.get('kefu_decoder', False):
                    _lengths_for_fact_candidate = tf.contrib.seq2seq.tile_batch(
                        self._lengths_for_fact_candidate,
                        multiplier=hparams['beam_width'])
                    _fact_candidate_embedding = tf.contrib.seq2seq.tile_batch(
                        fact_projection, multiplier=hparams['beam_width'])
                    _cue_input_embedding = tf.contrib.seq2seq.tile_batch(
                        self._cue_fact_embedding,
                        multiplier=hparams['beam_width'])

                    fact_entity_idx = tf.contrib.seq2seq.tile_batch(
                        self._fact_candidate, multiplier=hparams['beam_width'])
                    encoder_memory = tf.contrib.seq2seq.tile_batch(
                        encoder_outputs, multiplier=hparams['beam_width'])
                    encoder_memory_len = tf.contrib.seq2seq.tile_batch(
                        self._lengths_for_encoder,
                        multiplier=hparams['beam_width'])
                    if self.knowledge_distribution is not None:
                        knowledge_distribution = tf.contrib.seq2seq.tile_batch(
                            self.knowledge_distribution,
                            multiplier=hparams['beam_width'])
                    else:
                        knowledge_distribution = self.knowledge_distribution

            else:
                fact_entity_idx = self._fact_candidate
                _lengths_for_fact_candidate = self._lengths_for_fact_candidate
                _fact_candidate_embedding = fact_projection
                source_sequence_length = self._lengths_for_encoder
                batch_size = self.batch_size
                _cue_input_embedding = self._cue_fact_embedding
                encoder_memory = encoder_outputs
                encoder_memory_len = self._lengths_for_encoder
                knowledge_distribution = self.knowledge_distribution

            attention_mechanism = self.create_attention_mechanism(
                hparams["attention"], num_units, memory,
                source_sequence_length, self.mode)

            generate_probs_in_cell = hparams.get(
                'kefu_decoder',
                True) and (hparams.get("entity_predict_mode", False)
                           or hparams.get("copy_predict_mode", False))

            if generate_probs_in_cell:
                common_word_projection = self._projection_layer
            else:
                common_word_projection = None

            # Only generate alignment in greedy INFER mode.
            alignment_history = (self.mode == model_helper.INFER
                                 and hparams["infer_mode"] != "beam_search")

            k_openness_history = self.mode == model_helper.INFER

            if hparams.get('kefu_decoder', False):
                if hparams.get("use_dynamic_knowledge_distribution",
                               True) is False:
                    knowledge_distribution = None
                cell_fw = KEFUAttentionWrapper2.AttentionWrapper(
                    cell_std,
                    cell_cue,
                    _cue_input_embedding,
                    _fact_candidate_embedding,
                    _lengths_for_fact_candidate,
                    knowledge_distribution,
                    attention_mechanism,
                    mid_projection_dim=hparams.get(
                        "mid_projection_dim_for_commonword",
                        hparams.get("mid_projection_dim", 1280)),
                    cue_fact_mode=hparams.get("cue_fact", False),
                    cue_fact_mask=self.mode == model_helper.INFER,
                    encoder_memory=encoder_memory,
                    encoder_memory_len=encoder_memory_len,
                    balance_gate=hparams.get("balance_gate", True),
                    entity_predict_mode=hparams.get('entity_predict_mode',
                                                    False),
                    copy_predict_mode=hparams.get('copy_predict_mode', False),
                    vocab_sizes=(hparams['tgt_vocab_size'],
                                 hparams['copy_token_nums'],
                                 hparams['entity_token_nums']),
                    common_word_projection=common_word_projection,
                    attention_layer_size=num_units,
                    alignment_history=alignment_history,
                    k_openness_history=k_openness_history,
                    output_attention=hparams["output_attention"],
                    sim_dim=sim_dim,
                    name="attention")
            else:

                cell_fw = tf.contrib.seq2seq.AttentionWrapper(
                    cell_std,
                    attention_mechanism,
                    attention_layer_size=num_units,
                    alignment_history=alignment_history,
                    output_attention=hparams["output_attention"],
                    name="attention")

            batch_size = tf.to_int32(batch_size)
            decoder_initial_state = cell_fw.zero_state(
                batch_size, tf.float32).clone(cell_state=decoder_initial_state)

            # Train or Eval
            if mode != tf.contrib.learn.ModeKeys.INFER:
                utils.print_out(
                    'Creating Training RNN Decoder, num_layers=%s, cell_type=%s, num_units=%d'
                    % (num_layers, cell_type, num_units))
                # Helper
                helper = tf.contrib.seq2seq.TrainingHelper(
                    targets_in_embedding, lengths, time_major=False)

                # Decoder
                my_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell_fw, helper, decoder_initial_state)

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    output_time_major=False,
                    swap_memory=True,
                    scope=scope)

                rnn_outputs = outputs.rnn_output
                if generate_probs_in_cell:
                    logits = rnn_outputs
                else:
                    logits = self._projection_layer(rnn_outputs)

                if hparams.get("cue_fact", False):
                    self._cue_fact_loss = final_context_state.cue_fact_openness
                else:
                    self._cue_fact_loss = tf.constant(0.0)
                sampled_id = None
                scores = tf.no_op()

                self.selector_logits = tf.transpose(
                    final_context_state.model_selector_openness.stack(),
                    [1, 0, 2])

            else:
                utils.print_out(
                    'Creating Infer RNN Decoder, num_layers=%s, cell_type=%s, num_units=%d'
                    % (num_layers, cell_type, num_units))

                infer_mode = hparams["infer_mode"]
                utils.print_out('Infer mode : %s' % infer_mode)

                start_token = tf.cast(
                    self.tgt_vocab_table.lookup(tf.constant(vocab_utils.SOS)),
                    tf.int32)
                end_token = tf.cast(
                    self.tgt_vocab_table.lookup(tf.constant(vocab_utils.EOS)),
                    tf.int32)

                start_tokens = tf.fill([tf.shape(self._inputs_for_encoder)[0]],
                                       start_token)

                def embedding_fn_multi(
                        input_idx,
                        fact_entity_idx=fact_entity_idx,
                        copy_embedding_transform_fn=copy_embedding_transform_fn
                ):
                    common_word_idx = input_idx
                    embedding_list = []  # filled in reverse order; reversed before concat
                    # Common Copy Entity
                    if hparams.get("entity_predict_mode", False):
                        # entity mode
                        relative_entity_idx = input_idx - hparams.get(
                            'src_vocab_size') - hparams.get('copy_token_nums')
                        is_entity = tf.greater_equal(relative_entity_idx, 0)
                        is_entity_mask = tf.cast(is_entity, tf.float32)
                        relative_entity_idx = tf.maximum(
                            0, relative_entity_idx)

                        # [batch, fact_len]
                        batch_size = tf.shape(input_idx)[0]
                        max_fact_num = tf.shape(fact_entity_idx)[1]
                        flatten_fact_idx = tf.reshape(fact_entity_idx, [-1])

                        offset = tf.expand_dims(tf.range(batch_size),
                                                -1) * max_fact_num
                        relative_entity_idx = relative_entity_idx + offset
                        fact_idx = tf.nn.embedding_lookup(
                            flatten_fact_idx, relative_entity_idx)
                        entity_idx = tf.nn.embedding_lookup(
                            self._fact_entity_in_response, fact_idx)
                        entity2word_idx = tf.nn.embedding_lookup(
                            self._entity2word, entity_idx)

                        common_word_idx = tf.where(is_entity, entity2word_idx,
                                                   common_word_idx)

                        tmp_common_word_idx = common_word_idx
                        if hparams.get("copy_predict_mode", False) is False:
                            common_word_idx_to_entity_idx = tf.nn.embedding_lookup(
                                self._word2entity, tmp_common_word_idx)
                            entity_embedding = tf.nn.embedding_lookup(
                                self._embedding_entity,
                                common_word_idx_to_entity_idx)
                            embedding_list.append(entity_embedding)

                    if hparams.get("copy_predict_mode", False):
                        src_idx = self._inputs_for_encoder
                        max_src_len = tf.shape(src_idx)[1]
                        batch_size = tf.shape(input_idx)[0]

                        isnot_common_words = tf.greater_equal(
                            input_idx, hparams['tgt_vocab_size'])
                        isnot_entity_words = tf.less(
                            input_idx, hparams['tgt_vocab_size'] + max_src_len)
                        is_copy_words = isnot_common_words & isnot_entity_words
                        is_copy_mask = tf.cast(is_copy_words, tf.float32)

                        copy_idx = input_idx - hparams['tgt_vocab_size']
                        copy_idx = tf.maximum(copy_idx, 0)
                        copy_idx = tf.minimum(copy_idx, max_src_len - 1)

                        max_tgt_len = tf.shape(copy_idx)[1]

                        offset = tf.range(batch_size) * max_src_len
                        offset = tf.expand_dims(offset, -1)
                        offset = tf.tile(offset, [1, max_tgt_len])
                        offset_copy_idx = copy_idx + offset

                        flatten_src_idx = tf.reshape(src_idx, [-1])
                        flatten_encoder_outputs = tf.reshape(
                            encoder_outputs,
                            [-1, tf.shape(encoder_outputs)[-1]])
                        copy_to_word_idx = tf.nn.embedding_lookup(
                            flatten_src_idx, offset_copy_idx)
                        copy_embedding = tf.nn.embedding_lookup(
                            flatten_encoder_outputs, offset_copy_idx)
                        copy_embedding = tf.reshape(copy_embedding, [
                            batch_size, max_tgt_len,
                            hparams.get("num_units") * 2
                        ])
                        with tf.variable_scope(copy_fn_var_scope):
                            copy_embedding = copy_embedding_transform_fn(
                                copy_embedding)

                        common_word_idx = tf.where(is_copy_words,
                                                   copy_to_word_idx,
                                                   common_word_idx)
                        tmp_common_word_idx = common_word_idx
                        if hparams.get("entity_predict_mode", False):
                            common_word_idx_to_entity_idx = tf.nn.embedding_lookup(
                                self._word2entity, tmp_common_word_idx)
                            entity_embedding = tf.nn.embedding_lookup(
                                self._embedding_entity,
                                common_word_idx_to_entity_idx)
                            embedding_list.append(entity_embedding)
                        is_copy_mask = tf.expand_dims(is_copy_mask, -1)
                        copy_embedding = copy_embedding * is_copy_mask
                        embedding_list.append(copy_embedding)

                    embedding_list.append(
                        tf.nn.embedding_lookup(self._embedding_vocab,
                                               common_word_idx))

                    if hparams.get('add_token_type_feature', False):
                        embedding_list.append(
                            tf.nn.embedding_lookup(self._embedding_id2type,
                                                   input_idx))
                    # Must reverse: embeddings were appended in reverse order
                    embedding_list.reverse()
                    return tf.concat(embedding_list, -1)

                def embedding_fn(x, fact_entity_idx=fact_entity_idx):
                    if hparams.get('entity_predict_mode', False):
                        # > 0 is_entity else is word or copy token [0,500]
                        relative_entity_idx = x - hparams.get(
                            'src_vocab_size') - hparams.get('copy_token_nums')

                        is_entity = tf.greater(relative_entity_idx, 0)
                        relative_entity_idx = tf.maximum(
                            0, relative_entity_idx)
                        # fact_entity_idx: [batch, fact_len]
                        # Cast relative idx to right idx
                        batch_size = tf.shape(fact_entity_idx)[0]
                        max_fact_num = tf.shape(fact_entity_idx)[1]
                        fact_entity_idx = tf.reshape(fact_entity_idx, [-1, 1])

                        # batch_range
                        offset = tf.expand_dims(tf.range(batch_size),
                                                -1) * max_fact_num
                        relative_entity_idx = relative_entity_idx + offset
                        relative_entity_idx = tf.reshape(
                            relative_entity_idx, tf.shape(x))

                        entity_idx = tf.nn.embedding_lookup(
                            fact_entity_idx, relative_entity_idx)

                        entity_idx = tf.squeeze(entity_idx, -1)

                        entity_idx = tf.nn.embedding_lookup(
                            self._fact_entity_in_response, entity_idx)
                        entity2word_idx = tf.nn.embedding_lookup(
                            self._entity2word, entity_idx)

                        x = tf.where(is_entity, entity2word_idx, x)

                    return tf.nn.embedding_lookup(self._embedding_vocab, x)

                if infer_mode == "greedy":
                    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        embedding_fn, start_tokens, end_token)
                if infer_mode == "beam_search":
                    beam_width = hparams["beam_width"]
                    beam_decoder_fn = BeamSearchDecoder
                    if generate_probs_in_cell:
                        projection_layer = None
                    else:
                        projection_layer = self._projection_layer
                    if hparams.get("multi_decoder_input", False):
                        my_embedding_fn = embedding_fn_multi
                    else:
                        my_embedding_fn = embedding_fn
                    my_decoder = beam_decoder_fn(
                        cell=cell_fw,
                        embedding=my_embedding_fn,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=beam_width,
                        output_layer=projection_layer,
                        coverage_penalty_weight=hparams.get(
                            'coverage_penalty_weight', 0),
                        diverse_decoding_rate=hparams.get(
                            'diverse_decoding_rate', 0),
                        length_penalty_weight=hparams.get(
                            'length_penalty_weight', 0))
                else:
                    raise ValueError("Unknown infer_mode '%s'", infer_mode)

                if infer_mode != 'beam_search':
                    my_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell_fw,
                        helper,
                        decoder_initial_state,
                        output_layer=projection_layer  # applied per timestep
                    )

                # Dynamic decoding
                outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    my_decoder,
                    maximum_iterations=maximum_iterations,
                    output_time_major=False,
                    swap_memory=True,
                    scope=scope)

                if infer_mode == "beam_search":
                    # sampled_id [batch_id,length,beam_id]
                    sampled_id = outputs.predicted_ids
                    logits = tf.no_op()
                    scores = outputs.beam_search_decoder_output.scores
                    # first dim is set to the beam_id
                    sampled_id = tf.transpose(sampled_id, [2, 0, 1])

                    scores = tf.transpose(scores, [2, 0, 1])

                    if hparams.get('kefu_decoder', False):
                        self.mode_selector = final_context_state.cell_state.model_selector_openness
                        self.fact_alignments = final_context_state.cell_state.fact_alignments
                        if hparams.get('cue_fact', False):
                            self.k_openness = final_context_state.cell_state.k_openness
                        else:
                            self.k_openness = tf.constant(0.0)
                        self.copy_alignments = final_context_state.cell_state.copy_alignments

                        if hparams.get("fact_memory_read", False):
                            self.fact_memory_alignments = final_context_state.cell_state.fact_memory_alignments
                        else:
                            self.fact_memory_alignments = tf.no_op()
                    else:
                        self.debug = tf.no_op()

                else:
                    logits = outputs.rnn_output
                    sampled_id = outputs.sample_id
                    scores = outputs.scores
                    sampled_id = tf.expand_dims(sampled_id, 0)
                    scores = tf.expand_dims(scores, 0)

            return logits, sampled_id, scores
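
The copy branch in embedding_fn_multi above gathers per-example source positions by flattening the batch and adding a per-row offset before the lookup. A minimal NumPy sketch of that indexing trick, with made-up names and shapes (nothing here comes from the repository):

import numpy as np

# Toy batch: 2 source sequences of length 4 (max_src_len).
src_idx = np.array([[11, 12, 13, 14],
                    [21, 22, 23, 24]])      # [batch, max_src_len]
copy_idx = np.array([[0, 3],
                     [2, 2]])               # [batch, max_tgt_len] positions to copy

batch_size, max_src_len = src_idx.shape

# Offset each row so the indices address the flattened [batch * max_src_len]
# array, mirroring tf.range(batch_size) * max_src_len in the decoder code.
offset = (np.arange(batch_size) * max_src_len)[:, None]
flat_src = src_idx.reshape(-1)
copied_word_ids = flat_src[copy_idx + offset]
print(copied_word_ids)                      # [[11 14] [23 23]]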
Code example #20
0
    def create_model(self, name='flexka'):
        def safe_log(y):
            return tf.log(tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

        hparams = self.hparams
        with tf.variable_scope(name) as scope:
            encoder_outputs, encoder_states = self.create_encoder(
                self._input_embeddings_for_encoder,
                self._input_entity_embeddings_for_encoder,
                self._lengths_for_encoder,
            )

            if self._fact_distribution:
                self.knowledge_fusion = None
                max_candidate_num = tf.shape(self._fact_candidate_embedding)[1]
                fact_embedding_projection = self.fact_projection
                prior_inputs = tf.concat(encoder_states, -1)
                prior_projection = tf.layers.dense(prior_inputs,
                                                   units=hparams.get(
                                                       "sim_dim", 64),
                                                   activation=tf.nn.tanh,
                                                   use_bias=True,
                                                   name='prior_distribution')
                prior_projection = tf.expand_dims(prior_projection, 1)
                prior_projection = tf.tile(prior_projection,
                                           [1, max_candidate_num, 1])
                prior_scores = tf.reduce_sum(
                    prior_projection * fact_embedding_projection, -1)
                fact_seq_mask = tf.sequence_mask(
                    self._lengths_for_fact_candidate, dtype=tf.float32)
                unk_mask = tf.sequence_mask(tf.ones_like(
                    self._lengths_for_fact_candidate),
                                            maxlen=max_candidate_num,
                                            dtype=tf.float32)
                fact_mask = (1.0 - fact_seq_mask) * -1e10 + unk_mask * -1e10
                prior_scores += fact_mask
                prior_distribution = tf.nn.softmax(prior_scores)

                if self.mode == model_helper.TRAIN:
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=True):
                        decoder_encoder_outputs, decoder_encoder_states = self.create_encoder(
                            self._input_embeddings_for_decoder,
                            self._input_entity_embeddings_for_decoder,
                            self._lengths_for_decoder)
                    post_inputs = tf.concat(
                        decoder_encoder_states + encoder_states, -1)
                    post_projection = tf.layers.dense(post_inputs,
                                                      units=hparams.get(
                                                          "sim_dim", 64),
                                                      activation=tf.nn.tanh,
                                                      use_bias=True,
                                                      name='post_distribution')
                    post_projection = tf.expand_dims(post_projection, 1)
                    post_projection = tf.tile(post_projection,
                                              [1, max_candidate_num, 1])
                    post_scores = tf.reduce_sum(
                        post_projection * fact_embedding_projection, -1)
                    post_scores += fact_mask
                    post_distribution = tf.nn.softmax(post_scores)
                    self.knowledge_distribution = post_distribution

                    if hparams.get('knowledge_fusion',
                                   "none") == 'initDecoder':
                        self.knowledge_fusion = tf.reduce_sum(
                            self._fact_candidate_embedding *
                            tf.expand_dims(post_distribution, -1), 1)

                    kld_loss = post_distribution * safe_log(
                        post_distribution / tf.clip_by_value(
                            prior_distribution, 1e-9, 1.0))
                    kld_loss = tf.reduce_mean(kld_loss, -1)
                    self.kld_loss = tf.reduce_sum(kld_loss) / self.batch_size

                    knowledge_bow_loss = -tf.reduce_sum(
                        self._golden_fact_bow * safe_log(
                            self.knowledge_distribution), -1) / tf.maximum(
                                tf.reduce_sum(self._golden_fact_bow, -1), 1)
                    self.knowledge_bow_loss = tf.reduce_sum(
                        knowledge_bow_loss) / self.batch_size

                else:
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=True):
                        decoder_encoder_outputs, decoder_encoder_states = self.create_encoder(
                            self._input_embeddings_for_decoder,
                            self._input_entity_embeddings_for_decoder,
                            self._lengths_for_decoder)
                    post_inputs = tf.concat(
                        decoder_encoder_states + encoder_states, -1)
                    post_projection = tf.layers.dense(post_inputs,
                                                      units=hparams.get(
                                                          "sim_dim", 64),
                                                      activation=tf.nn.tanh,
                                                      use_bias=True,
                                                      name='post_distribution')
                    post_projection = tf.expand_dims(post_projection, 1)
                    post_projection = tf.tile(post_projection,
                                              [1, max_candidate_num, 1])
                    post_scores = tf.reduce_sum(
                        post_projection * fact_embedding_projection, -1)
                    post_scores += fact_mask
                    self.post_knowledge_distribution = tf.nn.softmax(
                        post_scores)

                    self.knowledge_distribution = prior_distribution
                    if hparams.get('knowledge_fusion',
                                   "none") == 'initDecoder':
                        self.knowledge_fusion = tf.reduce_sum(
                            self._fact_candidate_embedding *
                            tf.expand_dims(prior_distribution, -1), 1)

                if self.knowledge_fusion is not None:
                    self.knowledge_fusion = tf.nn.dropout(
                        self.knowledge_fusion, keep_prob=1.0 - self.dropout)

            else:
                self.kld_loss = tf.constant(0.0)
                self.knowledge_distribution = None
                self.knowledge_fusion = None
                self.knowledge_bow_loss = tf.constant(0.0)

            logits, sampled_id, scores = self.create_decoder(
                encoder_outputs,
                encoder_states,
            )
            self.logits = logits
            self.scores = scores
            if self.mode != model_helper.INFER:

                loss = self.compute_loss(logits,
                                         self._outputs_for_decoder,
                                         self._lengths_for_decoder,
                                         unk_helper=hparams.get(
                                             "unk_helper", True))
                self.train_loss = tf.reduce_sum(loss) / self.batch_size

                self._train_update_loss = self.train_loss
                teach_force_loss = self.compute_loss(
                    self.selector_logits,
                    self._outputs_type_for_decoder,
                    self._lengths_for_decoder,
                    unk_helper=False)
                self.teach_force_loss = tf.reduce_sum(
                    teach_force_loss) / self.batch_size

                if hparams.get("teach_force", False):
                    self._train_update_loss += self.teach_force_loss * hparams.get(
                        "teach_force_rate", 0.5)
                else:
                    pass

                if self._fact_distribution:
                    self._train_update_loss += self.kld_loss

                if hparams.get("knowledge_bow_loss", False):
                    self._train_update_loss += self.knowledge_bow_loss * hparams.get(
                        "knowledge_bow_loss")

                if self.hparams.get('word_bow_loss', 0.0) > 0.0:
                    self._train_update_loss += self.word_bow_loss * self.hparams.get(
                        'word_bow_loss')

                self._cue_fact_loss = tf.constant(0.0)

            else:
                self.sampled_id = self.reverse_target_vocab_table.lookup(
                    tf.to_int64(sampled_id))

        # Print vars
        utils.print_out('-------------Trainable Variables------------------')
        for var in tf.trainable_variables():
            utils.print_out(var)
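
The training branch above distills the posterior fact distribution (computed from both query and response) into the prior (query only) through a clipped-log KL term. A minimal NumPy sketch of that term, summed over candidates for simplicity (the listing above takes a mean over the last axis first); the inputs are illustrative:

import numpy as np

def kld(posterior, prior, eps=1e-9):
    """KL(posterior || prior) per example, with the same clipping as safe_log."""
    prior = np.clip(prior, eps, 1.0)
    posterior_safe = np.clip(posterior, eps, 1.0)
    return np.sum(posterior * np.log(posterior_safe / prior), axis=-1)

posterior = np.array([[0.7, 0.2, 0.1]])
prior = np.array([[0.4, 0.4, 0.2]])
print(kld(posterior, prior))  # ~[0.184]; the gradient pushes the prior toward the posterior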
Code example #21
0
def load_knolwedge_graph(hparams):
    """
    Load the concepts related to the knowledge graph.
    :param hparams:
    :return:
    """
    entity_dict_path = hparams['entity_path']
    relation_dict_path = hparams['relation_path']
    utils.print_out("load entity dict from %s" % entity_dict_path)
    utils.print_out("load relation dict from %s" % relation_dict_path)

    entity_embed_path = hparams['entity_embedding_path']
    relation_embed_path = hparams['relation_embedding_path']

    embed_dim = hparams['entity_dim']

    entity_vocab = lookup_ops.index_table_from_file(entity_dict_path,
                                                    default_value=0)
    reverse_entity_vocab = lookup_ops.index_to_string_table_from_file(
        entity_dict_path, default_value=UNK_ENTITY)
    padding_entity_list = [
        UNK_ENTITY, NONE_ENTITY, PAD_ENTITY, NOT_HEAD_ENTITY, NOT_TAIL_ENTITY
    ]
    padding_relation_list = [NONE_RELATION, PAD_RELATION, NOT_TBD]

    entity_list = []
    relation_list = []

    entity_dict = dict()
    relation_dict = dict()

    # Make sure the padding tokens occupy the expected leading positions
    with open(entity_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            entity_list.append(e)
            entity_dict[e] = i
    for i in range(len(padding_entity_list)):
        assert padding_entity_list[i] == entity_list[i]

    with open(relation_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            relation_list.append(e)
            relation_dict[e] = i
    for i in range(len(padding_relation_list)):
        assert padding_relation_list[i] == relation_list[i]

    print("Loading entity vectors...")
    entity_embed = []
    with open(entity_embed_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if '\t' not in line:
                s = line.strip().split(' ')
            else:
                s = line.strip().split('\t')
            entity_embed.append([float(x) for x in s])

    print("Loading relation vectors...")
    relation_embed = []
    with open(relation_embed_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if '\t' not in line:
                s = line.strip().split(' ')
            else:
                s = line.strip().split('\t')
            relation_embed.append([float(x) for x in s])

    entity_embed = np.array(entity_embed, dtype=np.float32)
    relation_embed = np.array(relation_embed, dtype=np.float32)
    entity_embed = tf.get_variable('entity_embed',
                                   dtype=tf.float32,
                                   initializer=entity_embed,
                                   trainable=False)
    relation_embed = tf.get_variable('relation_embed',
                                     dtype=tf.float32,
                                     initializer=relation_embed,
                                     trainable=False)
    entity_embed = tf.reshape(entity_embed, [-1, embed_dim])
    relation_embed = tf.reshape(relation_embed, [-1, embed_dim])

    padding_entity_embedding = tf.get_variable(
        'entity_padding_embed', [len(padding_entity_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    padding_relation_embedding = tf.get_variable(
        'relation_padding_embed', [len(padding_relation_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())

    tf_entity_embed = tf.concat([padding_entity_embedding, entity_embed],
                                axis=0)
    tf_relation_embed = tf.concat([padding_relation_embedding, relation_embed],
                                  axis=0)
    tf_entity_embed = tf.layers.dense(tf_entity_embed,
                                      hparams['entity_dim'],
                                      use_bias=False,
                                      name='entity_embedding')
    tf_relation_embed = tf.layers.dense(tf_relation_embed,
                                        hparams['entity_dim'],
                                        use_bias=False,
                                        name='relation_embedding')
    tf_entity_embed = tf.concat([tf_entity_embed, tf_relation_embed], axis=0)

    # Facts
    utils.print_out('Loading facts')
    fact_dict_path = hparams['fact_path']
    entity_fact = []
    entity_target = []
    with open(fact_dict_path, encoding='utf-8') as fin:
        lines = fin.readlines()
        utils.print_out('Total Entity-Fact : %d' % len(lines))
        for line in lines:
            items = line.strip('\n').split()
            for i in [0, 1, 3]:
                items[i] = int(entity_dict.get(items[i], 0))
            items[2] = int(relation_dict.get(items[2])) + len(
                entity_dict)  # relations and entities share one id space
            entity_fact.append(items[1:])
            entity_target.append(items[0])  # uni ids
    entity_fact = np.array(entity_fact, dtype=np.int32)
    entity_target = np.array(entity_target, dtype=np.int32)
    entity_fact = np.reshape(entity_fact, [len(lines), 3])
    entity_target = np.reshape(entity_target, [len(lines)])
    tf_entity_fact = tf.constant(value=entity_fact, dtype=np.int32)
    tf_entity_target = tf.constant(value=entity_target, dtype=np.int32)

    tf_entity_fact_embedding = tf.nn.embedding_lookup(tf_entity_embed,
                                                      tf_entity_fact)
    tf_entity_fact_embedding = tf.reshape(tf_entity_fact_embedding,
                                          [-1, 3 * hparams['entity_dim']])

    return tf_entity_embed, tf_entity_fact_embedding, tf_entity_target, entity_vocab, reverse_entity_vocab
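
Each fact row above is a (head, relation, tail) triple whose three ids index the joint entity-plus-relation table (relation ids are shifted by the entity-vocabulary size) and whose embeddings are flattened into one 3 * entity_dim vector. A toy NumPy sketch of that assembly; all ids and dimensions are made up:

import numpy as np

embed_dim = 4
# Joint table: entity rows first, relation rows appended after them.
joint_embed = np.random.RandomState(0).randn(10, embed_dim).astype(np.float32)

# (head_id, relation_id, tail_id); 7 = relation 1 shifted by an entity vocab of 6.
fact = np.array([2, 7, 3])
fact_embedding = joint_embed[fact]            # [3, embed_dim]
fact_embedding = fact_embedding.reshape(-1)   # [3 * embed_dim], as fed to the model
print(fact_embedding.shape)                   # (12,)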
Code example #22
0
def train():
    # Load config
    hparams = config_parser.load_and_restore_config(args.config_path, verbose=True)
    out_dir = hparams['model_path']
    eval_file = os.path.join(out_dir, 'eval_out.txt')

    status_per_steps = hparams['status_per_steps']
    status_counter = Status(status_per_steps)

    # Dataset
    dataset = dataset_utils.create_flexka3_iterator(hparams)
    if hparams.get('rank_based', False):
        model = RModel(dataset, hparams, model_helper.TRAIN)
    else:
        model = Model(dataset, hparams, model_helper.TRAIN)
    dropout = dataset['dropout']

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(out_dir, model, sess)
        dataset['init_fn'](sess)
        epoch_start_time = time.time()
        while not utils.should_stop(epoch, step, hparams):
            try:
                gradient, lr, _, loss, regulation_loss, step, epoch, batch_size, cue_fact, probs, kld_loss = sess.run(
                    [
                        model.grad_norm, model.learning_rate, model.update,
                        model._knowledge_bow_loss, model.regulation_loss,
                        model.global_step, model.epoch_step, model.batch_size,
                        dataset['cue_fact'], model.classifier_scores,
                        model.kld_loss
                    ],
                    feed_dict={
                        dropout: hparams['dropout'],
                        model.learning_rate: hparams['learning_rate']
                    })

                ranks, reversed_ranks, hits = batch_rank_eval(cue_fact, probs, hitAT=(1, 5, 10, 20))
                MR = np.average(ranks)
                MRR = np.average(reversed_ranks)
                hit1 = np.average(hits[0]) * 100
                hit5 = np.average(hits[1]) * 100
                hit10 = np.average(hits[2]) * 100
                hit20 = np.average(hits[3]) * 100

                status_counter.add_record(
                    {
                        'gradient': gradient,
                        'loss': loss,
                        'kld': kld_loss * 1000000,
                        'lr': lr,
                        'MR': MR,
                        'MRR': MRR,
                        'hit1': hit1,
                        'hit5': hit5,
                        'hit10': hit10,
                        'hit20': hit20
                    }, step, epoch)

            except tf.errors.InvalidArgumentError as e:
                print('Found Inf or NaN global norm')
                raise e
            except tf.errors.OutOfRangeError:
                utils.print_out('epoch %d is finished,  step %d' % (epoch, step))
                sess.run([model.next_epoch])
                # Save Epoch
                model.saver.save(
                    sess,
                    os.path.join(out_dir, "seq2seq.ckpt"),
                    global_step=model.global_step)
                utils.print_out('Saved model to -> %s' % out_dir)

                # EVAL on Dev/Test Set:
                for prefix in ['valid_', 'test_']:
                    dataset['init_fn'](sess, prefix)
                    eval_loss = []
                    eval_count = []
                    eval_batch = []
                    MRs = []
                    MRRs = []
                    hit1s = []
                    hit5s = []
                    hit10s = []
                    hit20s = []

                    while True:
                        try:
                            loss, batch_size, cue_fact, probs, kld_loss = sess.run(
                                [model._knowledge_bow_loss, model.batch_size,
                                 dataset['cue_fact'], model.classifier_scores,
                                 model.kld_loss],
                                feed_dict={dropout: 0.0})
                            eval_loss.append(loss)
                            eval_batch.append(batch_size)

                            ranks, reversed_ranks, hits = batch_rank_eval(cue_fact, probs, hitAT=(1, 5, 10, 20))
                            MRs = MRs + ranks
                            MRRs = MRRs + reversed_ranks
                            hit1s = hit1s + hits[0]
                            hit5s = hit5s + hits[1]
                            hit10s = hit10s + hits[2]
                            hit20s = hit20s + hits[3]


                        except tf.errors.OutOfRangeError:
                            break
                    loss = sum(eval_loss) / len(eval_loss)
                    MR = np.average(MRs)
                    MRR = np.average(MRRs)
                    hit1 = np.average(hit1s) * 100
                    hit5 = np.average(hit5s) * 100
                    hit10 = np.average(hit10s) * 100
                    hit20 = np.average(hit20s) * 100
                    KLD = kld_loss * 1000000

                    if prefix == 'valid_':
                        utils.print_out('Eval on Dev: EVAL LOSS: %.4f' % (loss))
                        utils.eval_print(eval_file, 'Eval on Dev: Epoch %d Step %d EVAL LOSS: %.4f' % (epoch, step, loss))
                        utils.print_out('Eval on Dev KLD=%.2f,MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' % (KLD,MR,MRR,hit1,hit5,hit10,hit20))
                        utils.eval_print(eval_file,
                                         'Eval on Dev KLD=%.2f,MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' % (KLD,MR,MRR,hit1,hit5,hit10,hit20))

                        hparams['loss'].append(float(loss))
                        hparams['epochs'].append(int(step))
                        config_parser.save_config(hparams)
                        if min(hparams['loss']) - loss >= 0:
                            model.ppl_saver.save(
                                sess,
                                os.path.join(out_dir, 'min_ppl', "seq2seq.ckpt"),
                                global_step=model.global_step)
                            utils.print_out('Saved min_ppl model to -> %s' % out_dir)

                        if len(hparams['loss']) > 1:
                            if hparams['loss'][-1] > hparams['loss'][-2]:
                                hparams['learning_rate'] = hparams['learning_rate'] * hparams['learning_halve']
                                utils.eval_print(eval_file, 'Halved the learning rate to %f' % hparams['learning_rate'])
                                config_parser.save_config(hparams)
                    else:
                        utils.print_out('Eval on Test: EVAL PPL: %.4f' % (loss))
                        utils.print_out('Eval on Test KLD=%.2f,MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' % (
                        KLD,MR, MRR, hit1, hit5, hit10, hit20))
                        utils.eval_print(eval_file,
                                         'Eval on Test KLD=%.2f,MR=%.2f,MRR=%.2f,hit1=%.2f,hit5=%.2f,hit10=%.2f,hit20=%.2f' % (
                        KLD,MR, MRR, hit1, hit5, hit10, hit20))
                        utils.eval_print(eval_file,
                                         'Eval on Test: Epoch %d Step %d EVAL PPL: %.4f' % (epoch, step, loss))

                # NEXT EPOCH
                epoch_time = time.time() - epoch_start_time
                utils.print_time(epoch_time, 'Epoch Time:')
                remaining_time = epoch_time * (hparams['num_train_epochs'] -
                                               epoch - 1)
                utils.print_time(remaining_time, 'Remaining Time:')
                epoch_start_time = time.time()

                dataset['init_fn'](sess)

        utils.print_out('model has been fully trained!')
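
batch_rank_eval is called above but not shown in this listing. A hedged sketch of what it plausibly computes, judging by how its outputs are averaged into MR, MRR and hits@k (shapes assumed: targets [batch] gold candidate indices, probs [batch, num_candidates]):

import numpy as np

def batch_rank_eval(targets, probs, hitAT=(1, 5, 10, 20)):
    """Rank the gold candidate among the scored candidates for each example."""
    ranks, reversed_ranks = [], []
    hits = [[] for _ in hitAT]
    for gold, p in zip(targets, probs):
        # Rank = 1 + number of candidates scored strictly above the gold one.
        rank = 1 + int(np.sum(p > p[gold]))
        ranks.append(rank)
        reversed_ranks.append(1.0 / rank)
        for j, k in enumerate(hitAT):
            hits[j].append(1.0 if rank <= k else 0.0)
    return ranks, reversed_ranks, hits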
Code example #23
0
    def create_model(self, name='flexka'):

        def safe_log(y):
            return tf.log(tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

        hparams = self.hparams
        with tf.variable_scope(name) as scope:
            encoder_outputs, encoder_states = self.create_encoder(self._input_embeddings_for_encoder,
                                                                  self._input_entity_embeddings_for_encoder,
                                                                  self._lengths_for_encoder,
                                                                  )

            self.kld_loss = tf.constant(0.0)
            self.knowledge_distribution = None
            self.knowledge_fusion = None
            self.knowledge_bow_loss = tf.constant(0.0)

            maximium_candidate_num = tf.shape(self._fact_candidate_embedding)[1]

            fact_seq_mask = tf.sequence_mask(self._lengths_for_fact_candidate, dtype=tf.float32)
            unk_mask = tf.sequence_mask(tf.ones_like(self._lengths_for_fact_candidate), maxlen=maximium_candidate_num,
                                        dtype=tf.float32)
            fact_mask = (1.0 - fact_seq_mask) * -1e10 + unk_mask * -1e10

            fact_embedding_projection = self.fact_projection

            if hparams.get("flexka_classifier_mode", 'dot') == 'dot': # Student Network
                fact_embedding_projection = tf.nn.dropout(fact_embedding_projection, keep_prob=1.0 - self.dropout)
                classifier_inputs = tf.concat(encoder_states, -1)
                classifier_inputs = tf.nn.dropout(classifier_inputs, keep_prob=1.0 - self.dropout)
                classifier_projection = tf.layers.dense(classifier_inputs, units=300, activation=tf.nn.tanh,
                                                  use_bias=True, name='classifier_inputs')
                classifier_projection = tf.expand_dims(classifier_projection, 1)
                classifier_projection = tf.tile(classifier_projection, [1, maximium_candidate_num, 1])
                classifier_scores = tf.reduce_sum(classifier_projection * fact_embedding_projection, -1)
                classifier_scores += fact_mask
                classifier_probs = tf.nn.softmax(classifier_scores)
            elif hparams.get("flexka_classifier_mode", 'dot') == 'attention':  # Student Network
                # [batch, fact_len, dim]
                fact_query = self.fact_projection
                # [batch, encoder_len, dim]
                concated_encoder_states = tf.concat(encoder_outputs, -1)
                concated_encoder_states = tf.nn.dropout(concated_encoder_states, keep_prob=1.0 - self.dropout)
                encoder_key = tf.layers.dense(concated_encoder_states, units=300,
                                               activation=tf.nn.tanh,
                                               name='encoder_keys')
                # [batch, encoder_len, dim]
                encoder_value = tf.layers.dense(concated_encoder_states, units=300,
                                               activation=tf.nn.tanh,
                                               name='encoder_values')
                # [batch, fact_len, encoder_len]
                fact_encoder_logits = tf.matmul(fact_query, tf.transpose(encoder_key, [0, 2, 1]))
                fact_encoder_probs = tf.nn.softmax(fact_encoder_logits, -1)
                # [batch, fact_len, dim]
                fact_encoder = tf.matmul(fact_encoder_probs, encoder_value)
                classifier_scores = tf.reduce_sum(fact_encoder * fact_embedding_projection, -1)
                classifier_scores += fact_mask
                classifier_probs = tf.nn.softmax(classifier_scores)
            elif hparams.get("flexka_classifier_mode", 'dot') == 'prior_posterior_attention':  # Student Network
                # [batch, fact_len, dim]
                fact_query = self.fact_projection
                concated_encoder_states = tf.concat(encoder_outputs, -1)
                concated_encoder_states = tf.nn.dropout(concated_encoder_states, keep_prob=1.0 - self.dropout)
                encoder_key = tf.layers.dense(concated_encoder_states, units=300,
                                               activation=tf.nn.tanh,
                                               name='encoder_keys')
                # [batch, encoder_len, dim]
                encoder_value = tf.layers.dense(concated_encoder_states, units=300,
                                               activation=tf.nn.tanh,
                                               name='encoder_values')
                # [batch, fact_len, encoder_len]
                fact_encoder_logits = tf.matmul(fact_query, tf.transpose(encoder_key, [0, 2, 1]))
                fact_encoder_probs = tf.nn.softmax(fact_encoder_logits, -1)
                # [batch, fact_len, dim]
                fact_encoder = tf.matmul(fact_encoder_probs, encoder_value)
                classifier_scores = tf.reduce_sum(fact_encoder * fact_embedding_projection, -1)
                classifier_scores += fact_mask
                prior_classifier_probs = tf.nn.softmax(classifier_scores)

                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    decoder_encoder_outputs, decoder_encoder_states = self.create_encoder(
                        self._input_embeddings_for_decoder,
                        self._input_entity_embeddings_for_decoder,
                        self._lengths_for_decoder)


                posterior_classifier_inputs = tf.concat(
                    encoder_states + decoder_encoder_states, -1)
                posterior_classifier_inputs = tf.nn.dropout(posterior_classifier_inputs, keep_prob=1.0 - self.dropout)
                posterior_classifier_projection = tf.layers.dense(posterior_classifier_inputs, units=300, activation=tf.nn.tanh,
                                                        use_bias=True, name='posterior_classifier_inputs')
                posterior_classifier_projection = tf.expand_dims(posterior_classifier_projection, 1)
                posterior_classifier_projection = tf.tile(posterior_classifier_projection, [1, maximium_candidate_num, 1])
                posterior_classifier_scores = tf.reduce_sum(posterior_classifier_projection * fact_embedding_projection, -1)
                posterior_classifier_scores += fact_mask
                posterior_classifier_probs = tf.nn.softmax(posterior_classifier_scores)
                posterior_classifier_probs_for_kld = tf.nn.softmax(posterior_classifier_scores / hparams.get("kld_temp", 1.0))

                kld_loss = posterior_classifier_probs_for_kld * safe_log(
                    posterior_classifier_probs_for_kld / tf.clip_by_value(prior_classifier_probs, 1e-9,
                                                                          1.0))

                self.kld_loss = tf.reduce_sum(kld_loss) / self.batch_size
                classifier_probs = prior_classifier_probs

            elif hparams.get("flexka_classifier_mode", 'dot') == 'posterior_dot':  # Teacher Network
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    decoder_encoder_outputs, decoder_encoder_states = self.create_encoder(
                        self._input_embeddings_for_decoder,
                        self._input_entity_embeddings_for_decoder,
                        self._lengths_for_decoder)
                fact_embedding_projection = tf.nn.dropout(fact_embedding_projection, keep_prob=1.0 - self.dropout)
                posterior_classifier_inputs = tf.concat(encoder_states+decoder_encoder_states, -1)
                posterior_classifier_inputs = tf.nn.dropout(posterior_classifier_inputs, keep_prob=1.0 - self.dropout)
                posterior_classifier_projection = tf.layers.dense(posterior_classifier_inputs, units=300, activation=tf.nn.tanh,
                                                        use_bias=True, name='posterior_classifier_inputs')
                posterior_classifier_projection = tf.expand_dims(posterior_classifier_projection, 1)
                posterior_classifier_projection = tf.tile(posterior_classifier_projection, [1, maximium_candidate_num, 1])
                posterior_classifier_scores = tf.reduce_sum(posterior_classifier_projection * fact_embedding_projection, -1)
                posterior_classifier_scores += fact_mask
                posterior_classifier_probs = tf.nn.softmax(posterior_classifier_scores)
                classifier_probs = posterior_classifier_probs
            elif hparams.get("flexka_classifier_mode", 'dot') in {'prior_posterior_dot','lazy_prior_posterior_dot'}: # Teacher Network
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                        decoder_encoder_outputs, decoder_encoder_states = self.create_encoder(
                            self._input_embeddings_for_decoder,
                            self._input_entity_embeddings_for_decoder,
                            self._lengths_for_decoder)

                fact_embedding_projection = tf.nn.dropout(fact_embedding_projection, keep_prob=1.0 - self.dropout)

                posterior_classifier_inputs = tf.concat(encoder_states+decoder_encoder_states, -1)
                posterior_classifier_inputs = tf.nn.dropout(posterior_classifier_inputs, keep_prob=1.0 - self.dropout)
                posterior_classifier_projection = tf.layers.dense(posterior_classifier_inputs, units=300, activation=tf.nn.tanh,
                                                        use_bias=True, name='posterior_classifier_inputs')
                posterior_classifier_projection = tf.expand_dims(posterior_classifier_projection, 1)
                posterior_classifier_projection = tf.tile(posterior_classifier_projection, [1, maximium_candidate_num, 1])
                posterior_classifier_scores = tf.reduce_sum(posterior_classifier_projection * fact_embedding_projection, -1)
                posterior_classifier_scores += fact_mask
                posterior_classifier_probs = tf.nn.softmax(posterior_classifier_scores)
                posterior_classifier_probs_for_kld = tf.nn.softmax(posterior_classifier_scores / hparams.get("kld_temp", 1.0))

                classifier_inputs = tf.concat(encoder_states, -1)
                classifier_inputs = tf.nn.dropout(classifier_inputs, keep_prob=1.0 - self.dropout)
                classifier_projection = tf.layers.dense(classifier_inputs, units=300, activation=tf.nn.tanh,
                                                        use_bias=True, name='classifier_inputs')
                classifier_projection = tf.expand_dims(classifier_projection, 1)
                classifier_projection = tf.tile(classifier_projection, [1, maximium_candidate_num, 1])
                classifier_scores = tf.reduce_sum(classifier_projection * fact_embedding_projection, -1)
                classifier_scores += fact_mask
                prior_classifier_probs = tf.nn.softmax(classifier_scores)

                kld_loss = posterior_classifier_probs_for_kld * safe_log(
                    posterior_classifier_probs_for_kld / tf.clip_by_value(prior_classifier_probs, 1e-9,
                                                                  1.0))

                self.kld_loss = tf.reduce_sum(kld_loss) / self.batch_size
                classifier_probs = prior_classifier_probs

            elif hparams.get("flexka_classifier_mode", 'dot') == 'mlp':
                classifier_inputs = tf.concat(encoder_states, -1)
                classifier_inputs = tf.nn.dropout(classifier_inputs, keep_prob=1.0 - self.dropout)
                classifier_projection = tf.layers.dense(classifier_inputs, units=300, activation=tf.nn.tanh,
                                                        use_bias=True, name='classifier_inputs')
                classifier_projection = tf.expand_dims(classifier_projection, 1)
                classifier_projection = tf.tile(classifier_projection, [1, maximium_candidate_num, 1])
                score_input = tf.concat([classifier_projection, fact_embedding_projection], -1)
                score_input = tf.nn.dropout(score_input, keep_prob=1.0 - self.dropout)
                classifier_scores = tf.layers.dense(score_input, units=1, activation=tf.nn.tanh,
                                                    name='score_estimator')
                # Squeeze only the last axis so a batch of size 1 survives.
                classifier_scores = tf.squeeze(classifier_scores, -1)
                classifier_scores += fact_mask
                classifier_probs = tf.nn.softmax(classifier_scores)

            else:
                raise ValueError("Unknown flexka_classifier_mode: %s" %
                                 hparams.get("flexka_classifier_mode", 'dot'))
            # Note: despite its name, this attribute holds normalized probabilities.
            self.classifier_scores = classifier_probs

            if self.mode == model_helper.TRAIN:

                knowledge_bow_loss = - tf.reduce_sum(self._golden_fact_bow * safe_log(classifier_probs),-1)
                self._knowledge_bow_loss = tf.reduce_sum(knowledge_bow_loss) / self.batch_size
                self._train_update_loss = self._knowledge_bow_loss
                if hparams.get("flexka_classifier_mode", 'dot') in {'prior_posterior_dot',
                                                                    'prior_posterior_attention',
                                                                    'lazy_prior_posterior_dot'}:
                    posterior_knowledge_bow_loss = - tf.reduce_sum(self._golden_fact_bow * safe_log(posterior_classifier_probs),
                                                         -1)
                    posterior_knowledge_bow_loss = tf.reduce_sum(posterior_knowledge_bow_loss) / self.batch_size
                    self._train_update_loss += posterior_knowledge_bow_loss

                regulation_loss = (tf.reduce_sum((1.0 - classifier_probs * classifier_probs) * fact_seq_mask) / self.batch_size)
                self.regulation_loss = regulation_loss
                if hparams.get("flexka_classifier_regulation_loss", 0.0) > 0.0:
                    # self._train_update_loss = self._knowledge_bow_loss - self._neg_knowledge_bow_loss
                    self._train_update_loss = self._knowledge_bow_loss + regulation_loss * hparams.get("flexka_classifier_regulation_loss", 0.0)

        # Print vars
        utils.print_out('-------------Trainable Variables------------------')
        for var in tf.trainable_variables():
            utils.print_out(var)
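
Every classifier mode above reuses the same additive-masking idea: padded candidate slots and the reserved UNK candidate at index 0 receive -1e10 before the softmax, so they end up with near-zero probability. A small NumPy sketch of that masking (the function name is illustrative):

import numpy as np

def masked_candidate_softmax(scores, lengths):
    batch, max_num = scores.shape
    seq_mask = (np.arange(max_num)[None, :] < lengths[:, None]).astype(np.float32)
    unk_mask = (np.arange(max_num)[None, :] < 1).astype(np.float32)  # index 0 only
    mask = (1.0 - seq_mask) * -1e10 + unk_mask * -1e10
    masked = scores + mask
    e = np.exp(masked - masked.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

scores = np.array([[0.5, 1.0, 2.0, 0.3]])
print(masked_candidate_softmax(scores, np.array([3])))
# index 0 (UNK) and the padded index 3 get ~0; mass splits between indices 1 and 2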
Code example #24
0
File: run_kefu2.py  Project: yuelala/ACL2020-ConKADI
def train():
    # Load config
    hparams = config_parser.load_and_restore_config(args.config_path,
                                                    verbose=True)
    out_dir = hparams['model_path']
    eval_file = os.path.join(out_dir, 'eval_out.txt')

    status_per_steps = hparams['status_per_steps']
    status_counter = Status(status_per_steps)

    # dataset iterator
    dataset = dataset_utils.create_flexka2_iterator(hparams, is_eval=False)
    model = Model(dataset, hparams, model_helper.TRAIN)
    dropout = dataset['dropout']

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:

        # Initialize or restore a model
        step, epoch = model_helper.create_or_restore_a_model(
            out_dir, model, sess)
        dataset['init_fn'](sess)
        epoch_start_time = time.time()
        while not utils.should_stop(epoch, step, hparams):
            try:
                teach_force_loss, kld_loss, knowledge_bow_loss, word_bow_loss, lr, _, loss, step, epoch, predict_count, batch_size \
                    = sess.run([
                    model.teach_force_loss, model.kld_loss, model.knowledge_bow_loss, model.word_bow_loss,
                    model.learning_rate, model.update, model.train_loss,
                    model.global_step, model.epoch_step,
                    model.predict_count, model.batch_size],
                    feed_dict={dropout: hparams['dropout'], model.learning_rate: hparams['learning_rate']})

                # print(sess.run(model.debug))
                ppl = utils.safe_exp(loss * batch_size / predict_count)
                status_counter.add_record(
                    {
                        'ppl': ppl,
                        'loss': loss,
                        'mode_loss': teach_force_loss,
                        'word_bow_loss': word_bow_loss,
                        'knowledge_bow_loss': knowledge_bow_loss,
                        'kld_loss': kld_loss * 1000000,
                        'lr': lr,
                        'count': predict_count
                    }, step, epoch)

            except tf.errors.InvalidArgumentError as e:
                print('Found Inf or NaN global norm')
                raise e
            except tf.errors.OutOfRangeError:
                utils.print_out('epoch %d is finished,  step %d' %
                                (epoch, step))
                sess.run([model.next_epoch])
                # Save Epoch
                model.saver.save(sess,
                                 os.path.join(out_dir, "seq2seq.ckpt"),
                                 global_step=model.global_step)
                utils.print_out('Saved model to -> %s' % out_dir)

                # EVAL on Dev/Test Set:
                for prefix in ['valid_', 'test_']:
                    dataset['init_fn'](sess, prefix)
                    eval_loss = []
                    eval_count = []
                    eval_batch = []
                    while True:
                        try:
                            loss, predict_count, batch_size = sess.run(
                                [
                                    model.train_loss, model.predict_count,
                                    model.batch_size
                                ],
                                feed_dict={dropout: 0.0})
                            eval_loss.append(loss)
                            eval_count.append(predict_count)
                            eval_batch.append(batch_size)
                        except tf.errors.OutOfRangeError:
                            break
                    ppl = utils.safe_exp(
                        sum(eval_loss) * sum(eval_batch) / len(eval_batch) /
                        sum(eval_count))

                    if prefix == 'valid_':
                        utils.print_out('Eval on Dev: EVAL PPL: %.4f' % (ppl))
                        utils.eval_print(
                            eval_file,
                            'Eval on Dev: Epoch %d Step %d EVAL PPL: %.4f' %
                            (epoch, step, ppl))

                        hparams['loss'].append(float(ppl))
                        hparams['epochs'].append(int(step))
                        config_parser.save_config(hparams)

                        if min(hparams['loss']) - ppl >= 0:
                            model.ppl_saver.save(sess,
                                                 os.path.join(
                                                     out_dir, 'min_ppl',
                                                     "seq2seq.ckpt"),
                                                 global_step=model.global_step)
                            utils.print_out('Saved min_ppl model to -> %s' %
                                            out_dir)

                        if len(hparams['loss']) > 1:
                            if hparams['loss'][-1] > hparams['loss'][-2]:
                                hparams['learning_rate'] = hparams[
                                    'learning_rate'] * hparams['learning_halve']
                                utils.eval_print(
                                    eval_file,
                                    'Halved the learning rate to %f' %
                                    hparams['learning_rate'])
                                config_parser.save_config(hparams)
                    else:
                        utils.print_out('Eval on Test: EVAL PPL: %.4f' % (ppl))
                        utils.eval_print(
                            eval_file,
                            'Eval on Test: Epoch %d Step %d EVAL PPL: %.4f' %
                            (epoch, step, ppl))

                # NEXT EPOCH
                epoch_time = time.time() - epoch_start_time
                utils.print_time(epoch_time, 'Epoch Time:')
                remaining_time = epoch_time * (hparams['num_train_epochs'] -
                                               epoch - 1)
                utils.print_time(remaining_time, 'Remaining Time:')
                epoch_start_time = time.time()

                dataset['init_fn'](sess)

        utils.print_out('model has been fully trained!')
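
The perplexity reported above is exp of the average per-token loss, guarded against overflow. A hedged sketch of utils.safe_exp as it is used here (the repository's own helper is not shown in this listing):

import math

def safe_exp(value):
    """exp() that returns inf on overflow instead of raising, so a huge
    early-training loss does not crash the logging."""
    try:
        return math.exp(value)
    except OverflowError:
        return float('inf')

loss, batch_size, predict_count = 4.2, 32, 640   # illustrative numbers
ppl = safe_exp(loss * batch_size / predict_count)
print(round(ppl, 2))                             # exp(0.21) ~= 1.23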
Code example #25
0
def load_entity_vocab(hparams):
    """
    Currently same as GenDS.knowledge_utils.load_entity_vocab
    :param hparams:
    :return:
    """
    word2entity_dict_path = hparams['word2entity_dict_path']
    entity2word_dict_path = hparams['entity2word_dict_path']
    entity_dict_path = hparams['entity_path']
    relation_dict_path = hparams['relation_path']
    entity_embed_path = hparams['entity_embedding_path']
    relation_embed_path = hparams['relation_embedding_path']
    embed_dim = hparams['entity_dim']
    utils.print_out("load entity dict from %s" % entity_dict_path)
    inv_relation = hparams.get('flexka_inv_relation', False)

    entity_vocab = lookup_ops.index_table_from_file(entity_dict_path,
                                                    default_value=0)
    reverse_entity_vocab = lookup_ops.index_to_string_table_from_file(
        entity_dict_path, default_value=UNK_ENTITY)
    padding_entity_list = [
        UNK_ENTITY, NONE_ENTITY, PAD_ENTITY, NOT_HEAD_ENTITY, NOT_TAIL_ENTITY
    ]
    padding_relation_list = [NONE_RELATION, PAD_RELATION, NOT_TBD]

    # word2entity
    with open(word2entity_dict_path, encoding='utf-8') as fin:
        word2entities = np.array([int(x.strip('\n')) for x in fin.readlines()],
                                 dtype=np.int32)
        word2entities = tf.get_variable('word2entities',
                                        dtype=tf.int32,
                                        initializer=word2entities,
                                        trainable=False)

    # entity2word
    with open(entity2word_dict_path, encoding='utf-8') as fin:
        entity2words = np.array([int(x.strip('\n')) for x in fin.readlines()],
                                dtype=np.int32)
        entity2words = tf.get_variable('entity2words',
                                       dtype=tf.int32,
                                       initializer=entity2words,
                                       trainable=False)

    entity_list = []
    relation_list = []

    entity_dict = dict()
    relation_dict = dict()

    # check
    with open(entity_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            entity_list.append(e)
            entity_dict[e] = i
    for i in range(len(padding_entity_list)):
        assert padding_entity_list[i] == entity_list[i]

    with open(relation_dict_path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            e = line.strip()
            relation_list.append(e)
            relation_dict[e] = i
    for i in range(len(padding_relation_list)):
        assert padding_relation_list[i] == relation_list[i]

    print("Loading entity vectors...")
    entity_embed = []
    with open(entity_embed_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if '\t' not in line:
                s = line.strip().split(' ')
            else:
                s = line.strip().split('\t')
            entity_embed.append([float(x) for x in s])
    print("Loading relation vectors...")
    relation_embed = []
    with open(relation_embed_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if '\t' not in line:
                s = line.strip().split(' ')
            else:
                s = line.strip().split('\t')
            relation_embed.append([float(x) for x in s])

    entity_embed = np.array(entity_embed, dtype=np.float32)
    relation_embed = np.array(relation_embed, dtype=np.float32)

    entity_embed = tf.get_variable('entity_embed',
                                   dtype=tf.float32,
                                   initializer=entity_embed,
                                   trainable=False)
    relation_embed = tf.get_variable('relation_embed',
                                     dtype=tf.float32,
                                     initializer=relation_embed,
                                     trainable=False)

    entity_embed = tf.reshape(entity_embed, [-1, embed_dim])
    relation_embed = tf.reshape(relation_embed, [-1, embed_dim])

    padding_entity_embedding = tf.get_variable(
        'entity_padding_embed', [len(padding_entity_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    padding_relation_embedding = tf.get_variable(
        'relation_padding_embed', [len(padding_relation_list), embed_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    tf_entity_embed = tf.concat([padding_entity_embedding, entity_embed],
                                axis=0)
    tf_relation_embed = tf.concat([padding_relation_embedding, relation_embed],
                                  axis=0)
    tf_entity_embed = tf.layers.dense(tf_entity_embed,
                                      hparams['entity_dim'],
                                      use_bias=False,
                                      name='entity_embedding_transformer')
    tf_relation_embed = tf.layers.dense(tf_relation_embed,
                                        hparams['entity_dim'],
                                        use_bias=False,
                                        name='relation_embedding_transformer')

    if inv_relation:
        print('inv_relation')
        inv_relation = tf.layers.dense(
            relation_embed,
            hparams['entity_dim'],
            use_bias=False,
            name='inv_relation_embedding_transformer')
        tf_relation_embed = tf.concat([tf_relation_embed, inv_relation],
                                      axis=0)

    tf_entity_embed = tf.concat([tf_entity_embed, tf_relation_embed], axis=0)

    # Facts
    utils.print_out('Loading facts')
    fact_dict_path = hparams['fact_path']
    entity_fact = []
    entity_source = []
    entity_target = []

    fact_idf = []
    with open(fact_dict_path, encoding='utf-8') as fin:
        lines = fin.readlines()
        utils.print_out('Total Entity-Fact : %d' % len(lines))
        for line in lines:
            items = line.strip('\n').split()
            # 0: entity_in_post, 1: entity_in_response, 2: head, 3: relation,
            # 4: tail, 5-7: scores
            for i in [0, 1, 2, 4]:
                items[i] = int(entity_dict.get(items[i], 0))
            if items[3] not in relation_dict:
                raise KeyError('Unknown relation in fact file: %s' % items[3])

            items[3] = int(relation_dict.get(items[3])) + len(
                entity_dict)  # relations and entities share one id space
            entity_fact.append(items[2:5])
            entity_source.append(items[0])
            entity_target.append(items[1])  # unified ids

            if len(items) > 5:
                idf = [float(items[5]), float(items[6]), float(items[7])]
            else:
                idf = [0.0, 0.0, 0.0]
            fact_idf.append(idf)

    fact_idf = np.array(fact_idf, dtype=np.float32)
    entity_fact = np.array(entity_fact, dtype=np.int32)
    entity_source = np.array(entity_source, dtype=np.int32)
    entity_target = np.array(entity_target, dtype=np.int32)
    entity_fact = np.reshape(entity_fact, [len(lines), 3])
    entity_source = np.reshape(entity_source, [len(lines)])
    entity_target = np.reshape(entity_target, [len(lines)])

    tf_fact_idf = tf.constant(value=fact_idf, dtype=tf.float32)
    tf_entity_fact = tf.constant(value=entity_fact, dtype=tf.int32)
    tf_entity_source = tf.constant(value=entity_source, dtype=tf.int32)
    tf_entity_target = tf.constant(value=entity_target, dtype=tf.int32)
    tf_entity_fact_embedding = tf.nn.embedding_lookup(tf_entity_embed,
                                                      tf_entity_fact)
    # index by context id
    tf_entity_fact_embedding = tf.reshape(tf_entity_fact_embedding,
                                          [-1, 3 * hparams['entity_dim']])

    return tf_entity_embed, tf_entity_fact_embedding, tf_entity_source, tf_entity_target, entity_vocab, reverse_entity_vocab, word2entities, entiy2words, tf_fact_idf
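For reference, here is a minimal, illustrative parser for one fact-file line, assuming the whitespace-separated layout the loader above expects; the helper name `parse_fact_line` is an assumption for illustration, not code from the project:

# Illustrative sketch (not from the project), assuming the layout:
#   entity_in_post entity_in_response head relation tail [score1 score2 score3]
def parse_fact_line(line, entity_dict, relation_dict):
    items = line.strip('\n').split()
    head = entity_dict.get(items[2], 0)
    tail = entity_dict.get(items[4], 0)
    # relation ids are offset past the entity ids in the shared table
    relation = relation_dict[items[3]] + len(entity_dict)
    scores = [float(x) for x in items[5:8]] or [0.0, 0.0, 0.0]
    return (head, relation, tail), entity_dict.get(items[0], 0), \
        entity_dict.get(items[1], 0), scores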
Code example #26
def save_config(config, config_path=None):
    if config_path is None:
        config_path = config['model_path'] + '/config.json'
    utils.print_out('save json config file to %s' % config_path)
    with open(config_path, 'w+', encoding='utf-8') as fout:
        json.dump(config, fout)
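For completeness, a hedged sketch of the inverse operation; `load_config` is an assumed name for illustration (the project itself goes through `config_parser.load_and_restore_config`, whose behavior is not shown here):

import json

# Hypothetical counterpart to save_config: read a saved JSON config back in.
def load_config(config_path):
    with open(config_path, 'r', encoding='utf-8') as fin:
        return json.load(fin)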
Code example #27
File: run_kefu2.py  Project: yuelala/ACL2020-ConKADI
def test():
    hparams = config_parser.load_and_restore_config(args.config_path,
                                                    verbose=True)
    if args.beam != -1:
        hparams['beam_width'] = args.beam
        utils.print_out("Reset beam_width to %d" % args.beam)
    if args.beam > 10:
        # shrink the batch so batch_size * beam_width stays roughly constant
        hparams['batch_size'] = hparams['batch_size'] * 30 // args.beam

    hparams['length_penalty_weight'] = args.length_penalty_weight
    hparams['diverse_decoding_rate'] = args.diverse_decoding_rate
    hparams['coverage_penalty_weight'] = args.coverage_penalty_weight

    # Dataset
    dataset = dataset_utils.create_flexka2_iterator(hparams, is_eval=True)
    model = Model(dataset, hparams, model_helper.INFER)
    dropout = dataset['dropout']
    entity_word_vocab = []
    with open(hparams['fact_path'], encoding='utf-8') as fin:
        for line in fin.readlines():
            items = line.strip('\n').split()
            items[0] = 'P:' + items[0]
            items[1] = 'E:' + items[1]
            entity_word_vocab.append(','.join(items))

    entity_set = set()
    with open(hparams['entity_path'], encoding='utf-8') as fin:
        for line in fin.readlines():
            items = line.strip('\n')
            entity_set.add(items)

    input_srcs = []
    input_src_lens = []
    with open(hparams['test_src_file'], encoding='utf-8') as fin:
        for line in fin.readlines():
            items = line.strip('\n')
            input_srcs.append(items)
            input_src_lens.append(len(items.split()))

    out_dir = os.path.join(hparams['model_path'], 'min_ppl')
    if not os.path.exists(os.path.join(hparams['model_path'], 'decoded')):
        os.mkdir(os.path.join(hparams['model_path'], 'decoded'))
    if not os.path.exists(
            os.path.join(hparams['model_path'], 'decoded',
                         'fact_attention')):
        os.mkdir(
            os.path.join(hparams['model_path'], 'decoded', 'fact_attention'))

    config_id = 'B%s_L%.1f_D%.1f_C%.1f' % \
                (hparams['beam_width'], args.length_penalty_weight, args.diverse_decoding_rate,
                 args.coverage_penalty_weight)

    beam_out_file_path = os.path.join(hparams['model_path'], 'decoded',
                                      '%s.txt' % config_id)
    top1_out_file_path = os.path.join(hparams['model_path'], 'decoded',
                                      '%s_top1.txt' % config_id)
    topk_out_file_path = os.path.join(hparams['model_path'], 'decoded',
                                      '%s_topk.txt' % config_id)

    test_query_file = hparams['test_src_file']
    test_response_file = hparams['test_tgt_file']

    with open(test_query_file, 'r+', encoding='utf-8') as fin:
        queries = [x.strip('\n') for x in fin.readlines()]
    with open(test_response_file, 'r+', encoding='utf-8') as fin:
        responses = [x.strip('\n') for x in fin.readlines()]

    with tf.Session(config=model_helper.create_tensorflow_config()) as sess:
        step, epoch = model_helper.create_or_restore_a_model(
            out_dir, model, sess)
        dataset['init_fn'](sess, 'test_')

        utils.print_out('Current Epoch,Step : %s/%s, Max Epoch,Step : %s/%s' %
                        (epoch, step, hparams['num_train_epochs'],
                         hparams['num_train_steps']))
        case_id = 0
        with open(beam_out_file_path, 'w+', encoding='utf-8') as fout:
            with open(top1_out_file_path, 'w+', encoding='utf-8') as ftop1:
                with open(topk_out_file_path, 'w+', encoding='utf-8') as ftopk:
                    while True:
                        try:
                            model_selector, facts, lengths_for_facts, src_ids, sample_ids, probs, scores = sess.run(
                                [
                                    model.mode_selector,
                                    dataset['inputs_for_facts'],
                                    dataset['lengths_for_facts'],
                                    dataset['inputs_for_encoder'],
                                    model.sampled_id, model.logits,
                                    model.scores
                                ],
                                feed_dict={dropout: 0.0})

                            num_responses_per_query = sample_ids.shape[0]
                            num_cases = sample_ids.shape[1]
                            for sent_id in range(num_cases):
                                fout.write('#Case : %d\n' % case_id)
                                fout.write('\tquery:\t%s\n' % queries[case_id])
                                fout.write('\tresponse:\t%s\n' %
                                           responses[case_id])

                                if hparams['beam_width'] == 1 and hparams.get(
                                        'fusion_encoder', True):
                                    input_src = input_srcs[case_id].split()
                                    for i in range(len(input_src)):
                                        if input_src[i] in entity_set:
                                            input_src[i] = input_src[i].upper()

                                for beam_id in range(num_responses_per_query):
                                    translations, score = model_helper.get_translation(
                                        sample_ids[beam_id], scores[beam_id],
                                        sent_id, '</s>')
                                    new_translation = []
                                    for token in translations.split():
                                        if token.startswith('$ENT_'):
                                            # entity placeholder: map back to
                                            # the fact's response-side entity
                                            relative_fact_id = int(
                                                token[len('$ENT_'):])
                                            fact = entity_word_vocab[facts[
                                                sent_id, relative_fact_id]]
                                            entity_in_response = fact.split(
                                                ',')[1]
                                            new_translation.append(
                                                '$' + entity_in_response)
                                        elif token.startswith('$CP_'):
                                            # copy placeholder: copy the token
                                            # at this source position
                                            position = int(token[len('$CP_'):])
                                            new_translation.append(
                                                '$C:' +
                                                input_srcs[case_id].split()
                                                [position])
                                        else:
                                            new_translation.append(token)
                                    translations = ' '.join(new_translation)
                                    fout.write('\tBeam%d\t%.4f\t%s\n' %
                                               (beam_id, score, translations))

                                    cleaned = translations.replace(
                                        '#', '').replace('$R:', '').replace(
                                            '$C:', '').replace('$E:', '')
                                    if beam_id == 0:
                                        ftop1.write('%s\n' % cleaned)
                                    ftopk.write('%s\n' % cleaned)
                                case_id += 1
                        except tf.errors.OutOfRangeError:
                            break
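Since the beam output format above is written ad hoc, a small illustrative reader makes the layout explicit; `read_beam_output` is a hypothetical helper, not project code:

# Illustrative reader for the beam output file written above:
# a '#Case : N' header, tab-indented query/response lines, then one
# '\tBeamK\t<score>\t<text>' line per beam candidate.
def read_beam_output(path):
    cases = []
    with open(path, encoding='utf-8') as fin:
        for line in fin:
            if line.startswith('#Case'):
                cases.append({'beams': []})
            elif line.startswith('\tquery:'):
                cases[-1]['query'] = line.split('\t')[2].strip('\n')
            elif line.startswith('\tresponse:'):
                cases[-1]['response'] = line.split('\t')[2].strip('\n')
            elif line.startswith('\tBeam'):
                _, beam, score, text = line.rstrip('\n').split('\t', 3)
                cases[-1]['beams'].append((float(score), text))
    return cases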
Code example #28
    def create_model(self, name='flexka'):
        def safe_log(y):
            # clip before taking the log to avoid log(0) -> -inf / NaN gradients
            return tf.log(tf.clip_by_value(y, 1e-9, tf.reduce_max(y)))

        hparams = self.hparams
        with tf.variable_scope(name) as scope:
            encoder_outputs, encoder_states = self.create_encoder(
                self._input_embeddings_for_encoder,
                self._input_entity_embeddings_for_encoder,
                self._lengths_for_encoder,
            )

            self.kld_loss = tf.constant(0.0)
            self.knowledge_distribution = None
            self.knowledge_fusion = None
            self.knowledge_bow_loss = tf.constant(0.0)

            maximum_candidate_num = tf.shape(
                self._fact_candidate_embedding)[1]

            fact_seq_mask = tf.sequence_mask(self._lengths_for_fact_candidate,
                                             dtype=tf.float32)
            unk_mask = tf.sequence_mask(tf.ones_like(
                self._lengths_for_fact_candidate),
                                        maxlen=maximum_candidate_num,
                                        dtype=tf.float32)
            # large negative bias on padded positions and on the leading UNK
            # slot, so both vanish after a softmax
            fact_mask = (1.0 - fact_seq_mask) * -1e10 + unk_mask * -1e10

            fact_embedding_projection = self.fact_projection
            classifier_inputs = tf.concat(encoder_states, -1)
            classifier_inputs = tf.nn.dropout(classifier_inputs,
                                              keep_prob=1.0 - self.dropout)
            classifier_projection = tf.layers.dense(classifier_inputs,
                                                    units=300,
                                                    activation=tf.nn.elu,
                                                    use_bias=True,
                                                    name='classifier_inputs')
            classifier_projection = tf.expand_dims(classifier_projection, 1)
            classifier_projection = tf.tile(classifier_projection,
                                            [1, maximum_candidate_num, 1])
            score_input = tf.concat(
                [classifier_projection, fact_embedding_projection], -1)
            score_input = tf.nn.dropout(score_input,
                                        keep_prob=1.0 - self.dropout)

            classifier_scores = tf.layers.dense(score_input,
                                                units=1,
                                                activation=tf.nn.sigmoid,
                                                name='score_estimator')
            # squeeze only the trailing unit dim so the batch dim survives
            classifier_scores = tf.squeeze(classifier_scores, axis=-1)
            self.classifier_scores = classifier_scores
            if self.mode == model_helper.TRAIN:

                pos_scores = tf.reduce_sum(
                    self._golden_fact_bow * classifier_scores, -1)
                neg_scores = tf.reduce_sum(
                    self._neg_fact_bow * classifier_scores, -1)
                # margin ranking loss: positive facts should score at least
                # 0.3 higher than negative facts
                knowledge_bow_loss = tf.maximum(0.0,
                                                0.3 - pos_scores + neg_scores)
                self._knowledge_bow_loss = tf.reduce_sum(
                    knowledge_bow_loss) / self.batch_size
                self._train_update_loss = self._knowledge_bow_loss
                # note: this x100 rescales only the reported loss; the
                # training loss was bound to the unscaled tensor above
                self._knowledge_bow_loss *= 100
                self.regulation_loss = tf.constant(0.0)

        # Print vars
        utils.print_out('-------------Trainable Variables------------------')
        for var in tf.trainable_variables():
            utils.print_out('  %s: %s' % (var.name, var.get_shape()))
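As a quick numeric check of the margin ranking loss in the TRAIN branch, here is a toy sketch with made-up scores:

import numpy as np

# Toy illustration of max(0, 0.3 - pos + neg) with made-up scores.
pos = np.array([0.9, 0.5])   # classifier scores summed over golden facts
neg = np.array([0.2, 0.45])  # classifier scores summed over negative facts
loss = np.maximum(0.0, 0.3 - pos + neg)
print(loss)  # [0.   0.25]: only the second pair violates the 0.3 margin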