def evaluate_joint_ace05_test(estimator,
                              evaluator,
                              test,
                              ere_test,
                              params,
                              ace05_kshot_data=None,
                              ere_kshot_data=None):
    # for joint model
    with open(params['score_file'] + '_ace', 'a') as score_output:
        print('bc0 %4.2f' % (100.0 * evaluator.best_dev), end=' ')
        print('bc0 %4.2f' % (100.0 * evaluator.best_dev),
              end=' ',
              file=score_output)
        all_preds, all_truth = [], []
        macro_avg = 0.0
        for corpus in test:
            if params['kshot']:
                test_input_fn = make_joint_kshot_eval_inputs(
                    test[corpus], ace05_kshot_data, ere_test, ere_kshot_data,
                    params)
            else:
                test_input_fn = make_joint_eval_inputs(test[corpus], ere_test)
            score, preds = evaluator.evaluate_dataset(
                estimator, test_input_fn, test[corpus][1],
                params['ace05_num_classes'], 'ace05_relation')
            all_preds += [preds]
            all_truth += [test[corpus][1]]
            macro_avg += score
            print(corpus, '%4.2f' % (100.0 * score), end=' ')
            print(corpus,
                  '%4.2f' % (100.0 * score),
                  end=' ',
                  file=score_output)
        all_preds = np.concatenate(all_preds, axis=0)
        all_truth = np.concatenate(all_truth, axis=0)
        micro_avg = metrics.f1_metric(all_truth, all_preds,
                                      params['ace05_num_classes'])
        macro_avg /= float(len(test))
        print('micro', '%4.2f' % (100.0 * micro_avg), end=' ')
        print('micro',
              '%4.2f' % (100.0 * micro_avg),
              end=' ',
              file=score_output)
        print('macro', '%4.2f' % (100.0 * macro_avg), end=' ')
        print('macro',
              '%4.2f' % (100.0 * macro_avg),
              end=' ',
              file=score_output)
        print('epoch', evaluator.best_epoch, end=' ')
        print('epoch', evaluator.best_epoch, end=' ', file=score_output)
        print('dir', evaluator.best_ckpt, file=score_output)
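The loop above reports one F1 score per ACE05 test corpus, a macro average (the mean of the per-corpus scores), and a micro average (a single F1 over the concatenated predictions). The project-specific metrics.f1_metric is not shown in this snippet; a minimal standalone sketch of the micro/macro distinction, using scikit-learn's f1_score as a stand-in and hypothetical corpus data, looks like this:

import numpy as np
from sklearn.metrics import f1_score

# Hypothetical gold labels and predictions for two test corpora.
corpora = {
    'bc1': (np.array([0, 1, 2, 1]), np.array([0, 1, 1, 1])),
    'wl': (np.array([2, 2, 0]), np.array([2, 0, 0])),
}

per_corpus_scores, all_truth, all_preds = [], [], []
for name, (truth, preds) in corpora.items():
    per_corpus_scores.append(f1_score(truth, preds, average='micro'))
    all_truth.append(truth)
    all_preds.append(preds)

# Macro average: each corpus contributes equally.
macro_avg = sum(per_corpus_scores) / len(per_corpus_scores)
# Micro average: each example contributes equally, regardless of corpus size.
micro_avg = f1_score(np.concatenate(all_truth), np.concatenate(all_preds),
                     average='micro')
print('macro %4.2f micro %4.2f' % (100.0 * macro_avg, 100.0 * micro_avg))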
def evaluate_joint_ere_test(estimator,
                            evaluator,
                            test,
                            ace05_test,
                            params,
                            ere_label_list=None,
                            output_types=None,
                            ace05_kshot_data=None,
                            ere_kshot_data=None):
    if params['kshot']:
        test_input_fn = make_joint_kshot_eval_inputs(ace05_test,
                                                     ace05_kshot_data, test,
                                                     ere_kshot_data, params)
    else:
        test_input_fn = make_joint_eval_inputs(ace05_test, test)
    test_score, preds = evaluator.evaluate_dataset(estimator,
                                                   test_input_fn,
                                                   test[1],
                                                   params['ere_num_classes'],
                                                   'ere_relation',
                                                   output_types=output_types)
    all_scores = metrics.f1_metric(test[1],
                                   preds,
                                   params['ere_num_classes'],
                                   average=None)
    print(all_scores)
    with open(params['score_file'] + '_ere', 'a') as score_output:
        print('dev %4.2f' % (100.0 * evaluator.best_dev), end=' ')
        print('dev %4.2f' % (100.0 * evaluator.best_dev),
              end=' ',
              file=score_output)
        print('test %4.2f' % (100.0 * test_score), end=' ')
        print('test %4.2f' % (100.0 * test_score), end=' ', file=score_output)
        if ere_label_list is not None:
            for i in range(len(all_scores)):
                print(str(ere_label_list[i]) + ' %4.2f' %
                      (100.0 * all_scores[i]),
                      end=' ')
                print(str(ere_label_list[i]) + ' %4.2f' %
                      (100.0 * all_scores[i]),
                      end=' ',
                      file=score_output)
        print('epoch', evaluator.best_epoch, end=' ')
        print('epoch', evaluator.best_epoch, end=' ', file=score_output)
        print('dir', evaluator.best_ckpt, file=score_output)
        print()
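For the ERE test set the script additionally prints a per-label F1 breakdown (average=None). A minimal sketch of that breakdown, again with scikit-learn's f1_score standing in for the project's metrics.f1_metric and a hypothetical label list:

import numpy as np
from sklearn.metrics import f1_score

ere_label_list = ['no_relation', 'physical', 'part-whole']  # hypothetical labels
truth = np.array([0, 1, 2, 1, 0])
preds = np.array([0, 1, 1, 1, 2])

# average=None returns one F1 value per class, ordered by the labels argument.
all_scores = f1_score(truth, preds,
                      labels=list(range(len(ere_label_list))),
                      average=None)
for label, score in zip(ere_label_list, all_scores):
    print('%s %4.2f' % (label, 100.0 * score), end=' ')
print()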
Example #3
def experiment_ace05(params):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info(params)
    ace05_data, embed = ace05.load_dataset(max_len=params['max_len'])
    ace05_data = preprocess_ace05(ace05_data, params)
    train, dev, test = ace05_data
    params['embed'] = embed
    trainX, trainY = train
    devX, devY = dev

    trainX, trainY = take_percentage(trainX, trainY, params['percent_train'])

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=trainX,
        y=trainY,
        num_epochs=1,
        batch_size=params['batch_size'],
        shuffle=True)
    # Integer division so Estimator.train receives an integer step count.
    num_steps_per_epoch = trainY.shape[0] // params['batch_size'] + 1
    params['learning_rate_decay_step'] = num_steps_per_epoch * params[
        'lr_decay_epoch']
    if params['stack']:
        model_fn = relation_stack_model
    else:
        model_fn = relation_model
    config = tf.estimator.RunConfig()
    config = config.replace(
        tf_random_seed=params['random_seed'],
        keep_checkpoint_max=1,
    )
    estimator = tf.estimator.Estimator(model_dir=params['model_dir'],
                                       model_fn=model_fn,
                                       params=params,
                                       config=config)

    print('num steps per epoch', num_steps_per_epoch)
    print('start training')

    evaluator = metrics.EvaluatorACE05Hook(estimator, ace05_data)

    if params['debug']:
        print('debug mode')
        num_steps_per_epoch = 100
        params['epoch'] = 1
    for epoch in range(1, params['epoch'] + 1):
        print('==========')
        print('epoch', epoch)
        estimator.train(input_fn=train_input_fn,
                        steps=num_steps_per_epoch,
                        hooks=[evaluator])

    print('finished training, best dev (%4.4f) found at epoch: %d' %
          (evaluator.best_dev, evaluator.best_epoch))
    with open(params['score_file'], 'a') as score_output:
        # Point the estimator at the best checkpoint directory before testing
        # (relies on the private _model_dir attribute).
        estimator._model_dir = evaluator.best_ckpt
        print('bc0 %4.2f' % (100.0 * evaluator.best_dev.item()), end=' ')
        print('bc0 %4.2f' % (100.0 * evaluator.best_dev.item()),
              end=' ',
              file=score_output)
        all_preds, all_truth = [], []
        for corpus in test:
            score, preds = metrics.evaluate_predict(estimator, test[corpus],
                                                    params['num_classes'])
            all_preds += [preds]
            all_truth += [test[corpus][1]]
            print(corpus, '%4.2f' % (100.0 * score.item()), end=' ')
            print(corpus,
                  '%4.2f' % (100.0 * score.item()),
                  end=' ',
                  file=score_output)
        all_preds = np.concatenate(all_preds, axis=0)
        all_truth = np.concatenate(all_truth, axis=0)
        micro_avg = metrics.f1_metric(all_truth, all_preds,
                                      params['num_classes'])
        print('micro', '%4.2f' % (100.0 * micro_avg), end=' ')
        print('micro',
              '%4.2f' % (100.0 * micro_avg),
              end=' ',
              file=score_output)
        print('epoch', evaluator.best_epoch)
        print('epoch', evaluator.best_epoch, file=score_output)
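take_percentage is a project-specific helper that subsamples the training data before the input function is built; its implementation is not shown in this snippet. A minimal sketch of what such a helper might look like, under the assumption that trainX is a dict of numpy feature arrays sharing their first dimension with trainY:

import numpy as np

def take_percentage(trainX, trainY, percent, seed=0):
    # Illustrative sketch only, not the project's actual implementation.
    # Keeps a random subset containing roughly `percent` of the examples.
    if percent >= 1.0:
        return trainX, trainY
    n_total = trainY.shape[0]
    n_keep = max(1, int(n_total * percent))
    idx = np.random.RandomState(seed).permutation(n_total)[:n_keep]
    sampledX = {key: value[idx] for key, value in trainX.items()}
    return sampledX, trainY[idx]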
Example #4
    def dev_step(split, global_step):

        if split == 'test_seen':
            test_loader = test_seen_loader
        elif split == 'test_unseen':
            test_loader = test_unseen_loader
        else:
            raise ValueError

        dis_model.eval()
        gen_model.eval()

        n_token, test_loss = 0, 0.0  # ppl
        test_hyp, test_ref = [], []
        count = 0

        with torch.no_grad():
            for knowledges, histories, users, responses, knowledge_lens in test_loader:
                knowledges = [know.split('\n\n') for know in knowledges]
                histories = [his.split('\n\n') for his in histories]

                dis_args = dis_batcher(knowledges, histories, knowledge_lens,
                                       args.n_sent)
                dis_out = dis_model(*dis_args)
                dis_knowledges = [[knowledges[bi][dis_out[0][bi].item()]]
                                  for bi in range(len(knowledges))]

                gen_args = gen_batcher(dis_knowledges, histories, users,
                                       responses, args.segment, True)
                loss = gen_criterion(
                    gen_model(gen_args[0], token_type_ids=gen_args[1])[0],
                    gen_args[2])
                n_token += loss.size(0)
                test_loss += loss.sum().item()

                for bi in range(len(dis_knowledges)):
                    dec_in = gen_batcher(dis_knowledges[bi:bi + 1],
                                         histories[bi:bi + 1],
                                         users[bi:bi + 1],
                                         segment=args.segment,
                                         training=False)
                    dec_out = gen_model.batch_decode(
                        dec_in, args.max_length, args.min_length,
                        args.early_stopping, args.beam_size,
                        args.repetition_penalty, gen_batcher.eos_id,
                        args.length_penalty, args.no_repeat_ngram_size)
                    dec_out = dec_out[0].tolist()[dec_in.size(1):]
                    _hyp = gen_batcher.tokenizer.decode(
                        dec_out,
                        skip_special_tokens=True,
                        clean_up_tokenization_spaces=False)
                    _ref = responses[bi]
                    test_hyp.append(_hyp)
                    test_ref.append(_ref)

                    count += 1
                    if count % 1000 == 0:
                        print(count)

        with open(
                os.path.join(
                    out_dir,
                    '{}-decoded-iter-{}.txt'.format(split, global_step)),
                'w') as f:
            for _hyp, _ref in zip(test_hyp, test_ref):
                f.write('{} ||| {}\n'.format(_hyp, _ref))

        MeanLoss = test_loss / n_token
        b1, b2, b3, b4 = bleu_metric(test_hyp, test_ref)
        d1, d2 = distinct_metric(test_hyp)
        f1 = f1_metric(test_hyp, test_ref)

        time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("**********************************")
        print("{} results..........".format(split))
        print('hypothesis: ', len(test_hyp))
        print("Step: %d \t| ppl: %.3f \t|  %s" %
              (global_step, math.exp(MeanLoss), time_str))
        print("BLEU-1/2/3/4: {:.4f}/{:.4f}/{:.4f}/{:.4f}".format(
            b1, b2, b3, b4))
        print("Distinct-1/2: {:.4f}/{:.4f}".format(d1, d2))
        print("F1: {:.4f}".format(f1))
        print("**********************************")

        return {
            'f1': f1,
            'loss': MeanLoss,
            'bleu1': b1,
            'bleu2': b2,
            'bleu3': b3,
            'bleu4': b4,
            'distinct1': d1,
            'distinct2': d2
        }
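The ppl in the log line above is the exponential of the mean per-token loss, which is why the loop accumulates loss.size(0) and loss.sum() rather than a per-batch mean. A minimal sketch of that bookkeeping, assuming gen_criterion is a CrossEntropyLoss(reduction='none') that returns one loss value per target token:

import math
import torch
import torch.nn as nn

gen_criterion = nn.CrossEntropyLoss(reduction='none')

n_token, test_loss = 0, 0.0
for _ in range(3):  # stand-in for iterating over test_loader
    logits = torch.randn(10, 32000)           # hypothetical (num_tokens, vocab_size)
    targets = torch.randint(0, 32000, (10,))  # hypothetical gold token ids
    loss = gen_criterion(logits, targets)     # shape: (num_tokens,)
    n_token += loss.size(0)
    test_loss += loss.sum().item()

mean_loss = test_loss / n_token
print('ppl: %.3f' % math.exp(mean_loss))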
Example #5
def s3dg_fn(features, labels, mode, params):
    # Compute logits.
    with slim.arg_scope(s3dg_arg_scope(weight_decay=params['weight_decay'])):
        logits, endpoints = s3dg(
            features,
            num_classes=params['num_classes'],
            dropout_keep_prob=1. - params['dropout_rate'],
            is_training=mode == tf.estimator.ModeKeys.TRAIN,
            prediction_fn=scoped_sigmoid,
            min_depth=params['min_depth'],
            depth_multiplier=params['depth_multiplier'])

    # Compute predictions using round instead of argmax since our prediction
    # function is sigmoid (for multi-label classification) and not softmax
    # (for multi-class classification).
    predicted_classes = tf.round(endpoints['Predictions'])

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes,
            'probabilities': endpoints['Predictions'],
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Compute primary loss.
    sigmoid_loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    tf.summary.scalar('Losses/sigmoid_loss', sigmoid_loss)

    # L1 loss is not included by default, but helps with our particular task
    for var in tf.trainable_variables():
        if var.op.name.find(r'weights') > 0 \
            and var not in tf.get_collection(tf.GraphKeys.WEIGHTS):
            tf.add_to_collection(tf.GraphKeys.WEIGHTS, var)

    l1_loss = tf.contrib.layers.apply_regularization(
        regularizer=tf.contrib.layers.l1_regularizer(
            scale=params['weight_decay']),
        weights_list=tf.get_collection(tf.GraphKeys.WEIGHTS))
    tf.summary.scalar('Losses/l1_loss', l1_loss)

    # L2 loss is already computed when utilizing the slim argument scope,
    # including the weight decay argument. Just display the existing value.
    l2_loss = tf.reduce_sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    tf.summary.scalar('Losses/l2_loss', l2_loss)

    regularization_loss = tf.add(l1_loss, l2_loss)
    tf.summary.scalar('Losses/regularization_loss', regularization_loss)

    total_loss = tf.add(sigmoid_loss, regularization_loss)
    tf.summary.scalar('Losses/total_loss', total_loss)

    # Compute evaluation metrics.
    auc = tf.metrics.auc(labels=labels,
                         predictions=predicted_classes,
                         name='auc_op',
                         weights=params['metric_weights'])

    precision = tf.metrics.precision(labels=labels,
                                     predictions=predicted_classes,
                                     name='precision_op',
                                     weights=params['metric_weights'])

    recall = tf.metrics.recall(labels=labels,
                               predictions=predicted_classes,
                               name='recall_op',
                               weights=params['metric_weights'])

    f1 = f1_metric(labels=labels,
                   predictions=predicted_classes,
                   name='f1_op',
                   weights=params['metric_weights'])

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            'Metrics/eval/auc': auc,
            'Metrics/eval/f1': f1,
            'Metrics/eval/precision': precision,
            'Metrics/eval/recall': recall
        }

        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=metrics)

    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN

    if params['add_image_summaries']:
        for batch_num in range(params['batch_size']):
            tf.summary.image(
                'processed_video_frame',
                tf.expand_dims(
                    features[batch_num,
                             int(params['clip_length'] / 2)], 0))

    # Add summaries for end_points.
    for endpoint in endpoints:
        x = endpoints[endpoint]
        tf.summary.histogram('activations/' + endpoint, x)
        tf.summary.scalar('sparsity/' + endpoint, tf.nn.zero_fraction(x))

    # Add these summaries only when training; when evaluating, the estimator
    # spec adds summaries automatically.
    tf.summary.scalar('Metrics/train/auc', auc[1])
    tf.summary.scalar('Metrics/train/precision', precision[1])
    tf.summary.scalar('Metrics/train/recall', recall[1])
    tf.summary.scalar('Metrics/train/f1', f1[1])

    # Add histograms for variables.
    for variable in tf.global_variables():
        tf.summary.histogram(variable.op.name, variable)

    # Prepare the optimizer.
    if params['optimizer'] == 'momentum':
        # SGD + Momentum is the optimizer used to pre-train s3dg.
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=params['learning_rate'], momentum=params['momentum'])
    else:
        # Pure SGD is a safe optimizer to use when troubleshooting problems
        # restoring Momentum variables from checkpoints using the Estimator API
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=params['learning_rate'])

    variables_to_train = get_variables_to_train(params['variables_to_train'])

    train_op = tf.contrib.training.create_train_op(
        total_loss=total_loss,
        optimizer=optimizer,
        variables_to_train=variables_to_train)

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
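f1_metric is expected to behave like the built-in tf.metrics ops, returning a (value, update_op) pair so it can appear both in eval_metric_ops and in the training summaries above. A minimal sketch of such a streaming F1, composed from tf.metrics.precision and tf.metrics.recall (an assumption about how the helper could be built, not the project's actual code):

import tensorflow as tf

def f1_metric(labels, predictions, weights=None, name='f1'):
    # Streaming F1 as the harmonic mean of streaming precision and recall.
    # Returns (value, update_op) like the tf.metrics.* ops.
    with tf.variable_scope(name):
        precision, precision_op = tf.metrics.precision(
            labels=labels, predictions=predictions, weights=weights)
        recall, recall_op = tf.metrics.recall(
            labels=labels, predictions=predictions, weights=weights)
        f1 = 2.0 * precision * recall / (precision + recall + 1e-8)
        update_op = tf.group(precision_op, recall_op)
    return f1, update_op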