Example #1
def main(unused_argv):
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")
  if not FLAGS.model_config:
    raise ValueError("--model_config is required.")


  encoder = encoder_manager.EncoderManager()

  with open(FLAGS.model_config) as json_config_file:
    model_config = json.load(json_config_file)

  if isinstance(model_config, dict):
    model_config = [model_config]

  for mdl_cfg in model_config:
    # Use a new name to avoid rebinding the list being iterated over.
    cfg = configuration.model_config(mdl_cfg, mode="encode")
    encoder.load_model(cfg)

  if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
    results = eval_classification.eval_nested_kfold(
        encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
    scores = results[0]
    print('Mean score', np.mean(scores))
  elif FLAGS.eval_task == "SICK":
    results = eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "MSRP":
    results = eval_msrp.evaluate(
        encoder, evalcv=True, evaltest=True, use_feats=False, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  else:
    raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

  encoder.close()
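
This main() relies on command-line flags defined elsewhere in the evaluation script. A minimal sketch of the declarations it assumes, using the TF1 tf.flags API (the flag names come from the snippet; the defaults and help strings are assumptions):

import tensorflow as tf

FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("eval_task", "CR",
                       "Task to evaluate: MR, CR, SUBJ, MPQA, SICK, MSRP, or TREC.")
tf.flags.DEFINE_string("data_dir", None, "Directory containing the task data.")
tf.flags.DEFINE_string("model_config", None,
                       "JSON file with one model configuration or a list of them.")

if __name__ == "__main__":
    tf.app.run()  # parses flags, then calls main()
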
Example #2
import tools
import eval_sick

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

if __name__ == '__main__':
    # Load the pretrained GoogleNews word2vec vectors used for vocabulary expansion.
    embed_map = tools.load_googlenews_vectors()
    # Load the trained skip-thoughts model, expanding its vocabulary via embed_map.
    model = tools.load_model(embed_map)

    # Evaluate on the SICK semantic relatedness task, including the test split.
    eval_sick.evaluate(model, evaltest=True)
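
eval_sick.evaluate() prints its results; Example #4 below unpacks a return value of (predictions, Pearson r, Spearman rho, MSE). If your copy of eval_sick has that return signature (treat this as an assumption), the metrics can be captured instead of just printed:

# Assumed return signature, based on the unpacking in Example #4.
yhat, pr, sr, mse = eval_sick.evaluate(model, evaltest=True)
print(pr, sr, mse)  # Pearson, Spearman, and MSE on the SICK test set
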
Example #3
def main(unused_argv):
    if not FLAGS.data_dir:
        raise ValueError("--data_dir is required.")
    if not FLAGS.model_config:
        raise ValueError("--model_config is required.")

    encoder = encoder_manager.EncoderManager()

    with open(FLAGS.model_config) as json_config_file:
        model_config = json.load(json_config_file)

    if isinstance(model_config, dict):
        model_config = [model_config]

    sp = None
    if FLAGS.sentencepiece_model_path:
        print('Loading sentencepiece model', FLAGS.sentencepiece_model_path)
        sp = spm.SentencePieceProcessor()
        sp.Load(FLAGS.sentencepiece_model_path)

    for mdl_cfg in model_config:
        cfg = configuration.model_config(mdl_cfg, mode="encode")
        encoder.load_model(cfg)

    if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
        results = eval_classification.eval_nested_kfold(encoder,
                                                        FLAGS.eval_task,
                                                        FLAGS.data_dir,
                                                        use_nb=False)
        scores = results[0]
        print('Mean score', np.mean(scores))
    elif FLAGS.eval_task == "SICK":
        results = eval_sick.evaluate(encoder,
                                     evaltest=True,
                                     loc=FLAGS.data_dir,
                                     sp=sp)
    elif FLAGS.eval_task == "MSRP":
        results = eval_msrp.evaluate(encoder,
                                     evalcv=True,
                                     evaltest=True,
                                     use_feats=False,
                                     loc=FLAGS.data_dir)
    elif FLAGS.eval_task == "TREC":
        eval_trec.evaluate(encoder,
                           evalcv=True,
                           evaltest=True,
                           loc=FLAGS.data_dir)
    elif FLAGS.eval_task == 'SNLI-MT-TR':
        file_meta_data = {
            'file_names': {
                'train': 'snli_train_translation.jsonl',
                'dev': 'snli_dev_translation.jsonl',
                'test': 'snli_test_translation.jsonl'
            },
            'sentence_keys': {
                'sentence1': 'translate-sentence1',
                'sentence2': 'translate-sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_nli.evaluate(encoder,
                          evaltest=True,
                          loc=FLAGS.data_dir,
                          file_meta_data=file_meta_data,
                          sp=sp)

    elif FLAGS.eval_task == 'SNLI':
        file_meta_data = {
            'file_names': {
                'train': 'snli_1.0_train.jsonl',
                'dev': 'snli_1.0_dev.jsonl',
                'test': 'snli_1.0_test.jsonl'
            },
            'sentence_keys': {
                'sentence1': 'sentence1',
                'sentence2': 'sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_nli.evaluate(encoder,
                          evaltest=True,
                          loc=FLAGS.data_dir,
                          file_meta_data=file_meta_data,
                          sp=sp)

    elif FLAGS.eval_task == 'MULTINLI-MT-TR-MATCHED':
        file_meta_data = {
            'file_names': {
                'train':
                'multinli_train_translation.jsonl',
                'dev':
                'multinli_dev_matched_translation.jsonl',
                'test':
                'multinli_0.9_test_matched_translation_unlabeled.jsonl',
                'test_output':
                'multinli_0.9_test_matched_translation_unlabeled_output.csv'
            },
            'sentence_keys': {
                'sentence1': 'translate-sentence1',
                'sentence2': 'translate-sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_multinli.evaluate(encoder,
                               evaltest=True,
                               loc=FLAGS.data_dir,
                               file_meta_data=file_meta_data,
                               sp=sp)

    elif FLAGS.eval_task == 'MULTINLI-MATCHED':
        file_meta_data = {
            'file_names': {
                'train': 'multinli_1.0_train.jsonl',
                'dev': 'multinli_1.0_dev_matched.jsonl',
                'test': 'multinli_0.9_test_matched_unlabeled.jsonl',
                'test_output': 'multinli_0.9_test_matched_unlabeled_output.csv'
            },
            'sentence_keys': {
                'sentence1': 'sentence1',
                'sentence2': 'sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_multinli.evaluate(encoder,
                               evaltest=True,
                               loc=FLAGS.data_dir,
                               file_meta_data=file_meta_data,
                               sp=sp)
    elif FLAGS.eval_task == 'MULTINLI-MT-TR-MISMATCHED':
        file_meta_data = {
            'file_names': {
                'train':
                'multinli_train_translation.jsonl',
                'dev':
                'multinli_dev_mismatched_translation.jsonl',
                'test':
                'multinli_0.9_test_mismatched_translation_unlabeled.jsonl',
                'test_output':
                'multinli_0.9_test_mismatched_translation_unlabeled_output.csv',
            },
            'sentence_keys': {
                'sentence1': 'translate-sentence1',
                'sentence2': 'translate-sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_multinli.evaluate(encoder,
                               evaltest=True,
                               loc=FLAGS.data_dir,
                               file_meta_data=file_meta_data,
                               sp=sp)
    elif FLAGS.eval_task == 'MULTINLI-MISMATCHED':
        file_meta_data = {
            'file_names': {
                'train': 'multinli_1.0_train.jsonl',
                'dev': 'multinli_1.0_dev_mismatched.jsonl',
                'test': 'multinli_0.9_test_mismatched_unlabeled.jsonl',
                'test_output':
                'multinli_0.9_test_mismatched_unlabeled_output.csv'
            },
            'sentence_keys': {
                'sentence1': 'sentence1',
                'sentence2': 'sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral']
        }
        eval_multinli.evaluate(encoder,
                               evaltest=True,
                               loc=FLAGS.data_dir,
                               file_meta_data=file_meta_data,
                               sp=sp)
    elif FLAGS.eval_task == 'XNLI-MT-TR':
        file_meta_data = {
            'file_names': {
                'train': 'multinli_train_translation.jsonl',
                'dev': 'xnli_dev_translation.jsonl',
                'test': 'xnli_test_translation.jsonl'
            },
            'sentence_keys': {
                'sentence1': 'translate-sentence1',
                'sentence2': 'translate-sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral'],
            'language': 'any'
        }
        eval_xnli.evaluate(encoder,
                           evaltest=True,
                           loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data,
                           sp=sp)
    elif FLAGS.eval_task == 'XNLI':
        file_meta_data = {
            'file_names': {
                'train': 'multinli_1.0_train.jsonl',
                'dev': 'xnli.dev.jsonl',
                'test': 'xnli.test.jsonl'
            },
            'sentence_keys': {
                'sentence1': 'sentence1',
                'sentence2': 'sentence2'
            },
            'label_classes': ['contradiction', 'entailment', 'neutral'],
            'language': 'en'
        }
        eval_xnli.evaluate(encoder,
                           evaltest=True,
                           loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data,
                           sp=sp)

    else:
        raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

    encoder.close()
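
The NLI branches above repeat the same file_meta_data structure, differing only in file names, sentence keys, and an optional language field. A hypothetical helper (not part of the original script) could build those dicts:

def make_nli_meta(train, dev, test, test_output=None, translated=False, language=None):
    """Build a file_meta_data dict like those passed to eval_nli/eval_multinli/eval_xnli."""
    prefix = 'translate-' if translated else ''
    meta = {
        'file_names': {'train': train, 'dev': dev, 'test': test},
        'sentence_keys': {
            'sentence1': prefix + 'sentence1',
            'sentence2': prefix + 'sentence2',
        },
        'label_classes': ['contradiction', 'entailment', 'neutral'],
    }
    if test_output is not None:
        meta['file_names']['test_output'] = test_output
    if language is not None:
        meta['language'] = language
    return meta

For instance, the SNLI branch reduces to make_nli_meta('snli_1.0_train.jsonl', 'snli_1.0_dev.jsonl', 'snli_1.0_test.jsonl').
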
Example #4
def trainer(
        X,
        dim_word=620,  # word vector dimensionality
        dim=2400,  # the number of GRU units
        encoder='gru',
        decoder='gru',
        max_epochs=5,
        dispFreq=1,
        decay_c=0.,
        grad_clip=5.,
        n_words=20000,
        maxlen_w=30,
        optimizer='adam',
        batch_size=512,
        saveto='/u/rkiros/research/semhash/models/toy.npz',
        dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
        saveFreq=5000,
        reload_=False,
        reload_path='output_books_full/model_ae_full_bsz_64_iter_313000.npz',
        SICK_eval=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    model_options['reload_path'] = reload_path

    print model_options

    # reload options
    if reload_ and os.path.exists(reload_path):
        print 'reloading...' + reload_path
        with open('%s.pkl' % reload_path, 'rb') as f:
        model_options = pkl.load(f)

        reload_idx = int(reload_path.split('_')[-1].split('.')[0])

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(reload_path):
        params = load_params(reload_path, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    if not reload_:
        uidx = 0
    else:
        uidx = reload_idx
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero samples under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_iternum = saveto.format(uidx)

                params = unzip(tparams)
                numpy.savez(saveto_iternum, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_iternum, 'wb'))
                print 'Done'

                if SICK_eval:
                    print "Evaluating SICK Test performance"
                    embed_map = tools.load_googlenews_vectors()
                    model = tools.load_model(path_to_model=saveto_iternum,
                                             embed_map=embed_map)
                    yhat, pr, sr, mse = eval_sick.evaluate(model,
                                                           evaltest=True)

                    del model
                    del embed_map
                    print pr, sr, mse

                    res_save_file = saveto.format('ALL').split(
                        '.')[0] + '_SICK_EVAL.txt'
                    with open(res_save_file, 'a') as rsf:
                        cur_time = strftime("%a, %d %b %Y %H:%M:%S +0000",
                                            gmtime())
                        rsf.write('\n \n {}'.format(cur_time))
                        rsf.write('\n{}, {}, {}, {}'.format(uidx, pr, sr, mse))
                    print "Done"

        print 'Seen %d samples' % n_samples
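
For context, trainer() expects X to be a list of tokenized sentences; homogeneous_data.grouper turns consecutive sentences into (previous, current, next) triples for the skip-thoughts objective, and saveto is formatted with the update counter at each checkpoint. A hypothetical invocation (the corpus loader and paths are assumptions):

X = load_corpus_sentences()  # hypothetical helper returning a list of sentence strings
trainer(X,
        max_epochs=5,
        batch_size=64,
        saveto='models/model_iter_{}.npz',  # '{}' is filled with the update number
        dictionary='models/dictionary.pkl',
        reload_=False)
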
Example #5
File: eval.py Project: ZJaume/paraphrasing
import eval_sick
import eval_msrp
import datagen
import sys

import numpy as np
import tensorflow as tf
from argparse import ArgumentParser

# Set random seeds for reproducibility
import random
random.seed(333)
np.random.seed(333)
tf.set_random_seed(333)

parser = ArgumentParser()
parser.add_argument('model', help='Model to evaluate')
parser.add_argument('tokenizer', help='Tokenizer object')
parser.add_argument('-d', '--data', default='data/', help='Path to test data')
parser.add_argument('-e', '--embeddings', help='Embedding file')
parser.add_argument('-v', type=int, default=0, help='Verbose level')
args = parser.parse_args(sys.argv[1:])
print(args)

tokenizer = datagen.load_tokenizer(args.tokenizer)
# Encoder is defined elsewhere in this project; its import is not shown in the snippet.
encoder = Encoder(args.model, args.embeddings, tokenizer)
print('Encoder created')

eval_sick.evaluate(encoder, evaltest=True, loc=args.data, verbose=args.v)
eval_msrp.evaluate(encoder,
                   evalcv=True,
                   evaltest=True,
                   use_feats=True,
                   loc=args.data)
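
The script takes the model and tokenizer paths as positional arguments; a hypothetical invocation (file names are assumptions):

# python eval.py model.h5 tokenizer.pkl --data data/ --embeddings vectors.bin -v 1
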