Example #1
def main(argv):
    args = parse_args(argv)
    set_package_verbosity(args.debug)

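    # Preprocess the raw dataset; return_meta=True also yields metadata such as the tokenizer and cache directory.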
    data_collection, meta_data = data_factory.preprocess(args, return_meta=True)
    tokenizer = meta_data.tokenizer

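    # Build the metric calculators requested on the command line; the flag values double as metric parameters (FED sample size, BLEU max n-gram).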
    metric_calculators = []
    if args.fed:
        metric_calculators.append(FEDMetrics(data_collection, tokenizer, sample_size=args.fed))
    if args.bleu:
        metric_calculators.append(
            BLEUMetrics(
                data_collection,
                max_gram=args.bleu,
                eos_idx=tokenizer.eos_idx,
                cache_dir=meta_data.cache_dir,
            ),
        )

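    # Load the evaluation sentences and convert them to token-id arrays for metrics that operate on tokens.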
    with open(args.eval_path, 'r') as f_in:
        texts = [line.rstrip() for line in f_in.readlines()]
        tokens = tokenizer.texts_to_array(texts)

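    # Each calculator returns a dict of named scores; merge them all before printing.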
    metrics = {}
    for calc in metric_calculators:
        metrics.update(calc.calculate(tokens=tokens, texts=texts))

    print(
        f"{os.path.basename(args.eval_path)},",
        *[f"{key}: {val:.5f}" for key, val in metrics.items()],
        sep="\n",
    )
Example #2
def main(argv):
    args = parse_args(argv)
    set_package_verbosity(args.debug)
    data_collection = data_factory.preprocess(args)

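    # Evaluate inside a default TF1 session so the loaded serving signature can run.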
    with tf.Session(config=get_tf_config_proto()).as_default():
        signature = load_serving_signature(
            os.path.join(args.model_path, args.version_number))
        generator = PerplexityCalculator.from_signature(
            signature['perplexity'])
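        # Report perplexity separately for every tagged dataset in the collection.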
        for tag, dataset in data_collection.items():
            print(f"Evaluate {tag} perplexity:")
            perplexity = generator.perplexity(dataset.ids)
            print(f"Perplexity = {perplexity}")
Example #3
def main(argv):
    args = parse_args(argv)
    set_package_verbosity(args.debug)

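    # Load the exported 'generate' signature and the tokenizer saved next to the model directory.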
    with tf.Session(config=get_tf_config_proto()).as_default():
        signature = load_serving_signature(os.path.join(args.model_path, args.version_number))
        tokenizer = Tokenizer.load(os.path.join(os.path.dirname(args.model_path), 'tokenizer.json'))
        generator = TextGenerator.from_signature(signature['generate'], tokenizer=tokenizer)
        print(f"Generate sentences to '{args.export_path}'")
        with open(args.export_path, 'w') as f_out:
            f_out.writelines([
                sentence + "\n"
                for sentence in generator.generate_texts(args.samples)
            ])
Example #4
def main(argv, base_tag=None, checkpoint=None, override_namespace=None):
    args = parse_args(argv)
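    # Optionally override the parsed CLI arguments with values supplied by the caller.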
    if override_namespace:
        args.__dict__.update(override_namespace.__dict__)

    set_package_verbosity(args.debug)

    with logging_block("Set global random seed"):
        set_global_random_seed(args.random_seed)

    with logging_block("Preprocess data"):
        data_collection, meta_data = data_factory.preprocess(args,
                                                             return_meta=True)
        data_collection.summary()
        meta_data.summary()
        data_generator = DataGenerator(data_collection.train,
                                       batch_size=args.batch_size)

    with logging_block("Prepare Generator"):
        generator = generator_factory.create(args, meta_data)

    with logging_block("Prepare Generator Trainer"):
        trainer = trainer_factory.create(args, meta_data, generator)
        trainer.summary()

    with logging_block("Prepare Callback"):
        if base_tag is None:
            base_tag = f"{args.dataset}@{time.strftime('%Y%m%d-%H%M%S')}"
        data_generator.callback = CallbackFactory(
            trainer=trainer,
            generator=generator,
            data_collection=data_collection,
            meta_data=meta_data,
            tags=args.tags + [base_tag],
        ).create_by_args(args)

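    # Either resume from the given checkpoint (skipping the epochs encoded in its name) or initialize all variables from scratch.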
    with tf.Session(config=get_tf_config_proto(args.jit)) as sess:
        if checkpoint:
            print(f"Restore from checkpoint: {checkpoint}")
            tf.train.Saver().restore(sess, save_path=checkpoint)
            data_generator.skip_epochs(int(checkpoint.split('-')[-1]))
        else:
            tf.global_variables_initializer().run()

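        # Training loop: feed batches to the trainer for the requested number of epochs; the argv string is passed along for the callback's on_train_begin.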
        for batch_data in data_generator.iter_batch_until(
                n_epochs=args.epochs,
                logs={'arg_string': " ".join(argv)},  # for callback.on_train_begin()
        ):
            trainer.fit_batch(batch_data)