Example #1

This main() converts a language-model corpus into TFRecord files: only the test split when per_host_test_bsz > 0, otherwise the train and valid splits.
import os
from os import makedirs
from os.path import exists


# FLAGS and get_lm_corpus are defined elsewhere in the module; see the
# wiring sketch after this example.
def main(unused_argv):
    del unused_argv  # Unused

    # Load (or build) the corpus object for the requested dataset.
    corpus = get_lm_corpus(FLAGS.data_dir, FLAGS.dataset)

    # Write the TFRecords into a subdirectory of the data directory.
    save_dir = os.path.join(FLAGS.data_dir, "tfrecords")
    if not exists(save_dir):
        makedirs(save_dir)

    # Test mode: convert only the test split, then exit.
    if FLAGS.per_host_test_bsz > 0:
        corpus.convert_to_tfrecords("test",
                                    save_dir,
                                    FLAGS.per_host_test_bsz,
                                    FLAGS.tgt_len,
                                    FLAGS.num_core_per_host,
                                    FLAGS=FLAGS)
        return

    for split, batch_size in zip(
        ["train", "valid"],
        [FLAGS.per_host_train_bsz, FLAGS.per_host_valid_bsz]):

        if batch_size <= 0:
            continue
        print("Converting {} set...".format(split))
        corpus.convert_to_tfrecords(split,
                                    save_dir,
                                    batch_size,
                                    FLAGS.tgt_len,
                                    FLAGS.num_core_per_host,
                                    FLAGS=FLAGS)
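
The snippet assumes a surrounding script that defines the flags and invokes main(). A minimal wiring sketch, assuming FLAGS comes from absl.flags as in the Transformer-XL codebase; the flag defaults here are illustrative, not the repository's actual values:

from absl import app, flags

# Illustrative flag definitions; these would live in the same module as main().
flags.DEFINE_string("data_dir", None, "Directory holding the raw dataset.")
flags.DEFINE_string("dataset", "wt103", "Dataset name passed to get_lm_corpus.")
flags.DEFINE_integer("tgt_len", 70, "Tokens per example (target length).")
flags.DEFINE_integer("num_core_per_host", 8, "Cores per host; shards each batch.")
flags.DEFINE_integer("per_host_train_bsz", 60, "Train batch size; <= 0 skips the split.")
flags.DEFINE_integer("per_host_valid_bsz", 60, "Valid batch size; <= 0 skips the split.")
flags.DEFINE_integer("per_host_test_bsz", 0, "If > 0, convert only the test split.")

FLAGS = flags.FLAGS

if __name__ == "__main__":
    app.run(main)  # absl parses the flags, then calls main(argv)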
Example #2

Identical to Example #1 except that, after conversion, it also pickles the corpus to cache.pkl so later runs can reload it instead of rebuilding it.
import os
import pickle
from os import makedirs
from os.path import exists


# FLAGS and get_lm_corpus are defined elsewhere in the module, as in
# Example #1.
def main(unused_argv):
    del unused_argv  # Unused

    # Load (or build) the corpus object for the requested dataset.
    corpus = get_lm_corpus(FLAGS.data_dir, FLAGS.dataset)

    # Write the TFRecords into a subdirectory of the data directory.
    save_dir = os.path.join(FLAGS.data_dir, "tfrecords")
    if not exists(save_dir):
        makedirs(save_dir)

    # Test mode: convert only the test split, then exit.
    if FLAGS.per_host_test_bsz > 0:
        corpus.convert_to_tfrecords("test",
                                    save_dir,
                                    FLAGS.per_host_test_bsz,
                                    FLAGS.tgt_len,
                                    FLAGS.num_core_per_host,
                                    FLAGS=FLAGS)
        return

    for split, batch_size in zip(
        ["train", "valid"],
        [FLAGS.per_host_train_bsz, FLAGS.per_host_valid_bsz]):

        if batch_size <= 0:
            continue
        print("Converting {} set...".format(split))
        corpus.convert_to_tfrecords(split,
                                    save_dir,
                                    batch_size,
                                    FLAGS.tgt_len,
                                    FLAGS.num_core_per_host,
                                    FLAGS=FLAGS)

    # Cache the corpus object so later runs can reload it instead of
    # rebuilding it from the raw text.
    fn = os.path.join(FLAGS.data_dir, "cache.pkl")
    print("Saving dataset...")
    with open(fn, "wb") as fp:
        # protocol=2 keeps the pickle loadable from Python 2 as well.
        pickle.dump(corpus, fp, protocol=2)
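
Reading the cache back is the mirror image of the final block above. A minimal sketch, assuming the file was written by this main(); the path is illustrative:

import os
import pickle

fn = os.path.join("/path/to/data_dir", "cache.pkl")
with open(fn, "rb") as fp:
    corpus = pickle.load(fp)  # the same corpus object, vocabulary included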