def main(_):
    """Run `wikisum.produce_examples` for the slice selected by `FLAGS.task_id`.

    The problem object is `wikisum.WikisumCommoncrawl()` when
    `FLAGS.for_commoncrawl` is truthy and `wikisum.WikisumWeb()` otherwise.
    Both the problem's output file paths and the list
    `range(utils.NUM_SHARDS)` are passed through
    `utils.shard(..., FLAGS.num_tasks)` and indexed with `FLAGS.task_id`.
    The `produce_examples` call runs inside
    `utils.timing("produce_examples")`.

    Side effect: if `FLAGS.vocab_dir` is falsy, it is overwritten with
    `FLAGS.out_dir` before the vocab path is built.

    Args:
        _: Unused positional argument.
    """
    problem_cls = (
        wikisum.WikisumCommoncrawl
        if FLAGS.for_commoncrawl
        else wikisum.WikisumWeb
    )
    problem = problem_cls()

    # Keep only this task's entry of the sharded output paths.
    all_out_filepaths = problem.out_filepaths(FLAGS.out_dir)
    task_out_filepaths = utils.shard(
        all_out_filepaths, FLAGS.num_tasks)[FLAGS.task_id]

    # The vocab directory falls back to the output directory when unset.
    if not FLAGS.vocab_dir:
        FLAGS.vocab_dir = FLAGS.out_dir

    # Same selection, applied to the shard indices.
    all_shard_ids = list(range(utils.NUM_SHARDS))
    task_shard_ids = utils.shard(
        all_shard_ids, FLAGS.num_tasks)[FLAGS.task_id]

    with utils.timing("produce_examples"):
        wikisum.produce_examples(
            shard_ids=task_shard_ids,
            wikis_dir=FLAGS.wikis_dir,
            refs_dir=FLAGS.refs_dir,
            urls_dir=FLAGS.urls_dir,
            vocab_path=os.path.join(FLAGS.vocab_dir, problem.vocab_filename),
            out_filepaths=task_out_filepaths,
        )
def main(_):
    """Entry point: produce examples for one task's share of the shards.

    Chooses between `wikisum.WikisumWeb` and `wikisum.WikisumCommoncrawl`
    based on `FLAGS.for_commoncrawl`, takes the `FLAGS.task_id`-th element
    of `utils.shard(...)` for both the output file paths and the shard ids,
    and hands everything to `wikisum.produce_examples` while inside the
    `utils.timing("produce_examples")` context.

    Note that `FLAGS.vocab_dir` is mutated: a falsy value is replaced with
    `FLAGS.out_dir`.

    Args:
        _: Positional argument that is ignored.
    """
    if not FLAGS.for_commoncrawl:
        problem = wikisum.WikisumWeb()
    else:
        problem = wikisum.WikisumCommoncrawl()

    sharded_filepaths = utils.shard(
        problem.out_filepaths(FLAGS.out_dir), FLAGS.num_tasks)
    out_filepaths = sharded_filepaths[FLAGS.task_id]

    # Default the vocab directory to the output directory.
    if not FLAGS.vocab_dir:
        FLAGS.vocab_dir = FLAGS.out_dir

    sharded_ids = utils.shard(list(range(utils.NUM_SHARDS)), FLAGS.num_tasks)
    shard_ids = sharded_ids[FLAGS.task_id]

    with utils.timing("produce_examples"):
        # Built inside the timed region, as in the original call expression.
        vocab_path = os.path.join(FLAGS.vocab_dir, problem.vocab_filename)
        wikisum.produce_examples(
            shard_ids=shard_ids,
            wikis_dir=FLAGS.wikis_dir,
            refs_dir=FLAGS.refs_dir,
            urls_dir=FLAGS.urls_dir,
            vocab_path=vocab_path,
            out_filepaths=out_filepaths,
        )