def main(argv):
  """Builds and runs the Beam pipeline that converts pretraining data to TF examples.

  Args:
    argv: Command-line arguments; only the program name is expected.

  Raises:
    app.UsageError: If any extra command-line arguments are present.
  """
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")
  conversion_config = tf_example_utils.PretrainConversionConfig(
      vocab_file=FLAGS.vocab_file,
      max_seq_length=FLAGS.max_seq_length,
      max_predictions_per_seq=FLAGS.max_predictions_per_seq,
      random_seed=FLAGS.random_seed,
      masked_lm_prob=FLAGS.masked_lm_prob,
      max_column_id=FLAGS.max_column_id,
      max_row_id=FLAGS.max_row_id,
      min_question_length=FLAGS.min_question_length,
      max_question_length=FLAGS.max_question_length,
      always_continue_cells=FLAGS.always_continue_cells,
      strip_column_names=False,
  )
  data_pipeline = pretrain_utils.build_pretrain_data_pipeline(
      input_file=FLAGS.input_file,
      output_dir=FLAGS.output_dir,
      config=conversion_config,
      dupe_factor=FLAGS.dupe_factor,
      min_num_rows=FLAGS.min_num_rows,
      min_num_columns=FLAGS.min_num_columns,
  )
  # NOTE(review): unlike the other drivers in this file, this one does not call
  # .wait_until_finish() on the result — confirm whether blocking is intended.
  beam_runner.run(data_pipeline)
def run(inputs, outputs, input_format):
  """Runs the retrieval conversion pipeline and blocks until it completes.

  Args:
    inputs: Input files containing interactions to convert.
    outputs: Destination files for the converted examples.
    input_format: Format identifier of the input files.
  """
  retrieval_config = tf_example_utils.RetrievalConversionConfig(
      vocab_file=FLAGS.vocab_file,
      max_seq_length=FLAGS.max_seq_length,
      max_column_id=FLAGS.max_column_id,
      max_row_id=FLAGS.max_row_id,
      strip_column_names=False,
      cell_trim_length=FLAGS.cell_trim_length,
      use_document_title=FLAGS.use_document_title,
  )
  pipeline = create_data.build_retrieval_pipeline(
      input_files=inputs,
      input_format=input_format,
      output_files=outputs,
      config=retrieval_config,
      converter_impl=FLAGS.converter_impl,
  )
  result = beam_runner.run(pipeline)
  result.wait_until_finish()
def main(unused_argv):
  r"""Reads nearest neighbors and adds them to the interactions."""
  del unused_argv
  # Resolve input/output file groupings from the configured directories.
  io_paths = _get_inputs_outputs(
      FLAGS.interaction_dir,
      FLAGS.json_dir,
      FLAGS.output_dir,
  )
  merge_pipeline = add_negative_tables_to_interactions(
      max_num_negatives=FLAGS.max_num_negatives,
      input_interactions_files=io_paths.input_interaction_files,
      input_tables_file=FLAGS.input_tables_file,
      input_json_files=io_paths.input_json_files,
      output_files=io_paths.output_interaction_files,
  )
  beam_runner.run(merge_pipeline)
def main(unused_argv):
  """Builds and runs the intermediate-pretraining synthesis pipeline."""
  del unused_argv
  synthesization_config = synthesize_entablement.SynthesizationConfig(
      prob_count_aggregation=FLAGS.prob_count_aggregation,
  )
  conversion_config = None
  if FLAGS.convert_to_examples:
    # NOTE(review): max_column_id / max_row_id are deliberately(?) set from
    # max_seq_length here rather than dedicated flags — confirm this is intended.
    conversion_config = tf_example_utils.ClassifierConversionConfig(
        vocab_file=FLAGS.vocab_file,
        max_seq_length=FLAGS.max_seq_length,
        max_column_id=FLAGS.max_seq_length,
        max_row_id=FLAGS.max_seq_length,
        strip_column_names=False,
    )
  pipeline = intermediate_pretrain_utils.build_pipeline(
      mode=FLAGS.mode,
      config=synthesization_config,
      use_fake_table=FLAGS.use_fake_table,
      add_opposite_table=FLAGS.add_opposite_table,
      drop_without_support_rate=FLAGS.drop_without_support_rate,
      input_file=FLAGS.input_file,
      output_dir=FLAGS.output_dir,
      output_suffix=FLAGS.output_suffix,
      conversion_config=conversion_config,
  )
  result = beam_runner.run(pipeline)
  result.wait_until_finish()
def main(_):
  """Runs the data pipeline for the configured inputs and waits for completion."""
  pipeline = build_pipeline(
      inputs=FLAGS.inputs,
      output_dir=FLAGS.output_dir,
  )
  beam_runner.run(pipeline).wait_until_finish()