Example #1
0
def main(unused_argv):
  """Rewrite the TSV at FLAGS.input with processed sources and targets.

  Each row is unpacked as a (source, target) pair. The source is passed
  through nqg_tokenization.process_source and the target through
  nqg_tokenization.process_target. The resulting pairs are written, in
  their original order, to FLAGS.output.

  Args:
    unused_argv: Ignored.
  """
  processed_rows = [
      (nqg_tokenization.process_source(src),
       nqg_tokenization.process_target(tgt))
      for src, tgt in tsv_utils.read_tsv(FLAGS.input)
  ]
  tsv_utils.write_tsv(processed_rows, FLAGS.output)
def main(unused_argv):
    """Shuffle the rows of FLAGS.input and split them into two TSV files.

    The module-level random generator is seeded with FLAGS.seed before the
    in-place shuffle. The first FLAGS.num_examples_1 shuffled rows are
    written to FLAGS.output_1 and the remaining rows to FLAGS.output_2.

    Args:
        unused_argv: Ignored.
    """
    rows = tsv_utils.read_tsv(FLAGS.input)

    random.seed(FLAGS.seed)
    random.shuffle(rows)

    cut = FLAGS.num_examples_1
    head, tail = rows[:cut], rows[cut:]

    tsv_utils.write_tsv(head, FLAGS.output_1)
    tsv_utils.write_tsv(tail, FLAGS.output_2)
Example #3
0
def main(unused_argv):
    """Split the rows of FLAGS.input with template_utils.split_by_template.

    The split is driven by spider_template_fn, FLAGS.max_num_examples_1 and
    FLAGS.seed. The first returned collection is written to FLAGS.output_1
    and the second to FLAGS.output_2.

    Args:
        unused_argv: Ignored.
    """
    rows = tsv_utils.read_tsv(FLAGS.input)
    first_split, second_split = template_utils.split_by_template(
        rows,
        template_fn=spider_template_fn,
        max_num_examples_1=FLAGS.max_num_examples_1,
        seed=FLAGS.seed,
    )
    tsv_utils.write_tsv(first_split, FLAGS.output_1)
    tsv_utils.write_tsv(second_split, FLAGS.output_2)
Example #4
0
def main(unused_argv):
    """Split the rows of FLAGS.input by length into two TSV files.

    Rows are sorted in ascending order of the number of pieces obtained by
    splitting one column on single spaces: column 1 when FLAGS.use_target
    is truthy, column 0 otherwise. The first FLAGS.num_examples sorted rows
    are written to FLAGS.output_1 and the rest to FLAGS.output_2.

    Args:
        unused_argv: Ignored.
    """
    rows = tsv_utils.read_tsv(FLAGS.input)
    column = 1 if FLAGS.use_target else 0

    def _piece_count(row):
        # Split on a literal space (not arbitrary whitespace), so consecutive
        # spaces contribute empty pieces to the count.
        return len(row[column].split(" "))

    ordered = sorted(rows, key=_piece_count)

    cut = FLAGS.num_examples
    tsv_utils.write_tsv(ordered[:cut], FLAGS.output_1)
    tsv_utils.write_tsv(ordered[cut:], FLAGS.output_2)
def main(unused_argv):
    """Write one TSV file per split returned by load_splits().

    Rows of FLAGS.input are keyed by their zero-based position in the file.
    For every (split name, ids) entry of the splits mapping, the rows with
    those ids are collected in the listed order and written to
    "<split name>.tsv" inside FLAGS.output_dir.

    Args:
        unused_argv: Ignored.

    Raises:
        KeyError: If a split lists an id that is not a row position.
    """
    splits = load_splits()
    rows_by_id = dict(enumerate(tsv_utils.read_tsv(FLAGS.input)))

    for split_name, ids in splits.items():
        split_rows = [rows_by_id[row_id] for row_id in ids]
        out_path = os.path.join(FLAGS.output_dir, "%s.tsv" % split_name)
        tsv_utils.write_tsv(split_rows, out_path)
def main(unused_argv):
    """Append each example's database schema string to its source.

    A schema string is built with _get_schema_string for every entry of the
    JSON loaded from FLAGS.tables and stored under the entry's lowercased
    "db_id". For each (source, target) row of FLAGS.input, the database id
    is taken from the first whitespace-delimited token of the source (with
    any trailing ":" removed), the matching schema string is appended to
    the source, and the lowercased (source, target) pair is written to
    FLAGS.output.

    Args:
        unused_argv: Ignored.

    Raises:
        KeyError: If a source names a database with no entry in FLAGS.tables.
        IndexError: If a source is empty or contains only whitespace.
    """
    db_id_to_schema_string = {
        table_json["db_id"].lower(): _get_schema_string(table_json)
        for table_json in load_json(FLAGS.tables)
    }

    new_examples = []
    for source, target in tsv_utils.read_tsv(FLAGS.input):
        # The schema map is keyed by lowercased db ids, so the lookup key has
        # to be lowercased as well; otherwise a mixed-case id in the source
        # would miss an entry that actually exists.
        db_id = source.split()[0].rstrip(":").lower()
        schema_string = db_id_to_schema_string[db_id]
        new_source = "%s%s" % (source, schema_string)
        new_examples.append((new_source.lower(), target.lower()))
    tsv_utils.write_tsv(new_examples, FLAGS.output)
Example #7
0
def main(unused_argv):
    """Create a two-way split of FLAGS.input refined by mcd_utils.swap_examples.

    The rows are first shuffled with the module-level random generator
    seeded by FLAGS.seed and cut at FLAGS.num_examples_1. The two halves are
    then handed to mcd_utils.swap_examples together with the compound and
    atom extractors from tmcd_utils, and the returned halves are written to
    FLAGS.output_1 and FLAGS.output_2.

    Args:
        unused_argv: Ignored.
    """
    rows = tsv_utils.read_tsv(FLAGS.input)

    # Step 1: seeded random split.
    random.seed(FLAGS.seed)
    random.shuffle(rows)
    cut = FLAGS.num_examples_1
    initial_1, initial_2 = rows[:cut], rows[cut:]

    # Step 2: swap examples between the halves to meet the atom constraint
    # and maximize compound divergence.
    final_1, final_2 = mcd_utils.swap_examples(
        initial_1,
        initial_2,
        get_compounds_fn=tmcd_utils.get_example_compounds,
        get_atoms_fn=tmcd_utils.get_example_atoms,
        max_iterations=1000,
        max_divergence=None,
    )

    tsv_utils.write_tsv(final_1, FLAGS.output_1)
    tsv_utils.write_tsv(final_2, FLAGS.output_2)
Example #8
0
def main(unused_argv):
    """Convert the JSON examples in FLAGS.examples to a lowercased TSV.

    Every record must provide "db_id", "question" and "query". Records whose
    database is listed in database_constants.DATABASES become a row of
    ("<db_id>: <question>", normalize_whitespace(query)), with both fields
    lowercased; all other records are dropped. The rows are written to
    FLAGS.output.

    Args:
        unused_argv: Ignored.
    """
    rows = []
    for record in load_json(FLAGS.examples):
        db_name = record["db_id"]
        question = record["question"]
        query = record["query"]

        # Keep only databases in the allowed set (those with >= 50 examples).
        if db_name in database_constants.DATABASES:
            # The database name is prepended to the question.
            prefixed_question = "%s: %s" % (db_name, question)
            cleaned_query = normalize_whitespace(query)
            rows.append((prefixed_question.lower(), cleaned_query.lower()))

    tsv_utils.write_tsv(rows, FLAGS.output)
def main(unused_argv):
    """Write the result of load_examples(FLAGS.input) to FLAGS.output.

    Args:
        unused_argv: Ignored.
    """
    loaded_rows = load_examples(FLAGS.input)
    tsv_utils.write_tsv(loaded_rows, FLAGS.output)
Example #10
0
def main(unused_argv):
    """Write the result of get_examples() to FLAGS.output.

    Args:
        unused_argv: Ignored.
    """
    generated_rows = get_examples()
    tsv_utils.write_tsv(generated_rows, FLAGS.output)
def main(unused_argv):
  """Write the result of read_examples(FLAGS.source, FLAGS.target) to FLAGS.output.

  Args:
    unused_argv: Ignored.
  """
  paired_rows = read_examples(FLAGS.source, FLAGS.target)
  tsv_utils.write_tsv(paired_rows, FLAGS.output)