Example #1
import os
from typing import Any, Dict

# `load_spider_tables` and `read_schema` are project-local helpers (imported
# elsewhere in the module) that parse Spider's tables.json and a
# Michigan-style `<dataset>_schema.csv`, respectively.
def load_schema_obj(dataset_name: str, data_dir: str) -> Dict[Any, Any]:
    """Loads the database schema object for the given dataset."""
    if dataset_name.lower() == "spider":
        return load_spider_tables(os.path.join(data_dir, "tables.json"))
    elif dataset_name.lower() == "wikisql":
        raise ValueError("WikiSQL inference is not supported yet")
    else:
        schema_csv = os.path.join(data_dir, dataset_name + "_schema.csv")
        return read_schema(schema_csv)
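
A minimal usage sketch for load_schema_obj. The dataset name "geoquery" and the data/ directory below are illustrative assumptions, not taken from the snippet:

# Hypothetical paths: Spider reads data/tables.json; a Michigan-style
# dataset such as geoquery reads data/geoquery_schema.csv.
spider_schema = load_schema_obj("spider", "data")
geoquery_schema = load_schema_obj("geoquery", "data")
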
def inference_wrapper(inference_fn, sharded=False):
    """Wrapper for running inference."""
    dataset_name = FLAGS.dataset_name

    if not FLAGS.predictions_path:
        raise ValueError('Predictions path must be set.')

    predictions = FLAGS.predictions_path + '*'
    # Don't run inference if predictions have already been generated.
    if not tf.io.gfile.glob(predictions):
        inference_fn(FLAGS.input, FLAGS.predictions_path,
                     FLAGS.checkpoint_filepath, dataset_name)

    # If using Abstract SQL, need to restore under-specified FROM clauses
    # output above.
    if FLAGS.restore_preds_from_asql:
        spider = dataset_name.lower() == 'spider'

        if not FLAGS.restored_predictions_path:
            raise ValueError('Restored predictions path must be set '
                             'if restoring predictions from AbSQL.')

        if not tf.io.gfile.exists(FLAGS.restored_predictions_path):
            restore_from_asql.restore_from_clauses(
                predictions,
                FLAGS.restored_predictions_path,
                spider_examples_json=FLAGS.spider_examples_json
                if spider else '',
                spider_tables_json=FLAGS.spider_tables_json if spider else '',
                michigan_schema=None if spider else read_schema(
                    os.path.join(FLAGS.data_filepath, FLAGS.dataset_name +
                                 '_schema.csv')),
                dataset_name=FLAGS.dataset_name,
                # 'foriegn' (sic) matches the spelling of the flag definition.
                use_oracle_foriegn_keys=FLAGS.use_oracle_foriegn_keys)
        predictions = FLAGS.restored_predictions_path

    if FLAGS.match_and_save:
        # Load the database tables (same logic as load_schema_obj above).
        schema_obj = load_schema_obj(dataset_name, FLAGS.data_filepath)

        # Now match the predictions with the original data and save.
        match_and_save(predictions, FLAGS.output, dataset_name.lower(),
                       FLAGS.splits, FLAGS.data_filepath, schema_obj, sharded)
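
inference_wrapper is driven entirely by flags (FLAGS.input, FLAGS.predictions_path, FLAGS.checkpoint_filepath, and so on) defined elsewhere in the module. A sketch of how it might be wired into a binary, assuming absl-style flags; my_inference_fn is a hypothetical stand-in for the real inference entry point:

from absl import app

def my_inference_fn(input_path, predictions_path, checkpoint_filepath,
                    dataset_name):
    # Stand-in: run the model over input_path and write one prediction
    # per line to predictions_path.
    ...

def main(unused_argv):
    inference_wrapper(my_inference_fn, sharded=False)

if __name__ == '__main__':
    app.run(main)
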
def process_spider(output_file, debugging_file, tokenizer):
  """Loads, converts, and writes Spider examples to the standard format."""
  if len(FLAGS.splits) > 1:
    raise ValueError('Not expecting more than one split for Spider.')
  split = FLAGS.splits[0]

  table_definitions = load_spider_tables(
      os.path.join(FLAGS.input_filepath, 'tables.json'))
  print('Loaded %d table definitions.' % len(table_definitions))

  spider_examples = load_spider_examples(
      os.path.join(FLAGS.input_filepath, split + '.json'))

  num_examples_created = 0
  num_examples_failed = 0

  # TODO(petershaw): Reduce duplication with other code path for schema
  # pre-processing.
  tables_json = _load_json_from_file(
      os.path.join(FLAGS.input_filepath, 'tables.json'))
  spider_table_schemas_map = abstract_sql_converters.spider_table_schemas_map(
      tables_json)

  for spider_example in spider_examples:
    # Look up the schema for the example's database; an unknown db_id will
    # surface as a KeyError below.
    example_db = spider_example['db_id']
    try:
      example = convert_spider(
          spider_example,
          table_definitions[example_db],
          tokenizer,
          FLAGS.generate_sql,
          FLAGS.anonymize_values,
          abstract_sql=FLAGS.abstract_sql,
          table_schemas=spider_table_schemas_map[example_db],
          allow_value_generation=FLAGS.allow_value_generation)
    except abstract_sql.UnsupportedSqlError as e:
      print(e)
      example = None
    if example:
      output_file.write(json.dumps(example.to_json()) + '\n')
      num_examples_created += 1

      debugging_file.write(example.model_input.original_utterance + '\n')
      if FLAGS.generate_sql:
        debugging_file.write(example.gold_query_string() + '\n\n')
    else:
      num_examples_failed += 1
  return num_examples_created, num_examples_failed
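
A minimal driving sketch for process_spider, assuming flags such as --input_filepath and --splits have already been parsed. The output file names are illustrative, and tokenizer stands in for whatever wordpiece tokenizer convert_spider expects:

tokenizer = ...  # construction is model-specific and not shown here
with open('spider_examples.jsonl', 'w') as output_file, \
     open('spider_debug.txt', 'w') as debugging_file:
  created, failed = process_spider(output_file, debugging_file, tokenizer)
print('Created %d examples; %d failed to convert.' % (created, failed))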