def compute_michigan_coverage():
    """Prints abstract SQL round-trip coverage for one Michigan dataset.

    Reads '<dataset_name>-schema.csv' and '<dataset_name>.json' from
    FLAGS.michigan_data_dir. Each gold SQL query in FLAGS.splits is then
    passed through sql_to_sql_spans, replace_from_clause and
    restore_from_clause. The outcome of every example is printed as it is
    processed, followed by the aggregated counters. Nothing is returned.
    """
    # Resolve the two input files from the command-line flags.
    data_dir = FLAGS.michigan_data_dir
    schema_path = os.path.join(data_dir, '%s-schema.csv' % FLAGS.dataset_name)
    examples_path = os.path.join(data_dir, '%s.json' % FLAGS.dataset_name)

    schema = michigan_preprocessing.read_schema(schema_path)
    if not FLAGS.use_oracle_foriegn_keys:
        foreign_keys = (
            abstract_sql_converters.michigan_db_to_foreign_key_tuples(schema))
    else:
        foreign_keys = (
            abstract_sql_converters.michigan_db_to_foreign_key_tuples_orcale(
                FLAGS.dataset_name))
    table_schema = abstract_sql_converters.michigan_db_to_table_tuples(schema)
    nl_sql_pairs = michigan_preprocessing.get_nl_sql_pairs(
        examples_path, FLAGS.splits)

    # Counters, one per possible outcome of an example.
    total = 0
    conversion_failures = 0
    parse_failures = 0
    reconstruction_failures = 0
    successes = 0
    # Histogram keyed by the first 100 characters of each error message.
    error_histogram = collections.defaultdict(int)

    def log_failure(template, query, error):
        """Prints the failure and records its (truncated) message."""
        print(template % (query, error))
        error_histogram[str(error)[:100]] += 1

    for position, (_, gold_sql) in enumerate(nl_sql_pairs, start=1):
        total = position
        print('Parsing example number %s.' % total)

        # Stage 1: gold SQL -> spans, then replace_from_clause.
        try:
            spans = abstract_sql.replace_from_clause(
                abstract_sql.sql_to_sql_spans(gold_sql, table_schema))
        except abstract_sql.UnsupportedSqlError as error:
            log_failure('Error converting:\n%s\n%s', gold_sql, error)
            conversion_failures += 1
            continue
        except abstract_sql.ParseError as error:
            log_failure('Error parsing:\n%s\n%s', gold_sql, error)
            parse_failures += 1
            continue

        # Stage 2: restore_from_clause using the foreign keys.
        try:
            spans = abstract_sql.restore_from_clause(spans, foreign_keys)
        except abstract_sql.UnsupportedSqlError as error:
            log_failure('Error recontructing:\n%s\n%s', gold_sql, error)
            reconstruction_failures += 1
            continue

        restored_sql = abstract_sql.sql_spans_to_string(spans)
        print('Success:\n%s\n%s' % (gold_sql, restored_sql))
        successes += 1

    # Final report.
    print('exception_counts: %s' % error_histogram)
    print('Examples: %s' % total)
    print('Failed conversions: %s' % conversion_failures)
    print('Failed parses: %s' % parse_failures)
    print('Failed reconstructions: %s' % reconstruction_failures)
    print('Successes: %s' % successes)
# Example #2
# 0
def _get_restored_predictions(predictions_dict,
                              utterance_to_db_map=None,
                              schema=None,
                              dataset_name=None,
                              use_oracle_foreign_keys=False):
    """Returns new predictions dict with FROM clauses restored.

    Predictions whose restoration raises UnsupportedSqlError or ParseError
    are reported on stdout and dropped, together with their scores.

    Args:
      predictions_dict: Dict with an 'utterance' entry and parallel
        'predictions' and 'scores' sequences.
      utterance_to_db_map: Optional mapping from utterance to a database
        object. When truthy, the looked-up value is passed to the spider_db_*
        converters to obtain foreign keys and table schemas.
      schema: Schema passed to the michigan_db_* converters when
        utterance_to_db_map is not provided.
      dataset_name: Dataset name passed to the oracle foreign key converter;
        only read when use_oracle_foreign_keys is true.
      use_oracle_foreign_keys: Whether to use the oracle foreign key converter
        instead of deriving foreign keys from schema. Ignored when
        utterance_to_db_map is provided.

    Returns:
      Dict with the same 'utterance' and the 'predictions' / 'scores' of the
      predictions that were restored successfully, in their original order.
    """
    utterance = predictions_dict['utterance']
    if utterance_to_db_map:
        db = utterance_to_db_map[utterance]
        foreign_keys = abstract_sql_converters.spider_db_to_foreign_key_tuples(
            db)
        table_schemas = abstract_sql_converters.spider_db_to_table_tuples(db)
    else:
        if use_oracle_foreign_keys:
            foreign_keys = abstract_sql_converters.michigan_db_to_foreign_key_tuples_orcale(
                dataset_name)
        else:
            foreign_keys = abstract_sql_converters.michigan_db_to_foreign_key_tuples(
                schema)
        table_schemas = abstract_sql_converters.michigan_db_to_table_tuples(
            schema)

    restored_predictions = []
    restored_scores = []
    for prediction, score in zip(predictions_dict['predictions'],
                                 predictions_dict['scores']):
        # Some predictions have repeated single quotes around values.
        prediction = prediction.replace("''", "'")

        try:
            restored_prediction = abstract_sql_converters.restore_predicted_sql(
                prediction, table_schemas, foreign_keys)
        except abstract_sql.UnsupportedSqlError as e:
            # Remove predictions that fail conversion.
            print('For query %s' % prediction)
            print('Unsupport Error: ' + str(e))
        except abstract_sql.ParseError as e:
            # Report which prediction failed and why, mirroring the handler
            # above, so dropped predictions can be diagnosed from the log.
            print('For query %s' % prediction)
            print('Parse Error: ' + str(e))
        else:
            restored_predictions.append(restored_prediction)
            restored_scores.append(score)

    return {
        'utterance': utterance,
        'predictions': restored_predictions,
        'scores': restored_scores,
    }
def _get_restored_predictions(
    predictions_dict: Prediction,
    utterance_to_db_map=None,
    schema=None,
    dataset_name=None,
    use_oracle_foreign_keys: bool = False,
) -> Tuple[Prediction, int]:
    """
    Restore the FROM clause of each prediction made for one utterance.

    Foreign keys and table schemas come from the spider_db_* converters when
    ``utterance_to_db_map`` is truthy, and from the michigan_db_* converters
    (fed with ``schema`` / ``dataset_name``) otherwise.

    Returns a 2-tuple: a dict holding the utterance plus the predictions and
    scores that were restored successfully, and the number of predictions
    dropped because restoration raised UnsupportedSqlError or ParseError.
    """
    utterance = predictions_dict["utterance"]

    if not utterance_to_db_map:
        # No per-utterance database: fall back to the Michigan converters.
        foreign_keys = (
            abstract_sql_converters.michigan_db_to_foreign_key_tuples_orcale(
                dataset_name
            )
            if use_oracle_foreign_keys
            else abstract_sql_converters.michigan_db_to_foreign_key_tuples(schema)
        )
        table_schemas = abstract_sql_converters.michigan_db_to_table_tuples(schema)
    else:
        db = utterance_to_db_map[utterance]
        foreign_keys = abstract_sql_converters.spider_db_to_foreign_key_tuples(db)
        table_schemas = abstract_sql_converters.spider_db_to_table_tuples(db)

    kept_predictions = []
    kept_scores = []
    failures = 0

    scored_predictions = zip(
        predictions_dict["predictions"], predictions_dict["scores"]
    )
    for raw_sql, score in scored_predictions:
        # Collapse doubled single quotes that some predictions carry around
        # values.
        candidate = raw_sql.replace("''", "'")

        try:
            restored = abstract_sql_converters.restore_predicted_sql(
                candidate, table_schemas, foreign_keys
            )
        except abstract_sql.UnsupportedSqlError as error:
            # Drop predictions that fail conversion.
            print("For query %s" % candidate)
            print("Unsupport Error: " + str(error))
            failures += 1
        except abstract_sql.ParseError as error:
            print("For query %s" % candidate)
            print("Parse Error: " + str(error))
            failures += 1
        else:
            kept_predictions.append(restored)
            kept_scores.append(score)

    restored_predictions_dict = {
        "utterance": utterance,
        "predictions": kept_predictions,
        "scores": kept_scores,
    }
    return restored_predictions_dict, failures