Ejemplo n.º 1
0
    def predict(self, output_dict, arguments, helper):
        """
        Generate a SQL query from the model output and execute it.

        * Args:
            output_dict: model's output dictionary; the "table_id" and
                "tokenized_question" entries are written into it in place
            arguments: arguments dictionary consisting of user_input
                ("table_id" and "db_path" are read here)
            helper: dictionary for helping get answer
                ("tokenized_question" is read here)

        * Returns:
            dictionary consisting of
                - "query": generated SQL query, converted with str()
                - "execute_result": result of executing the generated query,
                  or the message of the IndexError raised while executing it
        """
        # Attach the request context to the model output before generating queries.
        output_dict["table_id"] = arguments["table_id"]
        output_dict["tokenized_question"] = helper["tokenized_question"]

        # Only the first generated query is used.
        first_prediction = self.generate_queries(output_dict)[0]
        query = Query.from_dict(first_prediction["query"], ordered=True)
        engine = DBEngine(arguments["db_path"])

        try:
            result = engine.execute_query(first_prediction["table_id"], query, lower=True)
        except IndexError as error:
            # Report the failure message in place of the execution result.
            result = str(error)

        return {"query": str(query), "execute_result": result}
Ejemplo n.º 2
0
def evaluate(labels, predictions, db_path, ordered=True):  # pragma: no cover
    """
    Compute execution accuracy and logical-form accuracy of predictions.

    * Args:
        labels: dictionary {data_uid: sql_data, ...}; each entry is read for
            "sql_query" and "execution_result"
        predictions: dictionary {data_uid: sql_data, ...}; each entry holds
            either a truthy "error", or a "query" dictionary with "table_id"
        db_path: path handed to DBEngine
        ordered: forwarded to Query.from_dict for the predicted query

    * Returns:
        dictionary consisting of
            - "ex_accuracy": percentage of predictions whose execution result
              equals the labelled execution result
            - "lf_accuracy": percentage of predictions whose query equals the
              labelled query
        Both values are 0.0 when predictions is empty.
    """
    engine = DBEngine(db_path)

    exact_match, grades = [], []
    for data_uid in predictions:
        eg = labels[data_uid]
        ep = predictions[data_uid]

        qg = eg["sql_query"]
        gold = eg["execution_result"]

        # A reported error stands in for the execution result and leaves the
        # predicted query as None, so neither metric can count it as a match
        # unless the label carries the same value.
        error = ep.get("error", None)
        pred = error
        qp = None
        if not error:
            try:
                qp = Query.from_dict(ep["query"], ordered=ordered)
                pred = engine.execute_query(ep["table_id"], qp, lower=True)
            except Exception as e:
                # Any failure to build or run the predicted query is scored
                # like a wrong answer instead of aborting the evaluation.
                pred = repr(e)

        grades.append(pred == gold)
        exact_match.append(qp == qg)

    total = len(grades)
    if total == 0:
        # Nothing to score: avoid dividing by zero.
        return {"ex_accuracy": 0.0, "lf_accuracy": 0.0}

    return {
        "ex_accuracy": sum(grades) / total * 100.0,
        "lf_accuracy": sum(exact_match) / total * 100.0,
    }
Ejemplo n.º 3
0
    def _read(self, file_path, data_type=None):
        """
        Read SQL examples and build batched features, labels and a helper.

        The database and table files are looked up next to the data file as
        "<stem>.db" and "<stem>.tables.jsonl".

        * Args:
            file_path: data location, resolved to a path by self.data_handler
            data_type: forwarded to self.load_data and used as the
                progress-bar description

        * Returns:
            tuple of (make_batch(features, labels), helper); helper consists of
            "file_path", "db_path" and "examples" (keyed by generated data id)
        """
        source_path = Path(self.data_handler.read(file_path, return_path=True))
        stem = source_path.stem
        db_path = source_path.parent / f"{stem}.db"
        table_path = source_path.parent / f"{stem}.tables.jsonl"

        self.dbengine = DBEngine(db_path)

        examples = {}
        helper = {"file_path": source_path, "db_path": db_path, "examples": examples}
        features, labels = [], []

        sql_datas, table_data = self.load_data(source_path, table_path, data_type=data_type)
        for sql_data in tqdm(sql_datas, desc=data_type):
            question = sql_data["question"]
            table_id = sql_data["table_id"]
            feature_row = {"column": table_data[table_id]["header"], "question": question}

            data_uid = str(uuid.uuid1())

            sql = sql_data["sql"]
            conditions = sql["conds"]
            value_position = self.get_coditions_value_position(
                question, [cond[2] for cond in conditions]
            )

            # The gold query is executed up front so its result is stored with the label.
            sql_query = Query.from_dict(sql, ordered=True)
            execution_result = self.dbengine.execute_query(table_id, sql_query, lower=True)

            features.append(feature_row)
            labels.append({
                "id": data_uid,
                "table_id": table_id,
                "tokenized_question": self.word_tokenizer.tokenize(question),
                "aggregator_idx": sql["agg"],
                "select_column_idx": sql["sel"],
                "conditions_num": len(conditions),
                "conditions_column_idx": [cond[0] for cond in conditions],
                "conditions_operator_idx": [cond[1] for cond in conditions],
                "conditions_value_string": [str(cond[2]) for cond in conditions],
                "conditions_value_position": value_position,
                "sql_query": sql_query,
                "execution_result": execution_result,
            })

            examples[data_uid] = {
                "question": question,
                "sql_query": sql_query,
                "execution_result": execution_result,
            }

            # In test mode stop after the first 10 examples.
            if self.is_test and len(labels) == 10:
                break

        return make_batch(features, labels), helper
Ejemplo n.º 4
0
    def print_examples(self, index, inputs, predictions):
        """
        Print one evaluation example: question, ground truth and prediction.

        * Args:
            index: data index
            inputs: mini-batch inputs
            predictions: prediction dictionary consisting of
                - key: 'id' (question id)
                - value: consisting of dictionary
                    table_id, query (agg, sel, conds)

        * Returns:
            None; the question, the labelled SQL query with its execution
            result, and the predicted SQL query with its execution result
            are printed to stdout
        """
        dataset = self._dataset

        # Map the position inside the mini-batch to the dataset's example id.
        data_id = dataset.get_id(inputs["labels"]["data_idx"][index].item())

        helper = dataset.helper
        question = helper["examples"][data_id]["question"]
        ground_truth = dataset.get_ground_truth(data_id)

        engine = DBEngine(helper["db_path"])

        predicted = predictions[data_id]
        predicted_query = Query.from_dict(predicted["query"], ordered=True)
        predicted_result = engine.execute_query(
            predicted["table_id"], predicted_query, lower=True
        )

        print("- Question:", question)
        print("- Answers:")
        print("    SQL Query: ", ground_truth["sql_query"])
        print("    Execute Results:", ground_truth["execution_result"])
        print("- Predict:")
        print("    SQL Query: ", predicted_query)
        print("    Execute Results:", predicted_result)
        print("-" * 30)
Ejemplo n.º 5
0
    }


if __name__ == "__main__":  # pragma: no cover
    parser = ArgumentParser()
    parser.add_argument("source_file", help="source file for the prediction")
    parser.add_argument("db_file", help="source database for the prediction")
    parser.add_argument("pred_file", help="predictions by the model")
    parser.add_argument(
        "--ordered",
        action="store_true",
        help="whether the exact match should consider the order of conditions",
    )
    args = parser.parse_args()

    engine = DBEngine(args.db_file)
    exact_match = []
    with open(args.source_file) as fs, open(args.pred_file) as fp:
        grades = []
        for ls, lp in tqdm(zip(fs, fp), total=count_lines(args.source_file)):
            eg = json.loads(ls)
            ep = json.loads(lp)
            qg = Query.from_dict(eg["sql"], ordered=args.ordered)
            gold = engine.execute_query(eg["table_id"], qg, lower=True)
            pred = ep.get("error", None)
            qp = None
            if not ep.get("error", None):
                try:
                    qp = Query.from_dict(ep["query"], ordered=args.ordered)
                    pred = engine.execute_query(eg["table_id"], qp, lower=True)
                except Exception as e: