Beispiel #1
0
def to_batch_seq(sql_data, table_data, idxes, st, ed,
                 is_train=True):
    """
    Build ``Example`` objects for positions ``st`` .. ``ed - 1`` of ``idxes``.

    :param sql_data: indexable collection of entries; each entry is read with
        the keys ``'db_id'``, ``'question'``, ``'query'`` and, optionally,
        ``'rule_label'``.
    :param table_data: mapping looked up with an entry's ``'db_id'``.
    :param idxes: indices into ``sql_data``.
    :param st: first position of ``idxes`` to use (inclusive).
    :param ed: last position of ``idxes`` to use (exclusive).
    :param is_train: when true, the result is ordered by descending
        ``len(example.src_sent)``.
    :return: list of ``Example`` objects. Entries that carry a
        ``'rule_label'`` for which ``is_valid`` returns ``False`` are left out.
    """
    batch = []

    for position in range(st, ed):
        entry = sql_data[idxes[position]]
        info = process(entry, table_data[entry['db_id']])

        col_set_iter = info['col_set_iter']
        col_set_type = info['col_set_type']

        # For every column, count how many items of q_iter_small occur in it;
        # the count is accumulated in slot 0 of that column's type row.
        for col_idx, col_tokens in enumerate(col_set_iter):
            for question_item in info['q_iter_small']:
                if question_item in col_tokens:
                    col_set_type[col_idx][0] += 1

        schema_linking(info['question_arg'], info['question_arg_type'],
                       info['one_hot_type'], col_set_type, col_set_iter, entry)

        col_table_dict = get_col_table_dict(info['tab_cols'], info['tab_ids'], entry)
        table_col_name = get_table_colNames(info['tab_ids'], info['col_iter'])

        # Overwritten only after the helpers above have run on the original
        # first column entry.
        col_set_iter[0] = ['count', 'number', 'many']

        rule_label = None
        if 'rule_label' in entry:
            # NOTE(security): eval() executes each space-separated token of the
            # label string; only safe when the data set is trusted.
            rule_label = [eval(token) for token in entry['rule_label'].strip().split(' ')]
            verdict = is_valid(rule_label, col_table_dict=col_table_dict, sql=entry)
            if verdict is False:
                continue

        table_names = info['table_names']
        query = entry['query']

        # NOTE(review): tokenized_src_sent is given the same col_set_type
        # object as col_hot_type -- confirm this is intended.
        example = Example(
            src_sent=info['question_arg'],
            col_num=len(col_set_iter),
            vis_seq=(entry['question'], col_set_iter, query),
            tab_cols=col_set_iter,
            sql=query,
            one_hot_type=info['one_hot_type'],
            col_hot_type=col_set_type,
            table_names=table_names,
            table_len=len(table_names),
            col_table_dict=col_table_dict,
            cols=info['tab_cols'],
            table_col_name=table_col_name,
            table_col_len=len(table_col_name),
            tokenized_src_sent=col_set_type,
            tgt_actions=rule_label
        )
        example.sql_json = copy.deepcopy(entry)
        batch.append(example)

    if is_train:
        # Longest source sentence first; ties keep their insertion order.
        batch.sort(key=lambda ex: len(ex.src_sent), reverse=True)
    return batch
Beispiel #2
0
def to_batch_seq(sql_data, schema_data, idxes, st, ed,
                 is_train=True):
    """
    Build ``Example`` objects for positions ``st`` .. ``ed - 1`` of ``idxes``.

    :param sql_data: indexable collection of entries; each entry is read with
        the keys ``'db_id'``, ``'column_names'``, ``'question_tokens'``,
        ``'column_features'``, ``'table_names'``, ``'values'`` and,
        optionally, ``'label_str'``.
    :param schema_data: mapping from an entry's ``'db_id'`` to a schema whose
        ``"column_names"`` items are indexed as ``item[0]`` (stored in
        ``tab_ids``) and ``item[1]`` (stored in ``tab_cols``).
    :param idxes: indices into ``sql_data``.
    :param st: first position of ``idxes`` to use (inclusive).
    :param ed: last position of ``idxes`` to use (exclusive).
    :param is_train: when true, the result is ordered by descending
        ``len(example.src_sent)``.
    :return: list of ``Example`` objects. Entries whose ``'label_str'``
        cannot be evaluated are skipped.
    """
    examples = []

    for i in range(st, ed):
        sql = sql_data[idxes[i]]
        schema_id = sql['db_id']

        # A separate name is used for the schema columns so the outer loop
        # index ``i`` is not rebound inside the loop body.
        schema_columns = schema_data[schema_id]["column_names"]
        tab_cols = [column[1] for column in schema_columns]
        tab_ids = [column[0] for column in schema_columns]

        # The last entry of the joined column names is dropped.
        col_set = [''.join(col) for col in sql['column_names']][:-1]
        col_table_dict = get_col_table_dict(tab_cols, tab_ids, col_set)

        rule_label = None
        if 'label_str' in sql:
            try:
                # NOTE(security): eval() executes each space-separated token of
                # the label string; only safe when the data set is trusted.
                rule_label = [eval(x) for x in sql['label_str'].strip().split(' ')]
            except Exception:
                # Skip entries with labels that fail to evaluate, but let
                # KeyboardInterrupt / SystemExit propagate.
                continue
            # TODO: revisit this part later; validation is currently disabled:
            # if is_valid(rule_label, col_table_dict=col_table_dict, sql=sql) is False:
            #     print('*'*50)
            #     continue

        example = Example(
            src_sent=sql['question_tokens'],
            src_len=len(sql['question_tokens']),

            col_names=sql['column_names'],
            col_len=len(sql['column_names']),
            feature_c=sql['column_features'],

            table_names=sql['table_names'],
            table_len=len(sql['table_names']),

            value_name=sql['values'],
            value_len=len(sql['values']),

            col_table_dict=col_table_dict,
            tgt_actions=rule_label,
        )
        example.sql_json = copy.deepcopy(sql)
        examples.append(example)

    if is_train:
        # Longest source sentence first; ties keep their insertion order.
        examples.sort(key=lambda e: -len(e.src_sent))
    return examples