Example #1
    # For the case when command-line arguments are used.
    parser = ArgumentParser()
    parser.add_argument('--source_file', help='source file for the prediction', default=path_source)
    parser.add_argument('--db_file', help='source database for the prediction', default=path_db)
    parser.add_argument('--pred_file', help='predictions by the model', default=path_pred)
    parser.add_argument('--ordered', action='store_true',
                        help='whether the exact match should consider the order of conditions')
    args = parser.parse_args()
    # Override the parsed flag with the `ordered` value passed in by the caller.
    args.ordered = ordered

    engine = DBEngine(args.db_file)
    exact_match = []
    with open(args.source_file) as fs, open(args.pred_file) as fp:
        grades = []
        for ls, lp in tqdm(zip(fs, fp), total=count_lines(args.source_file)):
            eg = json.loads(ls)
            ep = json.loads(lp)
            qg = Query.from_dict(eg['sql'], ordered=args.ordered)
            gold = engine.execute_query(eg['table_id'], qg, lower=True)
            # Default the prediction to the reported error (if any) so that a
            # failed prediction can never accidentally match the gold result.
            pred = ep.get('error', None)
            qp = None
            if not ep.get('error', None):
                try:
                    qp = Query.from_dict(ep['query'], ordered=args.ordered)
                    pred = engine.execute_query(eg['table_id'], qp, lower=True)
                except Exception as e:
                    pred = repr(e)
            correct = pred == gold  # execution accuracy: same result set
            match = qp == qg        # logical-form match: same parsed query
            grades.append(correct)
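Example #1 relies on several names that are defined outside the excerpt: ArgumentParser, json, tqdm, DBEngine, Query, count_lines, and the path_* defaults. A minimal sketch of the imports it assumes, modeled on the WikiSQL-style lib/ layout (the module paths are an assumption, not something the excerpt shows):

    from argparse import ArgumentParser
    import json

    from tqdm import tqdm

    # Assumed locations, following the WikiSQL reference layout; adjust to the
    # actual package this excerpt belongs to.
    from lib.dbengine import DBEngine
    from lib.query import Query
    from lib.common import count_lines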
Example #2
    if not os.path.isdir(args.dout):
        os.makedirs(args.dout)

    for split in ['train', 'dev', 'test']:
        fsplit = os.path.join(args.din, split) + '.jsonl'
        ftable = os.path.join(args.din, split) + '.tables.jsonl'
        fout = os.path.join(args.dout, split) + '_tok.jsonl'

        print('annotating {}'.format(fsplit))
        with open(fsplit) as fs, open(ftable) as ft, open(fout, 'wt') as fo:
            print('loading tables')

            # ws: Construct table dict with table_id as a key.
            tables = {}
            for line in tqdm(ft, total=count_lines(ftable)):
                d = json.loads(line)
                tables[d['id']] = d
            print('loading examples')
            n_written = 0
            cnt = -1
            for line in tqdm(fs, total=count_lines(fsplit)):
                cnt += 1
                d = json.loads(line)
                # a = annotate_example(d, tables[d['table_id']])
                a = annotate_example_ws(d, tables[d['table_id']])
                fo.write(json.dumps(a) + '\n')
                n_written += 1

                # Toy mode: `answer_toy` and `toy_size` are defined outside this
                # excerpt and cap how many examples get processed.
                if answer_toy:
                    if cnt > toy_size:
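Example #2 calls count_lines(...) to give tqdm a total, but the helper itself is not part of the excerpt. If it is not available, a minimal stand-in (an assumption, not the project's own implementation) could be:

    def count_lines(fname):
        # Count the lines in a file so tqdm can show a meaningful total.
        with open(fname) as f:
            return sum(1 for _ in f)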
Example #3
    if not os.path.isdir(args.dout):
        os.makedirs(args.dout)

    # for split in ['train', 'dev', 'test']:
    for split in args.split.split(','):
        fsplit = os.path.join(args.din, split) + '.jsonl'
        ftable = os.path.join(args.din, split) + '.tables.jsonl'
        fout = os.path.join(args.dout, split) + '_tok.jsonl'

        print('annotating {}'.format(fsplit))
        with open(fsplit) as fs, open(ftable) as ft, open(fout, 'wt') as fo:
            # print('loading tables')

            # ws: Construct table dict with table_id as a key.
            tables = {}
            for line in tqdm(ft, total=count_lines(ftable), ncols=60, desc='Loading Tables'):
                d = json.loads(line)
                tables[d['id']] = d
            # print('loading examples')
            n_written = 0
            cnt = -1
            for line in tqdm(fs, total=count_lines(fsplit), ncols=60, desc='Loading Examples'):
                cnt += 1
                d = json.loads(line)
                # a = annotate_example(d, tables[d['table_id']])
                a = annotate_example_ws(d, tables[d['table_id']])
                fo.write(json.dumps(a) + '\n')
                n_written += 1

                if answer_toy:
                    if cnt > toy_size:
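Example #3 reads args.din, args.dout, and args.split, but the argument parsing happens outside the excerpt. A sketch of a parser that would produce those attributes (the option names come from the excerpt; the defaults and help strings are illustrative assumptions):

    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--din', default='data',
                        help='directory holding <split>.jsonl and <split>.tables.jsonl')
    parser.add_argument('--dout', default='data_tok',
                        help='output directory for the annotated <split>_tok.jsonl files')
    parser.add_argument('--split', default='train,dev,test',
                        help='comma-separated list of splits to annotate')
    args = parser.parse_args()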
Example #4
    parser.add_argument('--db_file',
                        help='source database for the prediction',
                        default=path_db)
    parser.add_argument('--pred_file', help='predictions by the model')
    parser.add_argument(
        '--ordered',
        action='store_true',
        help='whether the exact match should consider the order of conditions')
    args = parser.parse_args()
    args.ordered = ordered

    engine = DBEngine(args.db_file)
    exact_match = []
    with open(args.source_file) as fs, open(args.pred_file) as fp:
        grades = []
        for ls, lp in tqdm(zip(fs, fp), total=count_lines(args.pred_file)):
            eg = json.loads(ls)
            ep = json.loads(lp)
            qg = Query.from_dict(eg['sql'], ordered=args.ordered)
            gold = engine.execute_query(eg['table_id'], qg, lower=True)
            pred = ep.get('error', None)
            qp = None
            if not ep.get('error', None):
                try:
                    qp = Query.from_dict(ep['query'], ordered=args.ordered)
                    pred = engine.execute_query(eg['table_id'], qp, lower=True)
                except Exception as e:
                    pred = repr(e)
            correct = pred == gold
            match = qp == qg
            grades.append(correct)
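Both evaluation excerpts (#1 and #4) stop right after grades.append(correct): exact_match is never updated and no summary is printed. A sketch of how the loop tail and the final report could look (an assumption about the omitted code, not part of the excerpt):

            # Track logical-form matches alongside execution matches.
            exact_match.append(match)

    # After the loop: report execution accuracy and logical-form (exact-match) accuracy.
    print(json.dumps({
        'ex_accuracy': sum(grades) / len(grades),
        'lf_accuracy': sum(exact_match) / len(exact_match),
    }, indent=2))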