Example #1
0
def load_model():
    """Build a SQLNet model and restore its best saved weights.

    Loads GloVe embeddings, constructs a SQLNet configured for column
    attention with trainable embeddings, attaches the validation table
    data, and loads every sub-module's state_dict from the checkpoint
    paths reported by best_model_name().

    Returns:
        The fully restored SQLNet model.
    """
    train_emb = True   # restore the fine-tuned-embedding checkpoints
    ca = True          # use column attention

    N_word = 300       # GloVe embedding dimension
    B_word = 42        # GloVe corpus size (42B tokens)
    USE_SMALL = False
    GPU = True

    # load_used=True can speed up loading considerably.
    word_emb = load_word_emb('glove/glove.%dB.%dd.txt' % (B_word, N_word),
                             load_used=True, use_small=USE_SMALL)

    # BUGFIX: trainable_emb was hard-coded to True; tie it to the
    # train_emb flag above so that flag actually controls the model
    # configuration (value is unchanged while train_emb is True).
    model = SQLNet(word_emb,
                   N_word=N_word,
                   use_ca=ca,
                   gpu=GPU,
                   trainable_emb=train_emb)

    sql_data, table_data, val_sql_data, val_table_data, \
            test_sql_data, test_table_data, \
            TRAIN_DB, DEV_DB, TEST_DB = load_dataset(
                    0, use_small=USE_SMALL)

    model.val_table_data = val_table_data

    def _restore(module, path):
        # One print-then-load step, shared by every sub-module below.
        print("Loading from %s" % path)
        module.load_state_dict(torch.load(path))

    if train_emb:
        agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(
            train_emb, ca)
        _restore(model.agg_pred, agg_m)
        _restore(model.sel_pred, sel_m)
        _restore(model.cond_pred, cond_m)
        _restore(model.agg_embed_layer, agg_e)
        _restore(model.sel_embed_layer, sel_e)
        _restore(model.cond_embed_layer, cond_e)
    else:
        agg_m, sel_m, cond_m = best_model_name(train_emb, ca)
        _restore(model.agg_pred, agg_m)
        _restore(model.sel_pred, sel_m)
        _restore(model.cond_pred, cond_m)

    return model
 def __init__(self):
     """Load the shared resources (GloVe embeddings, dataset splits and a
     pretrained SQLNet) and cache them on the UserInputUtility class."""
     util = UserInputUtility  # shorthand; attributes set via it are class-level

     print(os.getcwd())

     # load_used can speed up loading
     util.word_emb = load_word_emb(
         'glove/glove.%dB.%dd.txt' % (util.B_word, util.N_word),
         load_used=True, use_small=util.USE_SMALL)

     (train_sql, train_table, dev_sql, dev_table,
      test_sql, self.test_table_data,
      TRAIN_DB, DEV_DB, self.TEST_DB) = load_dataset(
          0, use_small=util.USE_SMALL)

     util.model = SQLNet(util.word_emb,
                         N_word=util.N_word,
                         use_ca=util.args.use_ca,
                         gpu=util.GPU,
                         trainable_emb=True)

     # Restore the best per-subtask checkpoints into the model.
     agg_m, sel_m, cond_m = best_model_name(util.args)
     for sub_module, ckpt in ((util.model.agg_pred, agg_m),
                              (util.model.sel_pred, sel_m),
                              (util.model.cond_pred, cond_m)):
         print("Loading from %s" % ckpt)
         sub_module.load_state_dict(torch.load(ckpt))
     print("Done loading...")
Example #3
0
            TRAIN_DB, DEV_DB, TEST_DB = load_dataset(
                    args.dataset, use_small=USE_SMALL)

    # load_used is tied to the train_emb flag here; load_used can speed
    # up loading.
    word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), \
            load_used=args.train_emb, use_small=USE_SMALL)

    # Seq2SQL baseline (cannot train embeddings) vs. SQLNet (cannot do RL),
    # enforced by the asserts below.
    if args.baseline:
        model = Seq2SQL(word_emb,
                        N_word=N_word,
                        gpu=GPU,
                        trainable_emb=args.train_emb)
        assert not args.train_emb, "Seq2SQL can\'t train embedding."
    else:
        model = SQLNet(word_emb,
                       N_word=N_word,
                       use_ca=args.ca,
                       gpu=GPU,
                       trainable_emb=args.train_emb)
        assert not args.rl, "SQLNet can\'t do reinforcement learning."
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=0)

    # train_emb additionally yields per-subtask embedding checkpoint names.
    if args.train_emb:
        agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args)
    else:
        agg_m, sel_m, cond_m = best_model_name(args)

    if args.rl or args.train_emb:  # Load pretrained model.
        agg_lm, sel_lm, cond_lm = best_model_name(args, for_load=True)
        print("Loading from %s" % agg_lm)
Example #4
0
# load word embedding
tic = time()
print '==> loading word embedding'
word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), load_used=args.train_emb, use_small=USE_SMALL)
# import pickle
# with open('glove/word_emb42B.pkl', 'rb') as f:
#     pickle.load(f)
    # word_emb = pickle.load(f)
print 'time to load word emb: ' + str(time() - tic)

# build sqlnet model
if not args.baseline:
    tic = time()
    print '==> loading sqlnet constructor'
    model = SQLNet(word_emb, N_word=N_word, use_ca=args.ca, gpu=GPU, trainable_emb = args.train_emb)
    print 'time to load sqlnet constructor: ' + str(time() - tic)
    assert not args.rl, "SQLNet can\'t do reinforcement learning."
# NOTE(review): if args.baseline is set, `model` is never assigned here and
# the next line raises NameError — presumably a baseline branch exists
# elsewhere; confirm.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = 0)

if args.train_emb:
    agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args)
# NOTE(review): agg_m/sel_m/cond_m from the branch above are immediately
# overwritten by the pretrained-checkpoint names below; only
# agg_e/sel_e/cond_e survive.

# load model
agg_m, sel_m, cond_m = best_model_name(args, savedstr='_pretrain_wikisql')
print('==> best model names:', agg_m, sel_m, cond_m)
print "Loading from %s"%agg_m
model.agg_pred.load_state_dict(torch.load(agg_m))
print "Loading from %s"%sel_m
model.sel_pred.load_state_dict(torch.load(sel_m))
print "Loading from %s"%cond_m
Example #5
0
    # Load the requested dataset split, then merge in a small dummy set as
    # extra examples/tables.
    sql_data, table_data, val_sql_data, val_table_data, test_sql_data, test_table_data, TRAIN_DB, DEV_DB, TEST_DB = load_dataset(
        args.dataset, use_small=USE_SMALL)
    examples, tables = load_dataset_dummy(0)
    examples.extend(sql_data)
    tables.update(table_data)

    word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), \
        load_used=True, use_small=USE_SMALL) # load_used can speed up loading

    # Seq2SQL baseline vs. SQLNet (optionally with column attention).
    if args.baseline:
        model = Seq2SQL(word_emb, N_word=N_word, gpu=GPU, trainable_emb=True)
    else:
        model = SQLNet(word_emb,
                       N_word=N_word,
                       use_ca=args.ca,
                       gpu=GPU,
                       trainable_emb=True)

    # Restore the per-subtask checkpoints (the '_mconly' variant) when the
    # embeddings were fine-tuned.
    if args.train_emb:
        agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(
            args, savedstr='_mconly')
        print('==> best model names:', agg_m, sel_m, cond_m)
        print "Loading from %s" % agg_m
        model.agg_pred.load_state_dict(torch.load(agg_m))
        print "Loading from %s" % sel_m
        model.sel_pred.load_state_dict(torch.load(sel_m))
        print "Loading from %s" % cond_m
        model.cond_pred.load_state_dict(torch.load(cond_m))
        print "Loading from %s" % agg_e
        model.agg_embed_layer.load_state_dict(torch.load(agg_e))
Example #6
0
    # Path to the pretrained Chinese BERT checkpoint used as the encoder.
    bert_path = 'code/chinese_L-12_H-768_A-12'
    # "toy" mode shrinks the data and batch size for a quick smoke test.
    if args.toy:
        use_small = True
        gpu = args.gpu
        batch_size = 16
    else:
        use_small = False
        gpu = args.gpu
        batch_size = 64

    dev_sql, dev_table, dev_db, test_sql, test_table, test_db = load_dataset(
        use_small=use_small, mode='test')
    #train_sql, train_table, train_db, dev_sql, dev_table, dev_db = load_dataset(use_small=use_small, mode='test')
    model_bert, tokenizer, bert_config = get_bert(bert_path)
    # NOTE(review): n_word is defined outside this fragment — presumably the
    # BERT hidden size; confirm at the definition site.
    model = SQLNet(N_word=n_word,
                   use_ca=args.ca,
                   gpu=gpu,
                   trainable_emb=args.train_emb)

    # Restore the single best full-model checkpoint; map_location='cpu' lets
    # it load on machines without a GPU.
    model_path = 'saved_model/best_model'
    print("Loading from %s" % model_path)
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    print("Loaded model from %s" % model_path)
    # dev_acc[1] / dev_acc[2] are logic-form and execution accuracy
    # (see the format string below).
    dev_acc = epoch_acc(model_bert, tokenizer, model, batch_size, dev_sql,
                        dev_table, dev_db, args.mode_type)
    print('Dev Logic Form Accuracy: %.3f, Execution Accuracy: %.3f' %
          (dev_acc[1], dev_acc[2]))

    print("Start to predict test set")
    predict_test(model_bert, tokenizer, model, batch_size, test_sql,
                 test_table, args.output_dir)
    print("Output path of prediction result is %s" % args.output_dir)
Example #7
0
        # (fragment: the matching `if` arm above this view sets the
        #  toy-mode values)
        gpu = args.gpu
        batch_size = args.bs
    learning_rate = 1e-3

    # load dataset
    train_sql, train_table, train_db, dev_sql, dev_table, dev_db = load_dataset(
        use_small=use_small)

    # word_emb = load_word_emb('data/char_embedding')
    # NOTE(review): hard-coded absolute path to a local machine; the
    # relative 'data/char_embedding' path commented out above looks like
    # the portable choice.
    word_emb = load_word_emb(
        '/media/yinshuai/d8644f6c-5a97-4e12-909b-b61d2271b61c/nlp-datasets/nlp2sql/char_embedding'
    )

    model = SQLNet(word_emb,
                   N_word=n_word,
                   use_ca=args.ca,
                   gpu=gpu,
                   trainable_emb=args.train_emb)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=0)

    # Optionally resume from a previously saved checkpoint.
    if args.restore:
        model_path = 'saved_model/best_model_32'
        print "Loading trained model from %s" % model_path
        model.load_state_dict(torch.load(model_path))

    # used to record best score of each sub-task
    best_sn, best_sc, best_sa, best_wn, best_wc, best_wo, best_wv, best_wr = 0, 0, 0, 0, 0, 0, 0, 0
    best_sn_idx, best_sc_idx, best_sa_idx, best_wn_idx, best_wc_idx, best_wo_idx, best_wv_idx, best_wr_idx = 0, 0, 0, 0, 0, 0, 0, 0
    best_lf, best_lf_idx = 0.0, 0
Example #8
0
    # toy mode: tiny data and small batches for a quick smoke test
    if args.toy:
        use_small = True
        gpu = args.gpu
        batch_size = 16
    else:
        use_small = False
        gpu = args.gpu
        batch_size = 64

    dev_sql, dev_table, dev_db, test_sql, test_table, test_db = load_dataset(
        use_small=use_small, mode='test')

    # Character-level embeddings feed the SQLNet encoder.
    word_emb = load_word_emb('data/char_embedding')
    model = SQLNet(word_emb,
                   N_word=n_word,
                   use_ca=args.ca,
                   gpu=gpu,
                   trainable_emb=args.train_emb)

    # Restore the best saved full-model checkpoint.
    model_path = 'saved_model/best_model'
    print "Loading from %s" % model_path
    model.load_state_dict(torch.load(model_path))
    print "Loaded model from %s" % model_path

    dev_acc = epoch_acc(model, batch_size, dev_sql, dev_table, dev_db)
    print 'Dev Logic Form Accuracy: %.3f, Execution Accuracy: %.3f' % (
        dev_acc[1], dev_acc[2])

    print "Start to predict test set"
    predict_test(model, batch_size, test_sql, test_table, args.output_dir)
    print "Output path of prediction result is %s" % args.output_dir
Example #9
0
                        action='store_true',
                        help='Whether use gpu to train')
    parser.add_argument('--restore',
                        action='store_true',
                        help='Whether restore trained model')
    args = parser.parse_args()

    gpu = args.gpu
    batch_size = args.bs
    n_word = 300        # embedding dimension passed to SQLNet
    learning_rate = 3e-3

    # load dataset
    train_sql, train_schema, dev_sql, dev_schema = load_dataset()
    word_emb = load_word_emb('data/char_embedding')
    model = SQLNet(word_emb, N_word=n_word, gpu=gpu)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=0)

    # Optionally resume training from a saved checkpoint.
    if args.restore:
        model_path = 'saved_model/test_model'
        print("Loading trained model from %s" % model_path)
        model.load_state_dict(torch.load(model_path))

    # used to record best score of each sub-task
    best_sn, best_sc, best_sa, best_wn, best_wc, best_wo, best_wv, best_wr = 0, 0, 0, 0, 0, 0, 0, 0
    best_sn_idx, best_sc_idx, best_sa_idx, best_wn_idx, best_wc_idx, best_wo_idx, best_wv_idx, best_wr_idx = 0, 0, 0, 0, 0, 0, 0, 0
    # best_lf starts at -1.0 so the first evaluated epoch always becomes
    # the current best logical-form score.
    best_lf, best_lf_idx = -1.0, 0

    # NOTE(review): "Star" is presumably a typo for "Start" in the banner
    # below (runtime string, left unchanged here).
    print("#" * 20 + "  Star to Train  " + "#" * 20)
Example #10
0
    # toy mode: tiny data and small batches for a quick smoke test
    if args.toy:
        use_small = True
        gpu = args.gpu
        batch_size = 16
    else:
        use_small = False
        gpu = args.gpu
        batch_size = 64

    dev_sql, dev_table, dev_db, test_sql, test_table, test_db = load_dataset(
        use_small=use_small, mode='test')

    # Either a BERT encoder (N_word=768) or plain char embeddings
    # (N_word=300) backs the SQLNet model.
    if args.use_bert:
        n_word = 768
        model = SQLNet(N_word=n_word, gpu=gpu, bert_path=args.bert_path)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     weight_decay=0)
    else:
        n_word = 300
        word_emb = load_word_emb('data/char_embedding.json')
        model = SQLNet(N_word=n_word,
                       gpu=gpu,
                       word_emb=word_emb,
                       trainable_emb=args.train_emb)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     weight_decay=0)

    # Checkpoint stored on Google Drive (Colab-style mount path).
    model_path = '../drive/My Drive/saved_model/best_model'
Example #11
0
import argparse

if __name__ == '__main__':
    # Command-line options: GPU toggle and where to write predictions.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--gpu', action='store_true', help='Whether use gpu')
    arg_parser.add_argument('--output_dir',
                            type=str,
                            default='./saved_model/res.txt',
                            help='Output path of prediction result')
    args = arg_parser.parse_args()

    gpu, n_word, batch_size = args.gpu, 300, 30
    dev_sql, dev_schema = load_dataset(mode='test')

    # Character-level embeddings feed the SQLNet encoder.
    word_emb = load_word_emb('data/char_embedding')
    model = SQLNet(word_emb, N_word=n_word, gpu=gpu)

    # Restore the saved test checkpoint, then score the dev split.
    model_path = 'saved_model/test_model'
    print("Loading from %s" % model_path)
    model.load_state_dict(torch.load(model_path))
    print("Loaded model from %s" % model_path)

    dev_acc = epoch_acc(model, batch_size, dev_sql, dev_schema)
    print('Dev Logic Form Accuracy: %.3f' % (dev_acc[1]))

    # print("Start to predict test set")
    # predict_test(model, batch_size, dev_sql, dev_schema, args.output_dir)
    # print("Output path of prediction result is %s" % args.output_dir)
Example #12
0
        # (fragment: the `if args.toy:` test above this view chose this arm)
        use_small = True
        gpu = args.gpu
        batch_size = 16
    else:
        use_small = False
        gpu = args.gpu
        batch_size = args.bs
    learning_rate = 1e-3
    learning_rate_bert = 1e-3   # separate learning rate for the BERT part

    # load dataset
    train_sql, train_table, train_db, dev_sql, dev_table, dev_db = load_dataset(
        use_small=use_small)
    model_bert, tokenizer, bert_config = get_bert(bert_path)

    model = SQLNet(N_word=n_word, use_ca=args.ca, gpu=gpu)
    #optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0)
    # get_opt builds two optimizers so BERT can be fine-tuned (or frozen,
    # per args.fine_tune) independently of the SQLNet head.
    opt, opt_bert = get_opt(model,
                            model_bert,
                            learning_rate,
                            learning_rate_bert,
                            fine_tune=args.fine_tune)

    # Optionally resume from a previously saved checkpoint.
    if args.restore:
        model_path = 'saved_model/best_model'
        print("Loading trained model from %s" % model_path)
        model.load_state_dict(torch.load(model_path))

    # used to record best score of each sub-task
    best_sn, best_sc, best_sa, best_wn, best_wc, best_wo, best_wv, best_wr = 0, 0, 0, 0, 0, 0, 0, 0
    best_sn_idx, best_sc_idx, best_sa_idx, best_wn_idx, best_wc_idx, best_wo_idx, best_wv_idx, best_wr_idx = 0, 0, 0, 0, 0, 0, 0, 0
Example #13
0
        # RL fine-tuning uses a smaller learning rate than supervised training.
        learning_rate = 1e-4 if params['rl'] else 1e-3

        sql_data, table_data, val_sql_data, val_table_data, \
                test_sql_data, test_table_data, \
                TRAIN_DB, DEV_DB, TEST_DB = load_dataset(
                        params['dataset'], use_small=USE_SMALL)

        word_emb = load_word_emb('glove/glove.%dB.%dd.txt'%(B_word,N_word), \
                load_used=params['train_emb'], use_small=USE_SMALL)

        # Seq2SQL baseline (cannot train embeddings) vs. SQLNet (cannot do
        # RL), enforced by the asserts below.
        if params['baseline']:
            model = Seq2SQL(word_emb, N_word=N_word, gpu=GPU,
                    trainable_emb = params['train_emb'])
            assert not params['train_emb'], "Seq2SQL can\'t train embedding."
        else:
            model = SQLNet(word_emb, N_word=N_word, use_ca=params['ca'],
                    gpu=GPU, trainable_emb = params['train_emb'])
            assert not params['rl'], "SQLNet can\'t do reinforcement learning."
        optimizer = torch.optim.Adam(model.parameters(),
                lr=learning_rate, weight_decay = 0)

        # NOTE(review): everything else in this fragment reads config from
        # `params`, but best_model_name is called with `args` — confirm
        # `args` is in scope here and agrees with `params`.
        if params['train_emb']:
            agg_m, sel_m, cond_m, agg_e, sel_e, cond_e = best_model_name(args)
        else:
            agg_m, sel_m, cond_m = best_model_name(args)

        if params['rl'] or params['train_emb']: # Load pretrained model.
            agg_lm, sel_lm, cond_lm = best_model_name(args, for_load=True)
            print "Loading from %s"%agg_lm
            model.agg_pred.load_state_dict(torch.load(agg_lm))
            print "Loading from %s"%sel_lm
            model.sel_pred.load_state_dict(torch.load(sel_lm))