Code Example #1
def predict():
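    # Restore a trained BaselineModel from checkpoints/<FLAGS.model_path> and report word-level P/R/F on the test set.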
    if FLAGS.model_path is None:
        raise ValueError('Model path is None!')
    if FLAGS.dataset == 'pku':
        config.hidden_dim = 64
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    id2word = build_reverse_dictionary(word2id)
    x_test, y_test = get_train_data(test_data_path, word2id)
    init_embedding = None

    print 'test_data_path: %s' % test_data_path
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq

    with tf.Session() as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'test_start!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print 'No trained model found'
            exit()

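        # Predict in mini-batches; padding3 pads the inputs with the PAD id and padding(y, 3) pads the labels (3 presumably being the padding tag).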
        test_pred = []
        for i in range(0, len(x_test), config.batch_size):
            input_x = x_test[slice(i, i + config.batch_size)]
            input_x = padding3(input_x, word2id[PAD])
            y = y_test[slice(i, i + config.batch_size)]
            y = padding(y, 3)
            predict = model.predict_step(sess, input_x)
            test_pred += predict

        P, R, F = evaluate_word_PRF(test_pred, y_test)
        print '%s: P:%f R:%f F:%f' % (FLAGS.model_path, P, R, F)
        print '------------------------------------------'
Code Example #2
    load_prev_model = args.load_prev_model

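    # Build the model selected by args.model and move it to the GPU; when args.load_prev_model is set, also restore the state dict saved at args.resume.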
    if args.model == 'simple_baseline':

        if load_prev_model:
            model = models.SimpleBaselineModel(args).cuda()
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint)
        else:
            model = models.SimpleBaselineModel(args)
            model.cuda() # load model to gpu

    if args.model == 'baseline':

        if load_prev_model:
            model = models.BaselineModel(args).cuda()
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint)
        else:
            model = models.BaselineModel(args)
            model.cuda() # load model to gpu


    if args.model == 'best':
        if load_prev_model:
            model = models.BestModel(args).cuda()
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint)
        else:
            model = models.BestModel(args)
            model.cuda() # load model to gpu
Code Example #3
File: train_baseline.py  Project: zhusleep/CWS_Dict
def train():
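    # Train BaselineModel on the source-domain corpus and, after every epoch, evaluate on four target domains (literature, computer, medicine, finance).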
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    X_train, y_train = get_train_data(train_data_path, word2id)
    init_embedding = get_embedding(word2id, size=config.word_dim)
    # domain1
    X_test1, y_test1 = get_train_data(literature_path, word2id)
    # domain2
    X_test2, y_test2 = get_train_data(computer_path, word2id)
    # domain3
    X_test3, y_test3 = get_train_data(medicine_path, word2id)
    # domain4
    X_test4, y_test4 = get_train_data(finance_path, word2id)

    print 'train_data %s' % train_data_path
    print 'bigram %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq

    print 'len(train) %d ' % (len(X_train))
    print 'len(test) %d %d %d %d' % (len(X_test1), len(X_test2), len(X_test3),
                                     len(X_test4))
    print 'init_embedding shape [%d,%d]' % (init_embedding.shape[0],
                                            init_embedding.shape[1])
    print 'Train started!'

    tfConfig = tf.ConfigProto()
    tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory
    with tf.Session(config=tfConfig) as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)

        if not os.path.exists('checkpoints'):
            os.mkdir('checkpoints')
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())

        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'restore from original model!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

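        # Best F1 and the epoch it was reached, tracked separately for each of the four test domains.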
        best_f1, best_e1 = 0, 0
        best_f2, best_e2 = 0, 0
        best_f3, best_e3 = 0, 0
        best_f4, best_e4 = 0, 0
        for epoch in xrange(config.n_epoch):
            start_time = time.time()

            #train
            train_loss = []
            for step, (X, Y) in enumerate(
                    data_iterator(X_train,
                                  y_train,
                                  config.batch_size,
                                  padding_word=word2id[PAD],
                                  shuffle=True)):
                loss = model.train_step(sess, X, Y, config.dropout_keep_prob)
                print 'epoch:%d>>%2.2f%%' % (
                    epoch, config.batch_size * step * 100.0 / len(X_train)
                ), 'completed in %.2f (sec) <<\r' % (time.time() - start_time),
                sys.stdout.flush()
                train_loss.append(loss)
            train_loss = np.mean(train_loss, dtype=float)
            print 'Train Epoch %d loss %f' % (epoch, train_loss)
            saver.save(sess, checkpoints_model, epoch)

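            # Evaluate a single domain: run prediction in padded mini-batches and score with word-level P/R/F.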
            def test(X_test, y_test, domain):
                test_pred = []
                for i in range(0, len(X_test), config.batch_size):
                    input_x = X_test[slice(i, i + config.batch_size)]
                    input_x = padding3(input_x, word2id[PAD])
                    y = y_test[slice(i, i + config.batch_size)]
                    y = padding(y, 3)

                    predict = model.predict_step(sess, input_x)
                    test_pred += predict
                P, R, F = evaluate_word_PRF(test_pred, y_test)
                print '%s Test: P:%f R:%f F:%f' % (domain, P, R, F)
                return F

            #domain1
            f1 = test(X_test1, y_test1, 'A')
            if best_f1 < f1:
                best_f1 = f1
                best_e1 = epoch
            # domain2
            f2 = test(X_test2, y_test2, 'B')
            if best_f2 < f2:
                best_f2 = f2
                best_e2 = epoch
            #domain3
            f3 = test(X_test3, y_test3, 'C')
            if best_f3 < f3:
                best_f3 = f3
                best_e3 = epoch
            #domain4
            f4 = test(X_test4, y_test4, 'D')
            if best_f4 < f4:
                best_f4 = f4
                best_e4 = epoch
            print "best A:%f %d  best B: %f %d  best C %f %d  best D %f %d" % (
                best_f1, best_e1, best_f2, best_e2, best_f3, best_e3, best_f4,
                best_e4)
            print "******************************************************"
Code Example #4
File: train_baseline.py  Project: zhusleep/CWS_Dict
def predict():
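    # Restore the checkpoint saved at epoch FLAGS.epoch and evaluate every target domain.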
    if FLAGS.model_path is None:
        raise ValueError('Model path is None!')
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    id2word = build_reverse_dictionary(word2id)
    init_embedding = None

    # domain1
    X_test1, y_test1 = get_train_data(literature_path, word2id)
    # domain2
    X_test2, y_test2 = get_train_data(computer_path, word2id)
    # domain3
    X_test3, y_test3 = get_train_data(medicine_path, word2id)
    # domain4
    X_test4, y_test4 = get_train_data(finance_path, word2id)

    print 'len(test) %d  %d %d %d' % (len(X_test1), len(X_test2), len(X_test3),
                                      len(X_test4))
    print 'model %s' % FLAGS.model
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path

    with tf.Session() as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)

        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        checkpoints_model = checkpoints_model + '-%d' % FLAGS.epoch

        print 'test_start!'
        saver.restore(sess, checkpoints_model)

        def test(X_test, y_test, domain):
            test_pred = []
            for i in range(0, len(X_test), config.batch_size):
                input_x = X_test[slice(i, i + config.batch_size)]
                input_x = padding3(input_x, word2id[PAD])
                y = y_test[slice(i, i + config.batch_size)]
                y = padding(y, 3)

                predict = model.predict_step(sess, input_x)
                test_pred += predict
            P, R, F = evaluate_word_PRF(test_pred, y_test)
            print '%s Test: P:%f R:%f F:%f' % (domain, P, R, F)
            return test_pred

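        # For each domain, report P/R/F and convert both the predicted and the gold label sequences back to segmented text (convert_wordsegmentation is assumed to write its result under the given path).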
        # domain1
        test_pred1 = test(X_test1, y_test1, 'Literature')
        convert_wordsegmentation(X_test1, test_pred1, id2word, FLAGS.model,
                                 literature_path)
        convert_wordsegmentation(X_test1, y_test1, id2word, FLAGS.model,
                                 literature_path + '_golden')
        # domain2
        test_pred2 = test(X_test2, y_test2, 'Computer  ')
        convert_wordsegmentation(X_test2, test_pred2, id2word, FLAGS.model,
                                 computer_path)
        convert_wordsegmentation(X_test2, y_test2, id2word, FLAGS.model,
                                 computer_path + '_golden')

        # domain3
        test_pred3 = test(X_test3, y_test3, 'Medicine  ')
        convert_wordsegmentation(X_test3, test_pred3, id2word, FLAGS.model,
                                 medicine_path)
        convert_wordsegmentation(X_test3, y_test3, id2word, FLAGS.model,
                                 medicine_path + '_golden')

        # domain4
        test_pred4 = test(X_test4, y_test4, 'Finance   ')
        convert_wordsegmentation(X_test4, test_pred4, id2word, FLAGS.model,
                                 finance_path)
        convert_wordsegmentation(X_test4, y_test4, id2word, FLAGS.model,
                                 finance_path + '_golden')
Code Example #5
def train():
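    # Single-dataset training loop: validate after every epoch, checkpoint on the best dev F1, and stop early after 5 epochs without improvement.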
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    X_train, y_train = get_train_data(train_data_path, word2id)
    X_valid, y_valid = get_train_data(dev_data_path, word2id)
    x_test, y_test = get_train_data(test_data_path, word2id)
    init_embedding = get_embedding(word2id, size=config.word_dim)

    print 'train_data_path: %s' % train_data_path
    print 'valid_data_path: %s' % dev_data_path
    print 'test_data_path: %s' % test_data_path
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq

    print 'len(train_data): %d' % len(X_train)
    print 'len(valid_data): %d' % len(X_valid)
    print 'len(test_data): %d' % len(x_test)
    print 'init_embedding shape: [%d,%d]' % (init_embedding.shape[0],
                                             init_embedding.shape[1])
    print 'Train started!'
    tfConfig = tf.ConfigProto()  # google protobuf2
    tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory
    with tf.Session(config=tfConfig) as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)

        if not os.path.exists('checkpoints'):
            os.mkdir('checkpoints')
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(
            tf.all_variables())  # defined in python.training.saver.py

        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'restore from original model!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        best_f1, best_e = 0, 0
        for epoch in xrange(config.n_epoch):
            start_time = time.time()

            #train
            train_loss = []
            for step, (X, Y) in enumerate(
                    data_iterator(X_train,
                                  y_train,
                                  config.batch_size,
                                  padding_word=word2id[PAD],
                                  shuffle=True)):
                loss = model.train_step(sess, X, Y, config.dropout_keep_prob)
                print 'epoch:%d>>%2.2f%%' % (
                    epoch, config.batch_size * step * 100.0 / len(X_train)
                ), 'completed in %.2f (sec) <<\r' % (time.time() - start_time),
                sys.stdout.flush()
                train_loss.append(loss)
            train_loss = np.mean(train_loss, dtype=float)
            print 'Train Epoch %d loss %f' % (epoch, train_loss)

            #valid
            valid_loss = []
            valid_pred = []
            for i in range(0, len(X_valid), config.batch_size):
                input_x = X_valid[slice(i, i + config.batch_size)]
                input_x = padding3(input_x, word2id[PAD])
                y = y_valid[slice(i, i + config.batch_size)]
                y = padding(y, 3)
                loss, predict = model.dev_step(sess, input_x, y)
                valid_loss.append(loss)
                valid_pred += predict
            valid_loss = np.mean(valid_loss, dtype=float)
            P, R, F = evaluate_word_PRF(valid_pred, y_valid)
            print 'Valid Epoch %d loss %f' % (epoch, valid_loss)
            print 'P:%f R:%f F:%f' % (P, R, F)
            print '--------------------------------'

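            # Checkpoint only when dev F1 improves; best_e counts epochs without improvement and drives early stopping.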
            if F > best_f1:
                best_f1 = F
                best_e = 0
                saver.save(sess, checkpoints_model)
            else:
                best_e += 1

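            # Report test-set P/R/F every epoch for reference; model selection is still based on the dev set.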
            test_pred = []
            for i in range(0, len(x_test), config.batch_size):
                input_x = x_test[slice(i, i + config.batch_size)]
                input_x = padding3(input_x, word2id[PAD])
                y = y_test[slice(i, i + config.batch_size)]
                y = padding(y, 3)
                predict = model.predict_step(sess, input_x)
                test_pred += predict
            P, R, F = evaluate_word_PRF(test_pred, y_test)
            print 'Test: P:%f R:%f F:%f Best_dev_F:%f' % (P, R, F, best_f1)

            if best_e > 4:
                print 'Early stopping'
                break

        print 'best_f1 on validset is %f' % best_f1