예제 #1
0
        test_data_list_path=conf_dict['test_data_path'])
    
    predict_infer(conf_dict, data_generator, predict_data_path, \
            predict_result_path, model_path)


if __name__ == '__main__':
    # Command-line options: --conf_path, --model_path and --predict_file are
    # required; --result_file is optional and defaults to an empty string.
    parser = argparse.ArgumentParser()
    parser.add_argument("--conf_path", type=str,
            help="conf_file_path_for_model. (default: %(default)s)",
            required=True)
    parser.add_argument("--model_path", type=str,
            help="model_path", required=True)
    parser.add_argument("--predict_file", type=str,
            help="the_file_to_be_predicted", required=True)
    parser.add_argument("--result_file", type=str,
            default='', help="the_file_of_predicted_results")
    args = parser.parse_args()

    # Load the configuration file
    conf_dict = conf_lib.load_conf(args.conf_path)
    model_path = args.model_path
    predict_data_path = args.predict_file
    predict_result_path = args.result_file

    # Fail early when the model or the file to predict on does not exist.
    # The result path is not checked here.
    for input_path in [model_path, predict_data_path]:
        if not os.path.exists(input_path):
            raise ValueError("%s not found." % (input_path))

    # Run the main function
    main(conf_dict, model_path, predict_data_path, predict_result_path)
예제 #2
0
def save_tf_model(sess, epoch):
    """Write a checkpoint of ``sess`` using a freshly created ``tf.train.Saver``.

    Args:
        sess: session handed to ``Saver.save``.
        epoch: value passed as ``global_step`` to ``Saver.save``.
    """
    checkpoint_prefix = "model/spo_model/model.ckpt"
    tf.train.Saver().save(sess, checkpoint_prefix, global_step=epoch)



def main(conf_dict, use_cuda=False):
    """Build the data reader from the configuration, load data and train.

    Args:
        conf_dict: configuration mapping; the dictionary and data-list paths
            read below are looked up in it, and it is forwarded to ``train``.
        use_cuda: forwarded unchanged to ``train``.
    """
    reader_cls = spo_data_reader.DataReader
    # DataReader keyword -> value taken from the configuration
    reader_kwargs = {
        'wordemb_dict_path': conf_dict['word_idx_path'],
        'postag_dict_path': conf_dict['postag_dict_path'],
        'label_dict_path': conf_dict['so_label_dict_path'],
        'p_eng_dict_path': conf_dict['label_dict_path'],
        'train_data_list_path': conf_dict['spo_train_data_path'],
        'test_data_list_path': conf_dict['spo_test_data_path'],
    }
    data_generator = reader_cls(**reader_kwargs)

    data_generator.load_train_data()
    train(conf_dict, data_generator, use_cuda=use_cuda)


if __name__ == '__main__':
    # The only command-line option is the (required) configuration path
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--conf_path",
        type=str,
        required=True,
        help="conf_file_path_for_model. (default: %(default)s)")
    args = parser.parse_args()

    # Load the configuration file
    conf_dict = conf_lib.load_conf(args.conf_path)

    # The GPU switch is compared as a string: only the exact value 'True'
    # enables it, and a missing key counts as 'False'.
    use_gpu = conf_dict.get('use_gpu', 'False') == 'True'
    main(conf_dict, use_cuda=use_gpu)
예제 #3
0
def train(conf_dict, data_reader, use_cuda=False):
    """
    Training of so labeling model

    Declares the input layers, builds the features with
    ``spo_model.db_lstm``, attaches a linear-chain CRF cost and minimizes
    its mean with Adam. An inference model is saved at the end of every
    pass, and once more under ``final`` when training stops.

    Training stops either when a batch cost drops below 1 or after
    ``conf_dict['pass_num']`` passes.

    Args:
        conf_dict: configuration mapping. This function reads
            ``mix_hidden_lr``, ``batch_size``, ``pass_num`` and
            ``spo_model_save_dir`` and forwards the mapping to
            ``spo_model.db_lstm``.
        data_reader: reader object; forwarded to ``spo_model.db_lstm`` and
            asked for the training reader via ``get_train_reader()``.
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, otherwise on
            ``fluid.CPUPlace()``.
    """
    # input data layer
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    postag = fluid.layers.data(
        name='token_pos', shape=[1], dtype='int64', lod_level=1)
    p_word = fluid.layers.data(
        name='p_word', shape=[1], dtype='int64', lod_level=1)
    # label
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)

    # embedding + lstm
    feature_out = spo_model.db_lstm(data_reader, word,
                                    postag, p_word, conf_dict)

    # loss function
    # crf layer
    mix_hidden_lr = float(conf_dict['mix_hidden_lr'])
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # optimizer
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=2e-3)
    optimizer.minimize(avg_cost)

    # Decoding layer that refers to the same 'crfw' parameter as the CRF
    # cost. Its output is not fetched during training; the call is kept so
    # the program that gets built stays the same.
    fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    train_batch_reader = paddle.batch(
        paddle.reader.shuffle(data_reader.get_train_reader(), buf_size=8192),
        batch_size=conf_dict['batch_size'])

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    feeder = fluid.DataFeeder(feed_list=[word, postag, p_word, target], place=place)
    exe = fluid.Executor(place)

    save_dirname = conf_dict['spo_model_save_dir']

    # Names fed to the saved inference model. Each entry has to match the
    # ``name=`` of one of the input layers declared above.
    feed_var_names = ['word_data', 'token_pos', 'p_word']

    def save_model(sub_dir):
        """Save the inference model under ``save_dirname``/``sub_dir``."""
        save_path = os.path.join(save_dirname, sub_dir)
        fluid.io.save_inference_model(save_path, feed_var_names,
                                      [feature_out], exe, params_filename='params')

    def train_loop(main_program, trainer_id=0):
        """start train loop"""
        exe.run(fluid.default_startup_program())

        for pass_id in six.moves.xrange(conf_dict['pass_num']):
            cost_sum, cost_counter = 0.0, 0
            for data in train_batch_reader():
                fetched = exe.run(main_program, feed=feeder.feed(data), fetch_list=[avg_cost])
                # Convert the fetched mean cost to a plain float once, so it
                # can be compared, summed and formatted below.
                cost = float(fetched[0])
                cost_sum += cost
                cost_counter += 1

                # cost expected, training over
                if cost < 1:
                    save_model('final')
                    return

            # save the model once each pass ends
            pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
            save_model('pass_%04d-%f' % (pass_id, pass_avg_cost))

        # pass times complete and the training is over
        save_model('final')

    train_loop(fluid.default_main_program())


def main(conf_dict, use_cuda=False):
    """Create the data reader described by the configuration and train.

    Returns without doing anything when CUDA is requested but the installed
    fluid build reports that it was not compiled with CUDA.

    Args:
        conf_dict: configuration mapping; the dictionary and data-list paths
            read below are looked up in it, and it is forwarded to ``train``.
        use_cuda: whether training should run on GPU; forwarded to ``train``.
    """
    cuda_unavailable = use_cuda and not fluid.core.is_compiled_with_cuda()
    if cuda_unavailable:
        return

    reader_cls = spo_data_reader.DataReader
    # DataReader keyword -> value taken from the configuration
    reader_kwargs = {
        'wordemb_dict_path': conf_dict['word_idx_path'],
        'postag_dict_path': conf_dict['postag_dict_path'],
        'label_dict_path': conf_dict['so_label_dict_path'],
        'p_eng_dict_path': conf_dict['label_dict_path'],
        'train_data_list_path': conf_dict['spo_train_data_path'],
        'test_data_list_path': conf_dict['spo_test_data_path'],
    }
    data_generator = reader_cls(**reader_kwargs)

    train(conf_dict, data_generator, use_cuda=use_cuda)


if __name__ == '__main__':
    # The configuration path is the single, mandatory command-line option
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--conf_path",
        type=str,
        required=True,
        help="conf_file_path_for_model. (default: %(default)s)")
    args = parser.parse_args()

    # Load the configuration file
    conf_dict = conf_lib.load_conf(args.conf_path)

    # 'use_gpu' is matched as text: GPU is used only for the exact string
    # 'True'; an absent key is treated as 'False'.
    use_gpu = conf_dict.get('use_gpu', 'False') == 'True'
    main(conf_dict, use_cuda=use_gpu)