コード例 #1
0
def loadSemLM11(load_path,
                train_texts,
                valid_texts,
                we_dict_path=None,
                dep=1,
                hidden_size=32,
                batch_size=200,
                save_folder=".",
                model_name="simple",
                max_epochs=120,
                continue_train=False,
                load_dt=True):
    """Rebuild a ``Model11`` and restore its parameters from ``load_path``.

    The feature vocabulary is re-extracted from ``train_texts + valid_texts``
    so that the model is constructed with the same input/output sizes before
    ``load_params`` is called.

    Args:
        load_path: Passed to ``mdl.load_params``.
        train_texts: Training texts, fed to ``generate_sequential_data``.
        valid_texts: Validation texts, fed to ``generate_sequential_data``.
        we_dict_path: If not ``None``, a ``WEDict`` is built from this path
            and handed to the model; otherwise the model gets ``None``.
        dep: Forwarded to ``Model11`` as ``dep``.
        hidden_size: Forwarded to ``Model11`` as its third positional argument.
        batch_size: Stored under ``Option.BATCH_SIZE``.
        save_folder: Folder prefix for the two ``.pkl`` paths stored in the
            model options.
        model_name: Base file name for those ``.pkl`` paths.
        max_epochs: Stored under ``Option.MAX_EPOCHS``.
        continue_train: When true, training is resumed via ``fit_shuffer``
            and nothing is returned.
        load_dt: Forwarded to ``generate_sequential_data`` as ``loaddata``.

    Returns:
        ``(mdl, fm)`` when ``continue_train`` is false; ``None`` otherwise.
    """

    def to_indices(feature, sequences):
        # Map every item to its id in ``feature.map`` and shift it up by one
        # (presumably index 0 is reserved, e.g. for padding -- TODO confirm).
        return [[feature.map[feature.getFeatureValue(item)] + 1
                 for item in sequence]
                for sequence in sequences]

    fm = FeatureManager()

    # Build the vocabulary over training and validation texts together.
    combined_texts = train_texts + valid_texts
    all_x, all_y = generate_sequential_data(combined_texts, loaddata=load_dt)
    for position, x_sequence in enumerate(all_x):
        fm.extract_features(x_sequence, all_y[position])

    # Index-encode the training split.
    train_x, train_y = generate_sequential_data(train_texts, loaddata=load_dt)
    train_x = to_indices(fm.f, train_x)
    train_y = to_indices(fm.fY, train_y)

    # Index-encode the validation split.
    valid_x, valid_y = generate_sequential_data(valid_texts, loaddata=load_dt)
    valid_x = to_indices(fm.f, valid_x)
    valid_y = to_indices(fm.fY, valid_y)

    we_dict = WEDict(we_dict_path) if we_dict_path is not None else None

    mdl = Model11(fm.f.current_index + 1,
                  fm.fY.current_index + 1,
                  hidden_size,
                  dep=dep,
                  we_dict=we_dict,
                  map=fm.f.map)

    path_prefix = save_folder + "/"
    settings = (
        (Option.SAVE_TO, path_prefix + model_name + ".pkl"),
        (Option.SAVE_FREQ, 20),
        (Option.VALID_FREQ, 20),
        (Option.BATCH_SIZE, batch_size),
        (Option.SAVE_BEST_VALID_TO,
         path_prefix + "best_" + model_name + ".pkl"),
        (Option.MAX_EPOCHS, max_epochs),
    )
    for option_key, option_value in settings:
        mdl.option[option_key] = option_value

    mdl.compile()
    batch_preparer = preprare_seq_seq_data

    # Parameters are restored only after the model has been compiled.
    mdl.load_params(load_path)
    if not continue_train:
        return mdl, fm

    mdl.fit_shuffer(train_x, train_y, valid_x, valid_y,
                    process_data_func=batch_preparer)
コード例 #2
0
def trainSemLM22(train_texts,
                 valid_texts,
                 we_dict_path1=None,
                 we_dict_path2=None,
                 dep=1,
                 hidden_size1=32,
                 hidden_size2=32,
                 batch_size=200,
                 save_folder=".",
                 model_name="hybrid",
                 max_epochs=120,
                 load_dt=True):
    """Build and train a two-input / two-output ``Model22``.

    Two independent ``FeatureManager`` instances are fitted on
    ``train_texts + valid_texts`` -- one per (X, Y) stream returned by
    ``generate_sequential_data(..., type="2-2")`` -- and then used to
    index-encode the training and validation splits before training.

    Args:
        train_texts: Training texts, fed to ``generate_sequential_data``.
        valid_texts: Validation texts, fed to ``generate_sequential_data``.
        we_dict_path1: If not ``None``, a ``WEDict`` built from this path is
            passed to the model as ``we_dict1``.
        we_dict_path2: Same as above, for ``we_dict2``.
        dep: Forwarded to ``Model22`` as ``dep``.
        hidden_size1: Forwarded to ``Model22`` (fifth positional argument).
        hidden_size2: Forwarded to ``Model22`` (sixth positional argument).
        batch_size: Stored under ``Option.BATCH_SIZE``.
        save_folder: Folder prefix for the two ``.pkl`` paths stored in the
            model options.
        model_name: Base file name for those ``.pkl`` paths.
        max_epochs: Stored under ``Option.MAX_EPOCHS``.
        load_dt: Forwarded to ``generate_sequential_data`` as ``loaddata``.

    Returns:
        None. Training runs through ``mdl.fit_shuffer``.
    """

    def to_indices(feature, sequences):
        # Map every item to its id in ``feature.map`` and shift it up by one
        # (presumably index 0 is reserved, e.g. for padding -- TODO confirm).
        return [[feature.map[feature.getFeatureValue(item)] + 1
                 for item in sequence]
                for sequence in sequences]

    fm1 = FeatureManager()
    fm2 = FeatureManager()

    # Build both vocabularies over training and validation texts together.
    all_texts = train_texts + valid_texts
    X1, X2, Y1, Y2 = generate_sequential_data(all_texts,
                                              type="2-2",
                                              loaddata=load_dt)

    for x_sequence, y_sequence in zip(X1, Y1):
        fm1.extract_features(x_sequence, y_sequence)

    for x_sequence, y_sequence in zip(X2, Y2):
        fm2.extract_features(x_sequence, y_sequence)

    # Index-encode the training split.
    X1, X2, Y1, Y2 = generate_sequential_data(train_texts,
                                              type="2-2",
                                              loaddata=load_dt)
    X1 = to_indices(fm1.f, X1)
    Y1 = to_indices(fm1.fY, Y1)
    X2 = to_indices(fm2.f, X2)
    # The second output stream is encoded entirely with fm2; the key was
    # previously computed through fm1.fY while indexing fm2.fY.map.
    Y2 = to_indices(fm2.fY, Y2)

    # Index-encode the validation split.
    X1v, X2v, Y1v, Y2v = generate_sequential_data(valid_texts,
                                                  type="2-2",
                                                  loaddata=load_dt)
    X1v = to_indices(fm1.f, X1v)
    Y1v = to_indices(fm1.fY, Y1v)
    X2v = to_indices(fm2.f, X2v)
    Y2v = to_indices(fm2.fY, Y2v)

    we_dict1 = WEDict(we_dict_path1) if we_dict_path1 is not None else None
    we_dict2 = WEDict(we_dict_path2) if we_dict_path2 is not None else None

    mdl = Model22(fm1.f.current_index + 1,
                  fm2.f.current_index + 1,
                  fm1.fY.current_index + 1,
                  fm2.fY.current_index + 1,
                  hidden_size1,
                  hidden_size2,
                  dep=dep,
                  we_dict1=we_dict1,
                  we_dict2=we_dict2,
                  map1=fm1.f.map,
                  map2=fm2.f.map,
                  semantic_label_map=fm2.fY.map)

    mdl.option[Option.SAVE_TO] = save_folder + "/" + model_name + ".pkl"
    mdl.option[Option.SAVE_FREQ] = 20
    mdl.option[Option.VALID_FREQ] = 20
    mdl.option[Option.BATCH_SIZE] = batch_size
    mdl.option[Option.SAVE_BEST_VALID_TO] = (
        save_folder + "/" + "best_" + model_name + ".pkl")
    mdl.option[Option.MAX_EPOCHS] = max_epochs

    mdl.compile()
    func = preprare_seq_seq_data
    mdl.fit_shuffer(X1, X2, Y1, Y2, X1v, X2v, Y1v, Y2v, process_data_func=func)