def loadSemLM11(load_path, train_texts, valid_texts, we_dict_path=None, dep=1,
                hidden_size=32, batch_size=200, save_folder=".",
                model_name="simple", max_epochs=120, continue_train=False,
                load_dt=True):
    """Rebuild a Model11, load saved parameters, and optionally keep training.

    The feature manager is populated from the sequences generated for
    ``train_texts + valid_texts``; the train and validation sequences are
    then converted to integer ids with it. A ``Model11`` is configured,
    compiled, and its parameters are read from ``load_path``.

    Args:
        load_path: Passed to ``mdl.load_params``.
        train_texts: Training texts; concatenated with ``valid_texts`` via ``+``.
        valid_texts: Validation texts.
        we_dict_path: If not None, a ``WEDict`` is built from it and handed
            to the model; otherwise the model receives ``we_dict=None``.
        dep: Forwarded to ``Model11`` as ``dep``.
        hidden_size: Forwarded to ``Model11``.
        batch_size: Stored under ``Option.BATCH_SIZE``.
        save_folder: Folder prefix for the two ``.pkl`` save paths.
        model_name: Base file name for the two ``.pkl`` save paths.
        max_epochs: Stored under ``Option.MAX_EPOCHS``.
        continue_train: When true, training resumes through
            ``mdl.fit_shuffer`` and nothing is returned.
        load_dt: Forwarded to ``generate_sequential_data`` as ``loaddata``.

    Returns:
        ``(mdl, fm)`` when ``continue_train`` is false; ``None`` otherwise.
    """

    def to_ids(feature, sequences):
        # Look every item up in the feature's map and shift the index by one
        # (presumably id 0 is reserved, e.g. for padding -- TODO confirm).
        return [
            [feature.map[feature.getFeatureValue(item)] + 1 for item in seq]
            for seq in sequences
        ]

    manager = FeatureManager()

    # Features are extracted over train + validation together, so every item
    # encoded below has already been seen by the manager.
    inputs, targets = generate_sequential_data(train_texts + valid_texts,
                                               loaddata=load_dt)
    for pos, seq in enumerate(inputs):
        manager.extract_features(seq, targets[pos])

    train_in, train_out = generate_sequential_data(train_texts,
                                                   loaddata=load_dt)
    train_in = to_ids(manager.f, train_in)
    train_out = to_ids(manager.fY, train_out)

    valid_in, valid_out = generate_sequential_data(valid_texts,
                                                   loaddata=load_dt)
    valid_in = to_ids(manager.f, valid_in)
    valid_out = to_ids(manager.fY, valid_out)

    embeddings = WEDict(we_dict_path) if we_dict_path is not None else None

    model = Model11(manager.f.current_index + 1,
                    manager.fY.current_index + 1,
                    hidden_size,
                    dep=dep,
                    we_dict=embeddings,
                    map=manager.f.map)

    pkl_name = model_name + ".pkl"
    settings = [
        (Option.SAVE_TO, "/".join([save_folder, pkl_name])),
        (Option.SAVE_FREQ, 20),
        (Option.VALID_FREQ, 20),
        (Option.BATCH_SIZE, batch_size),
        (Option.SAVE_BEST_VALID_TO, "/".join([save_folder, "best_" + pkl_name])),
        (Option.MAX_EPOCHS, max_epochs),
    ]
    for key, value in settings:
        model.option[key] = value

    model.compile()
    batch_preparer = preprare_seq_seq_data
    model.load_params(load_path)

    if not continue_train:
        return model, manager

    model.fit_shuffer(train_in, train_out, valid_in, valid_out,
                      process_data_func=batch_preparer)
def _encode_feature_sequences(feature, sequences):
    """Map every item of every sequence to ``feature.map[...] + 1``.

    The map and the value extractor are always taken from the same
    ``feature`` object, so the two cannot get out of sync.
    """
    return [
        [feature.map[feature.getFeatureValue(item)] + 1 for item in seq]
        for seq in sequences
    ]


def trainSemLM22(train_texts, valid_texts, we_dict_path1=None,
                 we_dict_path2=None, dep=1, hidden_size1=32, hidden_size2=32,
                 batch_size=200, save_folder=".", model_name="hybrid",
                 max_epochs=120, load_dt=True):
    """Train a two-input / two-output ``Model22`` on the given texts.

    Two feature managers are populated (one per input/output stream) from the
    "2-2" sequences generated for ``train_texts + valid_texts``. The train and
    validation sequences are then converted to integer ids, a ``Model22`` is
    configured and compiled, and training runs through ``mdl.fit_shuffer``.

    Args:
        train_texts: Training texts; concatenated with ``valid_texts`` via ``+``.
        valid_texts: Validation texts.
        we_dict_path1: If not None, a ``WEDict`` built from it is passed to
            the model as ``we_dict1``.
        we_dict_path2: Same as above, passed as ``we_dict2``.
        dep: Forwarded to ``Model22`` as ``dep``.
        hidden_size1: Forwarded to ``Model22``.
        hidden_size2: Forwarded to ``Model22``.
        batch_size: Stored under ``Option.BATCH_SIZE``.
        save_folder: Folder prefix for the two ``.pkl`` save paths.
        model_name: Base file name for the two ``.pkl`` save paths.
        max_epochs: Stored under ``Option.MAX_EPOCHS``.
        load_dt: Forwarded to ``generate_sequential_data`` as ``loaddata``.

    Returns:
        None. The model is saved according to the configured options
        (presumably by ``fit_shuffer`` -- not visible from here).
    """
    fm1 = FeatureManager()
    fm2 = FeatureManager()

    # Extract features over train + validation together so that every item
    # encoded below is already present in the managers' maps.
    all_texts = train_texts + valid_texts
    X1, X2, Y1, Y2 = generate_sequential_data(all_texts, type="2-2",
                                              loaddata=load_dt)
    for i, seq in enumerate(X1):
        fm1.extract_features(seq, Y1[i])
    for i, seq in enumerate(X2):
        fm2.extract_features(seq, Y2[i])

    X1, X2, Y1, Y2 = generate_sequential_data(train_texts, type="2-2",
                                              loaddata=load_dt)
    X1 = _encode_feature_sequences(fm1.f, X1)
    Y1 = _encode_feature_sequences(fm1.fY, Y1)
    X2 = _encode_feature_sequences(fm2.f, X2)
    # Fix: the second output stream previously used fm1.fY.getFeatureValue
    # with fm2.fY.map, unlike the validation encoding below.
    Y2 = _encode_feature_sequences(fm2.fY, Y2)

    X1v, X2v, Y1v, Y2v = generate_sequential_data(valid_texts, type="2-2",
                                                  loaddata=load_dt)
    X1v = _encode_feature_sequences(fm1.f, X1v)
    Y1v = _encode_feature_sequences(fm1.fY, Y1v)
    X2v = _encode_feature_sequences(fm2.f, X2v)
    Y2v = _encode_feature_sequences(fm2.fY, Y2v)

    we_dict1 = WEDict(we_dict_path1) if we_dict_path1 is not None else None
    we_dict2 = WEDict(we_dict_path2) if we_dict_path2 is not None else None

    # Sizes are current_index + 1, matching the +1 shift applied to the ids.
    mdl = Model22(fm1.f.current_index + 1,
                  fm2.f.current_index + 1,
                  fm1.fY.current_index + 1,
                  fm2.fY.current_index + 1,
                  hidden_size1,
                  hidden_size2,
                  dep=dep,
                  we_dict1=we_dict1,
                  we_dict2=we_dict2,
                  map1=fm1.f.map,
                  map2=fm2.f.map,
                  semantic_label_map=fm2.fY.map)

    mdl.option[Option.SAVE_TO] = save_folder + "/" + model_name + ".pkl"
    mdl.option[Option.SAVE_FREQ] = 20
    mdl.option[Option.VALID_FREQ] = 20
    mdl.option[Option.BATCH_SIZE] = batch_size
    mdl.option[Option.SAVE_BEST_VALID_TO] = (
        save_folder + "/" + "best_" + model_name + ".pkl")
    mdl.option[Option.MAX_EPOCHS] = max_epochs

    mdl.compile()
    func = preprare_seq_seq_data
    mdl.fit_shuffer(X1, X2, Y1, Y2, X1v, X2v, Y1v, Y2v,
                    process_data_func=func)