# Stage: build the document-embedding model, convert the datasets to their
# embedded form, and derive automatic hidden-layer sizes.
print("---------------Training document embedding-----------------")

# %%
# Pick the embedding class from the configured type. Any value other than
# "LOD" or "Normal" falls back to the plain Doc2Vec class.
if embedding_type == "LOD":
    embedding_cls = OnlyLeafDoc2Vec
elif embedding_type == "Normal":
    embedding_cls = NoTag_Doc2Vec
else:
    embedding_cls = Doc2Vec

doc2vec = embedding_cls(
    data_name,
    dataset_train.number_of_classes(),
    size=embedding_size,
    epoch=270,
    batch_size=10000,
)

# NOTE: fitting is disabled here; a previously exported model is loaded
# from disk instead. Re-enable the call below to retrain the embedding.
# doc2vec.fit(dataset_train.datas, dataset_train.labels,
#             dataset_validate.datas, dataset_validate.labels,
#             early_stopping=False)
doc2vec.load_model('export/%s/doc2vec.model' % data_name)

# %%
# Convert every dataset that will be used later with the loaded model.
dataset_train.change_to_Doc2Vec(doc2vec)
dataset_validate.change_to_Doc2Vec(doc2vec)
# The test set is only converted when one of the test-related flags is set.
if test_split or predict_test or evaluate_test:
    dataset_test.change_to_Doc2Vec(doc2vec)

# %%
if hidden == 'auto' or target_hidden == 'auto':
    # Difference between consecutive entries of dataset_train.level.
    # The name `a` is kept because code following this section may read it.
    # NOTE(review): presumably these are per-level class counts - confirm.
    levels = dataset_train.level
    a = np.array([upper - lower for lower, upper in zip(levels, levels[1:])])
    if hidden == 'auto':
        # Hidden size grows with the difference and is capped at 3000.
        hidden = a * 2 + 300
        hidden[hidden > 3000] = 3000
# %%
# Stage: load the exported document-embedding model, convert the test
# dataset with it, then build the ESLNN classifier on that dataset.
print("---------------Document embedding-----------------")

# %%
# Pick the embedding class from the configured type. Any value other than
# "LOD" or "Normal" falls back to the plain Doc2Vec class.
if embedding_type == "LOD":
    embedding_cls = OnlyLeafDoc2Vec
elif embedding_type == "Normal":
    embedding_cls = NoTag_Doc2Vec
else:
    embedding_cls = Doc2Vec

doc2vec = embedding_cls(
    data_name,
    dataset_test.number_of_classes(),
    size=embedding_size,
    epoch=270,
    batch_size=10000,
)
# No fitting happens here: the model is read from the export directory.
doc2vec.load_model('export/%s/doc2vec.model' % data_name)

# %%
dataset_test.change_to_Doc2Vec(doc2vec)

# %%
print("---------------Training classifiers-----------------")

# %%
# dataset_test is passed for both dataset arguments of ESLNN.
# NOTE(review): start_level=99999 presumably makes train() skip every level
# so that stored classifiers are reused - confirm against ESLNN.
model = ESLNN(
    data_name,
    dataset_test,
    "temp",
    dataset_test,
    iteration=2000,
    stopping_time=300,
    batch_size=batch_size,
    hidden_size=hidden,
    target_hidden_size=target_hidden,
    use_dropout=True,
    start_level=99999,
)

# %%
model.train()
model.apply_threshold(threshold)