path_man = PathManager(dataset=dataset_name,
                       d_type=dataset_subtype,
                       model_name=model_name_norm,
                       version=version)

################################################
#----------------------Load model parameters------------------
################################################
if model_name == 'SIMPLE':
    model_cfg = TrainingConfigManager(path_man.Doc() + 'config.json')
else:
    model_cfg = TrainingConfigManager('../run/runConfig.json')

modelParams = model_cfg.modelParams()

dataset = SeqFileDataset(path_man.FileData(), path_man.FileSeqLen(), N=N)
dataloader = DataLoader(dataset, batch_size=N, collate_fn=batchSequenceWithoutPad)

if model_name != 'Random':
    # Reuse the embedding matrix stored inside the trained checkpoint
    state_dict = t.load(path_man.Model() + '_v%s.0' % version)
    word_matrix = state_dict['Embedding.weight']
else:
    # The random baseline has no checkpoint: fall back to the
    # pre-trained word-embedding matrix on disk
    word_matrix = t.Tensor(np.load(path_man.WordEmbedMatrix(), allow_pickle=True))

loss_fn = t.nn.NLLLoss().cuda()

if model_name == 'SIMPLE':
    model = SIMPLE(word_matrix, **modelParams)
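# --- Hedged sketch (not from the original script): consuming the
# DataLoader above for plain batched inference. The batch layout
# (seqs, lengths, labels) and the forward call model(seqs, lengths)
# are assumptions about batchSequenceWithoutPad and the model's
# forward signature, and are hypothetical here.
# total_loss, batches = 0., 0
# with t.no_grad():
#     for seqs, lengths, labels in dataloader:      # assumed batch layout
#         log_probs = model(seqs.cuda(), lengths)   # assumed forward signature
#         total_loss += loss_fn(log_probs, labels.cuda()).item()
#         batches += 1
# print('avg NLL: %.4f' % (total_loss / batches))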
data_path_man = PathManager(dataset=data_dataset_name, d_type=dataset_subtype)
model_path_man = PathManager(dataset=model_dataset_name, version=version, model_name=model_name)

################################################
#----------------------Load model parameters------------------
################################################
model_cfg = TrainingConfigManager(model_path_man.Doc() + 'config.json')
modelParams = model_cfg.modelParams()

dataset = SeqFileDataset(data_path_man.FileData(), data_path_man.FileSeqLen(), N=N)

state_dict = t.load(model_path_man.Model() + '_v%s.0' % version)
# state_dict = t.load(path_man.DatasetBase()+'models/ProtoNet_v105.0')
word_matrix = state_dict['Embedding.weight']

# Instantiate the model that matches the loaded checkpoint
if model_name == 'IMP':
    model = IMP(word_matrix, **modelParams)
elif model_name == 'SIMPLE':
    model = SIMPLE(word_matrix, **modelParams)
elif model_name == 'HybridIMP':
    model = HybridIMP(word_matrix, **modelParams)
elif model_name == 'ProtoNet':
    model = ProtoNet(word_matrix, **modelParams)
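# --- Hedged follow-up sketch (not in the original fragment): restoring
# the full checkpoint into the freshly built model. Assumes the keys in
# state_dict match the module names of the model constructed above,
# which is the usual t.load / load_state_dict pairing in PyTorch.
model.load_state_dict(state_dict)
model = model.cuda()
model.eval()   # inference only: disable dropout / batch-norm updates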
loss = t.nn.NLLLoss().cuda() if loss_func == 'nll' else t.nn.MSELoss().cuda()

printState('init managers...')
train_path_manager = PathManager(dataset=data_folder,
                                 d_type='train',
                                 model_name=model_name,
                                 version=version)
val_path_manager = PathManager(dataset=data_folder,
                               d_type='validate',
                               model_name=model_name,
                               version=version)

train_dataset = SeqFileDataset(train_path_manager.FileData(),
                               train_path_manager.FileSeqLen(),
                               N)
val_dataset = SeqFileDataset(val_path_manager.FileData(),
                             val_path_manager.FileSeqLen(),
                             N)
# train_dataset = ImageFileDataset(train_path_manager.FileData(), N, rd_crop_size=224)
# val_dataset = ImageFileDataset(val_path_manager.FileData(), N, rd_crop_size=224)

# train_task = MatrixProtoEpisodeTask(k, qk, n, N,
#                                     dataset=train_dataset,
#                                     cuda=True,
#                                     label_expand=expand,
#                                     unsqueeze=False)
# val_task = MatrixProtoEpisodeTask(k, qk, n, N,
#                                   dataset=val_dataset,
#                                   cuda=True,
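# --- Illustrative sketch only: what an n-way / k-shot episode looks
# like. The repo's *EpisodeTask classes build these internally; the
# names `all_classes` and `samples_of` below are hypothetical
# stand-ins, not part of this codebase.
# n  = classes per episode, k = support samples per class,
# qk = query samples per class, N = total samples stored per class.
import random

def sample_episode(all_classes, samples_of, n, k, qk):
    support, query = [], []
    for cls in random.sample(all_classes, n):        # pick n classes
        items = random.sample(samples_of(cls), k + qk)
        support.append(items[:k])                    # k shots per class
        query.append(items[k:])                      # qk queries per class
    return support, query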
                                model_name=model_name,
                                version=version)

################################################
#----------------------Load model parameters------------------
################################################
model_cfg = TrainingConfigManager(test_path_manager.Doc() + 'config.json')

modelParams = model_cfg.modelParams()
LRDecayIters, LRDecayGamma, optimizer_type, \
weight_decay, loss_func, default_lr, lrs, taskBatchSize = model_cfg.trainingParams()

test_dataset = SeqFileDataset(test_path_manager.FileData(),
                              test_path_manager.FileSeqLen(),
                              N)

# MSE compares against one-hot targets, so labels must be expanded
expand = (loss_func == 'mse')

if model_type in ADAPTED_MODELS:
    test_task = AdaptEpisodeTask(k, qk, n, N, test_dataset,
                                 cuda=True, expand=expand)
else:
    test_task = ProtoEpisodeTask(k, qk, n, N, test_dataset,
                                 cuda=True, expand=expand)
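# --- Worked illustration (not part of the original script) of why
# `expand` is needed: NLLLoss consumes integer class indices, while
# MSELoss compares a score vector against a one-hot target of the same
# shape. Standard PyTorch only; the 3-way sizes are arbitrary examples.
import torch as t
import torch.nn.functional as F

labels = t.tensor([0, 2, 1])                       # integer labels, shape (3,)
log_probs = t.log_softmax(t.randn(3, 3), dim=1)    # per-class log-probabilities

nll = F.nll_loss(log_probs, labels)                # indices suffice for NLL
one_hot = F.one_hot(labels, num_classes=3).float() # "expanded" targets
mse = F.mse_loss(log_probs.exp(), one_hot)         # MSE needs same-shape targets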
#            is_dir=True)
# splitDatas(src=man.DatasetBase()+'train/',
#            dest=man.DatasetBase()+'test/',
#            ratio=30,
#            mode='x',
#            is_dir=True)

################################################################
# Build the index-based dataset files
################################################################
for d_type in ['train', 'validate', 'test']:
    manager = PathManager(dataset='virushare-20-3gram-tfidf', d_type=d_type)

    makeDataFile(json_path=manager.Folder(),
                 w2idx_path=manager.WordIndexMap(),
                 seq_length_save_path=manager.FileSeqLen(),
                 data_save_path=manager.FileData(),
                 idx2cls_mapping_save_path=manager.FileIdx2Cls(),
                 num_per_class=20,
                 max_seq_len=700)

################################################################
# renameItemFolder('/home/asichurter/datasets/JSONs/LargePE-100-original/')

# Sequence-length distribution statistics
################################################################
# apiStat('/home/asichurter/datasets/JSONs/HKS/all/',
#         ratio_stairs=[50, 100, 200, 400, 500, 1000, 2000, 5000],
#         dump_report_path=None,   # '/home/asichurter/datasets/reports/HKS_3gram_tfidf_api_report.json'
#         dump_apiset_path=None,   # '/home/asichurter/datasets/reports/HKS_3gram_tfidf_api_set.json'
#         class_dir=True)
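# --- Hedged sketch (standard library only) of the statistic that the
# commented-out apiStat call gathers: for each length "stair", the
# fraction of sequences shorter than it. The directory layout
# (<root>/<class>/<item>.json holding an 'apis' list) is an assumption
# about this dataset's format, not confirmed by the original script.
import os, json

def seq_len_stat(root, ratio_stairs=(50, 100, 200, 400, 500, 1000, 2000, 5000)):
    lengths = []
    for cls in os.listdir(root):                       # class-per-folder layout
        for item in os.listdir(os.path.join(root, cls)):
            with open(os.path.join(root, cls, item)) as f:
                lengths.append(len(json.load(f)['apis']))   # assumed key
    for stair in ratio_stairs:
        ratio = sum(l < stair for l in lengths) / len(lengths)
        print('len < %5d: %.2f%%' % (stair, 100 * ratio))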