def _create(self):
    model = utils.get_pretrained_model(self.base_model,
                                       include_top=False,
                                       input_shape=self.input_shape)
    interm = model.output
    interm = Flatten()(interm)
    interm = Dropout(0.5)(interm)
    interm = Dense(self.fc_layer_size, activation='relu')(interm)
    interm = Dropout(0.5)(interm)
    output = Dense(len(self.classes), activation='softmax')(interm)
    self.model = Model(inputs=model.input, outputs=output)
def load_weights_from_top_model(self, top_model_weights_path=None):
    if top_model_weights_path is None:
        top_model_weights_path = config.get_top_model_weights_path(
            self.base_model)
    pretrained_model = utils.get_pretrained_model(
        self.base_model, include_top=False, input_shape=self.input_shape)
    top_model = TopModel(base_model=self.base_model,
                         fc_layer_size=self.fc_layer_size,
                         classes=self.classes)
    top_model.load_weights(top_model_weights_path)
    model = Model(inputs=pretrained_model.input,
                  outputs=top_model.model(pretrained_model.output))
    self.model = model
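# A minimal usage sketch (not from the original): the class name FineTunedModel and the
# constructor arguments are assumptions inferred from the attributes used in the method
# above (base_model, fc_layer_size, classes).
fine_tuned = FineTunedModel(base_model='vgg16',
                            fc_layer_size=256,
                            classes=('cats', 'dogs'))
# With no explicit path, the weights are resolved via config.get_top_model_weights_path('vgg16').
fine_tuned.load_weights_from_top_model()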
def _create(self):
    pretrained_model = utils.get_pretrained_model(
        self.base_model,
        include_top=False,
        input_shape=utils.get_input_shape(self.base_model))
    self.pretrained_model = pretrained_model
    input_shape = [int(ele) for ele in pretrained_model.output.shape[1:]]
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dropout(0.5))
    model.add(Dense(self.fc_layer_size, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(self.output_dim, activation='softmax'))
    self.model = model
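# A hypothetical sketch (not from the original) of how the Sequential top model built in
# _create could be trained on precomputed bottleneck features with the standard Keras API;
# the TopModel constructor arguments mirror the earlier snippet, while bottleneck_train and
# y_train are assumed placeholders.
top_model = TopModel(base_model='vgg16', fc_layer_size=256, classes=('cats', 'dogs'))
top_model.model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
top_model.model.fit(bottleneck_train, y_train,
                    epochs=10, batch_size=32, validation_split=0.1)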
def main():
    parser = ArgumentParser()
    # Task configuration
    parser.add_argument('-device', default=0, type=int)
    parser.add_argument('-output_name', default='', type=str)
    parser.add_argument('-saved_model_path', default='', type=str)  # for k-fold ensemble prediction, set this to the output path of the corresponding k_fold models
    parser.add_argument('-type', default='train', type=str)
    parser.add_argument('-k_fold', default=-1, type=int)  # -1 means no k-fold; otherwise the index of the fold to use
    parser.add_argument('-merge_classification', default='avg', type=str)  # count prediction: 'vote' uses majority voting, 'avg' averages the probabilities
    parser.add_argument('-merge_with_bert_sort', default='yes', type=str)  # whether to merge the similarities computed by the previous bert model
    parser.add_argument('-k_fold_cache', default='no', type=str)  # whether to reuse the previous k_fold cache
    parser.add_argument('-generate_candidates', default='', type=str)  # whether to merge the similarities computed by the previous bert model
    parser.add_argument('-seed', default=123456, type=int)  # random seed
    parser.add_argument('-cls_position', default='zero', type=str)  # whether the positions of the two added cls tokens are set to 0
    parser.add_argument('-pretrained_model_path', default='/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/', type=str)  # path to the bert parameters

    # Training parameters
    parser.add_argument('-train_batch_size', default=64, type=int)
    parser.add_argument('-val_batch_size', default=256, type=int)
    parser.add_argument('-lr', default=2e-5, type=float)
    parser.add_argument('-epoch_num', default=20, type=int)
    parser.add_argument('-max_len', default=64, type=int)
    parser.add_argument('-dropout', default=0.3, type=float)
    parser.add_argument('-print_loss_step', default=2, type=int)
    parser.add_argument('-hit_list', default=[2, 5, 7, 10], type=list)

    args = parser.parse_args()
    # assert args.train_batch_size % args.neg_num == 0, print('batch size should be an integer multiple of neg_num')

    # Timestamp format
    DATE_FORMAT = "%Y-%m-%d-%H:%M:%S"

    # Define the output directory; create it if it does not exist
    if args.output_name == '':
        output_path = os.path.join('./output/rerank_keywords_output',
                                   time.strftime(DATE_FORMAT, time.localtime(time.time())))
    else:
        output_path = os.path.join('./output/rerank_keywords_output', args.output_name)

    # if os.path.exists(output_path):
    #     raise Exception('the output path {} already exists'.format(output_path))
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Configure tensorboard
    tensor_board_log_path = os.path.join(output_path,
                                         'tensor_board_log{}'.format('' if args.k_fold == -1 else args.k_fold))
    writer = SummaryWriter(tensor_board_log_path)

    # Configure the logger
    logger = Logger(output_path, 'main{}'.format('' if args.k_fold == -1 else args.k_fold)).logger

    # Set the random seed
    logger.info('set seed to {}'.format(args.seed))
    set_seed(args)

    # Print args
    print_args(args, logger)

    # Load data
    logger.info('#' * 20 + 'loading data and model' + '#' * 20)
    data_path = os.path.join(project_path, 'candidates')
    # data_path = os.path.join(project_path, 'tf_idf_candidates')
    train_list, val_list, test_list, code_to_name, name_to_code, standard_name_list = read_rerank_data(data_path, logger, args)

    # Load model
    # pretrained_model_path = '/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/'
    pretrained_model_path = args.pretrained_model_path
    bert_config, bert_tokenizer, bert_model = get_pretrained_model(pretrained_model_path, logger)

    # Build the datasets
    logger.info('create dataloader')
    train_dataset = RerankKeywordDataset(train_list, bert_tokenizer, args, logger)
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=False, collate_fn=train_dataset.collate_fn)

    val_dataset = RerankKeywordDataset(val_list, bert_tokenizer, args, logger)
    val_dataloader = DataLoader(val_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=val_dataset.collate_fn)

    test_dataset = RerankKeywordDataset(test_list, bert_tokenizer, args, logger)
    test_dataloader = DataLoader(test_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=test_dataset.collate_fn)

    # Build the model
    logger.info('create model')
    model = BertKeywordsClassification(bert_model, bert_config, args)
    model = model.to(args.device)

    # Configure the optimizer and scheduler
    t_total = len(train_dataloader) * args.epoch_num
    optimizer, _ = get_optimizer_and_scheduler(model, t_total, args.lr, 0)

    if args.type == 'train':
        train(model, train_dataloader, val_dataloader, test_dataloader, optimizer, writer, args, logger, output_path, standard_name_list)
    elif args.type == 'evaluate':
        if args.saved_model_path == '':
            raise Exception('saved model path must not be empty')

        # Single (non-k-fold) model
        if args.k_fold == -1:
            logger.info('loading saved model')
            checkpoint = torch.load(args.saved_model_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            model = model.to(args.device)

            # Generate the latest embeddings of the icd standard terms
            evaluate(model, test_dataloader, args, logger, writer, standard_name_list, is_test=True)
        else:
            evaluate_k_fold(model, test_dataloader, args, logger, writer, standard_name_list)
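# A hypothetical invocation sketch (the script name and paths are placeholders; the flags
# come from the argparse definitions above):
#
#   python rerank_keywords_main.py -type train -output_name rerank_run1 -device 0
#   python rerank_keywords_main.py -type evaluate -output_name rerank_run1 \
#       -saved_model_path ./output/rerank_keywords_output/rerank_run1/model.pth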
    'val': DataLoader(data['val'], batch_size=batch_size, shuffle=True),
    'test': DataLoader(data['test'], batch_size=batch_size, shuffle=True)
}

trainiter = iter(dataloaders['train'])
features, labels = next(trainiter)

print("class numbers: ", len(data['train'].classes))
n_classes = len(data['train'].classes)

# This should return the same as the pretrained model with the custom classifier.
# In the case of resnet, we replace the `fc` layers with the same classifier.
#
# The `torchsummary` library has a helpful function called `summary` which summarizes our model.
model = get_pretrained_model('resnet50', train_on_gpu, n_classes, multi_gpu)

# if multi_gpu:
#     summary(
#         model.module,
#         input_size=(3, 224, 224),
#         batch_size=batch_size,
#         device='cuda')
# else:
#     summary(
#         model, input_size=(3, 224, 224), batch_size=batch_size, device='cuda')

model.class_to_idx = data['train'].class_to_idx
model.idx_to_class = {
    idx: class_
    for class_, idx in model.class_to_idx.items()
}
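# A minimal sketch (not from the original) showing how the idx_to_class mapping defined
# above can turn a predicted index back into a class name; it reuses `model`, `features`,
# and `train_on_gpu` from the snippet above and assumes `torch` is already imported.
model.eval()
with torch.no_grad():
    inputs = features.cuda() if train_on_gpu else features
    preds = model(inputs).argmax(dim=1)
    print('first prediction:', model.idx_to_class[preds[0].item()])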
ovule_loader = DataLoader(ovule_dset, batch_size=cfg.general.trainBatchSize, shuffle=False, pin_memory=True)
tomato_loader = DataLoader(tomato_dset, batch_size=cfg.general.testBatchSize, shuffle=False, pin_memory=True)

print('----START TRAINING----' * 4)
# accs = {}
# utils.get_pretrained_model()
# model = FsModel()
model = utils.get_pretrained_model()
# model.load_state_dict(torch.load(os.path.join(cfg.general.checkpointDir, cfg.model.loadFromName)), strict=True)

for param in model.parameters():
    param.requires_grad = True

# if cfg.model.freezeVgg:
#     for param in model.features.parameters():
#         param.requires_grad = False

criterion = HypercubeDistLoss(
    weights=torch.tensor(cfg.general.lossWeights, device=device))
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, val_acc_history, best_acc = utils.getFeaturesAndCompare(
    model, ovule_loader, tomato_loader, criterion, optimizer)
def get_bottleneck_features(model=None, source='path', container_path=None,
                            tensor=None, classes=None, save=False,
                            filename=None, verbose=False):
    """Extract bottleneck features

    Parameters
    ----------
    model: string
        pre-trained model name, being one of 'inception_v3', 'mobilenet',
        'resnet50', 'resnet101', 'resnet152', 'vgg16', 'vgg19', 'xception'
    source: string
        where to extract bottleneck features from, either 'path' or 'tensor'
    container_path: string
        if `source='path'`, `container_path` specifies the folder path that
        contains images of all the classes. If `None`, container_path will be
        set to 'path_to_the_module/data/train'
    tensor: numpy array/string
        if `source='tensor'`, `tensor` specifies the tensor from which
        bottleneck features are extracted, or the path to the saved tensor file
    classes: tuple/list
        a tuple/list of classes for prediction
    save: boolean
        whether to save the extracted bottleneck features or not
    filename: string
        if `save=True`, specifies the name of the file in which the bottleneck
        features are saved
    verbose: boolean
        verbosity mode
    """
    assert source in {'path', 'tensor'}
    if source == 'path':
        tensors = get_x_from_path(model=model, container_path=container_path,
                                  classes=classes, save=False, verbose=verbose)
    else:
        assert isinstance(tensor, (str, np.ndarray))
        if isinstance(tensor, np.ndarray):
            tensors = tensor
        else:
            assert os.path.exists(tensor)
            tensors = utils.load_h5file(tensor)

    input_shape = utils.get_input_shape(model)
    pretrained_model = utils.get_pretrained_model(model, include_top=False,
                                                  input_shape=input_shape)
    bottleneck_features = pretrained_model.predict(tensors,
                                                   verbose=1 if verbose else 0)

    if save:
        assert filename is not None
        filepath = os.path.join(config.precomputed_dir, filename)
        utils.remove_file(filepath)
        if verbose:
            print('Started saving {}'.format(filename))
        with h5py.File(filepath, 'w') as hf:
            hf.create_dataset('data', data=bottleneck_features)
        if verbose:
            print('Finished saving {}'.format(filename))
    else:
        return bottleneck_features
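# A minimal usage sketch based on the docstring above; the folder path, class names, and
# filename are hypothetical placeholders.
features = get_bottleneck_features(model='vgg16',
                                   source='path',
                                   container_path='data/train',
                                   classes=('cats', 'dogs'),
                                   verbose=True)

# Or precompute once and cache the features on disk (saved under config.precomputed_dir)
# for later training of the top model:
get_bottleneck_features(model='vgg16', source='path', container_path='data/train',
                        classes=('cats', 'dogs'), save=True,
                        filename='vgg16_bottleneck_train.h5', verbose=True)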
def main():
    parser = ArgumentParser()
    # Task configuration
    parser.add_argument('-neg_sample', default='online', type=str)
    parser.add_argument('-neg_num', default=4, type=int)
    parser.add_argument('-device', default=0, type=int)
    parser.add_argument('-output_name', default='', type=str)
    parser.add_argument('-saved_model_path', default='', type=str)  # for k-fold ensemble prediction, set this to the output path of the corresponding k_fold models
    parser.add_argument('-type', default='train', type=str)
    parser.add_argument('-k_fold', default=-1, type=int)  # -1 means no k-fold; otherwise the index of the fold to use
    parser.add_argument('-merge_classification', default='vote', type=str)  # count prediction: 'vote' uses majority voting, 'avg' averages the probabilities
    parser.add_argument('-merge_sort', default='avg', type=str)  # k-fold merging strategy: 'vote' uses majority voting, 'avg' averages the probabilities
    parser.add_argument('-generate_candidates', default='no', type=str)  # during prediction, whether to generate candidate terms and save them under output_name; 'no' disables it, any other value is used as the save name
    parser.add_argument('-k_fold_cache', default='no', type=str)  # during k_fold prediction, whether to reuse the previous cache; if it exists, three list files are kept under output_name so the 5 models do not have to predict again
    parser.add_argument('-add_keywords', default='no', type=str)  # keyword information is not added here
    parser.add_argument('-seed', default=123456, type=int)  # random seed
    parser.add_argument('-loss_type', default='union', type=str)  # loss type: 'class' only updates the classification parameters; 'sim' only updates the triple loss; 'union' updates both
    parser.add_argument('-hidden_layers', default=12, type=int)  # number of bert layers
    parser.add_argument('-pretrained_model_path', default='/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/', type=str)  # path to the bert parameters

    # Training parameters
    parser.add_argument('-train_batch_size', default=64, type=int)
    parser.add_argument('-val_batch_size', default=256, type=int)
    parser.add_argument('-lr', default=2e-5, type=float)
    parser.add_argument('-epoch_num', default=20, type=int)
    parser.add_argument('-max_len', default=64, type=int)
    parser.add_argument('-margin', default=1, type=float)
    parser.add_argument('-distance', default='eu', type=str)
    parser.add_argument('-label_nums', default=3, type=int)
    parser.add_argument('-dropout', default=0.3, type=float)
    parser.add_argument('-pool', default='avg', type=str)
    parser.add_argument('-print_loss_step', default=2, type=int)
    parser.add_argument('-hit_list', default=[2, 5, 7, 10], type=list)

    args = parser.parse_args()
    assert args.train_batch_size % args.neg_num == 0, print('batch size should be an integer multiple of neg_num')

    # Timestamp format
    DATE_FORMAT = "%Y-%m-%d-%H:%M:%S"

    # Define the output directory; create it if it does not exist
    if args.output_name == '':
        output_path = os.path.join('./output/mto_output',
                                   time.strftime(DATE_FORMAT, time.localtime(time.time())))
    else:
        output_path = os.path.join('./output/mto_output', args.output_name)

    # if os.path.exists(output_path):
    #     raise Exception('the output path {} already exists'.format(output_path))
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Configure tensorboard
    tensor_board_log_path = os.path.join(output_path,
                                         'tensor_board_log{}'.format('' if args.k_fold == -1 else args.k_fold))
    writer = SummaryWriter(tensor_board_log_path)

    # Configure the logger
    logger = Logger(output_path, 'main{}'.format('' if args.k_fold == -1 else args.k_fold)).logger

    # Print args
    print_args(args, logger)

    # Set the random seed
    logger.info('set seed to {}'.format(args.seed))
    set_seed(args)

    # Load data
    logger.info('#' * 20 + 'loading data and model' + '#' * 20)
    data_path = os.path.join(project_path, 'data')
    train_list, val_list, test_list, code_to_name, name_to_code, standard_name_list = read_data(data_path, logger, args)

    # Load model
    # pretrained_model_path = '/home/liangming/nas/lm_params/chinese_L-12_H-768_A-12/'
    pretrained_model_path = args.pretrained_model_path
    bert_config, bert_tokenizer, bert_model = get_pretrained_model(pretrained_model_path, logger, args)

    # Build the datasets
    logger.info('create dataloader')
    train_dataset = TripleDataset(train_list, standard_name_list, code_to_name, name_to_code, bert_tokenizer, args, logger)
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=False, collate_fn=train_dataset.collate_fn)

    val_dataset = TermDataset(val_list, bert_tokenizer, args, False)
    val_dataloader = DataLoader(val_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=val_dataset.collate_fn)

    test_dataset = TermDataset(test_list, bert_tokenizer, args, False)
    test_dataloader = DataLoader(test_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=test_dataset.collate_fn)

    icd_dataset = TermDataset(standard_name_list, bert_tokenizer, args, True)
    icd_dataloader = DataLoader(icd_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=icd_dataset.collate_fn)

    train_term_dataset = TermDataset(train_list, bert_tokenizer, args, False)
    train_term_dataloader = DataLoader(train_term_dataset, batch_size=args.val_batch_size, shuffle=False, collate_fn=train_term_dataset.collate_fn)

    # Build the model
    logger.info('create model')
    model = SiameseClassificationModel(bert_model, bert_config, args)
    model = model.to(args.device)

    # Configure the optimizer and scheduler
    t_total = len(train_dataloader) * args.epoch_num
    optimizer, _ = get_optimizer_and_scheduler(model, t_total, args.lr, 0)

    if args.type == 'train':
        train(model, train_dataloader, val_dataloader, test_dataloader, icd_dataloader, train_term_dataloader, optimizer, writer, args, logger,
              standard_name_list, train_list, output_path, bert_tokenizer, name_to_code, code_to_name)
    elif args.type == 'evaluate':
        if args.saved_model_path == '':
            raise Exception('saved model path must not be empty')

        # Single (non-k-fold) model
        if args.k_fold == -1:
            logger.info('loading saved model')
            checkpoint = torch.load(args.saved_model_path, map_location='cpu')
            model.load_state_dict(checkpoint)
            model = model.to(args.device)

            # Generate the latest embeddings of the icd standard terms
            logger.info('generate icd embedding')
            icd_embedding = get_model_embedding(model, icd_dataloader, True)

            start_time = time.time()
            evaluate(model, test_dataloader, icd_embedding, args, logger, writer, standard_name_list, True)
            end_time = time.time()
            logger.info('total length is {}, predict time is {}, per mention time is {}'.format(
                len(test_list), end_time - start_time, (end_time - start_time) / len(test_list)))
        else:
            evaluate_k_fold(model, test_dataloader, icd_dataloader, args, logger, writer, standard_name_list)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Embedding extraction module')
    parser.add_argument('--net', default='lenet5', help='DNN name (default=lenet5)')
    parser.add_argument('--root', default='data', help='rootpath (default=data)')
    parser.add_argument('--dataset', default='imagenet', help='dataset (default=imagenet)')
    parser.add_argument('--tensor_folder', default='tensor_pub', help='tensor_folder (default=tensor_pub)')
    parser.add_argument('--layer-info', default='layer_info', help='layer-info (default=layer_info)')
    parser.add_argument('--gpu-id', default='1', type=str, help='id(s) for CUDA_VISIBLE_DEVICES')
    parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                        help='number of data loading workers (default: 8)')
    parser.add_argument('-b', '--batch-size', default=1, type=int, metavar='N', help='should be 1')
    args = parser.parse_args()

    use_cuda = True

    # Define what device we are using
    print("CUDA Available: ", torch.cuda.is_available())
    root = args.root
    dataset = args.dataset
    net = args.net
    tensor_folder = args.tensor_folder
    layers, cols = utils.get_layer_info(root, dataset, net, args.layer_info)
    print(dataset)
    print(root, dataset, net)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    if dataset.startswith('imagenet'):
        if net == 'resnet50':
            model = utils.load_resnet50_model(True)
        elif net == 'vgg16':
            model = utils.load_vgg_model(pretrained=True, net=net)
        else:
            model = utils.load_resnet_model(pretrained=True)
        sub_models = utils.load_imagenet_sub_models(
            utils.get_model_root(root, dataset, net), layers, net, cols)
        # sub_models = utils.load_resnet_sub_models(utils.get_model_root(root,
        #     dataset, net), layers, net)
        test_loader = utils.load_imagenet_test(args.batch_size, args.workers)
        anatomy(model, sub_models, test_loader, root, dataset, tensor_folder, net, layers)
    else:  # cifar10, cifar100, mnist
        device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
        nclass = 10
        if dataset == 'cifar100':
            nclass = 100
        model = utils.load_model(
            net, device, utils.get_pretrained_model(root, dataset, net), dataset)
        weight_models = utils.load_weight_models(
            net, device, utils.get_model_root(root, dataset, net), layers, cols, nclass)
        if dataset == 'mnist':
            train_loader, test_loader = utils.load_mnist(
                utils.get_root(root, dataset, 'data', net))
        elif dataset == 'cifar10':
            train_loader, test_loader = utils.load_cifar10(
                utils.get_root(root, dataset, 'data', net))
        elif dataset == 'cifar100':
            train_loader, test_loader = utils.load_cifar100(
                utils.get_root(root, dataset, 'data', net))
        else:  # default: mnist
            train_loader, test_loader = utils.load_mnist(
                utils.get_root(root, dataset, 'data', net))
        anatomy(model, weight_models, test_loader, root, dataset, tensor_folder, net, layers)