def __init__(self, conf, inference=False):
    """Build the face-recognition learner.

    Args:
        conf: configuration namespace (net_depth, drop_ratio, net_mode,
            device, lr, momentum, log_path, milestones, embedding_size,
            threshold, ...).
        inference (bool): when True, skip training-only setup (writer,
            head, loss, optimizer); only the backbone is prepared.
    """
    print(conf)
    self.loader, self.class_num = get_train_loader(conf)
    self.model = Backbone(conf.net_depth, conf.drop_ratio, conf.net_mode)
    print('{}_{} model generated'.format(conf.net_mode, conf.net_depth))

    if not inference:
        self.milestones = conf.milestones
        self.writer = SummaryWriter(conf.log_path)
        self.step = 0
        self.head = QAMFace(embedding_size=conf.embedding_size, classnum=self.class_num).to(conf.device)
        self.focalLoss = FocalLoss()
        print('two model heads generated')

        # NOTE: parameter groups must be collected from the *unwrapped*
        # model, i.e. before the DataParallel wrapping below.
        # BN parameters are excluded from weight decay.
        paras_only_bn, paras_wo_bn = separate_bn_paras(self.model)
        self.optimizer = optim.SGD(
            [{'params': paras_wo_bn + [self.head.kernel], 'weight_decay': 5e-4},
             {'params': paras_only_bn}],
            lr=conf.lr, momentum=conf.momentum)
        print(self.optimizer)
        print('optimizers generated')

        # Event cadence, expressed in fractions of an epoch.
        self.board_loss_every = len(self.loader) // 1000
        self.evaluate_every = len(self.loader) // 10
        self.save_every = len(self.loader) // 2
    else:
        self.threshold = conf.threshold

    # Multi-GPU training
    self.model = torch.nn.DataParallel(self.model)
    self.model.to(conf.device)
    # BUG FIX: self.head only exists when inference=False; the original
    # wrapped it unconditionally, raising AttributeError in inference mode.
    if not inference:
        self.head = torch.nn.DataParallel(self.head)
        self.head = self.head.to(conf.device)
def train(args):
    """Train an IR/IR-SE backbone with an ArcFace head on an ImageFolder dataset.

    Expects `args` to carry: gpu, log_root, input_size, rgb_mean, rgb_std,
    data_root, batch_size, num_layers, mode, emb_dims, lr, momentum,
    weight_decay, load_ckpt, load_epoch, num_epoch, stages, ckpt_root.
    Side effects: writes TensorBoard logs to args.log_root and per-epoch
    checkpoints to args.ckpt_root.
    """
    DEVICE = torch.device(("cuda:%d" % args.gpu[0]) if torch.cuda.is_available() else "cpu")
    writer = SummaryWriter(args.log_root)

    # Resize slightly larger than the final crop so RandomCrop adds jitter.
    train_transform = transforms.Compose([
        transforms.Resize([int(128 * args.input_size / 112), int(128 * args.input_size / 112)]),
        transforms.RandomCrop([args.input_size, args.input_size]),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[args.rgb_mean, args.rgb_mean, args.rgb_mean],
                             std=[args.rgb_std, args.rgb_std, args.rgb_std]),
    ])
    train_dataset = datasets.ImageFolder(args.data_root, train_transform)

    # Class-balanced sampling: each sample weighted inversely to its class frequency.
    weights = make_weights_for_balanced_classes(train_dataset.imgs, len(train_dataset.classes))
    weights = torch.DoubleTensor(weights)
    sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))
    # BUG FIX: the sampler was constructed but never handed to the DataLoader
    # (shuffle=True was passed instead), so class balancing had no effect.
    # sampler and shuffle are mutually exclusive, hence shuffle is dropped.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               sampler=sampler, num_workers=8, drop_last=True)

    NUM_CLASS = len(train_loader.dataset.classes)
    BACKBONE = Backbone([args.input_size, args.input_size], args.num_layers, args.mode)
    HEAD = ArcFace(args.emb_dims, NUM_CLASS, device_id=args.gpu)
    LOSS = FocalLoss()

    # BN parameters are excluded from weight decay.
    backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(BACKBONE)
    _, head_paras_wo_bn = separate_irse_bn_paras(HEAD)
    optimizer = optim.SGD([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': args.weight_decay},
                           {'params': backbone_paras_only_bn}],
                          lr=args.lr, momentum=args.momentum)

    if args.load_ckpt:
        BACKBONE.load_state_dict(torch.load(os.path.join(args.load_ckpt, 'backbone_epoch{}.pth'.format(args.load_epoch))))
        HEAD.load_state_dict(torch.load(os.path.join(args.load_ckpt, 'head_epoch{}.pth'.format(args.load_epoch))))
        print('Checkpoint loaded')
    start_epoch = args.load_epoch if args.load_ckpt else 0

    BACKBONE = nn.DataParallel(BACKBONE, device_ids=args.gpu)
    BACKBONE = BACKBONE.to(DEVICE)

    display_frequency = len(train_loader) // 100  # typo fix: was "dispaly_frequency" (local only)
    NUM_EPOCH_WARM_UP = args.num_epoch // 25
    NUM_BATCH_WARM_UP = len(train_loader) * NUM_EPOCH_WARM_UP
    batch = 0
    print('Start training at %s!' % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    for epoch in range(start_epoch, args.num_epoch):
        # Step LR down by 10x at each staged boundary.
        if epoch == args.stages[0] or epoch == args.stages[1] or epoch == args.stages[2]:
            for params in optimizer.param_groups:
                params['lr'] /= 10.
        BACKBONE.train()
        HEAD.train()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        for inputs, labels in train_loader:
            # Linear LR warm-up over the first NUM_BATCH_WARM_UP batches.
            if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (batch + 1 <= NUM_BATCH_WARM_UP):
                for params in optimizer.param_groups:
                    params['lr'] = (batch + 1) * args.lr / NUM_BATCH_WARM_UP
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE).long()
            features = BACKBONE(inputs)
            outputs = HEAD(features, labels)
            loss = LOSS(outputs, labels)
            prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5))
            losses.update(loss.data.item(), inputs.size(0))
            top1.update(prec1.data.item(), inputs.size(0))
            top5.update(prec5.data.item(), inputs.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch += 1
            if batch % display_frequency == 0:
                print('%s Epoch %d/%d Batch %d/%d: train loss %f, train prec@1 %f, train prec@5 %f' %
                      (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), epoch, args.num_epoch,
                       batch, len(train_loader) * args.num_epoch, losses.avg, top1.avg, top5.avg))
        # Per-epoch TensorBoard logging and checkpointing.
        writer.add_scalar('Train_Loss', losses.avg, epoch + 1)
        writer.add_scalar('Train_Top1_Accuracy', top1.avg, epoch + 1)
        writer.add_scalar('Train_Top5_Accuracy', top5.avg, epoch + 1)
        # Save the unwrapped module's weights so they reload without DataParallel.
        torch.save(BACKBONE.module.state_dict(), os.path.join(args.ckpt_root, 'backbone_epoch%d.pth' % (epoch + 1)))
        torch.save(HEAD.state_dict(), os.path.join(args.ckpt_root, 'head_epoch%d.pth' % (epoch + 1)))
def __init__(self, args, trainRegressionDataLoader, trainRegressionClassificationLoader,
             testDataLoader, trainRainFallLoader, means, std):
    """Wire up models, losses, optimizers and run-state for the rainfall trainer."""
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # --- data loaders ---
    self.trainRegressionDataLoader = trainRegressionDataLoader
    self.trainRegressionClassificationLoader = trainRegressionClassificationLoader
    self.testDataLoader = testDataLoader
    self.classificationLoader = trainRainFallLoader

    # --- run configuration ---
    self.run_datetime = datetime.datetime.now()
    self.out_path = args.out
    self.sigma = args.sigma
    self.beta = args.beta
    self.earlyStop = args.earlyStop
    self.nClass = args.nClass

    # Additive Gaussian input noise for the denoising autoencoder.
    self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
    self.noiseStd = 1e-3

    # --- models ---
    self.model = AutoencoderBN(self.noiseMean, self.noiseStd).to(self.device)
    self.regressionModel = Regression(self.nClass).to(self.device)
    self.classificationModel = regressionClassification(self.nClass).to(self.device)
    self.rainFallClassifierModel = rainFallClassification().to(self.device)

    # --- losses / normalizers ---
    self.meanStdNormalizer = MeanVarianceNormalizer(means, std).to(self.device)
    self.meanvarLoss = MeanVarLoss(self.nClass).to(self.device)
    self.normaliedLoss = NormalizerLoss(std).to(self.device)
    self.focalLoss = FocalLoss(self.nClass, alpha=0.25, gamma=2).to(self.device)
    self.rainFocalLoss = FocalLoss(2, alpha=0.25, gamma=2).to(self.device)
    self.criterion = nn.MSELoss()
    self.classificationCriterion = nn.CrossEntropyLoss()

    # --- optimizers: autoencoder + regressor share one Adam; each
    #     classifier head gets its own, with scaled learning rates ---
    self.regressionOptim = torch.optim.Adam(
        [{'params': self.regressionModel.parameters(), 'lr': args.lr, 'weight_decay': args.weight_decay},
         {'params': self.model.parameters(), 'lr': args.lr, 'weight_decay': args.weight_decay}],
        lr=args.lr * 10, weight_decay=args.weight_decay * 10)
    self.classificationOptim = torch.optim.Adam(self.classificationModel.parameters(), lr=args.lr * 100)
    self.rainFallOptim = torch.optim.Adam(self.rainFallClassifierModel.parameters(), lr=args.lr * 10)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.regressionOptim, step_size=750 * 2)

    # --- output directory, logger, and a dump of the hyper-parameters ---
    if not os.path.exists(self.out_path):
        os.makedirs(self.out_path)
    self.logger = Logger(self.out_path)
    with open(os.path.join(self.out_path, "para.json"), "w") as f:
        json.dump(vars(args), f)

    # --- bookkeeping / best-result tracking ---
    self.epoch = 0
    self.iteration = 0
    self.classificationIteration = 0
    self.rainfallclassificationIteration = 0
    self.test_step = 0
    self.max_epoch = args.epochs
    self.val_interval = args.interval
    self.res = 0
    self.bestConstructLoss = 1e7
    self.bestConstructEpoch = 0
    self.best_error = 1e7
    self.best_res_epoch = 0
def main(args):
    """Evaluate a trained model on the test split and persist metrics and plots.

    Side effects: creates an evaluation save dir, logs metrics, writes an
    ROC curve PDF and a results.npz with per-sample predictions.
    """
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, type="evaluation")
    log = util.get_logger(args.save_dir, args.name)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    # Scale the batch size with the number of GPUs in use.
    args.batch_size *= max(1, len(args.gpu_ids))

    # Build the model and put it in eval mode
    log.info('Building model...')
    model, step = get_model(log, args)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    dev_dataset = util.load_dataset(args.test_file, args.PPI_dir, args.PPI_gene_feature_dir,
                                    args.PPI_gene_query_dict_dir, args.max_nodes, train=False)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=util.collate_fn)

    # Evaluate
    log.info('Evaluating...')
    # Loss: focal loss with per-class weighting [alpha, 1]
    cri = FocalLoss(alpha=torch.tensor([args.alpha, 1]).to(device), gamma=args.gamma)
    loss_meter = util.AverageMeter()
    ground_true = dev_loader.dataset.y_list
    ground_true = ground_true.to(device)
    # Pre-allocate one 2-logit row per test sample (binary classification).
    predict_list = torch.zeros([dev_loader.dataset.__len__(), 2], dtype=torch.float)
    predict_list = predict_list.to(device)
    sample_index = 0
    with torch.no_grad(), \
            tqdm(total=len(dev_loader.dataset)) as progress_bar:
        # Each batch carries two interacting entities: (a, bio_a, A) and
        # (b, bio_b, B), plus the interaction label y.
        for batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B, batch_y in dev_loader:
            # Setup for forward
            batch_a = batch_a.to(device)
            batch_bio_a = batch_bio_a.to(device)
            batch_A = batch_A.to(device)
            batch_bio_b = batch_bio_b.to(device)
            batch_b = batch_b.to(device)
            batch_B = batch_B.to(device)
            batch_y = batch_y.to(device)
            batch_y = batch_y.long()
            batch_size = batch_bio_a.size(0)

            # Forward
            output = model(batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B)
            loss = cri(output, batch_y)
            loss_val = loss.item()
            loss_meter.update(loss_val, batch_size)
            # Collect this batch's outputs into the pre-allocated buffer.
            predict_list[sample_index:sample_index + batch_size] = output
            sample_index = sample_index + batch_size

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=loss_meter.avg)

    # Compute and report metrics over the whole test set.
    results = util.metrics_compute(predict_list, ground_true)
    log.info("Evaluation result of model:")
    log.info(f"Loss in test dataset is {loss_meter.avg}")
    log.info(f"Accuracy:{results['Accuracy']}, AUC:{results['AUC']}, Recall:{results['Recall']},Precision:{results['Precision']},Specificity:{results['Specificity']}")
    log.info(f"TP:{results['TP']},FN:{results['FN']}")
    log.info(f"FP:{results['FP']},TN:{results['TN']}")
    log.info("plot prediction curve...")
    ROC_AUC(results["fpr"], results["tpr"], results["AUC"], os.path.join(args.save_dir, "ROC_curve.pdf"))
    log.info("Save evaluation result...")
    # NOTE(review): `result=results` stores a dict inside the npz (object
    # array) — loading it back requires allow_pickle=True; verify downstream.
    np.savez(os.path.join(args.save_dir, "results.npz"), predict=np.array(predict_list.cpu().tolist()), result=results)
def main(args):
    """Train the model with periodic EMA-smoothed evaluation and checkpointing.

    Side effects: creates a train save dir, writes TensorBoard scalars and
    checkpoints chosen by args.metric_name.
    """
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, type="train")
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    # Scale the batch size with the number of GPUs in use.
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed for reproducibility
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Build model; `step` resumes the global sample counter from checkpoint
    log.info('Building model...')
    model, step = get_model(log, args)
    model = model.to(device)
    model.train()

    # Exponential moving average of the weights, used only at eval time
    ema = util.EMA(model, args.ema_decay)

    # Checkpoint saver keyed on the chosen dev metric
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Optimizer and scheduler (the lambda always returns 1 => constant LR)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=[0.8, 0.999], eps=1e-7, weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda step: 1)

    # Loss: focal loss with per-class weighting [alpha, 1]
    cri = FocalLoss(alpha=torch.tensor([args.alpha, 1]).to(device), gamma=args.gamma)

    # Data loaders: dev is unshuffled, train is shuffled
    log.info('Building dataset...')
    dev_dataset = util.load_dataset(args.dev_file, args.PPI_dir, args.PPI_gene_feature_dir,
                                    args.PPI_gene_query_dict_dir, args.max_nodes, train=False)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=util.collate_fn)
    train_dataset = util.load_dataset(args.train_file, args.PPI_dir, args.PPI_gene_feature_dir,
                                      args.PPI_gene_query_dict_dir, args.max_nodes)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=util.collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            # Each batch carries two interacting entities: (a, bio_a, A) and
            # (b, bio_b, B), plus the interaction label y.
            for batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B, batch_y in train_loader:
                # Setup for forward
                batch_a = batch_a.to(device)
                batch_bio_a = batch_bio_a.to(device)
                batch_A = batch_A.to(device)
                batch_bio_b = batch_bio_b.to(device)
                batch_b = batch_b.to(device)
                batch_B = batch_B.to(device)
                batch_y = batch_y.to(device)
                batch_y = batch_y.long()
                batch_size = batch_bio_a.size(0)
                optimizer.zero_grad()

                # Forward
                output = model(batch_a, batch_bio_a, batch_A, batch_b, batch_bio_b, batch_B)
                loss = cri(output, batch_y)
                # loss = F.nll_loss(output, batch_y)
                loss_val = loss.item()

                # Backward, with gradient clipping
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()
                # EMA update; step counts samples, so step // batch_size
                # approximates the number of optimizer updates so far.
                ema(model, step // batch_size)

                # Log info (`step` advances by samples, not batches)
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/Loss', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate with EMA weights swapped in, then restore.
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results = evaluate(model, dev_loader, cri, device)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.5f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log dev metrics to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
def _load_criterion(self) -> None:
    """Instantiate the training criterion: focal loss with focusing parameter gamma=2."""
    self.criterion = FocalLoss(gamma=2)
# Script-level setup: iterators, ESIM model, optimizer and focal-loss criterion.
device = torch.device('cuda')
# BucketIterator groups examples of similar text1 length to minimise padding.
train_iterator, valid_iterator = data.BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text1),
    device=device,
    shuffle=True)
# Initialise the embedding layer from pre-trained vectors in the TEXT vocab.
pretrained_embeddings = TEXT.vocab.vectors
model = ESIM(pretrained_embeddings, VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, LINEAR_SIZE, DROPOUT)
optimizer = optim.Adam(model.parameters())
criterion = FocalLoss(2)  # binary focal loss
model = model.to(device)
criterion = criterion.to(device)


def categorical_accuracy(preds, y):
    """Return accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8."""
    max_preds = preds.argmax(
        dim=1, keepdim=True)  # get the index of the max probability
    correct = max_preds.squeeze(1).eq(y)
    # Division by a FloatTensor yields a (1,)-shaped float tensor.
    return correct.sum() / torch.FloatTensor([y.shape[0]])


# NOTE(review): the body of train() continues beyond this chunk.
def train(model, iterator, optimizer, criterion):
def __init__(self, args, trainRegressionDataLoader, trainRainDataLoader, testDataLoader, spaces, ranges):
    """Set up the ordinal-regression rainfall trainer: models, losses, optimizers and run-state."""
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # data loaders
    self.trainRegressionDataLoader = trainRegressionDataLoader
    self.trainRainDataLoader = trainRainDataLoader
    self.testDataLoader = testDataLoader

    # run configuration
    self.run_datetime = datetime.datetime.now()
    self.out_path = args.out
    self.alpha = args.alpha
    self.sigma = args.sigma
    self.beta = args.beta
    self.earlyStop = args.earlyStop
    self.spaces = spaces
    self.nClass = ranges.shape[0]  # one ordinal class per rainfall range

    # additive Gaussian noise fed to the denoising autoencoder
    self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
    self.noiseStd = args.noiseStd

    # models
    self.denoiseModel = AutoencoderBN(self.noiseMean, self.noiseStd).to(self.device)
    self.regressionModel = OrdinalRegressionModel(self.nClass).to(self.device)
    self.rainFallClassifierModel = RainFallClassification().to(self.device)

    # losses
    self.ordinalLoss = OrdinalLoss(spaces).to(self.device)
    self.rainFocalLoss = FocalLoss(2, alpha=0.25, gamma=2).to(self.device)
    self.criterion = nn.MSELoss()

    # optimizers: the regressor runs at 10x the autoencoder's LR/decay
    self.regressionOptim = torch.optim.Adam(
        [{'params': self.regressionModel.parameters(),
          'lr': args.lr * 10,
          'weight_decay': args.weight_decay * 10},
         {'params': self.denoiseModel.parameters(),
          'lr': args.lr,
          'weight_decay': args.weight_decay}],
        lr=args.lr * 10,
        weight_decay=args.weight_decay * 10)
    self.rainFallOptim = torch.optim.Adam(self.rainFallClassifierModel.parameters(), lr=args.lr * 10)

    # output directory, logger, and hyper-parameter dump
    if not os.path.exists(self.out_path):
        os.makedirs(self.out_path)
    self.logger = Logger(self.out_path)
    with open(os.path.join(self.out_path, 'para.json'), 'w') as f:
        json.dump(vars(args), f)

    # bookkeeping / best-result tracking
    self.epoch = 0
    self.iteration = 0
    self.classificationIteration = 0
    self.rainfallclassificationIteration = 0
    self.test_step = 0
    self.max_epoch = args.epochs
    self.val_interval = args.interval
    self.res = 0
    self.bestConstructLoss = 1e7
    self.bestConstructEpoch = 0
    self.best_error = 1e7
    self.best_res_epoch = 0