def main(): torch.manual_seed(args.seed) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu use_gpu = torch.cuda.is_available() if args.use_cpu: use_gpu = False sys.stdout = Logger(osp.join(args.save_dir, 'log' + '.txt')) if use_gpu: print("Currently using GPU: {}".format(args.gpu)) cudnn.benchmark = True torch.cuda.manual_seed_all(args.seed) else: print("Currently using CPU") with open(loader_path, 'rb') as f: trainloader, testloader = pickle.load(f) print("Creating model: {}".format(args.model)) model = models.create(name=args.model, num_classes=num_classes, feature_dim=feature_dim) if use_gpu: model = nn.DataParallel(model).cuda() criterion_xent = nn.CrossEntropyLoss() criterion_cent = CenterLoss(num_classes=num_classes, feat_dim=args.featdim, use_gpu=use_gpu) optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, weight_decay=5e-04, momentum=0.9) optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent) if args.stepsize > 0: scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize, gamma=args.gamma) start_time = time.time() total_loss_list = [] train_acc, test_acc = 0, 0 for epoch in range(args.max_epoch): adjust_learning_rate(optimizer_model, epoch) print("==> Epoch {}/{}".format(epoch+1, args.max_epoch)) loss_list, train_acc = train(model, criterion_xent, criterion_cent, optimizer_model, optimizer_centloss, trainloader, use_gpu, num_classes, epoch) total_loss_list.append(loss_list) if args.stepsize > 0: scheduler.step() if args.eval_freq > 0 and (epoch+1) % args.eval_freq == 0 or (epoch+1) == args.max_epoch: print("==> Test") test_acc = test(model, testloader, use_gpu, num_classes, epoch) total_loss_list = np.array(total_loss_list).ravel() elapsed = round(time.time() - start_time) elapsed = str(datetime.timedelta(seconds=elapsed)) print("Finished. Total elapsed time (h:m:s): {}".format(elapsed)) return total_loss_list,train_acc, test_acc
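# Most of the PyTorch snippets in this file construct a criterion with the signature
# CenterLoss(num_classes, feat_dim, use_gpu=...) and give it its own SGD optimizer.
# The module itself is not included here; below is a minimal sketch of what such a
# criterion usually looks like (center loss of Wen et al., ECCV 2016). The actual
# implementations in these repositories may differ in detail.
import torch
import torch.nn as nn

class CenterLoss(nn.Module):
    """Learnable per-class centers; the loss is the mean squared distance
    between each feature vector and the center of its ground-truth class."""

    def __init__(self, num_classes, feat_dim, use_gpu=True):
        super(CenterLoss, self).__init__()
        centers = torch.randn(num_classes, feat_dim)
        self.centers = nn.Parameter(centers.cuda() if use_gpu else centers)

    def forward(self, features, labels):
        # pick the center belonging to each sample's label: (batch, feat_dim)
        centers_batch = self.centers.index_select(0, labels.long())
        return ((features - centers_batch) ** 2).sum(dim=1).mean() / 2.0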
def main():
    homepath = os.environ['HOME']
    datapath = os.path.join(homepath, 'data')
    mox.file.copy_parallel(args.data_url, datapath)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MobileNetV3().to(device)
    centerloss = CenterLoss(num_classes=75, feat_dim=1280, use_gpu=True)
    cross_entropy = nn.CrossEntropyLoss()
    optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model,
                                      weight_decay=5e-04, momentum=0.9)
    optimizer_centloss = torch.optim.SGD(centerloss.parameters(), lr=args.lr_centloss)
    train_iterator, test_iterator = dataprocess(
        train_label_path=args.train_label_txt, data_dirtory=datapath,
        test_label_path=args.test_label_txt, batch_size=args.batch_size)
    if args.step > 0:
        scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.step, gamma=args.gamma)
    if not os.path.isdir(os.path.join(args.homepath, 'model')):
        os.makedirs(os.path.join(args.homepath, 'model'))
    tmp_accuracy = 0
    for epoch in range(args.num_epoch):
        if args.step > 0:
            scheduler.step()
        train_loss, train_acc = train(model=model, device=device, train_iterator=train_iterator,
                                      optimizer_model=optimizer_model,
                                      optimizer_centloss=optimizer_centloss,
                                      criterion1=cross_entropy, criterion2=centerloss,
                                      weight_centloss=args.weight)
        test_loss, test_acc = eval(model=model, device=device, test_iterator=test_iterator,
                                   criterion1=cross_entropy, criterion2=centerloss,
                                   weight_centloss=args.weight_centloss)
        print('|Epoch:', epoch + 1, '|Train loss', train_loss.item(), '|Train acc:', train_acc.item(),
              '|Test loss', test_loss.item(), '|Test acc', test_acc.item())
        if test_acc > tmp_accuracy:
            MODEL_SAVE_PATH = os.path.join(args.homepath, 'model', 'mymodel_{}.pth'.format(epoch))
            torch.save(model.state_dict(), MODEL_SAVE_PATH)
            tmp_accuracy = test_acc
            mox.file.copy(MODEL_SAVE_PATH, os.path.join(args.train_url, 'model/mymodel.pth'))
def main(): torch.manual_seed(args.seed) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu use_gpu = torch.cuda.is_available() if args.use_cpu: use_gpu = False sys.stdout = Logger(osp.join(args.save_dir, 'log_' + args.dataset + '.txt')) if use_gpu: print("Currently using GPU: {}".format(args.gpu)) cudnn.benchmark = True torch.cuda.manual_seed_all(args.seed) else: print("Currently using CPU") print("Creating dataset: {}".format(args.dataset)) dataset = datasets.create( name=args.dataset, batch_size=args.batch_size, use_gpu=use_gpu, num_workers=args.workers, ) trainloader, testloader = dataset.trainloader, dataset.testloader print("Creating model: {}".format(args.model)) model = models.create(name=args.model, num_classes=dataset.num_classes) if use_gpu: model = nn.DataParallel(model).cuda() criterion_xent = nn.CrossEntropyLoss() criterion_cent = CenterLoss(num_classes=dataset.num_classes, feat_dim=2, use_gpu=use_gpu) optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, weight_decay=5e-04, momentum=0.9) optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent) if args.stepsize > 0: scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize, gamma=args.gamma) start_time = time.time() for epoch in range(args.max_epoch): print("==> Epoch {}/{}".format(epoch+1, args.max_epoch)) train(model, criterion_xent, criterion_cent, optimizer_model, optimizer_centloss, trainloader, use_gpu, dataset.num_classes, epoch) if args.stepsize > 0: scheduler.step() if args.eval_freq > 0 and (epoch+1) % args.eval_freq == 0 or (epoch+1) == args.max_epoch: print("==> Test") acc, err = test(model, testloader, use_gpu, dataset.num_classes, epoch) print("Accuracy (%): {}\t Error rate (%): {}".format(acc, err)) elapsed = round(time.time() - start_time) elapsed = str(datetime.timedelta(seconds=elapsed)) print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
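# The train(...) helper called by the loops above is not included in this file.
# Below is a sketch of the usual per-batch update for the two-optimizer setup
# (one SGD for the network, one SGD for the centers); the weight_cent factor and
# a model returning (features, logits) are assumptions for illustration.
def train_step(model, criterion_xent, criterion_cent,
               optimizer_model, optimizer_centloss,
               data, labels, weight_cent=1.0):
    features, outputs = model(data)
    loss_xent = criterion_xent(outputs, labels)
    loss_cent = criterion_cent(features, labels)
    loss = loss_xent + weight_cent * loss_cent

    optimizer_model.zero_grad()
    optimizer_centloss.zero_grad()
    loss.backward()
    optimizer_model.step()
    # remove the weight_cent scaling from the center gradients so the centers
    # are updated with the plain lr_cent learning rate
    if weight_cent > 0:
        for param in criterion_cent.parameters():
            param.grad.data *= (1.0 / weight_cent)
    optimizer_centloss.step()
    return loss_xent.item(), loss_cent.item()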
def train(): # Init Training data_loaders = get_data_loader() coconut_model = CoconutModel(num_of_classes=args.num_of_classes, feature_size=args.feature_size) center_loss = CenterLoss(num_classes=args.num_of_classes, feat_dim=args.feature_size, use_gpu=torch.cuda.is_available()) coconut_model.to(device) center_loss.to(device) params = list(coconut_model.parameters()) + list( center_loss.parameters()) + list(coconut_model.bert_model.parameters()) optimizer = RAdam(params=params, lr=args.lr, betas=(0.0, 0.999), eps=1e-3, weight_decay=args.l2_reg) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[80, 150], gamma=0.1) starting_epoch = 0 if args.resume: checkpoints = load_model(model=coconut_model, optimizer=optimizer, lr_scheduler=lr_scheduler, center_loss=center_loss) (starting_epoch, coconut_model, optimizer, lr_scheduler, center_loss) = checkpoints for epoch in range(starting_epoch, args.epoch): train_model(epoch=epoch, model=coconut_model, optimizer=optimizer, loader=data_loaders["train_loader"], center_loss=center_loss) lr_scheduler.step() eval_model(epoch=epoch, model=coconut_model, loader=data_loaders["dev_loader"]) save_mode(epoch=epoch, model=coconut_model, optimizer=optimizer, lr_scheduler=lr_scheduler, center_loss=center_loss) return
def __init__(self, rnn_type, ntokens, ninp, nhid, nlayers, dropout=0.5, tie_weights=False, ALPHA=0.5): super(RNNModel, self).__init__() self.drop = nn.Dropout(dropout) self.encoder = nn.Embedding(ntokens, ninp) if rnn_type in ['LSTM', 'GRU']: self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) else: try: nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type] except KeyError: raise ValueError( """An invalid option for `--model` was supplied, options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""") self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout) self._decoder = nn.Linear(nhid, ntokens) # Optionally tie weights as in: # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) # https://arxiv.org/abs/1608.05859 # and # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016) # https://arxiv.org/abs/1611.01462 if tie_weights: if nhid != ninp: raise ValueError('When using the tied flag, nhid must be equal to emsize') self._decoder.weight = self.encoder.weight self.init_weights() self._cross_entropy_fn = nn.CrossEntropyLoss() self._center_loss_fn = CenterLoss(ntokens, nhid, ALPHA=ALPHA) self.rnn_type = rnn_type self.nhid = nhid self.nlayers = nlayers
def main():
    train_dataset = datasets.MNIST('../data', download=True, train=True,
                                   transform=transforms.Compose([
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307, ), (0.3081, ))
                                   ]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    model = LeNetPP(dim_hidden=args.dim_hidden)
    if args.cuda:
        model = model.cuda()
    nll_loss = nn.NLLLoss()
    if args.loss == 0:
        center_loss = CenterLoss(dim_hidden=args.dim_hidden, num_classes=10,
                                 lambda_c=args.lambda_c, use_cuda=args.cuda)
    if args.loss == 1:
        center_loss = ContrastiveCenterLoss(dim_hidden=args.dim_hidden, num_classes=10,
                                            lambda_c=args.lambda_c, use_cuda=args.cuda)
    if args.cuda:
        nll_loss, center_loss = nll_loss.cuda(), center_loss.cuda()
    criterion = [nll_loss, center_loss]
    optimizer_nn = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    scheduler = lr_scheduler.StepLR(optimizer_nn, step_size=50, gamma=0.2)
    optimizer_c = optim.SGD(center_loss.parameters(), lr=args.alpha)
    for epoch in range(args.epochs):
        scheduler.step()
        train(train_loader, model, criterion, [optimizer_nn, optimizer_c], epoch + 1)
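# The snippet above switches between CenterLoss and ContrastiveCenterLoss.
# The contrastive variant additionally pushes features away from the centers of the
# other classes by dividing intra-class distance by inter-class distance.
# A minimal sketch follows; the constructor matches the call above, while the
# forward signature and the exact formulation are assumptions.
class ContrastiveCenterLoss(nn.Module):
    def __init__(self, dim_hidden, num_classes, lambda_c=1.0, use_cuda=True):
        super(ContrastiveCenterLoss, self).__init__()
        self.lambda_c = lambda_c
        centers = torch.randn(num_classes, dim_hidden)
        self.centers = nn.Parameter(centers.cuda() if use_cuda else centers)

    def forward(self, features, labels):
        batch_size = features.size(0)
        # squared distance of every sample to every class center: (batch, num_classes)
        dist = ((features.unsqueeze(1) - self.centers.unsqueeze(0)) ** 2).sum(dim=2)
        intra = dist.gather(1, labels.view(-1, 1)).squeeze(1)  # distance to own center
        inter = dist.sum(dim=1) - intra                        # distance to the other centers
        return self.lambda_c / 2.0 * (intra / (inter + 1e-6)).sum() / batch_size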
def main(): torch.manual_seed(args.seed) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu use_gpu = torch.cuda.is_available() if use_gpu: print("Currently using GPU: {}".format(args.gpu)) cudnn.benchmark = True torch.cuda.manual_seed_all(args.seed) else: print("Currently using CPU") transform = transforms.Compose([transforms.ToTensor()]) # Load dataset dset_train = MultiViewDataSet(args.datadir, transform=transform) trainloader = DataLoader(dset_train, batch_size=args.batch_size, shuffle=True, num_workers=0) model = MVCNN(args.num_classes) model.cuda() criterion_xent = nn.CrossEntropyLoss() criterion_cent = CenterLoss(num_classes=args.num_classes, feat_dim=args.feat_dim, use_gpu=use_gpu) optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, momentum=0.9) optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent) if args.stepsize > 0: scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize, gamma=args.gamma) for epoch in range(args.max_epoch): #trainloader = iter(train_loader) print("==> Epoch {}/{}".format(epoch+1, args.max_epoch)) print("++++++++++++++++++++++++++") train(model, criterion_xent, criterion_cent, optimizer_model, optimizer_centloss, trainloader, use_gpu, args.num_classes, epoch) if epoch % args.save_model_freq == 0: torch.save(model.state_dict(), args.model_dir+'/'+'3D_model.pth') if args.stepsize > 0: scheduler.step() writer.close()
def test(): """ Test model accuracy on test dataset. 测试模型在测试集上的准确率。 """ print("Start to test...") ctx = mx.gpu() if args.use_gpu else mx.cpu() _, test_iter = data_loader(args.batch_size) model = LeNetPlus() model.load_parameters(os.path.join(args.ckpt_dir, args.prefix + "-best.params"), ctx=ctx, allow_missing=True) # center_net = CenterLoss(num_classes=args.num_classes, feature_size=args.feature_size, lmbd=args.lmbd, ctx=mx.cpu()) center_net.load_parameters(os.path.join( args.ckpt_dir, args.prefix + "-feature_matrix.params"), ctx=ctx) start_time = time.time() test_accuracy, features, predictions, labels = evaluate_accuracy( test_iter, model, center_net, args.eval_method, ctx) elapsed_time = time.time() - start_time print("Test_acc: %s, Elapsed_time: %f s" % (test_accuracy, elapsed_time)) # make directory if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) # draw feature map if args.plotting: plot_features(features, labels, num_classes=args.num_classes, fpath=os.path.join(args.out_dir, "%s.png" % args.prefix))
def train_net(net, train_loader, test_loader, lr, device, prefix):
    global tensorboard_writer
    tensorboard_writer = SummaryWriter(comment=prefix)
    # set net on gpu
    net.to(device)
    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion_cent = CenterLoss(num_classes=10, feat_dim=128)
    criterion_cent.to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    optimizer_cent = optim.SGD(criterion_cent.parameters(), lr=lr, momentum=MOMENTUM)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES, gamma=GAMMA)
    scheduler_cent = lr_scheduler.MultiStepLR(optimizer_cent, milestones=MILESTONES, gamma=GAMMA)
    # initial test
    eval_net(net, test_loader, 0, device)
    # epochs
    for epoch in range(EPOCHS):
        # train
        net.train()
        scheduler.step()
        scheduler_cent.step()
        for i, (images, labels) in enumerate(train_loader):
            net.zero_grad()
            optimizer.zero_grad()
            optimizer_cent.zero_grad()
            images = images.to(device)
            labels = labels.to(device)
            features, outputs = net(images)
            loss_xent = criterion(outputs, labels)
            loss_cent = 0.1 * criterion_cent(features, labels)  # center loss on the features
            loss = loss_xent + loss_cent
            loss.backward()
            optimizer.step()
            optimizer_cent.step()
            print(f'epoch {epoch+1:3d}, {i:3d}|{len(train_loader):3d}, loss_xent: {loss_xent.item():2.4f}, loss_cent: {loss_cent.item():2.4f} ', end='\r')
            tensorboard_writer.add_scalars('train_loss',
                                           {'train_loss_xent': loss_xent.item(),
                                            'train_loss_cent': loss_cent.item()},
                                           epoch * len(train_loader) + i)
        eval_net(net, test_loader, epoch + 1, device)
    torch.save(net.state_dict(), f'zoo/{prefix}_params.pth')
def train_cnn_ivr():
    lowAccLabel_fp = '../data/list_tc/label/accLeccThan20Label_filter.txt'
    loaders, cnnidx2label = load_data_for_training_cnn(batch_size=16 * 1,
                                                       lowAccLabel_fp=lowAccLabel_fp)
    # model = InceptionResNetV2(num_classes=365, num_feature=1024, drop_rate=0.2)
    model = se_resnet50(num_classes=365)
    criterion_cent = CenterLoss(num_classes=365, feat_dim=1024, use_gpu=False)
    DEVICE = torch.device('cuda:0')
    train_cnn(model, criterion_cent, loaders['train_cnn'], loaders['val_cnn'],
              cnnidx2label, DEVICE, multi_gpu=None, repick=True)
def __init__(self, out_dim, alpha_ratio, lambda_ratio=0.5, is_center_loss=True):
    self.out_dim = out_dim
    self.alpha_ratio = alpha_ratio
    self.lambda_ratio = lambda_ratio
    self.is_center_loss = is_center_loss
    super(LeNets, self).__init__()
    with self.init_scope():
        self.conv1_1 = L.Convolution2D(None, 32, ksize=5, stride=1, pad=2)
        self.conv1_2 = L.Convolution2D(None, 32, ksize=5, stride=1, pad=2)
        self.conv2_1 = L.Convolution2D(None, 64, ksize=5, stride=1, pad=2)
        self.conv2_2 = L.Convolution2D(None, 64, ksize=5, stride=1, pad=2)
        self.conv3_1 = L.Convolution2D(None, 128, ksize=5, stride=1, pad=2)
        self.conv3_2 = L.Convolution2D(None, 128, ksize=5, stride=1, pad=2)
        self.fc1 = L.Linear(None, 2)
        self.fc2 = L.Linear(None, out_dim)
        if is_center_loss:
            self.center_loss_function = CenterLoss(alpha_ratio, out_dim)
def main(): global best_acc start_epoch = 0 # start from epoch 0 or last checkpoint epoch opt = args args.lr = Config["normal_config"]["initial_lr"] checkpoint_dir = Config["normal_config"]["checkpoint"] if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) train_dataloader = datatype().instance_a_loader(t='train') test_dataloader = datatype().instance_a_loader(t='test') #test_dataloader = train_dataloader model = model_audio() model = torch.nn.DataParallel(model).cuda() cudnn.benchmark = True print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0)) criterion = nn.CrossEntropyLoss().cuda() criterion_cent = CenterLoss(num_classes=13, feat_dim=1024, use_gpu=True) optimizer = optim.RMSprop(model.parameters(), lr=args.lr, alpha=0.9, eps=1.0, weight_decay=0.00004, momentum=0.9, centered=False) optimizer_cent = optim.RMSprop(model.parameters(), lr=args.lr) best_acc = 63 start_epoch = 0 if args.resume: checkpoint = torch.load( Config["harmonic_6_14"]["harmonic_model_best"].format(ff)) start_epoch = checkpoint['epoch'] + 1 best_acc = checkpoint['acc'] #best_acc = 86 print( '==> Resuming from checkpoint..(trained from {} epochs,lr:{}\tacc:{})' .format(start_epoch - 1, checkpoint['lr'], best_acc)) model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) best_acc = 64 print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0)) if args.evaluate: print('\nEvaluation only') test_loss, test_acc = test(test_dataloader, model, criterion, criterion_cent, start_epoch, use_cuda) print('test_loss:{}, test_acc:{}'.format(test_loss, test_acc)) return epoches = Config["normal_config"]["epoch_num"] for epoch in range(0, epoches - start_epoch): epoch = start_epoch + epoch adjust_learning_rate(optimizer, epoch) print('\nEpoch: [%d | %d] LR: %f' % (epoch, epoches, state['lr'])) #train_loss, train_acc = train(train_dataloader, model, criterion, optimizer, epoch, use_cuda) train_loss, train_acc = train(train_dataloader, model, criterion, criterion_cent, optimizer, optimizer_cent, epoch, use_cuda) test_loss, test_acc = test(test_dataloader, model, criterion, criterion_cent, epoch, use_cuda) line = 'Epoch: [%d|%d] train_acc: %f train_loss: %f \t test_acc: %f test_loss :%f \tlr:%f\n' % ( epoch, epoches, train_acc, train_loss, test_acc, test_loss, state['lr']) print(line) with open( Config["harmonic_6_14"]["harmonic_train_process_txt"].format( ff), 'a') as f: f.write(line) dir = Config["harmonic_6_14"]["harmonic_checkpoint"].format(ff) if os.path.exists(dir): print('{} \n'.format(dir)) state_a = { 'epoch': epoch, 'state_dict': model.state_dict(), 'acc': test_acc, 'train_acc': train_acc, 'optimizer': optimizer.state_dict(), 'lr': state['lr'] } torch.save(state_a, dir) #is_best = False # test_acc > best_acc # best_acc = max(test_acc, best_acc) is_best = test_acc > best_acc if is_best: best_acc = test_acc dir_best = Config["harmonic_6_14"]["harmonic_model_best"].format( ff) print(dir_best) torch.save( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'acc': test_acc, 'best_acc': best_acc, 'optimizer': optimizer.state_dict(), 'lr': state['lr'] }, dir_best)
def main(): feature_dim = int(args.feature_size / 2) tiou_thresholds = np.linspace(0.1, 0.7, 7) train_subset = 'training' test_subset = 'validation' threshold_type = 'mean' prediction_filename = './data/prediction.json' fps = 25 stride = 16 if args.dataset_name == 'Thumos14': t_max = 750 t_max_ctc = 2800 train_subset = 'validation' test_subset = 'test' num_class = 20 groundtruth_filename = './data/th14_groundtruth.json' elif args.dataset_name == 'GTEA': fps = 15 t_max = 100 t_max_ctc = 150 num_class = 7 groundtruth_filename = './data/gtea_groundtruth.json' elif args.dataset_name == 'BEOID': fps = 30 t_max = 100 t_max_ctc = 400 num_class = 34 groundtruth_filename = './data/beoid_groundtruth.json' else: raise ValueError('wrong dataset') device = torch.device("cuda") if args.background: num_class += 1 dataset = Dataset(args, groundtruth_filename, train_subset=train_subset, test_subset=test_subset, mode=args.mode, use_sf=args.use_sf) os.system('mkdir -p %s' % args.model_dir) os.system('mkdir -p %s/%s' % (args.log_dir, args.model_name)) now = datetime.now() dt_string = now.strftime("%Y%m%d%H%M%S") logger = Logger('%s/%s_%s' % (args.log_dir, args.model_name, dt_string)) model = SFNET(dataset.feature_size, num_class).to(device) if args.eval_only and args.resume is None: print('***************************') print('Pretrained Model NOT Loaded') print('Evaluating on Random Model') print('***************************') if args.resume is not None: model.load_state_dict(torch.load(args.resume)) best_acc = 0 optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005) criterion_cent_f = CenterLoss(num_classes=num_class, feat_dim=feature_dim, use_gpu=True) optimizer_centloss_f = torch.optim.SGD(criterion_cent_f.parameters(), lr=0.1) criterion_cent_r = CenterLoss(num_classes=num_class, feat_dim=feature_dim, use_gpu=True) optimizer_centloss_r = torch.optim.SGD(criterion_cent_r.parameters(), lr=0.1) criterion_cent_all = [criterion_cent_f, criterion_cent_r] optimizer_centloss_all = [optimizer_centloss_f, optimizer_centloss_r] center_f = criterion_cent_f.get_centers() center_r = criterion_cent_r.get_centers() centers = [center_f, center_r] params = {'alpha': args.alpha, 'beta': args.beta, 'gamma': args.gamma} ce = torch.nn.CrossEntropyLoss().cuda() counts = dataset.get_frame_counts() print('total %d annotated frames' % counts) for itr in range(args.max_iter + 1): dataset.t_max = t_max if itr % 2 == 0 and itr > 000: dataset.t_max = t_max_ctc if not args.eval_only: train_SF(itr, dataset, args, model, optimizer, criterion_cent_all, optimizer_centloss_all, logger, device, ce, params, mode=args.mode) if itr % args.eval_steps == 0 and (not itr == 0 or args.eval_only): print('model_name: %s' % args.model_name) acc = evaluate(itr, dataset, model, logger, groundtruth_filename, prediction_filename, background=args.background, fps=fps, stride=stride, subset=test_subset, threshold_type=threshold_type, frame_type=args.frame_type, adjust_mean=args.adjust_mean, act_weight=args.actionness_weight, tiou_thresholds=tiou_thresholds, use_anchor=args.use_anchor) torch.save(model.state_dict(), '%s/%s.%d.pkl' % (args.model_dir, args.model_name, itr)) if acc >= best_acc and not args.eval_only: torch.save( model.state_dict(), '%s/%s_best.pkl' % (args.model_dir, args.model_name)) best_acc = acc if args.expand and itr == args.expand_step: act_expand(args, dataset, model, device, centers=None) model = SFNET(dataset.feature_size, num_class).to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, 
weight_decay=0.0005) counts = dataset.get_frame_counts() print('total %d frames' % counts) if args.eval_only: print('Done Eval!') break
def train(): print('Start to train...') if not os.path.exists(args.ckpt_dir): os.makedirs(args.ckpt_dir) ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] if args.gpus != '-1' else mx.cpu() print('Loading the data...') train_iter, test_iter = data_loader(args.batch_size) model = LeNetPlus() model.hybridize() model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(model.collect_params(), optimizer='sgd', optimizer_params={ 'learning_rate': args.lr, 'wd': args.wd }) if args.center_loss: center_loss = CenterLoss(args.num_classes, feature_size=2, lmbd=args.lmbd) center_loss.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx) trainer_center = gluon.Trainer( center_loss.collect_params(), optimizer='sgd', optimizer_params={'learning_rate': args.alpha}) else: center_loss, trainer_center = None, None smoothing_constant, moving_loss = .01, 0.0 best_acc = 0 for e in range(args.epochs): start_time = timeit.default_timer() for i, (data, label) in enumerate(train_iter): data = data.as_in_context(ctx[0]) label = label.as_in_context(ctx[0]) with autograd.record(): output, features = model(data) loss_softmax = softmax_cross_entropy(output, label) if args.center_loss: loss_center = center_loss(features, label) loss = loss_softmax + loss_center else: loss = loss_softmax loss.backward() trainer.step(data.shape[0]) if args.center_loss: trainer_center.step(data.shape[0]) curr_loss = nd.mean(loss).asscalar() moving_loss = (curr_loss if ((i == 0) and (e == 0)) else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss) elapsed_time = timeit.default_timer() - start_time train_accuracy, train_ft, _, train_lb = evaluate_accuracy( train_iter, model, ctx) test_accuracy, test_ft, _, test_lb = evaluate_accuracy( test_iter, model, ctx) if args.plotting: plot_features(train_ft, train_lb, num_classes=args.num_classes, fpath=os.path.join( args.out_dir, '%s-train-epoch-%s.png' % (args.prefix, e))) plot_features(test_ft, test_lb, num_classes=args.num_classes, fpath=os.path.join( args.out_dir, '%s-test-epoch-%s.png' % (args.prefix, e))) logging.warning("Epoch [%d]: Loss=%f" % (e, moving_loss)) logging.warning("Epoch [%d]: Train-Acc=%f" % (e, train_accuracy)) logging.warning("Epoch [%d]: Test-Acc=%f" % (e, test_accuracy)) logging.warning("Epoch [%d]: Elapsed-time=%f" % (e, elapsed_time)) if test_accuracy > best_acc: best_acc = test_accuracy model.save_params( os.path.join(args.ckpt_dir, args.prefix + '-best.params'))
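# The Gluon scripts in this file rely on a CenterLoss block with the constructor
# CenterLoss(num_classes, feature_size, lmbd) whose center matrix can be saved and
# reloaded with save_parameters/load_parameters. A minimal sketch of such a block is
# shown below; it is an assumption of what the repo's implementation looks like,
# not a copy of it.
from mxnet import gluon

class CenterLoss(gluon.loss.Loss):
    """Per-sample center loss with a trainable (num_classes, feature_size)
    center matrix, scaled by lmbd."""

    def __init__(self, num_classes, feature_size, lmbd, **kwargs):
        super(CenterLoss, self).__init__(weight=None, batch_axis=0, **kwargs)
        self._lmbd = lmbd
        with self.name_scope():
            self.centers = self.params.get('centers', shape=(num_classes, feature_size))

    def hybrid_forward(self, F, features, labels, centers):
        # look up the center of each sample's class along axis 0
        centers_selected = F.take(centers, labels)
        diff = features - centers_selected
        return self._lmbd * 0.5 * F.sum(F.square(diff), axis=1)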
model = Model(dataset.feature_size, dataset.num_class, dataset.labels101to20).to(device) if args.eval_only and args.pretrained_ckpt is None: print('***************************') print('Pretrained Model NOT Loaded') print('Evaluating on Random Model') print('***************************') if args.pretrained_ckpt is not None: model.load_state_dict(torch.load(args.pretrained_ckpt)) best_acc = 0 optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005) criterion_cent_f = CenterLoss(num_classes=dataset.num_class, feat_dim=1024, use_gpu=True) optimizer_centloss_f = torch.optim.SGD(criterion_cent_f.parameters(), lr=0.1) criterion_cent_r = CenterLoss(num_classes=dataset.num_class, feat_dim=1024, use_gpu=True) optimizer_centloss_r = torch.optim.SGD(criterion_cent_r.parameters(), lr=0.1) criterion_cent_all = [criterion_cent_f, criterion_cent_r] optimizer_centloss_all = [optimizer_centloss_f, optimizer_centloss_r] for itr in range(args.max_iter): dataset.t_max = t_max if itr % 2 == 0 and itr > 000:
train_loader, test_loader = data_loader.getTargetDataSet( args.dataset, args.batch_size, args.imageSize, args.dataroot) print('Load model') model = models.vgg13() print(model) if args.cuda: model.cuda() # Center loss addition if args.centerloss: classes = 10 # default for CIFAR & SVHN dim = 10 centerloss = CenterLoss(num_classes=classes, feat_dim=dim, use_gpu=args.cuda) print( 'Center loss component | Classes: {} | Features: {} | GPU: {}'.format( classes, dim, args.cuda)) print('Setup optimizer') if args.centerloss: params = list(model.parameters()) + list(centerloss.parameters()) else: params = model.parameters() optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd) decreasing_lr = list(map(int, args.decreasing_lr.split(','))) def train(epoch):
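# The body of train(epoch) is cut off above. Because the center-loss parameters are
# folded into the same Adam optimizer as the network, a single batch update needs no
# second optimizer and no gradient rescaling. The function below is a hypothetical
# stand-in for that body (train_loader, args, model, centerloss and optimizer as set
# up above; applying the center loss to the 10-d logits is an assumption suggested by
# feat_dim = 10).
import torch.nn.functional as F

def train_epoch_sketch(epoch):
    model.train()
    for data, target in train_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)                      # vgg13 logits, one per class
        loss = F.cross_entropy(output, target)
        if args.centerloss:
            # centers are updated through the shared optimizer
            loss = loss + centerloss(output, target)
        loss.backward()
        optimizer.step()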
def train(train_sets, test_sets): # ---------------------- dataloader ---------------------- # # dataset loaders train_loaders, train_iters, test_loaders, test_iters = {}, {}, {}, {} # 加载有label的训练数据 for domain in opt.domains: train_loaders[domain] = DataLoader(train_sets[domain], opt.batch_size, shuffle=True) train_iters[domain] = iter(train_loaders[domain]) test_loaders[domain] = DataLoader(test_sets[domain], opt.batch_size, shuffle=False) test_iters[domain] = iter(test_loaders[domain]) # ---------------------- model initialization ---------------------- # F_d = {} C = None if opt.model.lower() == 'mlp': for domain in opt.domains: F_d[domain] = MlpFeatureExtractor(opt.feature_num, opt.F_hidden_sizes, opt.domain_hidden_size, opt.dropout, opt.F_bn) C = SentimentClassifier(opt.C_layers, opt.domain_hidden_size, opt.domain_hidden_size, opt.num_labels, opt.dropout, opt.C_bn) # 转移到gpu上 C = C.to(opt.device) for f_d in F_d.values(): f_d = f_d.to(opt.device) criterion_cent = CenterLoss(num_classes=2, feat_dim=opt.domain_hidden_size, use_gpu=True) optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=0.5) optimizer = optim.Adam(itertools.chain( *map(list, [C.parameters()] + [f.parameters() for f in F_d.values()])), lr=opt.learning_rate) # training correct, total = defaultdict(int), defaultdict(int) # D accuracy d_correct, d_total = 0, 0 best_acc = 0.0 best_acc_dict = {} margin = 3 margin_lambda = 0.1 # center_loss_weight_cent = 0.1 for epoch in range(opt.max_epoch): C.train() for f in F_d.values(): f.train() # conceptually view 1 epoch as 1 epoch of the first domain num_iter = len(train_loaders[opt.domains[0]]) # First_stage for _ in tqdm(range(num_iter)): for f_d in F_d.values(): f_d.zero_grad() C.zero_grad() optimizer_centloss.zero_grad() for domain in opt.domains: inputs, targets = utils.endless_get_next_batch( train_loaders, train_iters, domain) targets = targets.to(opt.device) domain_feat = F_d[domain](inputs) visual_feature, c_outputs = C(domain_feat) # loss_cent = criterion_cent(visual_feature, targets) # loss_cent *= center_loss_weight_cent loss_cent = 0.0 loss_part_1 = functional.nll_loss(c_outputs, targets) targets = targets.unsqueeze(1) targets_onehot = torch.FloatTensor(opt.batch_size, 2) targets_onehot.zero_() targets_onehot.scatter_(1, targets.cpu(), 1) targets_onehot = targets_onehot.to(opt.device) loss_part_2 = margin_lambda * margin_regularization( inputs, targets_onehot, F_d[domain], C, margin) # loss_part_2 = 0.0 print("loss_part_1: " + str(loss_part_1)) print("loss_part_2: " + str(loss_part_2)) print("loss_cent: " + str(loss_cent)) l_c = loss_part_1 + loss_part_2 + loss_cent l_c.backward() _, pred = torch.max(c_outputs, 1) total[domain] += targets.size(0) correct[domain] += (pred == targets).sum().item() optimizer.step() # for param in criterion_cent.parameters(): # param.grad.data *= (1. 
/ center_loss_weight_cent) optimizer_centloss.step() # end of epoch log.info('Ending epoch {}'.format(epoch + 1)) if d_total > 0: log.info('D Training Accuracy: {}%'.format(100.0 * d_correct / d_total)) log.info('Training accuracy:') log.info('\t'.join(opt.domains)) log.info('\t'.join( [str(100.0 * correct[d] / total[d]) for d in opt.domains])) log.info('Evaluating test sets:') test_acc = {} for domain in opt.domains: test_acc[domain] = evaluate(domain, test_loaders[domain], F_d[domain], C) avg_test_acc = sum([test_acc[d] for d in opt.domains]) / len(opt.domains) log.info(f'Average test accuracy: {avg_test_acc}') if avg_test_acc > best_acc: log.info(f'New best Average test accuracy: {avg_test_acc}') best_acc = avg_test_acc best_acc_dict = test_acc for d in opt.domains: if d in F_d: torch.save( F_d[d].state_dict(), '{}/net_F_d_{}.pth'.format(opt.exp2_model_save_file, d)) torch.save(C.state_dict(), '{}/netC.pth'.format(opt.exp2_model_save_file)) log.info( f'Loading model for feature visualization from {opt.exp2_model_save_file}...' ) for domain in opt.domains: F_d[domain].load_state_dict( torch.load( os.path.join(opt.exp2_model_save_file, f'net_F_d_{domain}.pth'))) num_iter = len(train_loaders[opt.domains[0]]) visual_features, senti_labels = get_visual_features( num_iter, test_loaders, test_iters, F_d, C) # visual_features, senti_labels = get_visual_features(num_iter, train_loaders, train_iters, F_d) return best_acc, best_acc_dict, visual_features, senti_labels
model = Net(backbone, num_class, ALPHA_1) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.device_count() > 1: model = nn.DataParallel(model) model.to(device) # Softmax loss criterion_softmax = nn.CrossEntropyLoss() criterion_softmax = criterion_softmax.to(device) # Center loss if FLAG_CENTER: criterion_centerloss = CenterLoss(num_classes=num_class, feat_dim=FEAT_DIM, use_gpu=True) else: pass # Optimizer if cfg['OPTIM'].lower() == 'adam': optimizer_softmax = optim.Adam(model.parameters(), lr=LR_SOFTMAX, weight_decay=WEIGHT_DECAY) if FLAG_CENTER: optimizer_center = optim.Adam(criterion_centerloss.parameters(), lr=LR_CENTER) else: pass else:
def _do_train(self): # STEP 1 ------------------------------------------------------------------------------------- #data------------------------------------------------------------------------------------- torch.backends.cudnn.benchmark if self.use_VGG: feature_extractor = VGGFc(self.device, model_name='vgg19') else: feature_extractor = ResNetFc(self.device, model_name='resnet50') #### source on which perform training of cls and self-sup task images, labels = get_split_dataset_info( self.folder_txt_files + self.source + '_train_all.txt', self.folder_dataset) ds_source_ss = CustomDataset(images, labels, img_transformer=transform_source_ss, returns=6, is_train=True, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) source_train_ss = torch.utils.data.DataLoader( ds_source_ss, batch_size=self.batch_size, shuffle=True, num_workers=self.n_workers, pin_memory=True, drop_last=True) images, labels = get_split_dataset_info( self.folder_txt_files + self.target + '_test.txt', self.folder_dataset) ds_target_train = CustomDataset(images, labels, img_transformer=transform_target_train, returns=2, is_train=True, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_train = torch.utils.data.DataLoader(ds_target_train, batch_size=self.batch_size, shuffle=True, num_workers=self.n_workers, pin_memory=True, drop_last=True) #### target on which compute the scores to select highest batch (integrate to the learning of ss task) and lower batch (integrate to the learning of cls task for the class unknown) images, labels = get_split_dataset_info( self.folder_txt_files + self.target + '_test.txt', self.folder_dataset) ds_target_test_for_scores = CustomDataset( images, labels, img_transformer=transform_target_test_for_scores, returns=2, is_train=False, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_test_for_scores = torch.utils.data.DataLoader( ds_target_test_for_scores, batch_size=1, shuffle=False, num_workers=self.n_workers, pin_memory=True, drop_last=False) #### target for the final evaluation images, labels = get_split_dataset_info( self.folder_txt_files + self.target + '_test.txt', self.folder_dataset) ds_target_test = CustomDataset(images, labels, img_transformer=transform_target_test, returns=2, is_train=False, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_test = torch.utils.data.DataLoader(ds_target_test, batch_size=1, shuffle=False, num_workers=self.n_workers, pin_memory=True, drop_last=False) # network ----------------------------------------------------------------------------------------------- if self.only_4_rotations: discriminator_p = Discriminator(n=1, n_s=self.ss_classes, vgg=self.use_VGG) else: discriminator_p = Discriminator(n=self.n_classes, n_s=self.ss_classes, vgg=self.use_VGG) cls = CLS(feature_extractor.output_num(), self.n_classes + 1, bottle_neck_dim=256, vgg=self.use_VGG) discriminator_p.to(self.device) feature_extractor.to(self.device) cls.to(self.device) net = nn.Sequential(feature_extractor, cls).to(self.device) center_loss = CenterLoss(num_classes=self.ss_classes * self.n_classes, feat_dim=256 * self.n_classes, use_gpu=True, device=self.device) if self.use_VGG: center_loss_object = CenterLoss(num_classes=self.n_classes, 
feat_dim=4096, use_gpu=True, device=self.device) else: center_loss_object = CenterLoss(num_classes=self.n_classes, feat_dim=2048, use_gpu=True, device=self.device) # scheduler, optimizer --------------------------------------------------------- max_iter = int(self.epochs_step1 * len(source_train_ss)) scheduler = lambda step, initial_lr: inverseDecaySheduler( step, initial_lr, gamma=10, power=0.75, max_iter=max_iter) params = list(discriminator_p.parameters()) if self.weight_center_loss > 0: params = params + list(center_loss.parameters()) optimizer_discriminator_p = OptimWithSheduler( optim.SGD(params, lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) if not self.use_VGG: for name, param in feature_extractor.named_parameters(): words = name.split('.') if words[1] == 'layer4': param.requires_grad = True else: param.requires_grad = False params_cls = list(cls.parameters()) optimizer_cls = OptimWithSheduler( optim.SGD([{ 'params': params_cls }, { 'params': feature_extractor.parameters(), 'lr': (self.learning_rate / self.divison_learning_rate_backbone) }], lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) else: for name, param in feature_extractor.named_parameters(): words = name.split('.') if words[1] == 'classifier': param.requires_grad = True else: param.requires_grad = False params_cls = list(cls.parameters()) optimizer_cls = OptimWithSheduler( optim.SGD([{ 'params': params_cls }, { 'params': feature_extractor.parameters(), 'lr': (self.learning_rate / self.divison_learning_rate_backbone) }], lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) log = Logger(self.folder_log + '/step', clear=True) target_train = cycle(target_train) k = 0 print('\n') print( 'Separation known/unknown phase------------------------------------------------------------------------------------------' ) print('\n') while k < self.epochs_step1: print('Epoch: ', k) for (i, (im_source, im_source_ss, label_source, label_source_ss, label_source_ss_center, label_source_center_object)) in enumerate(source_train_ss): (im_target, _) = next(target_train) global loss_object_class global acc_train global loss_rotation global acc_train_rot global loss_center im_source = im_source.to(self.device) im_target = im_target.to(self.device) im_source_ss = im_source_ss.to(self.device) label_source = label_source.to(self.device) label_source_ss = label_source_ss.to(self.device) label_source_ss_center = label_source_ss_center.to(self.device) label_source_center_object = label_source_center_object.to( self.device) (_, _, _, predict_prob_source) = net.forward(im_source) (_, _, _, _) = net.forward(im_target) fs1_ss = feature_extractor.forward(im_source_ss) fs1_original = feature_extractor.forward(im_source) _ = feature_extractor.forward(im_target) double_input = torch.cat((fs1_original, fs1_ss), 1) fs1_ss = double_input p0, p0_center = discriminator_p.forward(fs1_ss) p0 = nn.Softmax(dim=-1)(p0) # =========================loss function ce = CrossEntropyLoss(label_source, predict_prob_source) d1 = CrossEntropyLoss(label_source_ss, p0) center, _ = center_loss(p0_center, label_source_ss_center) with OptimizerManager( [optimizer_cls, optimizer_discriminator_p]): loss_object_class = self.cls_weight_source * ce loss_rotation = self.ss_weight_source * d1 loss_center = self.weight_center_loss * center loss = loss_object_class + loss_rotation + loss_center loss.backward() if self.weight_center_loss > 0: for param in center_loss.parameters(): param.grad.data 
*= (1. / self.weight_center_loss) log.step += 1 k += 1 counter = AccuracyCounter() counter.addOntBatch(variable_to_numpy(predict_prob_source), variable_to_numpy(label_source)) acc_train = torch.from_numpy( np.asarray([counter.reportAccuracy()], dtype=np.float32)).to(self.device) counter_ss = AccuracyCounter() counter_ss.addOntBatch(variable_to_numpy(p0), variable_to_numpy(label_source_ss)) acc_train_rot = torch.from_numpy( np.asarray([counter_ss.reportAccuracy()], dtype=np.float32)).to(self.device) track_scalars(log, [ 'loss_object_class', 'acc_train', 'loss_rotation', 'acc_train_rot', 'loss_center' ], globals()) select_low = compute_scores_all_target( target_test_for_scores, feature_extractor, discriminator_p, net, self.use_VGG, self.n_classes, self.ss_classes, self.device, self.source, self.target, self.folder_txt_files, self.folder_txt_files_saving) # ========================= Add target samples to cls and discriminator_p classifiers in function of the score #data--------------------------------------------------------------------------------------------------------------- self.only_4_rotations = True images, labels = get_split_dataset_info( self.folder_txt_files_saving + self.source + '_' + self.target + '_test_high.txt', self.folder_dataset) ds_target_high = CustomDataset( images, labels, img_transformer=transform_target_ss_step2, returns=3, is_train=True, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_train_high = torch.utils.data.DataLoader( ds_target_high, batch_size=self.batch_size, shuffle=True, num_workers=self.n_workers, pin_memory=True, drop_last=True) images, labels = get_split_dataset_info( self.folder_txt_files + self.target + '_test.txt', self.folder_dataset) ds_target = CustomDataset(images, labels, img_transformer=transform_target_ss_step2, returns=3, is_train=True, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_train = torch.utils.data.DataLoader(ds_target, batch_size=self.batch_size, shuffle=True, num_workers=self.n_workers, pin_memory=True, drop_last=True) images, labels = get_split_dataset_info( self.folder_txt_files_saving + self.source + '_' + self.target + '_test_low.txt', self.folder_dataset) ds_target_low = CustomDataset( images, labels, img_transformer=transform_source_ss_step2, returns=6, is_train=True, ss_classes=self.ss_classes, n_classes=self.n_classes, only_4_rotations=self.only_4_rotations, n_classes_target=self.n_classes_target) target_train_low = torch.utils.data.DataLoader( ds_target_low, batch_size=self.batch_size, shuffle=True, num_workers=self.n_workers, pin_memory=True, drop_last=True) # network -------------------------------------------------------------------------------------------------------------------------- discriminator_p = Discriminator(n=1, n_s=self.ss_classes, vgg=self.use_VGG) discriminator_p.to(self.device) if not self.use_weight_net_first_part: if self.use_VGG: feature_extractor = VGGFc(self.device, model_name='vgg19') else: feature_extractor = ResNetFc(self.device, model_name='resnet50') cls = CLS(feature_extractor.output_num(), self.n_classes + 1, bottle_neck_dim=256, vgg=self.use_VGG) feature_extractor.to(self.device) cls.to(self.device) net = nn.Sequential(feature_extractor, cls).to(self.device) if len(target_train_low) >= len(target_train_high): length = len(target_train_low) else: length = len(target_train_high) max_iter = 
int(self.epochs_step2 * length) scheduler = lambda step, initial_lr: inverseDecaySheduler( step, initial_lr, gamma=10, power=0.75, max_iter=max_iter) params = list(discriminator_p.parameters()) optimizer_discriminator_p = OptimWithSheduler( optim.SGD(params, lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) if not self.use_VGG: for name, param in feature_extractor.named_parameters(): words = name.split('.') if words[1] == 'layer4': param.requires_grad = True else: param.requires_grad = False params_cls = list(cls.parameters()) optimizer_cls = OptimWithSheduler( optim.SGD([{ 'params': params_cls }, { 'params': feature_extractor.parameters(), 'lr': (self.learning_rate / self.divison_learning_rate_backbone) }], lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) else: for name, param in feature_extractor.named_parameters(): words = name.split('.') if words[1] == 'classifier': param.requires_grad = True else: param.requires_grad = False params_cls = list(cls.parameters()) optimizer_cls = OptimWithSheduler( optim.SGD([{ 'params': params_cls }, { 'params': feature_extractor.parameters(), 'lr': (self.learning_rate / self.divison_learning_rate_backbone) }], lr=self.learning_rate, weight_decay=5e-4, momentum=0.9, nesterov=True), scheduler) k = 0 print('\n') print( 'Adaptation phase--------------------------------------------------------------------------------------------------------' ) print('\n') ss_weight_target = self.ss_weight_target weight_class_unknown = 1 / (select_low * (self.n_classes / (len(source_train_ss) * self.batch_size))) while k < self.epochs_step2: print('Epoch: ', k) iteration = cycle(target_train) if len(target_train_low) > len(target_train_high): num_iterations = len(target_train_low) num_iterations_smaller = len(target_train_high) target_train_low_iter = iter(target_train_low) target_train_high_iter = cycle(target_train_high) else: num_iterations = len(target_train_high) num_iterations_smaller = len(target_train_low) target_train_low_iter = cycle(target_train_low) target_train_high_iter = iter(target_train_high) for i in range(num_iterations): global entropy_loss (im_target_entropy, _, _) = next(iteration) (im_source, im_source_ss, label_source, label_source_ss, _, _) = next(target_train_low_iter) (im_target, im_target_ss, label_target_ss) = next(target_train_high_iter) im_source = im_source.to(self.device) im_source_ss = im_source_ss.to(self.device) label_source = label_source.to(self.device) label_source_ss = label_source_ss.to(self.device) im_target = im_target.to(self.device) im_target_ss = im_target_ss.to(self.device) label_target_ss = label_target_ss.to(self.device) im_target_entropy = im_target_entropy.to(self.device) ft1_ss = feature_extractor.forward(im_target_ss) ft1_original = feature_extractor.forward(im_target) double_input_t = torch.cat((ft1_original, ft1_ss), 1) ft1_ss = double_input_t (_, _, _, predict_prob_source) = net.forward(im_source) (_, _, _, _) = net.forward(im_target_entropy) (_, _, _, predict_prob_target) = net.forward(im_target) p0_t, _ = discriminator_p.forward(ft1_ss) p0_t = nn.Softmax(dim=-1)(p0_t) # =========================loss function class_weight = np.ones((self.n_classes + 1), dtype=np.dtype('f')) class_weight[ self. 
n_classes] = weight_class_unknown * self.weight_class_unknown class_weight = (torch.from_numpy(class_weight)).to(self.device) ce = CrossEntropyLoss(label_source, predict_prob_source, class_weight) entropy = EntropyLoss(predict_prob_target) d1_t = CrossEntropyLoss(label_target_ss, p0_t) with OptimizerManager( [optimizer_cls, optimizer_discriminator_p]): loss_object_class = self.cls_weight_source * ce loss_rotation = ss_weight_target * d1_t entropy_loss = self.entropy_weight * entropy loss = loss_object_class + loss_rotation + entropy_loss loss.backward() log.step += 1 k += 1 counter = AccuracyCounter() counter.addOntBatch(variable_to_numpy(predict_prob_source), variable_to_numpy(label_source)) acc_train = torch.from_numpy( np.asarray([counter.reportAccuracy()], dtype=np.float32)).to(self.device) counter_ss = AccuracyCounter() counter_ss.addOntBatch(variable_to_numpy(p0_t), variable_to_numpy(label_target_ss)) acc_train_rot = torch.from_numpy( np.asarray([counter_ss.reportAccuracy()], dtype=np.float32)).to(self.device) track_scalars(log, [ 'loss_object_class', 'acc_train', 'loss_rotation', 'acc_train_rot', 'entropy_loss' ], globals()) global predict_prob global label global predict_index # =================================evaluation if k % 10 == 0 or k == (self.epochs_step2): with TrainingModeManager([feature_extractor, cls], train=False) as mgr, Accumulator([ 'predict_prob', 'predict_index', 'label' ]) as accumulator: for (i, (im, label)) in enumerate(target_test): with torch.no_grad(): im = im.to(self.device) label = label.to(self.device) (ss, fs, _, predict_prob) = net.forward(im) predict_prob, label = [ variable_to_numpy(x) for x in (predict_prob, label) ] label = np.argmax(label, axis=-1).reshape(-1, 1) predict_index = np.argmax(predict_prob, axis=-1).reshape(-1, 1) accumulator.updateData(globals()) for x in accumulator.keys(): globals()[x] = accumulator[x] y_true = label.flatten() y_pred = predict_index.flatten() m = extended_confusion_matrix( y_true, y_pred, true_labels=range(self.n_classes_target), pred_labels=range(self.n_classes + 1)) cm = m cm = cm.astype(np.float) / np.sum(cm, axis=1, keepdims=True) acc_os_star = sum([cm[i][i] for i in range(self.n_classes) ]) / (self.n_classes) unkn = sum([ cm[i][self.n_classes] for i in range(self.n_classes, self.n_classes_target) ]) / (self.n_classes_target - (self.n_classes)) acc_os = (acc_os_star * (self.n_classes) + unkn) / (self.n_classes + 1) hos = (2 * acc_os_star * unkn) / (acc_os_star + unkn) print('os', acc_os) print('os*', acc_os_star) print('unkn', unkn) print('hos', hos) net.train()
def main(): torch.manual_seed(args.seed) os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu use_gpu = torch.cuda.is_available() if args.use_cpu: use_gpu = False sys.stdout = Logger(osp.join(args.save_dir, 'log_' + args.dataset + '.txt')) if use_gpu: print("Currently using GPU: {}".format(args.gpu)) cudnn.benchmark = True torch.cuda.manual_seed_all(args.seed) else: print("Currently using CPU") #==================================dataset loading============================ print("Creating dataset: {}".format(args.dataset)) dataset = datasets.create( name=args.dataset, batch_size=args.batch_size, use_gpu=use_gpu, num_workers=args.workers, ) trainloader, testloader = dataset.trainloader, dataset.testloader print("Creating model: {}".format(args.model)) model = models.create(name=args.model, num_classes=dataset.num_classes) if use_gpu: model = nn.DataParallel(model).cuda() criterion_xent = nn.CrossEntropyLoss() criterion_cent = CenterLoss(num_classes=dataset.num_classes, feat_dim=2, use_gpu=use_gpu) optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, weight_decay=5e-04, momentum=0.9) optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent) if args.stepsize > 0: scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize, gamma=args.gamma) start_time = time.time() xent_plot = [] cent_plot = [] loss_plot = [] for epoch in range(args.max_epoch): print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch)) xent_losses, cent_losses, losses = train(model, criterion_xent, criterion_cent, optimizer_model, optimizer_centloss, trainloader, use_gpu, dataset.num_classes, epoch) xent_plot.append(xent_losses.avg) cent_plot.append(cent_losses.avg) loss_plot.append(losses.avg) if args.stepsize > 0: scheduler.step() # if args.eval_freq > 0 and (epoch+1) % args.eval_freq == 0 or (epoch+1) == args.max_epoch: # print("==> Test") # acc, err = test(model, testloader, use_gpu, dataset.num_classes, epoch) # print("Accuracy (%): {}\t Error rate (%): {}".format(acc, err)) if epoch % 100 == 0: state = {'cnn': model.state_dict()} torch.save( state, '/home/mg/code/GEI+PTSN/train/pytorch-center-loss-master/snapshots_512/snapshot_%d.t7' % epoch) print('model save at epoch %d' % epoch) plot_losses(xent_plot, cent_plot, loss_plot, prefix='losses') elapsed = round(time.time() - start_time) elapsed = str(datetime.timedelta(seconds=elapsed)) print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
def train(args): """ Function that starts the model training with evaluation at the end of the each epoch :param args: Command line arguments parsed with the argparse lib """ model = SqueezeModel(num_classes=args.num_classes) if args.model: model.load_state_dict(torch.load(args.model)) print("Loaded model from:", args.model) use_gpu = False if args.gpu > -1: use_gpu = True model.cuda(args.gpu) # dataset input_shape = (args.num_channels, args.height, args.width) train_transform, val_transform = data_transformations(input_shape) train_dataset = ImageFolder(root=os.path.join(args.dataset, 'train'), transform=train_transform) train_dataset_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_threads, pin_memory=True) validation_dataset = ImageFolder(root=os.path.join(args.dataset, 'validation'), transform=val_transform) validation_dataset_loader = DataLoader(validation_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_threads, pin_memory=True) # losses model_criterion = CrossEntropyLoss() center_criterion = CenterLoss(num_classes=args.num_classes, feat_dim=model.num_features, use_gpu=use_gpu) # optimizers optim_params = filter(lambda p: p.requires_grad, model.parameters()) if args.optimizer == 'sgd': model_optimizer = SGD(params=optim_params, lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) elif args.optimizer == 'adam': model_optimizer = Adam(optim_params, lr=args.learning_rate, weight_decay=args.weight_decay) else: raise ValueError('Unknown optimizer') center_optimizer = Adam(center_criterion.parameters(), lr=args.center_learning_rate, weight_decay=args.weight_decay) # schedulers model_lr_scheduler = ReduceLROnPlateau( model_optimizer, factor=0.25, patience=5, verbose=True) center_lr_scheduler = ReduceLROnPlateau( center_optimizer, factor=0.25, patience=5, verbose=True) for epoch in range(1, args.max_epoch + 1): _ = train_epoch(train_dataset_loader, model, model_criterion, center_criterion, model_optimizer, center_optimizer, use_gpu) eval_info = evaluate(validation_dataset_loader, model, model_criterion, center_criterion, use_gpu) model_lr_scheduler.step(eval_info['model_loss']) center_lr_scheduler.step(eval_info['center_loss']) print_eval_info(eval_info, epoch) if epoch == 1: best_f1_val = eval_info['f1'] if eval_info['f1'] >= best_f1_val: model_filename = (args.name + '_epoch_{:02d}' '-valLoss_{:.5f}' '-valF1_{:.5f}'.format(epoch, eval_info['total_loss'], eval_info['f1'])) model_path = os.path.join(args.save_dir, model_filename) torch.save(model.state_dict(), model_path) print('Epoch {}: Saved the new best model to: {}'.format( epoch, model_path)) best_f1_val = eval_info['f1']
x = self.features(x) x = x.view(N, 512, 14 * 14) x = torch.bmm(x, torch.transpose(x, 1, 2)) / (14**2) # Bilinear x = x.view(N, 512**2) x = torch.sqrt(x + 1e-12) x = nn.functional.normalize(x) x = self.fc(x) #x = x.view() return x model_ft = vgg19_see_smart(model_ft) model_ft = model_ft.to(device) celoss = CrossEntropyLoss(smooth_eps=0.1).to(device) centerloss = CenterLoss(num_classes=num_classes, feat_dim=507, use_gpu=True).to(device) criterion = [celoss, centerloss] max_lr = 0.001 min_lr = 0.00001 one_cycle = 20 num_cycle = 3 max_epochs = int(num_classes * one_cycle) net_optimizer = torch.optim.SGD(model_ft.parameters(), max_lr, momentum=0.9, weight_decay=1e-4) cl_optimimzer = torch.optim.SGD(centerloss.parameters(),
def train(args): # Create model directory if not os.path.exists(args.model_path): os.makedirs(args.model_path) # Save the arguments. with open(os.path.join(args.model_path, 'args.json'), 'w') as args_file: json.dump(args.__dict__, args_file) # Config logging. log_format = '%(levelname)-8s %(message)s' log_file_name = 'train_' + args.train_log_file_suffix + '.log' logfile = os.path.join(args.model_path, log_file_name) logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format) logging.getLogger().addHandler(logging.StreamHandler()) logging.info(json.dumps(args.__dict__)) # Image preprocessing transform = transforms.Compose([ transforms.ToTensor(), transforms.ToPILImage(), transforms.RandomResizedCrop(args.crop_size, scale=(1.00, 1.2), ratio=(0.75, 1.3333333333333333)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) # Load vocabulary wrapper. vocab = load_vocab(args.vocab_path) # Load the category types. cat2name = json.load(open(args.cat2name)) # Build data loader logging.info("Building data loader...") train_sampler = None val_sampler = None if os.path.exists(args.train_dataset_weights): train_weights = json.load(open(args.train_dataset_weights)) train_weights = torch.DoubleTensor(train_weights) train_sampler = torch.utils.data.sampler.WeightedRandomSampler( train_weights, len(train_weights)) if os.path.exists(args.val_dataset_weights): val_weights = json.load(open(args.val_dataset_weights)) val_weights = torch.DoubleTensor(val_weights) val_sampler = torch.utils.data.sampler.WeightedRandomSampler( val_weights, len(val_weights)) data_loader = get_loader(args.dataset, transform, args.batch_size, shuffle=False, num_workers=args.num_workers, max_examples=args.max_examples, sampler=train_sampler) val_data_loader = get_loader(args.val_dataset, transform, args.batch_size, shuffle=False, num_workers=args.num_workers, max_examples=args.max_examples, sampler=val_sampler) print('Done loading data ............................') logging.info("Done") vqg = create_model(args, vocab) if args.load_model is not None: vqg.load_state_dict(torch.load(args.load_model)) logging.info("Done") # Loss criterion. pad = vocab(vocab.SYM_PAD) # Set loss weight for 'pad' symbol to 0 criterion = nn.CrossEntropyLoss() criterion2 = nn.MultiMarginLoss().cuda() l2_criterion = nn.MSELoss() alpha = None if (args.bayes): alpha = vqg.alpha # Setup GPUs. 
if torch.cuda.is_available(): logging.info("Using available GPU...") vqg.cuda() criterion.cuda() l2_criterion.cuda() if (alpha is not None): alpha.cuda() gen_params = vqg.generator_parameters() info_params = vqg.info_parameters() learning_rate = args.learning_rate info_learning_rate = args.info_learning_rate gen_optimizer = torch.optim.Adam(gen_params, lr=learning_rate) info_optimizer = torch.optim.Adam(info_params, lr=info_learning_rate) if (args.step_two): cycle_params = vqg.cycle_params() cycle_optimizer = torch.optim.Adam(cycle_params, lr=learning_rate) if (args.center_loss): center_loss = CenterLoss(num_classes=args.num_categories, feat_dim=args.z_size, use_gpu=True) optimizer_centloss = torch.optim.SGD(center_loss.parameters(), lr=0.5) scheduler = ReduceLROnPlateau(optimizer=gen_optimizer, mode='min', factor=0.5, patience=args.patience, verbose=True, min_lr=1e-7) cycle_scheduler = ReduceLROnPlateau(optimizer=gen_optimizer, mode='min', factor=0.99, patience=args.patience, verbose=True, min_lr=1e-7) info_scheduler = ReduceLROnPlateau(optimizer=info_optimizer, mode='min', factor=0.5, patience=args.patience, verbose=True, min_lr=1e-7) # Train the model. total_steps = len(data_loader) start_time = time.time() n_steps = 0 # Optional losses. Initialized here for logging. recon_category_loss = 0.0 recon_image_loss = 0.0 kl_loss = 0.0 category_cycle_loss = 0.0 regularisation_loss = 0.0 c_loss = 0.0 cycle_loss = 0.0 if (args.step_two): category_cycle_loss = 0.0 if (args.bayes): regularisation_loss = 0.0 if (args.center_loss): loss_center = 0.0 c_loss = 0.0 for epoch in range(args.num_epochs): for i, (images, questions, answers, categories, qindices) in enumerate(data_loader): n_steps += 1 ''' remove answers from dataloader later ''' # Set mini-batch dataset. if torch.cuda.is_available(): images = images.cuda() questions = questions.cuda() answers = answers.cuda() categories = categories.cuda() qindices = qindices.cuda() if (args.bayes): alpha = alpha.cuda() # Eval now. if (args.eval_every_n_steps is not None and n_steps >= args.eval_every_n_steps and n_steps % args.eval_every_n_steps == 0): run_eval(vqg, val_data_loader, criterion, l2_criterion, args, epoch, scheduler, info_scheduler) compare_outputs(images, questions, answers, categories, vqg, vocab, logging, cat2name, args) # Forward. vqg.train() gen_optimizer.zero_grad() info_optimizer.zero_grad() if (args.step_two): cycle_optimizer.zero_grad() if (args.center_loss): optimizer_centloss.zero_grad() image_features = vqg.encode_images(images) category_features = vqg.encode_categories(categories) # Question generation. t_mus, t_logvars, ts = vqg.encode_into_t(image_features, category_features) if (args.center_loss): loss_center = 0.0 c_loss = center_loss(ts, categories) loss_center += args.lambda_centerloss * c_loss c_loss = c_loss.item() loss_center.backward(retain_graph=True) for param in center_loss.parameters(): param.grad.data *= (1. / args.lambda_centerloss) optimizer_centloss.step() qlengths_prev = process_lengths(questions) (outputs, _, _), pred_ques = vqg.decode_questions(image_features, ts, questions=questions, teacher_forcing_ratio=1.0) # Reorder the questions based on length. questions = torch.index_select(questions, 0, qindices) # Ignoring the start token. questions = questions[:, 1:] qlengths = process_lengths(questions) # Convert the output from MAX_LEN list of (BATCH x VOCAB) -> # (BATCH x MAX_LEN x VOCAB). 
outputs = [o.unsqueeze(1) for o in outputs] outputs = torch.cat(outputs, dim=1) outputs = torch.index_select(outputs, 0, qindices) if (args.step_two): category_cycle_loss = 0.0 category_cycle = vqg.encode_questions(pred_ques, qlengths) cycle_loss = criterion(category_cycle, categories) category_cycle_loss += args.lambda_c_cycle * cycle_loss cycle_loss = cycle_loss.item() category_cycle_loss.backward(retain_graph=True) cycle_optimizer.step() # Calculate the generation loss. targets = pack_padded_sequence(questions, qlengths, batch_first=True)[0] outputs = pack_padded_sequence(outputs, qlengths, batch_first=True)[0] gen_loss = criterion(outputs, targets) total_loss = 0.0 total_loss += args.lambda_gen * gen_loss gen_loss = gen_loss.item() # Variational loss. if (args.bayes): kl_loss = -0.5 * torch.sum(1 + t_logvars + alpha.pow(2).log() - alpha.pow(2) * (t_mus.pow(2) + t_logvars.exp())) regularisation_loss = l2_criterion(alpha.pow(-1), torch.ones_like(alpha)) total_loss += args.lambda_t * kl_loss + args.lambda_reg * regularisation_loss kl_loss = kl_loss.item() regularisation_loss = regularisation_loss.item() else: kl_loss = gaussian_KL_loss(t_mus, t_logvars) total_loss += args.lambda_t * kl_loss kl_loss = kl_loss.item() # Generator Backprop. total_loss.backward(retain_graph=True) gen_optimizer.step() # Reconstruction loss. recon_image_loss = 0.0 recon_category_loss = 0.0 if not args.no_category_space or not args.no_image_recon: total_info_loss = 0.0 category_targets = category_features.detach() image_targets = image_features.detach() recon_image_features, recon_category_features = vqg.reconstruct_inputs( image_targets, category_targets) # Category reconstruction loss. if not args.no_category_space: recon_c_loss = l2_criterion( recon_category_features, category_targets) # changed to criterion2 total_info_loss += args.lambda_c * recon_c_loss recon_category_loss = recon_c_loss.item() # Image reconstruction loss. if not args.no_image_recon: recon_i_loss = l2_criterion(recon_image_features, image_targets) total_info_loss += args.lambda_i * recon_i_loss recon_image_loss = recon_i_loss.item() # Info backprop. total_info_loss.backward() info_optimizer.step() # Print log info if i % args.log_step == 0: delta_time = time.time() - start_time start_time = time.time() logging.info( 'Time: %.4f, Epoch [%d/%d], Step [%d/%d], ' 'LR: %f, Center-Loss: %.4f, KL: %.4f, ' 'I-recon: %.4f, C-recon: %.4f, C-cycle: %.4f, Regularisation: %.4f' % (delta_time, epoch, args.num_epochs, i, total_steps, gen_optimizer.param_groups[0]['lr'], c_loss, kl_loss, recon_image_loss, recon_category_loss, cycle_loss, regularisation_loss)) # Save the models if args.save_step is not None and (i + 1) % args.save_step == 0: torch.save( vqg.state_dict(), os.path.join(args.model_path, 'vqg-tf-%d-%d.pkl' % (epoch + 1, i + 1))) torch.save( vqg.state_dict(), os.path.join(args.model_path, 'vqg-tf-%d.pkl' % (epoch + 1))) torch.save( center_loss.state_dict(), os.path.join(args.model_path, 'closs-tf-%d-%d.pkl' % (epoch + 1, i + 1))) # Evaluation and learning rate updates. run_eval(vqg, val_data_loader, criterion, l2_criterion, args, epoch, scheduler, info_scheduler)
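# A minimal, self-contained sketch of the two-optimizer center-loss update used in the
# script above (joint cross-entropy + center loss, with the center gradients rescaled by
# 1/lambda so the weighting coefficient does not shrink the center updates). `model`,
# `classifier` and the toy shapes are placeholders, not part of the original script;
# CenterLoss is the same (num_classes, feat_dim, use_gpu) module used throughout.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 2))   # toy feature extractor (2-D features)
classifier = nn.Linear(2, 10)                                 # toy classification head
center_loss = CenterLoss(num_classes=10, feat_dim=2, use_gpu=False)
criterion = nn.CrossEntropyLoss()
lambda_c = 0.01                                               # weight of the center-loss term

optimizer_model = torch.optim.SGD(list(model.parameters()) + list(classifier.parameters()),
                                  lr=0.1, momentum=0.9, weight_decay=5e-4)
optimizer_center = torch.optim.SGD(center_loss.parameters(), lr=0.5)

def train_step(images, labels):
    features = model(images)
    logits = classifier(features)
    loss = criterion(logits, labels) + lambda_c * center_loss(features, labels)

    optimizer_model.zero_grad()
    optimizer_center.zero_grad()
    loss.backward()
    optimizer_model.step()
    # undo the lambda_c scaling on the center gradients before stepping the centers
    for param in center_loss.parameters():
        param.grad.data *= (1. / lambda_c)
    optimizer_center.step()
    return loss.item()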
def train(): """ train model using softmax loss or softmax loss/center loss. 训练模型。 """ print("Start to train...") if not os.path.exists(args.ckpt_dir): os.makedirs(args.ckpt_dir) train_iter, test_iter = data_loader(args.batch_size) ctx = mx.gpu() if args.use_gpu else mx.cpu() # main model (LeNetPlus), loss, trainer model = LeNetPlus(classes=args.num_classes, feature_size=args.feature_size) model.hybridize() model.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() network_trainer = gluon.Trainer(model.collect_params(), optimizer="sgd", optimizer_params={ "learning_rate": args.lr, "wd": args.wd }) # "momentum": 0.9, # center loss network and trainer if args.center_loss: center_loss = CenterLoss(num_classes=args.num_classes, feature_size=args.feature_size, lmbd=args.lmbd, ctx=ctx) center_loss.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx) # 包含了一个center矩阵,因此需要进行初始化 center_trainer = gluon.Trainer( center_loss.collect_params(), optimizer="sgd", optimizer_params={"learning_rate": args.alpha}) else: center_loss, center_trainer = None, None smoothing_constant, moving_loss = .01, 0.0 best_acc = 0.0 for epoch in range(args.epochs): # using learning rate decay during training process if (epoch > 0) and (epoch % args.lr_step == 0): network_trainer.set_learning_rate(network_trainer.learning_rate * args.lr_factor) if args.center_loss: center_trainer.set_learning_rate(center_trainer.learning_rate * args.lr_factor) start_time = time.time() for i, (data, label) in enumerate(train_iter): data = data.as_in_context(ctx) label = label.as_in_context(ctx) with autograd.record(): output, features = model(data) loss_softmax = softmax_cross_entropy(output, label) # cumpute loss according to user"s choice if args.center_loss: loss_center = center_loss(features, label) loss = loss_softmax + loss_center else: loss = loss_softmax # update 更新参数 loss.backward() network_trainer.step(args.batch_size) if args.center_loss: center_trainer.step(args.batch_size) # calculate smoothed loss value 平滑损失 curr_loss = nd.mean(loss).asscalar() moving_loss = (curr_loss if ((i == 0) and (epoch == 0)) else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss) # 累计加权函数 # training cost time 训练耗时 elapsed_time = time.time() - start_time train_accuracy, train_ft, _, train_lb = evaluate_accuracy( train_iter, model, center_loss, args.eval_method, ctx) test_accuracy, test_ft, _, test_lb = evaluate_accuracy( test_iter, model, center_loss, args.eval_method, ctx) # draw feature map 绘制特征图像 if args.plotting: plot_features(train_ft, train_lb, num_classes=args.num_classes, fpath=os.path.join( args.out_dir, "%s-train-epoch-%d.png" % (args.prefix, epoch))) plot_features(test_ft, test_lb, num_classes=args.num_classes, fpath=os.path.join( args.out_dir, "%s-test-epoch-%d.png" % (args.prefix, epoch))) logging.warning( "Epoch [%d]: Loss=%f, Train-Acc=%f, Test-Acc=%f, Epoch-time=%f" % (epoch, moving_loss, train_accuracy, test_accuracy, elapsed_time)) # save model parameters with the highest accuracy 保存accuracy最高的model参数 if test_accuracy > best_acc: best_acc = test_accuracy model.save_parameters( os.path.join(args.ckpt_dir, args.prefix + "-best.params")) # 因为CenterLoss继承自gluon.HyperBlock,所以具有普通模型相关的对象可供调用,即可使用save_parameters/load_parameters进行参数的保存和加载。 # 如果CenterLoss没有直接父类,那么就需要通过CenterLoss.embedding.weight.data/set_data进行数据的保存和加载。 center_loss.save_parameters( os.path.join(args.ckpt_dir, args.prefix + "-feature_matrix.params"))
model = models.alexnet(pretrained=True)
model.classifier[6] = nn.Linear(4096, num_classes)

# Freeze the convolutional layer weights
for param in model.features.parameters():
    param.requires_grad = False

# Freeze the classifier layer weights except the last layer
for n in range(6):
    for param in model.classifier[n].parameters():
        param.requires_grad = False

# Copy of the network without the final layer, used to extract 4096-d features
feature_model = copy.deepcopy(model)
del feature_model.classifier[6]

print("Training Data Samples: ", len(train_loader))

# Center loss and the optimizer for its parameters (the class centers)
center_loss = CenterLoss(num_classes, feat_dim=4096, use_gpu=False)
optimizer_centloss = torch.optim.SGD(center_loss.parameters(), lr=0.5)

optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.1,
                       betas=(0.9, 0.999), weight_decay=0.00005)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=10,
                              verbose=False, threshold=0.003, threshold_mode='rel',
                              cooldown=0, min_lr=0, eps=1e-08)

###### PARAMETERS TO BE MONITORED ##########
train_acc = 0
val_acc = 0
train_loss = 0
val_loss = 0
num_epochs = 100
alpha = 0.1   # weight of the center-loss term

################ TRAINING THE MODEL ##############
for ep in range(num_epochs):
    start = time.time()
    print("\n")
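# A hedged sketch of one training step that could follow the setup above: the frozen
# `feature_model` supplies the 4096-d penultimate activations for the center loss, while
# `model` produces the class logits for cross-entropy. `train_loader`, `alpha`,
# `center_loss`, `optimizer` and `optimizer_centloss` are assumed from the surrounding
# snippet; this is not the author's exact loop.
criterion = nn.CrossEntropyLoss()

def run_epoch(model, feature_model, train_loader):
    model.train()
    for images, labels in train_loader:
        features = feature_model(images)          # 4096-d features (classifier[6] removed)
        logits = model(images)                    # class scores from the full network
        loss = criterion(logits, labels) + alpha * center_loss(features, labels)

        optimizer.zero_grad()
        optimizer_centloss.zero_grad()
        loss.backward()
        optimizer.step()
        # rescale the center gradients so `alpha` does not shrink the center updates
        for param in center_loss.parameters():
            param.grad.data *= (1.0 / alpha)
        optimizer_centloss.step()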
def second_train():
    """
    Train a model using only the center loss, starting from a pretrained model.
    To keep the feature (center) matrix in CenterLoss from collapsing to the zero
    matrix during training, its parameters are fixed and not trained.
    """
    print("Start to train LeNet++ with CenterLoss...")
    train_iter, test_iter = data_loader(args.batch_size)
    ctx = mx.gpu() if args.use_gpu else mx.cpu()

    # main model (LeNetPlus), loss, trainer
    model = LeNetPlus(classes=args.num_classes, feature_size=args.feature_size)
    model.load_parameters(os.path.join(args.ckpt_dir, args.prefix + "-best.params"),
                          ctx=ctx, allow_missing=True)
    network_trainer = gluon.Trainer(model.collect_params(),
                                    optimizer="sgd",
                                    optimizer_params={
                                        "learning_rate": args.lr,
                                        "wd": args.wd
                                    })

    center_loss = CenterLoss(num_classes=args.num_classes,
                             feature_size=args.feature_size,
                             lmbd=args.lmbd,
                             ctx=ctx)
    center_loss.load_parameters(os.path.join(
        args.ckpt_dir, args.prefix + "-feature_matrix.params"), ctx=ctx)
    # freeze the center matrix so it is not trained
    center_loss.params.setattr("grad_req", "null")

    smoothing_constant, moving_loss = .01, 0.0
    best_acc = 0.0
    for epoch in range(args.epochs):
        # learning rate decay during training
        if (epoch > 0) and (epoch % args.lr_step == 0):
            network_trainer.set_learning_rate(network_trainer.learning_rate * args.lr_factor)

        start_time = time.time()
        for i, (data, label) in enumerate(train_iter):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            # the loss is the center loss only
            with autograd.record():
                output, features = model(data)
                loss = center_loss(features, label)
            loss.backward()

            # only update the parameters of LeNet++; without ignore_stale_grad=True this
            # step fails, because the frozen parameters never receive fresh gradients
            network_trainer.step(args.batch_size, ignore_stale_grad=True)

            # smoothed loss value
            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (epoch == 0)) else
                           (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)

        # training time for this epoch
        elapsed_time = time.time() - start_time

        train_accuracy, train_ft, _, train_lb = evaluate_accuracy(
            train_iter, model, center_loss, args.eval_method, ctx)
        test_accuracy, test_ft, _, test_lb = evaluate_accuracy(
            test_iter, model, center_loss, args.eval_method, ctx)

        # plot the feature maps with a different prefix so they are easy to compare
        if args.plotting:
            plot_features(train_ft, train_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-second-train-epoch-%d.png" % (args.prefix, epoch)))
            plot_features(test_ft, test_lb, num_classes=args.num_classes,
                          fpath=os.path.join(args.out_dir,
                                             "%s-second-test-epoch-%d.png" % (args.prefix, epoch)))

        logging.warning("Epoch [%d]: Loss=%f, Train-Acc=%f, Test-Acc=%f, Epoch-time=%f" %
                        (epoch, moving_loss, train_accuracy, test_accuracy, elapsed_time))

        if test_accuracy > best_acc:
            best_acc = test_accuracy
            model.save_parameters(
                os.path.join(args.ckpt_dir, args.prefix + "-second-best.params"))
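# The same "freeze the centers" trick (grad_req = "null" above) can be expressed in the
# PyTorch snippets by switching off gradients on the CenterLoss parameters. A minimal
# sketch, assuming the usual CenterLoss(num_classes, feat_dim, use_gpu) module; the
# checkpoint path "feature_matrix.pth" is a hypothetical placeholder:
import torch

center_loss = CenterLoss(num_classes=10, feat_dim=2, use_gpu=False)
center_loss.load_state_dict(torch.load("feature_matrix.pth"))   # reuse previously learned centers
for param in center_loss.parameters():
    param.requires_grad = False                                  # centers stay fixed during training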
def scAdapt(args, data_set): ## prepare data batch_size = args.batch_size kwargs = {'num_workers': 0, 'pin_memory': True} source_name = args.source_name #"TM_baron_mouse_for_baron" target_name = args.target_name #"baron_human" domain_to_indices = np.where(data_set['accessions'] == source_name)[0] train_set = {'features': data_set['features'][domain_to_indices], 'labels': data_set['labels'][domain_to_indices], 'accessions': data_set['accessions'][domain_to_indices]} domain_to_indices = np.where(data_set['accessions'] == target_name)[0] test_set = {'features': data_set['features'][domain_to_indices], 'labels': data_set['labels'][domain_to_indices], 'accessions': data_set['accessions'][domain_to_indices]} print('source labels:', np.unique(train_set['labels']), ' target labels:', np.unique(test_set['labels'])) test_set_eval = {'features': data_set['features'][domain_to_indices], 'labels': data_set['labels'][domain_to_indices], 'accessions': data_set['accessions'][domain_to_indices]} print(train_set['features'].shape, test_set['features'].shape) data = torch.utils.data.TensorDataset( torch.FloatTensor(train_set['features']), torch.LongTensor(matrix_one_hot(train_set['labels'], int(max(train_set['labels'])+1)).long())) source_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True, drop_last=True, **kwargs) data = torch.utils.data.TensorDataset( torch.FloatTensor(test_set['features']), torch.LongTensor(matrix_one_hot(test_set['labels'], int(max(train_set['labels'])+1)).long())) target_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True, drop_last=True, **kwargs) target_test_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False, drop_last=False, **kwargs) class_num = max(train_set['labels'])+1 class_num_test = max(test_set['labels']) + 1 ### re-weighting the classifier cls_num_list = [np.sum(train_set['labels'] == i) for i in range(class_num)] #from https://github.com/YyzHarry/imbalanced-semi-self/blob/master/train.py # # Normalized weights based on inverse number of effective data per class. 
#2019 Learning Imbalanced Datasets with Label-Distribution-Aware Margin Loss #2020 Rethinking the Value of Labels for Improving Class-Imbalanced Learning beta = 0.9999 effective_num = 1.0 - np.power(beta, cls_num_list) per_cls_weights = (1.0 - beta) / np.array(effective_num) per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(cls_num_list) per_cls_weights = torch.FloatTensor(per_cls_weights).cuda() ## set base network embedding_size = args.embedding_size base_network = FeatureExtractor(num_inputs=train_set['features'].shape[1], embed_size = embedding_size).cuda() label_predictor = LabelPredictor(base_network.output_num(), class_num).cuda() total_model = nn.Sequential(base_network, label_predictor) center_loss = CenterLoss(num_classes=class_num, feat_dim=embedding_size, use_gpu=True) optimizer_centloss = torch.optim.SGD([{'params': center_loss.parameters()}], lr=0.5) print("output size of FeatureExtractor and LabelPredictor: ", base_network.output_num(), class_num) ad_net = scAdversarialNetwork(base_network.output_num(), 1024).cuda() ## set optimizer config_optimizer = {"lr_type": "inv", "lr_param": {"lr": 0.001, "gamma": 0.001, "power": 0.75}} parameter_list = base_network.get_parameters() + ad_net.get_parameters() + label_predictor.get_parameters() optimizer = optim.SGD(parameter_list, lr=1e-3, weight_decay=5e-4, momentum=0.9, nesterov=True) schedule_param = config_optimizer["lr_param"] lr_scheduler = lr_schedule.schedule_dict[config_optimizer["lr_type"]] ## train len_train_source = len(source_loader) len_train_target = len(target_loader) transfer_loss_value = classifier_loss_value = total_loss_value = 0.0 epoch_global = 0.0 hit = False s_global_centroid = torch.zeros(class_num, embedding_size).cuda() t_global_centroid = torch.zeros(class_num, embedding_size).cuda() for epoch in range(args.num_iterations): if epoch % (2500) == 0 and epoch != 0: feature_target = base_network(torch.FloatTensor(test_set['features']).cuda()) output_target = label_predictor.forward(feature_target) softmax_out = nn.Softmax(dim=1)(output_target) predict_prob_arr, predict_label_arr = torch.max(softmax_out, 1) if epoch == args.epoch_th: data = torch.utils.data.TensorDataset(torch.FloatTensor(test_set['features']), predict_label_arr.cpu()) target_loader_align = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True, drop_last=True, **kwargs) result_path = args.result_path #"../results/" model_file = result_path + 'final_model_' + str(epoch) + source_name + target_name+'.ckpt' torch.save({'base_network': base_network.state_dict(), 'label_predictor': label_predictor.state_dict()}, model_file) if not os.path.exists(result_path): os.makedirs(result_path) with torch.no_grad(): code_arr_s = base_network(Variable(torch.FloatTensor(train_set['features']).cuda())) code_arr_t = base_network(Variable(torch.FloatTensor(test_set_eval['features']).cuda())) code_arr = np.concatenate((code_arr_s.cpu().data.numpy(), code_arr_t.cpu().data.numpy()), 0) digit_label_dict = pd.read_csv(args.dataset_path + 'digit_label_dict.csv') digit_label_dict = pd.DataFrame(zip(digit_label_dict.iloc[:,0], digit_label_dict.index), columns=['digit','label']) digit_label_dict = digit_label_dict.to_dict()['label'] # transform digit label to cell type name y_pred_label = [digit_label_dict[x] if x in digit_label_dict else x for x in predict_label_arr.cpu().data.numpy()] pred_labels_file = result_path + 'pred_labels_' + source_name + "_" + target_name + "_" + str(epoch) + ".csv" pd.DataFrame([predict_prob_arr.cpu().data.numpy(), 
y_pred_label], index=["pred_probability", "pred_label"]).to_csv(pred_labels_file, sep=',') embedding_file = result_path + 'embeddings_' + source_name + "_" + target_name + "_" + str(epoch)+ ".csv" pd.DataFrame(code_arr).to_csv(embedding_file, sep=',') #### only for evaluation # acc_by_label = np.zeros( class_num_test ) # all_label = test_set['labels'] # for i in range(class_num_test): # acc_by_label[i] = np.sum(predict_label_arr.cpu().data.numpy()[all_label == i] == i) / np.sum(all_label == i) # np.set_printoptions(suppress=True) # print('iter:', epoch, "average acc over all test cell types: ", round(np.nanmean(acc_by_label), 3)) # print("acc of each test cell type: ", acc_by_label) # div_score, div_score_all, ent_score, sil_score = evaluate_multibatch(code_arr, train_set, test_set_eval, epoch) #results_file = result_path + source_name + "_" + target_name + "_" + str(epoch)+ "_acc_div_sil.csv" #evel_res = [np.nanmean(acc_by_label), div_score, div_score_all, ent_score, sil_score] #pd.DataFrame(evel_res, index = ["acc","div_score","div_score_all","ent_score","sil_score"], columns=["values"]).to_csv(results_file, sep=',') # pred_labels_file = result_path + source_name + "_" + target_name + "_" + str(epoch) + "_pred_labels.csv" # pd.DataFrame([predict_label_arr.cpu().data.numpy(), all_label], index=["pred_label", "true_label"]).to_csv(pred_labels_file, sep=',') ## train one iter base_network.train(True) ad_net.train(True) label_predictor.train(True) optimizer = lr_scheduler(optimizer, epoch, **schedule_param) optimizer.zero_grad() optimizer_centloss.zero_grad() if epoch % len_train_source == 0: iter_source = iter(source_loader) epoch_global = epoch_global + 1 if epoch % len_train_target == 0: if epoch < args.epoch_th: iter_target = iter(target_loader) else: hit = True iter_target = iter(target_loader_align) inputs_source, labels_source = iter_source.next() inputs_target, labels_target = iter_target.next() inputs_source, inputs_target, labels_source, labels_target = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda(), labels_target.cuda() feature_source = base_network(inputs_source) feature_target = base_network(inputs_target) features = torch.cat((feature_source, feature_target), dim=0) output_source = label_predictor.forward(feature_source) output_target = label_predictor.forward(feature_target) ######## VAT and BNM loss # LDS should be calculated before the forward for cross entropy vat_loss = VATLoss(xi=args.xi, eps=args.eps, ip=args.ip) lds_loss = vat_loss(total_model, inputs_target) softmax_tgt = nn.Softmax(dim=1)(output_target[:, 0:class_num]) _, s_tgt, _ = torch.svd(softmax_tgt) BNM_loss = -torch.mean(s_tgt) ########domain alignment loss if args.method == 'DANN': domain_prob_discriminator_1_source = ad_net.forward(feature_source) domain_prob_discriminator_1_target = ad_net.forward(feature_target) adv_loss = loss_utility.BCELossForMultiClassification(label=torch.ones_like(domain_prob_discriminator_1_source), \ predict_prob=domain_prob_discriminator_1_source) # domain matching adv_loss += loss_utility.BCELossForMultiClassification(label=torch.ones_like(domain_prob_discriminator_1_target), \ predict_prob=1 - domain_prob_discriminator_1_target) transfer_loss = adv_loss elif args.method == 'mmd': base = 1.0 # sigma for MMD sigma_list = [1, 2, 4, 8, 16] sigma_list = [sigma / base for sigma in sigma_list] transfer_loss = loss_utility.mix_rbf_mmd2(feature_source, feature_target, sigma_list) ######CrossEntropyLoss classifier_loss = 
nn.CrossEntropyLoss(weight=per_cls_weights)(output_source, torch.max(labels_source, dim=1)[1]) # classifier_loss = loss_utility.CrossEntropyLoss(labels_source.float(), nn.Softmax(dim=1)(output_source)) ######semantic_loss and center loss cell_th = args.cell_th epoch_th = args.epoch_th if epoch < args.epoch_th or hit == False: semantic_loss = torch.FloatTensor([0.0]).cuda() center_loss_src = torch.FloatTensor([0.0]).cuda() sum_dist_loss = torch.FloatTensor([0.0]).cuda() # center_loss.centers = feature_source[torch.max(labels_source, dim=1)[1] == 0].mean(dim=0, keepdim=True) pass elif hit == True: center_loss_src = center_loss(feature_source, labels=torch.max(labels_source, dim=1)[1]) s_global_centroid = center_loss.centers semantic_loss, s_global_centroid, t_global_centroid = loss_utility.semant_use_s_center(class_num, s_global_centroid, t_global_centroid, feature_source, feature_target, torch.max( labels_source, dim=1)[1], labels_target, 0.7, cell_th) #softmax_tgt if epoch > epoch_th: lds_loss = torch.FloatTensor([0.0]).cuda() if epoch <= args.num_iterations: progress = epoch / args.epoch_th #args.num_iterations else: progress = 1 lambd = 2 / (1 + math.exp(-10 * progress)) - 1 total_loss = classifier_loss + lambd*args.DA_coeff * transfer_loss + lambd*args.BNM_coeff*BNM_loss + lambd*args.alpha*lds_loss\ + args.semantic_coeff *semantic_loss + args.centerloss_coeff*center_loss_src total_loss.backward() optimizer.step() # multiple (1./centerloss_coeff) in order to remove the effect of centerloss_coeff on updating centers if args.centerloss_coeff > 0 and center_loss_src > 0: for param in center_loss.parameters(): param.grad.data *= (1. / args.centerloss_coeff) optimizer_centloss.step() #optimize the center in center loss
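# The per-class weights used for the re-weighted cross-entropy above follow the
# "effective number of samples" rule from the class-imbalance papers cited in the
# comments: E_n = (1 - beta^n) / (1 - beta) per class, weight_c proportional to 1 / E_n,
# renormalised so the weights sum to the number of classes. A small self-contained
# example with made-up class counts:
import numpy as np

cls_num_list = [500, 50, 5]            # hypothetical per-class sample counts
beta = 0.9999
effective_num = 1.0 - np.power(beta, cls_num_list)
per_cls_weights = (1.0 - beta) / effective_num
per_cls_weights = per_cls_weights / per_cls_weights.sum() * len(cls_num_list)
print(per_cls_weights)                 # rare classes receive larger weights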
        mean_cluster_d += class_mean_d
        max_cluster_d = max(max_cluster_d, class_mean_d)
        ################################################
        if HACKS['DEBUG_TEST']:
            break
    mean_cluster_d = mean_cluster_d / (i + 1)
    return mean_cluster_d, max_cluster_d

'''--------------- Prepare the criterions ---------------'''
criterion_xent = nn.CrossEntropyLoss()
criterion_cent = CenterLoss(num_classes=num_classes, feat_dim=args.embed_size, use_gpu=use_gpu)

'''--------------- Prepare the optimizers ---------------'''
optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model,
                                  weight_decay=5e-04, momentum=0.9)
#optimizer_model = torch.optim.Adam(model.parameters())
optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent)

if args.stepsize > 0:
    scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize,
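# The snippet above breaks off inside the StepLR call. For reference, a hedged sketch of
# the usual pattern these scripts follow (decay factor `args.gamma` assumed, and
# `train_one_epoch` is a placeholder for the per-epoch training loop, not a function from
# the original code):
if args.stepsize > 0:
    scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize,
                                    gamma=args.gamma)

for epoch in range(args.max_epoch):
    train_one_epoch(...)          # placeholder for the per-epoch training loop
    if args.stepsize > 0:
        scheduler.step()          # decay the model learning rate every `stepsize` epochs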