def MultiTrAdap(Xs, Ys, Xa, Ya, Xt, Yt, nIters=200):
    """Run a TrAdaBoost-style sample-reweighting loop.

    Suffixes: ``s`` = source domain, ``a`` = auxiliary data, ``t`` = test
    data.  In every iteration a small random forest is fitted on the
    weighted union of source and auxiliary samples, the weights of the
    misclassified samples are updated, and the classifier is scored on
    the test set.

    Args:
        Xs, Ys: source-domain features and labels.
        Xa, Ya: auxiliary features and labels.
        Xt, Yt: test features and labels (used for scoring only).
        nIters: number of boosting iterations.

    Returns:
        A tuple ``(Weights, Epss, TestPrd, TestAcc, AdaAcc)``:

        * ``Weights`` - dict: iteration -> un-normalised weight vector
          (source samples first, auxiliary samples last) used in that
          iteration.
        * ``Epss`` - list of ``1 - Eps`` per iteration, i.e. the weighted
          accuracy on the auxiliary samples *before* Eps is clamped.
        * ``TestPrd`` - dict: iteration -> predictions on ``Xt`` made by
          the classifier trained on source + auxiliary data.
        * ``TestAcc`` - list of ``Metrics.Accuracy(Yt, prediction)`` for
          those predictions.
        * ``AdaAcc`` - list of test accuracies of a classifier refitted
          on the weighted auxiliary data only.
    """
    p = progressbar.ProgressBar()
    Xsa = np.concatenate((Xs, Xa))
    Ysa = np.concatenate((Ys, Ya))
    Ns = Ys.shape[0]
    Na = Ya.shape[0]

    Epss = []      # 1 - Eps (weighted auxiliary accuracy) per iteration
    TestAcc = []   # accuracy on the test set per iteration
    TestPrd = {}   # test-set predictions per iteration
    Weights = {}   # sample weights per iteration
    AdaAcc = []    # test accuracy when only the weighted auxiliary data is used

    # Constant factor applied to misclassified source samples.
    Beta = 1 / (1 + np.sqrt(2 * np.log(Ns) / nIters))
    source_coef = Beta * np.ones(Ns)
    Wsa = np.ones(Ns + Na) / (Ns + Na)  # start from uniform weights

    p.start(nIters)
    for ni in range(nIters):
        # `Wsa` is rebound (never mutated in place) below, so storing the
        # reference keeps a distinct array per iteration.
        Weights[ni] = Wsa

        # ---- normalise the weights, train and predict on S + A ----
        Psa = Wsa / np.sum(Wsa)
        clf = RFC(n_estimators=5, criterion='entropy', max_depth=2)
        clf.fit(Xsa, Ysa, sample_weight=Psa)
        YsaPrd = clf.predict(Xsa)
        RorW = 1 * (YsaPrd != Ysa)  # 1 where the prediction is wrong

        # Weighted error on the auxiliary samples only.  The auxiliary part
        # is addressed as [Ns:] rather than [-Na:], because [-0:] would
        # silently select the whole array.
        Eps = np.sum((Wsa * RorW)[Ns:]) / np.sum(Wsa[Ns:])
        Epss.append(1 - Eps)

        # Clamp Eps so that Alpha below stays finite and below 1.
        if Eps >= 0.4:
            Eps = 0.4
        elif Eps <= 0:
            Eps = 0.01

        # ---- weight update ----
        # Square-root variant; the original update from Dai et al. uses
        # Alpha = Eps / (1 - Eps).
        Alpha = np.sqrt(Eps / (1 - Eps))
        Coef = np.concatenate((source_coef, (1 / Alpha) * np.ones(Na)))
        # Coef ** 0 == 1 leaves correctly classified samples untouched.
        Wsa = Wsa * np.power(Coef, RorW)

        # ---- results & summary ----
        Yprd = clf.predict(Xt)
        TestPrd[ni] = Yprd
        TestAcc.append(Metrics.Accuracy(Yt, Yprd))

        # Reference run: the same classifier refitted on the auxiliary data
        # only, using this iteration's normalised auxiliary weights.
        clf.fit(Xa, Ya, sample_weight=Psa[Ns:])
        AdaAcc.append(Metrics.Accuracy(Yt, clf.predict(Xt)))

        p.update(ni + 1)  # progress bar
    p.finish()
    return Weights, Epss, TestPrd, TestAcc, AdaAcc
# Score the stored best-accuracy CUB checkpoint on the validation split.
model = sfr.get_sfr(classes_num=config.classes_num,
                    channel_size=config.channel_size,
                    drop_rate=config.drop_rate,
                    sr_rate=config.sr_rate,
                    fr_rate=config.fr_rate)
validate_loader = ImageReader.getLoader(config.dataset, "validate",
                                        config.validate_img_path)
all_weights_path = "./weights/cub/sfr_resnet50_cub_best_acc.pth"
if config.use_cuda:
    model = model.cuda()
# Deserialize onto the CPU first so a checkpoint written on a GPU machine
# also loads on a CPU-only host; load_state_dict then copies every tensor
# onto whatever device the model parameters live on.
model.load_state_dict(torch.load(all_weights_path, map_location="cpu"))
model.eval()

val_loss = 0
val_step = 0
accuracy = Metrics.Accuracy()
criterion = nn.CrossEntropyLoss()

# Inference only: skip building the autograd graph.
with torch.no_grad():
    for x, y in validate_loader:
        if config.use_cuda:
            x = x.cuda()
            # `async` is a reserved word since Python 3.7; the keyword
            # argument is called `non_blocking` in PyTorch >= 0.4.
            y = y.cuda(non_blocking=True)
        y_ = model.forward_validate(x)
        step_loss = criterion(y_, y)
        # The call updates accuracy.total_correct / total_sample, which
        # are read after the loop.
        step_acc = accuracy(y_, y)
        # The loss is a 0-dim tensor; .item() replaces the old .data[0].
        val_loss += step_loss.item()
        val_step += 1
        del (x, y, y_, step_loss)

# The small epsilon guards against division by zero on an empty loader.
val_acc = accuracy.total_correct / (accuracy.total_sample + 1e-5)
# NOTE(review): the two statements below are the tail of a function whose
# header lies above this span; they are kept verbatim and their indentation
# is assumed to be one function-body level - confirm against the full file.
    p.finish()
    return Weights, Epss, TestPrd, TestAcc, AdaAcc


#============================================================
# Three data sets are tested: synthetic, UCI heart disease, Amazon+Webcam.
# Only the synthetic loader is active; the other two are commented out.
idChanged = []  # placeholder, overwritten by the unpacking on the next line
Xs, Ys, Xa, Ya, Xt, Yt, idChanged = Datasets.gen_noisy_classi_data()
#Xs,Ys,Xa,Ya,Xt,Yt = Datasets.load_heart()
#Xs,Ys,Xa,Ya,Xt,Yt = Datasets.load_pics()
nIters = 50

# Baseline, from A to T: fit on the auxiliary set only, score on the test set.
clf0 = RFC(n_estimators=5, criterion='entropy', max_depth=2)
clf0.fit(Xa, Ya)
Acc0 = Metrics.Accuracy(Yt, clf0.predict(Xt))

# Boosted run over source + auxiliary data.
# NOTE(review): Acc_auxi receives the second return value of MultiTrAdap;
# presumably a per-iteration score on the auxiliary set - confirm against
# the function definition.
SPweights, Acc_auxi, All_test_prd, Acc_test, Acc_AdaOnly = MultiTrAdap(
    Xs, Ys, Xa, Ya, Xt, Yt, nIters=nIters)

# Presumably All_test_prd maps iteration -> test predictions, giving one
# DataFrame column per iteration - verify against MultiTrAdap.
PrdDf = pd.DataFrame.from_dict(All_test_prd)
HalfDf = PrdDf.iloc[:, round(nIters / 2):]  # use the last half only
BoostPrd = HalfDf.mode(axis=1)  # Boosting: simply vote (row-wise mode)
# Column 0 of the mode result is the first modal value of each row.
AccB = Metrics.Accuracy(Yt, BoostPrd[0])

# ================ Plot===========================
import matplotlib.pyplot as plt
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
ax1.plot(Acc_test, label='From S+A to Test')
ax1.plot(Acc_auxi, label='Weighted Aux set')
def _batch_to_device(x, y):
    """Move one (inputs, labels) batch to the GPU when config.use_cuda is set."""
    if config.use_cuda:
        x = x.cuda()
        # `async` is a reserved word since Python 3.7; PyTorch >= 0.4 names
        # this keyword argument `non_blocking`.
        y = y.cuda(non_blocking=True)
    return x, y


def _train_one_epoch(model, loader, criterion, optimizer, epoch):
    """Run one optimisation pass over `loader`; return (mean loss, accuracy)."""
    model.train()
    accuracy = Metrics.Accuracy()  # fresh counters for this phase
    loss_sum = 0
    step = 0
    for x, y in loader:
        x, y = _batch_to_device(x, y)
        optimizer.zero_grad()  # clear gradients left from the previous step
        y_ = model(x)
        # Loss and accuracy of this step.
        step_loss = criterion(y_, y)
        step_acc = accuracy(y_, y)
        # The loss is a 0-dim tensor; .item() replaces the old .data[0].
        loss_value = step_loss.item()
        loss_sum += loss_value
        # Back-propagate and update the weights.
        step_loss.backward()
        optimizer.step()
        step += 1
        print("{} epoch,{} step,step loss is {:.6f},step acc is {:.4f}".
              format(epoch, step, loss_value, step_acc))
        del (step_loss, x, y, y_)
    # The small epsilon guards against division by zero on an empty loader.
    acc = accuracy.total_correct / (accuracy.total_sample + 1e-5)
    return loss_sum / (step + 1e-5), acc


def _validate(model, loader, criterion):
    """Evaluate `model` on `loader` without gradients; return (mean loss, accuracy)."""
    model.eval()
    accuracy = Metrics.Accuracy()  # fresh counters for this phase
    loss_sum = 0
    step = 0
    with torch.no_grad():  # inference only, no autograd graph needed
        for x, y in loader:
            x, y = _batch_to_device(x, y)
            y_ = model(x)
            step_loss = criterion(y_, y)
            accuracy(y_, y)  # called for its effect on the running counters
            loss_sum += step_loss.item()
            step += 1
            del (x, y, y_, step_loss)
    acc = accuracy.total_correct / (accuracy.total_sample + 1e-5)
    return loss_sum / (step + 1e-5), acc


def train():
    """Train the network, validating, checkpointing and logging every epoch.

    Everything is driven by the module-level ``config`` object.  Per epoch
    the function trains on the training loader, saves a checkpoint (and
    removes the one written three epochs earlier if it exists), validates,
    keeps a separate "best_acc" checkpoint, appends one row to the CSV log
    and writes scalars to TensorBoard.
    """
    # Logging targets.
    csv_path = config.csv_path
    tb_path = config.tb_path
    writer = SummaryWriter(log_dir=tb_path)

    # Build the model.  CNN weights are only passed on a fresh run; a
    # resumed run restores a full checkpoint further down.
    if config.start_epoch == 1:
        model = net.get_net(classes_num=config.classes_num,
                            channel_size=config.channel_size,
                            cnn_weights_path=config.cnn_weights_path,
                            drop_rate=config.drop_rate)
    else:
        model = net.get_net(classes_num=config.classes_num,
                            channel_size=config.channel_size,
                            drop_rate=config.drop_rate)
    print("model load succeed")
    if config.use_cuda:
        model = model.cuda()  # move the model to the GPU
    if config.use_parallel:
        model = nn.DataParallel(model, device_ids=config.device_ids)
        cudnn.benchmark = True
    if config.start_epoch != 1:
        # Resume from the checkpoint of the previous epoch.  It is loaded
        # after the optional DataParallel wrap, matching how it was saved.
        all_weights_path = config.save_weights_path.format(
            config.start_epoch - 1)
        # Deserialize on the CPU; load_state_dict copies the tensors onto
        # the device the model parameters live on.
        model.load_state_dict(
            torch.load(all_weights_path, map_location="cpu"))
        print("{} load succeed".format(all_weights_path))

    # Data loaders.
    train_folder = config.train_img_path
    validate_folder = config.validate_img_path
    train_loader = ImageReader.getLoader(config.dataset, "train",
                                         train_folder)
    validate_loader = ImageReader.getLoader(config.dataset, "validate",
                                            validate_folder)

    # Optimizer and learning-rate scheduler.
    optimizer = SGD(params=model.parameters(),
                    lr=config.start_lr,
                    momentum=0.9,
                    weight_decay=config.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=config.stones,
                                               gamma=0.1)

    # Loss function.
    criterion = nn.CrossEntropyLoss()

    # Best validation accuracy seen so far.
    best_acc = 0
    try:
        for i in range(config.start_epoch,
                       config.start_epoch + config.num_epoch):
            # The epoch index is passed explicitly so that a resumed run
            # lands on the right point of the milestone schedule.
            scheduler.step(epoch=i)
            # Read the rate the optimizer will actually use;
            # scheduler.get_lr() is not meant to be called outside step()
            # in recent PyTorch releases.
            lr = optimizer.param_groups[0]["lr"]
            print("{} epoch start , lr is {}".format(i, lr))

            # Training pass.
            train_loss, train_acc = _train_one_epoch(model, train_loader,
                                                     criterion, optimizer, i)

            # Save this epoch's weights and drop those from three epochs ago.
            weights_name = config.save_weights_path.format(i)
            torch.save(model.state_dict(), weights_name)
            del_weights_name = config.save_weights_path.format(i - 3)
            if os.path.exists(del_weights_name):
                os.remove(del_weights_name)
            print("{} save,{} delete".format(weights_name, del_weights_name))

            # Validation pass.
            val_loss, val_acc = _validate(model, validate_loader, criterion)
            print("validate end,log start")

            # Keep a separate copy of the best model so far.
            if best_acc < val_acc:
                weights_name = config.save_weights_path.format("best_acc")
                torch.save(model.state_dict(), weights_name)
                best_acc = val_acc

            # Sum of the per-tensor norms of all parameters (logged only,
            # not part of the loss).
            l2_reg = 0.0
            for param in model.parameters():
                l2_reg += torch.norm(param).item()

            # Write the log row and the TensorBoard scalars.
            with open(csv_path, "a", encoding="utf-8") as log_file:
                t = get_ctime()
                content = "{},{:.6f},{:.4f},{:.6f},{:.4f},{:.6f},{}".format(
                    i, train_loss, train_acc, val_loss, val_acc, l2_reg,
                    t) + "\n"
                log_file.write(content)
            writer.add_scalar("Train/acc", train_acc, i)
            writer.add_scalar("Train/loss", train_loss, i)
            writer.add_scalar("Val/acc", val_acc, i)
            writer.add_scalar("Val/loss", val_loss, i)
            writer.add_scalar("lr", lr, i)
            writer.add_scalar("l2_reg", l2_reg, i)
            print("log end ...")
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which embedded the source indentation in
            # the printed message.
            print("{} epoch end, train loss is {:.6f},train acc is {:.4f},"
                  "val loss is {:.6f},val acc is {:.4f},"
                  "weight l2 norm is {:.6f}".format(i, train_loss, train_acc,
                                                    val_loss, val_acc,
                                                    l2_reg))
    finally:
        # Flush pending TensorBoard events even if training aborts.
        writer.close()
    del (model)
    print("{} train end,best_acc is {}...".format(config.dataset, best_acc))