def train(classifier, X, y, epoch=10, batch_size=3, reg=1e-3):
    """Train ``classifier`` with mini-batch SGD and collect per-epoch metrics.

    Args:
        classifier: Network exposing ``forward(x_batch)`` and ``layers``.
        X: Training images; ``X.shape[0]`` is the number of samples.
        y: Training labels, sliced in step with ``X``.
        epoch: Number of passes over the training data.
        batch_size: Mini-batch size. Only full batches are used, so samples
            beyond the last full batch are skipped in each epoch; ideally
            it divides ``X.shape[0]`` evenly.
        reg: Value passed to ``loss.L1_loss`` together with
            ``classifier.layers`` when accumulating the reported loss.

    Returns:
        ``(loss_list, acc_list)``: one entry per epoch, holding the mean
        per-batch loss and the mean per-batch accuracy respectively.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if ``X`` has fewer
            samples than ``batch_size`` (no full batch can be formed, which
            previously surfaced as a ``ZeroDivisionError``).
    """
    # Validate before building anything so misuse fails fast and clearly.
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be a positive integer, got {!r}".format(batch_size))
    batch_num = X.shape[0] // batch_size
    if batch_num == 0:
        raise ValueError(
            "need at least batch_size={} samples to form one batch, got {}".format(
                batch_size, X.shape[0]))

    loss_list, acc_list = [], []
    loss_func = loss.Loss_Sequential(loss.soft_max_loss(), loss.L1_loss())
    _optimizer = optimizer.MB_SGD(classifier.layers, loss_func)

    for _ in range(epoch):
        # Reshuffle every epoch so batches differ between passes.
        X, y = shuffle(X, y)
        acc_sum, loss_sum = 0, 0
        for batch in range(batch_num):
            start = batch * batch_size
            stop = start + batch_size
            x_batch = X[start:stop]
            y_batch = y[start:stop]

            p = classifier.forward(x_batch)
            loss_sum += loss.softmax_loss(y_batch, p) + loss.L1_loss(
                classifier.layers, reg)
            _optimizer.optimize(x_batch, y_batch, p)
            acc_sum += get_acc_avg(y_batch, p)

        loss_list.append(loss_sum / batch_num)
        acc_list.append(acc_sum / batch_num)
    return loss_list, acc_list
def main():
    """Train the model from ``get_model()`` and checkpoint it every epoch.

    The learning rate is 0.001 for the first 30 epochs and 0.0001 afterwards.
    Each step minimises ``loss.kl_loss(outputs, label)`` plus
    ``loss.L1_loss(ages, age)``, where ``ages`` is the sum of
    ``outputs * rank`` over dimension 1 and ``rank`` holds 0..100
    (presumably an expectation over 101 age bins - confirm against the
    model's output layer).

    After every epoch the whole model and its ``state_dict`` are written to
    ``./pretrained/``; ``test()`` runs after every second epoch.
    """
    import os

    model = get_model()
    # Fall back to CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    loader = data.Data(args).train_loader
    rank = torch.Tensor(list(range(101))).to(device)

    # torch.save does not create missing parent directories.
    os.makedirs('./pretrained', exist_ok=True)

    for i in range(args.epochs):
        lr = 0.001 if i < 30 else 0.0001
        # NOTE(review): the optimizer is rebuilt every epoch, which presumably
        # discards any optimizer state (e.g. momentum) - confirm intended.
        optimizer = utils.make_optimizer(args, model, lr)
        model.train()
        print('Learning rate:{}'.format(lr))

        for j, inputs in enumerate(loader):
            img, label, age = inputs
            img = img.to(device)
            label = label.to(device)
            age = age.to(device)

            optimizer.zero_grad()
            outputs = model(img)
            ages = torch.sum(outputs * rank, dim=1)
            loss1 = loss.kl_loss(outputs, label)
            loss2 = loss.L1_loss(ages, age)
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()
            print('[Epoch:{}] \t[batch:{}]\t[loss={:.4f}]'.format(i, j, total_loss.item()))

        torch.save(model, './pretrained/{}.pt'.format(args.model_name))
        torch.save(model.state_dict(), './pretrained/{}_dict.pt'.format(args.model_name))

        # Only announce a test run when one actually happens.
        if (i + 1) % 2 == 0:
            print('Test: Epoch=[{}]'.format(i))
            test()
def train(train_loader, model, criterion, optimizer, epoch, device, train_count):
    """Run one training epoch for the joint age / gender / race model.

    For every batch ``(x, y, lbl, g, r)`` the model returns three outputs.
    The first is soft-maxed over dimension 1 and reduced to a scalar age per
    sample via a weighted sum with the values 0..100. The optimised loss is
    ``L.kl_loss`` on the soft-maxed output vs. ``lbl`` plus ``L.L1_loss`` on
    the ages vs. ``y``, plus ``criterion`` applied to the gender and race
    outputs.

    Args:
        train_loader: Iterable of ``(x, y, lbl, g, r)`` batches.
        model: Module returning ``(age_output, gender_output, race_output)``.
        criterion: Loss applied to the gender and race outputs.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, only shown in the progress bar.
        device: Device all batch tensors are moved to.
        train_count: Denominator used for the printed gender/race accuracy.

    Returns:
        ``(loss_monitor.avg, accuracy_monitor.avg)`` where the accuracy
        monitor is fed the number of samples whose predicted age is within
        one year of ``y``.
    """
    model.train()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    age_bins = torch.Tensor(list(range(101))).to(device)
    gender_hits = 0
    race_hits = 0

    with tqdm(train_loader) as progress:
        for x, y, lbl, g, r in progress:
            x, y, lbl, g, r = [t.to(device) for t in (x, y, lbl, g, r)]

            # Forward pass: age distribution -> expected age.
            age_out, gender_out, race_out = model(x)
            age_probs = F.softmax(age_out, dim=1)
            ages = torch.sum(age_probs * age_bins, dim=1)

            # Combined multi-task loss.
            batch_loss = (L.kl_loss(age_probs, lbl)
                          + L.L1_loss(ages, y)
                          + criterion(gender_out, g)
                          + criterion(race_out, r))
            loss_value = batch_loss.item()

            # Batch metrics.
            n_correct = (abs(ages - y) < 1).sum().item()
            gender_pred = torch.argmax(F.softmax(gender_out, dim=1), 1)
            race_pred = torch.argmax(F.softmax(race_out, 1), 1)
            gender_hits += torch.sum(gender_pred == g).item()
            race_hits += torch.sum(race_pred == r).item()

            n_samples = x.size(0)
            loss_meter.update(loss_value, n_samples)
            acc_meter.update(n_correct, n_samples)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            progress.set_postfix(
                OrderedDict(stage="train", epoch=epoch, loss=loss_meter.avg),
                acc=acc_meter.avg,
                correct=n_correct,
                sample_num=n_samples)

    print("Gender accu:", gender_hits / train_count)
    print("Race accu:", race_hits / train_count)
    return loss_meter.avg, acc_meter.avg
def main():
    """Train the model, checkpoint every epoch and keep the best-MAE model.

    The learning rate is 0.001 for the first 30 epochs and 0.0001 afterwards.
    Batches are dicts with the keys ``'image'``, ``'label'`` and ``'age'``.
    Each step minimises ``loss.kl_loss(outputs, label)`` plus
    ``loss.L1_loss(ages, age)``, where ``ages`` is the sum of
    ``outputs * rank`` over dimension 1 and ``rank`` holds 0..100.

    After each epoch the model and its ``state_dict`` are saved under
    ``./checkpoint/``. Every second epoch ``test(model)`` is called; when the
    value it returns beats the best seen so far, an extra "best" copy of the
    model and its ``state_dict`` is saved with that value in the file name.
    """
    import os

    model = get_model()
    # Fall back to CPU instead of crashing on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    print(model)
    loader = data.Data(args).train_loader
    rank = torch.Tensor(list(range(101))).to(device)
    best_mae = np.inf

    # torch.save does not create missing parent directories.
    os.makedirs('./checkpoint', exist_ok=True)

    for i in range(args.epochs):
        lr = 0.001 if i < 30 else 0.0001
        # NOTE(review): the optimizer is rebuilt every epoch, which presumably
        # discards any optimizer state (e.g. momentum) - confirm intended.
        optimizer = utils.make_optimizer(args, model, lr)
        model.train()
        print('Learning rate:{}'.format(lr))

        for j, inputs in enumerate(tqdm(loader)):
            img = inputs['image'].to(device)
            label = inputs['label'].to(device)
            age = inputs['age'].to(device)

            optimizer.zero_grad()
            outputs = model(img)
            ages = torch.sum(outputs * rank, dim=1)
            loss1 = loss.kl_loss(outputs, label)
            loss2 = loss.L1_loss(ages, age)
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()

            # Log through tqdm so the progress bar is not corrupted.
            if j % 10 == 0:
                tqdm.write('[Epoch:{}] \t[batch:{}]\t[loss={:.4f}]'.format(
                    i, j, total_loss.item()))

        torch.save(model, './checkpoint/{}.pt'.format(args.model_name))
        torch.save(model.state_dict(),
                   './checkpoint/{}_dict.pt'.format(args.model_name))

        if (i + 1) % 2 == 0:
            print('Test: Epoch=[{}]'.format(i))
            cur_mae = test(model)
            if cur_mae < best_mae:
                best_mae = cur_mae
                print(f'Saving best model with MAE {cur_mae}... ')
                torch.save(
                    model, './checkpoint/best_{}_MAE={}.pt'.format(
                        args.model_name, cur_mae))
                torch.save(
                    model.state_dict(),
                    './checkpoint/best_{}_dict_MAE={}.pt'.format(
                        args.model_name, cur_mae))
def validate_ldl(validate_loader, model, criterion, epoch, device, group_count, gender_count="False", get_ca=False):
    """Evaluate an age model and collect per-group error statistics.

    Each batch ``pack`` is read positionally: ``pack[0]`` input, ``pack[1]``
    true age, ``pack[2]`` label passed to ``L.kl_loss``, ``pack[-1]`` sample
    path, and - only when gender tracking is on - ``pack[3]`` gender.

    Args:
        validate_loader: Iterable of batches as described above.
        model: Module whose output is soft-maxed over dimension 1 and reduced
            to an age by a weighted sum with the values 0..100.
        criterion: When not ``None``, loss and accuracy monitors are updated;
            the value itself is not called.
        epoch: Epoch number, only shown in the progress bar.
        device: Device the batch tensors are moved to.
        group_count: Per-age-group sample counts, updated in place when they
            sum to zero on entry. Its ``torch.sum`` is the CA denominator.
        gender_count: The string ``"False"`` disables gender tracking;
            anything else is treated as a two-slot counter updated in place
            (only while ``group_count`` is being filled).
        get_ca: When truthy, also compute the share of samples with absolute
            error below 3, 5 and 7.

    Returns:
        ``(loss_avg, acc_avg, mae, stats, df)``. ``stats`` is
        ``(group_mae, gender_mae, ca)`` with gender tracking on, otherwise
        ``(group_mae, ca)``. ``group_mae`` / ``gender_mae`` hold summed (not
        averaged) absolute errors. ``df`` lists the samples whose absolute
        error is above 5 or below 0.5.
    """
    model.eval()
    loss_monitor = AverageMeter()
    accuracy_monitor = AverageMeter()
    pred_chunks = []
    truth_chunks = []
    rank = torch.Tensor(list(range(101))).to(device)
    group_mae = torch.zeros(7)
    gender_mae = torch.zeros(2)
    flagged = []
    ca = {3: 0, 5: 0, 7: 0} if get_ca else None

    # The string "False" is the sentinel meaning "no gender statistics".
    track_gender = gender_count != "False"
    # Populate the counters only if the caller handed in all-zero counts.
    to_count = bool(sum(group_count) == 0)

    with torch.no_grad():
        with tqdm(validate_loader) as _tqdm:
            for pack in _tqdm:
                x, y, lbl = pack[0], pack[1], pack[2]
                path = pack[-1]
                if track_gender:
                    gender = pack[3]

                if to_count:
                    for ind, true_age in enumerate(y):
                        group_count[get_group(true_age.item())] += 1
                        if track_gender:
                            slot = 1 if gender[ind] else 0
                            gender_count[slot] += 1

                x = x.to(device)
                y = y.to(device)
                lbl = lbl.to(device)

                outputs = model(x)
                # Dump the raw output when it contains NaN/Inf.
                if torch.isnan(outputs).any() or torch.isinf(outputs).any():
                    print(outputs)
                outputs = F.softmax(outputs, dim=1)
                ages = torch.sum(outputs * rank, dim=1)
                pred_chunks.append(ages.cpu().numpy())
                truth_chunks.append(y.cpu().numpy())

                for ind, age in enumerate(ages):
                    gap = abs(y[ind] - age)
                    group_mae[get_group(y[ind].item())] += gap
                    if track_gender:
                        gender_mae[gender[ind]] += gap
                    if gap > 5 or gap < 0.5:
                        flagged.append(
                            [path[ind], y[ind].item(), age.item(), gap.item()])
                    if ca is not None:
                        # Kept in the ``.item()`` form used for the CA
                        # thresholds in the original code.
                        scalar_gap = abs(y[ind].item() - age)
                        for limit in (3, 5, 7):
                            if scalar_gap < limit:
                                ca[limit] += 1

                # Loss/accuracy are tracked for validation runs only.
                if criterion is not None:
                    loss = L.kl_loss(outputs, lbl) + L.L1_loss(ages, y)
                    cur_loss = loss.item()
                    correct_num = (abs(ages - y) < 1).sum().item()

                    sample_num = x.size(0)
                    loss_monitor.update(cur_loss, sample_num)
                    accuracy_monitor.update(correct_num, sample_num)
                    _tqdm.set_postfix(
                        OrderedDict(stage="val", epoch=epoch,
                                    loss=loss_monitor.avg),
                        acc=accuracy_monitor.avg,
                        correct=correct_num,
                        sample_num=sample_num)

    preds = np.concatenate(pred_chunks, axis=0)
    gt = np.concatenate(truth_chunks, axis=0)
    mae = np.abs(preds - gt).mean()

    if ca is not None:
        for limit in ca:
            ca[limit] = ca[limit] / torch.sum(group_count)

    df = pd.DataFrame(flagged, columns=["photo", "age", "pred", "error"])
    if track_gender:
        stats = (group_mae, gender_mae, ca)
    else:
        stats = (group_mae, ca)
    return loss_monitor.avg, accuracy_monitor.avg, mae, stats, df
def validate(validate_loader, model, criterion, epoch, device, val_count, get_ca=False):
    """Evaluate the joint age / gender / race model on a validation set.

    Args:
        validate_loader: Iterable of ``(x, y, lbl, g, r)`` batches.
        model: Module returning ``(age_output, gender_output, race_output)``.
            The age output is soft-maxed over dimension 1 and reduced to an
            age by a weighted sum with the values 0..100.
        criterion: Loss for the gender and race outputs. When ``None`` no
            loss, age accuracy or gender/race hit counts are accumulated.
        epoch: Epoch number, only shown in the progress bar.
        device: Device the batch tensors are moved to.
        val_count: Denominator for the printed gender/race accuracy and for
            the CA scores.
        get_ca: When truthy, compute and print the share of samples whose
            absolute age error is below 3, 5 and 7.

    Returns:
        ``(loss_avg, acc_avg, mae, ca)`` where ``ca`` is ``None`` unless
        ``get_ca`` is truthy, in which case it maps 3/5/7 to the respective
        share.
    """
    model.eval()
    loss_monitor = AverageMeter()
    accuracy_monitor = AverageMeter()
    preds = []
    gt = []
    rank = torch.Tensor(list(range(101))).to(device)
    correct_gender = 0
    correct_race = 0
    ca = {3: 0, 5: 0, 7: 0} if get_ca else None

    with torch.no_grad():
        with tqdm(validate_loader) as _tqdm:
            for x, y, lbl, g, r in _tqdm:
                x = x.to(device)
                y = y.to(device)
                lbl = lbl.to(device)
                g = g.to(device)
                r = r.to(device)

                # Forward pass: age distribution -> expected age.
                outputs, gen, race = model(x)
                outputs = F.softmax(outputs, dim=1)
                ages = torch.sum(outputs * rank, dim=1)
                preds.append(ages.cpu().numpy())
                gt.append(y.cpu().numpy())

                if ca is not None:
                    for ind, age in enumerate(ages):
                        diff = abs(y[ind].item() - age)
                        for threshold in (3, 5, 7):
                            if diff < threshold:
                                ca[threshold] += 1

                # Loss and accuracies are tracked for validation runs only.
                if criterion is not None:
                    loss1 = L.kl_loss(outputs, lbl)
                    loss2 = L.L1_loss(ages, y)
                    loss3 = criterion(gen, g)
                    loss4 = criterion(race, r)
                    loss = loss1 + loss2 + loss3 + loss4
                    cur_loss = loss.item()

                    correct_num = (abs(ages - y) < 1).sum().item()
                    gen = F.softmax(gen, dim=1)
                    correct_gender += torch.sum(torch.argmax(gen, 1) == g).item()
                    race = F.softmax(race, 1)
                    correct_race += torch.sum(torch.argmax(race, 1) == r).item()

                    sample_num = x.size(0)
                    loss_monitor.update(cur_loss, sample_num)
                    accuracy_monitor.update(correct_num, sample_num)
                    _tqdm.set_postfix(
                        OrderedDict(stage="val", epoch=epoch,
                                    loss=loss_monitor.avg),
                        acc=accuracy_monitor.avg,
                        correct=correct_num,
                        sample_num=sample_num)

    preds = np.concatenate(preds, axis=0)
    gt = np.concatenate(gt, axis=0)
    mae = np.abs(preds - gt).mean()
    print("Gender accu:", correct_gender / val_count)
    print("Race accu:", correct_race / val_count)

    if ca is not None:
        for threshold in ca:
            ca[threshold] = ca[threshold] / val_count
        # Reported only when CA was requested: ``ca`` is None otherwise.
        print("\n")
        # CA7 previously used "{:2f}" (width 2, six decimals) by mistake.
        print("CA3: {:.2f} CA5: {:.2f} CA7: {:.2f}".format(
            ca[3] * 100, ca[5] * 100, ca[7] * 100))
    return loss_monitor.avg, accuracy_monitor.avg, mae, ca
def main():
    """Train with per-epoch validation, saving the model when MAE improves.

    The learning rate is 0.001 for the first 30 epochs and 0.0001 afterwards.
    Each step minimises ``loss.kl_loss(outputs, label)`` plus
    ``loss.L1_loss(ages, age)``, where ``ages`` is the sum of
    ``outputs * rank`` over dimension 1 and ``rank`` holds 0..100.

    After each epoch the validation set is evaluated. The MAE is the summed
    absolute age error divided by the number of validation samples. Whenever
    it improves on the best value so far, the whole model is saved under
    ``checkpoint/`` with epoch, dataset, validation loss, MAE, date and model
    name encoded in the file name.

    Raises:
        RuntimeError: If the validation loader yields no samples.
    """
    import os

    model = get_model()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    loader = data.Data(args, "train").train_loader
    val_loader = data.Data(args, "valid").valid_loader
    rank = torch.Tensor(list(range(101))).to(device)
    best_mae = 10000

    # torch.save does not create missing parent directories.
    os.makedirs("checkpoint", exist_ok=True)

    for i in range(args.epochs):
        lr = 0.001 if i < 30 else 0.0001
        # NOTE(review): the optimizer is rebuilt every epoch, which presumably
        # discards any optimizer state (e.g. momentum) - confirm intended.
        optimizer = utils.make_optimizer(args, model, lr)
        model.train()
        print('Learning rate:{}'.format(lr))

        for j, inputs in enumerate(loader):
            img, label, age = inputs
            img = img.to(device)
            label = label.to(device)
            age = age.to(device)

            optimizer.zero_grad()
            outputs = model(img)
            ages = torch.sum(outputs * rank, dim=1)
            loss1 = loss.kl_loss(outputs, label)
            loss2 = loss.L1_loss(ages, age)
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()
            print('[Epoch:{}] \t[batch:{}]\t[loss={:.4f}]'.format(
                i, j, total_loss.item()))
        torch.cuda.empty_cache()

        # ---- validation ----
        model.eval()
        count = 0
        abs_error_sum = 0.0
        val_loss_sum = 0.0
        with torch.no_grad():
            for inputs in val_loader:
                img, label, age = inputs
                count += len(age)
                img = img.to(device)
                label = label.to(device)
                age = age.to(device)

                outputs = model(img)
                ages = torch.sum(outputs * rank, dim=1)
                loss1 = loss.kl_loss(outputs, label)
                loss2 = loss.L1_loss(ages, age)
                # Keep running totals as Python floats rather than tensors.
                val_loss_sum += (loss1 + loss2).item()
                abs_error_sum += torch.sum(abs(ages - age)).item()

        if count == 0:
            raise RuntimeError("validation loader produced no samples")

        mae = abs_error_sum / count
        # NOTE(review): per-batch losses are summed and divided by the sample
        # count, as in the original; whether that is a true per-sample mean
        # depends on the reduction used by the loss functions - confirm.
        val_loss = val_loss_sum / count

        if mae < best_mae:
            print(
                "Epoch: {}\tVal loss: {:.5f}\tVal MAE: {:.4f} improved from {:.4f}"
                .format(i, val_loss, mae, best_mae))
            best_mae = mae
            torch.save(
                model,
                "checkpoint/epoch{:03d}_{}_{:.5f}_{:.4f}_{}_{}.pth".format(
                    i, args.dataset, val_loss, best_mae,
                    datetime.now().strftime("%Y%m%d"), args.model_name))
        else:
            print(
                "Epoch: {}\tVal loss: {:.5f}\tBest Val MAE: {:.4f} not improved, current MAE: {:.4f}"
                .format(i, val_loss, best_mae, mae))
        torch.cuda.empty_cache()
def main():
    """Train with per-epoch validation and per-age-group hit statistics.

    The learning rate is 0.001 for the first 30 epochs and 0.0001 afterwards.
    Each step minimises ``loss.kl_loss(outputs, label)`` plus
    ``loss.L1_loss(ages, age)``, where ``ages`` is the sum of
    ``outputs * rank`` over dimension 1 and ``rank`` holds 0..100.

    During validation two counters are kept per age group (seven groups,
    indexed by ``get_group``; the original code labelled them "0-5", "6-10",
    "11-20", "21-30", "31-40", "41-60", "61-"):

    * ``correct_count``: predictions within one year of the true age;
    * ``correct_group``: the above plus predictions falling into the same
      group as the true age.

    The model is saved under ``checkpoint/`` whenever the validation MAE
    improves. The counters of that best epoch are printed at the end.

    Raises:
        RuntimeError: If the validation loader yields no samples.
    """
    import os

    model = get_model()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    loader = data.Data(args, "train").train_loader
    val_loader = data.Data(args, "valid").valid_loader
    # Use the selected device; a bare .cuda() would defeat the CPU fallback.
    rank = torch.Tensor(list(range(101))).to(device)
    best_mae = 10000
    best_rate = None
    group_count = torch.zeros(7)
    to_count = True  # group sizes are counted during the first validation only

    # torch.save does not create missing parent directories.
    os.makedirs("checkpoint", exist_ok=True)

    for i in range(args.epochs):
        lr = 0.001 if i < 30 else 0.0001
        # NOTE(review): the optimizer is rebuilt every epoch, which presumably
        # discards any optimizer state (e.g. momentum) - confirm intended.
        optimizer = utils.make_optimizer(args, model, lr)
        model.train()
        print('Learning rate:{}'.format(lr))

        for j, inputs in enumerate(loader):
            img, label, age = inputs
            img = img.to(device)
            label = label.to(device)
            age = age.to(device)

            optimizer.zero_grad()
            outputs = model(img)
            ages = torch.sum(outputs * rank, dim=1)
            loss1 = loss.kl_loss(outputs, label)
            loss2 = loss.L1_loss(ages, age)
            total_loss = loss1 + loss2
            total_loss.backward()
            optimizer.step()
            print('[Epoch:{}] \t[batch:{}]\t[loss={:.4f}]'.format(
                i, j, total_loss.item()), end=" ")
        torch.cuda.empty_cache()

        # ---- validation ----
        model.eval()
        count = 0
        abs_error_sum = 0.0
        val_loss_sum = 0.0
        correct_count = torch.zeros(7)
        correct_group = torch.zeros(7)
        with torch.no_grad():
            for inputs in val_loader:
                img, label, age = inputs
                if to_count:
                    for p in age:
                        group_count[get_group(p.item())] += 1
                count += len(age)
                img = img.to(device)
                label = label.to(device)
                age = age.to(device)

                outputs = model(img)
                ages = torch.sum(outputs * rank, dim=1)
                loss1 = loss.kl_loss(outputs, label)
                loss2 = loss.L1_loss(ages, age)
                # Keep running totals as Python floats rather than tensors.
                val_loss_sum += (loss1 + loss2).item()
                abs_error_sum += torch.sum(abs(ages - age)).item()

                for ind, a in enumerate(ages):
                    true_age = age[ind].item()
                    true_group = get_group(true_age)
                    if abs(true_age - a) < 1:
                        # Within one year counts for both statistics.
                        correct_count[true_group] += 1
                        correct_group[true_group] += 1
                    elif true_group == get_group(a):
                        correct_group[true_group] += 1

        if count == 0:
            raise RuntimeError("validation loader produced no samples")

        mae = abs_error_sum / count
        val_loss = val_loss_sum / count

        if to_count:
            # Empty groups get a count of 1 so the rate divisions below are
            # well defined (their hit counters are zero anyway).
            group_count[group_count == 0] = 1
            to_count = False

        print("\nCorrect group rate:")
        print(correct_group / group_count)
        print("Correct age rate:")
        print(correct_count / group_count)
        rate = (correct_group, correct_count)

        if mae < best_mae:
            print(
                "Epoch: {}\tVal loss: {:.5f}\tVal MAE: {:.4f} improved from {:.4f}"
                .format(i, val_loss, mae, best_mae))
            best_mae = mae
            torch.save(
                model,
                "checkpoint/epoch{:03d}_{}_{:.5f}_{:.4f}_{}_{}_pretraining.pth"
                .format(i, args.dataset, val_loss, best_mae,
                        datetime.now().strftime("%Y%m%d"), args.model_name))
            best_rate = rate
        else:
            print(
                "Epoch: {}\tVal loss: {:.5f}\tBest Val MAE: {:.4f} not improved, current MAE: {:.4f}"
                .format(i, val_loss, best_mae, mae))
        torch.cuda.empty_cache()

    if best_rate is None:
        # No epoch ran or none improved on the initial MAE bound.
        print("Finish, no epoch improved the MAE; nothing to report")
        return

    # Report the statistics of the best epoch, not merely of the last one.
    print("Finish, with best MAE")
    print("Correct group:")
    print(best_rate[0])
    print(best_rate[0] / group_count)
    print("Correct age:")
    print(best_rate[1])
    print(best_rate[1] / group_count)