def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return (average loss, average accuracy).

    Args:
        train_loader: iterable of (inputs, target) mini-batches.
        model: network being trained (forward is run on CUDA).
        criterion: loss function applied to (output, target).
        optimizer: optimizer stepping the model's parameters.
        epoch: zero-based epoch index (printed as ``epoch + 1``).

    Returns:
        Tuple ``(losses.avg, acces.avg)`` — batch-size-weighted averages.
    """
    losses = AverageMeter()
    acces = AverageMeter()
    model.train()
    for i, (inputs, target) in enumerate(train_loader):
        output = model(inputs.cuda())
        # FIX: `target.cuda(async=True)` is a SyntaxError on Python >= 3.7
        # (`async` is a reserved keyword); the PyTorch argument is now
        # `non_blocking`, matching the other train/validate blocks in this file.
        target = target.cuda(non_blocking=True)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc = accuracy(output.data, target)
        losses.update(loss.item(), inputs.size(0))
        acces.update(acc[0], inputs.size(0))

        # backward
        optimizer.zero_grad()  # clear gradients out before each mini-batch
        loss.backward()
        optimizer.step()

        if (i + 1) % args.print_freq == 0:
            print('Epoch-{:<3d} {:3d} batches\t'
                  'loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'accu {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch + 1, i + 1, loss=losses, acc=acces))

    return losses.avg, acces.avg
def test(test_loader, model, checkpoint, lable_path, pred_path):
    """Evaluate the best checkpoint on the test set and dump predictions.

    Args:
        test_loader: iterable of (inputs, target) mini-batches.
        model: network whose weights are replaced by the checkpoint.
        checkpoint: path to a torch checkpoint with a 'state_dict' entry.
        lable_path: output path for the ground-truth labels (name kept for
            backward compatibility; 'label_path' is the intended spelling).
        pred_path: output path for the averaged per-sample predictions.
    """
    acces = AverageMeter()
    # load learnt model that obtained best performance on validation set
    model.load_state_dict(torch.load(checkpoint)['state_dict'])
    model.eval()

    label_output = list()
    pred_output = list()

    t_start = time.time()
    for i, (inputs, target) in enumerate(test_loader):
        with torch.no_grad():
            output = model(inputs.cuda())
            # Each sample contributes inputs.size(0) // target.size(0) crops;
            # reshape to (samples, crops, classes) and average over crops.
            output = output.view(
                (-1, inputs.size(0) // target.size(0), output.size(1)))
            output = output.mean(1)
            label_output.append(target.cpu().numpy())
            pred_output.append(output.cpu().numpy())
            # FIX: `async=True` is a SyntaxError on Python >= 3.7; the
            # PyTorch keyword is now `non_blocking`.
            acc = accuracy(output.data, target.cuda(non_blocking=True))
            acces.update(acc[0], inputs.size(0))

    label_output = np.concatenate(label_output, axis=0)
    np.savetxt(lable_path, label_output, fmt='%d')
    pred_output = np.concatenate(pred_output, axis=0)
    np.savetxt(pred_path, pred_output, fmt='%f')

    print('Test: accuracy {:.3f}, time: {:.2f}s'.format(
        acces.avg, time.time() - t_start))
def train(train_loader, model, criterion, optimizer, epoch):
    """Train the model for a single epoch.

    Inputs are cast to float32 before the CUDA forward pass. Returns the
    tuple (mean loss, mean accuracy), both weighted by batch size.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.train()

    for batch_idx, (inputs, target) in enumerate(train_loader):
        inputs = inputs.float()
        output = model(inputs.cuda())
        target = target.cuda()
        loss = criterion(output, target)

        # Track running loss/accuracy, weighted by batch size.
        batch_size = inputs.size(0)
        top1 = accuracy(output.data, target)
        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(top1[0], batch_size)

        # Standard SGD step: reset grads, backprop, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (batch_idx + 1) % args.print_freq == 0:
            print('Epoch-{:<3d} {:3d} batches\t'
                  'loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'accu {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch + 1, batch_idx + 1,
                      loss=loss_meter, acc=acc_meter))

    return loss_meter.avg, acc_meter.avg
def val_acc(val_loader, model):
    """Return the average top-1 accuracy of `model` over `val_loader[0]`.

    Args:
        val_loader: tuple-like whose element [0] is the actual DataLoader
            (elements [1]/[2] carry raw data/label lists — see main()).
        model: network to evaluate on `device`.

    NOTE(review): this runs with gradients enabled and without
    `model.eval()`; callers appear to rely on the surrounding code to set
    the mode — confirm before adding `torch.no_grad()` here.
    """
    # FIX: dropped `acc_list`, which was built but never read (dead code).
    top1 = AverageMeter()
    for input_var, target_var in val_loader[0]:
        input_var, target_var = input_var.to(device), target_var.to(device)
        output = model(input_var)
        prec1 = accuracy(output.data, target_var)
        top1.update(prec1[0])
    return top1.avg
def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader`; return (avg loss, avg accuracy).

    Args:
        val_loader: iterable of (inputs, target) mini-batches.
        model: network to evaluate (put into eval mode here).
        criterion: loss function applied to (output, target).

    Returns:
        Tuple ``(losses.avg, acces.avg)`` — batch-size-weighted averages.
    """
    losses = AverageMeter()
    acces = AverageMeter()
    model.eval()
    # FIX: the original opened two separate `torch.no_grad()` contexts per
    # batch (one for the forward pass, one for the loss) with the target
    # transfer in between; a single context covering the whole loop is
    # equivalent and cheaper.
    with torch.no_grad():
        for i, (inputs, target) in enumerate(val_loader):
            output = model(inputs.cuda())
            target = target.cuda(non_blocking=True)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc = accuracy(output.data, target)
            losses.update(loss.item(), inputs.size(0))
            acces.update(acc[0], inputs.size(0))

    return losses.avg, acces.avg
def validate(val_loader, model, criterion, batch, task, viw, replay):
    """Evaluate one task's validation split and update the replay memory.

    Args:
        val_loader: tuple-like; [0] is the DataLoader, [1]/[2] are the raw
            data/label lists consumed by `replay.update_replay`.
        model: network to evaluate.
        criterion: loss function.
        batch: index of the task being evaluated.
        task: index of the task currently being trained.
        viw: loader zipped with val_loader[0]; its items are unused — the
            zip only bounds the number of evaluated batches.
        replay: ReplayMemory instance; updated and returned.

    Side effects: appends a summary to "val.txt" and updates the module
    global `avg_acc` (running mean of per-task accuracy).

    NOTE(review): gradients are not disabled here; some call sites in this
    file wrap the call in `torch.no_grad()` and some do not — confirm.
    """
    global avg_acc
    losses = AverageMeter()
    top1 = AverageMeter()
    acc_list = []
    # `d1` is deliberately unused: zipping with `viw` just limits iteration.
    for data, d1 in zip(val_loader[0], viw):
        input_var, target_var = data
        input_var, target_var = input_var.to(device), target_var.to(device)
        output = model(input_var)
        loss = criterion(output, target_var)
        prec1 = accuracy(output.data, target_var)
        acc_list.append(prec1[0].cpu())
        losses.update(loss.item())
        top1.update(prec1[0])

    print("Replay:", len(replay.replaybuffer), "Task:", task)
    if batch == task:
        # Current task: record per-batch accuracies as the best-so-far.
        replay.update_best(list(np.array(acc_list)), task)
    if batch < task:
        # Earlier task: refresh its replay entries from the raw lists.
        print(len(val_loader[1]))
        replay.update_replay(acc_list, val_loader[1], val_loader[2], batch,
                             task)

    print('Batch:{} '
          'Acc:{} '
          'Loss:{} '.format(batch, top1.avg, losses.avg))

    # FIX: context manager replaces the bare open()/close() pair, which
    # leaked the file handle if a write raised.
    with open("val.txt", "a") as f:
        f.write('Trained Batch: {}'.format(task))
        f.write("\n")
        f.write('Batch{}, Loss :{}, top1:{}'.format(batch, losses.avg,
                                                    top1.avg))
        f.write("\n")

    # Running mean of accuracy over tasks 0..batch.
    avg_acc = (avg_acc * (batch) + top1.avg) / (batch + 1)
    print(avg_acc)
    return replay
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one silent training epoch (no progress printing).

    Returns (average loss, average accuracy), each weighted by batch size.
    The `epoch` argument is accepted for signature parity with the other
    train() variants but is not used here.
    """
    running_loss = AverageMeter()
    running_acc = AverageMeter()
    model.train()

    for samples, labels in train_loader:
        preds = model(samples.cuda())
        labels = labels.cuda(non_blocking=True)
        batch_loss = criterion(preds, labels)

        # Record loss/accuracy before the parameter update.
        n = samples.size(0)
        top1 = accuracy(preds.data, labels)
        running_loss.update(batch_loss.item(), n)
        running_acc.update(top1[0], n)

        # Reset gradients each mini-batch, then backprop and step.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    return running_loss.avg, running_acc.avg
def train(train_loader, val_loader, train_task, model, criterion, optimizer,
          epoch, par, replay, flag, args):
    # NOTE(review): the source line was collapsed onto a single physical
    # line; the indentation below is a reconstruction — verify block
    # nesting (especially the trailing leftover-replay loop) against the
    # original repository.
    """One continual-learning epoch: train on the current task while
    interleaving one replayed mini-batch after each fresh mini-batch.

    Args:
        train_loader: tuple-like; [0] is the current task's DataLoader.
        val_loader: truthy when a replay stream should be interleaved
            (task 0 passes a falsy value here).
        train_task: index of the task being trained (logging only).
        model, criterion, optimizer: the usual training triple.
        epoch: current epoch number (logging / scheduling).
        par: iterable of parameter tensors used for the gradient-norm log.
        replay: DataLoader over the replay memory; fully materialized into
            `val` up front so batches can be indexed by a cursor.
        flag: when falsy, the main training loop is skipped entirely.
        args: needs `disp_iter` and `num_epoch`.

    Returns:
        The loss AverageMeter accumulated over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()  # allocated but never updated in this version
    val = [1]  # placeholder so len(val) is 1 when there is no replay
    if val_loader:
        # Materialize the whole replay loader so val[f] indexing works.
        val = [data for data in replay]
        print(len(val))
    for param_group in optimizer.param_groups:
        lr_log = param_group['lr']
    f = 0  # cursor into the replay batches, advanced modulo len(val)
    total_norm = 0
    end = time.time()
    i = -1
    if flag:
        for data in train_loader[0]:
            i += 1
            # measure data loading time
            input_var, target_var = data
            input_var, target_var = input_var.to(device), target_var.to(device)
            data_time.update(time.time() - end)
            # NOTE(review): `total_norm` is carried across iterations and
            # re-square-rooted each batch, so the printed "Grad" value is
            # not a per-batch parameter norm — confirm whether this is
            # intentional diagnostic smoothing or a bug.
            for p in par:
                total_norm += torch.sum(torch.abs(p))
            total_norm = total_norm**(1. / 2)
            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)
            # measure accuracy and record loss
            prec1 = accuracy(output.data, target_var)
            losses.update(loss.item())
            top1.update(prec1[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # print("Memory", torch.cuda.memory_allocated()/1e9, torch.cuda.max_memory_allocated()/1e9)
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.disp_iter == 0:
                print('Batch: {batch} '
                      'Epoch: [{0}][{1}/{2}] '
                      'LR: {lr}'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Loss {loss.val:.7f} ({loss.avg:.4f}) '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                      'Grad:{norm}'.format(epoch, i, len(train_loader[0]),
                                           batch_time=batch_time,
                                           data_time=data_time,
                                           loss=losses,
                                           top1=top1,
                                           batch=train_task,
                                           norm=total_norm,
                                           lr=lr_log))
            if val_loader and len(val):
                # Interleave one replayed batch after each fresh batch.
                input_var, target_var = val[f]
                input_var, target_var = input_var.to(device), target_var.to(
                    device)
                output = model(input_var)
                #output = metric_fc(feature, target_var)
                loss = criterion(output, target_var)
                # measure accuracy and record loss
                prec1 = accuracy(output.data, target_var)
                losses.update(loss.item())
                top1.update(prec1[0])
                #compute gradient and do SGD step
                f = (f + 1) % (len(val))  # wrap the replay cursor
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if i % args.disp_iter == 0:
                    print('Batch_Replay: {batch} '
                          'Epoch: [{0}][{1}/{2}] '
                          'LR: {lr}'
                          'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                          'Loss {loss.val:.7f} ({loss.avg:.4f}) '
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                          'Grad:{norm}'.format(epoch, i,
                                               len(train_loader[0]),
                                               data_time=data_time,
                                               loss=losses,
                                               top1=top1,
                                               batch=train_task,
                                               norm=total_norm,
                                               lr=lr_log))
    # If the replay memory holds more batches than the task loader, train on
    # the leftover replay batches that the interleaving above never reached.
    if len(val) > len(train_loader[0]):
        for j in range(f, len(val)):
            input_var, target_var = val[j]
            input_var, target_var = input_var.to(device), target_var.to(device)
            for p in par:
                total_norm += torch.sum(torch.abs(p))
            total_norm = total_norm**(1. / 2)
            output = model(input_var)
            loss = criterion(output, target_var)
            # measure accuracy and record loss
            prec1 = accuracy(output.data, target_var)
            losses.update(loss.item())
            top1.update(prec1[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if j % args.disp_iter == 0:
                print('Batch: {batch} '
                      'Epoch: [{0}][{1}/{2}] '
                      'LR: {lr}'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                      'Loss {loss.val:.7f} ({loss.avg:.4f}) '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                      'Grad:{norm}'.format(epoch, i, len(val_loader[0]),
                                           batch_time=batch_time,
                                           loss=losses,
                                           top1=top1,
                                           batch=train_task,
                                           norm=total_norm,
                                           lr=lr_log))
    adjust_learning_rate(optimizer, epoch)
    if epoch % 1 == 0:  # always true; presumably a tunable checkpoint cadence
        save_ckpt(losses, lr_log, top1, train_task, model, epoch,
                  args.num_epoch)
    return losses
def main(args):
    # NOTE(review): the source was collapsed onto single physical lines;
    # the indentation below is a reconstruction — verify nesting (notably
    # the post-epoch replay rebuild) against the original repository.
    """Continual-learning driver: trains MobileNetV2 over 12 sequential
    task batches, maintaining a replay memory refreshed from validation
    accuracy after each task."""
    #model = EfficientNet.from_name("efficientnet-b3").to(device)
    model = MobileNetV2().to(device)
    # Start fresh sections in both log files, headed by the run name.
    f = open("losses.txt", "a")
    f.write(test_name)
    f.write("\n")
    f.close()
    f = open("val.txt", "a")
    f.write(test_name)
    f.write("\n")
    f.close()
    optimizer, par = create_optimizer1(model, args)
    if args.load:
        # Resume from the weights saved after the first task batch.
        model.load_state_dict(torch.load("batch0"))
    if args.focal_loss:
        loss = FocalLoss(gamma=args.gamma).to(device)
    else:
        loss = nn.CrossEntropyLoss().to(device)
    # ImageNet-normalized pipeline for train/val; `transview` omits the
    # Normalize step — presumably so replay selection sees raw pixels
    # (TODO confirm).
    trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    transview = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    # One dataset per task id 1..12; each loader tuple also carries the raw
    # data/label lists consumed by the replay-memory updates.
    train_batch_list = [
        BatchData(args.train_dataset_path, 'train', i, trans)
        for i in range(1, 13)
    ]
    train_loader_list = [
        (torch.utils.data.DataLoader(batch,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=2), batch.data_list,
         batch.label_list) for batch in train_batch_list
    ]
    val_batch_list = [
        BatchData(args.val_dataset_path, 'validation', i, trans)
        for i in range(1, 13)
    ]
    val_loader_list = [(torch.utils.data.DataLoader(batch,
                                                    batch_size=args.batch_size,
                                                    shuffle=False,
                                                    num_workers=2),
                        batch.data_list, batch.label_list)
                       for batch in val_batch_list]
    view_batch_list = [
        BatchData(args.val_dataset_path, 'validation', i, transview)
        for i in range(1, 13)
    ]
    view_loader_list = [
        torch.utils.data.DataLoader(batch,
                                    batch_size=args.batch_size,
                                    shuffle=True,
                                    num_workers=2)
        for batch in view_batch_list
    ]
    replay1 = ReplayMemory(12, len(val_loader_list[0]), 40)
    replay = torch.utils.data.DataLoader(ReplayData(replay1.replaybuffer,
                                                    replay1.label, trans),
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=0)
    for train_task in range(12):
        model.train()
        if train_task != -1:  # always true; presumably a leftover debug guard
            best_acc = AverageMeter()
            los = AverageMeter()
        if train_task == 11:
            # Final task gets extra epochs.
            args.num_epoch = 30
        for epoch in range(1, args.num_epoch + 1):
            if train_task == 11 and epoch == 21:
                # Late LR drop on the final task.
                args.lr = 0.003
            if train_task == 0:
                # First task: no replay. `train_task > 0` (False) fills the
                # val_loader slot; replay=0, flag=1.
                los = train(train_loader_list[train_task], train_task > 0,
                            train_task, model, loss, optimizer, epoch, par,
                            0, 1, args)
            else:
                if epoch == 2:
                    # After one warm-up epoch, score the previous task and
                    # rebuild the replay loader from the refreshed memory.
                    replay1 = validate(val_loader_list[train_task - 1],
                                       model, loss, train_task - 1,
                                       train_task,
                                       view_loader_list[train_task - 1],
                                       replay1)
                    replay = torch.utils.data.DataLoader(
                        ReplayData(replay1.replaybuffer, replay1.label,
                                   trans),
                        batch_size=2 * args.batch_size,
                        shuffle=False,
                        num_workers=0)
                    print("Replay:", len(replay1.replaybuffer))
                los = train(train_loader_list[train_task],
                            val_loader_list[train_task], train_task, model,
                            loss, optimizer, epoch, par, replay, 1, args)
            acc = val_acc(val_loader_list[train_task], model)
            if acc > best_acc.avg:
                best_acc.update(acc)
            elif best_acc.avg - acc >= 1:
                # Stop this task early once accuracy drops a full point
                # below the running best.
                print(">>>>>>Early Stopping:", acc, best_acc.avg)
                lr_log = 0
                for param_group in optimizer.param_groups:
                    lr_log = param_group['lr']
                # save_ckpt(los, lr_log, best_acc, train_task, model, epoch, args.num_epoch)
                break
        # Rebuild the replay memory from scratch over all tasks seen so far.
        replay1.reset()
        for i in range(train_task + 1):
            model.eval()
            with torch.no_grad():
                replay1 = validate(val_loader_list[i], model, loss, i,
                                   train_task, view_loader_list[i], replay1)
        if train_task:
            replay = torch.utils.data.DataLoader(
                ReplayData(replay1.replaybuffer, replay1.label, trans),
                batch_size=2 * args.batch_size,
                shuffle=True,
                num_workers=0)
        else:
            replay = torch.utils.data.DataLoader(
                ReplayData(replay1.replaybuffer, replay1.label, trans),
                batch_size=2 * args.batch_size,
                shuffle=False,
                num_workers=0)