def train(train_loader, net, criterion, optimizer, epoch, train_args, total_num_paramters): train_loss = AverageMeter() # curr_iter : total dataset per epoch curr_iter = (epoch - 1) * len(train_loader) index = 0 start_time = time.time() net.train() for step, data in enumerate(train_loader): predictions_all = [] visual = [] inputs, labels = data assert inputs.size()[2:] == labels.size()[1:] N = inputs.size(0) inputs = Variable(inputs).cuda() labels = Variable(labels).cuda() optimizer.zero_grad() outputs = net(inputs) assert outputs.size()[2:] == labels.size()[1:] assert outputs.size()[1] == segmentation_dataloader.num_classes before_op_time = timeit.default_timer() loss = criterion(outputs, labels) duration = timeit.default_timer() - before_op_time loss.backward() optimizer.step() batch_time = time.time() - start_time train_loss.update(loss.data[0], N) curr_iter += 1 writer.add_scalar('train_loss', train_loss.avg, curr_iter) if (step + 1) % train_args.print_frequency == 0: examples_time = args.train_batch_size / duration print( 'epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5f' 's' % (epoch, step + 1, len(train_loader), train_loss.avg, examples_time, batch_time)) # SAVE THE IMAGES AND THE MODEL if (step + 1) % train_args.model_freq == 0: torch.save( net.state_dict(), os.path.join(ckpt_path, 'Model', ImageNet, exp_name_ImageNet, 'model-{}'.format(step + 1) + '.pkl')) data_transform = standard_transforms.ToTensor() np_outputs = outputs.data.cpu().numpy() result = np_outputs.argmax(axis=1) predictions_all.append(result) else: continue predictions_all = np.concatenate(predictions_all) for idx, data in enumerate(predictions_all): predictions_pil = segmentation_dataloader.colorize_mask(data) predictions = data_transform(predictions_pil.convert('RGB')) visual.extend([predictions]) visual = torch.stack(visual, 0) visual = vutils.make_grid(visual, nrow=1, padding=0) # result = np_outputs.argmax(axis=1)[0] # row, col = result.shape # dst = 
np.zeros((row, col, 3), dtype=np.uint8) # # for i in range(19): # dst[result == i] = COLOR_MAP[i] # dst = np.array(dst, dtype=np.uint8) # dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR) # if not os.path.exists(os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet, 'prediction')): # os.makedirs(os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet, 'prediction')) # # cv2.imwrite(os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet, 'prediction/%06d.png' % # epoch), dst) writer.add_image('Output_image_{}'.format(epoch), visual) with open( os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet, 'LR_v0{}_{}.txt'.format(x, version)), 'a') as LRtxt: LRtxt.write("index : {}, epoch : {}, learning rate : {: f}".format( index, epoch, optimizer.param_groups[0]['lr']) + '\n') index += 1
def train(train_loader, net, criterion, optimizer, epoch, train_args, train_set):
    """Run one training epoch; also snapshots the driving script for reproducibility.

    Copies `segmentation_main2.py` next to the tensorboard logs, trains for one
    pass over `train_loader` while logging the running average loss, applies a
    polynomial learning-rate decay once at the end of the epoch, and appends
    the resulting learning rate to a per-version text file.

    NOTE(review): this file defines `train` several times; later definitions
    shadow earlier ones — confirm which variant the caller actually uses.

    NOTE(review): relies on module-level names not visible in this chunk:
    `AverageMeter`, `args`, `writer`, `segmentation_dataloader`, `ckpt_path`,
    `ImageNet`, `exp_name_ImageNet`, `x`, `version`, `poly_lr_scheduler`,
    `np`, `os`, `time`, `Variable`.

    Args:
        train_loader: iterable of (inputs, labels) batches.
        net: model being trained; inputs/labels are moved to CUDA here.
        criterion: loss function applied to (outputs, labels).
        optimizer: stepped once per batch; lr decayed per epoch at the end.
        epoch: 1-based epoch index, used to resume the global iteration count
            and to drive the lr schedule.
        train_args: namespace providing `print_frequency`.
        train_set: dataset object, used only for len() bookkeeping prints.
    """
    # Preserve the exact source used for this run alongside its logs.
    # NOTE(review): hard-coded absolute path to the source script.
    import shutil
    src = "/home/mk/Semantic_Segmentation/DenseASPP-master/My_train/segmentation_main2.py"
    copy_path = os.path.join(
        ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
        "segmentation_main2_" + "v_0{}_{}.py".format(x, version))
    shutil.copy(src, copy_path)
    net.train()
    # Running averages over the epoch.
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    examples_time = AverageMeter()
    num_training_samples = len(train_set)
    steps_per_epoch = np.ceil(num_training_samples / args.train_batch_size).astype(np.int32)
    num_total_steps = args.num_epochs * steps_per_epoch
    print("total number of samples: {}".format(num_training_samples))
    print("total number of steps : {}".format(num_total_steps))
    # curr_iter : global iteration counter resumed from previous epochs
    # (len(train_loader) iterations per epoch).
    curr_iter = (epoch - 1) * len(train_loader)
    # COUNT_PARAMS: total number of trainable parameter elements.
    total_num_paramters = 0
    for param in net.parameters():
        total_num_paramters += np.array(list(param.size())).prod()
    print("number of trainable parameters: {}".format(total_num_paramters))
    # for step in range(num_total_steps):
    #     if step and step % 100 == 0:
    #         time_sofar = (time.time() - start_time) / 3600
    #         training_time_left = (num_total_steps / step - 1.0) * time_sofar
    # Data = [[train_loader], [range(num_total_steps)]]
    # for [[i, data], step] in Data :
    index = 0
    start_time = time.time()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        # Spatial dims of inputs (N, C, H, W) must match labels (N, H, W).
        assert inputs.size()[2:] == labels.size()[1:]
        N = inputs.size(0)  # batch size, weights the loss running average
        # Pre-0.4 PyTorch style: wrap tensors in Variable before use.
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == segmentation_dataloader.num_classes
        # Time only the loss computation; used for the examples/s figure below.
        before_op_time = time.time()
        # loss = torch.nn.functional.cross_entropy(input=outputs, target=labels, ignore_index=segmentation_dataloader.ignore_label)
        loss = criterion(outputs, labels)
        duration = time.time() - before_op_time
        loss.backward()
        optimizer.step()
        # Cumulative wall-clock time since the epoch started.
        batch_time.update(time.time() - start_time)
        # why use N?? N is batch size?
        # loss.data[0]: pre-0.4 PyTorch scalar extraction (loss.item() in 0.4+).
        train_loss.update(loss.data[0], N)
        curr_iter += 1
        # [[ writer.add_scalar ]]
        # writer.add_scalar('myscalar', value, iteration)
        writer.add_scalar('train_loss', train_loss.avg, curr_iter)
        if (i + 1) % train_args.print_frequency == 0:
            # NOTE(review): reads global `args`, not `train_args` — confirm
            # both refer to the same configuration object.
            examples_time.update(args.train_batch_size / duration)
            # print_string = 'epoch {: %d} | iter { %d / %d} | train_loss: {%.5f} | time_elapsed: {%.2f}h'
            # print_string = 'batch {:>6} | examples/s: {:4.2f} | loss: {:.5f} | time elapsed: {:.2f}h | time left: {:.2f}h'
            # print(print_string.format(step, examples_per_sec, loss_value, time_sofar, training_time_left))
            print(
                'epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5f'
                's' % (epoch, i + 1, len(train_loader), train_loss.avg,
                       examples_time.avg, batch_time.avg))
    # Polynomial learning-rate decay, applied once per epoch.
    poly_lr_scheduler(optimizer=optimizer, init_lr=args.learning_rate, epoch=epoch - 1)
    # misc.PolyLR(optimizer=optimizer, curr_iter=epoch-1, max_iter=args.num_epochs, lr_decay=0.9)
    # Append the decayed learning rate to the per-version LR log file.
    # NOTE(review): `index` is function-local and written before being
    # incremented, so the logged index is always 0 — confirm intent.
    with open(
            os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
                         'LR_v0{}_{}.txt'.format(x, version)), 'a') as LRtxt:
        LRtxt.write("index : {}, epoch : {}, learning rate : {: f}".format(
            index, epoch, optimizer.param_groups[0]['lr']) + '\n')
    index += 1
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    """Evaluate the model on the validation set and report/record metrics.

    Runs a full pass over `val_loader`, computes per-class accuracies and
    mean IoU via `evaluate`, appends a per-class accuracy report (Cityscapes'
    19 classes) to a text file, and — when mean IoU improves on the best
    record — saves a model snapshot and (optionally) the input/gt/prediction
    images, both to disk and to tensorboardX. Restores train mode and returns
    the average validation loss.

    NOTE(review): relies on module-level names not visible in this chunk:
    `AverageMeter`, `Variable`, `random`, `np`, `torch`, `os`, `evaluate`,
    `segmentation_dataloader`, `check_mkdir`, `vutils`, `writer`,
    `ckpt_path`, `ImageNet`, `exp_name_ImageNet`, `x`, `version`.

    Args:
        val_loader: iterable of (inputs, gts) validation batches.
        net: model to evaluate; switched to eval() here and back to train()
            before returning.
        criterion: loss function applied to (outputs, gts).
        optimizer: only read for the current learning rate (logging).
        epoch: 1-based epoch index used for logging and snapshot naming.
        train_args: namespace providing `best_record` (mutated in place),
            `val_img_sample_rate` and `val_save_to_img_file`.
        restore: transform mapping a normalized input tensor back to a PIL
            image — presumably de-normalization; confirm against caller.
        visualize: transform mapping a PIL image to a tensor for make_grid.

    Returns:
        The average validation loss (float).
    """
    net.eval()
    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []
    for vi, data in enumerate(val_loader):
        inputs, gts = data
        N = inputs.size(0)  # batch size
        # Pre-0.4 PyTorch inference mode: volatile Variables skip autograd.
        inputs = Variable(inputs, volatile=True).cuda()
        gts = Variable(gts, volatile=True).cuda()
        outputs = net(inputs)
        # Per-pixel predicted class id: argmax over the class channel
        # (max(1) returns (values, indices); [1] takes the indices).
        predictions = outputs.data.max(1)[1].squeeze_(1).cpu().numpy()
        # NOTE(review): the batch loss is divided by N *and* weighted by N in
        # the meter — confirm this double normalization is intentional.
        val_loss.update(criterion(outputs, gts).data[0] / N, N)
        # validation_loss = torch.nn.functional.cross_entropy(input=outputs, target=gts, ignore_index=segmentation_dataloader.ignore_label)
        # val_loss.update(validation_loss.data[0] / N, N)
        # Randomly subsample inputs kept for visualization; None keeps the
        # index aligned with gts/predictions below.
        for i in inputs:
            if random.random() > train_args.val_img_sample_rate:
                inputs_all.append(None)
            else:
                inputs_all.append(i.data.cpu())
        gts_all.append(gts.data.cpu().numpy())
        predictions_all.append(predictions)
    gts_all = np.concatenate(gts_all)
    predictions_all = np.concatenate(predictions_all)
    acc, acc_cls, acc_cls_mean, mean_iu, fwavacc = evaluate(
        predictions_all, gts_all, segmentation_dataloader.num_classes)
    num_validate = epoch
    # Append the per-class accuracy report (19 Cityscapes classes, percent).
    with open(
            os.path.join(ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
                         'class_accuracy{}_{}.txt'.format(x, version)),
            'a') as acc_cls_txt:
        acc_cls_txt.write(
            "================================the number of validation : {}================================"
            "\nroad: {}, \nsidewalk: {}, \nbuilding: {}, \nwall: {}, \nfence: {}, \npole: {}, \ntraffic light: {}, \ntraffic sign: {},"
            "\nvegetation: {}, \nterrain: {}, \nsky: {}, \nperson: {}, \nrider: {}, \ncar: {}, \ntruck: {}, \nbus: {}, \ntrain: {}, \nmotorcycle: {},"
            "\nbicycle: {}\n\n".format(
                num_validate, acc_cls[0] * 100, acc_cls[1] * 100,
                acc_cls[2] * 100, acc_cls[3] * 100, acc_cls[4] * 100,
                acc_cls[5] * 100, acc_cls[6] * 100, acc_cls[7] * 100,
                acc_cls[8] * 100, acc_cls[9] * 100, acc_cls[10] * 100,
                acc_cls[11] * 100, acc_cls[12] * 100, acc_cls[13] * 100,
                acc_cls[14] * 100, acc_cls[15] * 100, acc_cls[16] * 100,
                acc_cls[17] * 100, acc_cls[18] * 100))
    # New best mean IoU: update the record, snapshot the model, and
    # optionally dump/visualize sampled validation images.
    if mean_iu > train_args.best_record['mean_iu']:
        train_args.best_record['val_loss'] = val_loss.avg
        train_args.best_record['epoch'] = epoch
        train_args.best_record['acc'] = acc
        # acc_cls : accuracy class
        train_args.best_record['acc_cls_mean'] = acc_cls_mean
        # mean_iu : mean_intersection over union
        train_args.best_record['mean_iu'] = mean_iu
        # fwavacc : frequency weighted average accuracy
        train_args.best_record['fwavacc'] = fwavacc
        # snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        #     epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[0]['lr']
        # )
        snapshot_name = 'epoch_%d_loss_%.2f_acc_%.2f_acc-cls_%.2f_mean-iu_%.2f_fwavacc_%.2f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls_mean, mean_iu, fwavacc,
            optimizer.param_groups[0]['lr'])
        torch.save(
            net.state_dict(),
            os.path.join(ckpt_path, 'Model', ImageNet, exp_name_ImageNet,
                         snapshot_name + '_v0{}'.format(x) + '.pth'))
        # torch.save(optimizer.state_dict(),os.path.join(ckpt_path, 'Model', ImageNet, exp_name_ImageNet, 'opt_' + snapshot_name + '_v0{}'.format(x) + '.pth'))
        # setting path to save the val_img
        if train_args.val_save_to_img_file:
            # to_save_dir = os.path.join(ckpt_path, exp_name, 'epoch'+str(epoch)+'_v0{}'.format(x))
            to_save_dir = os.path.join(
                ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
                'epoch' + str(epoch) + '_v0{}'.format(x))
            check_mkdir(to_save_dir)
        val_visual = []
        for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
            # Skip samples not kept by the random subsampling above.
            if data[0] is None:
                continue
            # data[0] : inputs_all
            input_pil = restore(data[0])
            gt_pil = segmentation_dataloader.colorize_mask(data[1])
            predictions_pil = segmentation_dataloader.colorize_mask(data[2])
            if train_args.val_save_to_img_file:
                # saving the restored image
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(
                    os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            # input RGB image, gt image and prediction image are showed on tensorboardX
            val_visual.extend([
                visualize(input_pil.convert('RGB')),
                visualize(gt_pil.convert('RGB')),
                visualize(predictions_pil.convert('RGB'))
            ])
        val_visual = torch.stack(val_visual, 0)
        # [[ make_grid() ]]
        # make_grid function : prepare the image array and send the result to add_image()
        # --------------------- make_grid takes a 4D tensor and returns tiled images in 3D tensor ---------------------
        val_visual = vutils.make_grid(val_visual, nrow=3, padding=0)
        # [[ writer.add_image ]]
        # writer.add_image('imresult', x, iteration) : save the image.
        writer.add_image(snapshot_name, val_visual)
    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    print(
        '[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls_mean %.5f], [mean_iu %.5f], [fwavacc %.5f]'
        % (epoch, val_loss.avg, acc, acc_cls_mean, mean_iu, fwavacc))
    print(
        'best record: [val loss %.5f], [acc %.5f], [acc_cls_mean %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]'
        % (train_args.best_record['val_loss'], train_args.best_record['acc'],
           train_args.best_record['acc_cls_mean'],
           train_args.best_record['mean_iu'],
           train_args.best_record['fwavacc'],
           train_args.best_record['epoch']))
    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    # [[ add_scalar ]]
    # Adds many scalar data to summary.
    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls_mean', acc_cls_mean, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)
    # Back to training mode for the next epoch.
    net.train()
    return val_loss.avg
def train(train_loader, net, criterion, optimizer, epoch, train_args, train_set):
    """Run a single training epoch over `train_loader`.

    Per batch: forward pass, loss, backward pass, optimizer step, and a
    tensorboardX scalar for the running-average loss. Progress is printed
    every `train_args.print_frequency` batches; after the epoch the current
    learning rate is appended to a per-version text file.

    NOTE(review): this file defines `train` several times; later definitions
    shadow earlier ones — confirm which variant the caller actually uses.
    Depends on module-level names defined elsewhere in the file:
    `AverageMeter`, `args`, `writer`, `segmentation_dataloader`, `ckpt_path`,
    `ImageNet`, `exp_name_ImageNet`, `x`, `version`.

    Args:
        train_loader: iterable of (inputs, labels) batches.
        net: model being trained; inputs/labels are moved to CUDA here.
        criterion: loss function applied to (outputs, labels).
        optimizer: optimizer stepped once per batch.
        epoch: 1-based epoch index, used to resume the global iteration count.
        train_args: namespace providing `print_frequency`.
        train_set: dataset object, used only for len() bookkeeping prints.
    """
    train_loss = AverageMeter()

    # One-time bookkeeping prints for this run.
    num_training_samples = len(train_set)
    steps_per_epoch = np.ceil(
        num_training_samples / args.train_batch_size).astype(np.int32)
    num_total_steps = args.num_epochs * steps_per_epoch
    print("total number of samples: {}".format(num_training_samples))
    print("total number of steps : {}".format(num_total_steps))

    # Global iteration counter, resumed from previous epochs.
    curr_iter = (epoch - 1) * len(train_loader)

    # Total count of trainable parameter elements.
    total_num_paramters = 0
    for weight in net.parameters():
        total_num_paramters += np.array(list(weight.size())).prod()
    print("number of trainable parameters: {}".format(total_num_paramters))

    index = 0
    start_time = time.time()
    net.train()

    for step, batch in enumerate(train_loader):
        inputs, labels = batch
        # Spatial dims of inputs (N, C, H, W) must match labels (N, H, W).
        assert inputs.size()[2:] == labels.size()[1:]
        N = inputs.size(0)  # batch size, weights the loss running average
        # Pre-0.4 PyTorch style: wrap tensors in Variable before use.
        inputs, labels = Variable(inputs).cuda(), Variable(labels).cuda()

        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == segmentation_dataloader.num_classes

        # Time only the loss computation; feeds the examples/s figure below.
        before_op_time = timeit.default_timer()
        loss = criterion(outputs, labels)
        duration = timeit.default_timer() - before_op_time

        loss.backward()
        optimizer.step()

        # Cumulative wall-clock time since the epoch started.
        batch_time = time.time() - start_time
        # loss.data[0]: pre-0.4 PyTorch scalar extraction (loss.item() in 0.4+).
        train_loss.update(loss.data[0], N)
        curr_iter += 1
        writer.add_scalar('train_loss', train_loss.avg, curr_iter)

        if (step + 1) % train_args.print_frequency == 0:
            # NOTE(review): reads global `args`, not `train_args` — confirm
            # both refer to the same configuration object.
            examples_time = args.train_batch_size / duration
            print('epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5fs'
                  % (epoch, step + 1, len(train_loader), train_loss.avg,
                     examples_time, batch_time))

    # Append the current learning rate to the per-version LR log file.
    lr_log_path = os.path.join(ckpt_path, 'TensorboardX', ImageNet,
                               exp_name_ImageNet,
                               'LR_v0{}_{}.txt'.format(x, version))
    with open(lr_log_path, 'a') as LRtxt:
        LRtxt.write("index : {}, epoch : {}, learning rate : {: f}".format(
            index, epoch, optimizer.param_groups[0]['lr']) + '\n')
    index += 1