from tqdm import tqdm


def _train(network, criterion, trainLoader, device, optimizer):
    network.train()
    epoch_loss = 0
    epoch_acc = 0
    num_data = 0
    for input_x, target_y in tqdm(trainLoader):
        input_x, target_y = input_x.to(device), target_y.to(device)
        optimizer.zero_grad()
        predict_y = network(input_x)
        loss = criterion(predict_y, target_y)
        loss.backward()
        optimizer.step()
        # log data
        num_data += target_y.shape[0]
        epoch_loss += loss.item()
        epoch_acc += (predict_y.argmax(axis=1) == target_y).sum().item()
    mean_loss = epoch_loss / num_data
    mean_acc = epoch_acc / num_data
    return mean_loss, mean_acc
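# A minimal usage sketch (not part of the original snippet) showing how _train
# might be driven from an epoch loop. The model, optimizer, and dummy data
# below are illustrative assumptions, not the original project's setup.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(network.parameters(), lr=1e-3)

# Random tensors standing in for a real dataset.
dummy_x = torch.randn(256, 1, 28, 28)
dummy_y = torch.randint(0, 10, (256,))
trainLoader = DataLoader(TensorDataset(dummy_x, dummy_y), batch_size=32, shuffle=True)

for epoch in range(5):
    mean_loss, mean_acc = _train(network, criterion, trainLoader, device, optimizer)
    print(f'epoch {epoch}: loss={mean_loss:.6f} acc={mean_acc:.4f}')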
input = input.to(device)
target = target.to(device)

# Forward pass of the network.
output = net.forward(input)

# Gather the training stats.
stats.training.correctSamples += torch.sum(
    snn.predict.getClass(output) == label).data.item()
stats.training.numSamples += len(label)

# Calculate loss.
loss = error.numSpikes(output, target)

# Reset gradients to zero.
optimizer.zero_grad()

# Backward pass of the network.
loss.backward()

# Update weights.
optimizer.step()

# Gather training loss stats.
stats.training.lossSum += loss.cpu().data.item()

# Display training stats.
stats.print(epoch, i, (datetime.now() - tSt).total_seconds())

# Testing loop.
# Same steps as Training loops except loss backpropagation and weight update.
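# A hedged sketch of the testing loop described by the comment above, mirroring
# the training steps minus the gradient reset, backward pass, and weight update.
# The testLoader name, the loop unpacking, and the stats.testing fields are
# assumptions that follow the same conventions as the training code.
for i, (input, target, label) in enumerate(testLoader, 0):
    input = input.to(device)
    target = target.to(device)

    with torch.no_grad():
        # Forward pass only; no gradients are needed for evaluation.
        output = net.forward(input)

    # Gather the testing stats.
    stats.testing.correctSamples += torch.sum(
        snn.predict.getClass(output) == label).data.item()
    stats.testing.numSamples += len(label)

    # Calculate loss, but skip backpropagation and the optimizer step.
    loss = error.numSpikes(output, target)
    stats.testing.lossSum += loss.cpu().data.item()

    # Display testing stats.
    stats.print(epoch, i, (datetime.now() - tSt).total_seconds())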
def train(train_loader, net, criterion, optimizer, curr_epoch, writer):
    '''
    Runs the training loop per epoch
    train_loader: Data loader for train
    net: the network
    criterion: loss fn
    optimizer: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    '''
    net.train()

    train_main_loss = AverageMeter()
    train_edge_loss = AverageMeter()
    train_seg_loss = AverageMeter()
    train_att_loss = AverageMeter()
    train_dual_loss = AverageMeter()
    curr_iter = curr_epoch * len(train_loader)

    for i, data in enumerate(train_loader):
        if i == 0:
            print('running....')

        inputs, mask, edge, _img_name = data

        if torch.sum(torch.isnan(inputs)) > 0:
            import pdb
            pdb.set_trace()

        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)
        inputs, mask, edge = inputs.cuda(), mask.cuda(), edge.cuda()

        if i == 0:
            print('forward done')

        optimizer.zero_grad()

        main_loss = None
        loss_dict = None

        if args.joint_edgeseg_loss:
            loss_dict = net(inputs, gts=(mask, edge))

            if args.seg_weight > 0:
                log_seg_loss = loss_dict['seg_loss'].mean().clone().detach_()
                train_seg_loss.update(log_seg_loss.item(), batch_pixel_size)
                main_loss = loss_dict['seg_loss']

            if args.edge_weight > 0:
                log_edge_loss = loss_dict['edge_loss'].mean().clone().detach_()
                train_edge_loss.update(log_edge_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['edge_loss']
                else:
                    main_loss = loss_dict['edge_loss']

            if args.att_weight > 0:
                log_att_loss = loss_dict['att_loss'].mean().clone().detach_()
                train_att_loss.update(log_att_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['att_loss']
                else:
                    main_loss = loss_dict['att_loss']

            if args.dual_weight > 0:
                log_dual_loss = loss_dict['dual_loss'].mean().clone().detach_()
                train_dual_loss.update(log_dual_loss.item(), batch_pixel_size)
                if main_loss is not None:
                    main_loss += loss_dict['dual_loss']
                else:
                    main_loss = loss_dict['dual_loss']
        else:
            main_loss = net(inputs, gts=mask)

        main_loss = main_loss.mean()
        log_main_loss = main_loss.clone().detach_()
        train_main_loss.update(log_main_loss.item(), batch_pixel_size)

        main_loss.backward()
        optimizer.step()

        if i == 0:
            print('step 1 done')

        curr_iter += 1

        if args.local_rank == 0:
            msg = ('[epoch {}], [iter {} / {}], [train main loss {:0.6f}], '
                   '[seg loss {:0.6f}], [edge loss {:0.6f}], [lr {:0.6f}]'.format(
                       curr_epoch, i + 1, len(train_loader), train_main_loss.avg,
                       train_seg_loss.avg, train_edge_loss.avg,
                       optimizer.param_groups[-1]['lr']))
            logging.info(msg)

            # Log tensorboard metrics for each iteration of the training phase
            writer.add_scalar('training/loss', (train_main_loss.val), curr_iter)
            writer.add_scalar('training/lr', optimizer.param_groups[-1]['lr'], curr_iter)
            if args.joint_edgeseg_loss:
                writer.add_scalar('training/seg_loss', (train_seg_loss.val), curr_iter)
                writer.add_scalar('training/edge_loss', (train_edge_loss.val), curr_iter)
                writer.add_scalar('training/att_loss', (train_att_loss.val), curr_iter)
                writer.add_scalar('training/dual_loss', (train_dual_loss.val), curr_iter)

        if i > 5 and args.test_mode:
            return
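# AverageMeter is used above but not defined in this excerpt. Below is a
# minimal sketch of the standard running-average helper it is presumed to
# follow; the project's own class may differ in detail.
class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # val is the per-batch value; n weights it (here: pixels in the batch).
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count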
def trainNetwork(logging_path, loader, bt_size, eval_size, is_cuda, evle, net,
                 loss_func, optimizer, num_epochs, str_epochs, lr, arg_list=[]):
    if is_cuda > -1:
        net.cuda(is_cuda)
    print("Training for " + str(num_epochs))
    log = logger.Logger(logging_path)
    log.masterLog(net.getStructure(), loss_func, optimizer, lr)
    opt = optimizer(net, lr)
    for epoch in range(str_epochs, num_epochs + 1):
        print("Training epoch: " + str(epoch))
        tr_loss = 0.0
        tr_root_loss = 0.0
        tr_soil_loss = 0.0
        ev_loss = 0.0
        opt.zero_grad()
        bt_per_it = 1
        for bt_it in range(bt_per_it):
            # Load Data
            # bt_nbr = np.random.randint( num_bts )
            batch, teacher = loader.getBatchAndShuffle(bt_size)
            for it in range(bt_size - eval_size):
                num_slices = 2
                cut_it = int(round(batch.size()[4] / num_slices))
                cut_id = 0
                for jt in range(num_slices):
                    print(" " + str(it) + " Slice: " + str(jt))
                    start, end = cut_id, min(batch.size()[4], cut_it * (jt + 1))
                    start_t, end_t = start, min(teacher.size()[4],
                                                end - net.teacher_offset * 2)
                    cut_id = end
                    input_data = batch[:, it, :, :, start:end].unsqueeze(1)
                    teacher_data = teacher[:, it, :, :, start_t:end_t].unsqueeze(1)
                    # Train
                    output = net(input_data, loss_func.apply_sigmoid)
                    loss, root_loss, soil_loss = loss_func(output, teacher_data, epoch)
                    loss /= (bt_size - eval_size) * bt_per_it * num_slices
                    loss.backward()
                    tr_loss += loss
                    tr_root_loss += root_loss
                    tr_soil_loss += soil_loss
            # Eval
            output = net(batch[:, 3, :, :, :].unsqueeze(1))
            loss, _, _ = loss_func(output, teacher[:, 3, :, :, :].unsqueeze(1), epoch)
            ev_loss += loss
        tr_root_loss /= (bt_size - eval_size) * bt_per_it * num_slices
        tr_soil_loss /= (bt_size - eval_size) * bt_per_it * num_slices
        opt.step()
        # Log
        log.logEpoch(epoch, tr_loss.cpu().data.numpy(), ev_loss.cpu().data.numpy(),
                     tr_root_loss.cpu().data.numpy(), tr_soil_loss.cpu().data.numpy())
        if epoch % 20 == 0:
            weights = net.getWeightsCuda()
            output = feedForward(net, loader, 0)
            log.logWeights(weights)
            teacher = loader.getTeacherNp(0, 4, loader.offset)
            f1_r = np.array([0.0, 0.0, 0.0])
            f1_s = np.array([0.0, 0.0, 0.0])
            for it in range(4):
                f1_r += evle(output[it][0, 0, :, :, :], teacher[0, it, :, :, :])
                f1_s += evle(output[it][0, 0, :, :, :], teacher[0, it, :, :, :], True)
            log.logF1Root(epoch, f1_r / 4)
            log.logF1Soil(epoch, f1_s / 4)
        if epoch % 100 == 0:
            log.logMilestone(epoch, weights, output)
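# Note that the optimizer argument is called above as optimizer(net, lr), so it
# is expected to be a factory rather than an already-constructed optimizer
# instance. A minimal sketch of such a factory (an assumption; the project may
# supply its own wrapper, and the network is presumed to expose .parameters()
# like a torch nn.Module):
import torch


def adam_factory(network, lr):
    # Returns an optimizer bound to the network's parameters; trainNetwork
    # then calls .zero_grad() and .step() on the returned object.
    return torch.optim.Adam(network.parameters(), lr=lr)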