# Standard imports assumed by all snippets below; the project-local modules
# (datasets, model, losses, parameter-group helpers, load_state, evaluate)
# come from the surrounding repository and are not reproduced here.
import cv2
import numpy as np
import torch
from torch import optim
from torch.nn import DataParallel
from torch.utils.data import DataLoader
from torchvision import transforms


# Visual sanity check for the Body21 training data: shows each keypoint
# heatmap and each PAF channel pair blended over the source image.
def vis_dataset(prepared_train_labels, train_images_folder):
    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = Body21TrainDataset(prepared_train_labels, train_images_folder,
                                 stride, sigma, path_thickness,
                                 transform=transforms.Compose([
                                     ConvertKeypoints2(),
                                     Scale(),
                                     Rotate(pad=(128, 128, 128)),
                                     CropPad(pad=(128, 128, 128)),
                                     Flip2()
                                 ]))
    train_loader = DataLoader(dataset)

    for batch_data in train_loader:
        images = batch_data['image']
        keypoint_masks = batch_data['keypoint_mask']
        paf_masks = batch_data['paf_mask']
        keypoint_maps = batch_data['keypoint_maps']
        paf_maps = batch_data['paf_maps']
        # num_keypoints = batch_data['label']['num_keypoints'][0].item()
        # if num_keypoints < 21:
        #     continue

        # Undo the (x - 128) / 256 normalization to get a displayable BGR image.
        image_bgr = np.uint8(images[0].numpy().transpose(1, 2, 0) * 256 + 128)
        cv2.imshow('image', image_bgr)
        cv2.moveWindow('image', 384, 0)

        for i in range(len(keypoint_maps[0])):
            heat_map = cv2.resize(keypoint_maps[0][i].numpy(),
                                  (images[0].shape[2], images[0].shape[1]))
            heat_map = np.expand_dims(np.uint8(heat_map * 255), -1)
            debug_map = image_bgr // 2 + heat_map // 2
            win_name = 'keypoint: {}'.format(kp_names[i])
            cv2.imshow(win_name, debug_map)
            cv2.moveWindow(win_name, 0, 0)
            cv2.waitKey(0)
            cv2.destroyWindow(win_name)

        for i in range(len(BODY_PARTS_PAF_IDS)):
            pair = BODY_PARTS_PAF_IDS[i]
            paf_map_x = np.abs(paf_maps[0][pair[0]].numpy())
            paf_map_y = np.abs(paf_maps[0][pair[1]].numpy())
            # Take the stronger of the two components and clamp to [0, 1].
            paf_map = np.fmin(np.fmax(paf_map_x, paf_map_y) * 100, 1.0)
            paf_map = cv2.resize(paf_map, (images[0].shape[2], images[0].shape[1]))
            paf_map = np.expand_dims(np.uint8(paf_map * 255), -1)
            debug_map = image_bgr // 2 + paf_map // 2
            win_name = 'part: {}'.format(pf_names[i])
            cv2.imshow(win_name, debug_map)
            cv2.moveWindow(win_name, 0, 0)
            cv2.waitKey(0)
            cv2.destroyWindow(win_name)
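
# ---------------------------------------------------------------------------
# A minimal sketch of how one Gaussian keypoint heatmap of the kind displayed
# above is typically generated (OpenPose-style recipe). `make_keypoint_heatmap`
# is a hypothetical helper for illustration only; the actual
# Body21TrainDataset implementation may differ in details.
# ---------------------------------------------------------------------------
def make_keypoint_heatmap(img_h, img_w, kpt_x, kpt_y, stride=8, sigma=7):
    # The map is `stride` times smaller than the input image; each cell holds
    # a Gaussian response centered on the keypoint location.
    h, w = img_h // stride, img_w // stride
    xs = np.arange(w) * stride + stride / 2  # cell centers in image pixels
    ys = np.arange(h) * stride + stride / 2
    gx, gy = np.meshgrid(xs, ys)
    d2 = (gx - kpt_x) ** 2 + (gy - kpt_y) ** 2
    return np.exp(-d2 / (2 * sigma ** 2)).astype(np.float32)

# A 368x368 crop with a keypoint at (100, 150) yields a 46x46 heatmap whose
# peak sits near cell (100 // 8, 150 // 8).
example_heatmap = make_keypoint_heatmap(368, 368, kpt_x=100, kpt_y=150)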
# Baseline training loop: MobileNet-backbone pose network trained on COCO,
# with per-parameter-group learning rates and gradient accumulation over
# `batches_per_iter` mini-batches.
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet,
          checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name,
          checkpoint_after, val_after):
    net = PoseEstimationWithMobileNet(num_refinement_stages)
    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()
                               ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # Separate parameter groups: biases get 2x the base learning rate (8x in
    # the refinement stages) and no weight decay; BN and depthwise-conv
    # weights are also excluded from weight decay.
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    for epochId in range(current_epoch, 280):
        # Pre-1.1 PyTorch convention: step the scheduler at the start of each
        # epoch. PyTorch >= 1.1 expects scheduler.step() after optimizer.step().
        scheduler.step()
        total_losses = [0, 0] * (num_refinement_stages + 1)  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            # Each stage outputs a heatmap tensor (even indices) and a PAF
            # tensor (odd indices); both get a masked L2 loss.
            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue  # log/checkpoint/validate only on completed optimizer steps

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}',
                                     'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId},
                           snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                net.train()
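
# ---------------------------------------------------------------------------
# A self-contained sketch of the gradient-accumulation pattern used in the
# loop above: each loss is divided by `batches_per_iter` before backward(),
# gradients sum across mini-batches, and one optimizer step then matches the
# gradient of a batch `batches_per_iter` times larger. The linear model here
# is a toy stand-in, not part of the pose network.
# ---------------------------------------------------------------------------
import torch.nn as nn

toy_model = nn.Linear(10, 1)
toy_optimizer = optim.Adam(toy_model.parameters(), lr=1e-3)
batches_per_iter = 4

toy_optimizer.zero_grad()
for _ in range(batches_per_iter):
    x = torch.randn(8, 10)
    loss = toy_model(x).pow(2).mean() / batches_per_iter  # scale before backward
    loss.backward()  # grads accumulate in param.grad across iterations
toy_optimizer.step()  # single update with the averaged gradient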
# Annotated variant of the same training loop: identical logic, with
# explanatory comments; checkpoints are saved with an '_after_37000' suffix
# (evidently continuing an earlier run).
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet,
          checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name,
          checkpoint_after, val_after):
    # For training, instantiate the pose-estimation model.
    net = PoseEstimationWithMobileNet(num_refinement_stages)
    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # If you need to move a model to GPU via .cuda(), do so before
    # constructing optimizers for it: parameters of a model after .cuda()
    # are different objects from those before the call.
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    # torch.optim.lr_scheduler adjusts the learning rate based on epoch count.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:  # continue training from a given .pth checkpoint
        # The checkpoint may hold weights for the whole network, or only the
        # MobileNet backbone.
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)  # (target, source)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                # A full checkpoint stores more than weights: optimizer and
                # scheduler state (e.g. the current learning rate) can be
                # restored like this.
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    for epochId in range(current_epoch, 280):  # train for at most 280 epochs
        print("This is Epoch No", str(epochId))
        scheduler.step()
        total_losses = [0, 0] * (num_refinement_stages + 1)  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}',
                                     'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}_after_37000.pth'.format(checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId},
                           snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                net.train()
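
# ---------------------------------------------------------------------------
# The save/resume logic above in isolation, as a hedged sketch: a full
# checkpoint bundles model, optimizer, and scheduler state plus the iteration
# and epoch counters, so a run can continue exactly where it stopped. The
# helper names and the path argument are illustrative, not from the original.
# ---------------------------------------------------------------------------
def save_full_checkpoint(net, optimizer, scheduler, num_iter, epoch, path):
    torch.save({'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'iter': num_iter,
                'current_epoch': epoch}, path)

def resume_full_checkpoint(net, optimizer, scheduler, path):
    checkpoint = torch.load(path, map_location='cpu')
    net.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])
    return checkpoint['iter'], checkpoint['current_epoch']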
# Debugging variant: pushes each batch's tensors to a visdom dashboard and
# renders the ground-truth PAF vector field with OpenCV before the forward
# pass. NOTE: assumes a visdom client created elsewhere in the file, e.g.
# `import visdom; vis = visdom.Visdom()`.
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet,
          checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name,
          checkpoint_after, val_after):
    net = PoseEstimationWithMobileNet(num_refinement_stages)
    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()
                               ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    for epochId in range(current_epoch, 280):
        scheduler.step()
        total_losses = [0, 0] * (num_refinement_stages + 1)  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            print("batched images size", batch_data['image'].shape)
            # Swap BGR -> RGB and shift from [-0.5, 0.5] to [0, 1] for display.
            vis.images(batch_data['image'][:, [2, 1, 0], ...] + 0.5, 4, 2, "1", opts=dict(title="img"))
            vis.images(batch_data['keypoint_mask'].permute(1, 0, 2, 3), 4, 2, "2", opts=dict(title="kp_mask"))
            vis.images(batch_data['paf_mask'].permute(1, 0, 2, 3), 4, 2, "3", opts=dict(title="paf_mask"))
            vis.images(batch_data['keypoint_maps'].permute(1, 0, 2, 3), 4, 2, "4", opts=dict(title="keypoint_maps"))
            vis.images(batch_data['paf_maps'].permute(1, 0, 2, 3), 4, 2, "5", opts=dict(title="paf_maps"))
            vis.images(batch_data['mask'].unsqueeze(0), 4, 2, "6", opts=dict(title="MASK"))

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            # Draw the first sample's ground-truth PAFs as short line segments.
            pafs = batch_data['paf_maps'][0].permute(1, 2, 0).numpy()
            scale = 4
            img_p = np.zeros((pafs.shape[1] * 8, pafs.shape[0] * 8, 3), dtype=np.uint8)
            # pafs[pafs < 0.07] = 0
            for idx in range(len(BODY_PARTS_PAF_IDS)):
                pp = BODY_PARTS_PAF_IDS[idx]
                k_idx = BODY_PARTS_KPT_IDS[idx]  # (unused here)
                cc = BODY_CONN_COLOR[idx]
                vx = pafs[:, :, pp[0]]
                vy = pafs[:, :, pp[1]]
                for i in range(pafs.shape[1]):
                    for j in range(pafs.shape[0]):
                        a = (i * 2 * scale, j * 2 * scale)
                        b = (2 * int((i + vx[j, i] * 3) * scale),
                             2 * int((j + vy[j, i] * 3) * scale))
                        if a[0] == b[0] and a[1] == b[1]:
                            continue
                        cv2.line(img_p, a, b, cc, 1)
            cv2.imshow("paf", img_p)
            key = cv2.waitKey(0)
            if key == 27:  # Esc quits the debug session
                exit(0)

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}',
                                     'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId},
                           snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                net.train()
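
# ---------------------------------------------------------------------------
# Hedged sketch of what the paired PAF channels visualized above encode: for a
# limb from keypoint A to keypoint B, every grid cell within `thickness` of
# the A-B segment stores the segment's unit direction (x in one channel, y in
# the other). This is the standard OpenPose-style encoding; the dataset's
# exact rasterization may differ. `draw_paf` is illustrative only.
# ---------------------------------------------------------------------------
def draw_paf(h, w, ax, ay, bx, by, thickness=1):
    paf = np.zeros((2, h, w), dtype=np.float32)
    vx, vy = bx - ax, by - ay
    norm = np.hypot(vx, vy) + 1e-8
    vx, vy = vx / norm, vy / norm
    for y in range(h):
        for x in range(w):
            # Projection along the limb and perpendicular distance from it.
            along = (x - ax) * vx + (y - ay) * vy
            across = abs((x - ax) * vy - (y - ay) * vx)
            if 0 <= along <= norm and across <= thickness:
                paf[0, y, x], paf[1, y, x] = vx, vy
    return paf

# Example: a 46x46 PAF pair for a limb from (10, 10) to (30, 25).
example_paf = draw_paf(46, 46, 10, 10, 30, 25)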
# Fine-tuning variant: moves optimizer state to the GPU after resuming,
# applies an extra `preprocess` step to the input batch, logs to a DGPT Viz
# dashboard, and steps the lr scheduler per optimizer update instead of per
# epoch.
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet,
          checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name,
          checkpoint_after, val_after):
    net = PoseEstimationWithMobileNet(num_refinement_stages)
    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()
                               ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    print("optimizer LR")
    for param_group in optimizer.param_groups:
        print(param_group['lr'])
    # Optimizer state tensors were loaded on the CPU; move them to the GPU so
    # that optimizer.step() does not mix devices.
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()

    net = DataParallel(net).cuda()
    net.train()

    from DGPT.Visualize.Viz import Viz  # project-local dashboard wrapper
    viz = Viz(dict(env="refine"))

    for epochId in range(current_epoch, 280):
        # scheduler.step()  # moved next to optimizer.step() below
        total_losses = [0, 0] * (num_refinement_stages + 1)  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            images = preprocess(images)

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses) // 2):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            viz.draw_line(num_iter, loss.item(), "Loss")
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
                # NOTE: stepping here makes the MultiStepLR milestones count
                # optimizer updates, not epochs.
                scheduler.step()
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join(['stage{}_pafs_loss: {}',
                                     'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

                # Push the first sample's input, target heatmaps/mask, and the
                # network's predicted heatmaps (all channels except the
                # background one) to the dashboard.
                xx = images[:1, ...].detach()
                hh = keypoint_maps[:1, ...].detach()
                mm = keypoint_masks[:1, ...].detach()
                print(xx.shape, hh.shape, mm.shape)
                hh = hh.squeeze(0).reshape(19, 1, hh.shape[2], hh.shape[3])  # 18 keypoints + background
                mm = mm.squeeze(0).reshape(19, 1, hh.shape[2], hh.shape[3])
                viz.draw_images(xx, "input1")
                viz.draw_images(hh, "input1_heatmap")
                viz.draw_images(mm, "input1_mask")
                oh = stages_output[-2].detach()[:1, :-1, ...]
                oh = oh.reshape(oh.shape[1], 1, oh.shape[2], oh.shape[3])
                viz.draw_images(oh, "output1_heatmap")
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId},
                           snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                net.train()
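
# ---------------------------------------------------------------------------
# Hedged sketch of the scheduler-placement difference in the last variant:
# MultiStepLR counts scheduler.step() calls, so stepping it after every
# optimizer update makes milestones=[100, 200, 260] fire at those *updates*,
# not at those epochs. Toy parameter and learning rate for illustration.
# ---------------------------------------------------------------------------
toy_param = torch.nn.Parameter(torch.zeros(1))
toy_opt = optim.Adam([toy_param], lr=4e-5)
toy_sched = optim.lr_scheduler.MultiStepLR(toy_opt, milestones=[100, 200, 260], gamma=0.333)

for update in range(1, 301):
    toy_opt.step()    # would normally follow loss.backward()
    toy_sched.step()  # lr drops by ~3x at updates 100, 200, and 260
    if update in (100, 200, 260):
        print(update, toy_opt.param_groups[0]['lr'])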