def LoadPredictionModels(in_model_weights_names, net_settings, cuda_device_id):
    """Load one evaluation-ready network per weights file.

    For every path in in_model_weights_names, builds a network from
    net_settings, restores its weights non-strictly, moves it onto the given
    CUDA device and switches it to eval mode.

    Returns:
        A list of loaded networks, in the same order as the input paths.
    """

    def _load_one(weights_path):
        # TODO Also load bias post transform modules.
        model = models.MakeNetwork(
            in_shape=[
                net_settings[training_helpers.IN_CHANNELS],
                net_settings[training_helpers.TARGET_HEIGHT],
                net_settings[training_helpers.TARGET_WIDTH],
            ],
            options=net_settings,
            post_transform_modules=[])
        # strict=False: checkpoints may lack some module keys (see TODO above).
        model.load_state_dict(torch.load(weights_path), strict=False)
        model.cuda(cuda_device_id)
        model.eval()
        return model

    return [_load_one(path) for path in in_model_weights_names]
# Crop settings parser.add_argument('--crop_top', type=int, default=0) parser.add_argument('--crop_bottom', type=int, default=0) parser.add_argument('--crop_left', type=int, default=0) parser.add_argument('--crop_right', type=int, default=0) # Post-crop resize settings. parser.add_argument('--target_height', type=int, default=-1) parser.add_argument('--target_width', type=int, default=-1) args = parser.parse_args() # Init model and load weights. net = models.MakeNetwork( args.net_name, in_shape=[3, args.target_height, args.target_width], out_dims=args.net_out_total_dimensions, dropout_prob=0.0) net.load_state_dict(torch.load(args.in_model_weights)) net.eval() net.cuda() result_data = [] frames_generator = image_helpers.VideoFrameGenerator(args.in_video) for raw_frame, frame_index in frames_generator: frame_cropped = image_helpers.CropHWC(raw_frame, args.crop_top, args.crop_bottom, args.crop_left, args.crop_right) frame_resized = image_helpers.MaybeResizeHWC(frame_cropped, args.target_height, args.target_width)
def MakeTrainer(train_data,
                val_data,
                all_settings,
                num_nets_to_train,
                epochs,
                cuda_device_id=0,
                preload_weight_names=None):
    """Assemble learners and data loaders from a flat settings dictionary.

    Builds num_nets_to_train independent (network, optimizer, scheduler)
    learners, the training loss settings, the augmentation configuration and
    the train/validation data loaders.

    Returns:
        A (learners, train_loader, val_loader, train_settings) tuple.
    """

    def _build_learner(idx):
        # One network plus its optimizer and optional plateau LR scheduler.
        network = models.MakeNetwork(
            all_settings[NET_NAME],
            in_shape=[
                all_settings[IN_CHANNELS],
                all_settings[TARGET_HEIGHT],
                all_settings[TARGET_WIDTH],
            ],
            head_dims=all_settings[NET_HEAD_DIMS],
            out_dims=all_settings[LABEL_DIMENSIONS],
            dropout_prob=all_settings[DROPOUT_PROB],
            options=all_settings[NET_OPTIONS])
        # The constructed network must agree with the recorded data layout.
        assert network.InputNames() == all_settings[INPUT_NAMES]
        assert network.LabelNames() == all_settings[LABEL_NAMES]
        if preload_weight_names is not None:
            assert len(preload_weight_names) == num_nets_to_train
            network.load_state_dict(torch.load(preload_weight_names[idx]))
        network.cuda(cuda_device_id)
        opt = MakeOptimizer(network, all_settings[OPTIMIZER],
                            all_settings[LEARNING_RATE])
        patience = all_settings[PLATEAU_PATIENCE_EPOCHS]
        # patience <= 0 disables LR scheduling entirely.
        scheduler = (torch.optim.lr_scheduler.ReduceLROnPlateau(
            opt, factor=0.5, patience=patience) if patience > 0 else None)
        return optimize.Learner(network, opt, scheduler)

    learners = [_build_learner(i) for i in range(num_nets_to_train)]

    train_settings = optimize.TrainSettings(
        optimize.SingleLabelLoss(
            optimize.PowerLoss(all_settings[LOSS_NORM_POW])), epochs)

    # Locate the frame-image and steering elements inside the flat data tuple.
    element_names = all_settings[INPUT_NAMES] + all_settings[LABEL_NAMES]
    image_idx = element_names.index(models.FRAME_IMG)
    steering_idx = element_names.index(models.STEERING)

    shift_dirs = None
    if all_settings[DO_PCA_RANDOM_SHIFTS]:
        # PCA color directions are computed on [0, 1]-scaled training images.
        shift_dirs = image_helpers.GetPcaRgbDirections(
            train_data[image_idx].astype(np.float32) / 255.0)

    label_shift_rate = np.array(
        all_settings[HORIZONTAL_LABEL_SHIFT_RATE], dtype=np.float32)
    augment_settings = augmentation.AugmentSettings(
        target_width=all_settings[TARGET_WIDTH],
        max_horizontal_shift_pixels=all_settings[MAX_HORIZONTAL_SHIFT_PIXELS],
        horizontal_label_shift_rate=label_shift_rate,
        blur_sigma=all_settings[TRAIN_BLUR_SIGMA],
        blur_prob=all_settings[TRAIN_BLUR_PROB],
        grayscale_interpolate_prob=all_settings[GRAYSCALE_INTERPOLATE_PROB],
        random_shift_directions=shift_dirs)

    train_loader, val_loader = MakeDataLoaders(
        train_data, val_data, image_idx, steering_idx,
        all_settings[TARGET_WIDTH], augment_settings,
        all_settings[BATCH_SIZE])

    return learners, train_loader, val_loader, train_settings
# at gradient wrt the first frame, so set all the other gradients to zero. output_gradient = np.zeros( [args.batch_size, net_settings[training_helpers.LABEL_DIMENSIONS]], dtype=np.float32) output_gradient[:, 0] = 1.0 output_gradient_tensor = Variable(torch.from_numpy(output_gradient)).cuda( args.cuda_device_id) # Load the model and transfer to GPU. # TODO factor out from here and predict_video.py net = models.MakeNetwork( net_settings[training_helpers.NET_NAME], in_shape=[ net_settings[training_helpers.IN_CHANNELS], net_settings[training_helpers.TARGET_HEIGHT], net_settings[training_helpers.TARGET_WIDTH] ], head_dims=net_settings[training_helpers.NET_HEAD_DIMS], out_dims=net_settings[training_helpers.LABEL_DIMENSIONS], dropout_prob=net_settings[training_helpers.DROPOUT_PROB], options=net_settings[training_helpers.NET_OPTIONS]) net.load_state_dict(torch.load(args.in_model_weights)) net.cuda(args.cuda_device_id) net.eval() out_video = skvideo.io.FFmpegWriter(args.out_video, inputdict={'-r': '30.0'}, outputdict={ '-r': '30.0', '-crf': '17', '-preset': 'slow'
label_suffix=args.labels_file_suffix)

# PCA-based random color shifts are optional; directions are computed on
# [0, 1]-scaled training images.
random_shift_directions = None if not args.do_pca_random_shifts else (
    image_helpers.GetPcaRgbDirections(
        train_data.astype(np.float32) / 255.0))

augment_settings = augmentation.AugmentSettings(
    target_width=args.target_width,
    max_horizontal_shift_pixels=args.max_horizontal_shift_pixels,
    horizontal_label_shift_rate=args.horizontal_label_shift_rate,
    blur_sigma=args.train_blur_sigma,
    blur_prob=args.train_blur_prob,
    grayscale_interpolate_prob=args.grayscale_interpolate_prob,
    random_shift_directions=random_shift_directions)

train_loader, val_loader = training_helpers.MakeDataLoaders(
    train_data, train_labels, val_data, val_labels, args.target_width,
    augment_settings, args.batch_size, args.example_label_extra_weight_scale)

# Build the network (3 input channels, RGB - TODO confirm) and move to GPU.
net = models.MakeNetwork(
    args.net_name,
    in_shape=[3, args.target_height, args.target_width],
    dropout_prob=args.dropout_prob)
net.cuda()

# Weighted MSE loss with an Adam optimizer over all network parameters.
train_settings = optimize.TrainSettings(
    optimize.LossSettings(optimize.WeightedMSELoss()),
    torch.optim.Adam(net.parameters()), args.epochs)

optimize.TrainModel(net, train_loader, val_loader, train_settings,
                    args.out_prefix)
# Gradients wrt output label to be used for backpropagation. We will only look # at gradient wrt the first frame, so set all the other gradients to zero. output_gradient = np.zeros( [args.batch_size, net_settings[training_helpers.LABEL_DIMENSIONS]], dtype=np.float32) output_gradient[:, 0] = 1.0 output_gradient_tensor = Variable(torch.from_numpy(output_gradient)).cuda( args.cuda_device_id) # Load the model and transfer to GPU. # TODO factor out from here and predict_video.py net = models.MakeNetwork( net_settings[training_helpers.NET_NAME], in_shape=[ net_settings[training_helpers.IN_CHANNELS], net_settings[training_helpers.TARGET_HEIGHT], net_settings[training_helpers.TARGET_WIDTH] ], out_dims=net_settings[training_helpers.LABEL_DIMENSIONS], options=net_settings[training_helpers.NET_OPTIONS]) net.load_state_dict(torch.load(args.in_model_weights)) net.cuda(args.cuda_device_id) net.eval() out_video = skvideo.io.FFmpegWriter(args.out_video, inputdict={'-r': '30.0'}, outputdict={ '-r': '30.0', '-crf': '17', '-preset': 'slow' })