def main(argv):
    parser = argparse.ArgumentParser(
        description="Train a network to predict primitives")
    parser.add_argument("dataset_directory",
                        help="Path to the directory containing the dataset")
    parser.add_argument("output_directory",
                        help="Save the output files in that directory")
    parser.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trained model to continue"
              " the training from"))
    parser.add_argument("--continue_from_epoch",
                        default=0,
                        type=int,
                        help="Continue training from epoch (default=0)")
    parser.add_argument("--run_on_gpu",
                        action="store_true",
                        help="Use GPU")
    parser.add_argument("--experiment_tag",
                        default=None,
                        help="Tag that refers to the current experiment")
    parser.add_argument("--cache_size",
                        type=int,
                        default=2000,
                        help="The batch provider cache size")
    parser.add_argument("--seed",
                        type=int,
                        default=27,
                        help="Seed for the PRNG")

    # Add the remaining argument groups and parse the arguments
    add_nn_parameters(parser)
    add_dataset_parameters(parser)
    add_datatype_parameters(parser)
    add_training_parameters(parser)
    args = parser.parse_args(argv)

    if args.run_on_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Running code on", device)

    # Check if the output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Create an experiment directory using the experiment_tag
    if args.experiment_tag is None:
        experiment_tag = id_generator(9)
    else:
        experiment_tag = args.experiment_tag

    experiment_directory = os.path.join(args.output_directory, experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Store the parameters for the current experiment in a json file
    save_experiment_params(args, experiment_tag, experiment_directory)
    print("Saving experiment statistics in %s" % (experiment_tag,))

    # Create two files to store the training and validation evolution
    train_stats = os.path.join(experiment_directory, "train.txt")
    val_stats = os.path.join(experiment_directory, "val.txt")
    if args.weight_file is None:
        train_stats_f = open(train_stats, "w")
    else:
        train_stats_f = open(train_stats, "a+")
    train_stats_f.write("epoch loss\n")

    # Set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(np.iinfo(np.int32).max))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(np.iinfo(np.int32).max))

    # TODO: M is currently hard-coded
    M = 11
    data_output_shape = (M, 7)

    # Create a factory that returns the appropriate data type based on the
    # input argument
    data_factory = DataFactory(
        args.data_type,
        tuple([data_input_shape(args), data_output_shape]))

    # Create a dataset instance to generate the samples for training
    training_dataset = get_dataset_type("matrix_loss")(
        (DatasetBuilder().with_dataset(args.dataset_type).build(
            args.dataset_directory)),
        data_factory,
        transform=compose_transformations(args.data_type))
    training_loader = DataLoader(training_dataset,
                                 batch_size=32,
                                 num_workers=4,
                                 pin_memory=True,
                                 drop_last=True,
                                 shuffle=True)

    # Build the model to be used for training
    network_params = NetworkParameters(args.architecture, M, False)
    model = network_params.network(network_params)
    # Move the model to the device to be used
    model.to(device)
    # Check whether there is a weight file provided to continue training from
    if args.weight_file is not None:
        model.load_state_dict(torch.load(args.weight_file))
    model.train()

    # Build an optimizer object to compute the gradients of the parameters
    optimizer = optimizer_factory(args, model)

    # Loop over the dataset multiple times
    losses = []
    for i in range(args.epochs):
        bar = get_logger("matrix_loss", i + 1, args.epochs,
                         args.steps_per_epoch)
        j = 0
        for sample in training_loader:
            X, y_target = sample
            X, y_target = X.to(device), y_target.to(device)

            # Train on batch
            batch_loss, metrics, debug_stats = train_on_batch(
                model,
                lr_schedule(optimizer, i, args.lr, args.lr_factor,
                            args.lr_epochs),
                matrix_loss, X, y_target, device)

            # Keep a running average of the loss and record it
            bar.loss = moving_average(bar.loss, batch_loss, j)
            losses.append(bar.loss)

            # Update the file that keeps track of the statistics
            if (j % 50) == 0:
                train_stats_f.write("%d %5.8f" % (i, bar.loss))
                train_stats_f.write("\n")
                train_stats_f.flush()
            j += 1
            bar.next()
            if j >= args.steps_per_epoch:
                break

        # Finish the progress bar and save the model every 5 epochs
        bar.finish()
        if (i % 5) == 0:
            torch.save(
                model.state_dict(),
                os.path.join(experiment_directory,
                             "model_%d" % (i + args.continue_from_epoch,)))

    torch.save(model.state_dict(),
               os.path.join(experiment_directory, "model_final"))

    # Report the average loss over the last and the first epoch
    print([
        sum(losses[-args.steps_per_epoch:]) / float(args.steps_per_epoch),
        sum(losses[:args.steps_per_epoch]) / float(args.steps_per_epoch)
    ])
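
# The `moving_average` helper used in the training loop above is imported from
# elsewhere in the repository and is not defined in this file. A minimal sketch
# that matches how the regularizer terms are averaged in the
# euclidean_dual_loss loop below, i.e. avg = (avg * b + x) / (b + 1), could look
# like the following; the exact name and signature here are assumptions.
def moving_average(prev_avg, new_value, step):
    """Return the running mean after observing `new_value` at 0-based `step`."""
    return (prev_avg * step + new_value) / (step + 1)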
def main(argv):
    parser = argparse.ArgumentParser(
        description="Train a network to predict primitives")
    parser.add_argument("dataset_directory",
                        help="Path to the directory containing the dataset")
    parser.add_argument("output_directory",
                        help="Save the output files in that directory")
    parser.add_argument(
        "--tsdf_directory",
        default="",
        help="Path to the directory containing the precomputed tsdf files")
    parser.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trained model to continue"
              " the training from"))
    parser.add_argument("--continue_from_epoch",
                        default=0,
                        type=int,
                        help="Continue training from epoch (default=0)")
    parser.add_argument("--n_primitives",
                        type=int,
                        default=32,
                        help="Number of primitives")
    parser.add_argument(
        "--use_deformations",
        action="store_true",
        help="Use superquadrics with deformations as the shape configuration")
    parser.add_argument("--train_test_splits_file",
                        default=None,
                        help="Path to the train-test splits file")
    parser.add_argument("--run_on_gpu",
                        action="store_true",
                        help="Use GPU")
    parser.add_argument("--probs_only",
                        action="store_true",
                        help="Optimize only using the probabilities")
    parser.add_argument("--experiment_tag",
                        default=None,
                        help="Tag that refers to the current experiment")
    parser.add_argument("--cache_size",
                        type=int,
                        default=2000,
                        help="The batch provider cache size")
    parser.add_argument("--seed",
                        type=int,
                        default=27,
                        help="Seed for the PRNG")

    add_nn_parameters(parser)
    add_dataset_parameters(parser)
    add_voxelizer_parameters(parser)
    add_training_parameters(parser)
    add_sq_mesh_sampler_parameters(parser)
    add_regularizer_parameters(parser)
    add_gaussian_noise_layer_parameters(parser)
    # Parameters related to the loss function and the loss weights
    add_loss_parameters(parser)
    # Parameters related to loss options
    add_loss_options_parameters(parser)
    args = parser.parse_args(argv)

    if args.train_test_splits_file is not None:
        train_test_splits = parse_train_test_splits(
            args.train_test_splits_file, args.model_tags)
        training_tags = np.hstack(
            [train_test_splits["train"], train_test_splits["val"]])
    else:
        training_tags = args.model_tags

    if args.run_on_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Running code on {}".format(device))

    # Check if the output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Create an experiment directory using the experiment_tag
    if args.experiment_tag is None:
        experiment_tag = id_generator(9)
    else:
        experiment_tag = args.experiment_tag

    experiment_directory = os.path.join(args.output_directory, experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Store the parameters for the current experiment in a json file
    save_experiment_params(args, experiment_tag, experiment_directory)
    print("Saving experiment statistics in %s" % (experiment_tag,))

    # Create two files to store the training and validation evolution
    train_stats = os.path.join(experiment_directory, "train.txt")
    val_stats = os.path.join(experiment_directory, "val.txt")
    if args.weight_file is None:
        train_stats_f = open(train_stats, "w")
    else:
        train_stats_f = open(train_stats, "a+")
    train_stats_f.write(
        ("epoch loss pcl_to_prim_loss prim_to_pcl_loss bernoulli_regularizer "
         "entropy_bernoulli_regularizer parsimony_regularizer "
         "overlapping_regularizer sparsity_regularizer\n"))

    # Set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(np.iinfo(np.int32).max))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(np.iinfo(np.int32).max))

    # Create an object that will sample points in equal distances on the
    # surface of the primitive
    sampler = get_sampler(args.use_cuboids, args.n_points_from_sq_mesh,
                          args.D_eta, args.D_omega)

    # Create a factory that returns the appropriate voxelizer based on the
    # input argument
    voxelizer_factory = VoxelizerFactory(args.voxelizer_factory,
                                         np.array(voxelizer_shape(args)),
                                         args.save_voxels_to)

    # Create a dataset instance to generate the samples for training
    training_dataset = get_dataset_type("euclidean_dual_loss")(
        (DatasetBuilder().with_dataset(args.dataset_type).lru_cache(
            2000).filter_tags(training_tags).build(args.dataset_directory)),
        voxelizer_factory,
        args.n_points_from_mesh,
        transform=compose_transformations(args.voxelizer_factory))

    # Create a batch provider object to start generating batches
    train_bp = BatchProvider(training_dataset,
                             batch_size=args.batch_size,
                             cache_size=args.cache_size)
    train_bp.ready()

    network_params = NetworkParameters.from_options(args)
    # Build the model to be used for training
    model = network_params.network(network_params)
    # Move the model to the device to be used
    model.to(device)
    # Check whether there is a weight file provided to continue training from
    if args.weight_file is not None:
        model.load_state_dict(torch.load(args.weight_file))
    model.train()

    # Build an optimizer object to compute the gradients of the parameters
    optimizer = optimizer_factory(args, model)

    # Loop over the dataset multiple times
    pcl_to_prim_losses = []
    prim_to_pcl_losses = []
    losses = []
    for i in range(args.epochs):
        bar = get_logger("euclidean_dual_loss", i + 1, args.epochs,
                         args.steps_per_epoch)
        for b, sample in zip(range(args.steps_per_epoch),
                             yield_infinite(train_bp)):
            tags, X, y_target = sample
            X, y_target = X.to(device), y_target.to(device)

            # Collect the precomputed part point samples and the sparse
            # pairwise matrices for the samples in this batch, based on `tag`
            part_point_samples = []
            P = []
            indices_w_parts = []
            for idx_batchwide, tag in enumerate(tags):
                # Check whether this sample has any part point samples
                target_part_samples_path = os.path.join(
                    RANDINDEX_SAMPLES_DIR, "{}.pkl".format(tag))
                if os.path.exists(target_part_samples_path):
                    with open(target_part_samples_path, "rb") as f:
                        foo = pickle.load(f)
                    point_samples = foo["samples"]
                    point_samples = torch.Tensor(point_samples).to(device)
                    part_point_samples.append(point_samples)

                    N = point_samples.shape[0]
                    nonzero_indices = foo["neighbor_pairs"]
                    assert nonzero_indices.shape[0] == 2
                    val = np.ones(nonzero_indices.shape[1])
                    curr_P = torch.sparse_coo_tensor(nonzero_indices, val,
                                                     (N, N))
                    curr_P = curr_P.to(device)
                    P.append(curr_P)
                    indices_w_parts.append(idx_batchwide)

            P = torch.stack(P, dim=0)
            part_point_samples = torch.stack(part_point_samples, dim=0)

            # Train on batch
            batch_loss, metrics, debug_stats = train_on_batch_w_parts(
                model,
                lr_schedule(optimizer, i, args.lr, args.lr_factor,
                            args.lr_epochs),
                euclidean_dual_loss, X, y_target,
                get_regularizer_terms(args, i), sampler,
                get_loss_options(args), P, part_point_samples,
                indices_w_parts)

            # Get the regularizer terms
            reg_values = debug_stats["regularizer_terms"]
            sparsity_regularizer = reg_values["sparsity_regularizer"]
            overlapping_regularizer = reg_values["overlapping_regularizer"]
            parsimony_regularizer = reg_values["parsimony_regularizer"]
            entropy_bernoulli_regularizer = reg_values[
                "entropy_bernoulli_regularizer"]
            bernoulli_regularizer = reg_values["bernoulli_regularizer"]

            # The losses
            pcl_to_prim_loss = debug_stats["pcl_to_prim_loss"].item()
            prim_to_pcl_loss = debug_stats["prim_to_pcl_loss"].item()

            bar.loss = moving_average(bar.loss, batch_loss, b)
            bar.pcl_to_prim_loss = \
                moving_average(bar.pcl_to_prim_loss, pcl_to_prim_loss, b)
            bar.prim_to_pcl_loss = \
                moving_average(bar.prim_to_pcl_loss, prim_to_pcl_loss, b)

            losses.append(bar.loss)
            prim_to_pcl_losses.append(bar.prim_to_pcl_loss)
            pcl_to_prim_losses.append(bar.pcl_to_prim_loss)

            bar.bernoulli_regularizer = \
                (bar.bernoulli_regularizer * b + bernoulli_regularizer) / (b + 1)
            bar.parsimony_regularizer = \
                (bar.parsimony_regularizer * b + parsimony_regularizer) / (b + 1)
            bar.overlapping_regularizer = \
                (bar.overlapping_regularizer * b + overlapping_regularizer) / (b + 1)
            bar.entropy_bernoulli_regularizer = \
                (bar.entropy_bernoulli_regularizer * b +
                 entropy_bernoulli_regularizer) / (b + 1)
            bar.sparsity_regularizer = \
                (bar.sparsity_regularizer * b + sparsity_regularizer) / (b + 1)

            bar.exp_n_prims = metrics[0].sum(-1).mean()

            # Update the file that keeps track of the statistics
            train_stats_f.write(
                "%d %.8f %.8f %.8f %.6f %.6f %.6f %.6f %.6f" %
                (i, bar.loss, bar.pcl_to_prim_loss, bar.prim_to_pcl_loss,
                 bar.bernoulli_regularizer, bar.entropy_bernoulli_regularizer,
                 bar.parsimony_regularizer, bar.overlapping_regularizer,
                 bar.sparsity_regularizer))
            train_stats_f.write("\n")
            train_stats_f.flush()
            bar.next()

        # Finish the progress bar and save the model after every epoch
        bar.finish()
        torch.save(
            model.state_dict(),
            os.path.join(experiment_directory,
                         "model_%d" % (i + args.continue_from_epoch,)))

    # Stop the batch provider
    train_bp.stop()

    # Report the average losses over the last and the first epoch
    print([
        sum(losses[-args.steps_per_epoch:]) / float(args.steps_per_epoch),
        sum(losses[:args.steps_per_epoch]) / float(args.steps_per_epoch),
        sum(pcl_to_prim_losses[-args.steps_per_epoch:]) /
        float(args.steps_per_epoch),
        sum(pcl_to_prim_losses[:args.steps_per_epoch]) /
        float(args.steps_per_epoch),
        sum(prim_to_pcl_losses[-args.steps_per_epoch:]) /
        float(args.steps_per_epoch),
        sum(prim_to_pcl_losses[:args.steps_per_epoch]) /
        float(args.steps_per_epoch),
    ])
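
# The loop above expects one pickle per model tag under RANDINDEX_SAMPLES_DIR,
# containing a dict with "samples" (an N x 3 array of surface points) and
# "neighbor_pairs" (a 2 x K array of index pairs) from which a sparse N x N
# adjacency matrix is built. The sketch below shows how such a file could be
# produced; the helper name and the use of a k-d tree radius query are
# assumptions, not part of this repository.
import os
import pickle

import numpy as np
from scipy.spatial import cKDTree


def write_part_samples(tag, points, radius, output_dir):
    """Store point samples and their neighbor pairs in the expected format."""
    tree = cKDTree(points)
    # query_pairs returns {(i, j): i < j}; transpose to the expected (2, K)
    pairs = np.array(sorted(tree.query_pairs(radius))).T
    with open(os.path.join(output_dir, "{}.pkl".format(tag)), "wb") as f:
        pickle.dump({"samples": points, "neighbor_pairs": pairs}, f)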
def main(argv):
    parser = argparse.ArgumentParser(
        description="Train a network to predict primitives")
    parser.add_argument(
        "config_file",
        help="Path to the file that contains the experiment configuration")
    parser.add_argument("output_directory",
                        help="Save the output files in that directory")
    parser.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trained model to continue"
              " the training from"))
    parser.add_argument("--continue_from_epoch",
                        default=0,
                        type=int,
                        help="Continue training from epoch (default=0)")
    parser.add_argument("--run_on_gpu",
                        action="store_true",
                        help="Use GPU")
    parser.add_argument("--probs_only",
                        action="store_true",
                        help="Optimize only using the probabilities")
    parser.add_argument("--experiment_tag",
                        default=None,
                        help="Tag that refers to the current experiment")
    parser.add_argument("--credentials",
                        default=os.path.join(os.path.dirname(__file__),
                                             ".credentials"),
                        help="The credentials file for the Google API")
    parser.add_argument("--cache_size",
                        type=int,
                        default=0,
                        help="The batch provider cache size")
    parser.add_argument(
        "--n_processes",
        type=int,
        default=8,
        help="The number of processes spawned by the batch provider")
    parser.add_argument("--seed",
                        type=int,
                        default=27,
                        help="Seed for the PRNG")

    add_dataset_parameters(parser)
    args = parser.parse_args(argv)

    set_num_threads(1)

    if args.run_on_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Running code on", device)

    # Check if the output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Create an experiment directory using the experiment_tag
    if args.experiment_tag is None:
        experiment_tag = id_generator(9)
    else:
        experiment_tag = args.experiment_tag

    experiment_directory = os.path.join(args.output_directory, experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Get the parameters and their ordering for the spreadsheet
    save_experiment_params(args, experiment_tag, experiment_directory)
    print("Saving experiment statistics in %s" % (experiment_tag,))

    # Set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(np.iinfo(np.int32).max))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(np.iinfo(np.int32).max))

    config = load_config(args.config_file)

    # Build the network architecture to be used for training
    network = build_network(args.config_file, args.weight_file, device=device)
    network.train()

    loss_options = get_loss_options(config)

    # Build an optimizer object to compute the gradients of the parameters
    optimizer = optimizer_factory(config, network)

    # Create an object that will sample points in equal distances on the
    # surface of the primitive
    n_points_from_sq_mesh = config["data"].get("n_points_from_sq_mesh", 200)
    sampler = PrimitiveSampler(n_points_from_sq_mesh)

    # Instantiate a dataloader to generate the samples for training
    dataloader = build_dataloader(
        config,
        args.model_tags,
        args.category_tags,
        config["data"].get("train_split", ["train", "val"]),
        config["loss"].get("batch_size", 32),
        args.n_processes,
        cache_size=args.cache_size,
    )

    # Instantiate a dataloader to generate the samples for validation
    val_dataset = build_dataset(config, [], [],
                                config["data"].get("test_split", ["test"]),
                                random_subset=args.val_random_subset,
                                cache_size=args.cache_size)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config["data"].get(
                                    "validation_batch_size", 8),
                                num_workers=args.n_processes,
                                shuffle=True)

    epochs = config["loss"].get("epochs", 150)
    steps_per_epoch = config["loss"].get("steps_per_epoch", 500)

    # Create a logger to keep track of the training statistics
    logger = get_logger(epochs, steps_per_epoch,
                        os.path.join(experiment_directory, "train.txt"))

    # Create a logger to keep track of the validation statistics
    val_every = config["data"].get("validation_every", 1000)
    val_logger = get_logger(epochs // val_every, len(val_dataset),
                            os.path.join(experiment_directory, "val.txt"),
                            prefix="Validation Epoch")
    # Counter to keep track of the validation epochs
    val_epochs = 0

    save_every = config["data"].get("save_every", 5)
    for i in range(epochs):
        logger.new_epoch(i)
        for b, sample in zip(range(steps_per_epoch),
                             yield_infinite(dataloader)):
            X = sample[0].to(device)
            y_target = [yi.to(device) for yi in sample[1:]]
            if len(y_target) == 1:
                y_target = y_target[0]

            # Train on batch
            reg_terms, reg_options = get_regularizer_options(config, i)
            loss_options.update(reg_options)
            batch_loss, preds = train_on_batch(
                network,
                lr_schedule(optimizer, i, config),
                get_loss(config["loss_type"], reg_terms, sampler,
                         loss_options),
                X, y_target, i)
            logger.new_batch(b, batch_loss)

        if i % save_every == 0:
            torch.save(
                network.state_dict(),
                os.path.join(experiment_directory,
                             "model_%d" % (i + args.continue_from_epoch,)))

        # Perform validation every `val_every` epochs
        if i % val_every == 0 and i > 0:
            val_logger.new_epoch(val_epochs)
            total = 0
            for sample in val_dataloader:
                X = sample[0].to(device)
                y_target = [yi.to(device) for yi in sample[1:]]
                if len(y_target) == 1:
                    y_target = y_target[0]
                val_l, preds = validate_on_batch(
                    network,
                    get_loss(config["loss_type"], reg_terms, sampler,
                             loss_options),
                    X, y_target)
                total += X.shape[0]
                val_logger.new_batch(total, val_l)
            # Increment the validation epoch counter by one
            val_epochs += 1

    print("Saved statistics in %s" % (experiment_tag,))
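
# The config-driven variant above only reads a handful of keys from the file
# parsed by `load_config`. The dict below lists the keys referenced in this
# function with illustrative values; the on-disk file format (e.g. YAML) and
# any additional keys consumed by `build_network`, `build_dataloader`,
# `optimizer_factory`, `get_loss_options`, `get_regularizer_options` and
# `lr_schedule` are defined elsewhere in the repository, so treat this purely
# as a sketch of what this function expects.
EXAMPLE_CONFIG = {
    "loss_type": "euclidean_dual_loss",   # passed to get_loss
    "data": {
        "n_points_from_sq_mesh": 200,     # points sampled on each primitive
        "train_split": ["train", "val"],  # splits used for training
        "test_split": ["test"],           # splits used for validation
        "validation_batch_size": 8,
        "validation_every": 1000,         # run validation every N epochs
        "save_every": 5,                  # save a checkpoint every N epochs
    },
    "loss": {
        "batch_size": 32,
        "epochs": 150,
        "steps_per_epoch": 500,
    },
}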