def test_filter(self):
    dataset_path = get_dataset_path("../../data/preprocessed")
    print("Using dataset path:", dataset_path)

    # Filter: "front".
    dataset_parameters_pointclouds = {}
    dataset_parameters_pointclouds["input_type"] = "pointcloud"
    dataset_parameters_pointclouds["random_seed"] = 666
    dataset_parameters_pointclouds["pointcloud_target_size"] = 3000
    dataset_parameters_pointclouds["pointcloud_random_rotation"] = True
    dataset_parameters_pointclouds["filter"] = "front"
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)
    data_generator.analyze_files()
    dataset = next(data_generator.generate(size=100, verbose=True))
    assert dataset[0].shape == (100, 3000, 3), str(dataset[0].shape)
    assert dataset[1].shape == (100, 1), str(dataset[1].shape)

    # Filter: "360".
    dataset_parameters_pointclouds = {}
    dataset_parameters_pointclouds["input_type"] = "pointcloud"
    dataset_parameters_pointclouds["random_seed"] = 666
    dataset_parameters_pointclouds["pointcloud_target_size"] = 3000
    dataset_parameters_pointclouds["pointcloud_random_rotation"] = True
    dataset_parameters_pointclouds["filter"] = "360"
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)
    data_generator.analyze_files()
    dataset = next(data_generator.generate(size=100, verbose=True))
    assert dataset[0].shape == (100, 3000, 3), str(dataset[0].shape)
    assert dataset[1].shape == (100, 1), str(dataset[1].shape)

    # Filter: "back".
    dataset_parameters_pointclouds = {}
    dataset_parameters_pointclouds["input_type"] = "pointcloud"
    dataset_parameters_pointclouds["random_seed"] = 666
    dataset_parameters_pointclouds["pointcloud_target_size"] = 3000
    dataset_parameters_pointclouds["pointcloud_random_rotation"] = True
    dataset_parameters_pointclouds["filter"] = "back"
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)
    data_generator.analyze_files()
    dataset = next(data_generator.generate(size=100, verbose=True))
    assert dataset[0].shape == (100, 3000, 3), str(dataset[0].shape)
    assert dataset[1].shape == (100, 1), str(dataset[1].shape)
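# Note: the three blocks in test_filter differ only in the value of "filter". A possible
# refactor (a sketch, using only the generator API already exercised above; the method
# name test_filter_variants is made up for illustration):
def test_filter_variants(self):
    dataset_path = get_dataset_path("../../data/preprocessed")
    for filter_name in ["front", "360", "back"]:
        dataset_parameters = {
            "input_type": "pointcloud",
            "random_seed": 666,
            "pointcloud_target_size": 3000,
            "pointcloud_random_rotation": True,
            "filter": filter_name,
        }
        data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters)
        data_generator.analyze_files()
        dataset = next(data_generator.generate(size=100, verbose=True))
        assert dataset[0].shape == (100, 3000, 3), str(dataset[0].shape)
        assert dataset[1].shape == (100, 1), str(dataset[1].shape)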
def test_voxelgrid_generation(self):
    dataset_path = get_dataset_path()
    print("Using dataset path:", dataset_path)
    dataset_parameters_voxelgrids = {}
    dataset_parameters_voxelgrids["input_type"] = "voxelgrid"
    dataset_parameters_voxelgrids["random_seed"] = 666
    dataset_parameters_voxelgrids["voxelgrid_target_shape"] = (32, 32, 32)
    dataset_parameters_voxelgrids["voxel_size_meters"] = 0.1
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_voxelgrids)
    data_generator.analyze_files()
    dataset = next(data_generator.generate(size=100, verbose=True))
    # The generated inputs should match the requested voxelgrid target shape.
    assert dataset[0].shape == (100, 32, 32, 32), str(dataset[0].shape)
    assert dataset[1].shape == (100, 1), str(dataset[1].shape)
def test_sequence_rgb_map_stress(self):
    dataset_path = get_dataset_path("../../data/preprocessed")
    print("Using dataset path:", dataset_path)
    dataset_parameters_rgbmaps = {}
    dataset_parameters_rgbmaps["input_type"] = "rgbmap"
    dataset_parameters_rgbmaps["random_seed"] = 666
    dataset_parameters_rgbmaps["filter"] = "360"
    dataset_parameters_rgbmaps["sequence_length"] = 8
    dataset_parameters_rgbmaps["rgbmap_target_width"] = 64
    dataset_parameters_rgbmaps["rgbmap_target_height"] = 64
    dataset_parameters_rgbmaps["rgbmap_scale_factor"] = 1.0
    dataset_parameters_rgbmaps["rgbmap_axis"] = "vertical"
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_rgbmaps)
    data_generator.analyze_files()
    dataset = next(data_generator.generate(size=10000, verbose=True))
    # With sequence_length = 8 each sample is a sequence of RGB-maps.
    assert dataset[0].shape == (10000, 8, 64, 64, 3), str(dataset[0].shape)
    assert dataset[1].shape == (10000, 1), str(dataset[1].shape)
def test_pointcloud_measuring_time(self):
    dataset_path = get_dataset_path()
    print("Using dataset path:", dataset_path)
    dataset_parameters_pointclouds = {}
    dataset_parameters_pointclouds["input_type"] = "pointcloud"
    dataset_parameters_pointclouds["random_seed"] = 666
    dataset_parameters_pointclouds["pointcloud_target_size"] = 30000
    dataset_parameters_pointclouds["pointcloud_random_rotation"] = True
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)

    start_time = time.time()
    pointclouds_count = 0
    for qrcode, paths in data_generator.qrcodes_dictionary.items():
        for path in paths:
            with open(path, "rb") as file:
                (pointcloud, targets) = pickle.load(file)
                pointclouds_count += 1
                del pointcloud
                del targets
    elapsed_time = time.time() - start_time
    print("Loaded {} pointclouds in {} seconds".format(pointclouds_count, elapsed_time))
def test_sequence_rgb_map(self):
    dataset_path = get_dataset_path("../../data/preprocessed")
    print("Using dataset path:", dataset_path)
    dataset_parameters_rgbmaps = {}
    dataset_parameters_rgbmaps["input_type"] = "rgbmap"
    dataset_parameters_rgbmaps["output_targets"] = ["weight"]
    dataset_parameters_rgbmaps["random_seed"] = 666
    dataset_parameters_rgbmaps["filter"] = "360"
    dataset_parameters_rgbmaps["sequence_length"] = 4
    dataset_parameters_rgbmaps["rgbmap_target_width"] = 64
    dataset_parameters_rgbmaps["rgbmap_target_height"] = 64
    dataset_parameters_rgbmaps["rgbmap_scale_factor"] = 1.0
    dataset_parameters_rgbmaps["rgbmap_axis"] = "horizontal"
    data_generator = create_datagenerator_from_parameters(dataset_path, dataset_parameters_rgbmaps)
    data_generator.analyze_files()
    generator = data_generator.generate(size=10, verbose=False)
    for _ in range(10):
        dataset = next(generator)
        assert dataset[0].shape == (10, 4, 64, 64, 3), str(dataset[0].shape)
        assert dataset[1].shape == (10, 1), str(dataset[1].shape)
        print(dataset[1])
# # Create data-generator.
#
# The method create_datagenerator_from_parameters is a convenience method. It allows you to instantiate a generator from a specification-dictionary.

# In[4]:

from cgmcore.preprocesseddatagenerator import create_datagenerator_from_parameters

dataset_parameters_pointclouds = {}
dataset_parameters_pointclouds["input_type"] = "pointcloud"
dataset_parameters_pointclouds["output_targets"] = ["height"]
dataset_parameters_pointclouds["random_seed"] = random_seed
dataset_parameters_pointclouds["pointcloud_target_size"] = 10000
dataset_parameters_pointclouds["pointcloud_random_rotation"] = False
dataset_parameters_pointclouds["sequence_length"] = 0
datagenerator_instance_pointclouds = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)


# # Getting the QR-codes and doing a train-validate-split.
#
# The data-generator is perfectly capable of retrieving all QR-codes from the dataset. This snippet shows how to do so and how to split the QR-codes into two sets: train and validate.

# In[5]:

# Get the QR-codes.
qrcodes_to_use = datagenerator_instance_pointclouds.qrcodes[0:1500]

# Do the split.
random.seed(random_seed)
qrcodes_shuffle = qrcodes_to_use[:]
random.shuffle(qrcodes_shuffle)
split_index = int(0.8 * len(qrcodes_shuffle))
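# The cell above stops at the split index; following the same pattern used in the later
# snippets of this document, the train/validate assignment would presumably look like
# this (a sketch, not taken verbatim from the source):
qrcodes_train = sorted(qrcodes_shuffle[:split_index])
qrcodes_validate = sorted(qrcodes_shuffle[split_index:])
del qrcodes_shuffle
print("Number of QR-codes for training:", len(qrcodes_train))
print("Number of QR-codes for validation:", len(qrcodes_validate))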
random_seed = 667
image_size = 128

# For creating RGB-maps.
dataset_parameters = {}
dataset_parameters["input_type"] = "rgbmap"
dataset_parameters["output_targets"] = ["weight"]
dataset_parameters["random_seed"] = 666
dataset_parameters["filter"] = "360"
dataset_parameters["sequence_length"] = 0 #4
dataset_parameters["rgbmap_target_width"] = image_size
dataset_parameters["rgbmap_target_height"] = image_size
dataset_parameters["rgbmap_scale_factor"] = 1.0
dataset_parameters["rgbmap_axis"] = "horizontal"
datagenerator_instance = create_datagenerator_from_parameters(dataset_path, dataset_parameters)

# Get the QR-codes.
qrcodes_to_use = datagenerator_instance.qrcodes[:]
#qrcodes_to_use = qrcodes.standing_list

# Do the split.
random.seed(random_seed)
qrcodes_shuffle = qrcodes_to_use[:]
random.shuffle(qrcodes_shuffle)
split_index = int(0.8 * len(qrcodes_shuffle))
qrcodes_train = sorted(qrcodes_shuffle[:split_index])
qrcodes_validate = sorted(qrcodes_shuffle[split_index:])
del qrcodes_shuffle
print("QR-codes for training:\n", "\t".join(qrcodes_train))
print("QR-codes for validation:\n", "\t".join(qrcodes_validate))
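# With the QR-codes split into training and validation sets, the data-generator can be
# asked for batches. A minimal usage sketch (batch_size is a hypothetical value chosen
# here for illustration; generate() is called with the same arguments as in the training
# scripts below):
batch_size = 32  # hypothetical illustration value
generator_train = datagenerator_instance.generate(size=batch_size, qrcodes_to_use=qrcodes_train)
generator_validate = datagenerator_instance.generate(size=batch_size, qrcodes_to_use=qrcodes_validate)

# Pull one batch to inspect its shapes.
batch_x, batch_y = next(generator_train)
print("Training batch input shape:", batch_x.shape)
print("Training batch target shape:", batch_y.shape)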
def data():
    dataset_path = "../../preprocessed_trimmed/2018_07_31_10_52"
    print("Using dataset path", dataset_path)
    random_seed = 300
    batch_size = {{choice([15, 20, 25])}}

    dataset_parameters_pointclouds = {}
    dataset_parameters_pointclouds["input_type"] = "pointcloud"
    dataset_parameters_pointclouds["output_targets"] = ["height"]
    dataset_parameters_pointclouds["random_seed"] = random_seed
    dataset_parameters_pointclouds["pointcloud_target_size"] = 10000
    dataset_parameters_pointclouds["pointcloud_random_rotation"] = False
    dataset_parameters_pointclouds["sequence_length"] = 0
    datagenerator_instance_pointclouds = create_datagenerator_from_parameters(dataset_path, dataset_parameters_pointclouds)

    # Get the QR-codes.
    qrcodes_to_use = datagenerator_instance_pointclouds.qrcodes[0:1500]

    # Do the split.
    random.seed(random_seed)
    qrcodes_shuffle = qrcodes_to_use[:]
    random.shuffle(qrcodes_shuffle)
    split_index_a = int(0.8 * len(qrcodes_shuffle))
    #split_index_b = int(0.8 * 0.8 * len(qrcodes_shuffle))
    qrcodes_train = sorted(qrcodes_shuffle[:split_index_a])
    #qrcodes_validate = sorted(qrcodes_shuffle[split_index_b:split_index_a])
    qrcodes_test = sorted(qrcodes_shuffle[split_index_a:])
    del qrcodes_shuffle
    #print("QR-codes for training:\n", "\t".join(qrcodes_train))
    #print("QR-codes for validation:\n", "\t".join(qrcodes_validate))

    # Create python generators.
    generator_pointclouds_train = datagenerator_instance_pointclouds.generate(size=batch_size, qrcodes_to_use=qrcodes_train)
    generator_pointclouds_test = datagenerator_instance_pointclouds.generate(size=batch_size, qrcodes_to_use=qrcodes_test)
    #generator_pointclouds_validate = datagenerator_instance_pointclouds.generate(size=batch_size, qrcodes_to_use=qrcodes_validate)

    size = 16000
    generator_pointclouds = generator_pointclouds_train
    X = []
    Y = []
    d = next(generator_pointclouds)
    while d:
        t_x, t_y = d
        for x in t_x:
            X.append(x)
        for y in t_y:
            Y.append(y)
        d = next(generator_pointclouds)
        if len(X) > size:
            break
    train_x = X
    train_y = Y

    size = 4000
    generator_pointclouds = generator_pointclouds_test
    X = []
    Y = []
    d = next(generator_pointclouds)
    while d:
        t_x, t_y = d
        for x in t_x:
            X.append(x)
        for y in t_y:
            Y.append(y)
        d = next(generator_pointclouds)
        if len(X) > size:
            break
    test_x = X
    test_y = Y

    return train_x, train_y, test_x, test_y
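# The double-curly-brace expression ({{choice([15, 20, 25])}}) indicates that data() is
# meant to be templated by hyperas, which turns such expressions into a hyperopt search
# space. A minimal usage sketch under that assumption (create_model is a hypothetical
# model-building function, not defined in this document):
from hyperopt import Trials, tpe
from hyperas import optim

best_run, best_model = optim.minimize(model=create_model,  # hypothetical model function
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=10,
                                      trials=Trials())
print("Best hyperparameter assignment found:", best_run)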
def main():
    # Parse command line arguments.
    parser = argparse.ArgumentParser(description="Training on GPU")
    parser.add_argument("-config_file", action="store", dest="config_file", type=str, required=True, help="config file path")
    parser.add_argument("-use_multi_gpu", action="store_true", dest="use_multi_gpu", help="set the training on multiple gpus")
    parser.add_argument("-resume_training", action="store_true", dest="resume_training", help="resumes a previous training")
    arguments = parser.parse_args()

    # Loading the config file.
    config = json.load(open(arguments.config_file, "r"))
    config = Bunch({key: Bunch(value) for key, value in config.items()})

    # Output path. Ensure its existence before the file-logger writes into it.
    if os.path.exists(config.global_parameters.output_path) == False:
        os.makedirs(config.global_parameters.output_path)

    # Create logger.
    logger = logging.getLogger("train.py")
    logger.setLevel(logging.DEBUG)
    file_handler = logging.FileHandler(os.path.join(config.global_parameters.output_path, "train.log"))
    file_handler.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    logger.info("Starting training job...")
    logger.info("Using output path: {}".format(config.global_parameters.output_path))

    # Prepare results.
    results = Bunch()

    # Check if there is a GPU.
    if len(utils.get_available_gpus()) == 0:
        logger.warning("WARNING! No GPU available!")

    # Create datagenerator.
    datagenerator_instance = create_datagenerator_from_parameters(config.datagenerator_parameters.dataset_path, config.datagenerator_parameters)

    # Do a train-validation split.
    qrcodes = datagenerator_instance.qrcodes[:]
    randomizer = random.Random(config.datagenerator_parameters.random_seed)
    randomizer.shuffle(qrcodes)
    split_index = int(0.8 * len(qrcodes))
    qrcodes_train = sorted(qrcodes[:split_index])
    qrcodes_validate = sorted(qrcodes[split_index:])
    del qrcodes
    results.qrcodes_train = qrcodes_train
    results.qrcodes_validate = qrcodes_validate

    # Create python generators.
    workers = 4
    generator_train = datagenerator_instance.generate(size=config.training_parameters.batch_size, qrcodes_to_use=qrcodes_train, workers=workers)
    generator_validate = datagenerator_instance.generate(size=config.training_parameters.batch_size, qrcodes_to_use=qrcodes_validate, workers=workers)

    # Copy config file.
    shutil.copy2(arguments.config_file, config.global_parameters.output_path)

    # Create the model path.
    model_path = os.path.join(config.global_parameters.output_path, "model.h5")

    # TODO
    assert config.model_parameters.type == "pointnet"

    # Resume training.
    if arguments.resume_training == True:
        if os.path.exists(model_path) == False:
            logger.error("Model does not exist. Cannot resume!")
            exit(0)
        model = tf.keras.models.load_model(model_path)
        logger.info("Loaded model from {}.".format(model_path))

    # Start from scratch.
    else:
        model = modelutils.create_point_net(config.model_parameters.input_shape, config.model_parameters.output_size, config.model_parameters.hidden_sizes)
        logger.info("Created new model.")
    model.summary()

    # Compile model.
    if config.model_parameters.optimizer == "rmsprop":
        optimizer = optimizers.RMSprop(learning_rate=config.model_parameters.learning_rate)
    elif config.model_parameters.optimizer == "adam":
        optimizer = optimizers.Adam(
            learning_rate=config.model_parameters.learning_rate,
            beta_1=config.model_parameters.beta_1,
            beta_2=config.model_parameters.beta_2,
            amsgrad=config.model_parameters.amsgrad)
    else:
        raise Exception("Unexpected optimizer {}".format(config.model_parameters.optimizer))
    model.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

    # Do training on multiple GPUs.
    original_model = model
    if arguments.use_multi_gpu == True:
        model = tf.keras.utils.multi_gpu_model(model, gpus=2)

    # Create the callbacks.
    callbacks = []

    # Logging training progress with tensorboard.
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=config.global_parameters.output_path,
        histogram_freq=0,
        batch_size=32,
        write_graph=True,
        write_grads=False,
        write_images=True,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None,
        embeddings_data=None,
        update_freq="epoch")
    callbacks.append(tensorboard_callback)

    # Early stopping.
    if config.training_parameters.use_early_stopping == True:
        early_stopping_callback = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            min_delta=config.training_parameters.early_stopping_threshold,
            patience=5,
            verbose=1)
        callbacks.append(early_stopping_callback)

    # Model checkpoint.
    val_loss_callback = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(config.global_parameters.output_path, "val_loss_{val_loss:.2f}_at_epoche_{epoch:2d}.hdf5"),
        monitor="val_loss",
        verbose=0,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch")
    callbacks.append(val_loss_callback)

    # Start training.
    results.training_begin = utils.get_datetime_string()
    try:
        # Train the model.
        model.fit_generator(
            generator_train,
            steps_per_epoch=config.training_parameters.steps_per_epoch,
            epochs=config.training_parameters.epochs,
            validation_data=generator_validate,
            validation_steps=config.training_parameters.validation_steps,
            use_multiprocessing=False,
            workers=0,
            callbacks=callbacks)
    except KeyboardInterrupt:
        logger.info("Gracefully stopping training...")
        datagenerator_instance.finish()
        results.interrupted_by_user = True

    # Training ended.
    results.training_end = utils.get_datetime_string()

    # Save the model. Make sure that it is the original model.
    original_model.save(model_path)

    # Store the history.
    results.model_history = model.history.history

    # Write the results.
    results_name = "results.json"
    results_path = os.path.join(config.global_parameters.output_path, results_name)
    json.dump(results, open(results_path, "w"), indent=4, sort_keys=True)
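# For reference, main() expects the config JSON to provide the sections and keys read
# above (global_parameters, datagenerator_parameters, training_parameters,
# model_parameters). The values below are placeholders for illustration only, not taken
# from the source:
example_config = {
    "global_parameters": {
        "output_path": "/path/to/output"
    },
    "datagenerator_parameters": {
        "dataset_path": "/path/to/preprocessed",
        "input_type": "pointcloud",
        "output_targets": ["height"],
        "random_seed": 666,
        "pointcloud_target_size": 10000,
        "pointcloud_random_rotation": False,
        "sequence_length": 0
    },
    "training_parameters": {
        "batch_size": 16,
        "steps_per_epoch": 100,
        "epochs": 50,
        "validation_steps": 20,
        "use_early_stopping": True,
        "early_stopping_threshold": 0.0001
    },
    "model_parameters": {
        "type": "pointnet",
        "optimizer": "adam",
        "learning_rate": 0.0001,
        "beta_1": 0.9,
        "beta_2": 0.999,
        "amsgrad": False,
        "input_shape": [10000, 3],
        "output_size": 1,
        "hidden_sizes": [512, 256, 128]
    }
}
# json.dump(example_config, open("config.json", "w"), indent=4)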