def collect_weights(
        model_path,
        tensors,
        output_directory='results',
        debug=False,
        **kwargs
):
    """Collect the named weight tensors from a pretrained model and save
    them under a freshly created results directory.

    :param model_path: path of the pretrained model to load
    :param tensors: names of the weight tensors to collect
    :param output_directory: base directory for the saved results
    :param debug: accepted for interface compatibility; not used here
    :returns: the filenames the collected tensors were saved to
    """
    # pick a results directory name that does not already exist
    results_dir = find_non_existing_dir_by_adding_suffix(output_directory)

    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(results_dir))
    logger.info('\n')

    model, model_definition = load_model_and_definition(model_path)

    # collect weights
    print_boxed('COLLECT WEIGHTS')
    weights = model.collect_weights(tensors)

    # save the collected tensors to disk
    os.makedirs(results_dir)
    filenames = save_tensors(weights, results_dir)

    logger.info('Saved to: {0}'.format(results_dir))
    return filenames
def get_experiment_dir_name(
        output_directory,
        experiment_name,
        model_name='run'
):
    """Build a unique directory path for an experiment run.

    The path is ``<output_directory>/<experiment_name>_<model_name>``; a
    suffix is added by ``find_non_existing_dir_by_adding_suffix`` if that
    directory already exists.

    :param output_directory: parent directory for experiment results
    :param experiment_name: name of the experiment
    :param model_name: optional run name appended to the experiment name;
        falsy values (``None`` or ``''``) are skipped entirely
    :returns: a directory path that does not yet exist
    """
    # bug fix: the conditional '_' already handled a falsy model_name, but
    # concatenating None itself raised TypeError — normalize it to ''
    model_name = model_name or ''
    base_dir_name = os.path.join(
        output_directory,
        experiment_name + ('_' if model_name else '') + model_name
    )
    return find_non_existing_dir_by_adding_suffix(base_dir_name)
def full_predict(model_path,
                 data_csv=None,
                 data_hdf5=None,
                 split=TEST,
                 batch_size=128,
                 skip_save_unprocessed_output=False,
                 skip_save_test_predictions=False,
                 skip_save_test_statistics=False,
                 output_directory='results',
                 evaluate_performance=True,
                 gpus=None,
                 gpu_memory_limit=None,
                 allow_parallel_threads=True,
                 use_horovod=None,
                 debug=False,
                 **kwargs):
    """Load a pretrained model, run prediction on a dataset split, and save
    the (optionally postprocessed) outputs to a results directory.

    :param model_path: path of the pretrained model to load
    :param data_csv: CSV filepath with the datapoints to predict on
    :param data_hdf5: HDF5 file path used when the CSV path is not provided
    :param split: split of the dataset to use for prediction
    :param batch_size: batch size used during prediction
    :param skip_save_unprocessed_output: skip saving raw (unprocessed) outputs
    :param skip_save_test_predictions: skip saving postprocessed predictions
    :param skip_save_test_statistics: skip saving test statistics
    :param output_directory: base directory under which results are saved
    :param evaluate_performance: also compute/print evaluation results
    :param gpus: GPUs the model intends to use
    :param gpu_memory_limit: maximum memory per GPU device
    :param allow_parallel_threads: allow TensorFlow multithreading parallelism
    :param use_horovod: enable Horovod-based distribution; non-master workers
        skip logging and all saving below
    :param debug: passed through to ``predict`` for debugging
    :returns: None
    """
    set_on_master(use_horovod)

    # only the (Horovod) master process logs and saves
    if is_on_master():
        logger.info('Dataset path: {}'.format(
            data_csv if data_csv is not None else data_hdf5))
        logger.info('Model path: {}'.format(model_path))
        logger.info('')

    train_set_metadata_json_fp = os.path.join(model_path,
                                              TRAIN_SET_METADATA_FILE_NAME)

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path,
        split,
        data_csv,
        data_hdf5,
        train_set_metadata_json_fp,
        evaluate_performance)

    # run the prediction
    if is_on_master():
        print_boxed('LOADING MODEL')
    model, model_definition = load_model_and_definition(
        model_path,
        use_horovod=use_horovod,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads)

    prediction_results = predict(dataset,
                                 train_set_metadata,
                                 model,
                                 model_definition,
                                 batch_size,
                                 evaluate_performance,
                                 debug)

    if is_on_master():
        # setup directories and file names
        experiment_dir_name = find_non_existing_dir_by_adding_suffix(
            output_directory)

        # if we are skipping all saving,
        # there is no need to create a directory that will remain empty
        should_create_exp_dir = not (skip_save_unprocessed_output and
                                     skip_save_test_predictions and
                                     skip_save_test_statistics)
        if should_create_exp_dir:
            os.makedirs(experiment_dir_name)

        # postprocess
        postprocessed_output = postprocess(
            prediction_results,
            model_definition['output_features'],
            train_set_metadata,
            experiment_dir_name,
            skip_save_unprocessed_output or not is_on_master())

        if not skip_save_test_predictions:
            save_prediction_outputs(postprocessed_output, experiment_dir_name)

        if evaluate_performance:
            print_test_results(prediction_results)
            # NOTE(review): statistics are only saved when
            # evaluate_performance is True — confirm that nesting is intended
            if not skip_save_test_statistics:
                save_test_statistics(prediction_results, experiment_dir_name)

        logger.info('Saved to: {0}'.format(experiment_dir_name))
def collect_activations(
        model_path,
        tensors,
        data_csv=None,
        data_hdf5=None,
        split=TEST,
        batch_size=128,
        output_directory='results',
        gpus=None,
        gpu_memory_limit=None,
        allow_parallel_threads=True,
        debug=False,
        **kwargs
):
    """Uses the pretrained model to collect the tensors corresponding to a
    datapoint in the dataset. Saves the tensors to the experiment directory

    :param model_path: Is the model from which the tensors will be collected
    :param tensors: List containing the names of the tensors to collect
    :param data_csv: The CSV filepath which contains the datapoints from which
           the tensors are collected
    :param data_hdf5: The HDF5 file path if the CSV file path does not exist,
           an alternative source of providing the data to the model
    :param split: Split type
    :param batch_size: Batch size
    :param output_directory: Output directory
    :param gpus: The total number of GPUs that the model intends to use
    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
            allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow to
            use multithreading parallelism to improve performance at the cost
            of determinism.
    :param debug: To step through the stack traces and find possible errors
    :returns: the filenames the collected tensors were saved to
    """
    # setup directories and file names
    experiment_dir_name = find_non_existing_dir_by_adding_suffix(
        output_directory
    )

    logger.info('Dataset path: {}'.format(
        data_csv if data_csv is not None else data_hdf5)
    )
    logger.info('Model path: {}'.format(model_path))
    logger.info('Output path: {}'.format(experiment_dir_name))
    logger.info('\n')

    train_set_metadata_fp = os.path.join(
        model_path,
        TRAIN_SET_METADATA_FILE_NAME
    )

    # preprocessing
    dataset, train_set_metadata = preprocess_for_prediction(
        model_path,
        split,
        data_csv,
        data_hdf5,
        train_set_metadata_fp
    )

    model, model_definition = load_model_and_definition(
        model_path,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads
    )

    # collect activations
    print_boxed('COLLECT ACTIVATIONS')
    collected_tensors = model.collect_activations(
        dataset,
        tensors,
        batch_size
    )

    # saving
    os.makedirs(experiment_dir_name)
    # consistency fix: capture and return the saved filenames, matching the
    # behavior of collect_weights; callers ignoring the return are unaffected
    saved_filenames = save_tensors(collected_tensors, experiment_dir_name)

    logger.info('Saved to: {0}'.format(experiment_dir_name))
    return saved_filenames