if dropbox_parameters: dropbox_params = DropboxConnection.Parameters(dropbox_parameters[0], dropbox_parameters[1]) dropbox = DropboxConnection(dropbox_params) logger.info('Dropbox parameters:: dropbox_params: %s', dropbox_params) #Model file model_file = ModelInput(input_params.model_name) model_file_name = model_file.file_name(0, 0) #Input data file input_data_file = InputDataFile(constants.PREDICTION_INPUT_DATA_FILE_NAME_GUIDANCE) input_data_file_name = input_data_file.file_name(0, 0) #Prepare input files input_files_client = InputFiles(dropbox) input_files = input_files_client.get_all([input_data_file_name, model_file_name]) #Assign input files input_data_file_path = input_files[input_data_file_name] model_file_path = input_files[model_file_name] #Load model model = load_model(str(model_file_path)) #Input data frame input_data = read_csv(input_data_file_path, index_col = 0) #Update input data parameters num_classes = len(getattr(input_data, image_generation_params.label_col).unique()) image_generation_params_update = dict(num_classes = num_classes)
def test_get_all_local_and_remote_files(self):
    """Run the shared get_all check with a randomly chosen flag per call."""
    #Arrange: replace the client's download attribute with a mock
    input_files = InputFiles(self._dropbox)
    input_files._dropbox.download = MagicMock()

    def pick_randomly():
        #Returns True or False at random on every invocation
        #NOTE(review): presumably tells get_all whether a file is local - confirm against the helper
        return bool(randint(0, 1))

    #Act & Assert
    self.get_all(input_files, pick_randomly)
dropbox = DropboxConnection(dropbox_params) logger.info('Dropbox parameters:: dropbox_params: %s', dropbox_params) #Predictable randomness seed = 3 np_seed(seed) tf_seed(seed) imgaug_seed(seed) #Input data file input_data_file = InputDataFile() input_data_file_name = input_data_file.file_name(0, training_params.epoch_id) #Prepare input files input_files_client = InputFiles(dropbox) input_data_file_path = input_files_client.get_all([input_data_file_name])[input_data_file_name] #Input data frame input_data = read_csv(input_data_file_path, index_col = 0) #Update input data parameters num_classes = max(getattr(input_data, image_generation_params.label_col)) + 1 image_generation_params_update = dict(num_classes = num_classes) update_params(image_generation_params, **image_generation_params_update) logger.info('Updated input data parameters: %s', input_params) #Model input model_input = ModelInput(input_params.model_name) model_file = model_input.file_name(training_params.batch_id, training_params.epoch_id)
def test_get_all_just_remote_files(self):
    """Run the shared get_all check with a flag that is always False."""
    #Arrange: replace the client's download attribute with a mock
    input_files = InputFiles(self._dropbox)
    input_files._dropbox.download = MagicMock()

    def always_false():
        #NOTE(review): presumably marks every file as not locally available - confirm against the helper
        return False

    #Act & Assert
    self.get_all(input_files, always_false)
def test_get_all_just_local_files(self):
    """Run the shared get_all check with a flag that is always True."""
    #Arrange: download is left untouched here, unlike the remote-file tests
    input_files = InputFiles(self._dropbox)

    def always_true():
        #NOTE(review): presumably marks every file as locally available - confirm against the helper
        return True

    #Act & Assert
    self.get_all(input_files, always_true)
def test_init(self):
    """Construct InputFiles with the fixture's dropbox client; passes if nothing is raised."""
    #Valid inputs: the instance itself is not needed, only the successful construction
    InputFiles(self._dropbox)
'Input parameters:: input_data: %s label_col: %s output_file: %s log_to_console: %s', input_data, label_col, output_file, log_to_console) #Dropbox connection placeholder dropbox = None if dropbox_parameters: dropbox_params = DropboxConnection.Parameters(dropbox_parameters[0], dropbox_parameters[1]) dropbox = DropboxConnection(dropbox_params) logger.info('Dropbox parameters:: dropbox_params: %s', dropbox_params) ####################################### Prepare the input dataset [Start] ############################################ #Prepare input files input_files_client = InputFiles(dropbox) input_data = input_files_client.get_all([input_data])[input_data] #Input data as pandas data frame input_data = csv_to_dataframe(input_data) ####################################### Prepare the input dataset [End] ############################################ ####################################### Rebalance the dataset [Start] ############################################ #Rebalance the data and obtain the statistics rebalancer = Rebalancing(input_data, label_col) result, pre_stats, post_stats = rebalancer.rebalance(statistics=True) ####################################### Rebalance the dataset [End] ############################################ #Output to a file dataframe_to_csv(result, output_file)
epoch_data_dirs, dropbox_parameters, log_to_console = parse_args() #Initialize logging logging.initialize(__file__, log_to_console=log_to_console) logger = logging.get_logger(__name__) #Log input parameters logger.info( 'Running with parameters epoch_data_dirs: %s log_to_console: %d', epoch_data_dirs, log_to_console) #Dropbox connection dropbox = DropboxConnection.get_client_from_params(dropbox_parameters) #Prepare input files input_files_client = InputFiles(dropbox) #Epoch data files placeholder input_files = [] ####################################### Prepare input files [Start] ############################################ #Iterate over input epoch stores and enumerate their result files. for epoch_store in epoch_data_dirs: #Fetch the remote epoch data epoch_data = dropbox.list(epoch_store, constants.INPUT_RESULT_FILE_PREFIX) #Extract file paths from epoch data input_files.extend([file_path for file_path in epoch_data[0]]) #Create local epoch store locations