def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'repo', 'outputs')
    self.in_path = os.path.join('/', 'repo', 'inputs')
    self.metadata_path = os.path.join('prt', '2019', '10', '02')
    self.data_files = [
        'GRSM_prt_6974_2019-10-02.parquet',
        'UNDE_prt_6848_2019-10-02.parquet',
        'WREF_prt_6848_2019-10-02.parquet'
    ]
    self.expected_files = [
        'prt_6974_2019-10-02.parquet',
        'prt_6848_2019-10-02.parquet',
    ]
    for data_file in self.data_files:
        name_parts = data_file.split('_')
        source_id = name_parts[2]
        data_path = os.path.join(self.in_path, self.metadata_path, source_id, data_file)
        # Use real data file to convert.
        actual_data_file_path = os.path.join(os.path.dirname(__file__), data_file)
        self.fs.add_real_file(actual_data_file_path, target_path=data_path)
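# The fixtures in this file all follow the same pyfakefs pattern. A minimal,
# self-contained sketch of that pattern (class and paths here are illustrative,
# not from this repo): setUpPyfakefs() swaps in an in-memory filesystem,
# create_file() makes empty placeholders, and add_real_file() maps a real
# on-disk file into the fake filesystem (read-only by default).
import os
from pyfakefs import fake_filesystem_unittest

class ExamplePyfakefsTest(fake_filesystem_unittest.TestCase):

    def setUp(self):
        self.setUpPyfakefs()  # all os/io calls now hit the fake filesystem
        self.fs.create_file('/inputs/data/data.ext')  # empty placeholder
        real = os.path.join(os.path.dirname(__file__), 'test-locations.json')
        self.fs.add_real_file(real, target_path='/inputs/location/locations.json')

    def test_files_exist(self):
        self.assertTrue(os.path.exists('/inputs/data/data.ext'))
        self.assertTrue(os.path.exists('/inputs/location/locations.json'))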
def setUp(self):
    log_config.configure('DEBUG')
    self.location = 'CFGLOC113507'
    # The context group to find in the location file should match the
    # existing file entry 'aspirated-single-224'.
    self.group = 'aspirated-triple-'
    self.setUpPyfakefs()
    self.in_path = os.path.join('/', 'inputs')
    self.out_path = os.path.join('/', 'outputs')
    self.metadata_path = os.path.join('dualfan', '2019', '05', '21')
    inputs_root = os.path.join(self.in_path, 'repo', self.metadata_path)
    data_path = os.path.join(inputs_root, self.location, 'data', 'data.ext')
    locations_path = os.path.join(inputs_root, self.location, 'location', 'locations.json')
    self.fs.create_file(data_path)
    # Use real location file for parsing.
    actual_location_file_path = os.path.join(os.path.dirname(__file__), 'test-locations.json')
    self.fs.add_real_file(actual_location_file_path, target_path=locations_path)
def setUp(self):
    log_config.configure('DEBUG')
    # File path indices.
    self.source_type_index = 3
    self.year_index = 4
    self.month_index = 5
    self.day_index = 6
    self.source_id_index = 7
    self.data_type_index = 8
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'outputs')
    self.metadata_path = os.path.join('prt', '2019', '05', '21', '00001')
    self.context = 'aspirated-triple'  # The context to find in the location file.
    self.in_path = os.path.join('/', 'inputs')
    inputs_path = os.path.join(self.in_path, 'merged', self.metadata_path)
    data_path = os.path.join(inputs_path, 'data', 'data.ext')
    flags_path = os.path.join(inputs_path, 'flags', 'flags.ext')
    locations_path = os.path.join(inputs_path, 'location', 'locations.json')
    uncertainty_coefficient_path = os.path.join(
        inputs_path, 'uncertainty_coefficient', 'uncertaintyCoefficient.json')
    self.fs.create_file(data_path)
    self.fs.create_file(flags_path)
    self.fs.create_file(uncertainty_coefficient_path)
    # Use real location file for parsing.
    actual_location_file_path = os.path.join(os.path.dirname(__file__), 'test-locations.json')
    self.fs.add_real_file(actual_location_file_path, target_path=locations_path)
def setUp(self): """Set required files in mock filesystem.""" log_config.configure('DEBUG') self.out_dir = os.path.join('/', 'tmp', 'outputs') self.location = 'CFGLOC112154' self.input_root = os.path.join( '/', 'tmp', 'inputs', ) source_month = os.path.join('prt', '2019', '01') self.input_data_dir = os.path.join(self.input_root, source_month, '03') self.source_dir = os.path.join(source_month, '03', self.location) self.out_name = 'outname' self.target_date = '2019-01-03' self.date_index = 2 self.loc_index = 1 self.setUpPyfakefs() self.data_dir = 'data' # Data file self.source_file_name = 'prt_CFGLOC112154_2019-01-03_basicStats_030.ext' self.target_file_name = 'outname_2019-01-03_CFGLOC112154_basicStats_030.ext' data_path = os.path.join(self.input_root, self.source_dir, self.data_dir, self.source_file_name) self.fs.create_file(data_path) print('Egress test') print(f'input data_path: {data_path}')
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'outputs')
    self.dir1 = 'dir1'
    self.dir_2 = 'dir2'
    self.dir_3 = 'dir3'
    self.file_name1 = 'dir1.ext'
    self.file_name2 = 'dir2.ext'
    self.file_name3 = 'dir3.ext'
    # Set required files in mock filesystem.
    self.in_dir = os.path.join('/', 'inputs')
    in_dir_path = os.path.join(self.in_dir, 'dir')
    file1 = os.path.join(in_dir_path, self.dir1, self.file_name1)
    file2 = os.path.join(in_dir_path, self.dir_2, self.file_name2)
    file3 = os.path.join(in_dir_path, self.dir_3, self.file_name3)
    self.fs.create_file(file1)
    self.fs.create_file(file2)
    self.fs.create_file(file3)
    self.out_path1 = os.path.join(self.out_path, self.dir1, self.file_name1)
    self.out_path2 = os.path.join(self.out_path, self.dir_2, self.file_name2)
    self.out_path3 = os.path.join(self.out_path, self.dir_3, self.file_name3)
def main():
    env = environs.Env()
    data_path = env('DATA_PATH')
    location_path = env('LOCATION_PATH')
    empty_files_path = env('EMPTY_FILES_PATH')
    output_directories = env('OUTPUT_DIRECTORIES')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    # Directory names to output arrive as a comma-separated string; split
    # unconditionally so a single name still yields a one-element list
    # (splitting only when a comma is present would leave a bare string).
    output_directories = output_directories.split(',')
    # Empty file paths.
    empty_files_paths = get_empty_file_paths(empty_files_path)
    empty_data_path = empty_files_paths.get('empty_data_path')
    empty_flags_path = empty_files_paths.get('empty_flags_path')
    empty_uncertainty_data_path = empty_files_paths.get('empty_uncertainty_data_path')
    date_constraints = get_date_constraints()
    if date_constraints is not None:
        start_date = date_constraints.get('start_date')
        end_date = date_constraints.get('end_date')
        keys = gap_filler.get_data_files(data_path, out_path,
                                         start_date=start_date, end_date=end_date)
        gap_filler.process_location_files(location_path, keys, out_path,
                                          output_directories,
                                          empty_data_path,
                                          empty_flags_path,
                                          empty_uncertainty_data_path,
                                          start_date=start_date,
                                          end_date=end_date)
    else:
        keys = gap_filler.get_data_files(data_path, out_path)
        gap_filler.process_location_files(location_path, keys, out_path,
                                          output_directories,
                                          empty_data_path,
                                          empty_flags_path,
                                          empty_uncertainty_data_path)
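# get_date_constraints() above is referenced but not shown. A minimal sketch
# of its assumed behavior, purely for orientation (the function name below is
# suffixed to mark it as hypothetical, and the START_DATE/END_DATE variable
# names are assumptions, not the repo's API): read optional date bounds from
# the environment and return None when neither is set.
def get_date_constraints_sketch():
    env = environs.Env()
    start_date = env.date('START_DATE', None)  # parsed as datetime.date if set
    end_date = env.date('END_DATE', None)
    if start_date is None and end_date is None:
        return None
    return {'start_date': start_date, 'end_date': end_date}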
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.in_path = os.path.join('/', 'inputs')
    self.out_path = os.path.join('/', 'outputs')
    self.regularized_path = os.path.join(self.in_path, 'regularized')
    self.quality_path = os.path.join(self.in_path, 'quality')
    # Regularized file
    self.fs.create_file(
        os.path.join(self.regularized_path, 'prt', '2018', '01', '01', 'CFGLOC112154',
                     'flags', 'prt_CFGLOC112154_2018-01-01_flagsCal.ext'))
    # Quality file
    self.fs.create_file(
        os.path.join(self.quality_path, 'prt', '2018', '01', '01', 'CFGLOC112154',
                     'flags', 'prt_CFGLOC112154_2018-01-01_plausibility.ext'))
    # Quality file 2
    self.fs.create_file(
        os.path.join(self.quality_path, 'prt', '2018', '01', '02', 'CFGLOC112154',
                     'flags', 'prt_CFGLOC112154_2018-01-01_plausibility.ext'))
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'outputs')
    self.metadata_path = os.path.join('prt', '2019', '05', '17', '00001')
    self.calibrated_path = os.path.join('/', 'inputs', 'calibrated')
    self.location_path = os.path.join('/', 'inputs', 'location')
    # Create calibrated input files.
    calibrated_root = os.path.join(self.calibrated_path, self.metadata_path)
    data_path = os.path.join(calibrated_root, 'data', 'data.ext')
    flags_path = os.path.join(calibrated_root, 'flags', 'flags.ext')
    uncertainty_path = os.path.join(calibrated_root, 'uncertainty', 'uncertainty.json')
    test_extra_dir_path = os.path.join(calibrated_root, 'test', 'test_dir', 'test.json')
    self.fs.create_file(data_path)
    self.fs.create_file(flags_path)
    self.fs.create_file(uncertainty_path)
    self.fs.create_file(test_extra_dir_path)
    # Create location input file.
    locations_path = os.path.join(self.location_path, 'prt', '00001', 'locations.json')
    self.fs.create_file(locations_path)
    # Create output directory.
    self.fs.create_dir(self.out_path)
def setUp(self):
    log_config.configure('DEBUG')
    self.location = 'CFGLOC113507'
    # The context group to find in the test location file; the existing
    # file entry is 'aspirated-single-224'.
    self.group = 'aspirated-triple-'
    self.setUpPyfakefs()
    self.source_id = '00001'
    self.in_path = os.path.join('/', 'inputs')
    self.out_path = os.path.join('/', 'outputs')
    input_root = os.path.join(self.in_path, 'repo', 'heater')
    self.event_file = 'heater_' + self.source_id + '_events.json'
    self.location_file = 'heater_' + self.source_id + '_locations.json'
    data_path = os.path.join(input_root, self.source_id, 'data', self.event_file)
    locations_path = os.path.join(input_root, self.source_id, 'location', self.location_file)
    self.fs.create_file(data_path)
    # Use real location file for parsing.
    actual_location_file_path = os.path.join(os.path.dirname(__file__), 'test-locations.json')
    self.fs.add_real_file(actual_location_file_path, target_path=locations_path)
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.data_metadata_path = os.path.join('prt', '2019', '07', '23', '0001')
    self.out_path = os.path.join('/', 'outputs')
    self.calibration_metadata_path = os.path.join('prt', '0001')
    self.data_filename = 'prt_0001_2018-01-03.ext'
    # Set input files in mock filesystem.
    in_path = os.path.join('/', 'inputs')
    data_path = os.path.join(in_path, 'data', self.data_metadata_path)
    calibration_path = os.path.join(in_path, 'calibration', self.calibration_metadata_path)
    resistance_input_dir = os.path.join(calibration_path, 'resistance')
    temperature_input_dir = os.path.join(calibration_path, 'temperature')
    # Calibration files
    self.fs.create_file(os.path.join(resistance_input_dir, 'calibration1.xml'))
    self.fs.create_file(os.path.join(resistance_input_dir, 'calibration2.xml'))
    self.fs.create_file(os.path.join(temperature_input_dir, 'calibration1.xml'))
    self.fs.create_file(os.path.join(temperature_input_dir, 'calibration2.xml'))
    # Data file
    self.fs.create_file(os.path.join(data_path, self.data_filename))
    self.data_path = os.path.join(in_path, 'data')
    self.calibration_path = os.path.join(in_path, 'calibration')
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'repo', 'outputs')
    self.data_path = os.path.join('/', 'repo', 'data')
    self.location_path = os.path.join('/', 'location')
    self.metadata_path = os.path.join('prt', '2019', '05', '17')
    # Create data file.
    self.data_file = 'prt_00001_2019-05-17.ext'
    self.input_data_path = os.path.join(self.data_path, self.metadata_path, self.data_file)
    self.fs.create_file(self.input_data_path)
    # Create location file.
    self.location_file = 'prt_00001_locations.json'
    self.input_location_path = os.path.join(self.location_path, 'prt', '00001',
                                            self.location_file)
    self.fs.create_file(self.input_location_path)
    # Create output directory.
    self.fs.create_dir(self.out_path)
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.input_path = os.path.join('/', 'repo', 'inputs')
    self.output_path = os.path.join('/', 'outputs')
    self.group = 'aspirated-single-121'
    self.location = 'CFGLOC123'
    self.metadata_path = os.path.join('2019', '05', '24', self.group)
    self.data_dir = 'data'
    self.location_dir = 'location'
    self.data_file = 'data.ext'
    self.location_file = 'locations.json'
    self.base_path = os.path.join(self.input_path, 'prt', self.metadata_path)
    self.in_data_path = os.path.join(self.base_path, self.location,
                                     self.data_dir, self.data_file)
    self.in_location_path = os.path.join(self.base_path, self.location,
                                         self.location_dir, self.location_file)
    self.fs.create_file(self.in_data_path)
    self.fs.create_file(self.in_location_path)
def main():
    env = environs.Env()
    source_path = env('SOURCE_PATH')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'source_path: {source_path} out_path: {out_path}')
    process(source_path, out_path)
def main():
    env = environs.Env()
    out_path = env('OUT_PATH')
    db_url = env('DATABASE_URL')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'Out path: {out_path}')
    load(db_url, out_path)
def main():
    env = environs.Env()
    pathname = env('PATHNAME')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'pathname: {pathname}, log_level: {log_level}')
    join(pathname, out_path)
def main(): """Group related paths without modifying the paths.""" env = environs.Env() related_paths = env('RELATED_PATHS') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log.debug(f'related_paths: {related_paths} out_path: {out_path}') group(related_paths, out_path)
def main(): """Group data by related location group.""" env = environs.Env() data_path = env('DATA_PATH') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log.debug(f'data_path: {data_path} out_path: {out_path}') group_related(data_path, out_path)
def main(): """Group input data files without modifying the file paths.""" env = environs.Env() data_path = env('DATA_PATH') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log.debug(f'data_path: {data_path} out_path: {out_path}') group(data_path, out_path)
def main():
    env = environs.Env()
    in_path = env.str('IN_PATH')
    out_path = env.str('OUT_PATH')
    log_level = env.str('LOG_LEVEL', 'INFO')
    # 30 percent duplication threshold for dedup by default.
    dedup_threshold = env.float('DEDUP_THRESHOLD', 0.3)
    log_config.configure(log_level)
    linkmerge(in_path, out_path, dedup_threshold)
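# Usage note (illustrative, not part of the repo): environs parses typed
# values and falls back to the given default when a variable is unset, so
# LOG_LEVEL and DEDUP_THRESHOLD above are safe to omit. The demo function
# name below is hypothetical.
def _demo_environs_defaults():
    import os
    os.environ.pop('DEDUP_THRESHOLD', None)  # ensure the variable is unset
    env = environs.Env()
    assert env.float('DEDUP_THRESHOLD', 0.3) == 0.3  # default applies
    os.environ['DEDUP_THRESHOLD'] = '0.5'
    assert env.float('DEDUP_THRESHOLD', 0.3) == 0.5  # parsed from the string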
def main():
    env = environs.Env()
    data_path = env('DATA_PATH')
    location_path = env('LOCATION_PATH')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'data_dir: {data_path} location_dir: {location_path} out_dir: {out_path}')
    group(data_path, location_path, out_path)
def main(): """Add the related location group name stored in the location file to the output path.""" env = environs.Env() source_path = env('SOURCE_PATH') group = env('GROUP') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log.debug(f'source_path: {source_path} group: {group} out_path: {out_path}') process(source_path, group, out_path)
def main():
    env = environs.Env()
    calibrated_path = env('CALIBRATED_PATH')
    location_path = env('LOCATION_PATH')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'calibrated_dir: {calibrated_path} '
              f'location_dir: {location_path} out_dir: {out_path}')
    group(calibrated_path, location_path, out_path)
def main():
    env = environs.Env()
    data_path = env('DATA_PATH')
    event_path = env('EVENT_PATH')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log.debug(f'data_dir: {data_path} event_dir: {event_path} out_dir: {out_path}')
    target_root_path = group_data(data_path, out_path)
    group_events(event_path, target_root_path)
def main():
    env = environs.Env()
    in_path = env('IN_PATH')
    filter_dirs = env('FILTER_DIR')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log = structlog.get_logger()
    log.debug(f'in_path: {in_path} filter_dirs: {filter_dirs} out_dir: {out_path}')
    filter_directory(in_path, filter_dirs, out_path)
def main(): """Analyze padded time series data""" env = environs.Env() data_path = env('DATA_PATH') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log = get_logger() log.debug(f'data_path: {data_path}') log.debug(f'out_path: {out_path}') analyzer.analyze(data_path, out_path)
def main():
    env = environs.Env()
    data_path = env('DATA_PATH')
    calibration_path = env('CALIBRATION_PATH')
    out_path = env('OUT_PATH')
    log_level = env('LOG_LEVEL')
    log_config.configure(log_level)
    log = structlog.get_logger()
    log.debug(f'data_path: {data_path}')
    log.debug(f'calibration_path: {calibration_path}')
    log.debug(f'out_path: {out_path}')
    grouper.group(data_path, calibration_path, out_path)
def main(): """Group quality and calibration flags.""" env = environs.Env() regularized_path = env('REGULARIZED_PATH') quality_path = env('QUALITY_PATH') out_path = env('OUT_PATH') log_level = env('LOG_LEVEL') log_config.configure(log_level) log.debug( f'regularized_path: {regularized_path} quality_path: {quality_path} out_path: {out_path}' ) group(regularized_path, quality_path, out_path)
def main():
    env = environs.Env()
    db_url = env('DATABASE_URL')
    out_path = env('OUT_PATH')
    log_level_name = env('LOG_LEVEL')
    log_config.configure(log_level_name)
    log = get_logger()
    log.debug(f'URL: {db_url}')
    log.debug(f'Out path: {out_path}')
    log.debug(f'Log level: {log_level_name}')
    with closing(cx_Oracle.connect(db_url)) as connection:
        thresholds = threshold_finder.find_thresholds(connection)
        date_generated = date_formatter.convert(datetime.utcnow())
        write_file(thresholds, out_path, date_generated)
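# Why closing() above: contextlib.closing adapts any object that has a
# .close() method into a context manager, so the Oracle connection is closed
# on exit even if find_thresholds() raises. A minimal, self-contained
# illustration (the Resource class is hypothetical, not from this repo):
from contextlib import closing

class Resource:
    def close(self):
        print('resource closed')

with closing(Resource()):
    pass  # 'resource closed' is printed on exit, including on exceptions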
def setUp(self):
    log_config.configure('DEBUG')
    self.setUpPyfakefs()
    self.out_path = os.path.join('/', 'repo', 'outputs')
    self.in_path = os.path.join('/', 'repo', 'inputs')
    self.metadata_path = os.path.join('prt', '2019', '01', '05', '767')
    self.data_filename = 'prt_767_2019-01-05.avro'
    data_path = os.path.join(self.in_path, self.metadata_path, self.data_filename)
    # Use real data file to convert.
    self.real_path = os.path.join(os.path.dirname(__file__), self.data_filename)
    self.fs.add_real_file(self.real_path, target_path=data_path)
def setUp(self):
    # Logging
    log_config.configure('DEBUG')
    # Location
    self.location_name = 'SENSOR000000'
    # Initialize fake file system.
    self.setUpPyfakefs()
    # Create output directory.
    self.out_path = os.path.join('/', 'outputs', 'repo')
    self.fs.create_dir(self.out_path)
    # Create data repo.
    self.create_data_repo()
    # Create location-by-date repo.
    self.create_location_repo()
    # Create empty files repo.
    self.create_empty_files_repo()
    # Directory names to output.
    self.output_directories = 'data,location,calibration,uncertainty_data,uncertainty_coef,flags'
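# The setUp above calls helper methods that are not shown here. As a purely
# hypothetical sketch of what create_data_repo() might do, consistent with
# the other fixtures in this file (every path, date, and file name below is
# an assumption, not the repo's actual layout):
def create_data_repo(self):
    # Create a placeholder sensor data file under a dated source path in
    # the fake filesystem.
    data_path = os.path.join('/', 'inputs', 'repo', 'prt', '2019', '05', '21',
                             self.location_name, 'data', 'data.ext')
    self.fs.create_file(data_path)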