def test_harvester_multi_file(self):
    """
    Verify the harvester reports several new files found in a single check.

    The harvester frequency is set to 1 while the filler creates files .5
    seconds apart, so more than one file should appear between checks.
    """
    harvester_config = CONFIG.copy()
    harvester_config[DataSetDriverConfigKeys.FREQUENCY] = 1
    harvester_config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15

    # no memento: the harvester starts from scratch
    harvester = SingleDirectoryHarvester(
        harvester_config,
        None,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    harvester.start()

    # fill the directory in the background with only .5 secs between files
    target_dir = CONFIG[DataSetDriverConfigKeys.DIRECTORY]
    name_pattern = CONFIG[DataSetDriverConfigKeys.PATTERN]
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files, target_dir, name_pattern, 0, 12, .5)

    # the first wait starts from a count of zero, the rest continue from
    # whatever count has been reached so far
    self.wait_for_file(0)
    for _ in range(4):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_multi_file(self):
    """
    Verify the harvester reports several new files found in a single check.
    """
    # no memento: the harvester starts from scratch
    harvester = SingleDirectoryHarvester(
        CONFIG,
        None,
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # Fill the directory in the background with only .5 secs between files,
    # so 2 files should appear in the 1 second between harvester checks.
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        0, 12, .5)

    # the first wait starts from a count of zero, the rest continue from
    # whatever count has been reached so far
    self.wait_for_file(0)
    for _ in range(4):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_from_scratch(self):
    """
    Check that files are found as they get added to the directory when the
    harvester is started without any memento.
    """
    harvester = SingleDirectoryHarvester(
        CONFIG,
        None,  # no memento, start from scratch
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which increases the file index using INDICIES
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        0, 6)

    # one wait from a count of zero, then six more from the running count
    self.wait_for_file(0)
    for _ in range(6):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_from_scratch(self):
    """
    Test that the harvester can find files as they are added to a
    directory, starting with just the base file in the directory
    """
    # start the harvester from scratch
    memento = None

    # Use the copy carrying the test specific file mod wait time; the
    # copy used to be built and then ignored in favour of the shared CONFIG.
    config = CONFIG.copy()
    config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 10

    file_harvester = SingleDirectoryHarvester(
        config,
        memento,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    file_harvester.start()

    try:
        # start a new event which will increase the file index using INDICIES
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[DataSetDriverConfigKeys.DIRECTORY],
            CONFIG[DataSetDriverConfigKeys.PATTERN],
            0, 5, 10)

        # Wait for new files to be discovered
        self.wait_for_file(0, 5)
        for _ in range(4):
            self.wait_for_file(self.found_file_count, 5)
    finally:
        # shut the harvester down even when a wait fails, so it does not
        # keep running into the next test
        file_harvester.shutdown()
def test_missing_directory(self):
    """
    Run the harvester against a test directory that has been removed and
    then re-created empty.
    """
    harvester_config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

    # empty and remove the test directory, and confirm it is really gone
    self.clean_directory(TESTDIR)
    os.rmdir(TESTDIR)
    self.assertFalse(os.path.exists(TESTDIR))

    # the directory is re-created before the harvester is built; no memento
    # is supplied so the harvester starts from scratch
    os.mkdir(TESTDIR)
    harvester = SingleDirectoryHarvester(
        harvester_config,
        None,
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which increases the file index using INDICIES
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        0, 2)

    # two waits, each continuing from the current found count
    for _ in range(2):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_with_memento(self):
    """
    Check that files are found as they get added to the directory when the
    harvester is handed a memento, so it starts partway through the indices.
    """
    # put files in the directory before the harvester exists
    self.fill_directory_with_files(CONFIG['directory'], CONFIG['pattern'],
                                   2, 0)

    # derive the memento from the first matching file, re-indexed to 2
    existing_files = glob.glob(CONFIG['directory'] + '/' + CONFIG['pattern'])
    start_memento = self.replace_file_index(existing_files[0], 2)

    harvester = SingleDirectoryHarvester(
        CONFIG,
        start_memento,
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which copies the first file and increases the file
    # index into the data directory with a delay in between
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        3)

    # one wait from a count of zero, then two more from the running count
    self.wait_for_file(0)
    for _ in range(2):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_without_mod_time(self):
    """
    Check the harvester works with a configuration that leaves out the
    file mod wait time (only directories, pattern and frequency are set).
    """
    harvester_config = {}
    harvester_config[DataSetDriverConfigKeys.DIRECTORY] = TESTDIR
    harvester_config[DataSetDriverConfigKeys.STORAGE_DIRECTORY] = TESTDIR
    harvester_config[DataSetDriverConfigKeys.PATTERN] = \
        CONFIG[DataSetDriverConfigKeys.PATTERN]
    harvester_config[DataSetDriverConfigKeys.FREQUENCY] = 5

    # no memento: the harvester starts from scratch
    harvester = SingleDirectoryHarvester(
        harvester_config,
        None,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which increases the file index using INDICIES
    target_dir = CONFIG[DataSetDriverConfigKeys.DIRECTORY]
    name_pattern = CONFIG[DataSetDriverConfigKeys.PATTERN]
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files, target_dir, name_pattern, 0, 2)

    # Wait for two sets of new files to be discovered
    self.wait_for_file(0, 2)
    self.wait_for_file(self.found_file_count, 2)

    harvester.shutdown()
def test_harvester_without_frequency(self):
    """
    Check the harvester works with a configuration that has no frequency
    entry, i.e. that a default frequency can be used.
    """
    harvester_config = dict(directory=TESTDIR, pattern=CONFIG['pattern'])

    # no memento: the harvester starts from scratch
    harvester = SingleDirectoryHarvester(
        harvester_config,
        None,
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which copies the first file and increases the file
    # index into the data directory with a delay in between
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        2)

    # one wait from a count of zero, then two more from the running count
    self.wait_for_file(0)
    for _ in range(2):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def test_harvester_with_memento(self):
    """
    Check that files are found as they get added to the directory when the
    harvester is handed a memento, so it starts partway through the indices.
    """
    # make sure we have 2 files already in the directory
    self.fill_directory_with_files(CONFIG['directory'], CONFIG['pattern'],
                                   0, 2, 0)

    # the memento is the path of the file named with the second index
    file_suffix = CONFIG['pattern'].replace('*', '')
    start_memento = ''.join(
        (CONFIG['directory'], '/', 'unit_', INDICIES[1], file_suffix))
    log.debug("starting with memento %s", start_memento)

    harvester = SingleDirectoryHarvester(
        CONFIG,
        start_memento,
        self.new_file_found_callback,
        self.file_exception_callback)
    harvester.start()

    # background filler which increases the file index using INDICIES with
    # a delay in between
    self.directory_filler = gevent.spawn(
        self.fill_directory_with_files,
        CONFIG['directory'],
        CONFIG['pattern'],
        2, 9)

    # one wait from a count of zero, then eight more from the running count
    self.wait_for_file(0)
    for _ in range(8):
        self.wait_for_file(self.found_file_count)

    harvester.shutdown()
def _build_harvester(self, driver_state):
    """
    Build the recovered and telemetered harvesters that are configured.

    A warning is logged for each data type key that is absent from the
    harvester configuration, or whose harvester could not be built.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    recovered_key = DataTypeKey.PARAD_K_STC_RECOVERED
    telemetered_key = DataTypeKey.PARAD_K_STC
    built = []

    # Recovered harvester
    if recovered_key not in config:
        log.warn('Harvester configuration missing key %s', recovered_key)
    else:
        recovered = SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback)
        if recovered is None:
            log.warn('Unable to build Harvester %s', recovered_key)
        else:
            built.append(recovered)

    # Telemetered harvester
    if telemetered_key not in config:
        log.warn('Harvester configuration missing key %s', telemetered_key)
    else:
        telemetered = SingleDirectoryHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda filename: self._new_file_callback(
                filename, telemetered_key),
            lambda modified: self._modified_file_callback(
                modified, telemetered_key),
            self._exception_callback)
        if telemetered is None:
            log.warn('Unable to build Harvester %s', telemetered_key)
        else:
            built.append(telemetered)

    return built
def _build_harvester(self, driver_state):
    """
    Build the DOSTA_ABCDJM_CSPP recovered and telemetered harvesters that
    are configured.

    A warning is logged for each data type key that is absent from the
    harvester configuration, or whose harvester could not be built.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    recovered_key = DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED
    telemetered_key = DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED
    built = []

    # Recovered harvester
    if recovered_key not in config:
        log.warn(
            'DOSTA_ABCDJM_CSPP_RECOVERED key missing from config harvester not built'
        )
    else:
        recovered = SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback)
        if recovered is None:
            log.warn('DOSTA_ABCDJM_CSPP_RECOVERED harvester not built')
        else:
            built.append(recovered)

    # Telemetered harvester
    if telemetered_key not in config:
        log.warn(
            'DOSTA_ABCDJM_CSPP_TELEMETERED key missing from config harvester not built'
        )
    else:
        telemetered = SingleDirectoryHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda filename: self._new_file_callback(
                filename, telemetered_key),
            lambda modified: self._modified_file_callback(
                modified, telemetered_key),
            self._exception_callback)
        if telemetered is None:
            log.warn('DOSTA_ABCDJM_CSPP_TELEMETERED harvester not built')
        else:
            built.append(telemetered)

    return built
def _build_harvester(self, driver_state):
    """
    Build the recovered and telemetered harvesters that are configured.
    A warning is logged for each data key that has no harvester
    configuration, and no harvester is built for that key.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    recovered_key = DataTypeKey.DOSTA_ABCDJM_RECOVERED
    telemetered_key = DataTypeKey.DOSTA_ABCDJM_TELEMETERED
    built = []

    # Recovered harvester
    if recovered_key not in config:
        log.warn(
            'No configuration for dosta_abcdjm_dcl recovered harvester, not building'
        )
    else:
        built.append(SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback))

    # Telemetered harvester
    if telemetered_key not in config:
        log.warn(
            'No configuration for dosta_abcdjm_dcl telemetered harvester, not building'
        )
    else:
        built.append(SingleDirectoryHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda filename: self._new_file_callback(
                filename, telemetered_key),
            lambda modified: self._modified_file_callback(
                modified, telemetered_key),
            self._exception_callback))

    return built
def test_init(self):
    """
    Check that a harvester can be built, started and shut down.
    """
    harvester = SingleDirectoryHarvester(
        CONFIG,
        None,  # no memento, start from scratch
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)

    harvester.start()
    harvester.shutdown()
def test_file_mod_wait_time(self):
    """
    Test that the file mod wait time is actually waiting before finding
    files: a new file must not be reported sooner than the configured
    FILE_MOD_WAIT_TIME.
    """
    memento = None
    file_harvester = SingleDirectoryHarvester(
        CONFIG,
        memento,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    file_harvester.start()

    try:
        # put a file in the directory, the mod time will be the create time
        self.fill_directory_with_files(
            CONFIG[DataSetDriverConfigKeys.DIRECTORY],
            CONFIG[DataSetDriverConfigKeys.PATTERN],
            0, 1, 0)

        # keep track of how long it takes to find the file approximately,
        # polling once a second and giving up after 60 seconds
        file_found_time = 0
        while self.found_file_count == 0:
            time.sleep(1)
            file_found_time += 1
            if file_found_time > 60:
                raise Exception("Timeout waiting to find file")

        if file_found_time < CONFIG.get(
                DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME):
            # we found the file before the mod time, this is bad!
            self.fail('Files found in %s seconds' % file_found_time)

        log.debug('File found in %s seconds', file_found_time)
    finally:
        # single shutdown point: also reached on the timeout and failure
        # paths, which previously could leave the harvester running
        file_harvester.shutdown()
def _build_harvester(self, driver_state):
    """
    Build the CTDPF_CKL_WFP recovered and telemetered harvesters that are
    configured.

    A 'NOT BUILT' warning is logged for every harvester that does not end
    up in the returned list, whether its key is missing from the harvester
    configuration or no harvester object was produced.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    harvesters = []  # list of harvesters to be returned

    #
    # Verify that the CTDPF_CKL_WFP_RECOVERED harvester has been configured.
    # If so, build it and add it to the list of harvesters.
    #
    harvester = None
    if DataTypeKey.CTDPF_CKL_WFP_RECOVERED in self._harvester_config:
        harvester = SingleDirectoryHarvester(
            self._harvester_config.get(
                DataTypeKey.CTDPF_CKL_WFP_RECOVERED),
            driver_state[DataTypeKey.CTDPF_CKL_WFP_RECOVERED],
            lambda filename: self._new_file_callback(
                filename, DataTypeKey.CTDPF_CKL_WFP_RECOVERED),
            lambda modified: self._modified_file_callback(
                modified, DataTypeKey.CTDPF_CKL_WFP_RECOVERED),
            self._exception_callback)

    # one check covers both "not configured" and "nothing was built"
    if harvester is not None:
        harvesters.append(harvester)
    else:
        log.warning('CTDPF_CKL_WFP_RECOVERED HARVESTER NOT BUILT')

    #
    # Verify that the CTDPF_CKL_WFP_TELEMETERED harvester has been
    # configured. If so, build it and add it to the list of harvesters.
    #
    harvester = None
    if DataTypeKey.CTDPF_CKL_WFP_TELEMETERED in self._harvester_config:
        harvester = SingleDirectoryHarvester(
            self._harvester_config.get(
                DataTypeKey.CTDPF_CKL_WFP_TELEMETERED),
            driver_state[DataTypeKey.CTDPF_CKL_WFP_TELEMETERED],
            lambda filename: self._new_file_callback(
                filename, DataTypeKey.CTDPF_CKL_WFP_TELEMETERED),
            lambda modified: self._modified_file_callback(
                modified, DataTypeKey.CTDPF_CKL_WFP_TELEMETERED),
            self._exception_callback)

    if harvester is not None:
        harvesters.append(harvester)
    else:
        log.warning('CTDPF_CKL_WFP_TELEMETERED HARVESTER NOT BUILT')

    return harvesters
def test_harvester_with_memento(self):
    """
    Test that the harvester can find file as they are added to a directory,
    using a memento to start partway through the indices
    """
    # make sure we have 2 files already in the directory
    self.fill_directory_with_files(
        CONFIG[DataSetDriverConfigKeys.DIRECTORY],
        CONFIG[DataSetDriverConfigKeys.PATTERN],
        0, 2, 0)

    suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
    filename_1 = 'unit_' + INDICIES[0] + suffix
    filename_2 = 'unit_' + INDICIES[1] + suffix

    # get metadata for the files and mark both as already ingested
    metadata_1 = self.get_file_metadata(filename_1)
    metadata_1[DriverStateKey.INGESTED] = True
    metadata_1[DriverStateKey.PARSER_STATE] = None

    metadata_2 = self.get_file_metadata(filename_2)
    metadata_2[DriverStateKey.INGESTED] = True
    metadata_2[DriverStateKey.PARSER_STATE] = None

    # generate memento with two files ingested (parser state is not looked at)
    memento = {
        DriverStateKey.VERSION: 0.1,
        filename_1: metadata_1,
        filename_2: metadata_2,
    }
    log.debug("starting with memento %s", memento)

    # Use the copy carrying the test specific file mod wait time; the
    # copy used to be built and then ignored in favour of the shared CONFIG.
    config = CONFIG.copy()
    config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15

    file_harvester = SingleDirectoryHarvester(
        config,
        memento,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    file_harvester.start()

    try:
        # start a new event which will increase the file index using
        # INDICIES with a delay in between
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[DataSetDriverConfigKeys.DIRECTORY],
            CONFIG[DataSetDriverConfigKeys.PATTERN],
            2, 9, 5)

        # Wait for sets of new files to be discovered
        self.wait_for_file(0, 2)
        for _ in range(5):
            self.wait_for_file(self.found_file_count, 2)
    finally:
        # shut the harvester down even when a wait fails, so it does not
        # keep running into the next test
        file_harvester.shutdown()
def _build_harvester(self, driver_state):
    """
    Build the telemetered (single file) and recovered (single directory)
    harvesters that are configured, storing the list on self._harvester.
    A warning is logged for each data key without a configuration.

    @param driver_state The starting driver state, indexed by data type key
    @retval the list of built harvesters (the same list as self._harvester)
    """
    config = self._harvester_config
    telemetered_key = DataTypeKey.DOSTA_ABCDJM_SIO_TELEMETERED
    recovered_key = DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED

    # the list is published on the instance before anything is built
    self._harvester = built = []

    if telemetered_key not in config:
        log.warn(
            'No configuration for telemetered harvester, not building')
    else:
        built.append(SingleFileHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda file_state: self._file_changed_callback(
                file_state, telemetered_key),
            self._exception_callback))

    if recovered_key not in config:
        log.warn('No configuration for recovered harvester, not building')
    else:
        built.append(SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback))

    return built
def _build_harvester(self, driver_state):
    """
    Build the telemetered (single file) and recovered (single directory)
    harvesters that are configured. A warning is logged for each data
    source key without a configuration.

    @param driver_state The starting driver state, indexed by data source key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    telemetered_key = DataSourceKey.FLORT_DJ_SIO_TELEMETERED
    recovered_key = DataSourceKey.FLORT_DJ_SIO_RECOVERED
    built = []

    if telemetered_key not in config:
        log.warn('No configuration for %s harvester, not building',
                 telemetered_key)
    else:
        built.append(SingleFileHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda file_state: self._file_changed_callback(
                file_state, telemetered_key),
            self._exception_callback))

    if recovered_key not in config:
        log.warn('No configuration for %s harvester, not building',
                 recovered_key)
    else:
        built.append(SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback))

    return built
def test_harvester_with_modified(self):
    """
    Test that the harvester reports a modified file: start with a memento
    listing two ingested files, append to the first one and wait for the
    modified files callback to be counted.
    """
    # make sure we have 2 files already in the directory
    self.fill_directory_with_files(
        CONFIG[DataSetDriverConfigKeys.DIRECTORY],
        CONFIG[DataSetDriverConfigKeys.PATTERN],
        0, 2, 0)

    suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
    filename_1 = 'unit_' + INDICIES[0] + suffix
    filename_2 = 'unit_' + INDICIES[1] + suffix

    # get metadata for the files and mark both as already ingested
    metadata_1 = self.get_file_metadata(filename_1)
    metadata_1[DriverStateKey.INGESTED] = True
    metadata_1[DriverStateKey.PARSER_STATE] = None

    metadata_2 = self.get_file_metadata(filename_2)
    metadata_2[DriverStateKey.INGESTED] = True
    metadata_2[DriverStateKey.PARSER_STATE] = None

    # generate memento with two files ingested (parser state is not looked at)
    memento = {
        DriverStateKey.VERSION: 0.1,
        filename_1: metadata_1,
        filename_2: metadata_2,
    }
    log.debug("starting with memento %s", memento)

    # Use the copy carrying the test specific file mod wait time; the
    # copy used to be built and then ignored in favour of the shared CONFIG.
    config = CONFIG.copy()
    config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15

    file_harvester = SingleDirectoryHarvester(
        config,
        memento,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)
    file_harvester.start()

    try:
        # append to the first file so it no longer matches the memento
        file_path = os.path.join(
            CONFIG[DataSetDriverConfigKeys.DIRECTORY], filename_1)
        with open(file_path, 'a') as filehandle:
            filehandle.write('a b c d')

        # poll every 2 seconds, giving up after 60 seconds
        end_time = 0
        while self.found_modified_count == 0:
            log.debug("Waiting for modified file...")
            time.sleep(2)
            end_time += 2
            if end_time > 60:
                raise Exception("Timeout waiting to find modified files")
    finally:
        # shut the harvester down on the timeout path too, which
        # previously left it running
        file_harvester.shutdown()
def _build_harvester(self, driver_state):
    """
    Create the single directory harvester, keep it on self._harvester and
    hand it back.

    @param driver_state The starting driver state passed to the harvester
    @retval the harvester stored on self._harvester
    """
    self._harvester = SingleDirectoryHarvester(
        self._harvester_config,
        driver_state,
        self._new_file_callback,
        self._modified_file_callback,
        self._exception_callback)

    return self._harvester
def build_single_harvester(self, key, driver_state):
    """
    Build a single directory harvester for one key.

    @param key The key selecting the harvester configuration and the driver
               state entry; it is also passed on to the file callbacks
    @param driver_state The starting driver state, indexed by key
    @retval the new SingleDirectoryHarvester
    """
    def on_new_file(filename):
        # forward to the driver callback together with this harvester's key
        return self._new_file_callback(filename, key)

    def on_modified_file(modified):
        return self._modified_file_callback(modified, key)

    return SingleDirectoryHarvester(
        self._harvester_config.get(key),
        driver_state[key],
        on_new_file,
        on_modified_file,
        self._exception_callback)
def test_harvester_1000(self):
    """
    The harvester is taking a really long time to run, find out how long
    for 1000 files. The elapsed time is written to the debug log.
    """
    self.fill_directory_1000_files(CONFIG[DataSetDriverConfigKeys.PATTERN])

    memento = None

    # Use the copy carrying the test specific file mod wait time; the
    # copy used to be built and then ignored in favour of the shared CONFIG.
    config = CONFIG.copy()
    config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 1

    file_harvester = SingleDirectoryHarvester(
        config,
        memento,
        self.new_file_found_callback,
        self.modified_files_found_callback,
        self.file_exception_callback)

    start_time = time.time()
    file_harvester.start()

    try:
        while self.found_file_count < 1000:
            self.wait_for_file(self.found_file_count, 5, 60)
        end_time = time.time()
    finally:
        # the harvester used to be left running after this test
        file_harvester.shutdown()

    log.debug('harvester found all files in %s', (end_time - start_time))
def _build_harvester(self, driver_state):
    """
    Create the single directory harvester and hand it back, logging a
    warning if no harvester object was obtained.

    @param driver_state The starting driver state passed to the harvester
    @retval the harvester (returned even when it is None)
    """
    harvester = SingleDirectoryHarvester(
        self._harvester_config,
        driver_state,
        self._new_file_callback,
        self._modified_file_callback,
        self._exception_callback)

    if harvester is not None:
        return harvester

    log.warn('harverster failed instantiation due to missing config')
    return harvester
def build_single_dir_harvester(self, driver_state, data_key):
    """
    Create a single directory harvester for one data source key.

    @param driver_state - the starting driver state, indexed by data key
    @param data_key - the data source key selecting the configuration and
                      driver state; also passed on to the file callbacks
    @retval the new SingleDirectoryHarvester
    """
    def on_new_file(filename):
        # forward to the driver callback together with this harvester's key
        return self._new_file_callback(filename, data_key)

    def on_modified_file(modified):
        return self._modified_file_callback(modified, data_key)

    harvester = SingleDirectoryHarvester(
        self._harvester_config.get(data_key),
        driver_state[data_key],
        on_new_file,
        on_modified_file,
        self._exception_callback)
    return harvester
def _build_harvester(self, driver_state):
    """
    Create the single directory harvester, keep it on self._harvester and
    hand it back.

    @param driver_state The starting driver state passed to the harvester
    @retval the harvester stored on self._harvester
    """
    # template placeholder: replace with the driver's harvester initialization
    callbacks = (self._new_file_callback,
                 self._modified_file_callback,
                 self._exception_callback)
    self._harvester = SingleDirectoryHarvester(
        self._harvester_config, driver_state, *callbacks)
    return self._harvester
def build_single_harvester(self, driver_state, key):
    """
    Create the single directory harvester for one key, if it is configured.

    @param driver_state The starting driver state, indexed by key
    @param key The key selecting the harvester configuration and driver state
    @retval the new harvester, or None (with a warning logged) when the key
            is not in the harvester configuration
    """
    if key not in self._harvester_config:
        log.warn('flntu/flcdr harvester not built because missing config')
        return None

    return SingleDirectoryHarvester(
        self._harvester_config.get(key),
        driver_state[key],
        lambda filename: self._new_file_callback(filename, key),
        lambda modified: self._modified_file_callback(modified, key),
        self._exception_callback)
def _build_harvester(self, driver_state):
    """
    Build the CTDPF_CKL_WFP (single directory) and CTDPF_CKL_WFP_SIO_MULE
    (single file) harvesters that are configured.

    A 'NOT BUILT' warning is logged for every harvester that does not end
    up in the returned list, whether its key is missing from the harvester
    configuration or no harvester object was produced.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    harvesters = []  # list of harvesters to be returned

    #
    # Verify that the CTDPF_CKL_WFP harvester has been configured.
    # If so, build it and add it to the list of harvesters.
    #
    harvester = None
    if DataTypeKey.CTDPF_CKL_WFP in self._harvester_config:
        log.debug('CAG DRIVER - build harvester for %s',
                  driver_state[DataTypeKey.CTDPF_CKL_WFP])
        harvester = SingleDirectoryHarvester(
            self._harvester_config.get(DataTypeKey.CTDPF_CKL_WFP),
            driver_state[DataTypeKey.CTDPF_CKL_WFP],
            lambda filename: self._new_file_callback(
                filename, DataTypeKey.CTDPF_CKL_WFP),
            lambda modified: self._modified_file_callback(
                modified, DataTypeKey.CTDPF_CKL_WFP),
            self._exception_callback)

    # one check covers both "not configured" and "nothing was built"
    if harvester is not None:
        harvesters.append(harvester)
    else:
        log.warning('CTDPF_CKL_WFP HARVESTER NOT BUILT')

    #
    # Verify that the CTDPF_CKL_WFP_SIO_MULE harvester has been configured.
    # If so, build it and add it to the list of harvesters.
    #
    harvester = None
    if DataTypeKey.CTDPF_CKL_WFP_SIO_MULE in self._harvester_config:
        log.debug('CAG DRIVER - build harvester for %s',
                  driver_state[DataTypeKey.CTDPF_CKL_WFP_SIO_MULE])
        harvester = SingleFileHarvester(
            self._harvester_config.get(DataTypeKey.CTDPF_CKL_WFP_SIO_MULE),
            driver_state[DataTypeKey.CTDPF_CKL_WFP_SIO_MULE],
            lambda file_state: self._file_changed_callback(
                file_state, DataTypeKey.CTDPF_CKL_WFP_SIO_MULE),
            self._exception_callback)

    if harvester is not None:
        harvesters.append(harvester)
    else:
        log.warning('CTDPF_CKL_WFP_SIO_MULE HARVESTER NOT BUILT')

    return harvesters
def _build_harvester(self, driver_state):
    """
    Build the VEL3D_L_WFP (single directory) and VEL3D_L_WFP_SIO_MULE
    (single file) harvesters that are configured. A warning naming the
    missing key is logged for each one without a harvester configuration.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    harvesters = []  # list of harvesters to be returned

    #
    # Verify that the WFP harvester has been configured.
    # If so, build the harvester and add it to the list of harvesters.
    #
    if DataTypeKey.VEL3D_L_WFP in self._harvester_config:
        wfp_harvester = SingleDirectoryHarvester(
            self._harvester_config.get(DataTypeKey.VEL3D_L_WFP),
            driver_state[DataTypeKey.VEL3D_L_WFP],
            lambda filename: self._new_file_callback(
                filename, DataTypeKey.VEL3D_L_WFP),
            lambda modified: self._modified_file_callback(
                modified, DataTypeKey.VEL3D_L_WFP),
            self._exception_callback)
        if wfp_harvester is not None:
            harvesters.append(wfp_harvester)
    else:
        log.warn('Missing harvester configuration for key %s',
                 DataTypeKey.VEL3D_L_WFP)

    #
    # Verify that the SIO Mule harvester has been configured.
    # If so, build the harvester and add it to the list of harvesters.
    #
    if DataTypeKey.VEL3D_L_WFP_SIO_MULE in self._harvester_config:
        sio_harvester = SingleFileHarvester(
            self._harvester_config.get(DataTypeKey.VEL3D_L_WFP_SIO_MULE),
            driver_state[DataTypeKey.VEL3D_L_WFP_SIO_MULE],
            lambda file_state: self._file_changed_callback(
                file_state, DataTypeKey.VEL3D_L_WFP_SIO_MULE),
            self._exception_callback)
        if sio_harvester is not None:
            harvesters.append(sio_harvester)
    else:
        # report the SIO Mule key here; this branch used to name the WFP
        # key, pointing at the wrong missing configuration entry
        log.warn('Missing harvester configuration for key %s',
                 DataTypeKey.VEL3D_L_WFP_SIO_MULE)

    return harvesters
def build_single_harvester(self, driver_state, key):
    """
    Create the single directory harvester for one key, if it is configured.

    @param driver_state The starting driver state, indexed by key
    @param key The key selecting the harvester configuration and driver state
    @retval the new harvester, or None (with a warning logged) when the key
            is not in the harvester configuration
    """
    configured = key in self._harvester_config

    if not configured:
        log.warn(
            'build_single_harvester did not receive a particle type, harvester instantiation failed'
        )
        return None

    return SingleDirectoryHarvester(
        self._harvester_config.get(key),
        driver_state[key],
        lambda filename: self._new_file_callback(filename, key),
        lambda modified: self._modified_file_callback(modified, key),
        self._exception_callback)
def _build_single_dir_harvester(self, driver_state, data_key):
    """
    Create the single directory harvester for one data key, if configured.

    @param driver_state The starting driver state, indexed by data key
    @param data_key The key selecting the harvester configuration and
                    driver state; also passed on to the file callbacks
    @retval the new harvester, or None (with a warning logged) when the
            data key is not in the harvester configuration
    """
    if data_key not in self._harvester_config:
        log.warn('No configuration for %s harvester, not building', data_key)
        return None

    def on_new_file(filename):
        # forward to the driver callback together with this harvester's key
        return self._new_file_callback(filename, data_key)

    def on_modified_file(modified):
        return self._modified_file_callback(modified, data_key)

    return SingleDirectoryHarvester(
        self._harvester_config.get(data_key),
        driver_state[data_key],
        on_new_file,
        on_modified_file,
        self._exception_callback)
def test_init(self):
    """
    Check that a harvester can be built, asked to sort file names, started
    and shut down.
    """
    harvester_config = dict(directory=TESTDIR, pattern=CONFIG['pattern'])

    harvester = SingleDirectoryHarvester(
        harvester_config,
        None,  # no memento, start from scratch
        self.new_file_found_callback,
        self.file_exception_callback)

    # only exercised for errors, the sorted result is not inspected
    sample_names = ['a_1_2.bla', 'a_2_2.bla']
    harvester.sort_files(sample_names)

    harvester.start()
    harvester.shutdown()
def _build_harvester(self, driver_state):
    """
    Build the WFP_ENG_STC_IMODEM (single directory) and
    WFP_ENG_WFP_SIO_MULE (single file) harvesters that are configured.

    A 'NOT BUILT' debug message is logged for every harvester that does not
    end up in the returned list, whether its key is missing from the
    harvester configuration or no harvester object was produced.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    harvesters = []  # list of harvesters to be returned

    #
    # Verify that the WFP_ENG_STC_IMODEM harvester has been configured.
    # If so, build the harvester and add it to the list of harvesters.
    #
    wfp_harvester = None
    if DataTypeKey.WFP_ENG_STC_IMODEM in self._harvester_config:
        wfp_harvester = SingleDirectoryHarvester(
            self._harvester_config.get(DataTypeKey.WFP_ENG_STC_IMODEM),
            driver_state[DataTypeKey.WFP_ENG_STC_IMODEM],
            lambda filename: self._new_file_callback(
                filename, DataTypeKey.WFP_ENG_STC_IMODEM),
            lambda modified: self._modified_file_callback(
                modified, DataTypeKey.WFP_ENG_STC_IMODEM),
            self._exception_callback)

    # one check covers both "not configured" and "nothing was built"
    if wfp_harvester is not None:
        harvesters.append(wfp_harvester)
    else:
        log.debug('WFP_ENG_STC_IMODEM HARVESTER NOT BUILT')

    #
    # Verify that the WFP_ENG_WFP_SIO_MULE harvester has been configured.
    # If so, build the harvester and add it to the list of harvesters.
    #
    sio_harvester = None
    if DataTypeKey.WFP_ENG_WFP_SIO_MULE in self._harvester_config:
        sio_harvester = SingleFileHarvester(
            self._harvester_config.get(DataTypeKey.WFP_ENG_WFP_SIO_MULE),
            driver_state[DataTypeKey.WFP_ENG_WFP_SIO_MULE],
            lambda file_state: self._file_changed_callback(
                file_state, DataTypeKey.WFP_ENG_WFP_SIO_MULE),
            self._exception_callback)

    if sio_harvester is not None:
        harvesters.append(sio_harvester)
    else:
        log.debug('WFP_ENG_WFP_SIO_MULE HARVESTER NOT BUILT')

    return harvesters
def _build_harvester(self, driver_state):
    """
    Build the telemetered (single file) and recovered (single directory)
    harvesters that are configured. A warning is logged for each data
    source key without a configuration.

    @param driver_state The starting driver state, indexed by data source key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    telemetered_key = DataSourceKey.DOSTA_LN_WFP_SIO_MULE
    recovered_key = DataSourceKey.DOSTA_LN_WFP
    built = []

    if telemetered_key not in config:
        log.warn(
            'No configuration for dosta ln wfp sio mule harvester, not building'
        )
    else:
        built.append(SingleFileHarvester(
            config.get(telemetered_key),
            driver_state[telemetered_key],
            lambda file_state: self._file_changed_callback(
                file_state, telemetered_key),
            self._exception_callback))

    if recovered_key not in config:
        log.warn(
            'No configuration for dosta ln wfp harvester, not building')
    else:
        built.append(SingleDirectoryHarvester(
            config.get(recovered_key),
            driver_state[recovered_key],
            lambda filename: self._new_file_callback(
                filename, recovered_key),
            lambda modified: self._modified_file_callback(
                modified, recovered_key),
            self._exception_callback))

    return built
def _build_harvester(self, driver_state):
    """
    Build the CTDMO_GHQR CO and CT (single directory) and SIO mule (single
    file) harvesters that are configured.

    A warning is logged for each data type key that is absent from the
    harvester configuration, or whose harvester could not be built.

    @param driver_state The starting driver state, indexed by data type key
    @retval list of the harvesters that were built
    """
    config = self._harvester_config
    co_key = DataTypeKey.CTDMO_GHQR_CO
    ct_key = DataTypeKey.CTDMO_GHQR_CT
    sio_key = DataTypeKey.CTDMO_GHQR_SIO_MULE
    built = []

    # CO recovered harvester
    if co_key not in config:
        log.warn(
            'No configuration for ctdmo_ghqr_co harvester, not building')
    else:
        co_harvester = SingleDirectoryHarvester(
            config.get(co_key),
            driver_state[co_key],
            lambda filename: self._new_file_callback(filename, co_key),
            lambda modified: self._modified_file_callback(modified, co_key),
            self._exception_callback)
        if co_harvester is None:
            log.warn('Could not build ctdmo_ghqr_co harvester')
        else:
            built.append(co_harvester)

    # CT recovered harvester
    if ct_key not in config:
        log.warn(
            'No configuration for ctdmo_ghqr_ct harvester, not building')
    else:
        ct_harvester = SingleDirectoryHarvester(
            config.get(ct_key),
            driver_state[ct_key],
            lambda filename: self._new_file_callback(filename, ct_key),
            lambda modified: self._modified_file_callback(modified, ct_key),
            self._exception_callback)
        if ct_harvester is None:
            log.warn('Could not build ctdmo_ghqr_ct harvester')
        else:
            built.append(ct_harvester)

    # SIO mule harvester, which watches a single file
    if sio_key not in config:
        log.warn(
            'No configuration for ctdmo_ghqr_sio_mule harvester, not building'
        )
    else:
        sio_harvester = SingleFileHarvester(
            config.get(sio_key),
            driver_state[sio_key],
            lambda file_state: self._file_changed_callback(
                file_state, sio_key),
            self._exception_callback)
        if sio_harvester is None:
            log.warn('Could not build ctdmo_ghqr_sio_mule harvester')
        else:
            built.append(sio_harvester)

    return built