def test_harvester_multi_file(self):
        """
        Check that the harvester copes with several new files showing up
        between two consecutive directory checks
        """
        # work on a private copy of the shared configuration, overriding
        # the polling frequency and the file modification wait time
        harvester_config = CONFIG.copy()
        harvester_config.update({
            DataSetDriverConfigKeys.FREQUENCY: 1,
            DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME: 15,
        })

        # no memento is supplied, so the harvester starts from scratch
        file_harvester = SingleDirectoryHarvester(
            harvester_config, None,
            self.new_file_found_callback,
            self.modified_files_found_callback,
            self.file_exception_callback)
        file_harvester.start()

        # have the file filler generate files with only .5 secs between
        # them, so that 2 files appear within the 1 second between
        # harvester checks
        source_dir = CONFIG[DataSetDriverConfigKeys.DIRECTORY]
        name_pattern = CONFIG[DataSetDriverConfigKeys.PATTERN]
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             source_dir, name_pattern,
                                             0, 12, .5)

        # one wait counted from zero, then four more counted from whatever
        # has been found so far
        self.wait_for_file(0)
        for _ in range(4):
            self.wait_for_file(self.found_file_count)

        file_harvester.shutdown()
 def test_harvester_multi_file(self):
     """
     Check that the harvester copes with several new files showing up
     between two consecutive directory checks
     """
     # no memento is supplied, so the harvester starts from scratch
     file_harvester = SingleDirectoryHarvester(
         CONFIG, None,
         self.new_file_found_callback,
         self.file_exception_callback)
     file_harvester.start()

     # have the file filler generate files with only .5 secs between them;
     # per the original note this is meant to put 2 files in each interval
     # between harvester checks
     filler_args = (CONFIG['directory'], CONFIG['pattern'], 0, 12, .5)
     self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                          *filler_args)

     # one wait counted from zero, then four more counted from whatever
     # has been found so far
     self.wait_for_file(0)
     for _ in range(4):
         self.wait_for_file(self.found_file_count)

     file_harvester.shutdown()
 def test_harvester_from_scratch(self):
     """
     Check that files are picked up as they are added to the directory
     when the harvester is started without any memento
     """
     # no memento is supplied, so the harvester starts from scratch
     file_harvester = SingleDirectoryHarvester(
         CONFIG, None,
         self.new_file_found_callback,
         self.file_exception_callback)
     file_harvester.start()

     # spawn the directory filler, which increases the file index using
     # INDICIES
     filler_args = (CONFIG['directory'], CONFIG['pattern'], 0, 6)
     self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                          *filler_args)

     # one wait counted from zero, then six more counted from whatever
     # has been found so far
     self.wait_for_file(0)
     for _ in range(6):
         self.wait_for_file(self.found_file_count)

     file_harvester.shutdown()
    def test_harvester_from_scratch(self):
        """
        Test that the harvester can find files as they are added to a directory,
        starting with just the base file in the directory
        """
        # start the harvester from scratch
        memento = None
        # override the file mod wait time on a private copy so the shared
        # CONFIG is left untouched
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 10
        # the copy (not CONFIG) must be handed to the harvester, otherwise
        # the wait time override above has no effect
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        try:
            # start a new event which will increase the file index using INDICIES
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN],
                                                 0, 5, 10)

            # Wait for new files to be discovered: once counted from zero,
            # then four more times counted from the files found so far
            self.wait_for_file(0, 5)
            for _ in range(4):
                self.wait_for_file(self.found_file_count, 5)
        finally:
            # always stop the harvester, even if one of the waits raised
            file_harvester.shutdown()
    def test_missing_directory(self):
        """
        Delete the test directory, recreate it, then run a harvester on it
        while files are being added
        """
        config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

        # empty and remove the directory, confirming it is really gone
        self.clean_directory(TESTDIR)
        os.rmdir(TESTDIR)
        self.assertFalse(os.path.exists(TESTDIR))

        # recreate the directory before building the harvester; no memento
        # is supplied, so the harvester starts from scratch
        os.mkdir(TESTDIR)
        file_harvester = SingleDirectoryHarvester(
            config, None,
            self.new_file_found_callback,
            self.file_exception_callback)
        file_harvester.start()

        # spawn the directory filler, which increases the file index using
        # INDICIES
        filler_args = (CONFIG['directory'], CONFIG['pattern'], 0, 2)
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             *filler_args)

        # two waits, each counted from the files found so far
        for _ in range(2):
            self.wait_for_file(self.found_file_count)

        file_harvester.shutdown()
 def test_harvester_with_memento(self):
     """
     Check that files are picked up as they are added to the directory
     when the harvester is given a memento to start partway through the
     indices
     """
     data_dir = CONFIG['directory']
     name_pattern = CONFIG['pattern']

     # make sure we have 2 files already in the directory
     self.fill_directory_with_files(data_dir, name_pattern, 2, 0)

     # build the memento from the first matching file, with its index
     # replaced by 2
     dir_files = glob.glob(data_dir + '/' + name_pattern)
     memento = self.replace_file_index(dir_files[0], 2)
     file_harvester = SingleDirectoryHarvester(
         CONFIG, memento,
         self.new_file_found_callback,
         self.file_exception_callback)
     file_harvester.start()

     # spawn the filler, which copies the first file and increases the
     # file index into the data directory with a delay in between
     self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                          data_dir, name_pattern, 3)

     # one wait counted from zero, then two more counted from whatever
     # has been found so far
     self.wait_for_file(0)
     for _ in range(2):
         self.wait_for_file(self.found_file_count)

     file_harvester.shutdown()
    def test_harvester_without_mod_time(self):
        """
        Test the harvester with a configuration that leaves out the file
        mod wait time
        """
        # directory, storage directory, pattern and frequency only; no
        # FILE_MOD_WAIT_TIME entry is supplied
        config = {}
        config[DataSetDriverConfigKeys.DIRECTORY] = TESTDIR
        config[DataSetDriverConfigKeys.STORAGE_DIRECTORY] = TESTDIR
        config[DataSetDriverConfigKeys.PATTERN] = CONFIG[DataSetDriverConfigKeys.PATTERN]
        config[DataSetDriverConfigKeys.FREQUENCY] = 5

        # no memento is supplied, so the harvester starts from scratch
        file_harvester = SingleDirectoryHarvester(
            config, None,
            self.new_file_found_callback,
            self.modified_files_found_callback,
            self.file_exception_callback)
        file_harvester.start()

        # spawn the directory filler, which increases the file index using
        # INDICIES
        filler_args = (CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2)
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             *filler_args)

        # two waits: the first counted from zero, the second from the
        # files found so far
        self.wait_for_file(0, 2)
        self.wait_for_file(self.found_file_count, 2)

        file_harvester.shutdown()
    def test_harvester_without_frequency(self):
        """
        Test that the harvester can be used with a configuration that
        supplies only a directory and a pattern (no frequency)
        """
        config = dict(directory=TESTDIR, pattern=CONFIG['pattern'])

        # no memento is supplied, so the harvester starts from scratch
        file_harvester = SingleDirectoryHarvester(
            config, None,
            self.new_file_found_callback,
            self.file_exception_callback)
        file_harvester.start()

        # spawn the filler, which copies the first file and increases the
        # file index into the data directory with a delay in between
        filler_args = (CONFIG['directory'], CONFIG['pattern'], 2)
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             *filler_args)

        # one wait counted from zero, then two more counted from whatever
        # has been found so far
        self.wait_for_file(0)
        for _ in range(2):
            self.wait_for_file(self.found_file_count)

        file_harvester.shutdown()
 def test_harvester_with_memento(self):
     """
     Check that files are picked up as they are added to the directory
     when the harvester is given a memento to start partway through the
     indices
     """
     # make sure we have 2 files already in the directory
     self.fill_directory_with_files(CONFIG['directory'], CONFIG['pattern'], 0, 2, 0)

     # the memento is the path of the file named after INDICIES[1]; the
     # file suffix is the pattern with its wildcard removed
     file_suffix = CONFIG['pattern'].replace('*', '')
     memento = CONFIG['directory'] + '/unit_' + INDICIES[1] + file_suffix
     log.debug("starting with memento %s", memento)
     file_harvester = SingleDirectoryHarvester(
         CONFIG, memento,
         self.new_file_found_callback,
         self.file_exception_callback)
     file_harvester.start()

     # spawn the directory filler, which increases the file index using
     # INDICIES with a delay in between
     filler_args = (CONFIG['directory'], CONFIG['pattern'], 2, 9)
     self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                          *filler_args)

     # one wait counted from zero, then eight more counted from whatever
     # has been found so far
     self.wait_for_file(0)
     for _ in range(8):
         self.wait_for_file(self.found_file_count)

     file_harvester.shutdown()
Example #10
0
    def _build_harvester(self, driver_state):
        """
        Build and return the harvesters

        A SingleDirectoryHarvester is created for the recovered and for the
        telemetered data type key when that key is present in the harvester
        configuration; a warning is logged for each key that is absent.
        @param driver_state driver state, indexed by data type key
        @retval list of the harvesters that were built
        """

        def build_for(data_key):
            # data_key is bound per call, so the callbacks of each harvester
            # keep reporting their own key
            return SingleDirectoryHarvester(
                self._harvester_config.get(data_key),
                driver_state[data_key],
                lambda filename: self._new_file_callback(filename, data_key),
                lambda modified: self._modified_file_callback(modified, data_key),
                self._exception_callback)

        harvesters = []  # list of harvesters to be returned

        # the Recovered harvester is handled first, then the Telemetered one
        for data_key in (DataTypeKey.PARAD_K_STC_RECOVERED,
                         DataTypeKey.PARAD_K_STC):
            if data_key not in self._harvester_config:
                log.warn('Harvester configuration missing key %s',
                         data_key)
                continue

            harvester = build_for(data_key)
            if harvester is None:
                log.warn('Unable to build Harvester %s',
                         data_key)
            else:
                harvesters.append(harvester)

        return harvesters
Example #11
0
    def _build_harvester(self, driver_state):
        """
        Build and return the harvesters

        A SingleDirectoryHarvester is created for each of the
        DOSTA_ABCDJM_CSPP recovered and telemetered keys that is present in
        the harvester configuration; a warning is logged otherwise.
        @param driver_state driver state, indexed by data type key
        @retval list of the harvesters that were built
        """
        harvesters = []  # list of harvesters to be returned
        config = self._harvester_config

        #
        # DOSTA_ABCDJM_CSPP_RECOVERED: warn when not configured, otherwise
        # build the harvester and add it to the list of harvesters.
        #
        if DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED not in config:
            log.warn(
                'DOSTA_ABCDJM_CSPP_RECOVERED key missing from config harvester not built'
            )
        else:
            recovered = SingleDirectoryHarvester(
                config.get(DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED),
                driver_state[DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED],
                lambda filename: self._new_file_callback(
                    filename, DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED),
                lambda modified: self._modified_file_callback(
                    modified, DataTypeKey.DOSTA_ABCDJM_CSPP_RECOVERED),
                self._exception_callback)

            if recovered is None:
                log.warn('DOSTA_ABCDJM_CSPP_RECOVERED harvester not built')
            else:
                harvesters.append(recovered)

        #
        # DOSTA_ABCDJM_CSPP_TELEMETERED: warn when not configured, otherwise
        # build the harvester and add it to the list of harvesters.
        #
        if DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED not in config:
            log.warn(
                'DOSTA_ABCDJM_CSPP_TELEMETERED key missing from config harvester not built'
            )
        else:
            telemetered = SingleDirectoryHarvester(
                config.get(DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED),
                driver_state[DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED],
                lambda filename: self._new_file_callback(
                    filename, DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED),
                lambda modified: self._modified_file_callback(
                    modified, DataTypeKey.DOSTA_ABCDJM_CSPP_TELEMETERED),
                self._exception_callback)

            if telemetered is None:
                log.warn('DOSTA_ABCDJM_CSPP_TELEMETERED harvester not built')
            else:
                harvesters.append(telemetered)

        return harvesters
Example #12
0
    def _build_harvester(self, driver_state):
        """
        Build the harvesters.
        A harvester is built for each of the recovered and telemetered data
        keys found in the harvester configuration; a warning is logged for
        a key that is not configured.
        @param driver_state The starting driver state
        @retval list of the harvesters that were built
        """

        def make_harvester(data_key):
            # data_key is bound per call, so the callbacks of each harvester
            # keep reporting their own key
            return SingleDirectoryHarvester(
                self._harvester_config.get(data_key),
                driver_state[data_key],
                lambda filename: self._new_file_callback(filename, data_key),
                lambda modified: self._modified_file_callback(modified, data_key),
                self._exception_callback)

        harvesters = []

        # Recovered harvester: build it only when it has been configured
        if DataTypeKey.DOSTA_ABCDJM_RECOVERED in self._harvester_config:
            harvesters.append(
                make_harvester(DataTypeKey.DOSTA_ABCDJM_RECOVERED))
        else:
            log.warn(
                'No configuration for dosta_abcdjm_dcl recovered harvester, not building'
            )

        # Telemetered harvester: build it only when it has been configured
        if DataTypeKey.DOSTA_ABCDJM_TELEMETERED in self._harvester_config:
            harvesters.append(
                make_harvester(DataTypeKey.DOSTA_ABCDJM_TELEMETERED))
        else:
            log.warn(
                'No configuration for dosta_abcdjm_dcl telemetered harvester, not building'
            )

        return harvesters
    def test_init(self):
        """
        Test that a harvester can be created, started and shut down
        """
        # no memento is supplied, so the harvester starts from scratch
        callbacks = (self.new_file_found_callback,
                     self.modified_files_found_callback,
                     self.file_exception_callback)
        file_harvester = SingleDirectoryHarvester(CONFIG, None, *callbacks)

        file_harvester.start()
        file_harvester.shutdown()
    def test_file_mod_wait_time(self):
        """
        Test that the file mod wait time is actually waiting before finding files

        Fails when the file is reported after fewer seconds than the
        configured file mod wait time, and raises when it is not reported
        within roughly 60 seconds.
        """
        memento = None
        file_harvester = SingleDirectoryHarvester(CONFIG, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        try:
            # put a file in the directory, the mod time will be the create time
            self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                           CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 1, 0)

            # keep track of approximately how long it takes to find the
            # file, in whole seconds
            file_found_time = 0
            while self.found_file_count == 0:
                time.sleep(1)
                file_found_time += 1
                if file_found_time > 60:
                    raise Exception("Timeout waiting to find file")

            if file_found_time < CONFIG.get(DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME):
                # we found the file before the mod time, this is bad!
                self.fail('Files found in %s seconds' % file_found_time)
            log.debug('File found in %s seconds', file_found_time)
        finally:
            # stop the harvester on every path, including the timeout above
            file_harvester.shutdown()
Example #15
0
    def _build_harvester(self, driver_state):
        """
        Build and return the harvesters

        A SingleDirectoryHarvester is created for each of the CTDPF_CKL_WFP
        recovered and telemetered keys that is present in the harvester
        configuration. A key that is not configured is skipped silently.
        @param driver_state driver state, indexed by data type key
        @retval list of the harvesters that were built
        """

        def build_for(data_key):
            # data_key is bound per call, so the callbacks of each harvester
            # keep reporting their own key
            return SingleDirectoryHarvester(
                self._harvester_config.get(data_key),
                driver_state[data_key],
                lambda filename: self._new_file_callback(filename, data_key),
                lambda modified: self._modified_file_callback(modified, data_key),
                self._exception_callback)

        # each data type key is paired with the warning logged when its
        # harvester comes back as None; recovered is handled first
        candidates = (
            (DataTypeKey.CTDPF_CKL_WFP_RECOVERED,
             'CTDPF_CKL_WFP_RECOVERED HARVESTER NOT BUILT'),
            (DataTypeKey.CTDPF_CKL_WFP_TELEMETERED,
             'CTDPF_CKL_WFP_TELEMETERED HARVESTER NOT BUILT'),
        )

        harvesters = []  # list of harvesters to be returned
        for data_key, not_built_message in candidates:
            if data_key not in self._harvester_config:
                continue

            harvester = build_for(data_key)
            if harvester is None:
                log.warning(not_built_message)
            else:
                harvesters.append(harvester)

        return harvesters
    def test_harvester_with_memento(self):
        """
        Test that the harvester can find file as they are added to a directory,
        using a memento to start partway through the indices
        """

        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        # the file suffix is the pattern with its wildcard removed
        file_suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + file_suffix
        filename_2 = 'unit_' + INDICIES[1] + file_suffix

        # get metadata for the files
        metadata_1 = self.get_file_metadata(filename_1)
        metadata_1[DriverStateKey.INGESTED] = True
        metadata_1[DriverStateKey.PARSER_STATE] = None
        metadata_2 = self.get_file_metadata(filename_2)
        metadata_2[DriverStateKey.INGESTED] = True
        metadata_2[DriverStateKey.PARSER_STATE] = None
        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1,
                   filename_1: metadata_1,
                   filename_2: metadata_2}
        log.debug("starting with memento %s", memento)

        # override the file mod wait time on a private copy so the shared
        # CONFIG is left untouched
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # the copy (not CONFIG) must be handed to the harvester, otherwise
        # the wait time override above has no effect
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        try:
            # start a new event which will increase the file index using INDICIES
            # with a delay in between
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN],
                                                 2, 9, 5)

            # Wait for new files to be discovered: once counted from zero,
            # then five more times counted from the files found so far
            self.wait_for_file(0, 2)
            for _ in range(5):
                self.wait_for_file(self.found_file_count, 2)
        finally:
            # always stop the harvester, even if one of the waits raised
            file_harvester.shutdown()
Example #17
0
    def _build_harvester(self, driver_state):
        """
        Build the harvesters: a SingleFileHarvester for the telemetered key
        and a SingleDirectoryHarvester for the recovered key, each only when
        its key is present in the harvester configuration
        @param driver_state The starting driver state
        @retval the list of harvesters, which is also stored in self._harvester
        """
        harvesters = self._harvester = []
        harvester_config = self._harvester_config

        # telemetered data is harvested from a single file
        if DataTypeKey.DOSTA_ABCDJM_SIO_TELEMETERED not in harvester_config:
            log.warn(
                'No configuration for telemetered harvester, not building')
        else:
            harvesters.append(SingleFileHarvester(
                harvester_config.get(
                    DataTypeKey.DOSTA_ABCDJM_SIO_TELEMETERED),
                driver_state[DataTypeKey.DOSTA_ABCDJM_SIO_TELEMETERED],
                lambda file_state: self._file_changed_callback(
                    file_state, DataTypeKey.DOSTA_ABCDJM_SIO_TELEMETERED),
                self._exception_callback))

        # recovered data is harvested from a directory
        if DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED not in harvester_config:
            log.warn('No configuration for recovered harvester, not building')
        else:
            harvesters.append(SingleDirectoryHarvester(
                harvester_config.get(
                    DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED),
                driver_state[DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED],
                lambda filename: self._new_file_callback(
                    filename, DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED),
                lambda modified: self._modified_file_callback(
                    modified, DataTypeKey.DOSTA_ABCDJM_SIO_RECOVERED),
                self._exception_callback))

        return self._harvester
Example #18
0
    def _build_harvester(self, driver_state):
        """
        Build the harvesters: a SingleFileHarvester for the telemetered key
        and a SingleDirectoryHarvester for the recovered key, each only when
        its key is present in the harvester configuration
        @param driver_state The starting driver state
        @retval list of the harvesters that were built
        """
        harvesters = []
        harvester_config = self._harvester_config

        # telemetered data is harvested from a single file
        telemetered_key = DataSourceKey.FLORT_DJ_SIO_TELEMETERED
        if telemetered_key not in harvester_config:
            log.warn('No configuration for %s harvester, not building',
                     telemetered_key)
        else:
            harvesters.append(SingleFileHarvester(
                harvester_config.get(telemetered_key),
                driver_state[telemetered_key],
                lambda file_state: self._file_changed_callback(
                    file_state, telemetered_key),
                self._exception_callback))

        # recovered data is harvested from a directory
        recovered_key = DataSourceKey.FLORT_DJ_SIO_RECOVERED
        if recovered_key not in harvester_config:
            log.warn('No configuration for %s harvester, not building',
                     recovered_key)
        else:
            harvesters.append(SingleDirectoryHarvester(
                harvester_config.get(recovered_key),
                driver_state[recovered_key],
                lambda filename: self._new_file_callback(
                    filename, recovered_key),
                lambda modified: self._modified_file_callback(
                    modified, recovered_key),
                self._exception_callback))

        return harvesters
    def test_harvester_with_modified(self):
        """
        Test that the harvester reports a file which is modified after it
        has been recorded as ingested in the memento

        Raises when no modified file has been reported within roughly
        60 seconds.
        """

        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        # the file suffix is the pattern with its wildcard removed
        file_suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + file_suffix
        filename_2 = 'unit_' + INDICIES[1] + file_suffix

        # get metadata for the files
        metadata_1 = self.get_file_metadata(filename_1)
        metadata_1[DriverStateKey.INGESTED] = True
        metadata_1[DriverStateKey.PARSER_STATE] = None
        metadata_2 = self.get_file_metadata(filename_2)
        metadata_2[DriverStateKey.INGESTED] = True
        metadata_2[DriverStateKey.PARSER_STATE] = None
        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1,
                   filename_1: metadata_1,
                   filename_2: metadata_2}
        log.debug("starting with memento %s", memento)

        # override the file mod wait time on a private copy so the shared
        # CONFIG is left untouched
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # the copy (not CONFIG) must be handed to the harvester, otherwise
        # the wait time override above has no effect
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        try:
            # append to the first file so it differs from the memento metadata
            file_path = os.path.join(config[DataSetDriverConfigKeys.DIRECTORY], filename_1)
            with open(file_path, 'a') as filehandle:
                filehandle.write('a b c d')

            # poll every 2 seconds until a modified file has been reported
            end_time = 0
            while self.found_modified_count == 0:
                log.debug("Waiting for modified file...")
                time.sleep(2)
                end_time += 2
                if end_time > 60:
                    raise Exception("Timeout waiting to find modified files")
        finally:
            # stop the harvester on every path, including the timeout above
            file_harvester.shutdown()
Example #20
0
 def _build_harvester(self, driver_state):
     """
     Create the single directory harvester, keep it in self._harvester
     and return it
     @param driver_state passed to the harvester as its second argument
     @retval the harvester stored in self._harvester
     """
     callbacks = (self._new_file_callback,
                  self._modified_file_callback,
                  self._exception_callback)
     self._harvester = SingleDirectoryHarvester(self._harvester_config,
                                                driver_state,
                                                *callbacks)
     return self._harvester
    def test_file_mod_wait_time(self):
        """
        Test that the file mod wait time is actually waiting before finding files

        Fails when the file is reported after fewer seconds than the
        configured file mod wait time, and raises when it is not reported
        within roughly 60 seconds.
        """
        memento = None
        file_harvester = SingleDirectoryHarvester(CONFIG, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        try:
            # put a file in the directory, the mod time will be the create time
            self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                           CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 1, 0)

            # keep track of approximately how long it takes to find the
            # file, in whole seconds
            file_found_time = 0
            while self.found_file_count == 0:
                time.sleep(1)
                file_found_time += 1
                if file_found_time > 60:
                    raise Exception("Timeout waiting to find file")

            if file_found_time < CONFIG.get(DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME):
                # we found the file before the mod time, this is bad!
                self.fail('Files found in %s seconds' % file_found_time)
            log.debug('File found in %s seconds', file_found_time)
        finally:
            # stop the harvester on every path, including the timeout above
            file_harvester.shutdown()
Example #22
0
    def build_single_harvester(self, key, driver_state):
        """
        Build a single directory harvester for one data key.

        @param key - key used to look up both the harvester config and the
                     driver state, and forwarded to the file callbacks
        @param driver_state - starting driver state, indexed by key
        @return the new SingleDirectoryHarvester
        """
        def on_new_file(filename):
            # tag the new-file notification with the key it belongs to
            return self._new_file_callback(filename, key)

        def on_modified(modified):
            # tag the modified-file notification with the key it belongs to
            return self._modified_file_callback(modified, key)

        return SingleDirectoryHarvester(
            self._harvester_config.get(key),
            driver_state[key],
            on_new_file,
            on_modified,
            self._exception_callback)
    def test_harvester_1000(self):
        """
        The harvester is taking a really long time to run, find out how long
        for 1000 files.  The elapsed time between starting the harvester and
        the found file count reaching 1000 is written to the debug log.
        """
        self.fill_directory_1000_files(CONFIG[DataSetDriverConfigKeys.PATTERN])
        memento = None
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 1
        # pass the local copy (not CONFIG) so the shortened file mod wait
        # time set above is the one the harvester actually uses
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        start_time = time.time()
        file_harvester.start()
        try:
            while self.found_file_count < 1000:
                self.wait_for_file(self.found_file_count, 5, 60)
            end_time = time.time()
            log.debug('harvester found all files in %s', (end_time - start_time))
        finally:
            # the harvester was previously left running when this test ended
            file_harvester.shutdown()
    def test_harvester_without_mod_time(self):
        """
        Run the harvester with a config that has no FILE_MOD_WAIT_TIME entry
        (only directory, storage directory, pattern and frequency) and check
        that new files are still discovered.
        """
        harvester_config = {
            DataSetDriverConfigKeys.DIRECTORY: TESTDIR,
            DataSetDriverConfigKeys.STORAGE_DIRECTORY: TESTDIR,
            DataSetDriverConfigKeys.PATTERN: CONFIG[DataSetDriverConfigKeys.PATTERN],
            DataSetDriverConfigKeys.FREQUENCY: 5,
        }

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(harvester_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # fill the directory from a separate greenlet while we wait here
        target_dir = CONFIG[DataSetDriverConfigKeys.DIRECTORY]
        file_pattern = CONFIG[DataSetDriverConfigKeys.PATTERN]
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             target_dir, file_pattern, 0, 2)

        # two rounds of waiting: first from a count of zero, then from
        # whatever count has been reached by that point
        self.wait_for_file(0, 2)
        self.wait_for_file(self.found_file_count, 2)

        harvester.shutdown()
    def test_harvester_multi_file(self):
        """
        Arrange the timing (harvester frequency 1, files written .5 apart)
        so that the harvester finds multiple new files at once
        """
        harvester_config = CONFIG.copy()
        harvester_config[DataSetDriverConfigKeys.FREQUENCY] = 1
        harvester_config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(harvester_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # the filler writes files only .5 secs apart, so 2 files appear in
        # the 1 second between harvester checks
        target_dir = CONFIG[DataSetDriverConfigKeys.DIRECTORY]
        file_pattern = CONFIG[DataSetDriverConfigKeys.PATTERN]
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             target_dir, file_pattern, 0, 12, .5)

        # first wait starts from zero, the next four start from the count
        # reached so far
        self.wait_for_file(0)
        for _ in range(4):
            self.wait_for_file(self.found_file_count)

        harvester.shutdown()
    def test_harvester_from_scratch(self):
        """
        Test that the harvester can find files as they are added to a directory,
        starting with just the base file in the directory
        """
        # start the harvester from scratch
        memento = None
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 10
        # pass the local copy (not CONFIG) so the file mod wait time set
        # above is the one the harvester actually uses
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        # start a new event which will increase the file index using INDICIES
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                             CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 5, 10)

        # Wait for new files to be discovered
        self.wait_for_file(0, 5)
        self.wait_for_file(self.found_file_count, 5)
        self.wait_for_file(self.found_file_count, 5)
        self.wait_for_file(self.found_file_count, 5)
        self.wait_for_file(self.found_file_count, 5)

        file_harvester.shutdown()
Example #27
0
    def _build_harvester(self, driver_state):
        """
        Construct the single directory harvester for this driver and
        return it; a warning is logged if the result is None.

        @param driver_state - starting state handed to the harvester
        @return the harvester that was built
        """
        harvester_args = (
            self._harvester_config,
            driver_state,
            self._new_file_callback,
            self._modified_file_callback,
            self._exception_callback,
        )
        built = SingleDirectoryHarvester(*harvester_args)

        if built is None:
            log.warn('harverster failed instantiation due to missing config')

        return built
Example #28
0
 def build_single_dir_harvester(self, driver_state, data_key):
     """
     Create the single directory harvester that serves one data source key
     @param driver_state - the starting driver state, indexed by data key
     @param data_key - the data source key to build the harvester for; it
                       is also forwarded to the new/modified file callbacks
     @return the new SingleDirectoryHarvester
     """
     source_config = self._harvester_config.get(data_key)
     source_state = driver_state[data_key]

     def on_new_file(filename):
         return self._new_file_callback(filename, data_key)

     def on_modified(modified):
         return self._modified_file_callback(modified, data_key)

     return SingleDirectoryHarvester(source_config, source_state,
                                     on_new_file, on_modified,
                                     self._exception_callback)
Example #29
0
 def _build_harvester(self, driver_state):
     """
     Instantiate the driver's single directory harvester, remember it in
     self._harvester and return it.

     @param driver_state - starting state handed to the harvester
     @return the harvester stored in self._harvester
     """
     built = SingleDirectoryHarvester(
         self._harvester_config, driver_state,
         self._new_file_callback, self._modified_file_callback,
         self._exception_callback)
     self._harvester = built
     return self._harvester
Example #30
0
 def build_single_harvester(self, driver_state, key):
     """
     Build the harvester for one key, if that key has been configured.

     @param driver_state - starting driver state, indexed by key
     @param key - key to look up in the harvester config and driver state
     @return the new SingleDirectoryHarvester, or None (after logging a
             warning) when the key is absent from the harvester config
     """
     if key not in self._harvester_config:
         log.warn('flntu/flcdr harvester not built because missing config')
         return None

     def on_new_file(filename):
         return self._new_file_callback(filename, key)

     def on_modified(modified):
         return self._modified_file_callback(modified, key)

     return SingleDirectoryHarvester(
         self._harvester_config.get(key), driver_state[key],
         on_new_file, on_modified, self._exception_callback)
Example #31
0
    def _build_harvester(self, driver_state):
        """
        Build one harvester per configured data type and return them all.

        @param driver_state - starting driver state, indexed by data type key
        @return list of the harvesters that were built (may be empty)
        """
        built = []

        # CTDPF_CKL_WFP: directory harvester, only when the key is present
        # in the harvester configuration.
        if DataTypeKey.CTDPF_CKL_WFP in self._harvester_config:
            log.debug('CAG DRIVER - build harvester for %s',
                      driver_state[DataTypeKey.CTDPF_CKL_WFP])

            def wfp_new_file(filename):
                return self._new_file_callback(
                    filename, DataTypeKey.CTDPF_CKL_WFP)

            def wfp_modified(modified):
                return self._modified_file_callback(
                    modified, DataTypeKey.CTDPF_CKL_WFP)

            wfp_harvester = SingleDirectoryHarvester(
                self._harvester_config.get(DataTypeKey.CTDPF_CKL_WFP),
                driver_state[DataTypeKey.CTDPF_CKL_WFP],
                wfp_new_file,
                wfp_modified,
                self._exception_callback)

            if wfp_harvester is None:
                log.warning('CTDPF_CKL_WFP HARVESTER NOT BUILT')
            else:
                built.append(wfp_harvester)

        # CTDPF_CKL_WFP_SIO_MULE: single file harvester, only when the key
        # is present in the harvester configuration.
        if DataTypeKey.CTDPF_CKL_WFP_SIO_MULE in self._harvester_config:
            log.debug('CAG DRIVER - build harvester for %s',
                      driver_state[DataTypeKey.CTDPF_CKL_WFP_SIO_MULE])

            def mule_file_changed(file_state):
                return self._file_changed_callback(
                    file_state, DataTypeKey.CTDPF_CKL_WFP_SIO_MULE)

            mule_harvester = SingleFileHarvester(
                self._harvester_config.get(DataTypeKey.CTDPF_CKL_WFP_SIO_MULE),
                driver_state[DataTypeKey.CTDPF_CKL_WFP_SIO_MULE],
                mule_file_changed,
                self._exception_callback)

            if mule_harvester is None:
                log.warning('CTDPF_CKL_WFP_SIO_MULE HARVESTER NOT BUILT')
            else:
                built.append(mule_harvester)

        return built
Example #32
0
    def _build_harvester(self, driver_state):
        """
        Build and return the harvesters

        A harvester is built for each of the VEL3D_L_WFP and
        VEL3D_L_WFP_SIO_MULE keys that is present in the harvester
        configuration; a warning naming the key is logged for each one
        that is missing.

        @param driver_state - starting driver state, indexed by data type key
        @return list of the harvesters that were built (may be empty)
        """

        harvesters = []  # list of harvesters to be returned

        #
        # Verify that the WFP harvester has been configured.
        # If so, build the harvester and add it to the list of harvesters.
        #
        if DataTypeKey.VEL3D_L_WFP in self._harvester_config:
            wfp_harvester = SingleDirectoryHarvester(
                self._harvester_config.get(DataTypeKey.VEL3D_L_WFP),
                driver_state[DataTypeKey.VEL3D_L_WFP],
                lambda filename: self._new_file_callback(
                    filename, DataTypeKey.VEL3D_L_WFP),
                lambda modified: self._modified_file_callback(
                    modified, DataTypeKey.VEL3D_L_WFP),
                self._exception_callback)

            if wfp_harvester is not None:
                harvesters.append(wfp_harvester)

        else:
            log.warn('Missing harvester configuration for key %s',
                     DataTypeKey.VEL3D_L_WFP)

        #
        # Verify that the SIO Mule harvester has been configured.
        # If so, build the harvester and add it to the list of harvesters.
        #
        if DataTypeKey.VEL3D_L_WFP_SIO_MULE in self._harvester_config:
            sio_harvester = SingleFileHarvester(
                self._harvester_config.get(DataTypeKey.VEL3D_L_WFP_SIO_MULE),
                driver_state[DataTypeKey.VEL3D_L_WFP_SIO_MULE],
                lambda file_state: self._file_changed_callback(
                    file_state, DataTypeKey.VEL3D_L_WFP_SIO_MULE),
                self._exception_callback)

            if sio_harvester is not None:
                harvesters.append(sio_harvester)

        else:
            # report the key that is actually missing; this branch used to
            # name VEL3D_L_WFP, which made the warning misleading
            log.warn('Missing harvester configuration for key %s',
                     DataTypeKey.VEL3D_L_WFP_SIO_MULE)

        return harvesters
Example #33
0
 def build_single_harvester(self, driver_state, key):
     """
     Build the single directory harvester for the given key.

     @param driver_state - starting driver state, indexed by key
     @param key - key to look up in the harvester config and driver state
     @return the new harvester, or None (with a warning logged) when the
             key has no entry in the harvester config
     """
     configured = key in self._harvester_config
     if not configured:
         log.warn(
             'build_single_harvester did not receive a particle type, harvester instantiation failed'
         )
         return None

     key_config = self._harvester_config.get(key)
     key_state = driver_state[key]
     return SingleDirectoryHarvester(
         key_config,
         key_state,
         lambda filename: self._new_file_callback(filename, key),
         lambda modified: self._modified_file_callback(modified, key),
         self._exception_callback)
Example #34
0
    def _build_single_dir_harvester(self, driver_state, data_key):
        """
        Build the single directory harvester for one data key.

        @param driver_state - starting driver state, indexed by data key
        @param data_key - key to look up in the harvester config and state
        @return the new harvester, or None (with a warning logged) when
                the key has no entry in the harvester config
        """
        if data_key not in self._harvester_config:
            log.warn('No configuration for %s harvester, not building', data_key)
            return None

        def on_new_file(filename):
            return self._new_file_callback(filename, data_key)

        def on_modified(modified):
            return self._modified_file_callback(modified, data_key)

        return SingleDirectoryHarvester(self._harvester_config.get(data_key),
                                        driver_state[data_key],
                                        on_new_file,
                                        on_modified,
                                        self._exception_callback)
    def test_init(self):
        """
        Smoke test: construct a harvester from a minimal directory/pattern
        config, sort a short list of file names, then start it and shut it
        down again.
        """
        minimal_config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(minimal_config, None,
                                             self.new_file_found_callback,
                                             self.file_exception_callback)

        sample_names = ['a_1_2.bla', 'a_2_2.bla']
        harvester.sort_files(sample_names)
        harvester.start()
        harvester.shutdown()
Example #36
0
    def _build_harvester(self, driver_state):
        """
        Build one harvester per configured data type and return them all.

        @param driver_state - starting driver state, indexed by data type key
        @return list of the harvesters that were built (may be empty)
        """
        built = []

        # WFP_ENG_STC_IMODEM: directory harvester, only when the key is
        # present in the harvester configuration.
        if DataTypeKey.WFP_ENG_STC_IMODEM in self._harvester_config:

            def imodem_new_file(filename):
                return self._new_file_callback(
                    filename, DataTypeKey.WFP_ENG_STC_IMODEM)

            def imodem_modified(modified):
                return self._modified_file_callback(
                    modified, DataTypeKey.WFP_ENG_STC_IMODEM)

            imodem_harvester = SingleDirectoryHarvester(
                self._harvester_config.get(DataTypeKey.WFP_ENG_STC_IMODEM),
                driver_state[DataTypeKey.WFP_ENG_STC_IMODEM],
                imodem_new_file,
                imodem_modified,
                self._exception_callback)

            if imodem_harvester is None:
                log.debug('WFP_ENG_STC_IMODEM HARVESTER NOT BUILT')
            else:
                built.append(imodem_harvester)

        # WFP_ENG_WFP_SIO_MULE: single file harvester, only when the key is
        # present in the harvester configuration.
        if DataTypeKey.WFP_ENG_WFP_SIO_MULE in self._harvester_config:

            def mule_file_changed(file_state):
                return self._file_changed_callback(
                    file_state, DataTypeKey.WFP_ENG_WFP_SIO_MULE)

            mule_harvester = SingleFileHarvester(
                self._harvester_config.get(DataTypeKey.WFP_ENG_WFP_SIO_MULE),
                driver_state[DataTypeKey.WFP_ENG_WFP_SIO_MULE],
                mule_file_changed,
                self._exception_callback)

            if mule_harvester is None:
                log.debug('WFP_ENG_WFP_SIO_MULE HARVESTER NOT BUILT')
            else:
                built.append(mule_harvester)

        return built
    def test_init(self):
        """
        Smoke test: a harvester built from CONFIG with no memento can be
        started and then shut down.
        """
        callbacks = (self.new_file_found_callback,
                     self.modified_files_found_callback,
                     self.file_exception_callback)

        # None as the memento means the harvester starts from scratch
        harvester = SingleDirectoryHarvester(CONFIG, None, *callbacks)
        harvester.start()
        harvester.shutdown()
    def test_harvester_with_memento(self):
        """
        Test that the harvester can find file as they are added to a directory,
        using a memento to start partway through the indices
        """

        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        filename_1 = 'unit_' + INDICIES[0] + CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_2 = 'unit_' + INDICIES[1] + CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')

        # get metadata for the files
        metadata_1 = self.get_file_metadata(filename_1)
        metadata_1[DriverStateKey.INGESTED] = True
        metadata_1[DriverStateKey.PARSER_STATE] = None
        metadata_2 = self.get_file_metadata(filename_2)
        metadata_2[DriverStateKey.INGESTED] = True
        metadata_2[DriverStateKey.PARSER_STATE] = None
        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1,
                   filename_1: metadata_1,
                   filename_2: metadata_2
                   }
        log.debug("starting with memento %s", memento)
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the local copy (not CONFIG) so the file mod wait time set
        # above is the one the harvester actually uses
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()

        # start a new event which will increase the file index using INDICIES
        # with a delay in between
        self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                             CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                             CONFIG[DataSetDriverConfigKeys.PATTERN], 2, 9, 5)

        # Wait repeatedly for new files to be discovered, each wait starting
        # from the count found so far
        self.wait_for_file(0, 2)
        self.wait_for_file(self.found_file_count, 2)
        self.wait_for_file(self.found_file_count, 2)
        self.wait_for_file(self.found_file_count, 2)
        self.wait_for_file(self.found_file_count, 2)
        self.wait_for_file(self.found_file_count, 2)

        file_harvester.shutdown()
    def test_harvester_with_modified(self):
        """
        Test that the harvester reports a modification to a file that the
        memento marks as already ingested: after the harvester starts, data
        is appended to the first file and the test waits for the found
        modified count to become non-zero.

        Raises an Exception if no modification is seen within roughly 60
        seconds.
        """

        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        filename_1 = 'unit_' + INDICIES[0] + CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_2 = 'unit_' + INDICIES[1] + CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')

        # get metadata for the files
        metadata_1 = self.get_file_metadata(filename_1)
        metadata_1[DriverStateKey.INGESTED] = True
        metadata_1[DriverStateKey.PARSER_STATE] = None
        metadata_2 = self.get_file_metadata(filename_2)
        metadata_2[DriverStateKey.INGESTED] = True
        metadata_2[DriverStateKey.PARSER_STATE] = None
        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1,
                   filename_1: metadata_1,
                   filename_2: metadata_2
                   }
        log.debug("starting with memento %s", memento)
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the local copy (not CONFIG) so the file mod wait time set
        # above is the one the harvester actually uses
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # append to the first file so its contents no longer match the
            # metadata recorded in the memento
            file_path = os.path.join(CONFIG[DataSetDriverConfigKeys.DIRECTORY], filename_1)
            with open(file_path, 'a') as filehandle:
                filehandle.write('a b c d')

            # poll every 2 seconds for the modified-file callback
            end_time = 0
            while self.found_modified_count == 0:
                log.debug("Waiting for modified file...")
                time.sleep(2)
                end_time += 2
                if end_time > 60:
                    raise Exception("Timeout waiting to find modified files")
        finally:
            # stop the harvester on every exit path, including the timeout
            # exception above, so it does not keep running after the test
            file_harvester.shutdown()
Example #40
0
    def _build_harvester(self, driver_state):
        """
        Build a harvester for each of the DOSTA_LN_WFP_SIO_MULE and
        DOSTA_LN_WFP data sources that has an entry in the harvester
        config; a warning is logged for each one that does not.
        @param driver_state The starting driver state, indexed by data
                            source key
        @return list of the harvesters that were built (may be empty)
        """
        built = []

        # DOSTA_LN_WFP_SIO_MULE is served by a single file harvester
        if DataSourceKey.DOSTA_LN_WFP_SIO_MULE not in self._harvester_config:
            log.warn(
                'No configuration for dosta ln wfp sio mule harvester, not building'
            )
        else:
            def mule_file_changed(file_state):
                return self._file_changed_callback(
                    file_state, DataSourceKey.DOSTA_LN_WFP_SIO_MULE)

            built.append(SingleFileHarvester(
                self._harvester_config.get(
                    DataSourceKey.DOSTA_LN_WFP_SIO_MULE),
                driver_state[DataSourceKey.DOSTA_LN_WFP_SIO_MULE],
                mule_file_changed,
                self._exception_callback))

        # DOSTA_LN_WFP is served by a single directory harvester
        if DataSourceKey.DOSTA_LN_WFP not in self._harvester_config:
            log.warn(
                'No configuration for dosta ln wfp harvester, not building')
        else:
            def wfp_new_file(filename):
                return self._new_file_callback(
                    filename, DataSourceKey.DOSTA_LN_WFP)

            def wfp_modified(modified):
                return self._modified_file_callback(
                    modified, DataSourceKey.DOSTA_LN_WFP)

            built.append(SingleDirectoryHarvester(
                self._harvester_config.get(DataSourceKey.DOSTA_LN_WFP),
                driver_state[DataSourceKey.DOSTA_LN_WFP],
                wfp_new_file,
                wfp_modified,
                self._exception_callback))

        return built
Example #41
0
    def _build_harvester(self, driver_state):
        """
        Build a harvester for each of the CTDMO_GHQR_CO, CTDMO_GHQR_CT and
        CTDMO_GHQR_SIO_MULE data types that has an entry in the harvester
        config; a warning is logged for each one that does not.
        @param driver_state The starting driver state, indexed by data
                            type key
        @return list of the harvesters that were built (may be empty)
        """
        built = []

        # CTDMO_GHQR_CO: single directory harvester
        if DataTypeKey.CTDMO_GHQR_CO not in self._harvester_config:
            log.warn(
                'No configuration for ctdmo_ghqr_co harvester, not building')
        else:
            def co_new_file(filename):
                return self._new_file_callback(
                    filename, DataTypeKey.CTDMO_GHQR_CO)

            def co_modified(modified):
                return self._modified_file_callback(
                    modified, DataTypeKey.CTDMO_GHQR_CO)

            co_harvester = SingleDirectoryHarvester(
                self._harvester_config.get(DataTypeKey.CTDMO_GHQR_CO),
                driver_state[DataTypeKey.CTDMO_GHQR_CO],
                co_new_file,
                co_modified,
                self._exception_callback)

            if co_harvester is None:
                log.warn('Could not build ctdmo_ghqr_co harvester')
            else:
                built.append(co_harvester)

        # CTDMO_GHQR_CT: single directory harvester
        if DataTypeKey.CTDMO_GHQR_CT not in self._harvester_config:
            log.warn(
                'No configuration for ctdmo_ghqr_ct harvester, not building')
        else:
            def ct_new_file(filename):
                return self._new_file_callback(
                    filename, DataTypeKey.CTDMO_GHQR_CT)

            def ct_modified(modified):
                return self._modified_file_callback(
                    modified, DataTypeKey.CTDMO_GHQR_CT)

            ct_harvester = SingleDirectoryHarvester(
                self._harvester_config.get(DataTypeKey.CTDMO_GHQR_CT),
                driver_state[DataTypeKey.CTDMO_GHQR_CT],
                ct_new_file,
                ct_modified,
                self._exception_callback)

            if ct_harvester is None:
                log.warn('Could not build ctdmo_ghqr_ct harvester')
            else:
                built.append(ct_harvester)

        # CTDMO_GHQR_SIO_MULE: single file harvester
        if DataTypeKey.CTDMO_GHQR_SIO_MULE not in self._harvester_config:
            log.warn(
                'No configuration for ctdmo_ghqr_sio_mule harvester, not building'
            )
        else:
            def mule_file_changed(file_state):
                return self._file_changed_callback(
                    file_state, DataTypeKey.CTDMO_GHQR_SIO_MULE)

            mule_harvester = SingleFileHarvester(
                self._harvester_config.get(DataTypeKey.CTDMO_GHQR_SIO_MULE),
                driver_state[DataTypeKey.CTDMO_GHQR_SIO_MULE],
                mule_file_changed,
                self._exception_callback)

            if mule_harvester is None:
                log.warn('Could not build ctdmo_ghqr_sio_mule harvester')
            else:
                built.append(mule_harvester)

        return built