def test_file_mod_wait_time(self):
        """
        Test that the file mod wait time is actually waited for before a
        new file is reported.

        A single file is written after the harvester is started, then the
        test polls self.found_file_count once a second.  An Exception is
        raised if nothing is found after about 60 seconds, and the test
        fails if the file shows up in fewer seconds than the configured
        FILE_MOD_WAIT_TIME.  The harvester is shut down on every exit path.
        """
        memento = None
        file_harvester = SingleDirectoryHarvester(CONFIG, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # put a file in the directory, the mod time will be the create time
            self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                           CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 1, 0)

            # keep track of approximately how long it takes to find the file
            file_found_time = 0
            while self.found_file_count == 0:
                time.sleep(1)
                file_found_time += 1
                if file_found_time > 60:
                    raise Exception("Timeout waiting to find file")

            if file_found_time < CONFIG.get(DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME):
                # we found the file before the mod time, this is bad!
                self.fail('Files found in %s seconds' % file_found_time)
            log.debug('File found in %s seconds', file_found_time)
        finally:
            # previously the timeout path left the harvester running
            file_harvester.shutdown()
    def test_harvester_without_frequency(self):
        """
        Run the harvester with a config that only contains a directory and
        a pattern (no frequency entry) and wait for new files to be found
        """
        minimal_config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

        # brand new harvester, no previous state to restore
        harvester = SingleDirectoryHarvester(minimal_config, None,
                                             self.new_file_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # spawn the filler which copies the first file and increases the
        # file index into the data directory with a delay in between
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG['directory'], CONFIG['pattern'], 2)

        # three rounds of discovery: the first starts from a count of zero,
        # the following two start from the count reached so far
        self.wait_for_file(0)
        for _ in range(2):
            self.wait_for_file(self.found_file_count)

        harvester.shutdown()
    def test_harvester_without_mod_time(self):
        """
        Run the harvester with a config that has a directory, storage
        directory, pattern and frequency but no file mod wait time entry,
        and wait for new files to be found
        """
        keys = DataSetDriverConfigKeys
        partial_config = {
            keys.DIRECTORY: TESTDIR,
            keys.STORAGE_DIRECTORY: TESTDIR,
            keys.PATTERN: CONFIG[keys.PATTERN],
            keys.FREQUENCY: 5,
        }

        # brand new harvester, no previous state to restore
        harvester = SingleDirectoryHarvester(partial_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # spawn the filler which increases the file index using INDICIES
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[keys.DIRECTORY], CONFIG[keys.PATTERN], 0, 2)

        # two rounds of discovery, first from zero then from the current count
        self.wait_for_file(0, 2)
        self.wait_for_file(self.found_file_count, 2)

        harvester.shutdown()
 def test_harvester_from_scratch(self):
     """
     Start a harvester with no memento and check that it keeps reporting
     new files while they are being added to the directory
     """
     harvester = SingleDirectoryHarvester(CONFIG, None,
                                          self.new_file_found_callback,
                                          self.file_exception_callback)
     harvester.start()

     # spawn the filler which increases the file index using INDICIES
     self.directory_filler = gevent.spawn(
         self.fill_directory_with_files,
         CONFIG['directory'], CONFIG['pattern'], 0, 6)

     # seven rounds of discovery: the first starts from a count of zero,
     # each of the other six starts from the count reached so far
     self.wait_for_file(0)
     for _ in range(6):
         self.wait_for_file(self.found_file_count)

     harvester.shutdown()
 def test_harvester_with_memento(self):
     """
     Start a harvester from a memento that names an existing file and
     check that it keeps reporting files as more are added
     """
     data_dir = CONFIG['directory']
     pattern = CONFIG['pattern']

     # two files must already be present before the harvester starts
     self.fill_directory_with_files(data_dir, pattern, 0, 2, 0)

     # the memento is the path of the file built from INDICIES[1]
     memento = ''.join([data_dir, '/', 'unit_', INDICIES[1],
                        pattern.replace('*', '')])
     log.debug("starting with memento %s", memento)
     harvester = SingleDirectoryHarvester(CONFIG, memento,
                                          self.new_file_found_callback,
                                          self.file_exception_callback)
     harvester.start()

     # spawn the filler which increases the file index using INDICIES
     # with a delay in between
     self.directory_filler = gevent.spawn(
         self.fill_directory_with_files,
         CONFIG['directory'], CONFIG['pattern'], 2, 9)

     # nine rounds of discovery: the first starts from a count of zero,
     # each of the other eight starts from the count reached so far
     self.wait_for_file(0)
     for _ in range(8):
         self.wait_for_file(self.found_file_count)

     harvester.shutdown()
    def test_missing_directory(self):
        """
        Remove the test directory, confirm it is gone, re-create it, then
        start a harvester on it and wait for new files to be found
        """
        minimal_config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

        # empty the directory and remove it entirely
        self.clean_directory(TESTDIR)
        os.rmdir(TESTDIR)
        self.assertFalse(os.path.exists(TESTDIR))

        # put the (now empty) directory back before the harvester starts
        os.mkdir(TESTDIR)
        harvester = SingleDirectoryHarvester(minimal_config, None,
                                             self.new_file_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # spawn the filler which increases the file index using INDICIES
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG['directory'], CONFIG['pattern'], 0, 2)

        # two rounds of discovery, each starting from the current count
        for _ in range(2):
            self.wait_for_file(self.found_file_count)

        harvester.shutdown()
    def test_harvester_from_scratch(self):
        """
        Test that the harvester can find files as they are added to a directory,
        starting with no memento.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 10.  It is shut down even if one of the waits raises.
        """
        # start the harvester from scratch
        memento = None
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 10
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # start a new event which will increase the file index using INDICIES
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN], 0, 5, 10)

            # Wait for five sets of new files to be discovered: the first
            # starts from a count of zero, the rest from the current count
            self.wait_for_file(0, 5)
            for _ in range(4):
                self.wait_for_file(self.found_file_count, 5)
        finally:
            file_harvester.shutdown()
    def test_harvester_multi_file(self):
        """
        Use a harvester frequency of 1 and a filler delay of .5 so that
        several new files show up between two harvester checks
        """
        keys = DataSetDriverConfigKeys
        fast_config = CONFIG.copy()
        fast_config[keys.FREQUENCY] = 1
        fast_config[keys.FILE_MOD_WAIT_TIME] = 15

        # brand new harvester, no previous state to restore
        harvester = SingleDirectoryHarvester(fast_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # the filler generates files with only .5 secs between them,
        # meaning 2 files will appear in the 1 second between the
        # harvester checking
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[keys.DIRECTORY], CONFIG[keys.PATTERN], 0, 12, .5)

        # five rounds of discovery: the first starts from a count of zero,
        # each of the other four starts from the count reached so far
        self.wait_for_file(0)
        for _ in range(4):
            self.wait_for_file(self.found_file_count)

        harvester.shutdown()
 def test_harvester_with_memento(self):
     """
     Start a harvester from a memento derived from a file already in the
     directory and check that it keeps reporting files as more are added
     """
     data_dir = CONFIG['directory']
     pattern = CONFIG['pattern']

     # files must already be present before the harvester starts
     self.fill_directory_with_files(data_dir, pattern, 2, 0)

     # build the memento from the first matching file with its index
     # replaced by 2
     matching_files = glob.glob(data_dir + '/' + pattern)
     memento = self.replace_file_index(matching_files[0], 2)
     harvester = SingleDirectoryHarvester(CONFIG, memento,
                                          self.new_file_found_callback,
                                          self.file_exception_callback)
     harvester.start()

     # spawn the filler which copies the first file and increases the
     # file index into the data directory with a delay in between
     self.directory_filler = gevent.spawn(
         self.fill_directory_with_files,
         CONFIG['directory'], CONFIG['pattern'], 3)

     # three rounds of discovery: the first starts from a count of zero,
     # the following two start from the count reached so far
     self.wait_for_file(0)
     for _ in range(2):
         self.wait_for_file(self.found_file_count)

     harvester.shutdown()
    def test_harvester_multi_file(self):
        """
        Use a harvester frequency of 1 and a filler delay of .5 so that
        several new files show up between two harvester checks
        """
        keys = DataSetDriverConfigKeys
        fast_config = CONFIG.copy()
        fast_config[keys.FREQUENCY] = 1
        fast_config[keys.FILE_MOD_WAIT_TIME] = 15

        # brand new harvester, no previous state to restore
        harvester = SingleDirectoryHarvester(fast_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # the filler generates files with only .5 secs between them,
        # meaning 2 files will appear in the 1 second between the
        # harvester checking
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[keys.DIRECTORY], CONFIG[keys.PATTERN], 0, 12, .5)

        # five rounds of discovery: the first starts from a count of zero,
        # each of the other four starts from the count reached so far
        self.wait_for_file(0)
        for _ in range(4):
            self.wait_for_file(self.found_file_count)

        harvester.shutdown()
    def test_file_mod_wait_time(self):
        """
        Test that the file mod wait time is actually waited for before a
        new file is reported.

        A single file is written after the harvester is started, then the
        test polls self.found_file_count once a second.  An Exception is
        raised if nothing is found after about 60 seconds, and the test
        fails if the file shows up in fewer seconds than the configured
        FILE_MOD_WAIT_TIME.  The harvester is shut down on every exit path.
        """
        memento = None
        file_harvester = SingleDirectoryHarvester(CONFIG, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # put a file in the directory, the mod time will be the create time
            self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                           CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 1, 0)

            # keep track of approximately how long it takes to find the file
            file_found_time = 0
            while self.found_file_count == 0:
                time.sleep(1)
                file_found_time += 1
                if file_found_time > 60:
                    raise Exception("Timeout waiting to find file")

            if file_found_time < CONFIG.get(DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME):
                # we found the file before the mod time, this is bad!
                self.fail('Files found in %s seconds' % file_found_time)
            log.debug('File found in %s seconds', file_found_time)
        finally:
            # previously the timeout path left the harvester running
            file_harvester.shutdown()
    def test_harvester_without_mod_time(self):
        """
        Run the harvester with a config that has a directory, storage
        directory, pattern and frequency but no file mod wait time entry,
        and wait for new files to be found
        """
        keys = DataSetDriverConfigKeys
        partial_config = {
            keys.DIRECTORY: TESTDIR,
            keys.STORAGE_DIRECTORY: TESTDIR,
            keys.PATTERN: CONFIG[keys.PATTERN],
            keys.FREQUENCY: 5,
        }

        # brand new harvester, no previous state to restore
        harvester = SingleDirectoryHarvester(partial_config, None,
                                             self.new_file_found_callback,
                                             self.modified_files_found_callback,
                                             self.file_exception_callback)
        harvester.start()

        # spawn the filler which increases the file index using INDICIES
        self.directory_filler = gevent.spawn(
            self.fill_directory_with_files,
            CONFIG[keys.DIRECTORY], CONFIG[keys.PATTERN], 0, 2)

        # two rounds of discovery, first from zero then from the current count
        self.wait_for_file(0, 2)
        self.wait_for_file(self.found_file_count, 2)

        harvester.shutdown()
    def test_harvester_from_scratch(self):
        """
        Test that the harvester can find files as they are added to a directory,
        starting with no memento.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 10.  It is shut down even if one of the waits raises.
        """
        # start the harvester from scratch
        memento = None
        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 10
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # start a new event which will increase the file index using INDICIES
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN], 0, 5, 10)

            # Wait for five sets of new files to be discovered: the first
            # starts from a count of zero, the rest from the current count
            self.wait_for_file(0, 5)
            for _ in range(4):
                self.wait_for_file(self.found_file_count, 5)
        finally:
            file_harvester.shutdown()
 def test_harvester_multi_file(self):
     """
     Use a filler delay of .5 so that several new files can show up
     between two harvester checks
     """
     # brand new harvester, no previous state to restore
     harvester = SingleDirectoryHarvester(CONFIG, None,
                                          self.new_file_found_callback,
                                          self.file_exception_callback)
     harvester.start()

     # the filler generates files with only .5 secs between them,
     # meaning 2 files will appear in the 1 second between the
     # harvester checking
     self.directory_filler = gevent.spawn(
         self.fill_directory_with_files,
         CONFIG['directory'], CONFIG['pattern'], 0, 12, .5)

     # five rounds of discovery: the first starts from a count of zero,
     # each of the other four starts from the count reached so far
     self.wait_for_file(0)
     for _ in range(4):
         self.wait_for_file(self.found_file_count)

     harvester.shutdown()
    def test_init(self):
        """
        Construct a harvester with no memento, start it and shut it down
        """
        callbacks = (self.new_file_found_callback,
                     self.modified_files_found_callback,
                     self.file_exception_callback)

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(CONFIG, None, *callbacks)
        harvester.start()
        harvester.shutdown()
    def test_init(self):
        """
        Construct a harvester with no memento, start it and shut it down
        """
        callbacks = (self.new_file_found_callback,
                     self.modified_files_found_callback,
                     self.file_exception_callback)

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(CONFIG, None, *callbacks)
        harvester.start()
        harvester.shutdown()
    def test_harvester_with_memento(self):
        """
        Test that the harvester can find files as they are added to a directory,
        using a memento that marks the first two files as already ingested.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 15.  It is shut down even if one of the waits raises.
        """
        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + suffix
        filename_2 = 'unit_' + INDICIES[1] + suffix

        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1}
        for filename in (filename_1, filename_2):
            metadata = self.get_file_metadata(filename)
            metadata[DriverStateKey.INGESTED] = True
            metadata[DriverStateKey.PARSER_STATE] = None
            memento[filename] = metadata
        log.debug("starting with memento %s", memento)

        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # start a new event which will increase the file index using INDICIES
            # with a delay in between
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN], 2, 9, 5)

            # Wait for six sets of new files to be discovered: the first
            # starts from a count of zero, the rest from the current count
            self.wait_for_file(0, 2)
            for _ in range(5):
                self.wait_for_file(self.found_file_count, 2)
        finally:
            file_harvester.shutdown()
    def test_harvester_with_memento(self):
        """
        Test that the harvester can find files as they are added to a directory,
        using a memento that marks the first two files as already ingested.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 15.  It is shut down even if one of the waits raises.
        """
        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + suffix
        filename_2 = 'unit_' + INDICIES[1] + suffix

        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1}
        for filename in (filename_1, filename_2):
            metadata = self.get_file_metadata(filename)
            metadata[DriverStateKey.INGESTED] = True
            metadata[DriverStateKey.PARSER_STATE] = None
            memento[filename] = metadata
        log.debug("starting with memento %s", memento)

        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # start a new event which will increase the file index using INDICIES
            # with a delay in between
            self.directory_filler = gevent.spawn(self.fill_directory_with_files,
                                                 config[DataSetDriverConfigKeys.DIRECTORY],
                                                 config[DataSetDriverConfigKeys.PATTERN], 2, 9, 5)

            # Wait for six sets of new files to be discovered: the first
            # starts from a count of zero, the rest from the current count
            self.wait_for_file(0, 2)
            for _ in range(5):
                self.wait_for_file(self.found_file_count, 2)
        finally:
            file_harvester.shutdown()
    def test_init(self):
        """
        Construct a harvester from a directory/pattern-only config, call
        sort_files on it, then start it and shut it down
        """
        minimal_config = {'directory': TESTDIR, 'pattern': CONFIG['pattern']}

        # no memento: the harvester starts from scratch
        harvester = SingleDirectoryHarvester(minimal_config, None,
                                             self.new_file_found_callback,
                                             self.file_exception_callback)

        sample_names = ['a_1_2.bla', 'a_2_2.bla']
        harvester.sort_files(sample_names)
        harvester.start()
        harvester.shutdown()
    def test_harvester_with_modified(self):
        """
        Test that the harvester reports a modified file.

        Two files are written and recorded as ingested in the memento, the
        harvester is started, and then the first file is appended to.  The
        test polls self.found_modified_count every 2 seconds and raises an
        Exception if it is still zero after about 60 seconds.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 15, and is shut down on every exit path.
        """
        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + suffix
        filename_2 = 'unit_' + INDICIES[1] + suffix

        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1}
        for filename in (filename_1, filename_2):
            metadata = self.get_file_metadata(filename)
            metadata[DriverStateKey.INGESTED] = True
            metadata[DriverStateKey.PARSER_STATE] = None
            memento[filename] = metadata
        log.debug("starting with memento %s", memento)

        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # append to the first file so its contents differ from what the
            # memento recorded
            file_path = os.path.join(config[DataSetDriverConfigKeys.DIRECTORY], filename_1)
            with open(file_path, 'a') as filehandle:
                filehandle.write('a b c d')

            end_time = 0
            while self.found_modified_count == 0:
                log.debug("Waiting for modified file...")
                time.sleep(2)
                end_time += 2
                if end_time > 60:
                    raise Exception("Timeout waiting to find modified files")
        finally:
            # previously the timeout path left the harvester running
            file_harvester.shutdown()
    def test_harvester_with_modified(self):
        """
        Test that the harvester reports a modified file.

        Two files are written and recorded as ingested in the memento, the
        harvester is started, and then the first file is appended to.  The
        test polls self.found_modified_count every 2 seconds and raises an
        Exception if it is still zero after about 60 seconds.

        The harvester is built from a copy of CONFIG whose FILE_MOD_WAIT_TIME
        is set to 15, and is shut down on every exit path.
        """
        # make sure we have 2 files already in the directory
        self.fill_directory_with_files(CONFIG[DataSetDriverConfigKeys.DIRECTORY],
                                       CONFIG[DataSetDriverConfigKeys.PATTERN], 0, 2, 0)

        suffix = CONFIG[DataSetDriverConfigKeys.PATTERN].replace('*', '')
        filename_1 = 'unit_' + INDICIES[0] + suffix
        filename_2 = 'unit_' + INDICIES[1] + suffix

        # generate memento with two files ingested (parser state is not looked at)
        memento = {DriverStateKey.VERSION: 0.1}
        for filename in (filename_1, filename_2):
            metadata = self.get_file_metadata(filename)
            metadata[DriverStateKey.INGESTED] = True
            metadata[DriverStateKey.PARSER_STATE] = None
            memento[filename] = metadata
        log.debug("starting with memento %s", memento)

        config = CONFIG.copy()
        config[DataSetDriverConfigKeys.FILE_MOD_WAIT_TIME] = 15
        # pass the modified copy; previously CONFIG was passed here and the
        # FILE_MOD_WAIT_TIME override above was never used
        file_harvester = SingleDirectoryHarvester(config, memento,
                                                  self.new_file_found_callback,
                                                  self.modified_files_found_callback,
                                                  self.file_exception_callback)
        file_harvester.start()
        try:
            # append to the first file so its contents differ from what the
            # memento recorded
            file_path = os.path.join(config[DataSetDriverConfigKeys.DIRECTORY], filename_1)
            with open(file_path, 'a') as filehandle:
                filehandle.write('a b c d')

            end_time = 0
            while self.found_modified_count == 0:
                log.debug("Waiting for modified file...")
                time.sleep(2)
                end_time += 2
                if end_time > 60:
                    raise Exception("Timeout waiting to find modified files")
        finally:
            # previously the timeout path left the harvester running
            file_harvester.shutdown()