def test_file(self):
    """Run the 'file' command on Downloads and check the filed documents.

    Invokes ``self.ingest('file', <tempdir>/Downloads, '-o', self.documents)``
    and then asserts that, for each expected account directory, at least one
    file found under ``self.documents`` matches the dated filename pattern.
    """
    downloads = path.join(self.tempdir, 'Downloads')
    self.ingest('file', downloads, '-o', self.documents)

    # The documents directory is a literal path, not a pattern: escape it
    # (with its trailing separator) so that any regex metacharacters in the
    # temporary directory name cannot alter the match.
    documents_prefix = re.escape(path.join(self.documents, ''))
    expected_res = [
        documents_prefix + x
        for x in [r'Liabilities/CreditCard/\d\d\d\d-\d\d-\d\d\.bank\.csv',
                  r'Assets/Checking/\d\d\d\d-\d\d-\d\d\.ofxdownload\.ofx']]

    moved_files = list(file_utils.find_files([self.documents]))
    for regexp in expected_res:
        self.assertTrue(
            any(re.match(regexp, filename) for filename in moved_files),
            "No file matching {!r} found among: {}".format(regexp,
                                                           moved_files))
def test_file(self):
    """Run ``file.main`` on Downloads and check the filed documents.

    Calls ``file.main`` through ``test_utils.run_with_args`` with
    ``--output self.documents``, the ``test.import`` configuration and the
    ``Downloads`` directory (both under ``self.tempdir``), while stdout and
    stderr are captured. It then asserts that, for each expected account
    directory, at least one file found under ``self.documents`` matches the
    dated filename pattern.
    """
    with test_utils.capture('stdout', 'stderr') as (stdout, stderr):
        test_utils.run_with_args(file.main, [
            '--output', self.documents,
            path.join(self.tempdir, 'test.import'),
            path.join(self.tempdir, 'Downloads')])

    # The documents directory is a literal path, not a pattern: escape it
    # (with its trailing separator) so that any regex metacharacters in the
    # temporary directory name cannot alter the match.
    documents_prefix = re.escape(path.join(self.documents, ''))
    expected_res = [
        documents_prefix + x
        for x in [r'Liabilities/CreditCard/\d\d\d\d-\d\d-\d\d\.bank\.csv',
                  r'Assets/Checking/\d\d\d\d-\d\d-\d\d\.ofxdownload\.ofx']]

    moved_files = list(file_utils.find_files([self.documents]))
    for regexp in expected_res:
        self.assertTrue(
            any(re.match(regexp, filename) for filename in moved_files),
            "No file matching {!r} found among: {}".format(regexp,
                                                           moved_files))
def find_imports(importer_config, files_or_directories, logfile=None):
    """Find files to import and the importers that identify each of them.

    Given an importer configuration, search for files that can be imported in
    the list of files or directories, run the signature checks on them and
    yield, for each file, the importers that matched it. Files larger than
    FILE_TOO_LARGE_THRESHOLD are skipped with a warning. A file that no
    importer matches is still yielded, with an empty list.

    Args:
      importer_config: A list of importer instances that define the config.
      files_or_directories: A list of files or directories to walk
        recursively and hunt for files to import.
      logfile: A file object to write log entries to, or None, in which case
        no log is written out.
    Yields:
      Pairs of (filename, importers), where 'importers' is the list of
      importers from the configuration whose identify() matched the file.
    """
    # Iterate over all files found; accumulate the entries by identification.
    for filename in file_utils.find_files(files_or_directories):
        if logfile is not None:
            logfile.write(SECTION.format(filename))
            logfile.write('\n')

        # Skip files that are simply too large.
        size = path.getsize(filename)
        if size > FILE_TOO_LARGE_THRESHOLD:
            logging.warning("File too large: '%s' (%s bytes); skipping.",
                            filename, size)
            continue

        # For each of the sources the user has declared, identify which
        # match the text.
        cached_file = cache.get_file(filename)
        matching_importers = []
        for importer in importer_config:
            # A misbehaving importer must not abort the whole search: report
            # the failure and carry on with the remaining importers.
            try:
                matched = importer.identify(cached_file)
            except Exception as exc:
                logging.error(
                    "Importer %s.identify() raised an unexpected error: %s",
                    importer.name(), exc)
                continue
            if matched:
                matching_importers.append(importer)

        yield (filename, matching_importers)
def walk(fords):
    """Return a sorted list of the files found under `fords`.

    The filenames produced by file_utils.find_files() are passed through
    clean() together with self.tempdir before being sorted.
    """
    found = file_utils.find_files(fords)
    cleaned = clean(self.tempdir, found)
    return sorted(cleaned)