Beispiel #1
0
 def test_file(self):
     """Run the 'file' command on Downloads and check the documents tree.

     Calls self.ingest with the 'file' command, the Downloads directory under
     the temporary directory, and self.documents as the '-o' output. Then
     asserts that, for each expected account directory, at least one file
     found under self.documents has a name of the form YYYY-MM-DD.<name>.
     """
     downloads = path.join(self.tempdir, 'Downloads')
     self.ingest('file', downloads, '-o', self.documents)

     # The documents root is a literal directory, not a pattern: escape it so
     # that regex metacharacters in the temporary path cannot break matching.
     documents_re = re.escape(self.documents)
     expected_res = [
         path.join(documents_re, x)
         for x in [r'Liabilities/CreditCard/\d\d\d\d-\d\d-\d\d\.bank\.csv',
                   r'Assets/Checking/\d\d\d\d-\d\d-\d\d\.ofxdownload\.ofx']]
     moved_files = list(file_utils.find_files([self.documents]))
     for regexp in expected_res:
         # Include the pattern and the files actually found, so a failure is
         # diagnosable without re-running the test.
         self.assertTrue(
             any(re.match(regexp, filename) for filename in moved_files),
             "No file matching {!r} among: {}".format(regexp, moved_files))
Beispiel #2
0
 def test_file(self):
     """Run file.main on Downloads and check the documents tree.

     Invokes file.main through test_utils.run_with_args with '--output' set
     to self.documents, the 'test.import' file and the Downloads directory
     under the temporary directory, while stdout and stderr are captured.
     Then asserts that, for each expected account directory, at least one
     file found under self.documents has a name of the form
     YYYY-MM-DD.<name>.
     """
     # The captured streams are not inspected; capturing only keeps the
     # command's output out of the test run.
     with test_utils.capture('stdout', 'stderr') as (stdout, stderr):
         test_utils.run_with_args(file.main, [
             '--output', self.documents,
             path.join(self.tempdir, 'test.import'),
             path.join(self.tempdir, 'Downloads')])

     # The documents root is a literal directory, not a pattern: escape it so
     # that regex metacharacters in the temporary path cannot break matching.
     documents_re = re.escape(self.documents)
     expected_res = [
         path.join(documents_re, x)
         for x in [r'Liabilities/CreditCard/\d\d\d\d-\d\d-\d\d\.bank\.csv',
                   r'Assets/Checking/\d\d\d\d-\d\d-\d\d\.ofxdownload\.ofx']]
     moved_files = list(file_utils.find_files([self.documents]))
     for regexp in expected_res:
         # Include the pattern and the files actually found, so a failure is
         # diagnosable without re-running the test.
         self.assertTrue(
             any(re.match(regexp, filename) for filename in moved_files),
             "No file matching {!r} among: {}".format(regexp, moved_files))
Beispiel #3
0
def find_imports(importer_config, files_or_directories, logfile=None):
    """Find importable files and the importers that identify each of them.

    Walks the given files and directories and, for every file found that is
    not larger than FILE_TOO_LARGE_THRESHOLD, asks each configured importer
    whether it identifies the file. Files that no importer identifies are
    still yielded, with an empty list.

    Args:
      importer_config: a list of importer instances that define the config.
      files_or_directories: a list of files or directories to walk recursively
                            and hunt for files to import.
      logfile: A file object to write log entries to, or None, in which case
        no log is written out. A section header is written for every file
        found, including the ones skipped for being too large.
    Yields:
      Pairs of (filename, importers), where 'importers' is the list of
      importers from the configuration whose identify() matched the file.
    """
    for filename in file_utils.find_files(files_or_directories):
        if logfile is not None:
            logfile.write(SECTION.format(filename))
            logfile.write('\n')

        # Oversized files are reported and skipped entirely.
        num_bytes = path.getsize(filename)
        if num_bytes > FILE_TOO_LARGE_THRESHOLD:
            message = "File too large: '{}' ({} bytes); skipping.".format(
                filename, num_bytes)
            logging.warning(message)
            continue

        # Collect every configured importer that recognizes this file. An
        # importer that fails during identification is logged and counted as
        # a non-match, so one faulty importer cannot abort the whole search.
        cached_file = cache.get_file(filename)
        identified_by = []
        for candidate in importer_config:
            try:
                if candidate.identify(cached_file):
                    identified_by.append(candidate)
            except Exception as error:
                logging.error(
                    "Importer %s.identify() raised an unexpected error: %s",
                    candidate.name(), error)

        yield (filename, identified_by)
Beispiel #4
0
 def walk(fords):
     """Return the sorted, cleaned listing of files found under 'fords'.

     Args:
       fords: the files or directories handed to file_utils.find_files().
     Returns:
       A sorted list of whatever clean() produces for the files found.
     """
     # 'self' comes from the enclosing scope, not from a parameter.
     # NOTE(review): clean() presumably rewrites the found paths relative to
     # the temporary directory -- confirm against its definition.
     root = self.tempdir
     found = file_utils.find_files(fords)
     listing = list(clean(root, found))
     listing.sort()
     return listing