Ejemplo n.º 1
0
Archivo: ingest.py Proyecto: tf198/fava
    def import_data(self):
        """Collect importable files grouped by configured import directory.

        Returns:
            A dict mapping each import directory (as configured in the
            "import-dirs" fava option) to a list of
            :class:`.FileImporters` describing the files found there.
        """
        if not self.config:
            return {}

        # Import directories are resolved relative to the main Beancount file.
        base = path.dirname(self.ledger.beancount_file_path)
        result = {}
        for directory in self.ledger.fava_options["import-dirs"]:
            full_path = path.normpath(path.join(base, directory))
            found = []
            for filename, importers in identify.find_imports(
                    self.config, full_path):
                infos = [file_import_info(filename, imp) for imp in importers]
                found.append(
                    FileImporters(filename, path.basename(filename), infos))
            result[directory] = found

        return result
Ejemplo n.º 2
0
 def test_find_imports__raises_exception(self):
     """A file whose importer raises in identify() is returned with no matches."""
     target = path.join(self.tempdir, 'file1.test')
     with open(target, 'w'):
         pass

     importer = mock.MagicMock()
     importer.identify = mock.MagicMock(side_effect=ValueError("Unexpected error!"))

     result = list(identify.find_imports([importer], self.tempdir))
     # The exception is swallowed; the file still appears, unmatched.
     self.assertEqual([(target, [])], result)
Ejemplo n.º 3
0
def extract(importer_config,
            files_or_directories,
            output,
            entries=None,
            options_map=None,
            mindate=None,
            ascending=True):
    """Run every matching importer over the given files and print the results.

    Each file found under the given files or directories is matched against
    the importer configuration; each importer that identifies a file is run
    on it and the extracted entries are written to the output stream as they
    are produced.

    Args:
      importer_config: A list of (regexps, importer) pairs, the configuration.
      files_or_directories: A list of strings, filenames or directories to be processed.
      output: A file object, to be written to.
      entries: A list of directives loaded from the existing file for the newly
        extracted entries to be merged in.
      options_map: The options parsed from existing file.
      mindate: Optional minimum date to output transactions for.
      ascending: A boolean, true to print entries in ascending order, false if
        descending is desired.
    """
    allow_none = (options_map and
                  options_map["allow_deprecated_none_for_tags_and_links"])

    output.write(HEADER)
    matches = identify.find_imports(importer_config, files_or_directories,
                                    output)
    for filename, importers in matches:
        for importer in importers:
            # Run the importer; a failure in one importer must not abort the
            # whole run, so log it and move on.
            try:
                extracted, duplicates = extract_from_file(
                    filename,
                    importer,
                    existing_entries=entries,
                    min_date=mindate,
                    allow_none_for_tags_and_links=allow_none)
            except Exception as exc:
                logging.error("Importer %s.extract() raised an unexpected error: %s",
                              importer.name(), exc)
                logging.error("Traceback: %s", traceback.format_exc())
                continue

            # Only print sections that produced something.
            if extracted or duplicates:
                if not ascending:
                    extracted.reverse()
                print_extracted_entries(importer, extracted, output)
Ejemplo n.º 4
0
    def test_find_imports__file_too_large(self):
        """Only the small file is returned; the 256-byte one is excluded."""
        small = path.join(self.tempdir, 'file1.test')
        large = path.join(self.tempdir, 'file2.test')
        with open(small, 'w') as out:
            out.write('*' * 16)
        with open(large, 'w') as out:
            out.write('*' * 256)

        # An importer that claims to identify everything, so that any
        # filtering must come from find_imports() itself.
        importer = mock.MagicMock()
        importer.identify = mock.MagicMock(return_value=True)

        found = list(identify.find_imports([importer], self.tempdir))
        self.assertEqual([(small, [importer])], found)
Ejemplo n.º 5
0
    def test_find_imports(self):
        """Each file is paired with every importer that identifies it.

        Three empty files are created; two importers match the first, one
        matches the second, and none matches the third, which must still
        appear with an empty importer list.
        """
        file1 = path.join(self.tempdir, 'file1.test')
        file2 = path.join(self.tempdir, 'file2.test')
        file3 = path.join(self.tempdir, 'file3.test')
        for filename in [file1, file2, file3]:
            # Use a context manager: the previous bare open() leaked the
            # file handle (never closed), which also keeps the file locked
            # on some platforms.
            with open(filename, 'w'):
                pass

        imp1a = _TestImporter(file1)
        imp1b = _TestImporter(file1)
        imp2 = _TestImporter(file2)

        config = [imp1a, imp1b, imp2]
        imports = list(identify.find_imports(config, self.tempdir))
        self.assertEqual([(file1, [imp1a, imp1b]), (file2, [imp2]),
                          (file3, [])], imports)
Ejemplo n.º 6
0
    def identify_directory(self, directory):
        """Find importable files in a single directory.

        Args:
            directory: The directory to search (relative to the Beancount
                file).

        Returns:
            A lazy iterable of (filename, importers) pairs, keeping only
            files that matched at least one importer.
        """
        if not self.config:
            return []

        # Resolve the directory relative to the main Beancount file.
        root = os.path.dirname(self.ledger.beancount_file_path)
        full_path = os.path.normpath(os.path.join(root, directory))

        matches = identify.find_imports(self.config, full_path)
        # Drop entries whose importer list is empty.
        return filter(operator.itemgetter(1), matches)
Ejemplo n.º 7
0
    def import_data(self) -> List[FileImporters]:
        """Collect all importable files from the configured import dirs.

        Returns:
            A list of :class:`.FileImporters`, one per file discovered under
            the "import-dirs" fava option.
        """
        if not self.config:
            return []

        result: List[FileImporters] = []

        for directory in self.ledger.fava_options["import-dirs"]:
            full_path = self.ledger.join_path(directory)
            for filename, importers in identify.find_imports(self.config,
                                                             full_path):
                infos = [file_import_info(filename, imp) for imp in importers]
                result.append(
                    FileImporters(filename, path.basename(filename), infos))

        return result
Ejemplo n.º 8
0
def extract(importer_config,
            files_or_directories,
            output,
            entries=None,
            options_map=None,
            mindate=None,
            ascending=True,
            hooks=None):
    """Run matching importers over the given files, post-process, and print.

    Files under the given list of files or directories are matched against
    the importer configuration; every importer that identifies a file is run
    on it. The collected (filename, entries) pairs are then passed through
    the hook chain before being written to the output stream.

    Args:
      importer_config: A list of (regexps, importer) pairs, the configuration.
      files_or_directories: A list of strings, filenames or directories to be processed.
      output: A file object, to be written to.
      entries: A list of directives loaded from the existing file for the newly
        extracted entries to be merged in.
      options_map: The options parsed from existing file.
      mindate: Optional minimum date to output transactions for.
      ascending: A boolean, true to print entries in ascending order, false if
        descending is desired.
      hooks: An optional list of hook functions to apply to the list of extract
        (filename, entries) pairs, in order. If not specified, find_duplicate_entries()
        is used, automatically.
    """
    allow_none = (options_map and
                  options_map["allow_deprecated_none_for_tags_and_links"])

    # Phase 1: run every matching importer and collect its entries.
    extracted = []
    for filename, importers in identify.find_imports(importer_config,
                                                     files_or_directories):
        for importer in importers:
            # A failing importer is logged and skipped, never fatal.
            try:
                file_entries = extract_from_file(
                    filename,
                    importer,
                    existing_entries=entries,
                    min_date=mindate,
                    allow_none_for_tags_and_links=allow_none)
            except Exception as exc:
                logging.exception("Importer %s.extract() raised an unexpected error: %s",
                                  importer.name(), exc)
                continue
            extracted.append((filename, file_entries))

    # Phase 2: apply the hook chain. By default this is a single duplicate
    # detection pass against the existing entries; one call per hook is made
    # on purpose so a hook is able to merge entries across files.
    if hooks is None:
        hooks = [find_duplicate_entries]
    for hook_fn in hooks:
        extracted = hook_fn(extracted, entries)
    # The hooks must preserve the (str filename, list of entries) shape.
    assert isinstance(extracted, list)
    assert all(isinstance(item, tuple) for item in extracted)
    assert all(isinstance(item[0], str) for item in extracted)
    assert all(isinstance(item[1], list) for item in extracted)

    # Phase 3: serialize the results.
    output.write(HEADER)
    for key, file_entries in extracted:
        output.write(identify.SECTION.format(key))
        output.write('\n')
        if not ascending:
            file_entries.reverse()
        print_extracted_entries(file_entries, output)
Ejemplo n.º 9
0
def file(importer_config,
         files_or_directories,
         destination,
         dry_run=False,
         mkdirs=False,
         overwrite=False,
         idify=False,
         logfile=None):
    """File importable files under a destination directory.

    Given an importer configuration object, search for files that can be
    imported under the given list of files or directories and moved them under
    the given destination directory with the date computed by the module
    prepended to the filename. If the date cannot be extracted, use a reasonable
    default for the date (e.g. the last modified time of the file itself).

    If 'mkdirs' is True, create the destination directories before moving the
    files.

    Args:
      importer_config: A list of importer instances that define the config.
      files_or_directories: a list of files of directories to walk recursively and
        hunt for files to import.
      destination: A string, the root destination directory where the files are
        to be filed. The files are organized there under a hierarchy mirroring
        that of the chart of accounts.
      dry_run: A flag, if true, don't actually move the files.
      mkdirs: A flag, if true, make all the intervening directories; otherwise,
        fail to move files to non-existing dirs.
      overwrite: A flag, if true, overwrite an existing destination file.
      idify: A flag, if true, remove whitespace and funky characters in the destination
        filename.
      logfile: A file object to write log entries to, or None, in which case no log is
        written out.
    """
    jobs = []
    has_errors = False
    matches = identify.find_imports(importer_config, files_or_directories,
                                    logfile)
    for filename, importers in matches:
        # Files that no importer identified are simply skipped; passing the
        # logfile above already printed the match text for debugging.
        if not importers:
            continue

        # Compute the destination name for this one file.
        new_fullname = file_one_file(filename, importers, destination, idify,
                                     logfile)
        if new_fullname is None:
            continue

        # The target directory must already exist unless mkdirs was given.
        new_dirname = path.dirname(new_fullname)
        if not mkdirs and not path.exists(new_dirname):
            logging.error("Destination directory '{}' does not exist.".format(new_dirname))
            has_errors = True
            continue

        # Refuse to clobber an existing file unless overwrite was given.
        if path.exists(new_fullname) and not overwrite:
            logging.error("Destination file '{}' already exists.".format(new_fullname))
            has_errors = True
            continue

        jobs.append((filename, new_fullname))

    # Before moving anything, make sure no two source files would land on
    # the same destination name.
    destinations = collections.defaultdict(list)
    for src, dest in jobs:
        destinations[dest].append(src)
    for dest, sources in destinations.items():
        if len(sources) > 1:
            logging.error("Collision in destination filenames '{}': from {}.".format(
                dest, ", ".join(["'{}'".format(source) for source in sources])))
            has_errors = True

    # On any error, move nothing at all: moving only *some* files would be
    # worse than moving none.
    if dry_run or has_errors:
        return

    # Carry out the moves.
    for old_filename, new_filename in jobs:
        move_xdev_file(old_filename, new_filename, mkdirs)

    return jobs