Example #1
    def test_identify_collision(self):
        importers = [
            tests.utils.Importer('A', 'Assets:Tests', 'text/csv'),
            tests.utils.Importer('B', 'Assets:Tests', 'text/csv'),
        ]

        importer = identify.identify(importers, path.abspath('test.txt'))
        self.assertIsNone(importer)

        with self.assertRaises(exceptions.Error):
            importer = identify.identify(importers, path.abspath('test.csv'))
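
The test above relies on a tests.utils.Importer test double that is not shown. Below is a minimal sketch of what such a helper might look like, assuming it simply matches documents by the MIME type guessed from the file name; the class body, the use of the mimetypes module, and the method signatures are assumptions (in particular, whether identify() receives a plain path or a FileMemo-style wrapper depends on the importer interface in use).

import mimetypes


class Importer:
    """Hypothetical test importer matching documents by MIME type."""

    def __init__(self, name, account, mimetype):
        self.name = name
        self.account = account
        self.mimetype = mimetype

    def identify(self, filepath):
        # Claim the document when its guessed MIME type matches.
        guessed, _encoding = mimetypes.guess_type(filepath)
        return guessed == self.mimetype

    def file_account(self, filepath):
        return self.account

With two such importers registered for 'text/csv', no single importer owns test.csv, which is why the collision test expects exceptions.Error.
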
Example #2
def _identify(ctx, src, failfast, verbose):
    """Identify files for import.

    Walk the SRC list of files or directories and report each file
    identified by one of the configured importers.  When verbose
    output is requested, also print the account name associated with
    the document by the importer.

    """
    log = utils.logger(verbose)
    errors = exceptions.ExceptionsTrap(log)

    for filename in _walk(src, log):
        with errors:
            importer = identify.identify(ctx.importers, filename)
            if not importer:
                log('')  # Newline.
                continue

            # Signal processing of this document.
            log(' ...', nl=False)

            # When verbose output is requested, get the associated account.
            account = importer.file_account(
                cache.get_file(filename)) if verbose else None

            log(' OK', fg='green')
            log(f'  {importer.name():}')
            log(f'  {account:}', 1)

        if failfast and errors:
            break

    if errors:
        sys.exit(1)
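
The _walk helper used above is not shown in this snippet. Judging from how the loop uses it (each iteration either prints a bare newline or appends ' ...' to the current line), it appears to yield each candidate file after logging its name without a trailing newline. A rough sketch under that assumption; the traversal order and the exact log format are guesses.

import os


def _walk(file_or_dirs, log):
    # Hypothetical reconstruction: yield every file under the given paths,
    # printing each name without a newline so the caller can append a status.
    for file_or_dir in file_or_dirs:
        if os.path.isdir(file_or_dir):
            for root, _dirs, files in os.walk(file_or_dir):
                for name in sorted(files):
                    filename = os.path.join(root, name)
                    log(f'* {filename}', nl=False)
                    yield filename
        else:
            log(f'* {file_or_dir}', nl=False)
            yield file_or_dir
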
Example #3
    def test_identify(self):
        importers = [
            tests.utils.Importer('A', 'Assets:Tests', 'application/pdf'),
            tests.utils.Importer('B', 'Assets:Tests', 'text/csv'),
        ]

        # Pass an absolute path to identify() to make the cache code
        # used internally by the importers happy. This can go away
        # once FileMemo is removed from the importers interface.
        importer = identify.identify(importers, path.abspath('test.txt'))
        self.assertIsNone(importer)

        importer = identify.identify(importers, path.abspath('test.pdf'))
        self.assertEqual(importer.name, 'A')

        importer = identify.identify(importers, path.abspath('test.csv'))
        self.assertEqual(importer.name, 'B')
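
Taken together with the collision test in Example #1, these assertions pin down the contract of identify.identify(): it returns None when no importer claims the document, the single matching importer otherwise, and raises exceptions.Error when more than one importer matches. A minimal sketch consistent with that behaviour; wrapping the path with cache.get_file() is suggested by the FileMemo comment above, and the error message is invented.

def identify(importers, filepath):
    # Hypothetical reconstruction of the function exercised by these tests.
    file = cache.get_file(filepath)
    matches = [importer for importer in importers if importer.identify(file)]
    if len(matches) > 1:
        raise exceptions.Error('Document identified by multiple importers.',
                               filepath)
    return matches[0] if matches else None
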
Example #4
def _extract(ctx, src, output, existing, reverse, failfast, quiet):
    """Extract transactions from documents.

    Walk the SRC list of files or directories and extract the ledger
    entries from each file identified by one of the configured
    importers.  The entries are written to the specified output file
    or to the standard output in Beancount ledger format, in sections
    associated with the source document.

    """
    verbosity = -quiet
    log = utils.logger(verbosity, err=True)
    errors = exceptions.ExceptionsTrap(log)

    # Load the ledger, if one is specified.
    existing_entries = loader.load_file(existing)[0] if existing else []

    extracted = []
    for filename in _walk(src, log):
        with errors:
            importer = identify.identify(ctx.importers, filename)
            if not importer:
                log('')  # Newline.
                continue

            # Signal processing of this document.
            log(' ...', nl=False)

            # Extract entries.
            entries = extract.extract_from_file(importer, filename,
                                                existing_entries)
            extracted.append((filename, entries))
            log(' OK', fg='green')

        if failfast and errors:
            break

    # Invoke hooks.
    hooks = ([extract.find_duplicate_entries]
             if ctx.hooks is None else ctx.hooks)
    for func in hooks:
        extracted = func(extracted, existing_entries)

    # Reverse sort order, if requested.
    if reverse:
        for filename, entries in extracted:
            entries.reverse()

    # Serialize entries.
    extract.print_extracted_entries(extracted, output)

    if errors:
        sys.exit(1)
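
The hook protocol is visible in the loop above: each hook is called with the list of (filename, entries) pairs and the existing ledger entries, and must return a list of the same shape. A trivial custom hook following that shape (the function name and filtering behaviour are illustrative only):

def drop_empty_documents(extracted, existing_entries):
    # Keep only documents that actually produced entries; existing_entries
    # is accepted to match the hook signature but is not used here.
    return [(filename, entries) for filename, entries in extracted if entries]

Such a hook would presumably be registered through ctx.hooks, replacing the default find_duplicate_entries; how ctx.hooks is populated is not shown in this snippet.
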
Example #5
def _archive(ctx, src, destination, dry_run, overwrite, failfast):
    """Archive documents.

    Walk the SRC list of files or directories and move each file
    identified by one of the configured importers into a directory
    hierarchy mirroring the structure of the accounts associated with
    the documents, with a file name composed of the document date and
    the document name returned by the importer.

    Documents are moved to their filing location only when no errors
    are encountered while processing all the input files.  Documents
    in the destination directory are not overwritten unless the
    --force option is used.  When the root of the directory hierarchy
    is not specified with the --destination DIR option, it is assumed
    to be the directory in which the ingest script is located.

    """
    # If the output directory is not specified, move the files at the
    # root where the import script is located. Providing this default
    # seems better than using a required option.
    if destination is None:
        import __main__
        destination = os.path.dirname(os.path.abspath(__main__.__file__))

    log = utils.logger()
    errors = exceptions.ExceptionsTrap(log)
    renames = []

    for filename in _walk(src, log):
        with errors:
            importer = identify.identify(ctx.importers, filename)
            if not importer:
                log('')  # Newline.
                continue

            # Signal processing of this document.
            log(' ...', nl=False)

            destpath = archive.filepath(importer, filename)

            # Prepend destination directory path.
            destpath = os.path.join(destination, destpath)

            # Check for destination filename collisions.
            collisions = [src for src, dst in renames if dst == destpath]
            if collisions:
                raise exceptions.Error('Collision in destination file path.',
                                       destpath)

            # Check if the destination file already exists.
            if not overwrite and os.path.exists(destpath):
                raise exceptions.Error('Destination file already exists.',
                                       destpath)

            renames.append((filename, destpath))
            log(' OK', fg='green')
            log(f'  {destpath:}')

        if failfast and errors:
            break

    # If there are any errors, stop here.
    if errors:
        log('# Errors detected: documents will not be filed.')
        sys.exit(1)

    if not dry_run:
        for src, dst in renames:
            archive.move(src, dst)
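
archive.filepath() is expected to return a path relative to the destination root, since _archive() joins it onto the destination directory before checking for collisions. A hedged sketch of what such a function might do, assuming the older file_account()/file_date()/file_name() importer methods (only file_account() appears in these snippets) and reusing cache.get_file() from Example #2:

import datetime
import os


def filepath(importer, filename):
    # Hypothetical sketch: build e.g. Assets/Tests/2021-02-01.statement.csv
    # from the account, the document date, and the document name.
    file = cache.get_file(filename)
    account = importer.file_account(file)
    date = importer.file_date(file) or datetime.date.today()
    name = importer.file_name(file) or os.path.basename(filename)
    return os.path.join(account.replace(':', os.sep),
                        f'{date:%Y-%m-%d}.{name}')
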
Example #6
    def identify(self, what, *args, **kwargs):
        return identify.identify(self.importers, what, *args, **kwargs)