Beispiel #1
 def filename(self, filepath):
     """Return the importer-suggested name for `filepath`, cleaned up, or None.

     The document is looked up through cache.get_file() and handed to the
     wrapped importer's file_name(); a falsy answer yields None.
     """
     document = cache.get_file(filepath)
     suggested = self.importer.file_name(document)
     if not suggested:
         return None
     # The archive command, as currently implemented, leaves the name
     # returned by the importer untouched. Cleaning it up here keeps the
     # behavior of the old implementation of the command.
     return misc_utils.idify(suggested)
Beispiel #2
def filepath(importer, filepath: str) -> str:
    """Compute filing path for a document.

    The path mirrors the structure of the accounts associated to the
    documents and with a file name composed by the document date and
    document name returned by the importer.

    Args:
      importer: The importer instance to handle the document.
      filepath: Filesystem path to the document.

    Returns:
      Filing tree location where to store the document.

    Raises:
      beangulp.exceptions.Error: The canonical file name returned by
      the importer contains a path separator character or seems to
      contain a date.

    """
    file = cache.get_file(filepath)

    # Get the account corresponding to the file.
    account = importer.file_account(file)

    # When the importer does not provide a name or a date, fall back on the
    # document itself.
    # NOTE(review): both fallback arguments were truncated in the source;
    # the document path is assumed here, and utils.getmdate() is presumed
    # to return the file modification date -- confirm.
    filename = importer.file_name(file) or os.path.basename(filepath)
    date = importer.file_date(file) or utils.getmdate(filepath)

    # The returned filename cannot contain the file path separator character.
    if os.sep in filename:
        raise Error("The filename contains path separator character.")

    if re.match(r'\d\d\d\d-\d\d-\d\d', filename):
        raise Error("The filename contains what looks like a date.")

    # Remove whitespace and other funny characters from the filename.
    # TODO(dnicolodi): This should probably be importer responsibility.
    filename = misc_utils.idify(filename)

    # Prepend account directory and date prefix.
    # NOTE(review): the second argument of the join was truncated in the
    # source; an ISO date prefix separated by a dot is assumed -- confirm.
    dated_name = '{0:%Y-%m-%d}.{1}'.format(date, filename)
    return os.path.join(account.replace(':', os.sep), dated_name)
Beispiel #3
def file_one_file(filename, importers, destination, idify=False, logfile=None):
    """Compute the filing destination of a single file using its matched importers.

    Nothing in this function relocates the file; it only works out the new
    full name, optionally writes a summary to `logfile`, and returns the name.

    Args:
      filename: A string, the name of the downloaded file to be processed.
      importers: A list of importer instances that handle this file.
      destination: A string, the root destination directory where the files are
        to be filed. The files are organized there under a hierarchy mirroring
        that of the chart of accounts.
      idify: A flag, if true, remove whitespace and funky characters in the
        destination filename.
      logfile: A file object to write log entries to, or None, in which case no log is
        written out.
    Returns:
      The full new destination filename on success, and None if there was an error.
    """
    # Create an object to cache all the conversions between the importers
    # and phases and what-not.
    file = cache.get_file(filename)

    # Get the account corresponding to the file. A failing importer is logged
    # and skipped instead of aborting the whole operation.
    file_accounts = []
    for importer in importers:
        try:
            account_ = importer.file_account(file)
        except Exception as exc:
            account_ = None
            logging.exception("Importer %s.file_account() raised an unexpected error: %s",
                              _importer_label(importer), exc)
        if account_ is not None:
            file_accounts.append(account_)

    file_accounts_set = set(file_accounts)
    if not file_accounts_set:
        logging.error("No account provided by importers: {}".format(
            ", ".join(_importer_label(imp) for imp in importers)))
        return None

    if len(file_accounts_set) > 1:
        # Sorted so that the log line is stable from one run to the next.
        logging.warning("Ambiguous accounts from many importers: {}".format(
            ', '.join(sorted(file_accounts_set))))
        # Note: Don't exit; select the first matching importer's account.

    file_account = file_accounts[0]

    # Given multiple importers, select the first one that was yielded to
    # obtain the date and process the filename.
    importer = importers[0]

    # Compute the date from the last modified time.
    mtime = path.getmtime(filename)
    mtime_date = datetime.datetime.fromtimestamp(mtime).date()

    # Try to get the file's date by calling a module support function. The
    # module may be able to extract the date from the filename, from the
    # contents of the file itself (e.g. scraping some text from the PDF
    # contents, or grabbing the last line of a CSV file).
    try:
        date = importer.file_date(file)
    except Exception as exc:
        logging.exception("Importer %s.file_date() raised an unexpected error: %s",
                          _importer_label(importer), exc)
        date = None
    if date is None:
        # Fallback on the last modified time of the file.
        date = mtime_date
        date_source = 'mtime'
    else:
        date_source = 'contents'

    # Apply filename renaming, if implemented.
    # Otherwise clean up the filename.
    try:
        clean_filename = importer.file_name(file)

        # Warn the importer implementor if a name is returned and it's an
        # absolute filename.
        if clean_filename and (path.isabs(clean_filename) or os.sep in clean_filename):
            logging.error(("The importer '%s' file_name() method should return a relative "
                           "filename; the filename '%s' is absolute or contains path "
                           "separators"),
                          _importer_label(importer), clean_filename)
    except Exception as exc:
        logging.exception("Importer %s.file_name() raised an unexpected error: %s",
                          _importer_label(importer), exc)
        clean_filename = None
    if not clean_filename:
        # If no filename has been provided, use the basename. An empty string
        # is treated like None; otherwise the result would be a bare
        # "<date>." name.
        clean_filename = path.basename(filename)
    elif re.match(r'\d\d\d\d-\d\d-\d\d', clean_filename):
        # The importer name is passed explicitly: the message has a '%s'
        # placeholder that would otherwise be logged verbatim.
        logging.error("The importer '%s' file_name() method should not date the "
                      "returned filename. Implement file_date() instead.",
                      _importer_label(importer))

    # We need a simple filename; remove the directory part if there is one.
    clean_basename = path.basename(clean_filename)

    # Remove whitespace if requested.
    if idify:
        clean_basename = misc_utils.idify(clean_basename)

    # Prepend the date prefix.
    new_filename = '{0:%Y-%m-%d}.{1}'.format(date, clean_basename)

    # Prepend destination directory.
    new_fullname = path.normpath(path.join(destination,
                                           file_account.replace(account.sep, os.sep),
                                           new_filename))

    # Print the filename and which modules matched.
    if logfile is not None:
        logfile.write('Importer:    {}\n'.format(
            _importer_label(importer) if importer else '-'))
        logfile.write('Account:     {}\n'.format(file_account))
        logfile.write('Date:        {} (from {})\n'.format(date, date_source))
        logfile.write('Destination: {}\n'.format(new_fullname))

    return new_fullname


def _importer_label(importer):
    """Return a printable name for `importer`, for use in log messages.

    NOTE(review): the expression naming the importer was lost from the
    source. Importers are presumed to expose `name` either as a method or as
    a plain attribute; both are accepted, with the class name as a last
    resort -- confirm against the importer interface.
    """
    label = getattr(importer, 'name', None)
    if callable(label):
        label = label()
    return label if label else type(importer).__name__
Beispiel #4
 def test_idify(self):
     """Check misc_utils.idify() on names with spaces, parentheses and dots."""
     # NOTE(review): the expected value of this first assertion was missing
     # from the source and has been reconstructed -- confirm against
     # misc_utils.idify().
     self.assertEqual('A_great_movie_for_us.mp4',
                      misc_utils.idify(' A great movie (for us) .mp4 '))
     self.assertEqual('A____B.pdf', misc_utils.idify('A____B_._pdf'))
Beispiel #5
 def filename(self, filepath):
     """Return the base name of `filepath` after cleaning it with idify().

     misc_utils.idify() is applied to the whole path first; the directory
     part is stripped afterwards.
     """
     cleaned_path = misc_utils.idify(filepath)
     return path.basename(cleaned_path)