예제 #1
0
    def test_zero_balance_produces_assertion(self, filename):
        # pylint: disable=line-too-long
        """\
          Details,Posting Date,"Description",Amount,Type,Balance,Check or Slip #,
          DEBIT,3/18/2016,"Payment to Chafe card ending in 1234 03/18",-2680.89,ACCT_XFER,0,,
        """
        # NOTE(review): the docstring above looks like the CSV fixture behind
        # `filename` (wired up outside this view) -- keep it byte-for-byte.
        source = cache.get_file(filename)

        column_map = {
            Col.DATE: 'Posting Date',
            Col.NARRATION1: 'Description',
            Col.NARRATION2: 'Check or Slip #',
            Col.AMOUNT: 'Amount',
            Col.BALANCE: 'Balance',
            Col.DRCR: 'Details',
        }
        header_line = ('Details,Posting Date,"Description",Amount,'
                       'Type,Balance,Check or Slip #,')
        chafe = csv.Importer(column_map, 'Assets:Bank', 'USD', header_line,
                             institution='chafe')
        extracted = chafe.extract(source)

        # The single row carries a balance of 0; the expected output still
        # contains a balance directive for it.
        expected = r"""

          2016-03-18 * "Payment to Chafe card ending in 1234 03/18"
            Assets:Bank  -2680.89 USD
            
          2016-03-19 balance Assets:Bank                                     0 USD

        """
        self.assertEqualEntries(expected, extracted)
예제 #2
0
    def test_mixin(self, entries, errors, _):
        """
        2014-01-01 open Assets:US:BofA:Checking                   USD

        2014-05-19 * "Verizon Wireless" ""
          Assets:US:BofA:Checking                          -44.34 USD
        
        2014-05-23 * "Wine-Tarner Cable" ""
          Assets:US:BofA:Checking                          -80.17 USD
        
        2014-06-04 * "BANK FEES" "Monthly bank fee"
          Assets:US:BofA:Checking                           -4.00 USD
        
        2014-06-04 * "RiverBank Properties" "Paying the rent"
          Assets:US:BofA:Checking                        -2400.00 USD
        
        2014-06-08 * "EDISON POWER" ""
          Assets:US:BofA:Checking                          -65.00 USD
        """
        def filter_last_two(candidates):
            # Keep only the trailing two items of whatever list is passed in.
            return candidates[-2:]

        account = 'Assets:US:BofA:Checking'
        memo = cache.get_file(path.join(tempfile.gettempdir(), 'test'))

        # An empty filter list must hand back the entries untouched.
        unfiltered = Importer(entries, account, filters=[])
        self.assertEqualEntries(unfiltered.extract(memo), entries)

        # With the filter installed only the final two entries survive.
        filtered = Importer(entries, account, filters=[filter_last_two])
        self.assertEqualEntries(filtered.extract(memo), entries[-2:])
예제 #3
0
    def test_importer(self, entries, errors, _):
        """
        2014-01-01 open Assets:US:BofA:Checking                   USD

        2014-05-19 * "Verizon Wireless" ""
          Assets:US:BofA:Checking                          -44.34 USD
        
        2014-05-23 * "Wine-Tarner Cable" ""
          Assets:US:BofA:Checking                          -80.17 USD
        
        2014-06-04 * "BANK FEES" "Monthly bank fee"
          Assets:US:BofA:Checking                           -4.00 USD
        
        2014-06-04 * "RiverBank Properties" "Paying the rent"
          Assets:US:BofA:Checking                        -2400.00 USD
        
        2014-06-08 * "EDISON POWER" ""
          Assets:US:BofA:Checking                          -65.00 USD
        """
        # Exercise every accessor of ConstImporter against a throwaway file.
        account = 'Assets:US:BofA:Checking'
        file = cache.get_file(path.join(tempfile.gettempdir(), 'test'))
        importer = testing.ConstImporter(entries, account)

        assert importer.file_account(file) == account
        # Identity check: `== None` can be fooled by a custom __eq__.
        assert importer.file_name(file) is None
        assert importer.identify(file)
        # 2014-06-08 is the date of the last entry in the docstring above.
        assert importer.file_date(file) == datetime.date(2014, 6, 8)

        # extract() is expected to hand back the entries it was built with.
        extracted_entries = importer.extract(file)
        self.assertEqualEntries(extracted_entries, entries)
예제 #4
0
    def test_expect_file_date(self, filename, msg):
        """Compute the imported file date and compare it to an expected output.

        The expectation is read from '<file.name>.file_date'. If that file is
        missing or empty, it is written out from the computed date and the
        test is skipped, so that the generated file can be reviewed. Expected
        files can therefore be regenerated by removing them before running
        the tests.

        Args:
          filename: A string, the name of the file to import using self.importer.
          msg: Not used by this method.
        Raises:
          AssertionError: If no date is produced, or if the date differs from
            the one stored in the expected file.
        """
        # Import the date.
        file = cache.get_file(filename)
        date = self.importer.file_date(file)
        if date is None:
            self.fail("No date produced from {}".format(file.name))

        expect_filename = '{}.file_date'.format(file.name)
        if path.exists(expect_filename) and path.getsize(expect_filename) > 0:
            # Read through a context manager so the handle is always closed.
            with open(expect_filename, encoding='utf-8') as infile:
                expect_date_str = infile.read().strip()
            expect_date = datetime.datetime.strptime(expect_date_str,
                                                     '%Y-%m-%d').date()
            self.assertEqual(expect_date, date)
        else:
            # Write out the expected file for review.
            with open(expect_filename, 'w', encoding='utf-8') as outfile:
                print(date.strftime('%Y-%m-%d'), file=outfile)
            self.skipTest("Expected file not present; generating '{}'".format(
                expect_filename))
예제 #5
0
    def test_importer(self, entries, errors, _):
        """
        2014-01-01 open Assets:US:BofA:Checking                   USD

        2014-05-19 * "Verizon Wireless" ""
          Assets:US:BofA:Checking                          -44.34 USD
        
        2014-05-23 * "Wine-Tarner Cable" ""
          Assets:US:BofA:Checking                          -80.17 USD
        
        2014-06-04 * "BANK FEES" "Monthly bank fee"
          Assets:US:BofA:Checking                           -4.00 USD
        
        2014-06-04 * "RiverBank Properties" "Paying the rent"
          Assets:US:BofA:Checking                        -2400.00 USD
        
        2014-06-08 * "EDISON POWER" ""
          Assets:US:BofA:Checking                          -65.00 USD
        """
        # Check each ConstImporter accessor using a placeholder file path.
        account = 'Assets:US:BofA:Checking'
        file = cache.get_file(path.join(tempfile.gettempdir(), 'test'))
        importer = testing.ConstImporter(entries, account)

        assert importer.file_account(file) == account
        # Compare to None by identity rather than equality (PEP 8).
        assert importer.file_name(file) is None
        assert importer.identify(file)
        # The expected date matches the last dated entry in the docstring.
        assert importer.file_date(file) == datetime.date(2014, 6, 8)

        # The extracted entries should equal the ones given to the importer.
        extracted_entries = importer.extract(file)
        self.assertEqualEntries(extracted_entries, entries)
예제 #6
0
    def test_expect_file_name(self, filename, msg):
        """Compute the imported file name and compare it to an expected output.

        The expectation is read from '<file.name>.file_name'. If that file is
        missing or empty, it is written out from the generated name and the
        test is skipped, so that the generated file can be reviewed. Expected
        files can therefore be regenerated by removing them before running
        the tests.

        Args:
          filename: A string, the name of the file to import using self.importer.
          msg: Not used by this method.
        Raises:
          AssertionError: If no name is produced, if the name is absolute or
            contains a path separator, or if it differs from the expected file.
        """
        # Compute the file name.
        file = cache.get_file(filename)
        generated_basename = self.importer.file_name(file)
        if generated_basename is None:
            self.fail("No filename produced from {}".format(filename))

        # Check that we're getting a relative simple filename. A substring
        # test is used instead of a regex because os.sep is a lone backslash
        # on Windows, which is not a valid regular expression.
        self.assertFalse(path.isabs(generated_basename), generated_basename)
        self.assertNotIn(os.sep, generated_basename)

        expect_filename = '{}.file_name'.format(file.name)
        if path.exists(expect_filename) and path.getsize(expect_filename) > 0:
            # Read through a context manager so the handle is always closed,
            # and keep the path and its contents in separate variables.
            with open(expect_filename, encoding='utf-8') as infile:
                expect_basename = infile.read().strip()
            self.assertEqual(expect_basename, generated_basename)
        else:
            # Write out the expected file for review.
            with open(expect_filename, 'w', encoding='utf-8') as outfile:
                print(generated_basename, file=outfile)
            self.skipTest("Expected file not present; generating '{}'".format(
                expect_filename))
예제 #7
0
    def test_categorizer_two_arguments(self, filename):
        """\
          Date,Amount,Payee,Description
          6/2/2020,30.00,"Payee here","Description"
          7/2/2020,-25.00,"Supermarket","Groceries"
        """
        source = cache.get_file(filename)

        def categorizer(txn, row):
            # Copy the third CSV column into the payee and record the raw row
            # as 'source' metadata.
            with_payee = txn._replace(payee=row[2])
            with_payee.meta['source'] = pformat(row)
            return with_payee

        column_map = {
            Col.DATE: 'Date',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
        }
        foobar = csv.Importer(column_map, 'Assets:Bank', 'EUR',
                              ('Date,Amount,Payee,Description'),
                              categorizer=categorizer,
                              institution='foobar')
        extracted = foobar.extract(source)

        # NOTE(review): the first expected 'source' value lists the second
        # row's payee/description rather than the first row's -- confirm
        # whether assertEqualEntries compares metadata at all.
        expected = r"""

          2020-06-02 * "Payee here" "Description"
            source: "['6/2/2020', '30.00', 'Supermarket', 'Groceries']"
            Assets:Bank  30.00 EUR
        
          2020-07-02 * "Supermarket" "Groceries"
            source: "['7/2/2020', '-25.00', 'Supermarket', 'Groceries']"
            Assets:Bank  -25.00 EUR
        """
        self.assertEqualEntries(expected, extracted)
예제 #8
0
    def test_date_formats(self, filename):
        """\
          Posting,Description,Amount
          11/7/2016,A,2
          12/7/2016,B,3
          13/7/2016,C,4
        """
        source = cache.get_file(filename)
        column_map = {
            Col.DATE: 'Posting',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
        }
        # dayfirst=True: the expected output below reads 11/7/2016 as
        # 2016-07-11.
        day_first = csv.Importer(column_map, 'Assets:Bank', 'EUR', [],
                                 dateutil_kwds={'dayfirst': True})
        extracted = day_first.extract(source)

        expected = r"""

          2016-07-11 * "A"
            Assets:Bank  2 EUR

          2016-07-12 * "B"
            Assets:Bank  3 EUR

          2016-07-13 * "C"
            Assets:Bank  4 EUR

        """
        self.assertEqualEntries(expected, extracted)
예제 #9
0
    def test_mixin(self, entries, errors, _):
        """
        2014-01-01 open Assets:US:BofA:Checking                   USD

        2014-05-19 * "Verizon Wireless" ""
          Assets:US:BofA:Checking                          -44.34 USD
        
        2014-05-23 * "Wine-Tarner Cable" ""
          Assets:US:BofA:Checking                          -80.17 USD
        
        2014-06-04 * "BANK FEES" "Monthly bank fee"
          Assets:US:BofA:Checking                           -4.00 USD
        
        2014-06-04 * "RiverBank Properties" "Paying the rent"
          Assets:US:BofA:Checking                        -2400.00 USD
        
        2014-06-08 * "EDISON POWER" ""
          Assets:US:BofA:Checking                          -65.00 USD
        """
        def filter_last_two(items):
            # Slice off everything except the final two items.
            return items[-2:]

        checking = 'Assets:US:BofA:Checking'
        placeholder = cache.get_file(path.join(tempfile.gettempdir(), 'test'))

        # No filters configured: extraction should be a pass-through.
        plain = Importer(entries, checking, filters=[])
        plain_result = plain.extract(placeholder)
        self.assertEqualEntries(plain_result, entries)

        # One filter configured: only the last two entries should remain.
        trimmed = Importer(entries, checking, filters=[filter_last_two])
        trimmed_result = trimmed.extract(placeholder)
        self.assertEqualEntries(trimmed_result, entries[-2:])
예제 #10
0
def test_identify(filename, expected):
    """Check that filing.Importer identifies `filename` iff `expected` is true."""
    importer = filing.Importer('Assets:Testing',
                               filename_regexp=r'test-\d{2}\.(pdf|csv)')
    candidate = cache.get_file(path.join(DATADIR, filename))
    outcome = importer.identify(candidate)
    # Truthiness of the result must agree with the expectation.
    assert outcome if expected else not outcome
예제 #11
0
def test_identify(filename, expected):
    """Verify identify() matches (or rejects) `filename` as `expected` says."""
    subject = filing.Importer('Assets:Testing',
                              filename_regexp=r'test-\d{2}\.(pdf|csv)')
    memo = cache.get_file(path.join(DATADIR, filename))
    # Compare by truthiness, as the original assert/assert-not pair did.
    assert bool(subject.identify(memo)) == bool(expected)
예제 #12
0
def get_info(raw_entry: Directive) -> dict:
    """Return the type, filename and line recorded in an entry's metadata.

    Files ending in ".beancount" are reported as "text/plain" without
    consulting get_file(); for any other file the type comes from
    get_file(...).mimetype().
    """
    meta = raw_entry.meta
    source_path = meta["filename"]
    if source_path.endswith(".beancount"):
        ftype = "text/plain"
    else:
        ftype = get_file(source_path).mimetype()
    return {
        'type': ftype,
        'filename': source_path,
        'line': meta['lineno'],
    }
예제 #13
0
def test_basics():
    """Check name, account, file name and extraction of filing.Importer.

    Runs the same checks twice: once without a basename (file_name() is
    expected to be None) and once with basename='filed' (file_name() is
    expected to be 'filed.pdf').
    """
    account = 'Assets:Checking'
    importer = filing.Importer(
        account, basename=None, filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))

    assert importer.name() == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    # Compare to None by identity rather than equality (PEP 8).
    assert importer.file_name(file) is None
    assert importer.extract(file) == []

    account = 'Liabilities:Visa'
    importer = filing.Importer(
        account, basename='filed', filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name() == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    assert importer.file_name(file) == 'filed.pdf'
    assert importer.extract(file) == []
예제 #14
0
    def test_expect_identify(self, filename, msg):
        """Check that self.importer identifies the given file.

        Args:
          filename: A string, the name of the file to import using self.importer.
          msg: Not used by this method.
        Raises:
          AssertionError: If identify() does not return a true value.
        """
        self.assertTrue(self.importer.identify(cache.get_file(filename)))
예제 #15
0
def extract_from_file(filename,
                      importer,
                      existing_entries=None,
                      min_date=None,
                      allow_none_for_tags_and_links=False):
    """Import entries from file 'filename' using the given importer.

    The extracted entries are sorted, type-checked and optionally trimmed of
    a leading run of entries dated before 'min_date'.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger. It is
        forwarded to importer.extract() only when that method declares an
        'existing_entries' parameter.
      min_date: A date before which entries should be ignored.
      allow_none_for_tags_and_links: A boolean, passed through to
        data.sanity_check_types() for every extracted entry.
    Returns:
      A list of new imported entries; an empty list if the importer produced
      nothing.
    Raises:
      Exception: Whatever the importer's extract() method raises; it is not
        caught here.
    """
    memo = cache.get_file(filename)

    # Exceptions from extract() are deliberately left to propagate so that
    # importer authors see the full details while developing.
    #
    # Legacy importers may not accept 'existing_entries'; only pass it when
    # the signature declares it.
    extract_params = inspect.signature(importer.extract).parameters
    if 'existing_entries' in extract_params:
        imported = importer.extract(memo, existing_entries=existing_entries)
    else:
        imported = importer.extract(memo)
    if not imported:
        return []

    # Do not rely on the importer for ordering; sort in place.
    imported.sort(key=data.entry_sortkey)

    # Validate the type of every entry.
    for entry in imported:
        data.sanity_check_types(entry, allow_none_for_tags_and_links)

    if not min_date:
        return imported

    # Drop the leading entries dated before 'min_date' (the list is sorted).
    return list(
        itertools.dropwhile(lambda entry: entry.date < min_date, imported))
예제 #16
0
    def test_column_types(self, filename):
        # pylint: disable=line-too-long
        """\
          Details,Posting Date,"Description",Amount,Type,Balance,Check or Slip #,
          DEBIT,3/18/2016,"Payment to Chafe card ending in 1234 03/18",-2680.89,ACCT_XFER,3409.86,,
          CREDIT,3/15/2016,"EMPLOYER INC    DIRECT DEP                 PPD ID: 1111111111",2590.73,ACH_CREDIT,6090.75,,
          DEBIT,3/14/2016,"INVESTMENT SEC   TRANSFER   A5144608        WEB ID: 1234456789",-150.00,ACH_DEBIT,3500.02,,
          DEBIT,3/6/2016,"ATM WITHDRAWAL                       001234  03/8888 DELANC",-60.00,ATM,3650.02,,
          CREDIT,3/5/2016,"CA STATE         NYSTTAXRFD                 PPD ID: 1111111111",110.00,ACH_CREDIT,3710.02,,
          DEBIT,3/4/2016,"BOOGLE           WALLET     US000NEI9T      WEB ID: C234567890",-1300.00,ACH_DEBIT,3600.02,,
        """
        # NOTE(review): the docstring above looks like the CSV fixture behind
        # `filename` (wired up outside this view) -- keep it byte-for-byte.
        source = cache.get_file(filename)

        column_map = {
            Col.DATE: 'Posting Date',
            Col.NARRATION1: 'Description',
            Col.NARRATION2: 'Check or Slip #',
            Col.AMOUNT: 'Amount',
            Col.BALANCE: 'Balance',
            Col.DRCR: 'Details',
        }
        header_line = ('Details,Posting Date,"Description",Amount,'
                       'Type,Balance,Check or Slip #,')
        chafe = csv.Importer(column_map, 'Assets:Bank', 'USD', header_line,
                             institution='chafe')
        extracted = chafe.extract(source)

        # One transaction per CSV row plus a trailing balance directive.
        expected = r"""

          2016-03-18 * "Payment to Chafe card ending in 1234 03/18"
            Assets:Bank  -2680.89 USD

          2016-03-15 * "EMPLOYER INC    DIRECT DEP                 PPD ID: 1111111111"
            Assets:Bank  2590.73 USD

          2016-03-14 * "INVESTMENT SEC   TRANSFER   A5144608        WEB ID: 1234456789"
            Assets:Bank  -150.00 USD

          2016-03-06 * "ATM WITHDRAWAL                       001234  03/8888 DELANC"
            Assets:Bank  -60.00 USD

          2016-03-05 * "CA STATE         NYSTTAXRFD                 PPD ID: 1111111111"
            Assets:Bank  110.00 USD

          2016-03-04 * "BOOGLE           WALLET     US000NEI9T      WEB ID: C234567890"
            Assets:Bank  -1300.00 USD

          2016-03-19 balance Assets:Bank                                     3409.86 USD

        """
        self.assertEqualEntries(expected, extracted)
예제 #17
0
def test_basics():
    """Check name, account, file name and extraction of filing.Importer.

    The checks run once with basename=None (file_name() is expected to be
    None) and once with basename='filed' (file_name() is expected to be
    'filed.pdf').
    """
    account = 'Assets:Checking'
    importer = filing.Importer(account,
                               basename=None,
                               filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))

    assert importer.name(
    ) == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    # Identity check: `== None` can be fooled by a custom __eq__.
    assert importer.file_name(file) is None
    assert importer.extract(file) == []

    account = 'Liabilities:Visa'
    importer = filing.Importer(account,
                               basename='filed',
                               filename_regexp='test.pdf')
    file = cache.get_file(path.join(DATADIR, 'test.pdf'))
    assert importer.name(
    ) == 'beansoup.importers.filing.Importer: "{}"'.format(account)
    assert importer.file_account(file) == account
    assert importer.file_name(file) == 'filed.pdf'
    assert importer.extract(file) == []
예제 #18
0
    def file_date(filename, importer):
        """Ask the importer for the date of a file, never raising.

        Args:
            filename: The full path to a file.
            importer: An importer that matched the file.

        Returns:
            Whatever importer.file_date() returns, or the message of the
            exception if one is raised along the way.
        """
        try:
            memo = cache.get_file(filename)
            return importer.file_date(memo)
        except Exception as err:  # pylint: disable=broad-except
            return str(err)
예제 #19
0
    def __init__(self, directory: str, account: str,
                 importer: ImporterProtocol, **kwargs) -> None:
        """Collect the files under `directory` that `importer` identifies.

        Args:
          directory: Root directory to scan recursively; a leading '~' is
            expanded.
          account: Stored on the instance as `self.account`.
          importer: Importer whose identify() selects the files to keep.
          **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Glob under the *expanded* directory: globbing the raw argument
        # would look for a literal '~' path and find nothing.
        pattern = os.path.join(self.directory, '**', '*')
        # Wrap every regular file (not directories) with get_file().
        files = [
            get_file(os.path.abspath(f))
            for f in glob(pattern, recursive=True)
            if os.path.isfile(f)
        ]
        # filter the valid files for this importer
        self.files = [f for f in files if self.importer.identify(f)]
예제 #20
0
def find_imports(importer_config, files_or_directories, logfile=None):
    """Find files to import and the importers that identify each of them.

    Every file returned by file_utils.find_files() is considered. Files
    larger than FILE_TOO_LARGE_THRESHOLD are skipped with a warning. For the
    others, identify() is called on each configured importer; an importer
    whose identify() raises is logged as an error and treated as not matching.

    Args:
      importer_config: a list of importer instances that define the config.
      files_or_directories: a list of files of directories to walk recursively and
                            hunt for files to import.
      logfile: A file object to write log entries to, or None, in which case no log is
        written out.
    Yields:
      Pairs of (filename, matching_importers), where 'matching_importers' is
      the list of importers that identified the file. The list may be empty.
    """
    # Iterate over all files found; accumulate the entries by identification.
    for filename in file_utils.find_files(files_or_directories):
        if logfile is not None:
            logfile.write(SECTION.format(filename))
            logfile.write('\n')

        # Skip files that are simply too large.
        size = path.getsize(filename)
        if size > FILE_TOO_LARGE_THRESHOLD:
            # Lazy %-style arguments, consistent with the error log below.
            logging.warning("File too large: '%s' (%s bytes); skipping.",
                            filename, size)
            continue

        # For each of the sources the user has declared, identify which
        # match the text.
        file = cache.get_file(filename)
        matching_importers = []
        for importer in importer_config:
            try:
                matched = importer.identify(file)
                if matched:
                    matching_importers.append(importer)
            except Exception as exc:
                # A failing importer must not abort the scan of other files.
                logging.error(
                    "Importer %s.identify() raised an unexpected error: %s",
                    importer.name(), exc)

        yield (filename, matching_importers)
예제 #21
0
def file_import_info(filename: str, importer) -> FileImportInfo:
    """Build a FileImportInfo for a file, with a fallback for each field.

    If the importer raises while computing the account, date or name, the
    fallbacks are respectively the empty string, today's date and the base
    name of `filename`.
    """
    # pylint: disable=broad-except
    memo = cache.get_file(filename)

    def attempt(compute, fallback):
        # Run `compute`; on any exception use `fallback()` instead.
        try:
            return compute()
        except Exception:
            return fallback()

    account = attempt(lambda: importer.file_account(memo), lambda: "")
    date = attempt(lambda: importer.file_date(memo),
                   lambda: datetime.date.today())
    name = attempt(lambda: importer.file_name(memo),
                   lambda: path.basename(filename))

    return FileImportInfo(importer.name(), account, date, name)
예제 #22
0
def identify(importers_list, files_or_directories):
    """Print, for each file found, the importers that identify it.

    Output goes to sys.stdout, which is also handed to find_imports() as its
    log file.

    Args:
      importers_list: A list of importer instances.
      files_or_directories: A list of strings, files or directories.
    """
    out = sys.stdout
    matches = find_imports(importers_list, files_or_directories, logfile=out)
    for filename, matched in matches:
        memo = cache.get_file(filename)
        for importer in matched:
            label = importer.name() if importer else '-'
            out.write('Importer:    {}\n'.format(label))
            account = importer.file_account(memo)
            out.write('Account:     {}\n'.format(account))
            out.write('\n')
예제 #23
0
def test_pdf_filing_importer(first_day, filename, expected_date):
    """Check amex.PdfFilingImporter metadata and its identify/file_date pair.

    When `expected_date` is falsy the importer is expected to neither
    identify the file nor produce a date for it.
    """
    account = 'Liabilities:Amex'
    importer = amex.PdfFilingImporter(account,
                                      basename='amex',
                                      first_day=first_day)
    statement = cache.get_file(path.join(DATADIR, filename))

    expected_name = 'beansoup.importers.amex.PdfFilingImporter: "{}"'.format(
        account)
    assert importer.name() == expected_name
    assert importer.file_account(statement) == account
    assert importer.file_name(statement) == 'amex.pdf'
    assert importer.extract(statement) == []

    if not expected_date:
        assert not importer.identify(statement)
        assert not importer.file_date(statement)
        return

    assert importer.identify(statement)
    assert importer.file_date(statement) == expected_date
예제 #24
0
    def test_importer_against_liability(self, filename):
        """
        12/06/2015,SKYPE                    123456789,14.00,,97.62
        12/07/2015,STM-LAURIER              MONTREAL,22.50,,120.12
        12/13/2015,PAYMENT - THANK YOU,,97.62,22.50
        12/14/2015,RESTAURANT PHAYA THAI    MONTREAL,40.00,,62.50
        12/16/2015,STM-CHARLEVOIX           MONTREAL,45.00,,107.50
        """
        statement = cache.get_file(filename)

        account = 'Liabilities:TD:Visa'
        visa = td.Importer(account, 'CAD', 'td-visa',
                           first_day=4,
                           filename_regexp=path.basename(filename))

        # Filing metadata reported for the statement.
        assert visa.file_account(statement) == account
        assert visa.file_name(statement) == 'td-visa.csv'
        assert visa.identify(statement)
        assert visa.file_date(statement) == datetime.date(2016, 1, 3)

        extracted = visa.extract(statement)
        # On this liability account the expected amounts carry the opposite
        # sign of the CSV columns, and a balance directive closes the list.
        expected = """
        2015-12-06 * "SKYPE                    123456789"
          Liabilities:TD:Visa  -14.00 CAD
        
        2015-12-07 * "STM-LAURIER              MONTREAL"
          Liabilities:TD:Visa  -22.50 CAD
        
        2015-12-13 * "PAYMENT - THANK YOU"
          Liabilities:TD:Visa  97.62 CAD
        
        2015-12-14 * "RESTAURANT PHAYA THAI    MONTREAL"
          Liabilities:TD:Visa  -40.00 CAD
        
        2015-12-16 * "STM-CHARLEVOIX           MONTREAL"
          Liabilities:TD:Visa  -45.00 CAD
        
        2016-01-04 balance Liabilities:TD:Visa    -107.50 CAD
        """
        self.assertEqualEntries(expected, extracted)
예제 #25
0
파일: test_td.py 프로젝트: fxtlabs/beansoup
    def test_importer_against_liability(self, filename):
        """
        12/06/2015,SKYPE                    123456789,14.00,,97.62
        12/07/2015,STM-LAURIER              MONTREAL,22.50,,120.12
        12/13/2015,PAYMENT - THANK YOU,,97.62,22.50
        12/14/2015,RESTAURANT PHAYA THAI    MONTREAL,40.00,,62.50
        12/16/2015,STM-CHARLEVOIX           MONTREAL,45.00,,107.50
        """
        memo = cache.get_file(filename)

        account = 'Liabilities:TD:Visa'
        importer_options = {
            'first_day': 4,
            'filename_regexp': path.basename(filename),
        }
        card = td.Importer(account, 'CAD', 'td-visa', **importer_options)

        # Account, filed name, identification and statement date.
        assert card.file_account(memo) == account
        assert card.file_name(memo) == 'td-visa.csv'
        assert card.identify(memo)
        assert card.file_date(memo) == datetime.date(2016, 1, 3)

        imported = card.extract(memo)
        # Five transactions followed by a closing balance directive.
        expected_text = """
        2015-12-06 * "SKYPE                    123456789"
          Liabilities:TD:Visa  -14.00 CAD
        
        2015-12-07 * "STM-LAURIER              MONTREAL"
          Liabilities:TD:Visa  -22.50 CAD
        
        2015-12-13 * "PAYMENT - THANK YOU"
          Liabilities:TD:Visa  97.62 CAD
        
        2015-12-14 * "RESTAURANT PHAYA THAI    MONTREAL"
          Liabilities:TD:Visa  -40.00 CAD
        
        2015-12-16 * "STM-CHARLEVOIX           MONTREAL"
          Liabilities:TD:Visa  -45.00 CAD
        
        2016-01-04 balance Liabilities:TD:Visa    -107.50 CAD
        """
        self.assertEqualEntries(expected_text, imported)
예제 #26
0
    def test_explict_encoding_utf8(self, filename):
        """\
          Posting,Description,Amount
          2020/08/08,🍏,2
        """
        source = cache.get_file(filename)
        column_map = {
            Col.DATE: 'Posting',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
        }
        utf8_importer = csv.Importer(column_map, 'Assets:Bank', 'EUR', [],
                                     encoding='utf-8')
        extracted = utf8_importer.extract(source)

        # The non-ASCII narration is expected to come through unchanged.
        expected = r"""

          2020-08-08 * "🍏"
            Assets:Bank  2 EUR

        """
        self.assertEqualEntries(expected, extracted)
예제 #27
0
    def test_categorizer_one_argument(self, filename):
        """\
          Date,Amount,Payee,Description
          6/2/2020,30.00,"Payee here","Description"
          7/2/2020,-25.00,"Supermarket","Groceries"
        """
        source = cache.get_file(filename)

        def categorizer(txn):
            # Only grocery transactions get a balancing expense posting.
            if txn.narration != "Groceries":
                return txn
            counter_units = -txn.postings[0].units
            txn.postings.append(
                data.Posting("Expenses:Groceries", counter_units,
                             None, None, None, None))
            return txn

        column_map = {
            Col.DATE: 'Date',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
        }
        foobar = csv.Importer(column_map, 'Assets:Bank', 'EUR',
                              ('Date,Amount,Payee,Description'),
                              categorizer=categorizer,
                              institution='foobar')
        extracted = foobar.extract(source)

        expected = r"""

          2020-06-02 * "Description"
            Assets:Bank  30.00 EUR
        
          2020-07-02 * "Groceries"
            Assets:Bank  -25.00 EUR
            Expenses:Groceries  25.00 EUR
        """
        self.assertEqualEntries(expected, extracted)
예제 #28
0
    def test_links(self, filename):
        """\
          Date,Description,Amount,Link
          2020-07-03,A,2,
          2020-07-03,B,3,123
        """
        source = cache.get_file(filename)
        column_map = {
            Col.DATE: 'Date',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
            Col.REFERENCE_ID: 'Link',
        }
        link_importer = csv.Importer(column_map, 'Assets:Bank', 'EUR', [])
        extracted = link_importer.extract(source)

        # Only the row with a non-empty 'Link' cell is expected to get a link.
        expected = r"""

          2020-07-03 * "A"
            Assets:Bank  2 EUR

          2020-07-03 * "B" ^123
            Assets:Bank  3 EUR
        """
        self.assertEqualEntries(expected, extracted)
예제 #29
0
    def test_tags(self, filename):
        """\
          Date,Description,Amount,Tag
          2020-07-03,A,2,
          2020-07-03,B,3,foo
        """
        source = cache.get_file(filename)
        column_map = {
            Col.DATE: 'Date',
            Col.NARRATION: 'Description',
            Col.AMOUNT: 'Amount',
            Col.TAG: 'Tag',
        }
        tag_importer = csv.Importer(column_map, 'Assets:Bank', 'EUR', [])
        extracted = tag_importer.extract(source)

        # Only the row with a non-empty 'Tag' cell is expected to get a tag.
        expected = r"""

          2020-07-03 * "A"
            Assets:Bank  2 EUR

          2020-07-03 * "B" #foo
            Assets:Bank  3 EUR
        """
        self.assertEqualEntries(expected, extracted)
예제 #30
0
    def __init__(self, directory: str, account: str,
                 importer: ImporterProtocol, **kwargs) -> None:
        """Collect the files under `directory` that `importer` identifies.

        Args:
          directory: Root directory to scan recursively; a leading '~' is
            expanded.
          account: Stored on the instance as `self.account`.
          importer: Importer whose identify() selects the files to keep.
          **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)
        self.directory = os.path.expanduser(directory)
        self.importer = importer
        self.account = account

        # Glob under the *expanded* directory: globbing the raw argument
        # would look for a literal '~' path and find nothing.
        pattern = os.path.join(self.directory, '**', '*')
        # Wrap every regular file (not directories) with get_file().
        files = [
            get_file(f)
            for f in glob(pattern, recursive=True)
            if os.path.isfile(f)
        ]

        # filter the valid files for this importer
        # handle the fact that importer.identify could raise an exception
        # instead of returning False, but that ultimately means False for this purpose
        def try_identify(importer, file):
            try:
                return importer.identify(file)
            except Exception:
                # `except Exception` (not a bare except) so that
                # KeyboardInterrupt and SystemExit still propagate.
                return False

        self.files = [f for f in files if try_identify(self.importer, f)]
예제 #31
0
def get_info(raw_entry: Directive) -> dict:
    """Describe where an entry came from.

    Returns a dict with the mimetype reported by ``get_file()`` for the
    entry's source file ('type'), that file's name ('filename') and the
    entry's line number ('line'), all read from ``raw_entry.meta``.
    """
    meta = raw_entry.meta
    source_path = meta['filename']
    return {
        'type': get_file(source_path).mimetype(),
        'filename': source_path,
        'line': meta['lineno'],
    }
예제 #32
0
def test_file_date(first_day, filename_regexp, filename, expected):
    """Check that the filing importer dates ``filename`` as ``expected``."""
    options = dict(first_day=first_day, filename_regexp=filename_regexp)
    importer = filing.Importer('Assets:Testing', **options)

    # The sample files live under DATADIR.
    sample_path = path.join(DATADIR, filename)
    actual = importer.file_date(cache.get_file(sample_path))
    assert actual == expected
예제 #33
0
def test_file_date(first_day, filename_regexp, filename, expected):
    """Verify the date the filing importer derives for a sample file."""
    importer = filing.Importer(
        'Assets:Testing',
        first_day=first_day,
        filename_regexp=filename_regexp,
    )

    # Resolve the sample relative to DATADIR and wrap it via the cache.
    sample = cache.get_file(path.join(DATADIR, filename))
    computed = importer.file_date(sample)
    assert computed == expected
예제 #34
0
파일: test_td.py 프로젝트: fxtlabs/beansoup
    def test_importer_against_asset(self, filename):
        """
        04/01/2016,12-345 Smith    RLS,404.38,,5194.21
        04/05/2016,COSTCO #9876543,60.24,,5133.97
        04/05/2016,METRO ETS 2020,34.90,,5099.07
        04/05/2016,POISSONERIE DU,31.78,,5067.29
        04/05/2016,LES DOUCEURS DU,12.39,,5054.90
        04/05/2016,FROMAGERIE ATWA,42.17,,5012.73
        04/07/2016,CHQ#00123-456789,16.00,,4996.73
        04/12/2016,FROMAGERIE ATWA,39.46,,4957.27
        04/12/2016,DAVID'S TEA,27.50,,4929.77
        04/12/2016,PATISSERIE SAIN,32.00,,4897.77
        04/12/2016,GAZ METRO        BPY,247.26,,4650.51
        04/14/2016,VIDEOTRON LTEE   BPY,237.74,,4412.77
        04/14/2016,TD VISA      A1B2C3,74.37,,4338.40
        04/16/2016,FRUITERIE ATWAT,24.65,,4313.75
        04/16/2016,POISSONERIE NOU,64.79,,4248.96
        04/16/2016,CHQ#00125-9876543,160.00,,4088.96
        04/19/2016,CHQ#00124-9876543,900.00,,3188.96
        04/22/2016,AMEX         B2C3D4,734.59,,2454.37
        04/23/2016,POISSONERIE DU,57.18,,2397.19
        04/28/2016,BELL CANADA      BPY,25.30,,2371.89
        04/29/2016,CINEPLEX #9172,23.00,,2348.89
        04/29/2016,CANADA           RIT,,345.24,2694.13
        04/29/2016,12345678900WIRE,,210.32,2904.45
        04/30/2016,BARON SPORTS,21.28,,2883.17
        """
        # NOTE(review): the docstring above is CSV fixture text rather than
        # prose; presumably a decorator outside this view writes it to
        # `filename` -- confirm before editing it.
        statement = cache.get_file(filename)

        checking = 'Assets:TD:Checking'
        td_importer = td.Importer(
            checking, 'CAD', 'td-checking',
            first_day=1,
            filename_regexp=path.basename(filename))

        # Filing metadata the importer reports for the statement.
        assert td_importer.file_account(statement) == checking
        assert td_importer.file_name(statement) == 'td-checking.csv'
        assert td_importer.identify(statement)
        # The expected file date matches the date of the last fixture row.
        assert td_importer.file_date(statement) == datetime.date(2016, 4, 30)

        extracted = td_importer.extract(statement)

        # In the fixture, amounts in the third CSV column are expected as
        # negative postings and amounts in the fourth column as positive
        # ones. The trailing balance uses the last row's balance value and is
        # dated the day after that row.
        expected = """
        2016-04-01 * "12-345 Smith    RLS"
          Assets:TD:Checking  -404.38 CAD
        
        2016-04-05 * "COSTCO #9876543"
          Assets:TD:Checking  -60.24 CAD
        
        2016-04-05 * "METRO ETS 2020"
          Assets:TD:Checking  -34.90 CAD
        
        2016-04-05 * "POISSONERIE DU"
          Assets:TD:Checking  -31.78 CAD
        
        2016-04-05 * "LES DOUCEURS DU"
          Assets:TD:Checking  -12.39 CAD
        
        2016-04-05 * "FROMAGERIE ATWA"
          Assets:TD:Checking  -42.17 CAD
        
        2016-04-07 * "CHQ#00123-456789"
          Assets:TD:Checking  -16.00 CAD
        
        2016-04-12 * "FROMAGERIE ATWA"
          Assets:TD:Checking  -39.46 CAD
        
        2016-04-12 * "DAVID'S TEA"
          Assets:TD:Checking  -27.50 CAD
        
        2016-04-12 * "PATISSERIE SAIN"
          Assets:TD:Checking  -32.00 CAD
        
        2016-04-12 * "GAZ METRO        BPY"
          Assets:TD:Checking  -247.26 CAD
        
        2016-04-14 * "VIDEOTRON LTEE   BPY"
          Assets:TD:Checking  -237.74 CAD
        
        2016-04-14 * "TD VISA      A1B2C3"
          Assets:TD:Checking  -74.37 CAD
        
        2016-04-16 * "FRUITERIE ATWAT"
          Assets:TD:Checking  -24.65 CAD
        
        2016-04-16 * "POISSONERIE NOU"
          Assets:TD:Checking  -64.79 CAD
        
        2016-04-16 * "CHQ#00125-9876543"
          Assets:TD:Checking  -160.00 CAD
        
        2016-04-19 * "CHQ#00124-9876543"
          Assets:TD:Checking  -900.00 CAD
        
        2016-04-22 * "AMEX         B2C3D4"
          Assets:TD:Checking  -734.59 CAD
        
        2016-04-23 * "POISSONERIE DU"
          Assets:TD:Checking  -57.18 CAD
        
        2016-04-28 * "BELL CANADA      BPY"
          Assets:TD:Checking  -25.30 CAD
        
        2016-04-29 * "CINEPLEX #9172"
          Assets:TD:Checking  -23.00 CAD
        
        2016-04-29 * "CANADA           RIT"
          Assets:TD:Checking  345.24 CAD
        
        2016-04-29 * "12345678900WIRE"
          Assets:TD:Checking  210.32 CAD
        
        2016-04-30 * "BARON SPORTS"
          Assets:TD:Checking  -21.28 CAD
        
        2016-05-01 balance Assets:TD:Checking   2883.17 CAD
        """
        self.assertEqualEntries(expected, extracted)
예제 #35
0
def extract_from_file(filename, importer,
                      existing_entries=None,
                      min_date=None,
                      allow_none_for_tags_and_links=False):
    """Run an importer's extraction on one file and flag likely duplicates.

    The extracted entries are sorted, type-checked, optionally stripped of
    the leading entries dated before 'min_date', and finally compared against
    'existing_entries' so that similar-looking entries can be marked.

    Args:
      filename: The name of the file to import.
      importer: An importer object that matched the file.
      existing_entries: A list of existing entries parsed from a ledger, used
        to detect duplicates. If None, no duplicate detection is performed.
      min_date: A date; entries at the start of the sorted list that are dated
        before it are dropped. A false value disables the filtering.
      allow_none_for_tags_and_links: A boolean, forwarded to the type sanity
        check; whether Transaction objects may carry None for their 'tags' or
        'links' attributes.
    Returns:
      A pair of lists: the new imported entries, and the subset of these that
      were identified as possible duplicates. Duplicates are replaced by
      copies whose metadata carries the DUPLICATE_META marker set to True.
    Raises:
      Exception: If there is an error in the importer's extract() method.
    """
    # Exceptions from extract() are deliberately not caught here: letting
    # them surface shows importer authors the full details of the failure.
    extracted = importer.extract(cache.get_file(filename))
    if not extracted:
        return [], []

    # Importers are not trusted to return their entries in order.
    extracted.sort(key=data.entry_sortkey)

    # Validate the type of every entry before going any further.
    for candidate in extracted:
        data.sanity_check_types(candidate, allow_none_for_tags_and_links)

    # Discard the leading run of entries dated before the cutoff.
    if min_date:
        def predates_cutoff(candidate):
            return candidate.date < min_date
        extracted = list(itertools.dropwhile(predates_cutoff, extracted))

    # Without a ledger to compare against there is nothing to mark.
    if existing_entries is None:
        return extracted, []

    # Entries are matched back by object identity.
    similar_pairs = similar.find_similar_entries(extracted, existing_entries)
    flagged_ids = {id(candidate) for candidate, _ in similar_pairs}

    marked_entries = []
    duplicates = []
    for candidate in extracted:
        if id(candidate) in flagged_ids:
            # Mark a copy of the metadata; the original dict stays untouched.
            meta = candidate.meta.copy()
            meta[DUPLICATE_META] = True
            candidate = candidate._replace(meta=meta)
            duplicates.append(candidate)
        marked_entries.append(candidate)

    return marked_entries, duplicates
예제 #36
0
def file_one_file(filename, importers, destination, idify=False, logfile=None):
    """Compute the filing destination of a single file from its importers.

    The account comes from the first importer that provides one. The date and
    the cleaned filename come from the first importer in the list. Nothing in
    this function moves or copies the file; it only computes the new name and
    optionally writes a summary to 'logfile'.

    Args:
      filename: A string, the name of the downloaded file to be processed.
      importers: A list of importer instances that handle this file.
      destination: A string, the root destination directory where the files are
        to be filed. The files are organized there under a hierarchy mirroring
        that of the chart of accounts.
      idify: A flag, if true, remove whitespace and funky characters in the
        destination filename.
      logfile: A file object to write log entries to, or None, in which case no
        log is written out.
    Returns:
      The full new destination filename on success, and None if no importer
      provided an account for the file.
    """
    # Create an object to cache all the conversions between the importers
    # and phases and what-not.
    file = cache.get_file(filename)

    # Ask every importer for the account; a failing importer is logged and
    # treated as providing no account.
    file_accounts = []
    for importer in importers:
        try:
            account_ = importer.file_account(file)
        except Exception as exc:
            account_ = None
            logging.exception("Importer %s.file_account() raised an unexpected error: %s",
                              importer.name(), exc)
        if account_ is not None:
            file_accounts.append(account_)

    file_accounts_set = set(file_accounts)
    if not file_accounts_set:
        logging.error("No account provided by importers: %s",
                      ", ".join(imp.name() for imp in importers))
        return None

    if len(file_accounts_set) > 1:
        # Sorted so that the message is the same from one run to the next.
        logging.warning("Ambiguous accounts from many importers: %s",
                        ', '.join(sorted(file_accounts_set)))
        # Note: Don't exit; select the first matching importer's account.

    file_account = file_accounts[0]

    # Given multiple importers, select the first one that was yielded to
    # obtain the date and process the filename.
    importer = importers[0]

    # Compute the date from the last modified time.
    mtime = path.getmtime(filename)
    mtime_date = datetime.datetime.fromtimestamp(mtime).date()

    # Try to get the file's date by calling a module support function. The
    # module may be able to extract the date from the filename, from the
    # contents of the file itself (e.g. scraping some text from the PDF
    # contents, or grabbing the last line of a CSV file).
    try:
        date = importer.file_date(file)
    except Exception as exc:
        logging.exception("Importer %s.file_date() raised an unexpected error: %s",
                          importer.name(), exc)
        date = None
    if date is None:
        # Fallback on the last modified time of the file.
        date = mtime_date
        date_source = 'mtime'
    else:
        date_source = 'contents'

    # Apply filename renaming, if implemented.
    # Otherwise clean up the filename.
    try:
        clean_filename = importer.file_name(file)

        # Warn the importer implementor if a name is returned and it's an
        # absolute filename.
        if clean_filename and (path.isabs(clean_filename) or os.sep in clean_filename):
            logging.error(("The importer '%s' file_name() method should return a relative "
                           "filename; the filename '%s' is absolute or contains path "
                           "separators"),
                          importer.name(), clean_filename)
    except Exception as exc:
        logging.exception("Importer %s.file_name() raised an unexpected error: %s",
                          importer.name(), exc)
        clean_filename = None
    if clean_filename is None:
        # If no filename has been provided, use the basename.
        clean_filename = path.basename(file.name)
    elif re.match(r'\d\d\d\d-\d\d-\d\d', clean_filename):
        # The importer name must be passed as an argument; without it the
        # '%s' placeholder is logged literally.
        logging.error("The importer '%s' file_name() method should not date the "
                      "returned filename. Implement file_date() instead.",
                      importer.name())

    # We need a simple filename; remove the directory part if there is one.
    clean_basename = path.basename(clean_filename)

    # Remove whitespace if requested.
    if idify:
        clean_basename = misc_utils.idify(clean_basename)

    # Prepend the date prefix.
    new_filename = '{0:%Y-%m-%d}.{1}'.format(date, clean_basename)

    # Prepend destination directory; account components become directories.
    new_fullname = path.normpath(path.join(destination,
                                           file_account.replace(account.sep, os.sep),
                                           new_filename))

    # Print the filename and which modules matched.
    if logfile is not None:
        logfile.write('Importer:    {}\n'.format(importer.name() if importer else '-'))
        logfile.write('Account:     {}\n'.format(file_account))
        logfile.write('Date:        {} (from {})\n'.format(date, date_source))
        logfile.write('Destination: {}\n'.format(new_fullname))
        logfile.write('\n')

    return new_fullname
def with_testdir(directory):
    """Build a parametrize mark supplying each input file of a directory.

    Every name yielded by find_input_files() for 'directory' is wrapped with
    cache.get_file() and passed to the test as the 'file' argument.
    """
    cached_files = list(map(cache.get_file, find_input_files(directory)))
    return pytest.mark.parametrize("file", cached_files)