Example #1
0
def disabled_test_factory():
    """Check that the factory maps the sample text to RcnDateExtractor.

    The name does not start with ``test_`` - presumably so the test
    runner skips it while the check is disabled.
    """
    sample_text = ''.join((
        '1-800-RING-RCN (1-800-746-4726)Website:',
        'Statement Date:Account Number:Payment Due Date:1',
        'Statement Date:Account Number:Payment Due Date:01/31/20172301-',
    ))

    matched = ExtractorFactory.get_matching_extractor(sample_text)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is RcnDateExtractor
Example #2
0
def test_factory():
    """Check that the factory maps the sample text to AscensusDateExtractor."""
    sample_text = ''.join((
        'BalanceVested Percent9/30/15 Vested BalanceEMPLOYEE 401(K)',
        '$2Performance SummaryYour Personal Rate of ',
        'Return for the period 7/1/15 through 9/30/15 is -2.61%. ',
        'Visit us at https://www.planservices.com/',
    ))

    matched = ExtractorFactory.get_matching_extractor(sample_text)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is AscensusDateExtractor
Example #3
0
def test_factory():
    """Check that the factory maps the sample text to VanguardDateExtractor."""
    sample_text = ''.join((
        'Client Services: 800-662-2739December 31, 2017, year-to-date statement',
        'DateTransactionAmountSharepriceSharestransactedTotalsharesownedValue',
        'Beginning balance on 12/31/2016$12.34537.672',
        '3214.354Ending balance on 12/31/2017$23,456.78',
        'Client Services: 800-662-2739Page 2 of8',
        'Vanguard, P.O. Box 2600Valley Forge, PA 19482-2600',
    ))

    matched = ExtractorFactory.get_matching_extractor(sample_text)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is VanguardDateExtractor
def test_factory():
    """Check that the factory maps the sample text to CapitalOne360DateExtractor."""
    sample_text = ''.join((
        'Your 360 Savings Activity Account: DJT ',
        'Annual Percentage Yield Earned: 0.75% ',
        'ActivityDateAmountBalance',
        'Opening Balance02/01/2016$13,213.32',
        'My Info section.capitalone360.comInteractive',
        'Deposit from VPUTIN02/16/2016$350,000,000.00$350,013,213.32',
        'Withdrawal to Electric Checking XXXX259476 / 27 / 2016',
        '$(130, 000, 000.00)$220, 013, 213.32',
        'Closing Balance02 / 29 / 2016$220, 013, 213.32',
        'See below for important information.',
    ))

    matched = ExtractorFactory.get_matching_extractor(sample_text)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is CapitalOne360DateExtractor
Example #5
0
    def determine_action_for_file(self, filepath):
        """Decide what to do with ``filepath`` and queue the resulting Action.

        Steps, in order:
            - Hash the file with Md5Reader. In hash-only mode the hash is
              printed and nothing else happens.
            - If the hash is already in ``self.hashes``, queue a delete
              action for the duplicate and stop.
            - Read the document text via ``self.reader``. In extract-only
              mode the text is printed and nothing else happens.
            - Pick an extractor for the text, extract the data, and ask the
              extractor for the target file name.
            - Queue an ignore, delete or rename action depending on whether
              the file already has the target name or the target path is
              already taken.

        Args:
            filepath: Path of the file to examine.

        Returns:
            None. The outcome is appended to ``self.actions``; the file's
            hash is appended to ``self.hashes`` once a target name has been
            computed.
        """

        # TODO: perform this every time? Or only when we find a duplicate target filename?
        file_hash = Md5Reader().parse(filepath)
        if self.config.hash_only:
            print('{} - {}'.format(file_hash, filepath))
            return

        if file_hash in self.hashes:
            self.actions.append(
                Action.create_delete_action(
                    filepath, reason='Duplicate hash: {}'.format(file_hash)))
            return

        # TODO: can PdfReader accept/process the same streamed data as Md5Reader?
        contents = self.reader.parse(filepath)

        if self.config.extract_only:
            print(contents)
            return

        extractor = ExtractorFactory.get_matching_extractor(contents)

        data = extractor.extract(contents)

        data.set_source(filepath)
        data.set_hash(file_hash)

        new_name = extractor.rename(data)

        # Use os.path rather than splitting on a literal '/' so the directory
        # and file name are separated correctly whatever the platform's
        # path separator is.
        directory, old_name = os.path.split(filepath)
        new_path = os.path.join(directory, new_name)

        self.hashes.append(file_hash)

        if old_name == new_name:
            action = Action.create_ignore_action(
                filepath, reason='Already named correctly')
        elif os.path.isfile(new_path):

            # TODO: this isn't good enough to avoid an overwrite, as there is no
            #       coordination between files.
            #       How about storing the data objects in a hash of target paths?

            # The target already exists on disk: if its content is identical
            # the source is redundant, otherwise leave both files alone.
            existing_hash = Md5Reader().parse(new_path)
            if existing_hash == data.get_hash():
                reason = (
                    '{} matches existing hash ({}) - shared by [{}].'.format(
                        data.get_source(), existing_hash, new_path))
                action = Action.create_delete_action(data.get_source(),
                                                     reason=reason)

            else:
                action = Action.create_ignore_action(
                    filepath,
                    reason=
                    ('Target File [{}] already exists. Ignoring to avoid an overwrite.'
                     .format(new_name)))
        else:
            action = Action.create_rename_action(filepath, new_path)

        self.actions.append(action)

        if self.config.verbose:
            print('Adding action: {}'.format(action))
Example #6
0
def test_factory():
    """Check that the factory maps TESTDATA to SocialSecurityDateExtractor."""
    matched = ExtractorFactory.get_matching_extractor(TESTDATA)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is SocialSecurityDateExtractor
Example #7
0
def test_factory():
    """Check that the factory returns an EXTRACTOR_UNDER_TEST instance for TESTDATA."""
    matched = ExtractorFactory.get_matching_extractor(TESTDATA)
    is_expected_kind = isinstance(matched, EXTRACTOR_UNDER_TEST)

    # isinstance also accepts subclasses of EXTRACTOR_UNDER_TEST.
    assert is_expected_kind
Example #8
0
def test_factory():
    """Check that the factory maps TESTDATA to HanscomDateExtractor."""
    matched = ExtractorFactory.get_matching_extractor(TESTDATA)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is HanscomDateExtractor
Example #9
0
def test_factory():
    """Check that the factory maps TESTDATA to exactly EXPECTED_EXTRACTOR."""
    matched = ExtractorFactory.get_matching_extractor(TESTDATA)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is EXPECTED_EXTRACTOR
def test_factory():
    """Check that the factory maps TESTDATA to AmazonPrimeDateExtractor."""
    matched = ExtractorFactory.get_matching_extractor(TESTDATA)
    matched_type = type(matched)

    # Exact type comparison: a subclass instance would not satisfy this.
    assert matched_type is AmazonPrimeDateExtractor