Beispiel #1
0
    def test_threadsafe_resumable(self, tmpdir):
        """Interrupt a threadsafe enrichment after two rows, then resume it.

        The first pass stops right after its second write. The second pass
        reuses the same resumer on the same output file and runs to the end.
        The output is compared regardless of row order, and the events sent
        to the resumer's listener are checked last.
        """
        events = defaultdict(list)

        def listener(name, row):
            # Store a list copy of whatever row object is handed over.
            events[name].append(list(row))

        def job(payload):
            # Sleep a tenth of a second per unit found in the third cell.
            index, row = payload
            time.sleep(int(row[2]) * .1)

            return index, row

        def sort_output(o):
            # Tuples make the rows comparable without relying on file order.
            return sorted(map(tuple, o))

        output_path = str(tmpdir.join('./enriched_resumable_threadsafe.csv'))
        resumer = ThreadSafeResumer(output_path, listener=listener)

        # First pass: stop once two rows have been written.
        with open('./test/resources/people_unordered.csv') as f, resumer:
            enricher = casanova.threadsafe_enricher(
                f, resumer,
                keep=('name',),
                add=('x2',)
            )

            for count, (index, row) in enumerate(imap_unordered(enricher, job, 3), 1):
                enricher.writerow(index, row, [2 * (index + 1)])

                if count == 2:
                    break

        first_pass_rows = [
            ['name', 'index', 'x2'],
            ['Mary', '1', '4'],
            ['Julia', '2', '6']
        ]
        assert sort_output(collect_csv(output_path)) == sort_output(first_pass_rows)

        # Second pass: same resumer, no early exit.
        with open('./test/resources/people_unordered.csv') as f, resumer:
            enricher = casanova.threadsafe_enricher(
                f, resumer,
                keep=('name',),
                add=('x2',)
            )

            for index, row in imap_unordered(enricher, job, 3):
                enricher.writerow(index, row, [2 * (index + 1)])

        complete_rows = first_pass_rows + [['John', '0', '2']]
        assert sort_output(collect_csv(output_path)) == sort_output(complete_rows)

        # Listener events expected after both passes.
        expected_output_events = [['Mary', '1', '4'], ['Julia', '2', '6']]
        expected_filter_events = [
            [1, ['Mary', 'Sue', '1']],
            [2, ['Julia', 'Stone', '2']]
        ]
        assert sort_output(events['output.row']) == sort_output(expected_output_events)
        assert sort_output(events['filter.row']) == sort_output(expected_filter_events)
Beispiel #2
0
    def test_resumable(self, tmpdir):
        """Write 0..2 through a LastCellResumer, then resume and write 3..5.

        On the second run the resumer is expected to report '2' as its state,
        hand it out once through pop_state() and return None afterwards.
        """
        output_path = str(tmpdir.join('./written_resumable.csv'))

        def numbers_from(offset=0):
            # Values to write, always ending at 5 inclusive.
            return range(offset, 6)

        # First run: stop before writing 3.
        with LastCellResumer(output_path, 'index') as resumer:
            writer = Writer(resumer, ['index'])

            start = resumer.get_state() or 0
            for value in numbers_from(start):
                if value == 3:
                    break
                writer.writerow([value])

        assert collect_csv(output_path) == [['index']] + [[str(n)] for n in range(3)]

        # Second run: pick up after the last written value.
        with LastCellResumer(output_path, 'index') as resumer:
            writer = Writer(resumer, ['index'])

            assert resumer.get_state() == '2'
            last_written = resumer.pop_state()

            assert last_written == '2'
            assert resumer.pop_state() is None

            for value in numbers_from(int(last_written) + 1):
                writer.writerow([value])

        assert collect_csv(output_path) == [['index']] + [[str(n)] for n in range(6)]
Beispiel #3
0
    def test_resumable(self, tmpdir):
        """Enrich a single row, then resume with a RowCountResumer.

        The first pass writes only the first row. The second pass reuses the
        resumer and writes whatever rows the enricher still yields. The
        listener events collected over both passes are compared at the end.
        """
        events = defaultdict(list)

        def listener(name, row):
            # Store a list copy of whatever row object is handed over.
            events[name].append(list(row))

        output_path = str(tmpdir.join('./enriched_resumable.csv'))
        resumer = RowCountResumer(output_path, listener=listener)

        # First pass: consume and write exactly one row.
        with open('./test/resources/people.csv') as f, resumer:
            enricher = casanova.enricher(
                f, resumer,
                keep=('name',),
                add=('x2',)
            )

            first_row = next(iter(enricher))
            enricher.writerow(first_row, [2])

        expected_rows = [
            ['name', 'x2'],
            ['John', '2']
        ]
        assert collect_csv(output_path) == expected_rows

        # Second pass: the counter starts at 2 so the added values continue
        # with 4, 6, ...
        with open('./test/resources/people.csv') as f, resumer:
            enricher = casanova.enricher(
                f, resumer,
                keep=('name',),
                add=('x2',)
            )

            for factor, row in enumerate(enricher, 2):
                enricher.writerow(row, [factor * 2])

        expected_rows = expected_rows + [
            ['Mary', '4'],
            ['Julia', '6']
        ]
        assert collect_csv(output_path) == expected_rows

        assert events == {
            'input.row': [['John', 'Matthews']],
            'output.row': [['John', '2']]
        }
Beispiel #4
0
    def test_batch_enricher(self, tmpdir):
        """Write two batches per input row and check the resulting cursor column.

        Each row gets one batch written with cursor='next' and one written
        without a cursor; the expected output has four lines per surname.
        """
        source_path = './test/resources/people.csv'
        output_path = str(tmpdir.join('./enriched.csv'))

        with open(source_path) as f, \
             open(output_path, 'w', newline='') as of:
            enricher = casanova.batch_enricher(
                f, of,
                keep=('surname',),
                add=('color',)
            )

            for row in enricher:
                enricher.writebatch(row, [['blue'], ['red']], cursor='next')
                enricher.writebatch(row, [['purple'], ['cyan']])

        # Same four (cursor, color) lines are expected for every surname.
        expected = [['surname', 'cursor', 'color']]
        for surname in ('Matthews', 'Sue', 'Stone'):
            expected.extend([
                [surname, '', 'blue'],
                [surname, 'next', 'red'],
                [surname, '', 'purple'],
                [surname, 'end', 'cyan']
            ])

        assert collect_csv(output_path) == expected
Beispiel #5
0
    def test_threadsafe_resuming_soundness(self, tmpdir):
        """Resume a threadsafe enrichment whose first run used a plain file.

        The first run writes straight to an opened output file and stops
        after index 2. The second run goes through a ThreadSafeResumer built
        on that same output path. The final file is expected to list all five
        people in index order.
        """
        source_path = './test/resources/more_people.csv'
        output_path = str(tmpdir.join('./threadsafe_resuming_soundness.csv'))

        # First run: no resumer involved, stop once index 2 has been written.
        with open(source_path) as f, open(output_path, 'w', newline='') as of:
            enricher = casanova.threadsafe_enricher(f, of)

            for position, record in enricher:
                enricher.writerow(position, record)

                if position >= 2:
                    break

        # Second run: the enricher receives the input path and the resumer.
        resumer = ThreadSafeResumer(output_path)
        with casanova.threadsafe_enricher(source_path, resumer) as enricher, resumer:
            for position, record in enricher:
                enricher.writerow(position, record)

        names = ['John', 'Lisa', 'Mary', 'Alexander', 'Gary']
        expected = [['name', 'index']]
        expected += [[name, str(position)] for position, name in enumerate(names)]

        assert collect_csv(output_path) == expected
Beispiel #6
0
    def test_threadsafe(self, tmpdir):
        """Enrich rows delivered by imap_unordered and compare them unordered.

        Every row is written with its index and an added 'x2' cell holding
        (index + 1) * 2.
        """
        def job(payload):
            # Sleep a hundredth of a second per unit found in the third cell.
            index, row = payload
            time.sleep(int(row[2]) * .01)

            return index, row

        def sort_output(o):
            # Tuples make the rows comparable without relying on file order.
            return sorted(map(tuple, o))

        source_path = './test/resources/people_unordered.csv'
        output_path = str(tmpdir.join('./enriched_resumable_threadsafe.csv'))

        with open(source_path) as f, \
             open(output_path, 'w', newline='') as of:
            enricher = casanova.threadsafe_enricher(
                f, of,
                keep=('name',),
                add=('x2',)
            )

            for index, row in imap_unordered(enricher, job, 3):
                enricher.writerow(index, row, [2 * (index + 1)])

        expected = [
            ['name', 'index', 'x2'],
            ['Mary', '1', '4'],
            ['Julia', '2', '6'],
            ['John', '0', '2']
        ]
        assert sort_output(collect_csv(output_path)) == sort_output(expected)
Beispiel #7
0
    def test_basics(self):
        """Write a header and two rows to an in-memory buffer and read them back."""
        buffer = StringIO()
        writer = Writer(buffer, ['name', 'surname'])

        for person in (['John', 'Cage'], ['Julia', 'Andrews']):
            writer.writerow(person)

        expected = [
            ['name', 'surname'],
            ['John', 'Cage'],
            ['Julia', 'Andrews']
        ]
        assert collect_csv(buffer) == expected
Beispiel #8
0
    def test_dialect(self, tmpdir):
        """Enrich a file read with delimiter=';' and add a 'line' column.

        The added cell is the zero-based position of the row in the iteration.
        """
        source_path = './test/resources/semicolons.csv'
        output_path = str(tmpdir.join('./enriched.csv'))

        with open(source_path) as f, \
             open(output_path, 'w', newline='') as of:
            enricher = casanova.enricher(f, of, delimiter=';', add=('line',))

            for line_number, row in enumerate(enricher):
                enricher.writerow(row, [line_number])

        expected = [
            ['name', 'surname', 'line'],
            ['Rose', 'Philips', '0'],
            ['Luke', 'Atman', '1']
        ]
        assert collect_csv(output_path) == expected
Beispiel #9
0
    def test_keep(self, tmpdir):
        """Enrich with keep=('name',) and check only 'name' and 'line' are output.

        The added cell is the zero-based position of the row in the iteration.
        """
        source_path = './test/resources/people.csv'
        output_path = str(tmpdir.join('./enriched_keep.csv'))

        with open(source_path) as f, \
             open(output_path, 'w', newline='') as of:
            enricher = casanova.enricher(f, of, add=('line',), keep=('name',))

            for line_number, row in enumerate(enricher):
                enricher.writerow(row, [line_number])

        expected = [
            ['name', 'line'],
            ['John', '0'],
            ['Mary', '1'],
            ['Julia', '2']
        ]
        assert collect_csv(output_path) == expected