def test_generator(self):
        """Round-trip generator-produced rows through an HDFPartition.

        The header and column types are intuited from the generated rows,
        written to the partition, and then the rows are loaded and read back.
        """
        from ambry_sources.sources import GeneratorSource, SourceSpec
        from ambry_sources import head, tail
        cache_fs = fsopendir(self.setup_temp_dir())

        def gen():
            # One header row followed by ten rows of five consecutive ints.
            yield list('abcde')

            for start in range(10):
                yield [start + offset for offset in range(5)]

        def make_source():
            # Every consumer below gets its own freshly started generator.
            return GeneratorSource(SourceSpec('foobar'), gen())

        partition = HDFPartition(cache_fs, 'foobar')

        source = make_source()

        # NOTE(review): head() and tail() are both fed the same source object.
        row_intuiter = RowIntuiter().run(head(source, 100), tail(source, 100))
        row_spec = self._row_intuiter_to_dict(row_intuiter)
        type_intuiter = TypeIntuiter().process_header(row_intuiter.headers)
        intuited_types = type_intuiter.run(make_source())

        with partition.writer as writer:
            writer.set_row_spec(row_spec, row_intuiter.headers)
            writer.set_types(intuited_types)

        partition.load_rows(make_source())

        self.assertEqual(partition.headers, list('abcde'))

        loaded = [row.dict for row in partition.select()]

        self.assertEqual(len(loaded), 10)
        self.assertEqual(loaded[0], {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4})
        self.assertEqual(loaded[-1], {'a': 9, 'b': 10, 'c': 11, 'd': 12, 'e': 13})
    def test_creates_virtual_tables_for_partition_with_segment_without_errors(
            self):
        """Adding a partition whose path contains a segment must not fail.

        Loads two rows into an MPRowsFile stored under a nested path, exposes
        it on an in-memory apsw connection with add_partition, and checks
        that the rows can be selected back.
        """
        fs = fsopendir('temp://')
        table = 'vid1'

        def gen():
            # generate header
            yield ['col1', 'col2']

            # generate rows
            yield [0, 0]
            yield [1, 1]

        mprows = MPRowsFile(fs, 'example.com/simple-0.1.3/1.mpr')
        mprows.load_rows(GeneratorSource(SourceSpec('foobar'), gen()))

        # create virtual tables. This should not raise an error.
        #
        connection = apsw.Connection(':memory:')
        try:
            add_partition(connection, mprows, table)
        except Exception as exc:
            # Report any failure as a test failure with a readable message.
            raise AssertionError(
                'partition adding unexpectedly failed with {} error.'.format(
                    exc))

        # check selected rows
        #
        cursor = connection.cursor()
        result = cursor.execute('SELECT * FROM {}'.format(table)).fetchall()
        self.assertEqual(result, [(0, 0), (1, 1)])
    def test_executes_select_query_without_any_error(self, fake_shares):
        """Query a PostgreSQL foreign table created for a partition.

        ``fake_shares`` is presumably injected by a patch decorator that is
        not visible here; it is configured to return True. The test passes
        when neither add_partition nor the SELECT raises.
        """
        fake_shares.return_value = True

        def gen():
            # header row, then a single data row
            yield ['col1', 'col2']
            yield [0, 0]

        fs = fsopendir('temp://')
        partition_file = MPRowsFile(fs, 'vid1')
        partition_file.load_rows(GeneratorSource(SourceSpec('foobar'), gen()))

        conn = None
        try:
            PostgreSQLTestBase._create_postgres_test_db()
            conn = psycopg2.connect(**PostgreSQLTestBase.pg_test_db_data)

            # Create the foreign table for the partition.
            with conn.cursor() as setup_cursor:
                # The already-opened transaction has to be closed first.
                setup_cursor.execute('COMMIT;')
                add_partition(setup_cursor, partition_file, 'vid1')

            # Query the foreign table that was just created.
            with conn.cursor() as query_cursor:
                query_cursor.execute('SELECT * FROM partitions.vid1;')
        finally:
            # Always release the connection and drop the test database.
            if conn:
                conn.close()
            PostgreSQLTestBase._drop_postgres_test_db()
Exemple #4
0
    def test_generator(self):
        """Load generator rows into an MPRowsFile and check its metadata.

        Verifies the recorded header/data row positions, the headers, the
        number of selected rows, the row keys and the finalized flag.
        """
        from ambry_sources.sources import GeneratorSource, SourceSpec

        cache_fs = fsopendir(self.setup_temp_dir())

        def gen():
            # One header row followed by ten rows of five consecutive ints.
            yield list('abcde')

            for start in range(10):
                yield [start + offset for offset in range(5)]

        mprows = MPRowsFile(cache_fs, 'foobar')
        f = mprows.load_rows(GeneratorSource(SourceSpec('foobar'), gen()))

        expected_info = (
            ('data_start_row', 1),
            ('data_end_row', 11),
            ('header_rows', [0]),
        )
        for key, expected in expected_info:
            self.assertEqual(expected, f.info[key])

        self.assertEqual(f.headers, list('abcde'))

        selected = list(f.select())
        self.assertEqual(len(selected), 10)
        self.assertEqual(sorted(selected[0].keys()), sorted('abcde'))

        self.assertTrue(f.is_finalized)
Exemple #5
0
 def test_uses_url_as_table(self):
     """The spec's first argument is used as the table name in the SELECT."""
     execute_mock = Mock(return_value=iter([[1], [2]]))
     fake_connection = AttrDict({'execute': execute_mock})
     source = DatabaseRelationSource(
         SourceSpec('table1'), 'sqlite', fake_connection)

     fetched = [record for record in source]

     self.assertEqual(fetched, [[1], [2]])
     expected_query = 'SELECT * FROM {};'.format('table1')
     execute_mock.assert_called_once_with(expected_query)
Exemple #6
0
    def spec(self):
        """Build a SourceSpec describing this source.

        The keyword arguments come from ``self.dict`` with ``url`` set to
        ``self.url``.
        """
        from ambry_sources.sources import SourceSpec

        spec_kwargs = self.dict
        # Per the original note: the URL ends up passed twice, once as ref
        # and once as url, and the ref is ignored.
        spec_kwargs['url'] = self.url

        return SourceSpec(**spec_kwargs)
    def _get_generator_source(self, header, rows):
        """Return a GeneratorSource that yields ``header`` and then each of ``rows``."""
        def header_then_rows():
            # The header comes first, followed by the data rows in order.
            yield header

            for data_row in rows:
                yield data_row

        source_spec = SourceSpec('foobar')
        return GeneratorSource(source_spec, header_then_rows())
    def load_sources(cls, file_name='sources.csv'):
        """Load SourceSpec objects from a CSV file in the tests' test_data dir.

        Args:
            file_name: name of the CSV file inside ``tests/test_data``.

        Returns:
            dict mapping each spec's ``name`` to its SourceSpec. Specs built
            from rows that carry ``expect_headers`` / ``expect_start`` get
            those attributes set; ``expect_start`` is None when the value is
            missing or not an integer.
        """
        import tests
        import csv
        from os.path import join, dirname
        from ambry_sources.sources import ColumnSpec, SourceSpec

        test_data = fsopendir(join(dirname(tests.__file__), 'test_data'))

        sources = {}

        # (name, start, width) of each column of the 'simple_fixed' source.
        fixed_widths = (
            ('id', 1, 6),
            ('uuid', 7, 34),
            ('int', 41, 3),
            ('float', 44, 14),
        )

        fw_columns = [
            ColumnSpec(name=name, start=start, width=width)
            for name, start, width in fixed_widths
        ]

        with test_data.open(file_name) as f:
            for row in csv.DictReader(f):

                if row['name'] == 'simple_fixed':
                    row['columns'] = fw_columns

                ss = SourceSpec(**row)

                if 'expect_headers' in row or 'expect_start' in row:
                    ss.expect_headers = row.get('expect_headers')
                    try:
                        ss.expect_start = int(row.get('expect_start'))
                    except (TypeError, ValueError):
                        # TypeError: the value is None (column absent or the
                        # row is short); ValueError: empty or non-numeric.
                        ss.expect_start = None

                sources[ss.name] = ss

        return sources
Exemple #9
0
 def test_reads_layer_specified_by_segment(self, fake_open, fake_shape,
                                           fake_dumps):
     """The spec's segment is passed to the open function as ``layer``.

     The ``fake_*`` arguments are presumably injected by patch decorators
     that are not visible here.
     """
     fake_open.return_value = self._get_fake_collection()

     source_spec = SourceSpec('http://example.com', segment=5)
     delayed_file = Mock(spec=DelayedOpen)
     delayed_file._fs = Mock()
     shapefile = ShapefileSource(source_spec, delayed_file)

     # Pulling the first row is what triggers the open call.
     next(shapefile._get_row_gen())

     self.assertEqual(len(fake_open.mock_calls), 1)
     first_open_call = fake_open.call_args_list[0]
     self.assertEqual(first_open_call[1]['layer'], 5,
                      'open function was called with wrong layer.')
Exemple #10
0
def _get_generator_source(rows=None):
    """Return a GeneratorSource with a 'col1', 'col2' header and data rows.

    When ``rows`` is falsy (None or empty), three default rows are used.
    """
    data_rows = rows if rows else [[0, 0], [1, 1], [2, 2]]

    def header_then_rows():
        # header first
        yield ['col1', 'col2']

        # then every data row in order
        for data_row in data_rows:
            yield data_row

    return GeneratorSource(SourceSpec('foobar'), header_then_rows())
    def load_sources(cls, file_name='sources.csv'):
        """Load SourceSpec objects from a CSV file in the tests' test_data dir.

        Args:
            file_name: name of the CSV file inside ``tests/test_data``.

        Returns:
            dict mapping each spec's ``name`` to its SourceSpec. Specs built
            from rows that carry ``expect_headers`` / ``expect_start`` get
            those attributes set; ``expect_start`` is None when the value is
            missing or not an integer.
        """
        import tests
        import csv
        from os.path import join, dirname
        from ambry_sources.sources import ColumnSpec, SourceSpec

        test_data = fsopendir(join(dirname(tests.__file__), 'test_data'))

        sources = {}

        # (name, start, width) of each column of the 'simple_fixed' source.
        fixed_widths = (('id', 1, 6),
                        ('uuid', 7, 34),
                        ('int', 41, 3),
                        ('float', 44, 14),
                        )

        fw_columns = [ColumnSpec(name=name, start=start, width=width)
                      for name, start, width in fixed_widths]

        with test_data.open(file_name) as f:
            for row in csv.DictReader(f):

                if row['name'] == 'simple_fixed':
                    row['columns'] = fw_columns

                ss = SourceSpec(**row)

                if 'expect_headers' in row or 'expect_start' in row:
                    ss.expect_headers = row.get('expect_headers')
                    try:
                        ss.expect_start = int(row.get('expect_start'))
                    except (TypeError, ValueError):
                        # TypeError: the value is None (column absent or the
                        # row is short); ValueError: empty or non-numeric.
                        ss.expect_start = None

                sources[ss.name] = ss

        return sources
Exemple #12
0
 def test_populates_columns_of_the_spec(self, fake_open, fake_get,
                                        fake_shape, fake_dumps):
     """Reading the first row fills ``source.spec.columns``.

     The ``fake_*`` arguments are presumably injected by patch decorators
     that are not visible here.
     """
     fake_open.return_value = self._get_fake_collection()
     fake_get.return_value = [{'name': 'col1', 'type': 'int'}]

     source_spec = SourceSpec('http://example.com')
     delayed_file = Mock(spec=DelayedOpen)
     delayed_file._fs = Mock()
     source = ShapefileSource(source_spec, delayed_file)

     # Pull the first row so the source processes its columns.
     next(source._get_row_gen())

     self.assertEqual(len(source.spec.columns), 1)
     self.assertEqual(source.spec.columns[0].name, 'col1')
     self.assertEqual(len(fake_open.mock_calls), 1)
     self.assertEqual(len(fake_get.mock_calls), 2)
Exemple #13
0
 def test_converts_row_id_to_integer(self, fake_open, fake_get, fake_shape,
                                     fake_dumps):
     """The first element of the first generated row equals 0.

     The ``fake_*`` arguments are presumably injected by patch decorators
     that are not visible here.
     """
     fake_open.return_value = self._get_fake_collection()
     fake_shape.expects_call().is_a_stub()
     fake_dumps.expects_call().is_a_stub()
     fake_get.return_value = [{'name': 'col1', 'type': 'int'}]

     source_spec = SourceSpec('http://example.com')
     delayed_file = Mock(spec=DelayedOpen)
     delayed_file._fs = Mock()
     shapefile = ShapefileSource(source_spec, delayed_file)

     leading_row = next(shapefile._get_row_gen())

     self.assertEqual(leading_row[0], 0)
     self.assertEqual(len(fake_open.mock_calls), 1)
     self.assertEqual(len(fake_get.mock_calls), 2)
Exemple #14
0
 def test_last_element_in_the_row_is_wkt(self, fake_open, fake_get,
                                         fake_shape, fake_dumps):
     """The last element of the first row is the value returned by dumps.

     The ``fake_*`` arguments are presumably injected by patch decorators
     that are not visible here.
     """
     fake_open.return_value = self._get_fake_collection()
     fake_shape.expects_call().is_a_stub()
     fake_dumps.return_value = 'I AM FAKE WKT'
     fake_get.return_value = [{'name': 'col1', 'type': 'int'}]

     source_spec = SourceSpec('http://example.com')
     delayed_file = Mock(spec=DelayedOpen)
     delayed_file._fs = Mock()
     shapefile = ShapefileSource(source_spec, delayed_file)

     leading_row = next(shapefile._get_row_gen())

     self.assertEqual(leading_row[-1], 'I AM FAKE WKT')
     self.assertEqual(len(fake_open.mock_calls), 1)
     self.assertEqual(len(fake_get.mock_calls), 2)
Exemple #15
0
 def test_saves_header(self, fake_open, fake_get, fake_shape, fake_dumps):
     """Reading the first row stores the column names in ``_headers``.

     The ``fake_*`` arguments are presumably injected by patch decorators
     that are not visible here.
     """
     fake_open.return_value = self._get_fake_collection()
     fake_get.return_value = [
         {'name': 'id', 'type': 'int'},
         {'name': 'col1', 'type': 'int'},
         {'name': 'geometry', 'type': 'geometry_type'},
     ]

     source_spec = SourceSpec('http://example.com')
     delayed_file = Mock(spec=DelayedOpen)
     delayed_file._fs = Mock()
     shapefile = ShapefileSource(source_spec, delayed_file)

     # Pull the first row so the source records its headers.
     next(shapefile._get_row_gen())

     self.assertEqual(shapefile._headers, ['id', 'col1', 'geometry'])
     self.assertEqual(len(fake_open.mock_calls), 1)
     self.assertEqual(len(fake_get.mock_calls), 2)
    def test_allow_source_data_to_start_from_0_row(self):
        """A fixed-width source with ``start_line=0`` yields every line.

        Builds a spec with no header lines and data starting at line 0,
        writes a three-line file, and checks that FixedSource returns all
        three rows split into the two declared columns.
        """
        columns = [
            ColumnSpec(name='col1', position=1, start=1, width=1),
            ColumnSpec(name='col2', position=2, start=3, width=1)
        ]
        spec = SourceSpec('http://example.com',
                          header_lines=[],
                          start_line=0,
                          columns=columns)
        # Use the unittest assertion rather than a bare assert so the check
        # still runs under -O and reports like the other assertions.
        self.assertEqual(spec.start_line, 0)

        fs = fsopendir('temp://')
        with fs.open('temp.txt', 'w') as f:
            f.write(u'1 1\n')
            f.write(u'2 2\n')
            f.write(u'3 3\n')
        fstor = DelayedOpen(fs, 'temp.txt')

        fixed_source = FixedSource(spec, fstor)
        source_data = [x for x in fixed_source]
        self.assertEqual(len(source_data), 3)
        self.assertEqual(source_data, [['1', '1'], ['2', '2'], ['3', '3']])
    def test_creates_virtual_table_for_source_with_header_containing_sql_reserved_words(
            self):
        """Columns named after SQL reserved words can be queried when quoted.

        Loads ten rows under a header made of SQL keywords, exposes the file
        on an in-memory apsw connection, and compares the first selected row
        with the first row returned by the file's reader.
        """
        # build rows reader
        cache_fs = fsopendir(self.setup_temp_dir())

        spec = SourceSpec('foobar')

        def gen():
            # header made of SQL reserved words
            yield ['create', 'index', 'where', 'select', 'distinct']

            # ten rows of five consecutive ints
            for start in range(10):
                yield [start + offset for offset in range(5)]

        source = GeneratorSource(spec, gen())
        mprows = MPRowsFile(cache_fs, spec.name).load_rows(source)

        connection = apsw.Connection(':memory:')
        table = 'table1'
        add_partition(connection, mprows, table)

        cursor = connection.cursor()

        # every generated row must be visible through the virtual table
        count_query = 'SELECT count(*) FROM {};'.format(table)
        row_count = cursor.execute(count_query).fetchall()
        self.assertEqual(row_count, [(10, )])

        with mprows.reader as reader:
            expected_first_row = next(iter(reader)).row

        # query by columns.
        columns_query = 'SELECT "create", "index", "where", "select", "distinct" FROM {} LIMIT 1;'.format(
            table)
        selected = cursor.execute(columns_query).fetchall()
        self.assertEqual(len(selected), 1)
        self.assertEqual(selected[0], expected_first_row)
Exemple #18
0
        class Rows(object):
            """Iterable stand-in for a row source that carries a SourceSpec."""

            # Spec with header lines 3 and 4 and data starting at line 5.
            spec = SourceSpec(None, header_lines=(3, 4), start_line=5)

            def __iter__(self):
                # `rows` is not defined in this class; it is resolved from an
                # enclosing scope that is not visible here.
                return iter(rows)