Example #1
    def test_simple(self):
        tmp = maketemp()

        hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
        hive_metadata = create.create_hive(
            hive_uri=hive_uri,
            )
        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=hive_uri,
            db_type='INTEGER',
            )
        hive_metadata.bind.dispose()

        hive_metadata = connect.get_hive(
            hive_uri=hive_uri,
            )
        t = hive_metadata.tables['partition_dimension_metadata']
        got = t.select().execute().fetchall()
        got = [dict(row) for row in got]
        # TODO also compare db_type; for now drop it so the comparison
        # below ignores it
        for row in got:
            del row['db_type']
        eq_(
            got,
            [
                dict(
                    id=dimension_id,
                    name='frob',
                    index_uri=hive_uri,
                    ),
                ],
            )
        hive_metadata.bind.dispose()
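
These tests lean on a few helpers that the snippets never define: maketemp, eq_ and assert_raises (the latter two in the style of nose.tools). A minimal sketch of what they might look like, assuming the project ships its own equivalents:

# Hypothetical stand-ins for the undefined test helpers; the real
# project presumably provides its own versions.
import tempfile

def maketemp():
    # Fresh scratch directory per test; callers join database
    # filenames onto it.
    return tempfile.mkdtemp(prefix='snakepit-test-')

def eq_(a, b, msg=None):
    # nose-style equality assertion.
    assert a == b, msg or '%r != %r' % (a, b)

def assert_raises(exc_class, callable_obj, *args, **kwargs):
    # Call callable_obj expecting exc_class; return the exception so
    # callers can assert on its message.
    try:
        callable_obj(*args, **kwargs)
    except exc_class as e:
        return e
    raise AssertionError('%s not raised' % exc_class.__name__)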
Example #2
    def test_simple(self):
        tmp = maketemp()
        directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')

        directory_metadata = create.create_primary_index(
            directory_uri=directory_uri,
            dimension_name='frob',
            db_type='INTEGER',
            )
        assert isinstance(directory_metadata, sq.MetaData)
        assert directory_metadata.bind is not None
        assert isinstance(directory_metadata.bind, sq.engine.Engine)
        eq_(str(directory_metadata.bind.url), directory_uri)
        directory_metadata.bind.dispose()
        engine = sq.create_engine(directory_uri)
        res = engine.execute(
            "SELECT name FROM sqlite_master WHERE type='table'")
        got = res.fetchall()
        res.close()
        engine.dispose()
        got = [row[0] for row in got]
        eq_(
            got,
            ['hive_primary_frob'],
            )
Example #3
 def test_repeat(self):
     tmp = maketemp()
     db_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
     hive_metadata = create.create_hive(db_uri)
     hive_metadata.bind.dispose()
     hive_metadata = create.create_hive(db_uri)
     hive_metadata.bind.dispose()
Example #4
 def test_repeat(self):
     tmp = maketemp()
     hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
     hive_metadata = create.create_hive(hive_uri)
     dimension_id = create.create_dimension(
         hive_metadata=hive_metadata,
         dimension_name='frob',
         directory_uri='fake-dir-uri',
         db_type='INTEGER',
         )
     node_id = create.create_node(
         hive_metadata,
         'frob',
         'node1',
         'fake-node-uri',
         )
     e = assert_raises(
         create.NodeExistsError,
         create.create_node,
         hive_metadata,
         'frob',
         'node1',
         'fake-node-uri',
         )
     hive_metadata.bind.dispose()
     eq_(
         str(e),
         'Node exists already: %r' % 'node1',
         )
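
Outside of a test, the duplicate-node condition exercised above would be handled with an ordinary try/except; a sketch mirroring the call in this example:

# Hypothetical application-side handling of the duplicate-node case.
try:
    create.create_node(hive_metadata, 'frob', 'node1', 'fake-node-uri')
except create.NodeExistsError as e:
    # str(e) is "Node exists already: 'node1'"
    pass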
Example #5
    def test_simple(self):
        tmp = maketemp()
        hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')

        hive_metadata = create.create_hive(hive_uri)
        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri='fake-dir-uri',
            db_type='INTEGER',
            )

        t = hive_metadata.tables['partition_dimension_metadata']
        q = sq.select(
            [
                t.c.id,
                t.c.name,
                t.c.index_uri,
                ],
            )
        res = q.execute()
        got = res.fetchall()
        res.close()
        hive_metadata.bind.dispose()
        eq_(len(got), 1)
        (got,) = got
        eq_(
            dict(got),
            dict(
                id=dimension_id,
                name='frob',
                index_uri='fake-dir-uri',
                ),
            )
Example #6
    def test_repeat(self):
        # assign_node is idempotent and shouldn't even be racy against
        # itself (the latter isn't really unit-testable)
        tmp = maketemp()

        p42_metadata = sq.MetaData()
        p42_metadata.bind = sq.create_engine(
            'sqlite:///%s' % os.path.join(tmp, 'p42.db'),
            strategy='threadlocal',
            )
        t_frob = sq.Table(
            'frob',
            p42_metadata,
            sq.Column('id', sq.Integer, primary_key=True),
            sq.Column('xyzzy', sq.Integer, nullable=False),
            )
        p42_metadata.create_all()

        directory_metadata = create.create_primary_index(
            directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
            dimension_name='frob',
            db_type='INTEGER',
            )
        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))

        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=str(directory_metadata.bind.url),
            db_type='INTEGER',
            )
        create.create_node(
            hive_metadata=hive_metadata,
            dimension_id=dimension_id,
            node_name='node42',
            node_uri=str(p42_metadata.bind.url),
            )

        node_engine = connect.assign_node(hive_metadata, 'frob', 1)
        assert isinstance(node_engine, sq.engine.Engine)
        eq_(str(node_engine.url), str(p42_metadata.bind.url))
        node_engine.dispose()

        node_engine = connect.assign_node(hive_metadata, 'frob', 1)
        assert isinstance(node_engine, sq.engine.Engine)
        eq_(str(node_engine.url), str(p42_metadata.bind.url))
        node_engine.dispose()

        t = directory_metadata.tables['hive_primary_frob']
        q = sq.select(
            [sq.func.count('*').label('count')],
            from_obj=[t],
            )
        r = q.execute().fetchone()
        got = r['count']
        eq_(got, 1)
        directory_metadata.bind.dispose()
Example #7
    def test_simple(self):
        tmp = maketemp()

        p42_metadata = sq.MetaData()
        p42_metadata.bind = sq.create_engine(
            'sqlite:///%s' % os.path.join(tmp, 'p42.db'),
            strategy='threadlocal',
            )
        t_frob = sq.Table(
            'frob',
            p42_metadata,
            sq.Column('id', sq.Integer, primary_key=True),
            sq.Column('xyzzy', sq.Integer, nullable=False),
            )
        p42_metadata.create_all()

        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
        directory_metadata = create.create_primary_index(
            directory_uri='sqlite:///%s' \
                % os.path.join(tmp, 'directory.db'),
            dimension_name='frob',
            db_type='INTEGER',
            )
        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=str(directory_metadata.bind.url),
            db_type='INTEGER',
            )
        directory_metadata.bind.dispose()
        node_id = create.create_node(
            hive_metadata=hive_metadata,
            dimension_id=dimension_id,
            node_name='node42',
            node_uri=str(p42_metadata.bind.url),
            )
        node_engine = connect.assign_node(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            )
        node_engine.dispose()

        got = connect.get_engine(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            )
        assert isinstance(got, sq.engine.Engine)
        eq_(str(got.url), str(p42_metadata.bind.url))
        got.dispose()
        hive_metadata.bind.dispose()
        p42_metadata.bind.dispose()
Example #8
 def test_repeat(self):
     tmp = maketemp()
     directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')
     directory_metadata = create.create_primary_index(
         directory_uri=directory_uri,
         dimension_name='frob',
         db_type='INTEGER',
         )
     directory_metadata.bind.dispose()
     directory_metadata = create.create_primary_index(
         directory_uri=directory_uri,
         dimension_name='frob',
         db_type='INTEGER',
         )
     directory_metadata.bind.dispose()
Example #9
    def check_type(self, typename, sqlalch_type):
        tmp = maketemp()
        directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')

        directory_metadata = create.create_primary_index(
            directory_uri=directory_uri,
            dimension_name='frob',
            db_type=typename,
            )
        column = directory_metadata.tables['hive_primary_frob'].c.id
        assert isinstance(column.type, sqlalch_type), (
            'db_type %r must result in sqlalch type %r, got %r'
            % (typename, sqlalch_type, type(column.type))
            )
        directory_metadata.bind.dispose()
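
check_type is presumably driven once per supported db_type; the parameterization isn't shown in this snippet, but a hypothetical driver could look like the following ('INTEGER' and 'CHAR' are the two db_type values seen elsewhere in these tests, and the expected SQLAlchemy types are assumptions):

    def test_types(self):
        # Hypothetical driver; the real test class may cover more types.
        for typename, sqlalch_type in [
                ('INTEGER', sq.Integer),
                ('CHAR', sq.String),
                ]:
            self.check_type(typename, sqlalch_type)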
Example #10
    def test_bad_no_dimension(self):
        tmp = maketemp()

        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))

        e = assert_raises(
            connect.NoSuchDimensionError,
            connect.unassign_node,
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            node_name='fake',
            )
        eq_(
            str(e),
            'No such dimension: %r' % 'frob',
            )
Example #11
    def test_bad_node(self):
        tmp = maketemp()

        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
        directory_metadata = create.create_primary_index(
            directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
            dimension_name='frob',
            db_type='INTEGER',
            )
        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=str(directory_metadata.bind.url),
            db_type='INTEGER',
            )
        directory_metadata.bind.dispose()
        node_id = create.create_node(
            hive_metadata=hive_metadata,
            dimension_id=dimension_id,
            node_name='node34',
            node_uri='sqlite://',
            )
        node_engine = connect.assign_node(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            )
        node_engine.dispose()
        hive_metadata.tables['node_metadata'].delete().execute()
        hive_metadata.bind.dispose()

        e = assert_raises(
            connect.NoSuchNodeError,
            connect.get_engine,
            hive_metadata,
            'frob',
            1,
            )
        eq_(
            str(e),
            'No such node: dimension %r, node_id %d' \
                % ('frob', node_id)
            )
Example #12
 def test_bad_id(self):
     tmp = maketemp()
     hive_metadata = create.create_hive(
         'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
     directory_metadata = create.create_primary_index(
         directory_uri='sqlite:///%s' \
             % os.path.join(tmp, 'directory.db'),
         dimension_name='frob',
         db_type='INTEGER',
         )
     dimension_id = create.create_dimension(
         hive_metadata=hive_metadata,
         dimension_name='frob',
         directory_uri=str(directory_metadata.bind.url),
         db_type='INTEGER',
         )
     create.create_node(
         hive_metadata=hive_metadata,
         dimension_id=dimension_id,
         node_name='node42',
         node_uri='sqlite://',
         )
     dimension_value = 1
     node_engine = connect.assign_node(
         hive_metadata=hive_metadata,
         dimension_name='frob',
         dimension_value=dimension_value,
         )
     node_engine.dispose()
     directory_metadata.bind.dispose()
     e = assert_raises(
         connect.NoSuchIdError,
         connect.get_engine,
         hive_metadata=hive_metadata,
         dimension_name='frob',
         # make it wrong to trigger the error
         dimension_value=dimension_value+1,
         )
     eq_(
         str(e),
         'No such id: dimension %r, dimension_value %r' \
             % ('frob', dimension_value+1),
         )
     hive_metadata.bind.dispose()
Example #13
 def test_simple(self):
     tmp = maketemp()
     db_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
     hive_metadata = create.create_hive(db_uri)
     assert isinstance(hive_metadata, sq.MetaData)
     assert hive_metadata.bind is not None
     assert isinstance(hive_metadata.bind, sq.engine.Engine)
     eq_(str(hive_metadata.bind.url), db_uri)
     hive_metadata.bind.dispose()
     engine = sq.create_engine(db_uri)
     res = engine.execute(
         "SELECT name FROM sqlite_master WHERE type='table'")
     got = res.fetchall()
     res.close()
     engine.dispose()
     got = [row[0] for row in got]
     eq_(
         sorted(got),
         sorted(hive.metadata.tables.keys()),
         )
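
The comparison against hive.metadata.tables.keys() implies the hive schema lives in a module-level MetaData object. Its definition isn't part of these snippets, but judging from the tables and columns the tests touch, it presumably contains at least something like the following (column types and sizes are guesses):

# Assumed shape of the hive module's schema, reconstructed from the
# table and column names used in these tests; illustrative only.
# The resource_metadata table used in Example #17 is omitted here.
import sqlalchemy as sq

metadata = sq.MetaData()

sq.Table(
    'partition_dimension_metadata',
    metadata,
    sq.Column('id', sq.Integer, primary_key=True),
    sq.Column('name', sq.String(255), nullable=False),
    sq.Column('index_uri', sq.String(255), nullable=False),
    sq.Column('db_type', sq.String(255), nullable=False),
    )

sq.Table(
    'node_metadata',
    metadata,
    sq.Column('id', sq.Integer, primary_key=True),
    sq.Column('partition_dimension_id', sq.Integer, nullable=False),
    sq.Column('name', sq.String(255), nullable=False),
    sq.Column('uri', sq.String(255), nullable=False),
    )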
Example #14
 def test_bad_dimension(self):
     tmp = maketemp()
     hive_metadata = create.create_hive(
         'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
     create.create_dimension(
         hive_metadata=hive_metadata,
         dimension_name='these-are-nt-the-droids',
         directory_uri='fake',
         db_type='INTEGER',
         )
     e = assert_raises(
         connect.NoSuchDimensionError,
         connect.get_engine,
         hive_metadata=hive_metadata,
         dimension_name='frob',
         dimension_value=123,
         )
     eq_(
         str(e),
         'No such dimension: %r' % 'frob',
         )
     hive_metadata.bind.dispose()
Example #15
    def test_bad_no_assignment(self):
        tmp = maketemp()

        directory_metadata = create.create_primary_index(
            directory_uri='sqlite:///%s' \
                % os.path.join(tmp, 'directory.db'),
            dimension_name='frob',
            db_type='INTEGER',
            )
        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))

        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=str(directory_metadata.bind.url),
            db_type='INTEGER',
            )
        node_id = create.create_node(
            hive_metadata=hive_metadata,
            dimension_id=dimension_id,
            node_name='node42',
            node_uri='fake',
            )

        e = assert_raises(
            connect.NoSuchNodeForDimensionValueError,
            connect.unassign_node,
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            node_name='node42',
            )
        eq_(
            str(e),
            'Node not found for dimension value:'
            +' dimension %r value %r, node name %r'
            % ('frob', 1, 'node42'),
            )
Example #16
    def test_bad_no_node(self):
        tmp = maketemp()

        directory_metadata = create.create_primary_index(
            directory_uri='sqlite:///%s' \
                % os.path.join(tmp, 'directory.db'),
            dimension_name='frob',
            db_type='INTEGER',
            )
        hive_metadata = create.create_hive(
            'sqlite:///%s' % os.path.join(tmp, 'hive.db'))

        dimension_id = create.create_dimension(
            hive_metadata=hive_metadata,
            dimension_name='frob',
            directory_uri=str(directory_metadata.bind.url),
            db_type='INTEGER',
            )
        node_id = create.create_node(
            hive_metadata=hive_metadata,
            # make it wrong to trigger the error
            dimension_id=dimension_id+1,
            node_name='node42',
            node_uri='fake',
            )

        e = assert_raises(
            connect.NoNodesForDimensionError,
            connect.unassign_node,
            hive_metadata=hive_metadata,
            dimension_name='frob',
            dimension_value=1,
            node_name='not-exist',
            )
        eq_(
            str(e),
            'No nodes found for dimension: %r' % 'frob',
            )
Example #17
def test_createAndUseTheHive():
    # Install The Hive Metadata Schema
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    node_uri = 'sqlite:///%s' % os.path.join(tmp, 'aNode.db')

    hive_metadata = create.create_hive(hive_uri)
    hive_metadata.bind.dispose()

    # Load a Hive
    hive_metadata = connect.get_hive(hive_uri)

    # Create a Partition Dimension and add it to the Hive

    # We are going to partition our Product domain using the product
    # type string.
    dimension_name = 'ProductType'
    directory_metadata = create.create_primary_index(
        hive_uri,
        dimension_name,
        db_type='CHAR',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        directory_uri=hive_uri,
        db_type='CHAR',
        )

    # Create a Data Node and add it to the partition dimension
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='aNode',
        node_uri=node_uri,
        )

    # Make sure everything we just added actually got put into the
    # hive metadata.
    q = sq.select(
        [sq.func.count('*').label('count')],
        from_obj=[
            hive_metadata.tables['partition_dimension_metadata'],
            ],
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    t_part = hive_metadata.tables['partition_dimension_metadata']
    t_node = hive_metadata.tables['node_metadata']
    q = sq.select(
        [sq.func.count(t_node.c.id).label('count')],
        sq.and_(
            t_part.c.id==t_node.c.partition_dimension_id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    # Add a key, just to test.
    key = "knife";
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        )
    node_engine.dispose()

    # Just cleaning up the random key.

    # TODO I made this take node_name too, seemed like a more robust
    # API; for the purposes of this test, we know we only have one
    # node --tv
    connect.unassign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        node_name='aNode',
        )

    # At this point there is no real data in the Hive, just a directory
    # of primary-key-to-node mappings.
    generic_node_metadata = sq.MetaData()
    sq.Table(
        'products',
        generic_node_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('name', sq.String(255), nullable=False, unique=True),
        sq.Column('type', sq.String(255), nullable=False),
        )

    # First we need to load our data schema on to each data node.
    q = sq.select(
        [t_node.c.uri],
        sq.and_(
            t_node.c.partition_dimension_id==t_part.c.id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchall()
    for row in res:
        # TODO not supported yet by snakepit:
        # Ordinarily, to get a connection to a node from the hive we
        # would have to provide a key and the permissions (READ or
        # READWRITE) with which we want to acquire the connection.
        # However, the getUnsafe method can be used [AND SHOULD ONLY BE
        # USED] for cases like this, when there is no data yet loaded
        # on a node and thus no key to dereference.

        node_uri = row[t_node.c.uri]
        node_engine = sq.create_engine(node_uri)
        node_metadata = sq.MetaData()
        node_metadata.bind = node_engine
        for table in generic_node_metadata.tables.values():
            table.tometadata(node_metadata)
        node_metadata.create_all()
        node_metadata.bind.dispose()

    # Set up a secondary index on products so that we can query them
    # by name

    # First create a Resource and add it to the Hive. All Secondary
    # Indexes will be associated with this Resource.
    resource_name = 'Product'
#     create.create_secondary_index(
#         directory_uri=hive_uri,
#         resource_name=resource_name,
#         column_name='type', #TODO?
#         )

    t_resource = hive_metadata.tables['resource_metadata']
    r = t_resource.insert().execute(
        dimension_id=dimension_id,
        name=resource_name,
        db_type='INTEGER',
        is_partitioning_resource=False,
        )
    (resource_id,) = r.last_inserted_ids()

    # Now create a SecondaryIndex
#     create.create_secondary_index(
#         directory_uri=hive_uri,
#         resource_name=resource_name,
#         column_name='name',
#         )

    # Add it to the Hive
#     create.add_secondary_index(
#         hive_metadata=hive_metadata,
#         resource_id=resource_id,
#         column_name='name',
#         db_type='CHAR',
#         )
    # Note: SecondaryIndexes are identified by
    # ResourceName.IndexColumnName

    # Now let's add a product to the hive.
    spork = dict(
        id=23,
        name='Spork',
        type='Cutlery',
        )

    # First we have to add a primary index entry in order to get
    # allocated to a data node.

    # While it is possible to write a record to multiple locations
    # within the Hive, the default implementation inserts a single
    # copy.
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        )

    # Next we insert the record into the assigned data node
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    node_metadata.tables['products'].insert(spork).execute()
    node_metadata.bind.dispose()

    # Update the resource id so that the hive can locate it
#     create.insert_resource_id(
#         hive_metadata=hive_metadata,
#         dimension_name=dimension_name,
#         resource_name=resource_name,
#         id=spork['id'],
#         pkey=spork['type'],
#         )

    # Finally we update the SecondaryIndex
#     connect.insert_secondary_index_key(
#         hive_metadata=hive_metadata,
#         dimension_name=dimension_name,
#         resource_name=resource_name,
#         column_name='name',
#         id=spork['name'],
#         pkey=spork['id'],
#         )

    # Retrieve spork by Primary Key
    node_engine = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        #TODO access=READ,
        )

    # Here I am taking advantage of the fact that I know there is only
    # one copy.
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)

    t = node_metadata.tables['products']
    q = sq.select(
        [
            t.c.id,
            t.c.name,
            ],
        t.c.id==spork['id'],
        limit=1,
        )
    res = q.execute().fetchone()
    assert res is not None
    product_a = dict(res)
    node_metadata.bind.dispose()

    # Make sure it's a spork
    assert spork['name'] == product_a['name']
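
Pulling the walkthrough together: the core lifecycle these tests exercise is roughly the sketch below. Every call appears in the examples above; the snakepit import path and the maketemp helper are assumptions.

import os
import sqlalchemy as sq
from snakepit import create, connect  # assumed module layout

tmp = maketemp()
hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
node_uri = 'sqlite:///%s' % os.path.join(tmp, 'node.db')

# 1. Create the hive schema and a directory (primary index) for one
#    partition dimension.
hive_metadata = create.create_hive(hive_uri)
directory_metadata = create.create_primary_index(
    directory_uri=hive_uri,
    dimension_name='frob',
    db_type='INTEGER',
    )
dimension_id = create.create_dimension(
    hive_metadata=hive_metadata,
    dimension_name='frob',
    directory_uri=hive_uri,
    db_type='INTEGER',
    )

# 2. Register a data node under that dimension.
create.create_node(
    hive_metadata=hive_metadata,
    dimension_id=dimension_id,
    node_name='node1',
    node_uri=node_uri,
    )

# 3. Route a dimension value to a node, then look the node up again.
node_engine = connect.assign_node(hive_metadata, 'frob', 1)
node_engine.dispose()
engine = connect.get_engine(hive_metadata, 'frob', 1)
engine.dispose()

directory_metadata.bind.dispose()
hive_metadata.bind.dispose()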