def test_simple(self):
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    hive_metadata = create.create_hive(
        hive_uri=hive_uri,
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=hive_uri,
        db_type='INTEGER',
        )
    hive_metadata.bind.dispose()
    hive_metadata = connect.get_hive(
        hive_uri=hive_uri,
        )
    t = hive_metadata.tables['partition_dimension_metadata']
    got = t.select().execute().fetchall()
    got = [dict(row) for row in got]
    #TODO
    for row in got:
        del row['db_type']
    eq_(
        got,
        [
            dict(
                id=dimension_id,
                name='frob',
                index_uri=hive_uri,
                ),
            ],
        )
    hive_metadata.bind.dispose()
def test_simple(self):
    tmp = maketemp()
    directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')
    directory_metadata = create.create_primary_index(
        directory_uri=directory_uri,
        dimension_name='frob',
        db_type='INTEGER',
        )
    assert isinstance(directory_metadata, sq.MetaData)
    assert directory_metadata.bind is not None
    assert isinstance(directory_metadata.bind, sq.engine.Engine)
    eq_(str(directory_metadata.bind.url), directory_uri)
    directory_metadata.bind.dispose()
    engine = sq.create_engine(directory_uri)
    res = engine.execute(
        "SELECT name FROM sqlite_master WHERE type='table'")
    got = res.fetchall()
    res.close()
    engine.dispose()
    got = [row[0] for row in got]
    eq_(
        got,
        ['hive_primary_frob'],
        )
def test_repeat(self):
    tmp = maketemp()
    db_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    hive_metadata = create.create_hive(db_uri)
    hive_metadata.bind.dispose()
    hive_metadata = create.create_hive(db_uri)
    hive_metadata.bind.dispose()
def test_repeat(self):
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    hive_metadata = create.create_hive(hive_uri)
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri='fake-dir-uri',
        db_type='INTEGER',
        )
    node_id = create.create_node(
        hive_metadata,
        'frob',
        'node1',
        'fake-node-uri',
        )
    e = assert_raises(
        create.NodeExistsError,
        create.create_node,
        hive_metadata,
        'frob',
        'node1',
        'fake-node-uri',
        )
    hive_metadata.bind.dispose()
    eq_(
        str(e),
        'Node exists already: %r' % 'node1',
        )
def test_simple(self):
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    hive_metadata = create.create_hive(hive_uri)
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri='fake-dir-uri',
        db_type='INTEGER',
        )
    t = hive_metadata.tables['partition_dimension_metadata']
    q = sq.select(
        [
            t.c.id,
            t.c.name,
            t.c.index_uri,
            ],
        )
    res = q.execute()
    got = res.fetchall()
    res.close()
    hive_metadata.bind.dispose()
    eq_(len(got), 1)
    (got,) = got
    eq_(
        dict(got),
        dict(
            id=dimension_id,
            name='frob',
            index_uri='fake-dir-uri',
            ),
        )
def test_repeat(self):
    # assign_node is idempotent and shouldn't even be racy against
    # itself (latter not really unit testable)
    tmp = maketemp()
    p42_metadata = sq.MetaData()
    p42_metadata.bind = sq.create_engine(
        'sqlite:///%s' % os.path.join(tmp, 'p42.db'),
        strategy='threadlocal',
        )
    t_frob = sq.Table(
        'frob',
        p42_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('xyzzy', sq.Integer, nullable=False),
        )
    p42_metadata.create_all()

    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node42',
        node_uri=str(p42_metadata.bind.url),
        )

    node_engine = connect.assign_node(hive_metadata, 'frob', 1)
    assert isinstance(node_engine, sq.engine.Engine)
    eq_(str(node_engine.url), str(p42_metadata.bind.url))
    node_engine.dispose()

    node_engine = connect.assign_node(hive_metadata, 'frob', 1)
    assert isinstance(node_engine, sq.engine.Engine)
    eq_(str(node_engine.url), str(p42_metadata.bind.url))
    node_engine.dispose()

    t = directory_metadata.tables['hive_primary_frob']
    q = sq.select(
        [sq.func.count('*').label('count')],
        from_obj=[t],
        )
    r = q.execute().fetchone()
    got = r['count']
    eq_(got, 1)
    directory_metadata.bind.dispose()
def test_simple(self):
    tmp = maketemp()
    p42_metadata = sq.MetaData()
    p42_metadata.bind = sq.create_engine(
        'sqlite:///%s' % os.path.join(tmp, 'p42.db'),
        strategy='threadlocal',
        )
    t_frob = sq.Table(
        'frob',
        p42_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('xyzzy', sq.Integer, nullable=False),
        )
    p42_metadata.create_all()

    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    directory_metadata.bind.dispose()
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node42',
        node_uri=str(p42_metadata.bind.url),
        )

    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    node_engine.dispose()

    got = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    assert isinstance(got, sq.engine.Engine)
    eq_(str(got.url), str(p42_metadata.bind.url))
    got.dispose()
    hive_metadata.bind.dispose()
    p42_metadata.bind.dispose()
def test_repeat(self):
    tmp = maketemp()
    directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')
    directory_metadata = create.create_primary_index(
        directory_uri=directory_uri,
        dimension_name='frob',
        db_type='INTEGER',
        )
    directory_metadata.bind.dispose()
    directory_metadata = create.create_primary_index(
        directory_uri=directory_uri,
        dimension_name='frob',
        db_type='INTEGER',
        )
    directory_metadata.bind.dispose()
def check_type(self, typename, sqlalch_type):
    tmp = maketemp()
    directory_uri = 'sqlite:///%s' % os.path.join(tmp, 'directory.db')
    directory_metadata = create.create_primary_index(
        directory_uri=directory_uri,
        dimension_name='frob',
        db_type=typename,
        )
    column = directory_metadata.tables['hive_primary_frob'].c.id
    assert isinstance(column.type, sqlalch_type), (
        'db_type %r must result in sqlalch type %r, got %r'
        % (typename, sqlalch_type, type(column.type)),
        )
    directory_metadata.bind.dispose()
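# The concrete type tests that drive check_type are not shown in this
# section. A hypothetical caller would look something like the sketch
# below; the test method name and the assumption that db_type 'INTEGER'
# maps to sq.Integer are illustrative only, since the actual db_type to
# SQLAlchemy type mapping lives in snakepit's create module.
def test_type_integer(self):
    # hypothetical: 'INTEGER' is the db_type used throughout these tests,
    # and sq.Integer is its most plausible SQLAlchemy counterpart
    self.check_type('INTEGER', sq.Integer)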
def test_bad_no_dimension(self):
    tmp = maketemp()
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    e = assert_raises(
        connect.NoSuchDimensionError,
        connect.unassign_node,
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        node_name='fake',
        )
    eq_(
        str(e),
        'No such dimension: %r' % 'frob',
        )
def test_bad_node(self):
    tmp = maketemp()
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    directory_metadata.bind.dispose()
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node34',
        node_uri='sqlite://',
        )
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    node_engine.dispose()
    # Remove the node behind the directory's back, so the stored
    # assignment now points at a node id that no longer exists.
    hive_metadata.tables['node_metadata'].delete().execute()
    hive_metadata.bind.dispose()
    e = assert_raises(
        connect.NoSuchNodeError,
        connect.get_engine,
        hive_metadata,
        'frob',
        1,
        )
    eq_(
        str(e),
        'No such node: dimension %r, node_id %d'
        % ('frob', node_id),
        )
def test_bad_id(self):
    tmp = maketemp()
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node42',
        node_uri='sqlite://',
        )
    dimension_value = 1
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=dimension_value,
        )
    node_engine.dispose()
    directory_metadata.bind.dispose()
    e = assert_raises(
        connect.NoSuchIdError,
        connect.get_engine,
        hive_metadata=hive_metadata,
        dimension_name='frob',
        # make it wrong to trigger the error
        dimension_value=dimension_value+1,
        )
    eq_(
        str(e),
        'No such id: dimension %r, dimension_value %r'
        % ('frob', dimension_value+1),
        )
    hive_metadata.bind.dispose()
def test_simple(self):
    tmp = maketemp()
    db_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    hive_metadata = create.create_hive(db_uri)
    assert isinstance(hive_metadata, sq.MetaData)
    assert hive_metadata.bind is not None
    assert isinstance(hive_metadata.bind, sq.engine.Engine)
    eq_(str(hive_metadata.bind.url), db_uri)
    hive_metadata.bind.dispose()
    engine = sq.create_engine(db_uri)
    res = engine.execute(
        "SELECT name FROM sqlite_master WHERE type='table'")
    got = res.fetchall()
    res.close()
    engine.dispose()
    got = [row[0] for row in got]
    eq_(
        sorted(got),
        sorted(hive.metadata.tables.keys()),
        )
def test_bad_dimension(self):
    tmp = maketemp()
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='these-are-nt-the-droids',
        directory_uri='fake',
        db_type='INTEGER',
        )
    e = assert_raises(
        connect.NoSuchDimensionError,
        connect.get_engine,
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=123,
        )
    eq_(
        str(e),
        'No such dimension: %r' % 'frob',
        )
    hive_metadata.bind.dispose()
def test_bad_no_assignment(self):
    tmp = maketemp()
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node42',
        node_uri='fake',
        )
    e = assert_raises(
        connect.NoSuchNodeForDimensionValueError,
        connect.unassign_node,
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        node_name='node42',
        )
    eq_(
        str(e),
        'Node not found for dimension value:'
        + ' dimension %r value %r, node name %r' % ('frob', 1, 'node42'),
        )
def test_bad_no_node(self):
    tmp = maketemp()
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        # make it wrong to trigger the error
        dimension_id=dimension_id+1,
        node_name='node42',
        node_uri='fake',
        )
    e = assert_raises(
        connect.NoNodesForDimensionError,
        connect.unassign_node,
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        node_name='not-exist',
        )
    eq_(
        str(e),
        'No nodes found for dimension: %r' % 'frob',
        )
def test_createAndUseTheHive():
    # Install The Hive Metadata Schema
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    node_uri = 'sqlite:///%s' % os.path.join(tmp, 'aNode.db')
    hive_metadata = create.create_hive(hive_uri)
    hive_metadata.bind.dispose()

    # Load a Hive
    hive_metadata = connect.get_hive(hive_uri)

    # Create a Partition Dimension and add it to the Hive.
    # We are going to partition our Product domain using the product
    # type string.
    dimension_name = 'ProductType'
    directory_metadata = create.create_primary_index(
        hive_uri,
        dimension_name,
        db_type='CHAR',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        directory_uri=hive_uri,
        db_type='CHAR',
        )

    # Create a Data Node and add it to the partition dimension
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='aNode',
        node_uri=node_uri,
        )

    # Make sure everything we just added actually got put into the
    # hive metadata.
    q = sq.select(
        [sq.func.count('*').label('count')],
        from_obj=[
            hive_metadata.tables['partition_dimension_metadata'],
            ],
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    t_part = hive_metadata.tables['partition_dimension_metadata']
    t_node = hive_metadata.tables['node_metadata']
    q = sq.select(
        [sq.func.count(t_node.c.id).label('count')],
        sq.and_(
            t_part.c.id==t_node.c.partition_dimension_id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    # Add a key, just to test.
    key = "knife"
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        )
    node_engine.dispose()

    # Just cleaning up the random key.
    # TODO I made this take node_name too, seemed like a more robust
    # API; for the purposes of this test, we know we only have one
    # node --tv
    connect.unassign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        node_name='aNode',
        )

    # At this point there is no real data in the Hive, just a directory
    # of Primary key to node mappings.
    generic_node_metadata = sq.MetaData()
    sq.Table(
        'products',
        generic_node_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('name', sq.String(255), nullable=False, unique=True),
        sq.Column('type', sq.String(255), nullable=False),
        )

    # First we need to load our data schema on to each data node.
    q = sq.select(
        [t_node.c.uri],
        sq.and_(
            t_node.c.partition_dimension_id==t_part.c.id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchall()
    for row in res:
        # TODO not supported yet by snakepit:
        # Ordinarily to get a connection to a node from the hive we
        # would have to provide a key and the permissions (READ or
        # READWRITE) with which we want to acquire the connection.
        # However the getUnsafe method can be used [AND SHOULD ONLY BE
        # USED] for cases like this when there is no data yet loaded on
        # a node and thus no key to dereference.
        node_uri = row[t_node.c.uri]
        node_engine = sq.create_engine(node_uri)
        node_metadata = sq.MetaData()
        node_metadata.bind = node_engine
        for table in generic_node_metadata.tables.values():
            table.tometadata(node_metadata)
        node_metadata.create_all()
        node_metadata.bind.dispose()

    # Set up a secondary index on products so that we can query them
    # by name.
    # First create a Resource and add it to the Hive. All Secondary
    # Indexes will be associated with this Resource.
    resource_name = 'Product'
    # create.create_secondary_index(
    #     directory_uri=hive_uri,
    #     resource_name=resource_name,
    #     column_name='type', #TODO?
    #     )
    t_resource = hive_metadata.tables['resource_metadata']
    r = t_resource.insert().execute(
        dimension_id=dimension_id,
        name=resource_name,
        db_type='INTEGER',
        is_partitioning_resource=False,
        )
    (resource_id,) = r.last_inserted_ids()

    # Now create a SecondaryIndex
    # create.create_secondary_index(
    #     directory_uri=hive_uri,
    #     resource_name=resource_name,
    #     column_name='name',
    #     )
    # Add it to the Hive
    # create.add_secondary_index(
    #     hive_metadata=hive_metadata,
    #     resource_id=resource_id,
    #     column_name='name',
    #     db_type='CHAR',
    #     )
    # Note: SecondaryIndexes are identified by
    # ResourceName.IndexColumnName

    # Now let's add a product to the hive.
    spork = dict(
        id=23,
        name='Spork',
        type='Cutlery',
        )

    # First we have to add a primary index entry in order to get
    # allocated to a data node.
    # While it is possible to write a record to multiple locations
    # within the Hive, the default implementation inserts a single
    # copy.
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        )

    # Next we insert the record into the assigned data node
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    node_metadata.tables['products'].insert(spork).execute()
    node_metadata.bind.dispose()

    # Update the resource id so that the hive can locate it
    # create.insert_resource_id(
    #     hive_metadata=hive_metadata,
    #     dimension_name=dimension_name,
    #     resource_name=resource_name,
    #     id=spork['id'],
    #     pkey=spork['type'],
    #     )

    # Finally we update the SecondaryIndex
    # connect.insert_secondary_index_key(
    #     hive_metadata=hive_metadata,
    #     dimension_name=dimension_name,
    #     resource_name=resource_name,
    #     column_name='name',
    #     id=spork['name'],
    #     pkey=spork['id'],
    #     )

    # Retrieve spork by Primary Key
    node_engine = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        #TODO access=READ,
        )

    # Here I am taking advantage of the fact that I know there is only
    # one copy.
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    t = node_metadata.tables['products']
    q = sq.select(
        [
            t.c.id,
            t.c.name,
            ],
        t.c.id==spork['id'],
        limit=1,
        )
    res = q.execute().fetchone()
    assert res is not None
    product_a = dict(res)
    node_metadata.bind.dispose()

    # Make sure it's a spork
    assert spork['name'] == product_a['name']