def test_simple(self):
    tmp = maketemp()

    # Create a data node database with a single "frob" table.
    p42_metadata = sq.MetaData()
    p42_metadata.bind = sq.create_engine(
        'sqlite:///%s' % os.path.join(tmp, 'p42.db'),
        strategy='threadlocal',
        )
    t_frob = sq.Table(
        'frob',
        p42_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('xyzzy', sq.Integer, nullable=False),
        )
    p42_metadata.create_all()

    # Create the hive metadata, a directory, a partition dimension
    # and one data node.
    hive_metadata = create.create_hive(
        'sqlite:///%s' % os.path.join(tmp, 'hive.db'))
    directory_metadata = create.create_primary_index(
        directory_uri='sqlite:///%s' \
            % os.path.join(tmp, 'directory.db'),
        dimension_name='frob',
        db_type='INTEGER',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        directory_uri=str(directory_metadata.bind.url),
        db_type='INTEGER',
        )
    directory_metadata.bind.dispose()
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='node42',
        node_uri=str(p42_metadata.bind.url),
        )

    # Assign dimension value 1 to a node, then look it up again and
    # make sure we get routed to the same database.
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    node_engine.dispose()
    got = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    assert isinstance(got, sq.engine.Engine)
    eq_(str(got.url), str(p42_metadata.bind.url))
    got.dispose()

    hive_metadata.bind.dispose()
    p42_metadata.bind.dispose()
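
# A minimal sketch, not part of the original tests: how application code
# might use the engine returned by connect.get_engine to read from the
# routed node. It reuses only objects and calls that already appear in
# test_simple above; the helper name is hypothetical.
def _example_read_frob(hive_metadata, t_frob):
    # Route to whichever node the hive assigned to dimension value 1.
    engine = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name='frob',
        dimension_value=1,
        )
    # t_frob is the table definition from test_simple; selecting through
    # the routed engine reads the frob rows stored on that node.
    rows = engine.execute(t_frob.select()).fetchall()
    engine.dispose()
    return rows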
def test_createAndUseTheHive():
    # Install The Hive Metadata Schema
    tmp = maketemp()
    hive_uri = 'sqlite:///%s' % os.path.join(tmp, 'hive.db')
    node_uri = 'sqlite:///%s' % os.path.join(tmp, 'aNode.db')
    hive_metadata = create.create_hive(hive_uri)
    hive_metadata.bind.dispose()

    # Load a Hive
    hive_metadata = connect.get_hive(hive_uri)

    # Create a Partition Dimension and add it to the Hive. We are
    # going to partition our Product domain using the product type
    # string.
    dimension_name = 'ProductType'
    directory_metadata = create.create_primary_index(
        hive_uri,
        dimension_name,
        db_type='CHAR',
        )
    dimension_id = create.create_dimension(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        directory_uri=hive_uri,
        db_type='CHAR',
        )

    # Create a Data Node and add it to the partition dimension
    node_id = create.create_node(
        hive_metadata=hive_metadata,
        dimension_id=dimension_id,
        node_name='aNode',
        node_uri=node_uri,
        )

    # Make sure everything we just added actually got put into the
    # hive metadata.
    q = sq.select(
        [sq.func.count('*').label('count')],
        from_obj=[
            hive_metadata.tables['partition_dimension_metadata'],
            ],
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    t_part = hive_metadata.tables['partition_dimension_metadata']
    t_node = hive_metadata.tables['node_metadata']
    q = sq.select(
        [sq.func.count(t_node.c.id).label('count')],
        sq.and_(
            t_part.c.id==t_node.c.partition_dimension_id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchone()
    assert res['count'] > 0

    # Add a key, just to test.
    key = 'knife'
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        )
    node_engine.dispose()

    # Just cleaning up the random key.
    # TODO I made this take node_name too, seemed like a more robust
    # API; for the purposes of this test, we know we only have one
    # node --tv
    connect.unassign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=key,
        node_name='aNode',
        )

    # At this point there is no real data in the Hive, just a
    # directory of primary key to node mappings.
    generic_node_metadata = sq.MetaData()
    sq.Table(
        'products',
        generic_node_metadata,
        sq.Column('id', sq.Integer, primary_key=True),
        sq.Column('name', sq.String(255), nullable=False, unique=True),
        sq.Column('type', sq.String(255), nullable=False),
        )

    # First we need to load our data schema on to each data node.
    q = sq.select(
        [t_node.c.uri],
        sq.and_(
            t_node.c.partition_dimension_id==t_part.c.id,
            t_part.c.name==dimension_name,
            ),
        )
    res = q.execute().fetchall()
    for row in res:
        # TODO not supported yet by snakepit:
        # Ordinarily, to get a connection to a node from the hive we
        # would have to provide a key and the permissions (READ or
        # READWRITE) with which we want to acquire the connection.
        # However, the getUnsafe method can be used [AND SHOULD ONLY
        # BE USED] for cases like this, when there is no data yet
        # loaded on a node and thus no key to dereference.
        node_uri = row[t_node.c.uri]
        node_engine = sq.create_engine(node_uri)
        node_metadata = sq.MetaData()
        node_metadata.bind = node_engine
        for table in generic_node_metadata.tables.values():
            table.tometadata(node_metadata)
        node_metadata.create_all()
        node_metadata.bind.dispose()

    # Set up a secondary index on products so that we can query them
    # by name.

    # First create a Resource and add it to the Hive. All Secondary
    # Indexes will be associated with this Resource.
    resource_name = 'Product'
    # create.create_secondary_index(
    #     directory_uri=hive_uri,
    #     resource_name=resource_name,
    #     column_name='type', #TODO?
    #     )
    t_resource = hive_metadata.tables['resource_metadata']
    r = t_resource.insert().execute(
        dimension_id=dimension_id,
        name=resource_name,
        db_type='INTEGER',
        is_partitioning_resource=False,
        )
    (resource_id,) = r.last_inserted_ids()

    # Now create a SecondaryIndex
    # create.create_secondary_index(
    #     directory_uri=hive_uri,
    #     resource_name=resource_name,
    #     column_name='name',
    #     )

    # Add it to the Hive
    # create.add_secondary_index(
    #     hive_metadata=hive_metadata,
    #     resource_id=resource_id,
    #     column_name='name',
    #     db_type='CHAR',
    #     )

    # Note: SecondaryIndexes are identified by
    # ResourceName.IndexColumnName

    # Now let's add a product to the hive.
    spork = dict(
        id=23,
        name='Spork',
        type='Cutlery',
        )

    # First we have to add a primary index entry in order to get
    # allocated to a data node. While it is possible to write a
    # record to multiple locations within the Hive, the default
    # implementation inserts a single copy.
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        )

    # Next we insert the record into the assigned data node.
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    node_metadata.tables['products'].insert(spork).execute()
    node_metadata.bind.dispose()

    # Update the resource id so that the hive can locate it.
    # create.insert_resource_id(
    #     hive_metadata=hive_metadata,
    #     dimension_name=dimension_name,
    #     resource_name=resource_name,
    #     id=spork['id'],
    #     pkey=spork['type'],
    #     )

    # Finally we update the SecondaryIndex.
    # connect.insert_secondary_index_key(
    #     hive_metadata=hive_metadata,
    #     dimension_name=dimension_name,
    #     resource_name=resource_name,
    #     column_name='name',
    #     id=spork['name'],
    #     pkey=spork['id'],
    #     )

    # Retrieve spork by Primary Key.
    node_engine = connect.get_engine(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=spork['type'],
        #TODO access=READ,
        )
    # Here I am taking advantage of the fact that I know there is
    # only one copy.
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    t = node_metadata.tables['products']
    q = sq.select(
        [
            t.c.id,
            t.c.name,
            ],
        t.c.id==spork['id'],
        limit=1,
        )
    res = q.execute().fetchone()
    assert res is not None
    product_a = dict(res)
    node_metadata.bind.dispose()

    # Make sure it's a spork.
    assert spork['name'] == product_a['name']
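
# A minimal sketch, not part of the original tests: the assign-node /
# copy-schema / insert steps used for the spork record above could be
# captured in a helper like the hypothetical one below. It uses only
# calls already exercised in this file.
def _insert_product(hive_metadata, dimension_name, generic_node_metadata, row):
    # Ask the hive to allocate (or reuse) a node for this partition value.
    node_engine = connect.assign_node(
        hive_metadata=hive_metadata,
        dimension_name=dimension_name,
        dimension_value=row['type'],
        )
    # Copy the generic schema onto metadata bound to the assigned node
    # and insert the record there.
    node_metadata = sq.MetaData()
    node_metadata.bind = node_engine
    for table in generic_node_metadata.tables.values():
        table.tometadata(node_metadata)
    node_metadata.tables['products'].insert(row).execute()
    node_metadata.bind.dispose()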