def test_from_attributes(conn):
    """Test creation of a new attribute store from attributes."""
    with closing(conn.cursor()) as cursor:
        data_source = DataSource.from_name(cursor, "integration-test")
        entity_type = EntityType.from_name(cursor, "UtranCell")

        attribute_list = [
            Attribute(attr_name, "integer", "description for this attribute")
            for attr_name in ("cntr1", "cntr2")
        ]

        store = AttributeStore.from_attributes(
            cursor, data_source, entity_type, attribute_list)

        expected_table_name = "integration-test_UtranCell"

        # The in-memory object should already render the expected name.
        eq_(store.table_name(), expected_table_name)

        conn.commit()

        # The database-side rendering must agree with the Python side.
        cursor.execute(
            "SELECT attribute_directory.to_table_name(attributestore) "
            "FROM attribute_directory.attributestore "
            "WHERE id = %s",
            (store.id,))

        db_table_name, = cursor.fetchone()
        eq_(db_table_name, expected_table_name)
def store(self, column_names, fields, raw_data_rows):
    """Store the raw rows as trend data, one package per timestamp,
    creating the target trend store on first use."""
    for ts, rows_for_ts in grouped_by(raw_data_rows, operator.itemgetter(1)):
        # Drop the timestamp column; the package carries it separately.
        dn_value_pairs = [(dn, values) for dn, _, values in rows_for_ts]

        entity_ref = EntityDnRef(dn_value_pairs[0][0])

        with closing(self.conn.cursor()) as cursor:
            datasource = DataSource.from_name(cursor, self.datasource)
            entitytype = entity_ref.get_entitytype(cursor)

            trendstore = TrendStore.get(
                cursor, datasource, entitytype, self.granularity
            )

            if not trendstore:
                # No store yet for this combination: create one with a
                # one-day (86400 s) partition size.
                trendstore = TrendStore(
                    datasource, entitytype, self.granularity, 86400,
                    "table"
                ).create(cursor)

            self.conn.commit()

        ts_utc = ts.astimezone(pytz.utc)
        ts_str = self.offset(ts_utc).strftime("%Y-%m-%dT%H:%M:%S")

        package = RawDataPackage(
            self.granularity, ts_str, column_names, dn_value_pairs)

        trendstore.store_raw(package).run(self.conn)
def test_store_batch_update(conn):
    """Test batch wise storing with updates using staging table."""
    with closing(conn.cursor()) as cursor:
        attribute_names = ['CCR', 'Drops']
        timestamp = pytz.utc.localize(datetime.utcnow())
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")

        def build_package(values):
            # 100 consecutive entity ids, all sharing one timestamp.
            return DataPackage(
                attribute_names,
                [(10023 + i, timestamp, values) for i in range(100)]
            )

        datapackage = build_package(('0.9919', '17'))
        update_datapackage = build_package(('0.9918', '18'))

        attributestore = AttributeStore(
            datasource, entitytype, datapackage.deduce_attributes())
        attributestore.create(cursor)

        attributestore.store_batch(cursor, datapackage)
        conn.commit()

        modified_query = (
            'SELECT modified FROM {0} '
            'WHERE entity_id = 10023'
        ).format(attributestore.history_table.render())

        def fetch_modified():
            cursor.execute(modified_query)
            modified, = cursor.fetchone()
            return modified

        modified_before = fetch_modified()

        # Store the second batch for the same entities and timestamp.
        attributestore.store_batch(cursor, update_datapackage)
        conn.commit()

        modified_after = fetch_modified()

        # The update must bump the modified timestamp.
        assert modified_after > modified_before

        cursor.execute(
            "SELECT attribute_directory.materialize_curr_ptr(attributestore) "
            "FROM attribute_directory.attributestore "
            "WHERE id = %s", (attributestore.id,))

        cursor.execute(
            'SELECT timestamp, "Drops" '
            'FROM {0}'.format(attributestore.table.render()))

        # Row count should be the same as the stored batch size
        eq_(cursor.rowcount, len(datapackage.rows))

        stored_timestamp, drops = cursor.fetchone()

        # Timestamp should be the same as the stored batch timestamp
        eq_(stored_timestamp, timestamp)

        # The updated value, not the original, must be current.
        eq_(drops, 18)
def test_compact(conn):
    """Test compacting of redundant data."""
    with closing(conn.cursor()) as cursor:
        attribute_names = ['CCR', 'Drops']
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")
        timestamp = pytz.utc.localize(datetime.utcnow())

        def package_at(ts):
            # Identical values for 100 entities at the given timestamp.
            return DataPackage(
                attribute_names=attribute_names,
                rows=[
                    (10023 + i, ts, ('0.9919', '17'))
                    for i in range(100)
                ]
            )

        datapackage_a = package_at(timestamp)
        datapackage_b = package_at(timestamp + timedelta(10))

        attributestore = AttributeStore(
            datasource, entitytype, datapackage_a.deduce_attributes())
        attributestore.create(cursor)

        attributestore.store_batch(cursor, datapackage_a)
        conn.commit()
        attributestore.store_batch(cursor, datapackage_b)
        conn.commit()

        count_query = (
            "SELECT count(*) "
            "FROM {0}").format(attributestore.history_table.render())

        def history_row_count():
            cursor.execute(count_query)
            count, = cursor.fetchone()
            return count

        # Row count should be the same as the stored batch sizes summed
        eq_(history_row_count(),
            len(datapackage_b.rows) + len(datapackage_a.rows))

        attributestore.compact(cursor)
        conn.commit()

        # Row count should be the same as the first stored batch size
        eq_(history_row_count(), len(datapackage_a.rows))
def test_store_empty_rows(conn):
    """Test storing of empty datapackage."""
    with closing(conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")

        # A package with column names but zero rows.
        datapackage = DataPackage(['CCR', 'Drops'], [])

        attributestore = AttributeStore(
            datasource, entitytype, datapackage.deduce_attributes())
        attributestore.create(cursor)

        # Storing nothing should succeed without raising.
        attributestore.store_txn(datapackage).run(conn)
        conn.commit()
def test_store_empty_attributes(conn):
    """Test storing of a datapackage with rows but no attributes."""
    with closing(conn.cursor()) as cursor:
        timestamp = pytz.utc.localize(datetime.utcnow())

        # 100 entities, each with an empty value tuple.
        rows = [(10023 + i, timestamp, tuple()) for i in range(100)]
        datapackage = DataPackage([], rows)

        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")

        attributestore = AttributeStore(
            datasource, entitytype, datapackage.deduce_attributes())
        attributestore.create(cursor)

        # Storing attribute-less rows should succeed without raising.
        attributestore.store_txn(datapackage).run(conn)
        conn.commit()
def test_store_txn_with_empty(conn):
    """Test transactional storing with empty value."""
    with closing(conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")
        timestamp = pytz.utc.localize(datetime.utcnow())

        datapackage = DataPackage(
            attribute_names=['freeText'],
            rows=[(10023, timestamp, ('',))]
        )

        attributes = datapackage.deduce_attributes()

        # An empty string value deduces to the smallest datatype.
        eq_(attributes[0].datatype, 'smallint')

        attributestore = AttributeStore.from_attributes(
            cursor, datasource, entitytype, attributes)
        conn.commit()

        attributestore.store_txn(datapackage).run(conn)
def store(self, column_names, fields, raw_data_rows):
    """Store the raw rows as attribute data, creating the attribute
    store on demand from the deduced attributes."""
    rows = list(raw_data_rows)

    package = RawDataPackage(column_names, rows)
    entity_ref = EntityDnRef(rows[0][0])

    with closing(self.conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, self.datasource)
        entitytype = entity_ref.get_entitytype(cursor)

        attributestore = AttributeStore.from_attributes(
            cursor, datasource, entitytype, package.deduce_attributes())

        self.conn.commit()

    attributestore.store_raw(package).run(self.conn)
def test_store_batch_with_list_c(conn):
    """Test batch wise storing using staging table."""
    timestamp = pytz.utc.localize(datetime.utcnow())

    # Two entities whose 'refs' lists hold only empty strings.
    datapackage = DataPackage(
        ['height', 'refs'],
        [
            (10023, timestamp, ('19.5', ['', '', '', ''])),
            (10024, timestamp, ('19.3', ['', '', '', '']))
        ]
    )
    attributes = datapackage.deduce_attributes()

    with closing(conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")

        attributestore = AttributeStore(datasource, entitytype, attributes)
        attributestore.create(cursor)

        attributestore.store_batch(cursor, datapackage)
        conn.commit()
def test_store_batch_with_list_a(conn):
    """Test batch wise storing using staging table."""
    timestamp = pytz.utc.localize(datetime.utcnow())

    datapackage = DataPackage(
        ['height', 'refs'],
        [
            (10023 + i, timestamp, ('19.5', ['r34', 'r23', 'r33']))
            for i in range(100)
        ]
    )
    attributes = datapackage.deduce_attributes()

    with closing(conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, "integration-test")
        entitytype = EntityType.from_name(cursor, "UtranCell")

        attributestore = AttributeStore(datasource, entitytype, attributes)
        attributestore.create(cursor)

        attributestore.store_batch(cursor, datapackage)
        conn.commit()

        # Materialize the current-pointer data before querying it.
        cursor.execute(
            "SELECT attribute_directory.materialize_curr_ptr(attributestore) "
            "FROM attribute_directory.attributestore "
            "WHERE id = %s", (attributestore.id,))

        cursor.execute(
            "SELECT timestamp, height "
            "FROM {0}".format(attributestore.table.render()))

        # Row count should be the same as the stored batch size
        eq_(cursor.rowcount, len(datapackage.rows))

        stored_timestamp, height = cursor.fetchone()

        # Timestamp should be the same as the stored batch timestamp
        eq_(stored_timestamp, timestamp)
        eq_(height, 19.5)
def store(self, column_names, fields, raw_data_rows):
    """Store raw rows as notification records.

    Loads the notification store for this plugin's datasource; when one
    exists, column datatypes are taken from it (checked against any
    configured ``fields`` descriptors), otherwise datatypes are deduced
    from the row values and a new notification store is created.

    :param column_names: names of the value columns, in row order.
    :param fields: mapping of column name -> configured ColumnDescriptor;
        entries are optional per column (``fields.get`` may return None).
    :param raw_data_rows: iterable of (dn, timestamp, values) tuples.
    :raises Exception: when a configured column type conflicts with the
        type recorded in the existing notification store.
    """
    with closing(self.conn.cursor()) as cursor:
        datasource = DataSource.from_name(cursor, self.datasource)
        notificationstore = NotificationStore.load(cursor, datasource)

        rows = list(raw_data_rows)

        if notificationstore:
            # Existing store: its attributes are the authoritative types.
            datatype_dict = {
                attribute.name: attribute.data_type
                for attribute in notificationstore.attributes
            }

            def merge_datatypes():
                # Prefer a configured descriptor, but only when it agrees
                # with the type stored in the notification store.
                for name in column_names:
                    configured_descriptor = fields.get(name)
                    notificationstore_type = datatype_dict[name]

                    if configured_descriptor:
                        if configured_descriptor.data_type.name != notificationstore_type:
                            raise Exception("Attribute({} {}) type of notificationstore does not match configured"
                                            " type: {}".format(name, notificationstore_type, configured_descriptor.data_type.name))

                        yield configured_descriptor
                    else:
                        yield ColumnDescriptor(name, datatype_map[notificationstore_type], {})

            column_descriptors = list(merge_datatypes())
        else:
            # No store yet: deduce the datatypes from the value tuples
            # (element 2 of each raw row).
            deduced_datatype_names = deduce_data_types(
                map(operator.itemgetter(2), rows)
            )

            def merge_datatypes():
                # Configured descriptors win; otherwise fall back to the
                # deduced datatype for that column.
                for column_name, datatype_name in zip(column_names, deduced_datatype_names):
                    configured_descriptor = fields.get(column_name)

                    if configured_descriptor:
                        yield configured_descriptor
                    else:
                        yield ColumnDescriptor(column_name, datatype_map[datatype_name], {})

            column_descriptors = list(merge_datatypes())

            attributes = [
                Attribute(name, column_descriptor.data_type.name, '')
                for name, column_descriptor in zip(column_names, column_descriptors)
            ]

            # Create the store and commit so it is visible before records
            # are written.
            notificationstore = NotificationStore(
                datasource, attributes
            ).create(cursor)

            self.conn.commit()

        # One string parser per column, derived from its descriptor.
        parsers = [column_descriptor.string_parser() for column_descriptor in column_descriptors]

        for dn, timestamp, values in rows:
            record = Record(
                EntityDnRef(dn), timestamp, column_names,
                [parse(value) for parse, value in zip(parsers, values)]
            )

            notificationstore.store_record(record)(cursor)

        self.conn.commit()