def store_batch_insert(cursor, table, datapackage, modified):
    column_names = ["entity_id", "timestamp", "modified"]
    column_names.extend(datapackage.trend_names)

    dest_column_names = ",".join(
        '"{0}"'.format(column_name) for column_name in column_names)

    parameters = ", ".join(["%s"] * len(column_names))

    query = (
        "INSERT INTO {0} ({1}) "
        "VALUES ({2})").format(table.render(), dest_column_names, parameters)

    rows = [
        (entity_id, datapackage.timestamp, modified) + tuple(values)
        for entity_id, values in datapackage.rows]

    logging.debug(cursor.mogrify(query, first(rows)))

    try:
        cursor.executemany(query, rows)
    except psycopg2.DatabaseError as exc:
        m = str(exc)

        if m.find("violates check constraint") > -1:
            print(cursor.mogrify(query, first(rows)))
            print(m)

        raise translate_postgresql_exception(exc)
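# store_batch_insert expects a datapackage exposing 'trend_names',
# 'timestamp' and 'rows' attributes, where each row pairs an entity id with a
# tuple of values. A minimal sketch of such an object, using a hypothetical
# namedtuple stand-in (not the actual DataPackage class):
#
#     from collections import namedtuple
#
#     DataPackageStub = namedtuple(
#         "DataPackageStub", ["trend_names", "timestamp", "rows"])
#
#     datapackage = DataPackageStub(
#         trend_names=["CCR", "Drops"],
#         timestamp=timestamp,
#         rows=[(10023, ("0.9919", "17"))])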
def test_update_modified_column(conn):
    curr_timezone = timezone("Europe/Amsterdam")

    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    data_types = extract_data_types(data_rows)

    update_data_rows = [(10023, ('10023', '0.9919', '17'))]

    timestamp = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400,
            "table").create(cursor)

        partition = trendstore.partition(timestamp)
        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)

        time.sleep(1)

        store(
            conn, SCHEMA, table.name, trend_names, timestamp,
            update_data_rows)
        conn.commit()

        query = table.select([Column("modified")])
        query.execute(cursor)

        modified_list = [modified for modified in cursor.fetchall()]

        assert_not_equal(modified_list[0], modified_list[1])

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), table.name)).execute(cursor)

        end = first(cursor.fetchone())

        eq_(end, max_modified)
def test_store_copy_from_2(conn):
    trend_names = ['CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('0.9919', '2105', '17'))
    ]
    data_types = ['integer', 'smallint', 'smallint']

    curr_timezone = timezone("Europe/Amsterdam")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    modified = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src010")
        entitytype = name_to_entitytype(cursor, "test-type002")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400,
            "table").create(cursor)

        partition = trendstore.partition(timestamp)
        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        table = partition.table()

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 1)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def create(self, cursor):
    """Create the notification store in the database and return itself."""
    if self.id:
        # Updating an already-persisted notification store is not supported.
        raise NotImplementedError()
    else:
        query = (
            "INSERT INTO notification.notificationstore "
            "(datasource_id, version) "
            "VALUES (%s, %s) RETURNING id")
        args = self.datasource.id, self.version

        cursor.execute(query, args)

        self.id = first(cursor.fetchone())

        for attribute in self.attributes:
            query = (
                "INSERT INTO notification.attribute "
                "(notificationstore_id, name, data_type, description) "
                "VALUES (%s, %s, %s, %s) "
                "RETURNING id")

            args = (
                self.id, attribute.name, attribute.data_type,
                attribute.description)

            cursor.execute(query, args)

        return self
def count_for_table(table):
    # Note: query_template, args and cursor come from the enclosing scope;
    # this helper is intended to be defined as a closure.
    query = query_template.format(table.render())

    try:
        cursor.execute(query, args)
    except (psycopg2.ProgrammingError, psycopg2.InternalError):
        return 0
    else:
        return first(cursor.fetchone())
def get_function_mapping(cursor, *args):
    id = first(args)

    try:
        return helpers.get_function_mapping(cursor, id)
    except Exception:
        # Get-or-create: fall back to creating the mapping when the lookup
        # fails. A specific exception type, like the NoSuchFunctionSetError
        # used in get_function_set below, would be preferable to this broad
        # catch.
        add_function_mapping(cursor, *args)

        return helpers.get_function_mapping(cursor, id)
def get_function_set(cursor, *args):
    id = first(args)

    try:
        return helpers.get_function_set(cursor, id)
    except helpers.NoSuchFunctionSetError:
        add_function_set(cursor, *args)

        return helpers.get_function_set(cursor, id)
def test_store_copy_from_1(conn):
    trend_names = ['CellID', 'CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '2105', '17')),
        (10047, ('10047', '0.9963', '4906', '18')),
        (10048, ('10048', '0.9935', '2448', '16')),
        (10049, ('10049', '0.9939', '5271', '32')),
        (10050, ('10050', '0.9940', '3693', '22')),
        (10051, ('10051', '0.9944', '3753', '21')),
        (10052, ('10052', '0.9889', '2168', '24')),
        (10053, ('10053', '0.9920', '2372', '19')),
        (10085, ('10085', '0.9987', '2282', '3')),
        (10086, ('10086', '0.9972', '1763', '5')),
        (10087, ('10087', '0.9931', '1453', '10'))
    ]

    curr_timezone = timezone("Europe/Amsterdam")

    data_types = extract_data_types(data_rows)
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    granularity = create_granularity("900")
    modified = curr_timezone.localize(datetime.now())

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400,
            "table").create(cursor)

        partition = trendstore.partition(timestamp)
        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store_copy_from(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 11)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def test_store_insert_rows(conn):
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]

    curr_timezone = timezone("Europe/Amsterdam")
    modified = curr_timezone.localize(datetime.now())
    time1 = curr_timezone.localize(datetime.now())
    time2 = time1 - timedelta(days=1)

    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)

        store_insert_rows(
            conn, SCHEMA, table.name, trend_names, time1, modified,
            data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 2)

        store_insert_rows(
            conn, SCHEMA, table.name, trend_names, time2, modified,
            data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)

        store_insert_rows(
            conn, SCHEMA, table.name, trend_names, time1, modified,
            data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def test_store_using_tmp(conn):
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'RadioFail', 'RFOldHo', 'AbisFailCall']
    data_rows = [
        (10023, ('10023', '0.9919', '10', '3', '3')),
        (10047, ('10047', '0.9963', '11', '5', '0')),
        (10048, ('10048', '0.9935', '12', '3', '0')),
        (10049, ('10049', '0.9939', '20', '3', '4')),
        (10050, ('10050', '0.9940', '18', '3', '0')),
        (10051, ('10051', '0.9944', '17', '2', '2')),
        (10052, ('10052', '0.9889', '18', '2', '0')),
        (10053, ('10053', '0.9920', '15', '3', '1')),
        (10023, ('10023', '0.9931', '9', '0', '1')),
        (10085, ('10085', '0.9987', '3', '0', '0')),
        (10086, ('10086', '0.9972', '3', '2', '0'))
    ]

    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)

        curr_timezone = timezone("Europe/Amsterdam")
        timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
        modified = curr_timezone.localize(datetime.now())

        # The input contains entity 10023 twice, so only 10 of the 11 rows
        # end up in the table.
        store_using_tmp(
            conn, SCHEMA, table.name, trend_names, timestamp, modified,
            data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 10)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
def retrieve(cursor, tables, columns, entities, start, end,
        subquery_filter=None, relation_table_name=None, limit=None,
        entitytype=None):
    """
    Retrieve data.

    :param cursor: Minerva database cursor
    :param columns: A list of column identifiers (possibly for different
        datasources)
    :param entities: List of entity Ids
    :param start: The start timestamp of the range of trend values
    :param end: The end timestamp of the range of trend values
    :param subquery_filter: optional subquery for additional filtering by
        JOINing on field 'id' = entity_id
    :param relation_table_name: optional relation table name for converting
        entity ids to related ones
    """
    all_rows = []

    if entities is not None and len(entities) == 0:
        return []

    columns = [ensure_column(column) for column in columns]

    # Group tables by partition size signature to be able to JOIN them later.
    tables_by_partition_signature = {}

    for table in tables:
        signature = table.name.split("_")[-1]
        tables_by_partition_signature.setdefault(signature, []).append(table)

    for tables in tables_by_partition_signature.values():
        params = []

        if start == end and start is not None and len(tables) > 1:
            cols = [
                As(Argument(), "timestamp"),
                Column("dn"),
                As(Column("id"), "entity_id")]

            q = Select(
                cols, from_=Table("directory", "entity"),
                where_=Eq(Column("entitytype_id"), Value(entitytype.id)))

            with_query = WithQuery("t", query=q)

            params.append(start)

            base_timestamp_column = Column("t", "timestamp")
            base_entity_id_column = Column("t", "entity_id")
            from_item = FromItem(Table("t"))
            data_tables = tables
        else:
            with_query = None
            base_tbl = first(tables)
            base_timestamp_column = Column(base_tbl, "timestamp")
            base_entity_id_column = Column(base_tbl, "entity_id")
            from_item = FromItem(base_tbl)
            data_tables = tables[1:]

        for tbl in data_tables:
            timestamp_comparison = Eq(
                Column(tbl, "timestamp"), base_timestamp_column)
            entity_id_comparison = Eq(
                Column(tbl, "entity_id"), base_entity_id_column)
            join_condition = And(timestamp_comparison, entity_id_comparison)

            from_item = from_item.join(
                tbl, on=join_condition, join_type="LEFT")

        if subquery_filter:
            filter_tbl = Literal("({0}) AS filter".format(subquery_filter))

            from_item = from_item.join(
                filter_tbl,
                on=Eq(Column("filter", "id"), base_entity_id_column))

        if relation_table_name:
            relation_table = Table("relation", relation_table_name)

            join_condition = Eq(
                Column("r", "source_id"), base_entity_id_column)

            from_item = from_item.left_join(
                As(relation_table, "r"), on=join_condition)

            entity_id_column = Column("r", "target_id")
        else:
            entity_id_column = base_entity_id_column

        partition_columns = [
            entity_id_column, base_timestamp_column] + columns

        where_parts = []

        if not with_query:
            if start == end and start is not None:
                where_parts.append(Eq(base_timestamp_column, Argument()))
                params.append(start)
            else:
                if start is not None:
                    where_parts.append(Gt(base_timestamp_column, Argument()))
                    params.append(start)

                if end is not None:
                    where_parts.append(
                        LtEq(base_timestamp_column, Argument()))
                    params.append(end)

        if entities is not None:
            where_parts.append(Literal(
                "{0} IN ({1:s})".format(
                    base_entity_id_column.render(),
                    ",".join(str(entity_id) for entity_id in entities))))

        if where_parts:
            where_clause = ands(where_parts)
        else:
            where_clause = None

        select = Select(
            partition_columns, with_query=with_query, from_=from_item,
            where_=where_clause, limit=limit)

        query = select.render()

        try:
            cursor.execute(query, params)
        except psycopg2.ProgrammingError as exc:
            msg = "{} in query: {}".format(
                exc, cursor.mogrify(query, params))
            raise Exception(msg)
        else:
            all_rows.extend(cursor.fetchall())

    return all_rows
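# A hypothetical call to retrieve(), assuming partition tables and a cursor
# are already at hand; the column name, entity ids and range bounds here are
# illustrative only:
#
#     rows = retrieve(
#         cursor, [table], [Column("CCR")], entities=[10023, 10047],
#         start=start, end=end, limit=100)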
def test_run(conn):
    with closing(conn.cursor()) as cursor:
        reset_db(cursor)

    conn.commit()

    minerva_context = MinervaContext(conn, conn)

    source_granularity = create_granularity("900")
    dest_granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")

        entitytype = get_dummy_entitytype(cursor, dummy_type_name)

        partition_size = 86400

        trendstore_1 = TrendStore(
            source_datasource_1, entitytype, source_granularity,
            partition_size, "table")
        trendstore_1.create(cursor)

        trendstore_2 = TrendStore(
            source_datasource_2, entitytype, source_granularity,
            partition_size, "table")
        trendstore_2.create(cursor)

        result_trendstore = TrendStore(
            dest_datasource, entitytype, dest_granularity, partition_size,
            "table")
        result_trendstore.create(cursor)

        function_mappings = [
            add_function_mapping(cursor, None, ["counter_a"], "identity_a"),
            add_function_mapping(cursor, None, ["counter_b"], "identity_b"),
            add_function_mapping(cursor, None, ["counter_c"], "identity_c"),
            add_function_mapping(
                cursor, "add", ["counter_a", "counter_b"], "add_a_b"),
            add_function_mapping(
                cursor, "multiply", ["counter_a", "300"], "a_times_300")]

        function_mapping_ids = [fm.id for fm in function_mappings]

        function_set_qtr = add_function_set(
            cursor, "test_set", "", function_mapping_ids,
            [source_datasource_1.id, source_datasource_2.id], entitytype.id,
            source_granularity.name, dest_datasource.id, entitytype.id,
            dest_granularity.name, None, [], None, True)

        # Materialize the map so the entities can be iterated more than once.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

        conn.commit()

    source_1 = create_source_1(source_granularity, entities)

    def store_modified_at(trendstore, datapackage, modified):
        def set_modified(state):
            state["modified"] = modified

        partition = trendstore.partition(datapackage.timestamp)

        set_modified_action = UpdateState(set_modified)
        copy_from = CopyFrom(k(partition), k(datapackage))

        return DbTransaction(set_modified_action, copy_from)

    transaction = store_modified_at(trendstore_1, source_1, modified_a)
    transaction.run(conn)

    source_2 = create_source_2(source_granularity, entities)

    transaction = store_modified_at(trendstore_2, source_2, modified_a)
    transaction.run(conn)

    result_partition = result_trendstore.partition(timestamp)
    result_table = result_partition.table()

    conn.commit()

    logging.debug("source_1")
    logging.debug(unlines(render_datapackage(source_1)))

    logging.debug("source_2")
    logging.debug(unlines(render_datapackage(source_2)))

    dest_timestamp = timestamp

    transformation = Transformation(function_set_qtr, dest_timestamp)
    transformation.execute(minerva_context)

    columns = list(map(Column, [
        "entity_id", "identity_a", "identity_b", "add_a_b", "a_times_300"]))

    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)

        logging.debug(unlines(render_result(cursor)))

        src_table_1 = trendstore_1.partition(timestamp).table()

        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)

        src1_max_modified = first(cursor.fetchone())

        src_table_2 = trendstore_2.partition(timestamp).table()

        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)

        src2_max_modified = first(cursor.fetchone())

        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(
            Eq(Column("function_set_id")))
        query.execute(cursor, (function_set_qtr.id,))

        processed_max_modified = first(cursor.fetchone())

        eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
def test_run(conn):
    clear_database(conn)

    plugin = load_plugin()

    minerva_context = MinervaContext(conn, conn)

    instance = plugin(minerva_context)

    job_id = 67
    description = {
        "function_set_id": 42,
        "dest_timestamp": timestamp.isoformat(),
        "processed_max_modified": "2012-12-11 14:03:29+01:00"}
    config = {}

    job = instance.create_job(job_id, description, config)

    assert_not_equal(job, None)

    dest_granularity = 900

    function_mapping_table = Table("transform", "function_mapping")

    with closing(conn.cursor()) as cursor:
        state_table.truncate().execute(cursor)
        function_set_table.truncate(cascade=True).execute(cursor)

        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")

        dest_entitytype = get_dummy_entitytype(cursor, "dummy_type_standard")

        function_mapping_table.truncate().execute(cursor)

        get_function_mapping(cursor, 1, None, ["counter_a"], "identity_a")
        get_function_mapping(cursor, 2, None, ["counter_b"], "identity_b")
        get_function_mapping(cursor, 3, None, ["counter_c"], "identity_c")
        get_function_mapping(
            cursor, 4, "add", ["counter_a", "counter_b"], "add_a_b")
        get_function_mapping(
            cursor, 5, "multiply", ["counter_a", "300"], "a_times_300")

        get_function_set(
            cursor, 42, "test_set", [1, 2, 3, 4, 5], [3, 4], 42, 900, 6,
            dest_entitytype.id, dest_granularity, None, [], True)

        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)

        size = 233
        job_source_id = 1
        args = (
            job_id, "transform", "", size, "2012-12-11 14:34:00", None, None,
            None, job_source_id, "running")
        add_job(cursor, *args)

        args = (
            42, description["dest_timestamp"],
            description["processed_max_modified"], "2012-12-11 13:03:00",
            job_id)
        add_state(cursor, *args)

        table_name = "dummy-src-1_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b")]

        src_table_1 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_1):
            src_table_1.drop().execute(cursor)

        src_table_1.create().execute(cursor)

        # Materialize the map so the entities can be iterated more than once.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

        source_1 = create_source_1(entities)

        store(cursor, src_table_1, source_1)

        table_name = "dummy-src-2_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_c")]

        src_table_2 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_2):
            src_table_2.drop().execute(cursor)

        src_table_2.create().execute(cursor)

        source_2 = create_source_2(entities)

        store(cursor, src_table_2, source_2)

        result_table = Table(
            "trend", "dummy-transform-src_dummy_type_standard_qtr_20121211")

        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)

    conn.commit()

    print("source_1")
    print(unlines(render_source(source_1)))

    print("source_2")
    print(unlines(render_source(source_2)))

    job.execute()

    columns = list(map(Column, [
        "entity_id", "identity_a", "identity_b", "add_a_b", "a_times_300"]))

    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)

        print(unlines(render_result(cursor)))

        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)

        src1_max_modified = first(cursor.fetchone())

        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)

        src2_max_modified = first(cursor.fetchone())

        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(
            Eq(Column("function_set_id")))
        query.execute(cursor, (42,))

        processed_max_modified = first(cursor.fetchone())

        eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
def load_plugin():
    """Load and return the plugin."""
    return first([
        entrypoint.load()
        for entrypoint in pkg_resources.iter_entry_points(
            group=ENTRYPOINT, name="transform")])
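# Note that load_plugin() eagerly loads every matching entry point and then
# keeps only the first result. A lazier variant, sketched here under the same
# ENTRYPOINT group (illustrative, not part of the original module), would
# load only the first match:
#
#     def load_first_plugin():
#         for entrypoint in pkg_resources.iter_entry_points(
#                 group=ENTRYPOINT, name="transform"):
#             return entrypoint.load()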
def get_timestamp(cursor):
    cursor.execute("SELECT NOW()")

    return first(cursor.fetchone())
def get_entrypoint():
    return first(list(iter_entry_points(group=ENTRYPOINT, name="transform")))