def test_run_hr(conn):
    """Aggregate four 15-minute (qtr) packages into one hour and verify sum_a.

    Builds a source trendstore at 900s granularity and a destination
    trendstore at 3600s, registers a 'sum(counter_a) -> sum_a' function set,
    loads four quarter packages, runs the transformation and asserts the
    aggregated sum equals 21.
    """
    with closing(conn.cursor()) as cursor:
        reset_db(cursor)

    conn.commit()

    source_granularity = create_granularity("900")
    dest_granularity = create_granularity("3600")

    with closing(conn.cursor()) as cursor:
        source_datasource = get_dummy_datasource(cursor, "dummy-src-5")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")
        dest_entitytype = get_dummy_entitytype(cursor, "dummy_type_aggregate")
        function_mapping = add_function_mapping(
            cursor, "sum", ["counter_a"], "sum_a")

        partition_size = 86400

        trendstore_1 = TrendStore3(
            source_datasource, dest_entitytype, source_granularity,
            partition_size, "table")
        trendstore_1.create(cursor)

        dest_trendstore = TrendStore3(
            dest_datasource, dest_entitytype, dest_granularity,
            partition_size, "table")
        dest_trendstore.create(cursor)

        function_set_qtr = add_function_set(
            cursor, "test_set_agg", "", [function_mapping.id],
            [source_datasource.id], dest_entitytype.id,
            source_granularity.name, dest_datasource.id, dest_entitytype.id,
            dest_granularity.name, None, ["entity_id"], None, True)

    conn.commit()

    # Load the four quarter packages; the second carries a later 'modified'
    # timestamp.
    # NOTE(review): source_1_1..source_1_4, modified_a/modified_b,
    # store_modified_at and dest_timestamp are assumed to exist at module
    # scope — confirm against the rest of the file.
    for datapackage, modified in [
            (source_1_1, modified_a),
            (source_1_2, modified_b),
            (source_1_3, modified_a),
            (source_1_4, modified_a)]:
        store_modified_at(trendstore_1, datapackage, modified).run(conn)

    logging.debug("source_1")
    logging.debug(unlines(render_datapackage(source_1_1)))

    processed_max_modified = None

    minerva_context = MinervaContext(conn, conn)

    Transformation(function_set_qtr, dest_timestamp).execute(minerva_context)

    result_table = dest_trendstore.partition(dest_timestamp).table()
    query = result_table.select(map(Column, ["entity_id", "sum_a"]))

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        logging.debug(unlines(render_result(cursor)))

        # Rendering consumed the cursor rows, so execute again before fetching.
        query.execute(cursor)
        row = cursor.fetchone()

    eq_(row[1], 21)
def test_run(conn):
    """Run a transform job through the plugin and verify state bookkeeping.

    Sets up two quarter-granularity source tables plus identity/add/multiply
    function mappings, executes the job created from `description`, and
    asserts that `state.processed_max_modified` ends up equal to the maximum
    'modified' timestamp across both source tables.
    """
    clear_database(conn)

    plugin = load_plugin()
    minerva_context = MinervaContext(conn, conn)
    instance = plugin(minerva_context)

    job_id = 67
    description = {
        "function_set_id": 42,
        "dest_timestamp": timestamp.isoformat(),
        "processed_max_modified": "2012-12-11 14:03:29+01:00"}
    config = {}

    job = instance.create_job(job_id, description, config)
    assert_not_equal(job, None)

    dest_granularity = 900

    function_mapping_table = Table("transform", "function_mapping")

    with closing(conn.cursor()) as cursor:
        state_table.truncate().execute(cursor)
        function_set_table.truncate(cascade=True).execute(cursor)

        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")
        dest_entitytype = get_dummy_entitytype(cursor, "dummy_type_standard")

        function_mapping_table.truncate().execute(cursor)

        get_function_mapping(cursor, 1, None, ["counter_a"], "identity_a")
        get_function_mapping(cursor, 2, None, ["counter_b"], "identity_b")
        get_function_mapping(cursor, 3, None, ["counter_c"], "identity_c")
        get_function_mapping(cursor, 4, "add", ["counter_a", "counter_b"],
                             "add_a_b")
        get_function_mapping(cursor, 5, "multiply", ["counter_a", "300"],
                             "a_times_300")

        get_function_set(cursor, 42, "test_set", [1, 2, 3, 4, 5], [3, 4], 42,
                         900, 6, dest_entitytype.id, dest_granularity, None,
                         [], True)

        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)

        size = 233
        job_source_id = 1
        args = (job_id, "transform", "", size, "2012-12-11 14:34:00", None,
                None, None, job_source_id, "running")
        add_job(cursor, *args)

        args = (42, description["dest_timestamp"],
                description["processed_max_modified"], "2012-12-11 13:03:00",
                job_id)
        add_state(cursor, *args)

        table_name = "dummy-src-1_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b")]

        src_table_1 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_1):
            src_table_1.drop().execute(cursor)

        src_table_1.create().execute(cursor)

        # FIX: materialize the map result. `entities` is consumed twice
        # (create_source_1 and create_source_2 below); under Python 3 a bare
        # map() is a one-shot iterator and the second consumer would see
        # nothing. list() is behavior-identical on Python 2.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

        source_1 = create_source_1(entities)
        store(cursor, src_table_1, source_1)

        table_name = "dummy-src-2_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_c")]

        src_table_2 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_2):
            src_table_2.drop().execute(cursor)

        src_table_2.create().execute(cursor)

        source_2 = create_source_2(entities)
        store(cursor, src_table_2, source_2)

        result_table = Table(
            "trend", "dummy-transform-src_dummy_type_standard_qtr_20121211")

        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)

    conn.commit()

    print("source_1")
    print(unlines(render_source(source_1)))
    print("source_2")
    print(unlines(render_source(source_2)))

    job.execute()

    columns = map(Column, ["entity_id", "identity_a", "identity_b", "add_a_b",
                           "a_times_300"])
    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        # FIX: this select has no parameters; the stale `args` tuple left
        # over from add_state() was being passed by mistake.
        query.execute(cursor)
        print(unlines(render_result(cursor)))

        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())

        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())

        # NOTE(review): the result of this 'modified' query is never fetched;
        # presumably kept for its side effects or left over — confirm.
        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(
            Eq(Column("function_set_id")))
        query.execute(cursor, (42,))
        processed_max_modified = first(cursor.fetchone())

    eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
def test_run(conn):
    """Execute a 900s -> 900s transformation and verify processed state.

    Creates two source trendstores and a result trendstore, registers
    identity/add/multiply function mappings, stores two data packages with a
    known 'modified' timestamp, runs the Transformation, and asserts that the
    recorded `processed_max_modified` equals the maximum source 'modified'.
    """
    with closing(conn.cursor()) as cursor:
        reset_db(cursor)

    conn.commit()

    minerva_context = MinervaContext(conn, conn)

    source_granularity = create_granularity("900")
    dest_granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")
        entitytype = get_dummy_entitytype(cursor, dummy_type_name)

        partition_size = 86400

        trendstore_1 = TrendStore(
            source_datasource_1, entitytype, source_granularity,
            partition_size, "table")
        trendstore_1.create(cursor)

        trendstore_2 = TrendStore(
            source_datasource_2, entitytype, source_granularity,
            partition_size, "table")
        trendstore_2.create(cursor)

        result_trendstore = TrendStore(
            dest_datasource, entitytype, dest_granularity, partition_size,
            "table")
        result_trendstore.create(cursor)

        function_mappings = [
            add_function_mapping(cursor, None, ["counter_a"], "identity_a"),
            add_function_mapping(cursor, None, ["counter_b"], "identity_b"),
            add_function_mapping(cursor, None, ["counter_c"], "identity_c"),
            add_function_mapping(cursor, "add", ["counter_a", "counter_b"],
                                 "add_a_b"),
            add_function_mapping(cursor, "multiply", ["counter_a", "300"],
                                 "a_times_300")]

        function_mapping_ids = [fm.id for fm in function_mappings]

        function_set_qtr = add_function_set(
            cursor, "test_set", "", function_mapping_ids,
            [source_datasource_1.id, source_datasource_2.id], entitytype.id,
            source_granularity.name, dest_datasource.id, entitytype.id,
            dest_granularity.name, None, [], None, True)

        # FIX: materialize the map result. `entities` is consumed by both
        # create_source_1 and create_source_2; under Python 3 a bare map()
        # is a one-shot iterator and the second call would receive nothing.
        # list() is behavior-identical on Python 2.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

    conn.commit()

    source_1 = create_source_1(source_granularity, entities)

    def store_modified_at(trendstore, datapackage, modified):
        # Build a transaction that stores `datapackage` into the matching
        # partition while forcing the state's 'modified' to a fixed value,
        # so the test can assert against a known timestamp.
        def set_modified(state):
            state["modified"] = modified

        partition = trendstore.partition(datapackage.timestamp)

        set_modified_action = UpdateState(set_modified)
        copy_from = CopyFrom(k(partition), k(datapackage))

        return DbTransaction(set_modified_action, copy_from)

    transaction = store_modified_at(trendstore_1, source_1, modified_a)
    transaction.run(conn)

    source_2 = create_source_2(source_granularity, entities)

    transaction = store_modified_at(trendstore_2, source_2, modified_a)
    transaction.run(conn)

    result_partition = result_trendstore.partition(timestamp)
    result_table = result_partition.table()

    conn.commit()

    logging.debug("source_1")
    logging.debug(unlines(render_datapackage(source_1)))
    logging.debug("source_2")
    logging.debug(unlines(render_datapackage(source_2)))

    dest_timestamp = timestamp

    transformation = Transformation(function_set_qtr, dest_timestamp)
    transformation.execute(minerva_context)

    columns = map(Column, ["entity_id", "identity_a", "identity_b", "add_a_b",
                           "a_times_300"])
    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        logging.debug(unlines(render_result(cursor)))

        src_table_1 = trendstore_1.partition(timestamp).table()
        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())

        src_table_2 = trendstore_2.partition(timestamp).table()
        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())

        # NOTE(review): the result of this 'modified' query is never fetched;
        # presumably leftover or executed for side effects — confirm.
        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(
            Eq(Column("function_set_id")))
        query.execute(cursor, (function_set_qtr.id,))
        processed_max_modified = first(cursor.fetchone())

    eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
def test_run(conn):
    """Run an aggregation job (qtr -> hr) through the plugin and check sum_a.

    Registers a 'sum(counter_a) -> sum_a' function set, loads a quarter
    source table, executes the job created from `description`, and asserts
    that the aggregated sum in the hourly result table equals 21.
    """
    clear_database(conn)

    plugin = load_plugin()
    minerva_context = MinervaContext(conn, conn)
    instance = plugin(minerva_context)

    job_id = 67
    description = {
        "function_set_id": 43,
        "dest_timestamp": "2012-12-11 13:00:00",
        "processed_max_modified": "2012-12-11 13:03:29",
    }
    config = {}

    job = instance.create_job(job_id, description, config)
    assert_not_equal(job, None)

    dest_granularity = 3600

    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, 5, "dummy-src-5")
        dest_datasource = get_dummy_datasource(cursor, 6,
                                               "dummy-transform-src")
        dest_entitytype = get_dummy_entitytype(cursor, 45,
                                               "dummy_type_aggregate")

        get_function_mapping(cursor, 11, "sum", ["counter_a"], "sum_a")

        get_function_set(
            cursor, 43, "test_set_agg", [11], [5], 45, 900, 6,
            dest_entitytype.id, dest_granularity, None, ["entity_id"]
        )

        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)

        size = 233
        job_source_id = 1
        args = (job_id, "transform", "", size, "2012-12-11 14:34:00", None,
                None, None, job_source_id, "running")
        add_job(cursor, *args)

        args = (43, description["dest_timestamp"],
                description["processed_max_modified"], "2012-12-11 13:03:00",
                job_id)
        add_state(cursor, *args)

        table_name = "dummy-src-5_dummy_type_aggregate_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b"),
        ]

        table = Table("trend", table_name, columns=columns)

        if table_exists(cursor, table):
            table.drop().execute(cursor)

        table.create().execute(cursor)

        # NOTE(review): source_1 is not defined in this test; it is assumed
        # to be a module-level fixture — confirm against the rest of the file.
        store(cursor, table, source_1)

        result_table = Table(
            "trend", "dummy-transform-src_dummy_type_aggregate_hr_20121207")

        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)

    conn.commit()

    print("source_1")
    print("\n".join(render_source(source_1)))

    job.execute()

    columns = map(Column, ["entity_id", "sum_a"])
    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        # FIX: this select takes no parameters; the stale `args` tuple left
        # over from add_state() was being passed to both execute calls by
        # mistake (the sibling hr test executes the same select with no args).
        query.execute(cursor)
        print("\n".join(render_result(cursor)))

        # Rendering consumed the cursor rows, so execute again before fetching.
        query.execute(cursor)
        row = cursor.fetchone()

    eq_(row[1], 21)