def create_select_statement(source_table):
    # Build the trend SELECT statement for one source table.
    #
    # NOTE(review): relies on names from an enclosing scope that is not
    # visible here: subquery_filter, relation_table_name,
    # select_samples_column, trend_names, enquote_column_name, Table —
    # confirm this is a nested function when editing.
    join_parts = []

    # Optional entity filter: restrict to ids produced by the sub-query.
    if subquery_filter:
        join_part = (
            "JOIN ({0}) AS filter "
            "ON filter.id = {1}.entity_id").format(
            subquery_filter, source_table.render())
        join_parts.append(join_part)

    # Optional relation mapping: translate source entity ids to target ids.
    if relation_table_name:
        relation_table = Table("relation", relation_table_name)
        return_id_field = "r.target_id AS entity_id"
        join_part = (
            "LEFT JOIN {0} r "
            "ON r.source_id = {1}.entity_id").format(
            relation_table.render(), source_table.render())
        join_parts.append(join_part)
    else:
        return_id_field = "entity_id"

    # The query uses psycopg2-style named parameters %(start)s / %(end)s.
    # NOTE(review): there is no comma between {1} and {2} — presumably
    # select_samples_column renders with its own trailing comma; verify.
    return (
        "SELECT {0}, %(end)s, {1} {2} "
        "FROM {3} {4} "
        "WHERE timestamp > %(start)s AND timestamp <= %(end)s").format(
        return_id_field, select_samples_column,
        ",".join(map(enquote_column_name, trend_names)),
        source_table.render(), " ".join(join_parts))
def delete_by_sub_query(conn, table_name, timestamp, entityselection):
    """
    Delete rows from table for a specific timestamp and entity_ids in
    entityselection.

    :param conn: database connection
    :param table_name: name of the table (in schema SCHEMA) to delete from
    :param timestamp: only rows with this exact timestamp are removed
    :param entityselection: provides create_temp_table() that fills the
        temporary "entity_filter" table with the entity ids to delete
    :raises NoSuchTable: when the target table does not exist
    """
    table = Table(SCHEMA, table_name)

    delete_query = (
        "DELETE FROM {} d USING entity_filter f "
        "WHERE d.timestamp = %s AND f.entity_id = d.entity_id"
    ).format(table.render())

    args = (timestamp,)

    with closing(conn.cursor()) as cursor:
        # Materialize the entity selection as a temp table joined above.
        entityselection.create_temp_table(cursor, "entity_filter")

        logging.debug(cursor.mogrify(delete_query, args))

        try:
            cursor.execute(delete_query, args)
        except psycopg2.DatabaseError as exc:
            if exc.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE:
                raise NoSuchTable()
            else:
                # Bare raise preserves the original traceback; `raise exc`
                # would re-raise from this frame instead.
                raise
def test_table_create(conn):
    """A created table must be visible in the database catalog."""
    test_table = Table("test_table")
    create_query = test_table.create()

    with_cursor(conn, create_query.execute)

    with closing(conn.cursor()) as cursor:
        eq_(table_exists(cursor, test_table), True)
def add_function_set(cursor, *args):
    """Insert a single row (values in `args`) into transform.function_set."""
    col_names = [
        "id", "name", "mapping_signature", "source_datasource_ids",
        "source_entitytype_id", "source_granularity", "dest_datasource_id",
        "dest_entitytype_id", "dest_granularity", "filter_sub_query",
        "group_by", "enabled"]

    function_set_table = Table("transform", "function_set")
    insert_columns = [Column(name) for name in col_names]

    function_set_table.insert(insert_columns).execute(cursor, args)
def test_create_temp_table_from(conn):
    """A temporary table can be derived from an existing trend table."""
    trend_names = ["CellID", "CCR", "Drops"]
    data_types = ["float", "smallint", "smallint"]
    source_table = Table(SCHEMA, "storage_tmp_test_table")

    with closing(conn.cursor()) as cursor:
        # Start from a clean slate, then build the table to copy from.
        source_table.drop().if_exists().execute(cursor)
        create_trend_table(conn, SCHEMA, source_table, trend_names, data_types)
        create_temp_table_from(conn, SCHEMA, source_table)
def add_state(cursor, *args):
    """
    Insert a row into transform.state if no row exists yet for the
    (function_set_id, dest_timestamp) pair in args[0:2].

    args: (function_set_id, dest_timestamp, processed_max_modified,
    max_modified, job_id)
    """
    table = Table("transform", "state")
    col_names = ["function_set_id", "dest_timestamp",
                 "processed_max_modified", "max_modified", "job_id"]
    # Use a real list: the result is subscripted below, and on Python 3
    # map() returns a non-subscriptable iterator.
    columns = [Column(name) for name in col_names]

    select = table.select(1).where_(And(Eq(columns[0]), Eq(columns[1])))
    select.execute(cursor, args[0:2])

    if cursor.rowcount == 0:
        table.insert(columns).execute(cursor, args)
def get_relations(cursor, relation_type_name):
    """
    Return dict of related entity ids specified by relation_type_name
    """
    relation_table = Table("relation", relation_type_name)

    select_query = relation_table.select(
        [Column("source_id"), Column("target_id")])
    select_query.execute(cursor)

    relations = {}
    # Group target ids per source id.
    for source_id, target_id in cursor.fetchall():
        relations.setdefault(source_id, []).append(target_id)

    return relations
def add_job_source(cursor, *args):
    """
    Insert a row into system.job_source unless a row with the same id
    (args[0]) already exists.

    args: (id, name, job_type, config)
    """
    table = Table("system", "job_source")
    col_names = "id", "name", "job_type", "config"
    # Use a real list: `columns[0]` below would fail on Python 3, where
    # map() returns a non-subscriptable iterator.
    columns = [Column(name) for name in col_names]
    column_id = columns[0]

    select = Select(1, from_=table, where_=Eq(column_id))
    select.execute(cursor, (args[0], ))

    if cursor.rowcount == 0:
        table.insert(columns).execute(cursor, args)
def add_job(cursor, *args):
    """
    Insert a row into system.job unless a row with the same id (args[0])
    already exists.

    args: (id, type, description, size, created, started, finished,
    success, job_source_id, state)
    """
    table = Table("system", "job")
    col_names = ("id", "type", "description", "size", "created", "started",
                 "finished", "success", "job_source_id", "state")
    # Use a real list: `columns[0]` below would fail on Python 3, where
    # map() returns a non-subscriptable iterator.
    columns = [Column(name) for name in col_names]
    column_id = columns[0]

    select = Select(1, from_=table, where_=Eq(column_id))
    select.execute(cursor, (args[0], ))

    if cursor.rowcount == 0:
        table.insert(columns).execute(cursor, args)
def test_table_drop(conn):
    """Dropping a table removes it from the catalog again."""
    test_table = Table("test_table")

    with closing(conn.cursor()) as cursor:
        test_table.create().execute(cursor)
        eq_(table_exists(cursor, test_table), True)

        test_table.drop().execute(cursor)
        eq_(table_exists(cursor, test_table), False)
def create_temp_table_from(cursor, table):
    """
    Create a temporary table with the same structure as `table` and return
    the temporary Table object. The table is dropped on transaction commit.
    """
    tmp_table = Table("tmp_{0}".format(table.name))

    sql = (
        "CREATE TEMPORARY TABLE {0} (LIKE {1}) "
        "ON COMMIT DROP").format(tmp_table.render(), table.render())

    cursor.execute(sql)

    return tmp_table
def __init__(self, datasource, attributes):
    # Database identifier; None until the store is persisted/loaded.
    self.id = None
    self.version = 1
    self.datasource = datasource
    self.attributes = attributes
    table_name = datasource.name
    # Notification records live in a per-datasource table in the
    # "notification" schema.
    self.table = Table("notification", table_name)
def add_function_set(cursor, *args):
    """
    Insert a row into transform.function_set (values in `args`, id is
    generated) and return the function set built from the stored row.
    """
    col_names = ["name", "description", "mapping_signature",
                 "source_datasource_ids", "source_entitytype_id",
                 "source_granularity", "dest_datasource_id",
                 "dest_entitytype_id", "dest_granularity",
                 "filter_sub_query", "group_by", "relation_type_id",
                 "enabled"]
    columns = [Column(name) for name in col_names]
    table = Table("transform", "function_set")

    insert_query = table.insert(columns).returning("id")
    insert_query.execute(cursor, args)

    # Named to avoid shadowing the `id` builtin.
    function_set_id, = cursor.fetchone()

    row = (function_set_id,) + args

    return helpers.function_set_from_row(cursor, row)
def test_store_insert_rows(conn):
    """Storing rows adds new (timestamp, entity) rows but never duplicates."""
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    curr_timezone = timezone("Europe/Amsterdam")
    modified = curr_timezone.localize(datetime.now())
    time1 = curr_timezone.localize(datetime.now())
    time2 = time1 - timedelta(days=1)
    data_types = extract_data_types(data_rows)
    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)
        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)
        # First store: both rows inserted.
        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1,
                          modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 2)
        # Same entities at an earlier timestamp: two more rows.
        store_insert_rows(conn, SCHEMA, table.name, trend_names, time2,
                          modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)
        # Re-storing the first timestamp must not create duplicates.
        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1,
                          modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)
        # The stored max(modified) must equal the value passed to store.
        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())
        eq_(max_modified, modified)
def create_trend_table(conn, schema, table, column_names, data_types):
    """
    Create a trend table with standard entity_id/timestamp/modified columns
    plus one column per trend, indexes, default and ownership.

    :param conn: psycopg2 database connection
    :param schema: name of the database schema to create the table in
    :param table: name of table to be created, or Table instance
    :param column_names: names of the trend columns
    :param data_types: SQL type per trend column, parallel to column_names
    """
    # Each fragment ends with ", " so the last one joins up with the
    # CONSTRAINT clause in the CREATE TABLE statement below.
    columns_part = "".join(
        ['"{0}" {1}, '.format(name, data_type)
         for (name, data_type) in zip(column_names, data_types)]
    )
    if isinstance(table, str):
        table = Table(schema, table)
    query = (
        "CREATE TABLE {0} ( "
        "entity_id integer NOT NULL, "
        '"timestamp" timestamp with time zone NOT NULL, '
        '"modified" timestamp with time zone NOT NULL, '
        "{1}"
        'CONSTRAINT "{2}_pkey" PRIMARY KEY (entity_id, "timestamp"))'
    ).format(table.render(), columns_part, table.name)
    alter_query = "ALTER TABLE {0} ALTER COLUMN modified " "SET DEFAULT CURRENT_TIMESTAMP".format(table.render())
    index_query_modified = 'CREATE INDEX "idx_{0}_modified" ON {1} ' "USING btree (modified)".format(
        table.name, table.render()
    )
    index_query_timestamp = 'CREATE INDEX "idx_{0}_timestamp" ON {1} ' "USING btree (timestamp)".format(
        table.name, table.render()
    )
    owner_query = "ALTER TABLE {} OWNER TO minerva_writer".format(table.render())
    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(query)
            cursor.execute(alter_query)
            cursor.execute(index_query_modified)
            cursor.execute(index_query_timestamp)
            cursor.execute(owner_query)
        except psycopg2.IntegrityError as exc:
            # apparently the table has been created already, so ignore
            pass
        except psycopg2.ProgrammingError as exc:
            if exc.pgcode == psycopg2.errorcodes.DUPLICATE_TABLE:
                # apparently the table has been created already, so ignore
                pass
            else:
                raise NonRecoverableError("ProgrammingError({0}): {1}".format(exc.pgcode, exc.pgerror))
        else:
            # Only on clean creation: grants and commit.
            # NOTE(review): when a duplicate-table error is swallowed above,
            # no rollback/commit happens and the transaction is left in the
            # aborted state — confirm callers handle that.
            grant(conn, "TABLE", "SELECT", table.render(), "minerva")
            grant(conn, "TABLE", "TRIGGER", table.render(), "minerva_writer")
            conn.commit()
def test_store_using_tmp(conn):
    """Storing via a temp table de-duplicates rows with the same entity id."""
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'RadioFail', 'RFOldHo', 'AbisFailCall']
    # 11 input rows, but entity 10023 appears twice — only 10 rows should
    # end up in the table.
    data_rows = [
        (10023, ('10023', '0.9919', '10', '3', '3')),
        (10047, ('10047', '0.9963', '11', '5', '0')),
        (10048, ('10048', '0.9935', '12', '3', '0')),
        (10049, ('10049', '0.9939', '20', '3', '4')),
        (10050, ('10050', '0.9940', '18', '3', '0')),
        (10051, ('10051', '0.9944', '17', '2', '2')),
        (10052, ('10052', '0.9889', '18', '2', '0')),
        (10053, ('10053', '0.9920', '15', '3', '1')),
        (10023, ('10023', '0.9931', '9', '0', '1')),
        (10085, ('10085', '0.9987', '3', '0', '0')),
        (10086, ('10086', '0.9972', '3', '2', '0'))
    ]
    data_types = extract_data_types(data_rows)
    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)
        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)
        curr_timezone = timezone("Europe/Amsterdam")
        timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
        modified = curr_timezone.localize(datetime.now())
        store_using_tmp(conn, SCHEMA, table.name, trend_names, timestamp,
                        modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 10)
        # The stored max(modified) must equal the value passed to store.
        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())
        eq_(max_modified, modified)
def test_copy_from(conn):
    """Rows loaded via COPY FROM a stream are queryable by id and by name."""
    stream = StringIO(
        "1\tfirst\n"
        "2\tsecond\n"
        "3\tthird\n")

    columns = [
        Column("id", type_="integer"),
        Column("name", type_="character varying")]
    table = Table("test_table", columns=columns)

    copy_action = Copy(table).columns(columns).from_(stream)

    with closing(conn.cursor()) as cursor:
        table.create().execute(cursor)
        copy_action.execute(cursor)

        base_select = table.select([Column("id"), Column("name")])
        query_by_id = base_select.where_(Eq(Column("id")))
        query_by_name = base_select.where_(Eq(Column("name")))

        query_by_id.execute(cursor, (2,))
        row_id, row_name = cursor.fetchone()
        eq_(row_name, "second")

        query_by_name.execute(cursor, ("third",))
        row_id, row_name = cursor.fetchone()
        eq_(row_id, 3)
def create_temp_table_from(conn, schema, table):
    """
    Create a temporary table structured like `table` (dropped on commit)
    and return the temporary table name.
    """
    if isinstance(table, str):
        table = Table(schema, table)

    tmp_table_name = "tmp_{0}".format(table.name)

    sql = ('CREATE TEMPORARY TABLE "{0}" (LIKE {1}) '
           "ON COMMIT DROP").format(tmp_table_name, table.render())

    with closing(conn.cursor()) as cursor:
        cursor.execute(sql)

    return tmp_table_name
def test_run(conn):
    """
    End-to-end transform run: seed two source trend tables, execute the
    transform job and verify the recorded processed_max_modified matches
    the newest modified value of the sources.
    """
    clear_database(conn)
    plugin = load_plugin()
    minerva_context = MinervaContext(conn, conn)
    instance = plugin(minerva_context)
    job_id = 67
    description = {
        "function_set_id": 42,
        "dest_timestamp": timestamp.isoformat(),
        "processed_max_modified": "2012-12-11 14:03:29+01:00"}
    config = {}
    job = instance.create_job(job_id, description, config)
    assert_not_equal(job, None)
    dest_granularity = 900
    function_mapping_table = Table("transform", "function_mapping")
    with closing(conn.cursor()) as cursor:
        state_table.truncate().execute(cursor)
        function_set_table.truncate(cascade=True).execute(cursor)
        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")
        dest_entitytype = get_dummy_entitytype(cursor, "dummy_type_standard")
        function_mapping_table.truncate().execute(cursor)
        # Mappings 1-3 pass counters through; 4 and 5 derive new counters.
        get_function_mapping(cursor, 1, None, ["counter_a"], "identity_a")
        get_function_mapping(cursor, 2, None, ["counter_b"], "identity_b")
        get_function_mapping(cursor, 3, None, ["counter_c"], "identity_c")
        get_function_mapping(cursor, 4, "add", ["counter_a", "counter_b"],
                             "add_a_b")
        get_function_mapping(cursor, 5, "multiply", ["counter_a", "300"],
                             "a_times_300")
        get_function_set(cursor, 42, "test_set", [1, 2, 3, 4, 5], [3, 4],
                         42, 900, 6, dest_entitytype.id, dest_granularity,
                         None, [], True)
        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)
        size = 233
        job_source_id = 1
        args = (job_id, "transform", "", size, "2012-12-11 14:34:00", None,
                None, None, job_source_id, "running")
        add_job(cursor, *args)
        args = (42, description["dest_timestamp"],
                description["processed_max_modified"],
                "2012-12-11 13:03:00", job_id)
        add_state(cursor, *args)
        # First source table: counter_a and counter_b.
        table_name = "dummy-src-1_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b")]
        src_table_1 = Table("trend", table_name, columns=columns)
        if table_exists(cursor, src_table_1):
            src_table_1.drop().execute(cursor)
        src_table_1.create().execute(cursor)
        # Materialize to a list: map() is a one-shot iterator on Python 3
        # and `entities` is consumed twice (source_1 and source_2).
        entities = list(map(partial(get_or_create_entity, cursor), dns))
        source_1 = create_source_1(entities)
        store(cursor, src_table_1, source_1)
        # Second source table: counter_c.
        table_name = "dummy-src-2_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_c")]
        src_table_2 = Table("trend", table_name, columns=columns)
        if table_exists(cursor, src_table_2):
            src_table_2.drop().execute(cursor)
        src_table_2.create().execute(cursor)
        source_2 = create_source_2(entities)
        store(cursor, src_table_2, source_2)
        result_table = Table(
            "trend", "dummy-transform-src_dummy_type_standard_qtr_20121211")
        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)
    conn.commit()
    print("source_1")
    print(unlines(render_source(source_1)))
    print("source_2")
    print(unlines(render_source(source_2)))
    job.execute()
    columns = map(Column, ["entity_id", "identity_a", "identity_b",
                           "add_a_b", "a_times_300"])
    query = result_table.select(columns)
    with closing(conn.cursor()) as cursor:
        query.execute(cursor, args)
        print(unlines(render_result(cursor)))
        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())
        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())
        query = modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), result_table.name))
        query.execute(cursor)
        # The recorded state must equal the newest source modification time.
        query = state_table.select(Column("processed_max_modified")).where_(
            Eq(Column("function_set_id")))
        query.execute(cursor, (42,))
        processed_max_modified = first(cursor.fetchone())
        eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
def test_run(conn):
    """Aggregate transform run: sum over counter_a should produce 21."""
    clear_database(conn)
    plugin = load_plugin()
    minerva_context = MinervaContext(conn, conn)
    instance = plugin(minerva_context)
    job_id = 67
    description = {
        "function_set_id": 43,
        "dest_timestamp": "2012-12-11 13:00:00",
        "processed_max_modified": "2012-12-11 13:03:29",
    }
    config = {}
    job = instance.create_job(job_id, description, config)
    assert_not_equal(job, None)
    dest_granularity = 3600
    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, 5, "dummy-src-5")
        dest_datasource = get_dummy_datasource(cursor, 6,
                                               "dummy-transform-src")
        dest_entitytype = get_dummy_entitytype(cursor, 45,
                                               "dummy_type_aggregate")
        # Aggregation mapping: sum(counter_a) -> sum_a.
        get_function_mapping(cursor, 11, "sum", ["counter_a"], "sum_a")
        get_function_set(
            cursor, 43, "test_set_agg", [11], [5], 45, 900, 6,
            dest_entitytype.id, dest_granularity, None, ["entity_id"]
        )
        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)
        size = 233
        job_source_id = 1
        args = (job_id, "transform", "", size, "2012-12-11 14:34:00", None,
                None, None, job_source_id, "running")
        add_job(cursor, *args)
        args = (43, description["dest_timestamp"],
                description["processed_max_modified"],
                "2012-12-11 13:03:00", job_id)
        add_state(cursor, *args)
        table_name = "dummy-src-5_dummy_type_aggregate_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b"),
        ]
        table = Table("trend", table_name, columns=columns)
        if table_exists(cursor, table):
            table.drop().execute(cursor)
        table.create().execute(cursor)
        # NOTE(review): `source_1` is not defined in this function —
        # presumably a module-level fixture; verify.
        store(cursor, table, source_1)
        result_table = Table(
            "trend", "dummy-transform-src_dummy_type_aggregate_hr_20121207")
        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)
    conn.commit()
    print("source_1")
    print("\n".join(render_source(source_1)))
    job.execute()
    columns = map(Column, ["entity_id", "sum_a"])
    query = result_table.select(columns)
    with closing(conn.cursor()) as cursor:
        # NOTE(review): `args` here is the add_state argument tuple left
        # over from above, and the query is executed twice (once for the
        # debug print) — confirm both are intentional.
        query.execute(cursor, args)
        print("\n".join(render_result(cursor)))
        query.execute(cursor, args)
        row = cursor.fetchone()
        eq_(row[1], 21)
class NotificationStore(object):
    """Persistent store for notification records of one datasource."""

    def __init__(self, datasource, attributes):
        # Database identifier; None until created/loaded.
        self.id = None
        self.version = 1
        self.datasource = datasource
        self.attributes = attributes
        table_name = datasource.name
        # Records are stored in a per-datasource table in the
        # "notification" schema.
        self.table = Table("notification", table_name)

    @staticmethod
    def load(cursor, datasource):
        """Load NotificationStore from database and return it."""
        query = (
            "SELECT id "
            "FROM notification.notificationstore "
            "WHERE datasource_id = %s")
        args = datasource.id,
        cursor.execute(query, args)
        # Returns None implicitly when no store exists for the datasource.
        if cursor.rowcount == 1:
            notificationstore_id, = cursor.fetchone()
            notificationstore = NotificationStore(datasource, [])
            notificationstore.id = notificationstore_id
            query = (
                "SELECT id, name, data_type, description "
                "FROM notification.attribute "
                "WHERE notificationstore_id = %s"
            )
            args = (notificationstore_id, )
            cursor.execute(query, args)
            for attribute_id, name, data_type, description in cursor.fetchall():
                attribute = Attribute(name, data_type, description)
                attribute.id = attribute_id
                notificationstore.attributes.append(attribute)
            return notificationstore

    def create(self, cursor):
        """Create notification store in database and return itself."""
        if self.id:
            # Already persisted; re-creating is not supported.
            raise NotImplementedError()
        else:
            query = (
                "INSERT INTO notification.notificationstore "
                "(datasource_id, version) "
                "VALUES (%s, %s) RETURNING id")
            args = self.datasource.id, self.version
            cursor.execute(query, args)
            self.id = first(cursor.fetchone())
            for attribute in self.attributes:
                query = (
                    "INSERT INTO notification.attribute "
                    "(notificationstore_id, name, data_type, description) "
                    "VALUES (%s, %s, %s, %s) "
                    "RETURNING id")
                args = (self.id, attribute.name, attribute.data_type,
                        attribute.description)
                cursor.execute(query, args)
            return self

    def store_record(self, record):
        """Return function that can store the data from a
        :class:`~minerva.storage.notification.types.Record`."""
        @translate_postgresql_exceptions
        def f(cursor):
            column_names = ['entity_id', 'timestamp'] + record.attribute_names
            columns_part = ','.join(map(smart_quote, column_names))
            entity_placeholder, entity_value = record.entity_ref.to_argument()
            placeholders = (
                [entity_placeholder, "%s"] +
                (["%s"] * len(record.attribute_names))
            )
            query = (
                "INSERT INTO {} ({}) "
                "VALUES ({})"
            ).format(self.table.render(), columns_part, ",".join(placeholders))
            # Build a plain list: the original `list + map(...)` raises
            # TypeError on Python 3, where map() is an iterator.
            args = (
                [entity_value, record.timestamp] +
                [prepare_value(value) for value in record.values]
            )
            cursor.execute(query, args)
        return f