Code Example #1
def store_batch_insert(cursor, table, datapackage, modified):
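    # Build one multi-row INSERT into the partition table: the three fixed
    # columns plus one column per trend in the datapackage.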
    column_names = ["entity_id", "timestamp", "modified"]
    column_names.extend(datapackage.trend_names)

    dest_column_names = ",".join('"{0}"'.format(column_name)
            for column_name in column_names)

    parameters = ", ".join(["%s"] * len(column_names))

    query = (
        "INSERT INTO {0} ({1}) "
        "VALUES ({2})").format(table.render(), dest_column_names, parameters)

    rows = [(entity_id, datapackage.timestamp, modified) + tuple(values)
            for entity_id, values in datapackage.rows]

    logging.debug(cursor.mogrify(query, first(rows)))

    try:
        cursor.executemany(query, rows)
    except psycopg2.DatabaseError as exc:
        m = str(exc)
        if "violates check constraint" in m:
            print(cursor.mogrify(query, first(rows)))
            print(m)

        raise translate_postgresql_exception(exc)
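
A minimal usage sketch for store_batch_insert. The DataPackage shape is an assumption inferred from the attributes the function reads (trend_names, timestamp, rows); conn, table and modified stand for an open psycopg2 connection, a minerva Table object and a localized timestamp:

from collections import namedtuple
from contextlib import closing

# Hypothetical package matching the attributes store_batch_insert reads.
DataPackage = namedtuple("DataPackage", "trend_names timestamp rows")

datapackage = DataPackage(
    trend_names=["CCR", "Drops"],
    timestamp=timestamp,
    rows=[(10023, ("0.9919", "17"))])

with closing(conn.cursor()) as cursor:
    store_batch_insert(cursor, table, datapackage, modified)

conn.commit()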
Code Example #2
def test_update_modified_column(conn):
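    # Storing the same timestamp twice must update the "modified" column;
    # the modified bookkeeping table's "end" must track max(modified).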
    curr_timezone = timezone("Europe/Amsterdam")

    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    data_types = extract_data_types(data_rows)

    update_data_rows = [(10023, ('10023', '0.9919', '17'))]
    timestamp = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)

        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)
        time.sleep(1)
        store(conn, SCHEMA, table.name, trend_names, timestamp, update_data_rows)
        conn.commit()

        query = table.select([Column("modified")])

        query.execute(cursor)
        modified_list = cursor.fetchall()
        assert_not_equal(modified_list[0], modified_list[1])

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        modified_table.select(Column("end")).where_(
                Eq(Column("table_name"), table.name)).execute(cursor)

        end = first(cursor.fetchone())

        eq_(end, max_modified)
Code Example #3
def test_store_copy_from_2(conn):
    trend_names = ['CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('0.9919', '2105', '17'))
    ]

    data_types = ['integer', 'smallint', 'smallint']

    curr_timezone = timezone("Europe/Amsterdam")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    modified = curr_timezone.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src010")
        entitytype = name_to_entitytype(cursor, "test-type002")
        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)
        table = partition.table()

        store_copy_from(conn, SCHEMA, table.name, trend_names, timestamp,
                modified, data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 1)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
Code Example #4
File: types.py  Project: hendrikx-itc/python-minerva
    def create(self, cursor):
        """Create notification store in database in return itself."""
        if self.id:
            raise NotImplementedError()
        else:
            query = (
                "INSERT INTO notification.notificationstore "
                "(datasource_id, version) "
                "VALUES (%s, %s) RETURNING id")

            args = self.datasource.id, self.version

            cursor.execute(query, args)

            self.id = first(cursor.fetchone())

            for attribute in self.attributes:
                query = (
                    "INSERT INTO notification.attribute "
                    "(notificationstore_id, name, data_type, description) "
                    "VALUES (%s, %s, %s, %s) "
                    "RETURNING id")

                args = (self.id, attribute.name, attribute.data_type,
                        attribute.description)
                cursor.execute(query, args)

            return self
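
A sketch of how this create method might be driven. The NotificationStore constructor and the Attribute helper are assumptions inferred from the fields the method reads (datasource.id, version, and each attribute's name, data_type and description); only create(cursor) itself comes from the code above:

from contextlib import closing

# Hypothetical construction; assumed constructor signature.
attributes = [Attribute("severity", "text", "alarm severity")]
notificationstore = NotificationStore(datasource, 1, attributes)

with closing(conn.cursor()) as cursor:
    notificationstore.create(cursor)

conn.commit()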
Code Example #5
def count_for_table(table):
    query = query_template.format(table.render())
    try:
        cursor.execute(query, args)
    except (psycopg2.ProgrammingError, psycopg2.InternalError):
        return 0
    else:
        return first(cursor.fetchone())
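
count_for_table closes over cursor, query_template and args from an enclosing scope that this listing omits. A plausible reconstruction of that scope, purely illustrative:

def count_rows(cursor, tables):
    # Hypothetical enclosing scope; only the inner closure appears above.
    query_template = "SELECT count(*) FROM {}"
    args = tuple()

    def count_for_table(table):
        query = query_template.format(table.render())
        try:
            cursor.execute(query, args)
        except (psycopg2.ProgrammingError, psycopg2.InternalError):
            # A failing query (e.g. a missing partition table) counts as zero.
            return 0
        else:
            return first(cursor.fetchone())

    return sum(count_for_table(table) for table in tables)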
Code Example #6
def get_function_mapping(cursor, *args):
    id = first(args)

    try:
        return helpers.get_function_mapping(cursor, id)
    except Exception:  # ideally a specific lookup error, cf. NoSuchFunctionSetError below
        add_function_mapping(cursor, *args)

        return helpers.get_function_mapping(cursor, id)
Code Example #7
def get_function_set(cursor, *args):
    id = first(args)

    try:
        return helpers.get_function_set(cursor, id)
    except helpers.NoSuchFunctionSetError:
        add_function_set(cursor, *args)

        return helpers.get_function_set(cursor, id)
Code Example #8
def test_store_copy_from_1(conn):
    trend_names = ['CellID', 'CCR', 'CCRatts', 'Drops']

    data_rows = [
        (10023, ('10023', '0.9919', '2105', '17')),
        (10047, ('10047', '0.9963', '4906', '18')),
        (10048, ('10048', '0.9935', '2448', '16')),
        (10049, ('10049', '0.9939', '5271', '32')),
        (10050, ('10050', '0.9940', '3693', '22')),
        (10051, ('10051', '0.9944', '3753', '21')),
        (10052, ('10052', '0.9889', '2168', '24')),
        (10053, ('10053', '0.9920', '2372', '19')),
        (10085, ('10085', '0.9987', '2282', '3')),
        (10086, ('10086', '0.9972', '1763', '5')),
        (10087, ('10087', '0.9931', '1453', '10'))
    ]

    curr_timezone = timezone("Europe/Amsterdam")
    data_types = extract_data_types(data_rows)
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    granularity = create_granularity("900")
    modified = curr_timezone.localize(datetime.now())

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(
            datasource, entitytype, granularity, 86400, "table").create(cursor)
        partition = trendstore.partition(timestamp)

        table = partition.table()

        partition.create(cursor)

        partition.check_columns_exist(trend_names, data_types)(cursor)

        store_copy_from(conn, SCHEMA, table.name, trend_names, timestamp,
                modified, data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 11)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
Code Example #9
def test_store_insert_rows(conn):
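    # The same rows stored under a second timestamp append (2 -> 4 rows);
    # re-storing an already stored timestamp must not create duplicates.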
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    curr_timezone = timezone("Europe/Amsterdam")
    modified = curr_timezone.localize(datetime.now())
    time1 = curr_timezone.localize(datetime.now())
    time2 = time1 - timedelta(days=1)

    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)

        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1, modified,
                data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 2)

        store_insert_rows(conn, SCHEMA, table.name, trend_names, time2, modified,
                data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 4)

        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1, modified,
                data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 4)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
Code Example #10
def test_store_using_tmp(conn):
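    # The 11 input rows include a duplicate entity (10023); storing via the
    # tmp table must deduplicate, leaving 10 rows.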
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'RadioFail', 'RFOldHo', 'AbisFailCall']

    data_rows = [
        (10023, ('10023', '0.9919', '10', '3', '3')),
        (10047, ('10047', '0.9963', '11', '5', '0')),
        (10048, ('10048', '0.9935', '12', '3', '0')),
        (10049, ('10049', '0.9939', '20', '3', '4')),
        (10050, ('10050', '0.9940', '18', '3', '0')),
        (10051, ('10051', '0.9944', '17', '2', '2')),
        (10052, ('10052', '0.9889', '18', '2', '0')),
        (10053, ('10053', '0.9920', '15', '3', '1')),
        (10023, ('10023', '0.9931', '9', '0', '1')),
        (10085, ('10085', '0.9987', '3', '0', '0')),
        (10086, ('10086', '0.9972', '3', '2', '0'))
    ]

    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)
        curr_timezone = timezone("Europe/Amsterdam")
        timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
        modified = curr_timezone.localize(datetime.now())
        store_using_tmp(conn, SCHEMA, table.name, trend_names, timestamp,
                modified, data_rows)

        conn.commit()

        eq_(row_count(cursor, table), 10)

        table.select(Call("max", Column("modified"))).execute(cursor)

        max_modified = first(cursor.fetchone())

        eq_(max_modified, modified)
Code Example #11
def retrieve(cursor, tables, columns, entities, start, end,
        subquery_filter=None, relation_table_name=None, limit=None, entitytype=None):
    """
    Retrieve data.

    :param cursor: Minerva database cursor
    :param columns: A list of column identifiers (possibly for different
            datasources)
    :param entities: List of entity Ids
    :param start: The start timestamp of the range of trend values
    :param end: The end timestamp of the range of trend values
    :param subquery_filter: optional subquery for additional filtering
        by JOINing on field 'id' = entity_id
    :param relation_table_name: optional relation table name for converting entity
        ids to related ones
    """
    all_rows = []

    if entities is not None and len(entities) == 0:
        return []

    # Materialize the column list; it is reused for every partition group.
    columns = [ensure_column(column) for column in columns]

    # group tables by partition size signature to be able to JOIN them later
    tables_by_partition_signature = {}
    for table in tables:
        signature = table.name.split("_")[-1]

        tables_by_partition_signature.setdefault(signature, []).append(table)

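    # Note: the loop variable below shadows the "tables" parameter, which is
    # not needed again after the grouping above.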
    for tables in tables_by_partition_signature.values():
        params = []

        if start == end and start is not None and len(tables) > 1:
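            # Point-in-time query over multiple tables: use all entities of
            # the entitytype as the join base, so the LEFT JOINs below still
            # yield rows for tables that lack data for an entity.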
            cols = [As(Argument(), "timestamp"), Column("dn"),
                    As(Column("id"), "entity_id")]

            q = Select(cols,
                    from_=Table("directory", "entity"),
                    where_=Eq(Column("entitytype_id"), Value(entitytype.id)))

            with_query = WithQuery("t", query=q)
            params.append(start)

            base_timestamp_column = Column("t", "timestamp")
            base_entity_id_column = Column("t", "entity_id")

            from_item = FromItem(Table("t"))
            data_tables = tables
        else:
            with_query = None

            base_tbl = first(tables)

            base_timestamp_column = Column(base_tbl, "timestamp")
            base_entity_id_column = Column(base_tbl, "entity_id")

            from_item = FromItem(base_tbl)
            data_tables = tables[1:]

        for tbl in data_tables:
            timestamp_comparison = Eq(Column(tbl, "timestamp"), base_timestamp_column)
            entity_id_comparison = Eq(Column(tbl, "entity_id"), base_entity_id_column)
            join_condition = And(timestamp_comparison, entity_id_comparison)

            from_item = from_item.join(tbl, on=join_condition, join_type="LEFT")

        if subquery_filter:
            filter_tbl = Literal("({0}) AS filter".format(subquery_filter))
            from_item = from_item.join(filter_tbl,
                    on=Eq(Column("filter", "id"), base_entity_id_column))

        if relation_table_name:
            relation_table = Table("relation", relation_table_name)

            join_condition = Eq(Column("r", "source_id"), base_entity_id_column)

            from_item = from_item.left_join(As(relation_table, "r"), on=join_condition)

            entity_id_column = Column("r", "target_id")
        else:
            entity_id_column = base_entity_id_column

        partition_columns = [entity_id_column, base_timestamp_column] + columns

        where_parts = []

        if not with_query:
            if start == end and start is not None:
                condition = Eq(base_timestamp_column, Argument())
                where_parts.append(condition)
                params.append(start)
            else:
                if start is not None:
                    condition = Gt(base_timestamp_column, Argument())
                    where_parts.append(condition)
                    params.append(start)

                if end is not None:
                    condition = LtEq(base_timestamp_column, Argument())
                    where_parts.append(condition)
                    params.append(end)

        if entities is not None:
            condition = Literal("{0} IN ({1:s})".format(base_entity_id_column.render(),
                ",".join(str(entity_id) for entity_id in entities)))
            where_parts.append(condition)

        if where_parts:
            where_clause = ands(where_parts)
        else:
            where_clause = None

        select = Select(partition_columns, with_query=with_query, from_=from_item,
                where_=where_clause, limit=limit)

        query = select.render()

        try:
            cursor.execute(query, params)
        except psycopg2.ProgrammingError as exc:
            msg = "{} in query: {}".format(exc, cursor.mogrify(query, params))
            raise Exception(msg) from exc
        else:
            all_rows.extend(cursor.fetchall())

    return all_rows
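
A usage sketch for retrieve. Everything here is illustrative: the partition table, the timestamps and the connection follow the conventions of the surrounding examples in this listing, not retrieve itself; rows come back as (entity_id, timestamp, <requested columns...>):

from contextlib import closing

with closing(conn.cursor()) as cursor:
    rows = retrieve(
        cursor,
        tables=[partition.table()],
        columns=["CCR", "Drops"],
        entities=[10023, 10047],
        start=start_timestamp,
        end=end_timestamp)

for entity_id, timestamp, ccr, drops in rows:
    print(entity_id, timestamp, ccr, drops)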
Code Example #12
def test_run(conn):
    with closing(conn.cursor()) as cursor:
        reset_db(cursor)

    conn.commit()

    minerva_context = MinervaContext(conn, conn)

    source_granularity = create_granularity("900")
    dest_granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")

        entitytype = get_dummy_entitytype(cursor, dummy_type_name)

        partition_size = 86400

        trendstore_1 = TrendStore(
            source_datasource_1, entitytype, source_granularity,
            partition_size, "table")
        trendstore_1.create(cursor)
        trendstore_2 = TrendStore(
            source_datasource_2, entitytype, source_granularity,
            partition_size, "table")
        trendstore_2.create(cursor)
        result_trendstore = TrendStore(
            dest_datasource, entitytype, dest_granularity, partition_size,
            "table")
        result_trendstore.create(cursor)

        function_mappings = [
            add_function_mapping(cursor, None, ["counter_a"], "identity_a"),
            add_function_mapping(cursor, None, ["counter_b"], "identity_b"),
            add_function_mapping(cursor, None, ["counter_c"], "identity_c"),
            add_function_mapping(cursor, "add", ["counter_a", "counter_b"], "add_a_b"),
            add_function_mapping(cursor, "multiply", ["counter_a", "300"], "a_times_300")]

        function_mapping_ids = [fm.id for fm in function_mappings]

        function_set_qtr = add_function_set(
            cursor, "test_set", "", function_mapping_ids,
            [source_datasource_1.id, source_datasource_2.id], entitytype.id,
            source_granularity.name, dest_datasource.id, entitytype.id,
            dest_granularity.name, None, [], None, True)

        # Materialize: entities is iterated once per source created below.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

        conn.commit()

        source_1 = create_source_1(source_granularity, entities)

        def store_modified_at(trendstore, datapackage, modified):
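            # Compose a transaction: pin "modified" in the shared state,
            # then COPY the datapackage into its partition.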
            def set_modified(state):
                state["modified"] = modified

            partition = trendstore.partition(datapackage.timestamp)
            set_modified_action = UpdateState(set_modified)
            copy_from = CopyFrom(k(partition), k(datapackage))

            return DbTransaction(set_modified_action, copy_from)

        transaction = store_modified_at(trendstore_1, source_1, modified_a)
        transaction.run(conn)

        source_2 = create_source_2(source_granularity, entities)

        transaction = store_modified_at(trendstore_2, source_2, modified_a)
        transaction.run(conn)

        result_partition = result_trendstore.partition(timestamp)

        result_table = result_partition.table()

    conn.commit()

    logging.debug("source_1")
    logging.debug(unlines(render_datapackage(source_1)))

    logging.debug("source_2")
    logging.debug(unlines(render_datapackage(source_2)))

    dest_timestamp = timestamp

    transformation = Transformation(function_set_qtr, dest_timestamp)

    transformation.execute(minerva_context)

    columns = [Column(name) for name in
               ["entity_id", "identity_a", "identity_b", "add_a_b", "a_times_300"]]

    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)

        logging.debug(unlines(render_result(cursor)))

        src_table_1 = trendstore_1.partition(timestamp).table()
        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())

        src_table_2 = trendstore_2.partition(timestamp).table()
        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())

        query = modified_table.select(Column("end")).where_(Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(Eq(Column("function_set_id")))
        query.execute(cursor, (function_set_qtr.id,))
        processed_max_modified = first(cursor.fetchone())

        eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
Code Example #13
def test_run(conn):
    clear_database(conn)
    plugin = load_plugin()

    minerva_context = MinervaContext(conn, conn)

    instance = plugin(minerva_context)

    job_id = 67
    description = {
        "function_set_id": 42,
        "dest_timestamp": timestamp.isoformat(),
        "processed_max_modified": "2012-12-11 14:03:29+01:00"}
    config = {}

    job = instance.create_job(job_id, description, config)

    assert_not_equal(job, None)

    dest_granularity = 900

    function_mapping_table = Table("transform", "function_mapping")

    with closing(conn.cursor()) as cursor:
        state_table.truncate().execute(cursor)
        function_set_table.truncate(cascade=True).execute(cursor)

        source_datasource_1 = get_dummy_datasource(cursor, "dummy-src-1")
        source_datasource_2 = get_dummy_datasource(cursor, "dummy-src-2")
        dest_datasource = get_dummy_datasource(cursor, "dummy-transform-src")

        dest_entitytype = get_dummy_entitytype(cursor, "dummy_type_standard")

        function_mapping_table.truncate().execute(cursor)

        get_function_mapping(cursor, 1, None, ["counter_a"], "identity_a")
        get_function_mapping(cursor, 2, None, ["counter_b"], "identity_b")
        get_function_mapping(cursor, 3, None, ["counter_c"], "identity_c")
        get_function_mapping(cursor, 4, "add", ["counter_a", "counter_b"], "add_a_b")
        get_function_mapping(cursor, 5, "multiply", ["counter_a", "300"], "a_times_300")

        get_function_set(cursor, 42, "test_set", [1, 2, 3, 4, 5], [3, 4], 42, 900, 6,
            dest_entitytype.id, dest_granularity, None, [], True)

        args = 1, "unittest", "transform", ""
        add_job_source(cursor, *args)

        size = 233
        job_source_id = 1
        args = job_id, "transform", "", size, "2012-12-11 14:34:00", None, None, None, job_source_id, "running"

        add_job(cursor, *args)

        args = 42, description["dest_timestamp"], description["processed_max_modified"], "2012-12-11 13:03:00", job_id
        add_state(cursor, *args)

        table_name = "dummy-src-1_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_a"),
            Column("counter_b")]
        src_table_1 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_1):
            src_table_1.drop().execute(cursor)

        src_table_1.create().execute(cursor)

        # Materialize: entities is iterated once per source created below.
        entities = list(map(partial(get_or_create_entity, cursor), dns))

        source_1 = create_source_1(entities)

        store(cursor, src_table_1, source_1)

        table_name = "dummy-src-2_dummy_type_standard_qtr_20121211"
        columns = [
            Column("entity_id"),
            Column("timestamp", type_=SqlType("timestamp with time zone")),
            Column("modified", type_=SqlType("timestamp with time zone")),
            Column("counter_c")]
        src_table_2 = Table("trend", table_name, columns=columns)

        if table_exists(cursor, src_table_2):
            src_table_2.drop().execute(cursor)

        src_table_2.create().execute(cursor)

        source_2 = create_source_2(entities)

        store(cursor, src_table_2, source_2)

        result_table = Table("trend", "dummy-transform-src_dummy_type_standard_qtr_20121211")

        if table_exists(cursor, result_table):
            result_table.truncate().execute(cursor)

    conn.commit()

    print("source_1")
    print(unlines(render_source(source_1)))

    print("source_2")
    print(unlines(render_source(source_2)))

    job.execute()

    columns = [Column(name) for name in
               ["entity_id", "identity_a", "identity_b", "add_a_b", "a_times_300"]]

    query = result_table.select(columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)

        print(unlines(render_result(cursor)))

        query = src_table_1.select(Call("max", Column("modified")))
        query.execute(cursor)
        src1_max_modified = first(cursor.fetchone())

        query = src_table_2.select(Call("max", Column("modified")))
        query.execute(cursor)
        src2_max_modified = first(cursor.fetchone())

        query = modified_table.select(Column("end")).where_(Eq(Column("table_name"), result_table.name))
        query.execute(cursor)

        query = state_table.select(Column("processed_max_modified")).where_(Eq(Column("function_set_id")))
        query.execute(cursor, (42,))
        processed_max_modified = first(cursor.fetchone())

        eq_(max(src1_max_modified, src2_max_modified), processed_max_modified)
Code Example #14
def load_plugin():
    """
    Load and return the plugin.
    """
    return first([
        entrypoint.load()
        for entrypoint
        in pkg_resources.iter_entry_points(group=ENTRYPOINT, name="transform")])
Code Example #15
def get_timestamp(cursor):
    cursor.execute("SELECT NOW()")

    return first(cursor.fetchone())
Code Example #16
def get_entrypoint():
    return first(list(iter_entry_points(group=ENTRYPOINT, name="transform")))
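
Nearly every example above funnels query results through first from the minerva helpers. Its definition is not part of this listing; a minimal stand-in, assuming it simply returns the first element of a sequence:

def first(sequence):
    # Assumed behaviour: the first element, e.g. the scalar of a
    # one-column row returned by cursor.fetchone().
    return sequence[0]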