Example #1

from contextlib import closing
from itertools import chain
import logging
import re

import psycopg2
import psycopg2.errorcodes

# The helper functions and constants used below (column_exists, create_column,
# Table, Sql, SCHEMA, get_table_names_v4, get_table_names, enquote_column_name,
# quote_ident, make_table_name, get_previous_timestamp, get_data_types,
# check_column_types, create_trend_table, datatype, MAX_RETRIES,
# MaxRetriesError, NonRecoverableError) are assumed to come from the
# surrounding project and are not defined in this example.

def add_missing_columns(conn, schema, table_name, columns_to_check):
    """
    Add missing columns in `table_name`.

    :param conn: psycopg2 database connection
    :param schema: name of schema where specified table is located
    :param table_name: name of table that will be updated
    :param columns_to_check: list of tuples (column_name, data_type) specifying
        columns that must be checked and added when missing
    """
    with closing(conn.cursor()) as cursor:
        for (column_name, data_type) in columns_to_check:
            if not column_exists(conn, schema, table_name, column_name):
                create_column(cursor, Table(schema, table_name), column_name, data_type)
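
# A minimal usage sketch for add_missing_columns(); the DSN, schema, table
# name and column list below are hypothetical, not part of the example above:
#
#   conn = psycopg2.connect("dbname=minerva")
#   add_missing_columns(conn, "trend", "site_kpi_3600", [
#       ("samples", "integer"),
#       ("availability", "double precision"),
#   ])
#   conn.commit()
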
def retrieve_aggregated(conn, datasource, granularity, entitytype,
        column_identifiers, interval, group_by, subquery_filter=None,
        relation_table_name=None):
    """
    Return aggregated data

    :param conn: psycopg2 database connection
    :param datasource: datasource object
    :param granularity: granularity in seconds
    :param entitytype: entitytype object
    :param column_identifiers: e.g. SUM(trend1), MAX(trend2)
    :param interval: (start, end) tuple with non-naive timestamps
    :param group_by: list of columns to GROUP BY
    :param subquery_filter: optional subquery for additional filtering
        by JOINing on field 'id' = entity_id
    :param relation_table_name: optional relation table name for converting
        entity ids to related ones
    """
    start, end = interval

    with closing(conn.cursor()) as cursor:
        source_table_names = get_table_names_v4(
            cursor, [datasource], granularity, entitytype, start, end)

    def get_trend_names(column_identifier):
        # Sql objects expose their argument columns directly; plain strings
        # such as "SUM(trend1)" are parsed with a regular expression.
        if isinstance(column_identifier, Sql):
            return [a.name for a in column_identifier.args]
        else:
            trend_names_part = re.match(
                r".*\(([\w, ]+)\)", column_identifier).group(1)

            return map(str.strip, trend_names_part.split(","))

    # The deduplicated set of all trend (column) names referenced by the
    # aggregate expressions.
    trend_names = set(chain(*map(get_trend_names, column_identifiers)))

    # Deal with the 'samples' column: use the pre-aggregated sample count when
    # the most recent source table provides one, otherwise count the rows.
    if column_exists(conn, SCHEMA, source_table_names[-1], "samples"):
        select_samples_part = "SUM(samples)"
        select_samples_column = "samples,"
    else:
        select_samples_part = "COUNT(*)"
        select_samples_column = ""

    args = {"start": start, "end": end}

    select_parts = []

    # Build one SELECT per source table; the parts are combined with
    # UNION ALL below so the aggregation spans the whole interval.
    for source_table_name in source_table_names:

        join_parts = []

        return_id_field = "entity_id"

        if subquery_filter:
            join_parts.append(
                "JOIN ({0}) AS filter ON filter.id = \"{1}\".{2}.entity_id".format(
                    subquery_filter, SCHEMA,
                    enquote_column_name(source_table_name)))

        if relation_table_name:
            return_id_field = "r.target_id AS entity_id"

            join_parts.append(
                "JOIN relation.\"{0}\" r ON r.source_id = \"{1}\".entity_id".format(
                    relation_table_name, source_table_name))

        select_parts.append(
            "SELECT {0}, %(end)s, {1} {2} FROM \"{3}\".\"{4}\" {5}"
            " WHERE timestamp > %(start)s AND timestamp <= %(end)s".format(
                return_id_field,
                select_samples_column,
                ",".join(map(enquote_column_name, trend_names)),
                SCHEMA,
                source_table_name,
                " ".join(join_parts)))

    # Aggregate over the UNION ALL of the per-table SELECTs in one query.
    query = ("SELECT entity_id, %(end)s, {0}, {1} FROM ( {2} ) "
        "AS sources GROUP BY {3}").format(
            select_samples_part,
            ",".join(map(quote_ident, column_identifiers)),
            " UNION ALL ".join(select_parts),
            ",".join(map(enquote_column_name, group_by)))

    all_rows = []

    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(query, args)
        except psycopg2.ProgrammingError:
            logging.debug(cursor.mogrify(query, args))
            conn.rollback()
            # TODO: Check error code
        else:
            all_rows = cursor.fetchall()

    return all_rows
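
# A minimal usage sketch for retrieve_aggregated(); the datasource and
# entitytype objects, granularity, trend names and timestamps below are
# hypothetical:
#
#   interval = (start, end)  # timezone-aware datetimes
#   rows = retrieve_aggregated(
#       conn, datasource, 3600, entitytype,
#       ["SUM(traffic)", "MAX(latency)"], interval, ["entity_id"])
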
def aggregate(conn, schema, source, target, trend_names, timestamp):
    """
    Basic aggregation of trend data

    :param conn: psycopg2 database connection
    :param schema: schema where source and target data is located
    :param source: tuple (datasource, gp, entitytype_name) specifying source
    :param target: tuple (datasource, gp, entitytype_name) specifying target
    :param trend_names: trends to aggregate
    :param timestamp: non-naive timestamp specifying end of interval to aggregate
    """
    target_gp = target[1]
    interval = (get_previous_timestamp(timestamp, target_gp), timestamp)

    (ds, gp, et_name) = source
    source_table_names = get_table_names(
        [ds], gp, et_name, interval[0], interval[1])

    target_table_name = make_table_name(*(target + (timestamp,)))

    # Deal with the 'samples' column, as in retrieve_aggregated() above.
    if column_exists(conn, schema, source_table_names[-1], "samples"):
        select_samples_part = "SUM(samples)"
        select_samples_column = "samples,"
    else:
        select_samples_part = "COUNT(*)"
        select_samples_column = ""

    select_parts = []

    for source_table_name in source_table_names:
        select_parts.append(
            "SELECT "
            "entity_id, '{1}', {2} {3} "
            "FROM \"{0}\".\"{4}\" "
            "WHERE timestamp > %s AND timestamp <= %s ".format(
                schema,
                timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                select_samples_column,
                ",".join(["\"{0}\"".format(tn) for tn in trend_names]),
                source_table_name))

    # Insert the aggregated rows into the target table, summing each trend
    # over the UNION ALL of the source tables.
    query = (
        "INSERT INTO \"{0}\".\"{1}\" (entity_id, timestamp, samples, {2}) "
        "SELECT entity_id, '{4}', {5}, {6} FROM "
        "( {3} ) AS sources "
        "GROUP BY entity_id".format(
            schema,
            target_table_name,
            ",".join(["\"{0}\"".format(tn) for tn in trend_names]),
            " UNION ALL ".join(select_parts),
            timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            select_samples_part,
            ",".join(["SUM(\"{0}\")".format(tn) for tn in trend_names])))

    retry = True
    attempt = 0

    # The retry strategy below mirrors that of the trend_storage.store()
    # function: repair the target table on recoverable errors and retry.
    while retry:
        retry = False
        attempt += 1

        if attempt > MAX_RETRIES:
            raise MaxRetriesError(
                "Max retries ({0}) reached".format(MAX_RETRIES))

        try:
            with closing(conn.cursor()) as cursor:
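                # Each UNION ALL part contains two %s placeholders, so the
                # (start, end) interval tuple is repeated once per source
                # table to supply all of the query's parameters.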
                cursor.execute(query, len(source_table_names) * interval)
        except psycopg2.DatabaseError as exc:
            conn.rollback()
            columns = [("samples", "integer")]
            columns.extend(zip(trend_names,
                get_data_types(conn, schema, source_table_names[-1], trend_names)))

            if exc.pgcode == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE:
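                # The summed values no longer fit the current column types:
                # probe the maximum aggregated value per trend, derive wider
                # types and adjust the target table before retrying.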
                max_values = []
                for source_table_name in source_table_names:
                    query_max_values = (
                        "SELECT {0} FROM "
                        "(SELECT "
                        " {1} "
                        "FROM \"{2}\".\"{3}\" "
                        "WHERE timestamp > %s AND timestamp <= %s "
                        "GROUP BY entity_id) AS sums"
                    ).format(
                            ",".join(["MAX(\"{0}\")".format(tn) for tn in trend_names]),
                            ",".join(["SUM(\"{0}\") AS \"{0}\"".format(tn) for tn in trend_names]),
                            schema,
                            source_table_name)

                    with closing(conn.cursor()) as cursor:
                        cursor.execute(query_max_values, interval)
                        max_values.append(cursor.fetchone())

                data_types = [datatype.extract_from_value(v)
                        for v in map(max, zip(*max_values))]
                check_column_types(conn, schema, target_table_name, trend_names,
                        data_types)

                retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNIQUE_VIOLATION:
                raise NonRecoverableError("{0}, {1!s} in query '{2}'".format(
                    exc.pgcode, exc, query))
                # TODO: remove unique violating record from target
                # retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_COLUMN:
                column_names, data_types = zip(*columns)
                add_missing_columns(conn, schema, target_table_name,
                        zip(column_names, data_types))
                retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE:
                column_names, data_types = zip(*columns)
                create_trend_table(conn, schema, target_table_name, column_names,
                        data_types)
                retry = True
            else:
                raise NonRecoverableError("{0}, {1!s} in query '{2}'".format(
                    exc.pgcode, exc, query))
        else:
            conn.commit()
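
# A minimal usage sketch for aggregate(); the datasource object,
# granularities, entity type name, trend names and timestamp below are
# hypothetical:
#
#   source = (datasource, 900, "Cell")
#   target = (datasource, 3600, "Cell")
#   aggregate(conn, "trend", source, target, ["traffic", "drops"], timestamp)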