def check_column_types(conn, schema, table, column_names, data_types):
    """
    Check if database column types match trend datatype and correct it if
    necessary.

    :param conn: psycopg2 database connection
    :param schema: name of the schema containing `table`
    :param table: name of the partition table to check
    :param column_names: iterable of column names to inspect
    :param data_types: required datatypes, parallel to `column_names`
    """
    current_data_types = get_data_types(conn, schema, table, column_names)

    with closing(conn.cursor()) as cursor:
        for column_name, current_data_type, data_type in zip(
                column_names, current_data_types, data_types):
            # The column must be able to hold both its existing values and
            # the incoming data, so widen to the larger of the two types.
            required_data_type = datatype.max_datatype(
                current_data_type, data_type)

            if required_data_type != current_data_type:
                # Lazy %-style arguments: the message is only formatted when
                # the corresponding log level is actually enabled.
                logging.debug(
                    "%s != %s", required_data_type, current_data_type)

                args = table, column_name, required_data_type
                cursor.callproc("trend.modify_partition_column", args)

                logging.info(
                    "Column %s modified from type %s to %s",
                    column_name, current_data_type, required_data_type)

    conn.commit()
def aggregate(conn, schema, source, target, trend_names, timestamp):
    """
    Basic aggregation of trend data

    :param conn: psycopg2 database connection
    :param schema: schema where source and target data is located
    :param source: tuple (datasource, gp, entitytype_name) specifying source
    :param target: tuple (datasource, gp, entitytype_name) specifying target
    :param trend_names: trends to aggregate
    :param timestamp: non-naive timestamp specifying end of interval to
        aggregate
    """
    # The aggregation interval runs from the previous target-granularity
    # timestamp up to (and including) `timestamp`.
    target_gp = target[1]
    interval = (get_previous_timestamp(timestamp, target_gp), timestamp)

    (ds, gp, et_name) = source

    # All source partition tables that overlap the interval.
    source_table_names = get_table_names(
        [ds], gp, et_name, interval[0], interval[1])

    target_table_name = make_table_name(*(target + (timestamp,)))

    # If the sources already carry a sample count, sum it; otherwise each
    # source row counts as one sample.
    if column_exists(conn, schema, source_table_names[-1], "samples"):
        select_samples_part = "SUM(samples)"
        select_samples_column = "samples,"
    else:
        select_samples_part = "COUNT(*)"
        select_samples_column = ""

    # One SELECT per source table; combined below with UNION ALL so that
    # duplicate rows across partitions are preserved for summing.
    select_parts = []

    for source_table_name in source_table_names:
        select_parts.append(
            "SELECT "
            "entity_id, '{1}', {2} {3} "
            "FROM \"{0}\".\"{4}\" "
            "WHERE timestamp > %s AND timestamp <= %s ".format(
                schema,
                timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                select_samples_column,
                ",".join(["\"{0}\"".format(tn) for tn in trend_names]),
                source_table_name))

    # INSERT ... SELECT that sums each trend per entity over all sources.
    query = (
        "INSERT INTO \"{0}\".\"{1}\" (entity_id, timestamp, samples, {2}) "
        "SELECT entity_id, '{4}', {5}, {6} FROM "
        "( {3} ) AS sources "
        "GROUP BY entity_id".format(
            schema,
            target_table_name,
            ",".join(["\"{0}\"".format(tn) for tn in trend_names]),
            " UNION ALL ".join(select_parts),
            timestamp.strftime("%Y-%m-%d %H:%M:%S"),
            select_samples_part,
            ",".join(["SUM(\"{0}\")".format(tn) for tn in trend_names])))

    retry = True
    attempt = 0

    # Strategy followed in code below is like the trend_storage.store()
    # function: try the insert, and on known recoverable database errors fix
    # the target table (types/columns/table) and retry, up to MAX_RETRIES.
    while retry is True:
        retry = False
        attempt += 1

        if attempt > MAX_RETRIES:
            raise MaxRetriesError("Max retries ({0}) reached".format(
                MAX_RETRIES))

        try:
            with closing(conn.cursor()) as cursor:
                # Each per-source SELECT consumes one (start, end) pair of
                # placeholders, hence the interval tuple is repeated once
                # per source table.
                cursor.execute(query, len(source_table_names) * interval)
        except psycopg2.DatabaseError as exc:
            conn.rollback()

            # Column definitions the target table must support: the sample
            # counter plus each trend at its source datatype.
            columns = [("samples", "integer")]
            columns.extend(zip(trend_names, get_data_types(conn, schema,
                source_table_names[-1], trend_names)))

            if exc.pgcode == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE:
                # A summed value overflowed a target column. Determine the
                # maximum per-entity sums in the sources so the target
                # columns can be widened accordingly, then retry.
                max_values = []
                for source_table_name in source_table_names:
                    query_max_values = (
                        "SELECT {0} FROM "
                        "(SELECT "
                        " {1} "
                        "FROM \"{2}\".\"{3}\" "
                        "WHERE timestamp > %s AND timestamp <= %s "
                        "GROUP BY entity_id) AS sums").format(
                        ",".join(["MAX(\"{0}\")".format(tn)
                            for tn in trend_names]),
                        ",".join(["SUM(\"{0}\") AS \"{0}\"".format(tn)
                            for tn in trend_names]),
                        schema,
                        source_table_name)

                    with closing(conn.cursor()) as cursor:
                        cursor.execute(query_max_values, interval)
                        # NOTE(review): fetchone() returns None when a source
                        # table has no rows in the interval, which would break
                        # the zip/max below — confirm callers guarantee
                        # non-empty sources here.
                        max_values.append(cursor.fetchone())

                data_types = [datatype.extract_from_value(v)
                    for v in map(max, zip(*max_values))]

                check_column_types(conn, schema, target_table_name,
                    trend_names, data_types)

                retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNIQUE_VIOLATION:
                # Duplicate (entity_id, timestamp) in target: currently
                # treated as non-recoverable.
                raise NonRecoverableError("{0}, {1!s} in query '{2}'".format(
                    exc.pgcode, exc, query))
                # TODO: remove unique violating record from target
                # retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_COLUMN:
                # Target table exists but lacks some columns: add them and
                # retry.
                column_names, data_types = zip(*columns)
                add_missing_columns(conn, schema, target_table_name,
                    zip(column_names, data_types))
                retry = True
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE:
                # Target table does not exist yet: create it and retry.
                column_names, data_types = zip(*columns)
                create_trend_table(conn, schema, target_table_name,
                    column_names, data_types)
                retry = True
            else:
                raise NonRecoverableError("{0}, {1!s} in query '{2}'".format(
                    exc.pgcode, exc, query))
        else:
            conn.commit()