예제 #1
0
    def _create_cache_table(self):

        if not self.is_postgre_sql:

            sql_statement = "CREATE TABLE %s.%s ( " \
                            "ENTITY_TYPE_ID BIGINT NOT NULL, " \
                            "PARQUET_NAME VARCHAR(2048) NOT NULL, " \
                            "PARQUET_FILE BLOB(2G), " \
                            "UPDATED_TS TIMESTAMP  NOT NULL DEFAULT CURRENT TIMESTAMP, " \
                            "CONSTRAINT %s UNIQUE(ENTITY_TYPE_ID, PARQUET_NAME) ENFORCED ) " \
                            "ORGANIZE BY ROW" % (self.quoted_schema, self.quoted_cache_tablename,
                                                 dbhelper.quotingTableName('uc_%s' % self.cache_tablename,
                                                                           self.is_postgre_sql))
            try:
                stmt = ibm_db.exec_immediate(self.db_connection, sql_statement)
                ibm_db.free_result(stmt)
            except Exception as ex:
                raise Exception('Execution of sql statement "%s" failed.' % sql_statement) from ex
        else:
            sql_statement = "CREATE TABLE %s.%s ( " \
                            "entity_type_id BIGINT NOT NULL, " \
                            "parquet_name VARCHAR(2048) NOT NULL, " \
                            "parquet_file BYTEA, " \
                            "updated_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, " \
                            "CONSTRAINT %s UNIQUE(entity_type_id, parquet_name))" % (
                                self.quoted_schema, self.quoted_cache_tablename,
                                dbhelper.quotingTableName('uc_%s' % self.cache_tablename, self.is_postgre_sql))
            try:
                dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement)
            except Exception as ex:
                raise Exception('Execution of sql statement "%s" failed.' % sql_statement) from ex

        logger.info('Table %s.%s has been created.' % (self.quoted_schema, self.quoted_cache_tablename))
예제 #2
0
    def __init__(self, tenant_id, entity_type_id, schema, db_connection,
                 db_type):

        self.tenant_id = tenant_id
        self.entity_type_id = entity_type_id
        self.schema = schema
        self.db_connection = db_connection
        self.db_type = db_type

        if self.db_type == 'db2':
            self.is_postgre_sql = False
            self.schema = schema.upper()
            self.store_tablename = DBModelStore.STORE_TABLENAME.upper()
        elif self.db_type == 'postgresql':
            self.is_postgre_sql = True
            self.schema = schema.lower()
            self.store_tablename = DBModelStore.STORE_TABLENAME.lower()
        else:
            raise Exception(
                'Initialization of %s failed because the database type %s is unknown.'
                % (self.__class__.__name__, self.db_type))

        self.quoted_schema = dbhelper.quotingSchemaName(
            self.schema, self.is_postgre_sql)
        self.quoted_store_tablename = dbhelper.quotingTableName(
            self.store_tablename, self.is_postgre_sql)

        self._handle_store_table()
예제 #3
0
    def _push_cache(self, cache_filename, cache_pathname):

        if not self.is_postgre_sql:

            sql_statement = "MERGE INTO %s.%s AS TARGET " \
                            "USING (VALUES (?, ?, ?, CURRENT_TIMESTAMP)) " \
                            "AS SOURCE (ENTITY_TYPE_ID, PARQUET_NAME, PARQUET_FILE, UPDATED_TS) " \
                            "ON TARGET.ENTITY_TYPE_ID = SOURCE.ENTITY_TYPE_ID " \
                            "AND TARGET.PARQUET_NAME = SOURCE.PARQUET_NAME " \
                            "WHEN MATCHED THEN " \
                            "UPDATE SET TARGET.PARQUET_FILE = SOURCE.PARQUET_FILE, " \
                            "TARGET.UPDATED_TS = SOURCE.UPDATED_TS " \
                            "WHEN NOT MATCHED THEN " \
                            "INSERT (ENTITY_TYPE_ID, PARQUET_NAME, PARQUET_FILE, UPDATED_TS) " \
                            "VALUES (SOURCE.ENTITY_TYPE_ID, SOURCE.PARQUET_NAME, SOURCE.PARQUET_FILE, " \
                            "SOURCE.UPDATED_TS)" % (self.quoted_schema, self.quoted_cache_tablename)
            try:
                stmt = ibm_db.prepare(self.db_connection, sql_statement)

                try:
                    ibm_db.bind_param(stmt, 1, self.entity_type_id)
                    ibm_db.bind_param(stmt, 2, cache_filename)
                    ibm_db.bind_param(stmt, 3, cache_pathname, ibm_db.PARAM_FILE, ibm_db.SQL_BLOB)
                    ibm_db.execute(stmt)
                finally:
                    ibm_db.free_result(stmt)
            except Exception as ex:
                raise Exception('Storing cache file %s under name %s failed with sql statement "%s"' % (
                    cache_pathname, cache_filename, sql_statement)) from ex

        else:
            try:
                f = open(cache_pathname, 'rb')
                try:
                    blob = f.read()
                finally:
                    f.close()
            except Exception as ex:
                raise Exception('The cache file %s could not be read from disc.' % cache_pathname) from ex
            else:
                statement1 = "INSERT INTO %s.%s (entity_type_id, parquet_name, parquet_file, updated_ts) " % (
                    self.quoted_schema, self.quoted_cache_tablename)

                statement3 = "ON CONFLICT ON CONSTRAINT %s DO update set entity_type_id = EXCLUDED.entity_type_id, " \
                             "parquet_name = EXCLUDED.parquet_name, parquet_file = EXCLUDED.parquet_file, " \
                             "updated_ts = EXCLUDED.updated_ts" % dbhelper.quotingTableName(
                    ('uc_%s' % self.cache_tablename), self.is_postgre_sql)

                sql_statement = statement1 + " values (%s, %s, %s, current_timestamp) " + statement3

                try:
                    dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement,
                                                       (self.entity_type_id, cache_filename, psycopg2.Binary(blob)))
                except Exception as ex:
                    raise Exception('Storing cache under name %s failed with sql statement "%s"' % (
                        cache_filename, sql_statement)) from ex

        logger.info('Cache has been stored under name %s in table %s.%s' % (
            cache_filename, self.quoted_schema, self.quoted_cache_tablename))
예제 #4
0
    def execute(self, df, start_ts, end_ts, entities=None):
        key_id = 'key_id_'
        key_timestamp = 'key_timestamp_'

        sql = 'SELECT %s, %s AS "%s", %s AS "%s" FROM %s' % (
            ', '.join([dbhelper.quotingColumnName(col, self.dms.is_postgre_sql) for col in self.columns]),
            dbhelper.quotingColumnName(self.id_col, self.dms.is_postgre_sql), key_id,
            dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql), key_timestamp,
            dbhelper.quotingTableName(self.table, self.dms.is_postgre_sql))
        condition_applied = False
        if self.where_clause is not None:
            sql += ' WHERE %s' % self.where_clause
            condition_applied = True
        if start_ts is not None and end_ts is not None:  # TODO start_ts and end_ts are expected to be not None
            if not condition_applied:
                sql += ' WHERE '
            else:
                sql += ' AND '
            sql += "%s < %s AND %s <= %s" % (dbhelper.quotingSqlString(str(start_ts)),
                                             dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql),
                                             dbhelper.quotingColumnName(self.timestamp_col, self.dms.is_postgre_sql),
                                             dbhelper.quotingSqlString(str(end_ts)))
            condition_applied = True
        if entities is not None:
            if not condition_applied:
                sql += ' WHERE '
            else:
                sql += ' AND '
            sql += "%s IN (%s)" % (dbhelper.quotingColumnName(self.id_col, self.dms.is_postgre_sql),
                                   ', '.join([dbhelper.quotingSqlString(ent) for ent in entities]))

        self.parse_dates.add(key_timestamp)
        requested_col_names = self.names + [key_id, key_timestamp]
        df_sql = self._get_dms().db.read_sql_query(sql, parse_dates=self.parse_dates,
                                                   requested_col_names=requested_col_names)
        df_sql = df_sql.astype(dtype={key_id: str}, errors='ignore')

        self.logger.debug('loaded_df=\n%s' % df_sql.head())

        # reset and rename event df index to the same special column names
        original_event_index_names = df.index.names
        df = df.rename_axis([key_id, key_timestamp])
        df = df.reset_index()

        # concat ignoring index (simple concatenation) then set index back renamed to the original one
        df = pd.concat([df, df_sql], ignore_index=True, sort=False)
        df = df.set_index(keys=[key_id, key_timestamp])
        df = df.rename_axis(original_event_index_names)

        self.logger.debug('concatenated_df=\n%s' % df.head())

        return df
예제 #5
0
    def store_model(self, model_name, model, user_name=None, serialize=True):

        if serialize:
            try:
                model = pickle.dumps(model)
            except Exception as ex:
                raise Exception(
                    'Serialization of model %s that is supposed to be stored in ModelStore failed.' % model_name) from ex

        if not self.is_postgre_sql:
            sql_statement = "MERGE INTO %s.%s AS TARGET " \
                            "USING (VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?)) " \
                            "AS SOURCE (ENTITY_TYPE_ID, MODEL_NAME, MODEL, UPDATED_TS, LAST_UPDATED_BY) " \
                            "ON TARGET.ENTITY_TYPE_ID = SOURCE.ENTITY_TYPE_ID " \
                            "AND TARGET.MODEL_NAME = SOURCE.MODEL_NAME " \
                            "WHEN MATCHED THEN " \
                            "UPDATE SET TARGET.MODEL = SOURCE.MODEL, " \
                            "TARGET.UPDATED_TS = SOURCE.UPDATED_TS " \
                            "WHEN NOT MATCHED THEN " \
                            "INSERT (ENTITY_TYPE_ID, MODEL_NAME, MODEL, UPDATED_TS, LAST_UPDATED_BY) " \
                            "VALUES (SOURCE.ENTITY_TYPE_ID, SOURCE.MODEL_NAME, SOURCE.MODEL, " \
                            "SOURCE.UPDATED_TS, SOURCE.LAST_UPDATED_BY)" % (
                                self.quoted_schema, self.quoted_store_tablename)
            try:
                stmt = ibm_db.prepare(self.db_connection, sql_statement)

                try:
                    ibm_db.bind_param(stmt, 1, self.entity_type_id)
                    ibm_db.bind_param(stmt, 2, model_name)
                    ibm_db.bind_param(stmt, 3, model)
                    ibm_db.bind_param(stmt, 4, user_name)
                    ibm_db.execute(stmt)
                finally:
                    ibm_db.free_result(stmt)
            except Exception as ex:
                raise Exception('Storing model %s failed with sql statement "%s"' % (model_name, sql_statement)) from ex
        else:
            statement1 = "INSERT INTO %s.%s (entity_type_id, model_name, model, updated_ts, last_updated_by) " % (
                self.quoted_schema, self.quoted_store_tablename)

            statement3 = "ON CONFLICT ON CONSTRAINT %s DO update set entity_type_id = EXCLUDED.entity_type_id, " \
                         "model_name = EXCLUDED.model_name, model = EXCLUDED.model, " \
                         "updated_ts = EXCLUDED.updated_ts, last_updated_by = EXCLUDED.last_updated_by" % dbhelper.quotingTableName(
                ('uc_%s' % self.store_tablename), self.is_postgre_sql)

            sql_statement = statement1 + " values (%s, %s, %s, current_timestamp, %s) " + statement3

            try:
                dbhelper.execute_postgre_sql_query(self.db_connection, sql_statement,
                                                   (self.entity_type_id, model_name, psycopg2.Binary(model), user_name))
            except Exception as ex:
                raise Exception('Storing model %s failed with sql statement "%s"' % (model_name, sql_statement)) from ex

        logger.info('Model %s of size %d bytes has been stored in table %s.%s.' % (
            model_name, len(model) if model is not None else 0, self.quoted_schema, self.quoted_store_tablename))