def __init__(self, sql, actioner, log_detail=True, batch_size=100, meta={}, db_type=None, table_schema=None, logger=None): self.meta = meta self.actioner = actioner self.log_detail = log_detail self.batch_size = batch_size self.logger = logger if logger else Logger(log_level="info", vendor_key=-1, retailer_key=-1, module_name="scd") self.sql = sql self.raw_sql_splitted = self.sql.split(' ') self.raw_sql_splitted_trimed = [ str for str in self.raw_sql_splitted if str != '' ] self.db_type = db_type self.table_schema = table_schema self.logger.info('[SQL] %s' % self.sql) self.logger.info('[SQL splitted] %s' % self.raw_sql_splitted) self.mssql = MSOperation(meta=self.meta) self.vtc = None self.saved_records_count = 0
def __init__(self, meta, params, logger=None): self.meta = meta self.params = params self.file_ext = self.params.get("fileExt") if self.params.get( "fileExt") else "txt" self.zip_flag = True if str(self.params.get( "zipFlag", "N")).upper() in ["Y", "YES", "T", "TRUE"] else False self._debug = "Y" if str(self.params.get( 'debug', 'N')).upper() in ["Y", "YES", "T", "TRUE"] else 'N' __log_level = 'DEBUG' if self._debug == "Y" else 'INFO' self.logger = logger if logger else Logger(log_level="info", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.logger.set_level(log_level=__log_level) self.logger.set_keys(log_id="{}".format(self.params["jobId"])) self.provision_url = self.meta[ "api_provision_str"] + "/filesDelivery/layout/attributes?cycle_key=" self.capacity = Capacity(meta=self.meta) self.rsi_username = self.meta.get("fileshare_username") self.rsi_folder = self.meta.get("fileshare_folder") self.cmn_schema = self.meta.get("db_conn_vertica_common_schema", "common") self.app_conn = MSOperation(meta=self.meta) self.vertica_conn = DWOperation(meta=self.meta) self.dumper_context = self.get_context() self._layout_payload = {}
def __init__(self, context): self.context = context self.meta = self.context["meta"] self._logger = self.context["logger"] self._db = MSOperation(meta=self.meta) self._dw = DWOperation(meta=self.meta) self.bundle_base_url = self.meta["api_osa_bundle_str"] self.alert_base_url = self.meta["api_alerts_str"] self._common_schema = self.meta["db_conn_vertica_common_schema"] self._dim_calendar = "DIM_CALENDAR" self._dim_product = "DIM_PRODUCT" self._dim_store = "DIM_STORE" self._meta_intvn = "ALT_META_INTERVENTION" self._dim_base_dir = self.meta["azure_storage_dim_root"] self._azure_storage_account_name = self.meta[ "azure_storage_account_name"] self._azure_storage_blob_container = self.meta[ "azure_storage_blob_container"] self._prefix_name = "IRIS_" self.azure_uploader = UploadToBlobStorage(meta=self.meta, logger=self._logger) self.body = { "job_id": self.context["jobId"], "step_id": self.context["stepId"], "status": StepStatus.RUNNING.name } self.capacity = Capacity(meta=self.meta) self.cycle_key = self._get_cycle_key()
def __init__(self, context): self.context = {**context, **context["meta"]}.copy() self._hub_ids = context["hub_ids"] self._logger = context["logger"] self._db = MSOperation(meta=context["meta"]) self._dw = DWOperation(meta=context["meta"]) self._common_schema = context["meta"]["db_conn_vertica_common_schema"] # self._dw_conn_vertica = self.context["dw_conn_vertica"] self._all_dim = self.context["all_dim"] self._dim_calendar = "DIM_CALENDAR" self._dim_product = "DIM_PRODUCT" self._dim_store = "DIM_STORE" self._product_cis_src_tbl = "PS_ITEM_DIM_EXT" self._store_cis_src_tbl = "PS_STORE_DIM_EXT" self._dct_vendor_retailer_hub = {} self._dct_vendor_hub = {} self._dct_retailer_hub = {} self._all_vendor_retailer_hub = self._get_avaliable_vr()
def __init__(self, meta, params, logger=None, filename="afm_config.json"): """ :param meta: common config :param params: used by calling REST API :param filename: only used for local testing """ self.meta = meta self.params = params self.filename = filename self._cap = Capacity(meta=self.meta) self.app_conn = MSOperation(meta=self.meta) self._logger = logger if logger else Logger(log_level="debug", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.scd_url = "{0}/scd/process".format( self.meta["api_osa_bundle_str"])
def test_gen_vtc_test_data(): mssql = MSOperation(meta=meta) vtc = DWOperation(meta=meta) try: vtc.execute('drop table common.test_columns') except: print('table doesn\'t exists') vtc.execute( 'create table common.test_columns as select * from columns') rs = mssql.query( 'select id from META_SCD_TABLE_DEF where table_name=\'test_columns\'' ) if not rs: sql = ''' INSERT INTO META_SCD_TABLE_DEF(TABLE_NAME, PK, DB_TYPE, TABLE_SCHEMA, CREATE_TIME) SELECT 'test_columns', '["table_schema", "table_name", "column_name"]', 'VERTICA', '(COMMON)', GETDATE() ''' mssql.execute(sql) '''
def insert_default_params(): sql_handler = MSOperation(meta=meta) sql = "UPDATE META_DIM_DTL SET ATTR_TEXT = '%s' WHERE DEF_ID=(SELECT ID FROM META_DIM_DEF WHERE NAME='defaultParamCategory')" % json.dumps( param_category) try: sql_handler.execute(sql) finally: sql_handler.close_connection()
def test_gen_mssql_test_data(): mssql = MSOperation(meta=meta) try: mssql.execute('drop table test_backupfille') except: print('table doesn\'t exists') mssql.execute('select * into test_backupfille from msdb.dbo.backupfile' ) #--backup_set_id, file_number rs = mssql.query( 'select id from META_SCD_TABLE_DEF where table_name=\'test_backupfille\'' ) if not rs: sql = ''' INSERT INTO META_SCD_TABLE_DEF(TABLE_NAME, PK, DB_TYPE, TABLE_SCHEMA, CREATE_TIME) SELECT 'test_backupfille', '["backup_set_id", "file_number"]', 'MSSQL', '(COMMON)', GETDATE() ''' mssql.execute(sql) '''
def __init__(self, context): self.context = context self.meta = self.context["meta"] self._logger = self.context["logger"] self.body = { "job_id": self.context["jobId"], "step_id": self.context["stepId"], "status": StepStatus.RUNNING.name } self._db = MSOperation(meta=self.meta) self._dw = DWOperation(meta=self.meta) self._common_schema = self.meta["db_conn_vertica_common_schema"] self._check_spd_url = "{}/availablecycle/rc/action".format( self.meta["api_alerts_str"]) self.pacific_job_def_name = "OSAPacificNotification"
class DumpMain(object): def __init__(self, meta, params, logger=None): self.meta = meta self.params = params self.file_ext = self.params.get("fileExt") if self.params.get( "fileExt") else "txt" self.zip_flag = True if str(self.params.get( "zipFlag", "N")).upper() in ["Y", "YES", "T", "TRUE"] else False self._debug = "Y" if str(self.params.get( 'debug', 'N')).upper() in ["Y", "YES", "T", "TRUE"] else 'N' __log_level = 'DEBUG' if self._debug == "Y" else 'INFO' self.logger = logger if logger else Logger(log_level="info", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.logger.set_level(log_level=__log_level) self.logger.set_keys(log_id="{}".format(self.params["jobId"])) self.provision_url = self.meta[ "api_provision_str"] + "/filesDelivery/layout/attributes?cycle_key=" self.capacity = Capacity(meta=self.meta) self.rsi_username = self.meta.get("fileshare_username") self.rsi_folder = self.meta.get("fileshare_folder") self.cmn_schema = self.meta.get("db_conn_vertica_common_schema", "common") self.app_conn = MSOperation(meta=self.meta) self.vertica_conn = DWOperation(meta=self.meta) self.dumper_context = self.get_context() self._layout_payload = {} def main_process(self): """ Getting AFM related parameters. 2 cases here. 1, called by AFM directly: Alerts Delivery is coupled with AFM. So it is better to call by AFM. Then we can get parameters directly from AFM via self.params 2, called via REST API. in case failure when called by AFM, we also support the REST API while we can manually trigger this. And getting params from REST API Body. :return: """ try: # Checking the cancel status from previous step. _cancel_flag = self.params.get("cancel_flag", False) if _cancel_flag: return self.params.get("message") self.logger.debug("The context is: %s" % str(self.dumper_context)) init = Initialize(self.dumper_context) afm_params = init.init_process() _cycle_key = afm_params.get("cycleKey") _retailer_name = self.capacity.get_retailer_schema_name( retailer_key=afm_params["retailerKey"]) self.dumper_context["retailer_name"] = _retailer_name self.dumper_context["schema"] = _retailer_name self.dumper_context["cycle_key"] = _cycle_key self._layout_payload = self._get_layout() self._process(afm_params) except Warning as e: return str(e) except Exception as e: self.logger.warning(str(e)) raise def get_context(self): context = { "meta": self.meta, "params": self.params, "dw_conn": self.vertica_conn, "app_conn": self.app_conn, "logger": self.logger } return context def _get_layout(self): # Getting layout fields for alert delivery. _cycle_key = self.dumper_context["cycle_key"] _delivery_layout_api = "{0}{1}".format(self.provision_url, _cycle_key) self.logger.info( "Alert delivery related columns(payload param) is not given, " "So just calling provision API: %s to retrieve them." % _delivery_layout_api) # _header = {"tokenid": "eyJhbGciOiJIUzI1NiJ9.eyJjb29raWVOYW1lIjoicnNpU3NvTmV4dEdlbiIsImNvb2tpZVZhbHVlIjoiQVFJQzV3TTJMWTRTZmN6ZmRuLVVQazA2b2NnRzVWaTlZRFc1cHZZQzF6b3djbXMuKkFBSlRTUUFDTURFQUFsTkxBQkkyTVRBNU1EUTBPRFUwTXpZeU1UWTBORFlBQWxNeEFBQS4qIiwic3RhdHVzIjoic3VjY2VzcyIsInVzZXJJZCI6ImJlbi53dUByc2ljb3JwLmxvY2FsIiwiaWF0IjoxNTQ0Njg5OTY3fQ.AzCgFFHXHo3J1M4fk-17T8fBLwReDQDb4p-DXUcBm_M"} _header = {} resp = requests.get(_delivery_layout_api, _header, verify=False) if resp.text == "invalid token": self.logger.error( "ERROR: invalid tokenid, Please update the tokenid manually. " "Then rerun this script again!!!") if resp.status_code != requests.codes.ok or str( resp.json().get("status")).lower() != "success": self.logger.warning("The response result is: %s" % resp.json()) self.logger.error("Calling API failed. Refer to API: %s" % _delivery_layout_api) # _payload = resp.json()["data"]["payload"] # e.g. list({"key1":"value1"}, {}, {}) _payload = resp.json()["data"] self.logger.debug("The layout payload is: %s" % _payload) return _payload def _gen_query(self, delivery_key): # getting the payload for given delivery key. _payload = self._layout_payload.get(str(delivery_key))["payload"] self.logger.debug("The payload is: %s" % str(_payload)) # only getting data where field preselect is True. _payload = list(filter(lambda x: x["preselect"] is True, _payload)) self.logger.debug("The filtered payload is: %s" % str(_payload)) if not _payload: self.logger.warning( "There is no layout configed for this delivery.") return None, None # It is required to order by seq, so need to convert list(dict) to list(tuple) to sort the data. # Otherwise, the following code will not be working correctly. # _payload_list should be like: e.g. [(1, 'Alert', 'ALERT_ID', 'ALERT ID', None), (), ...] # Noted: Please don't change below fields order for generating _payload_list. _payload_list = [(ele["sequence"], ele["dimension_type"], ele["Field"], ele["display_name"], ele["sort_by"]) for ele in _payload] self.logger.debug("Converted payload is: %s" % str(_payload_list)) # Fields should be listed based on sequence. In case seq is None, then putting those columns to the last. # if many elements with None value, then they will be sorted randomly. _payload_list.sort( key=lambda x: x[0] if x[0] is not None else 999) # Assuming less than 999 columns. self.logger.debug("Sorted payload is: %s" % str(_payload_list)) display_fields_lst = [ ] # getting display name like: ["alert type", ...] for displaying on delivery file. columns_with_alias = [ ] # getting fields with format like: [alert."alert_type" as "alert type", ...] table_list = set( ) # getting required table list. ("Alert", "Store", ...) for columns in _payload_list: self.logger.debug( "Current display name is: \"%s\" for column: %s" % (columns[3], columns[2])) table_list.add(str(columns[1]).lower()) if columns[3] is None: # display name could be None display_fields_lst.append(columns[2]) columns_with_alias.append(columns[1] + '."' + columns[2] + '" AS "' + columns[2] + '"') else: display_fields_lst.append(columns[3]) columns_with_alias.append(columns[1] + '."' + columns[2] + '" AS "' + columns[3] + '"') # if there is no "Alert Date" enabled, then manually added this field to the beginning. if "Alert Date".lower() not in list( map(lambda x: str(x).lower(), display_fields_lst)): display_fields_lst.insert(0, "Alert Date") columns_with_alias.insert(0, 'ALERT.period_key AS "Alert Date"') _display_fields_str = ",".join([ '"' + column + '"' for column in display_fields_lst ]) # combine them with comma(,) _required_columns_prefix_tmp = ",".join( [column for column in columns_with_alias]) # combine them # replace one mandatory fields alert_type to intervention_name. Since it is not the column name in fact table. _required_columns_prefix = re.sub( '(?i)' + re.escape('ALERT."ALERT_TYPE"'), 'type.intervention_name', _required_columns_prefix_tmp) # Getting columns which sort_by is True(which is enabled sorting). And sort it. sort_by_list = sorted(list( filter(lambda x: x[4] is True, _payload_list)), key=lambda x: x[4]) if sort_by_list: _sort_columns = ",".join([ ele[1] + '."' + ele[2] + '" DESC NULLS LAST' for ele in sort_by_list ]) else: _sort_columns = '1' self.logger.debug(_sort_columns) _sort_columns = re.sub('(?i)' + re.escape('ALERT."ALERT_TYPE"'), 'type.intervention_name', _sort_columns) _fdbk_required = True if "feedback" in table_list else False _item_required = True if "product" in table_list else False _store_required = True # mandatory # Generating the initial query. _init_sql = """ SELECT {columns}, ROW_NUMBER() OVER(ORDER BY store.STORE_ID, {sortedFields} ) AS rn FROM {schema}.FACT_PROCESSED_ALERT alert INNER JOIN {cmnSchema}.alt_meta_intervention type ON alert.InterventionKey = type.Intervention_Key """\ .format(columns=_required_columns_prefix, sortedFields=_sort_columns, schema=self.dumper_context["schema"], cmnSchema=self.cmn_schema) if _fdbk_required: _init_sql += """ LEFT JOIN {schema}.FACT_FEEDBACK Feedback ON alert.alert_id = Feedback.alert_id"""\ .format(schema=self.dumper_context["schema"]) if _item_required: _init_sql += """ INNER JOIN {cmnSchema}.dim_product Product ON alert.item_key = Product.item_key AND alert.vendor_key = Product.vendor_key AND alert.retailer_key = Product.retailer_key"""\ .format(cmnSchema=self.cmn_schema) if _store_required: _init_sql += """ INNER JOIN {cmnSchema}.dim_store Store ON alert.store_key = store.store_key AND alert.vendor_key = store.vendor_key AND alert.retailer_key = store.retailer_key"""\ .format(cmnSchema=self.cmn_schema) # always apply the period_key filter for given vendor/retailer _init_sql += """ WHERE 1=1 AND alert.IssuanceId = 0 AND (alert.vendor_key, alert.retailer_key, alert.period_key) IN ( SELECT vendor_key, retailer_key, alert_day FROM TMP_RAW_ALERTS_INFO ) """ return _display_fields_str, _init_sql def _process(self, afm_params): try: _cycle_key = afm_params.get("cycleKey") # Reading configuration from meta table under IRIS MSSQL. # Getting all owners(includes both SVR & RETAILER rule) according to given cycle_key. # the delivery file will be dumped by owner. sql = """ SELECT d.ID AS DELIVERY_KEY, d.CYCLE_KEY, d.RETAILER_KEY, d.DELIVERY_NAME, d.FILTERS, d.DELIMITER, d.OWNER, ep.SERVER, ep.EXTRACTION_FOLDER, ep.USERNAME, ep.PASSWORD, ep.MAIL_SUBJECT, ep.MAIL_BODY, ep.MAIL_RECPSCC, ep.MAIL_RECPSTO, ep.DELIVERY_TYPE FROM AP_META_DELIVERIES d INNER JOIN AP_META_ENDPOINTS ep ON d.ENDPOINT_ID = ep.ID WHERE d.cycle_key = {0} AND d.ENABLED = 'T' AND ep.ENABLED = 'T' """.format(_cycle_key) self.logger.info(sql) meta_rows = self.app_conn.query(sql) self.logger.debug("The meta data is: %s" % str(meta_rows)) if not meta_rows: raise Warning( "There is no endpoint or delivery configed. Please check meta table!" ) # There could be multi owners for the given cycle but with different filters. # This is required by PM. And we need to generate separate files for every single row. for meta_data in meta_rows: # 1, Getting the initial source query _delivery_key = meta_data.DELIVERY_KEY required_columns, _init_src_query = self._gen_query( delivery_key=_delivery_key) if required_columns is None and _init_src_query is None: self.logger.warning( "Seems no layout configed for delivery key: %s" % _delivery_key) continue self.logger.info("The initial source query is: %s" % _init_src_query) delivery_type = meta_data.DELIVERY_TYPE if str.lower(delivery_type) == 'customer': meta_data = meta_data._replace( EXTRACTION_FOLDER=self.rsi_folder, USERNAME=self.rsi_username) if meta_data.USERNAME is None: self.logger.warning( "There is no username configed for delivery key: %s" % _delivery_key) continue _pmp_pwd = get_password(username=meta_data.USERNAME, meta=self.meta) if _pmp_pwd: meta_data = meta_data._replace(PASSWORD=_pmp_pwd) self.logger.info("Start to dump & delivery for meta: %s" % str(meta_data)) _src_query = _init_src_query # 2, checking if any filters applied. (e.g alert_type, category etc.) # User might wants to dump only given alert types of data. This should be configurable. # So far, we support 2 types of filters: alert_type & category # TODO: confirm the filter format with UI team. Currently filters are configed with json format. # e.g. {"alert_type": "d-void,phantom", "category":"cat1,cat2"} _filters_raw = meta_data.FILTERS if not _filters_raw or _filters_raw == "": self.logger.info("No filters applied.") else: self.logger.info("The filters are: %s" % _filters_raw) _filters = json.loads(str(_filters_raw).lower().strip()) alert_type_str = _filters.get( "alert_type", None) # e.g. phantom,d-void,shelf oos if alert_type_str is not None and str( alert_type_str).strip() != '': alert_type = ','.join( "'" + str(ele).strip() + "'" for ele in str(alert_type_str).split(',')) _src_query += " AND type.intervention_name IN ({type})".format( type=alert_type) category_str = _filters.get("category", None) if category_str is not None and str( category_str).strip() != '': category_type = ','.join( "'" + str(ele).strip() + "'" for ele in str(category_str).split(',')) _src_query += " AND Product.OSM_CATEGORY IN ({cat_type})".format( cat_type=category_type) # The owner format should be like: owner1 or owner1,owner2,... _owners = str(meta_data.OWNER) if not _owners: # owner is the mandatory filter for every delivery. raise ValueError( "There is no owner configed in delivery meta table") _owner_in_str = ",".join("'" + ele.strip() + "'" for ele in _owners.split(",")) _src_query += " AND alert.owner IN ({owner}) ".format( owner=_owner_in_str) _final_src_query = """ SELECT {columns} FROM ({query}) x ORDER BY rn """.format(columns=required_columns, query=_src_query) self.logger.info("The final source sql is: %s" % _final_src_query) # delivery file name should be: <delivery_name>_<YYYYMMDD>.<fileExt>. e.g. <delivery_name>_20180101.txt curr_folder = os.path.dirname(os.path.realpath(__file__)) target_filename = meta_data.DELIVERY_NAME + "_" + datetime.datetime.now( ).strftime('%Y%m%d') # delivery file will be dumped to "<curr_dir>/data" folder temporarily. abs_target_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.' + self.file_ext zip_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.zip' # Getting data delimiter. e.g. ',' delimiter = str(meta_data.DELIMITER).strip() if len(delimiter) != 1: raise ValueError("delimiter should be 1 char") # start to dump data self.dumper = dd.DumpData(context=self.dumper_context) # dump data from source db self.logger.info("Dumping data into file: %s" % abs_target_filename) _dump_flag = self.dumper.dump_data( src_sql=_final_src_query, output_file=abs_target_filename, delimiter=delimiter) self.logger.debug("The dump flag is: %s" % _dump_flag) # dump alerts succeeded. if _dump_flag is True: self.logger.info("Dumping data is done!") # check the zip flag if self.zip_flag: _flat_file_size = round( os.path.getsize(abs_target_filename) / 1024 / 1024) self.logger.debug("The flat file size is: %s" % _flat_file_size) self.logger.info("zipping file: %s" % abs_target_filename) with zipfile.ZipFile(zip_filename, 'w') as z: z.write(abs_target_filename, os.path.basename(abs_target_filename)) abs_target_filename = zip_filename self.logger.info("The zip file name is: %s" % abs_target_filename) # start to send data file self.logger.info( "Starting uploading delivery file to dest folder!") self.sender = sd.SendData(context=self.dumper_context) self.sender.delivery_file(meta_data=meta_data, src_file=abs_target_filename) else: self.logger.warning( "There is no data returned or dump data failed. " "Please refer to previous log to get the related source query." ) self.logger.info("Alert delivery process is done") except Warning as e: raise except Exception: raise finally: if self.vertica_conn: self.vertica_conn.close_connection() if self.app_conn: self.app_conn.close_connection()
class SyncDimData: """ Get product and store dimension data from hub. DIM_PRODUCT and DIM_STORE save pivoted data, tall table PRODUCT and STORE save attribute that config in AP_CONFIG_META_ATTRIBUTE """ def __init__(self, context): self.context = {**context, **context["meta"]}.copy() self._hub_ids = context["hub_ids"] self._logger = context["logger"] self._db = MSOperation(meta=context["meta"]) self._dw = DWOperation(meta=context["meta"]) self._common_schema = context["meta"]["db_conn_vertica_common_schema"] # self._dw_conn_vertica = self.context["dw_conn_vertica"] self._all_dim = self.context["all_dim"] self._dim_calendar = "DIM_CALENDAR" self._dim_product = "DIM_PRODUCT" self._dim_store = "DIM_STORE" self._product_cis_src_tbl = "PS_ITEM_DIM_EXT" self._store_cis_src_tbl = "PS_STORE_DIM_EXT" self._dct_vendor_retailer_hub = {} self._dct_vendor_hub = {} self._dct_retailer_hub = {} self._all_vendor_retailer_hub = self._get_avaliable_vr() def _get_column(self): """Get attribute from config table :return: """ self._dct_table = {"DIM_PRODUCT": "", "DIM_STORE": ""} self._dct_key = {"DIM_PRODUCT": "ITEM_KEY", "DIM_STORE": "STORE_KEY"} self._dct_table_column = {"PRODUCT": [], "STORE": []} sql = "SELECT DISTINCT KEY_COLUMN FROM AP_CONFIG_META_ATTRIBUTE WHERE DIMENSION_TYPE = 1" self._logger.debug(sql) for row in self._db.query(sql): self._dct_table_column["PRODUCT"].append(row[0]) sql = "SELECT DISTINCT KEY_COLUMN FROM AP_CONFIG_META_ATTRIBUTE WHERE DIMENSION_TYPE = 2" self._logger.debug(sql) for row in self._db.query(sql): self._dct_table_column["STORE"].append(row[0]) for k, v in self._dct_table_column.items(): self._dct_table_column[k] = """'{}'""".format("', '".join(v)) def _get_source_config(self, hub_id): """Get source config :return: """ self._source_dw = SiloOperation(hub_id=hub_id, meta=self.context["meta"]) if self.context["meta"]["db_conn_vertica_silo_username"]: self._source_dw.config.json_data["configs"][ "dw.etluser.id"] = self._source_dw.username self._source_dw.config.json_data["configs"][ "dw.etluser.password"] = Crypto().encrypt( self._source_dw.password) silo_type = self._source_dw.config.get_property( "etl.silo.type", "None").upper() # TODO For hub, silo_type is MD, not find para for this, default host if silo_type in ("WMSSC", "SASSC", "WMCAT", "WMINTL"): self._active_status = "" else: self._active_status = " AND D.ACTIVE = 'T'" def _get_avaliable_vr(self): """ getting all avaliable vendor and retailer from cycle mapping table. and get related hub_id via config service. :return: dict like {'vendor,retailer': 'hub_id', ...} """ sql = "SELECT DISTINCT VENDOR_KEY, RETAILER_KEY FROM AP_ALERT_CYCLE_MAPPING " \ "UNION " \ "SELECT DISTINCT VENDOR_KEY, RETAILER_KEY FROM AP_ALERT_CYCLE_RC_MAPPING" self._logger.debug(sql) dct_vendor_retailer_hub = {} # dct_vendor_retailer_hub = dict(self._db.query(sql)) for v_r in self._db.query(sql): try: config = Config(meta=self.context["meta"], vendor_key=v_r.VENDOR_KEY, retailer_key=v_r.RETAILER_KEY) hub_id = config.get_hub_id() _key = str(v_r.VENDOR_KEY) + ',' + str(v_r.RETAILER_KEY) dct_vendor_retailer_hub[_key] = hub_id # in case there is no config returned for given vendor & retailer, then skip this vendor & retailer. except Exception as e: # self._logger.warning(str(e)) self._logger.warning( "Seems there is no silo configed for vendor: %s and retailer: %s" % (str(v_r.VENDOR_KEY), str(v_r.RETAILER_KEY))) continue return dct_vendor_retailer_hub def _get_vendor_retailer_from_hub(self, hub_id): """Get vendor retailer from table AP_ALERT_CYCLE_MAPPING, only keep the first one that appear in multiple hubs :return: """ self._str_vendor_retailer_filter = "" self._str_vendor_filter = "" self._str_retailer_filter = "" lst_vendor_retailer_filter = [] lst_vendor_filter = [] lst_retailer_filter = [] dct_vendor_retailer_hub = self._all_vendor_retailer_hub sql = ( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ " " DISTINCT VENDOR_KEY, RETAILER_KEY FROM {self._meta_schema}.CONFIG_CUSTOMERS" " WHERE VENDOR_NAME != 'Alert' AND HUB_ID = '{hub_id}'" " ORDER BY VENDOR_KEY, RETAILER_KEY".format(hub_id=hub_id, self=self)) self._logger.debug(sql) vendor_retailers = self._source_dw.query(sql) for row in vendor_retailers: _vr = "{row.VENDOR_KEY},{row.RETAILER_KEY}".format(row=row) # same vendor & retailer could have multi hubs. # So only getting the one which matches with the hub from Config service. if dct_vendor_retailer_hub.get(_vr) == hub_id or self._all_dim: if _vr not in self._dct_vendor_retailer_hub: self._dct_vendor_retailer_hub[_vr] = hub_id lst_vendor_retailer_filter.append( "({row.VENDOR_KEY},{row.RETAILER_KEY})".format( row=row)) if str(row.VENDOR_KEY) not in self._dct_vendor_hub: self._dct_vendor_hub["{}".format(row.VENDOR_KEY)] = hub_id lst_vendor_filter.append("{}".format(row.VENDOR_KEY)) if str(row.RETAILER_KEY) not in self._dct_retailer_hub: self._dct_retailer_hub["{}".format( row.RETAILER_KEY)] = hub_id lst_retailer_filter.append("{}".format(row.RETAILER_KEY)) if lst_vendor_retailer_filter: self._str_vendor_retailer_filter = ( " AND (VENDOR_KEY, RETAILER_KEY) IN ({})" "".format(",".join(lst_vendor_retailer_filter))) self._logger.debug(self._str_vendor_retailer_filter) if lst_vendor_filter: self._str_vendor_filter = (" AND VENDOR_KEY IN ({})".format( ",".join(list(set(lst_vendor_filter))))) self._logger.debug(self._str_vendor_filter) if lst_retailer_filter: self._str_retailer_filter = (" AND RETAILER_KEY IN ({})".format( ",".join(list(set(lst_retailer_filter))))) self._logger.debug(self._str_retailer_filter) def _recreate_stage_table(self): """ Creating related stage tables on IRIS DW. :return: """ lst_sql = [] for table in (list(self._dct_table.keys()) + ["PRODUCT", "STORE"]): lst_sql.append(""" DROP TABLE IF EXISTS {self._common_schema}.STAGE_{table}; CREATE TABLE {self._common_schema}.STAGE_{table} LIKE {self._common_schema}.{table} INCLUDING PROJECTIONS; """.format(self=self, table=table)) sql = ''.join(lst_sql) self._logger.info(sql) self._dw.execute(sql) # creating staging table for loading CIS source data. _sql = """ DROP TABLE IF EXISTS {cmnSchema}.STAGE_{itemTable}_CIS; CREATE TABLE {cmnSchema}.STAGE_{itemTable}_CIS LIKE {cmnSchema}.{itemTable} INCLUDING PROJECTIONS; DROP TABLE IF EXISTS {cmnSchema}.STAGE_{storeTable}_CIS; CREATE TABLE {cmnSchema}.STAGE_{storeTable}_CIS LIKE {cmnSchema}.{storeTable} INCLUDING PROJECTIONS; """.format(cmnSchema=self._common_schema, itemTable=self._dim_product, storeTable=self._dim_store) self._logger.info(_sql) self._dw.execute(_sql) def _truncate_stage_table(self): """Truncate stage table :return: """ lst_sql = [] for k in (list(self._dct_table.keys()) + ["PRODUCT", "STORE"]): lst_sql.append( "TRUNCATE TABLE {self._common_schema}.STAGE_{k}".format( self=self, k=k)) sql = ';\n'.join(lst_sql) self._logger.debug(sql) self._dw.execute(sql) def _analyze_table(self): """Analyze table :return: """ lst_sql = [] for k in self._dct_table.keys(): lst_sql.append( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ ANALYZE_STATISTICS('{self._common_schema}.{k}')" "".format(self=self, k=k)) sql = ';\n'.join(lst_sql) self._logger.debug(sql) self._dw.execute(sql) def _export_calendar_data(self, hub_id): """ Export calendar data to common schema from source schema if no data in DIM_CALENDAR table :param hub_id: :return: """ cnt = self._dw.query_scalar( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ COUNT(*) " "FROM {self._common_schema}.{self._dim_calendar}".format( self=self)) if cnt != 0: self._logger.info( "Calendar data exists, skip hub_id {}.".format(hub_id)) else: col = ( 'PERIOD_KEY, CALENDAR_KEY, CALENDARNAME, YEAR, YEARNAME, QUARTER, QUARTERNAME, MONTH, MONTHNAME, ' 'PERIOD, PERIODNAME, WEEKENDED, WEEKENDEDNAME, WEEKBEGIN, WEEKBEGINNAME, YEARWEEKNAME, YEARWEEK, ' 'YEARMONTHWEEKNAME, LY_PERIOD_KEY, NY_PERIOD_KEY, DATE_NAME, DATE_VALUE, CAL_PERIOD_KEY, ' '"2YA_PERIOD_KEY", "3YA_PERIOD_KEY", "4YA_PERIOD_KEY"') sql = """ SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ CAL.PERIOD_KEY AS PERIOD_KEY, META.CALENDAR_KEY AS CALENDAR_KEY, MAX(META.CALENDAR_NAME) AS CALENDARNAME, MAX(CAL."YEAR") AS "YEAR", MAX(CAL.YEARNAME) AS YEARNAME, MAX(CAL.YEARQUARTER) AS QUARTER, MAX(CAL.YEARQUARTERNAME) AS QUARTERNAME, MAX(CAL.YEARPERIOD) AS "MONTH", MAX(CAL.YEARPERIODNAME) AS MONTHNAME, MAX(CAL.YEARMONTH) AS PERIOD, MAX(CAL.YEARMONTHNAME) AS PERIODNAME, MAX(CAL.WEEKENDED) AS WEEKENDED, MAX(CAL.WEEKENDEDNAME) AS WEEKENDEDNAME, MAX(CAL.WEEKBEGIN) AS WEEKBEGIN, MAX(CAL.WEEKBEGINNAME) AS WEEKBEGINNAME, (((MAX(CAL.YEAR))::VARCHAR|| ' - Wk '::VARCHAR(6)) || SUBSTR(('00'::VARCHAR(2) ||(MAX(CAL.WEEK))::VARCHAR),((LENGTH(('00'::VARCHAR(2) ||(MAX(CAL.WEEK))::VARCHAR)) - 2) + 1),2) ) AS YEARWEEKNAME, MAX(CAL.YEARWEEK) AS YEARWEEK, (((((MAX(CAL.YEAR))::VARCHAR|| ' - '::VARCHAR(3)) || SUBSTR (('00'::VARCHAR(2) ||(MAX(CAL.MONTH))::VARCHAR),((LENGTH(('00'::VARCHAR(2) ||(MAX(CAL.MONTH))::VARCHAR)) - 2) + 1),2)) || ' Wk '::VARCHAR(4)) || SUBSTR(('00'::VARCHAR(2) ||(CEILING(((RANK() OVER (PARTITION BY META.CALENDAR_KEY,MAX(CAL.YEAR),MAX(META.MONTH) ORDER BY CAL.PERIOD_KEY)*1.0) / 7::NUMERIC(18,0))))::VARCHAR),((LENGTH(('00'::VARCHAR(2) ||(CEILING(((RANK() OVER (PARTITION BY META.CALENDAR_KEY,MAX(CAL.YEAR),MAX(CAL.MONTH) ORDER BY CAL.PERIOD_KEY)*1.0) / 7::NUMERIC(18,0))))::VARCHAR)) - 2) + 1),2)) AS YEARMONTHWEEKNAME, MAX(CAL.LY_PERIOD_KEY) LY_PERIOD_KEY, MAX(CAL.NY_PERIOD_KEY) NY_PERIOD_KEY, TO_CHAR(TO_DATE(CAL.PERIOD_KEY::VARCHAR, 'YYYYMMDD'), 'YYYY-MM-DD') DATE_NAME, TO_DATE(CAL.PERIOD_KEY::VARCHAR, 'YYYYMMDD') DATE_VALUE, CAL.PERIOD_KEY CAL_PERIOD_KEY, MAX("2YA_PERIOD_KEY") "2YA_PERIOD_KEY", MAX("3YA_PERIOD_KEY") "3YA_PERIOD_KEY", MAX("4YA_PERIOD_KEY") "4YA_PERIOD_KEY" FROM ( (SELECT 2 AS CALENDAR_KEY, 'STD' AS PREFIX, 1 AS "MONTH", '445 (Standard)' AS CALENDAR_NAME) META JOIN (SELECT C.PERIOD_KEY AS PERIOD_KEY, C.CALENDAR_KEY AS CALENDAR_KEY, C.YEAR, C.WEEK, C.MONTH, (C.YEAR)::VARCHAR(4) AS YEARNAME, QE.YEARQUARTER, QE.YEARQUARTERNAME, ME.YEARPERIOD AS YEARMONTH, ME.YEARPERIODNAME AS YEARMONTHNAME, ME.YEARPERIOD, ME.YEARPERIODNAME, WE.YEARWEEK AS YEARWEEK, WE.WEEKBEGIN, TO_CHAR(TO_DATE ((WE.WEEKBEGIN)::VARCHAR,'YYYYMMDD'::VARCHAR(8)),'MM/DD/YYYY'::VARCHAR(10) ) AS WEEKBEGINNAME, WE.WEEKENDED, TO_CHAR(TO_DATE ((WE.WEEKENDED)::VARCHAR,'YYYYMMDD'::VARCHAR(8)),'MM/DD/YYYY'::VARCHAR(10) ) AS WEEKENDEDNAME, C.LY_PERIOD_KEY, C.NY_PERIOD_KEY, YA2.LY_PERIOD_KEY "2YA_PERIOD_KEY", YA3.LY_PERIOD_KEY "3YA_PERIOD_KEY", YA4.LY_PERIOD_KEY "4YA_PERIOD_KEY" FROM ((({self._dim_schema}.CALENDAR_PERIOD C JOIN (SELECT CALENDAR_PERIOD.CALENDAR_KEY, MIN(CALENDAR_PERIOD.PERIOD_KEY) AS WEEKBEGIN, MAX(CALENDAR_PERIOD.PERIOD_KEY) AS WEEKENDED, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.WEEK AS WEEK, CASE WHEN (CALENDAR_PERIOD.WEEK > 2000) THEN CALENDAR_PERIOD.WEEK ELSE ((CALENDAR_PERIOD.YEAR*100) + CALENDAR_PERIOD.WEEK) END AS YEARWEEK FROM {self._dim_schema}.CALENDAR_PERIOD WHERE ((CALENDAR_PERIOD.WEEK_CODE = 'B'::CHAR(1)) OR (CALENDAR_PERIOD.WEEK_CODE = 'E'::CHAR(1))) GROUP BY CALENDAR_PERIOD.CALENDAR_KEY, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.WEEK) WE ON ( ( (C.CALENDAR_KEY = WE.CALENDAR_KEY) AND (C.YEAR = WE.YEAR) AND (C.WEEK = WE.WEEK)))) JOIN (SELECT CALENDAR_PERIOD.CALENDAR_KEY, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.MONTH AS PERIOD, CASE WHEN (CALENDAR_PERIOD.MONTH > 2000) THEN (CALENDAR_PERIOD.MONTH)::VARCHAR(6) ELSE CONCAT (CONCAT (CONCAT ('Period '::VARCHAR(7),(CALENDAR_PERIOD.MONTH)::VARCHAR(2)), ', '::VARCHAR(2)),(CALENDAR_PERIOD.YEAR)::VARCHAR(4)) END AS YEARPERIODNAME, CASE WHEN (CALENDAR_PERIOD.MONTH > 2000) THEN CALENDAR_PERIOD.MONTH ELSE ((CALENDAR_PERIOD.YEAR*100) + CALENDAR_PERIOD.MONTH) END AS YEARPERIOD FROM {self._dim_schema}.CALENDAR_PERIOD WHERE ((CALENDAR_PERIOD.MONTH_CODE = 'B'::CHAR (1)) OR (CALENDAR_PERIOD.MONTH_CODE = 'E'::CHAR (1))) GROUP BY CALENDAR_PERIOD.CALENDAR_KEY, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.MONTH) ME ON ( ( (C.CALENDAR_KEY = ME.CALENDAR_KEY) AND (C.YEAR = ME.YEAR) AND (C.MONTH = ME.PERIOD)))) JOIN (SELECT CALENDAR_PERIOD.CALENDAR_KEY, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.QUARTER AS QUARTER, CASE WHEN CALENDAR_PERIOD.QUARTER > 2000 THEN CONCAT(CONCAT (CONCAT ('Q'::VARCHAR(1), (SUBSTR((CALENDAR_PERIOD.QUARTER)::VARCHAR(6), 6))::VARCHAR(2)), ', '::VARCHAR(2)),(CALENDAR_PERIOD.QUARTER)::VARCHAR(4)) ELSE CONCAT(CONCAT (CONCAT ('Q'::VARCHAR(1),(CALENDAR_PERIOD.QUARTER)::VARCHAR(2)), ', '::VARCHAR(2)),(CALENDAR_PERIOD.YEAR)::VARCHAR(4)) END AS YEARQUARTERNAME, CASE WHEN (CALENDAR_PERIOD.QUARTER > 2000) THEN CALENDAR_PERIOD.QUARTER ELSE ((CALENDAR_PERIOD.YEAR*100) + CALENDAR_PERIOD.QUARTER) END AS YEARQUARTER FROM {self._dim_schema}.CALENDAR_PERIOD WHERE ((CALENDAR_PERIOD.QUARTER_CODE = 'B'::CHAR (1)) OR (CALENDAR_PERIOD.QUARTER_CODE = 'E'::CHAR (1))) GROUP BY CALENDAR_PERIOD.CALENDAR_KEY, CALENDAR_PERIOD.YEAR, CALENDAR_PERIOD.QUARTER) QE ON (((C.CALENDAR_KEY = QE.CALENDAR_KEY) AND (C.YEAR = QE.YEAR) AND (C.QUARTER = QE.QUARTER)))) LEFT JOIN {self._dim_schema}.CALENDAR_PERIOD YA2 ON C.CALENDAR_KEY = YA2.CALENDAR_KEY AND C.LY_PERIOD_KEY = YA2.PERIOD_KEY LEFT JOIN {self._dim_schema}.CALENDAR_PERIOD YA3 ON C.CALENDAR_KEY = YA3.CALENDAR_KEY AND YA2.LY_PERIOD_KEY = YA3.PERIOD_KEY LEFT JOIN {self._dim_schema}.CALENDAR_PERIOD YA4 ON C.CALENDAR_KEY = YA4.CALENDAR_KEY AND YA3.LY_PERIOD_KEY = YA4.PERIOD_KEY WHERE (C.YEAR >= ((DATE_PART ('year'::VARCHAR (4),(NOW ())::TIMESTAMP))::INT- 5))) CAL ON ( (CAL.CALENDAR_KEY = META.CALENDAR_KEY)) ) GROUP BY CAL.PERIOD_KEY,META.CALENDAR_KEY; """.format(self=self) dct_sync_data = self.context.copy() dct_sync_data["source_dw"] = self._source_dw dct_sync_data["source_config"] = self._source_dw.config.json_data[ "configs"] dct_sync_data["target_dw_schema"] = self._common_schema dct_sync_data["target_dw_table"] = self._dim_calendar dct_sync_data["target_column"] = col dct_sync_data["source_sql"] = sql self._logger.debug(sql) sync_data(dct_sync_data) self._logger.info( "Sync hub_id {} dim calendar done".format(hub_id)) def _export_product_data(self, hub_id, cis_source=False): """ Export product data to common schema from source schema :param hub_id: :return: """ _target_stage_table = "STAGE_{}".format(self._dim_product) _src_item_table = "PRODUCT" if cis_source: _target_stage_table = "STAGE_{}_CIS".format(self._dim_product) _src_item_table = "STAGE_{}".format(self._product_cis_src_tbl) # Creating related staging table for CIS source in HUB dw. # Since CIS source tables are external tables, can NOT be used to join other tables directly. sql = """ DROP TABLE IF EXISTS {dimSchema}.{stgItemTable}; CREATE TABLE {dimSchema}.{stgItemTable} AS SELECT RETAILER_KEY, VENDOR_KEY, ITEM_KEY, UPC_ID AS UPC, ATTRIBUTE_NAME as ATTRIBNAME, ATTRIBUTE_VALUE as ATTRIBVALUE, 'T' AS ACTIVE FROM {dimSchema}.{srcItemTable} WHERE 0=0 {filters}; """.format(dimSchema=self._dim_schema, stgItemTable=_src_item_table, srcItemTable=self._product_cis_src_tbl, filters=self._str_vendor_retailer_filter) self._logger.info(sql) self._source_dw.execute(sql) col = ( " RETAILER_KEY, VENDOR_KEY, ITEM_KEY, UPC, ITEM_DESCRIPTION, ITEM_GROUP, OSM_ITEM_NBR, OSM_ITEM_STATUS," "OSM_ITEM_TYPE, OSM_BRAND, OSM_UNIT_PRICE, OSM_CATEGORY, OSM_MAJOR_CATEGORY_NO, OSM_MAJOR_CATEGORY, " "OSM_SUB_CATEGORY_NO, OSM_SUB_CATEGORY, OSM_VENDOR_STK_NBR, OSM_VENDOR_PACK_COST, VENDOR_PACK_QTY, " "OSM_WHSE_PACK_COST, OSM_WHSE_PACK_QTY, DSD_IND, VENDOR_NAME ") sql = """ SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ CC.RETAILER_KEY, CC.VENDOR_KEY, PV.ITEM_KEY, PV.UPC, AGPG.ITEM_DESCRIPTION, CASE WHEN AGPG.ITEM_GROUP IS NULL OR AGPG.ITEM_GROUP = '000000' THEN PV.UPC ELSE AGPG.ITEM_GROUP END AS ITEM_GROUP, AGPG.OSM_ITEM_NBR, AGPG.OSM_ITEM_STATUS, AGPG.OSM_ITEM_TYPE, AGPG.OSM_BRAND,AGPG.OSM_UNIT_PRICE, AGPG.OSM_CATEGORY, AGPG.OSM_MAJOR_CATEGORY_NO, AGPG.OSM_MAJOR_CATEGORY, AGPG.OSM_SUB_CATEGORY_NO, AGPG.OSM_SUB_CATEGORY, AGPG.OSM_VENDOR_STK_NBR, AGPG.OSM_VENDOR_PACK_COST, CASE WHEN length(AGPG.VENDOR_PACK_QTY) < 15 THEN CASE WHEN REGEXP_LIKE(SUBSTR(REPLACE(AGPG.VENDOR_PACK_QTY, ',', ''),0,15), '^[0-9]*(\.[0-9]+)?$') THEN CASE WHEN SUBSTR(REPLACE(AGPG.VENDOR_PACK_QTY, ',', ''),0,15)::NUMERIC <= 1 THEN 1 ELSE SUBSTR(REPLACE(AGPG.VENDOR_PACK_QTY, ',', ''),0,15)::NUMERIC END ELSE 1 END ELSE 1 END VENDOR_PACK_QTY, AGPG.OSM_WHSE_PACK_COST, AGPG.OSM_WHSE_PACK_QTY, CASE WHEN AGPG.OSM_ITEM_TYPE IN ('07','37') THEN '1' ELSE '0' END AS DSD_IND, CC.VENDOR_NAME FROM (SELECT DISTINCT RETAILER_KEY, VENDOR_KEY, VENDOR_NAME FROM {self._meta_schema}.CONFIG_CUSTOMERS WHERE VENDOR_NAME != 'Alert' {self._str_vendor_retailer_filter}) CC JOIN ( SELECT D.VENDOR_KEY, D.ITEM_KEY, MAX(D.UPC) UPC FROM {self._dim_schema}.{itemTable} D JOIN {self._dim_schema}.VENDOR V on V.VENDOR_KEY = D.VENDOR_KEY WHERE D.ITEM_KEY NOT IN (SELECT ITEM_KEY FROM {self._dim_schema}.PRODUCT_GLOBAL_FILTER) {self._active_status} GROUP BY D.VENDOR_KEY, D.ITEM_KEY ) PV ON PV.VENDOR_KEY = CC.VENDOR_KEY LEFT JOIN ( SELECT AGP.RETAILER_KEY, AGP.VENDOR_KEY, AGP.ITEM_KEY , MAX(CASE WHEN AGP.ATTRIBNAME = 'ITEM_GROUP_X' THEN ATTRIBVALUE END) ITEM_GROUP , MAX(CASE WHEN AGP.ATTRIBNAME = 'VENDOR_PACK_QTY_X' THEN ATTRIBVALUE END) VENDOR_PACK_QTY , MAX(CASE WHEN AGP.ATTRIBNAME = 'ITEM_DESCRIPTION_X' THEN ATTRIBVALUE END) ITEM_DESCRIPTION , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_ITEM_NBR_X' THEN ATTRIBVALUE END) OSM_ITEM_NBR , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_ITEM_STATUS_X' THEN ATTRIBVALUE END) OSM_ITEM_STATUS , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_ITEM_TYPE_X' THEN ATTRIBVALUE END) OSM_ITEM_TYPE , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_WHSE_PACK_QTY_X' THEN ATTRIBVALUE END) OSM_WHSE_PACK_QTY , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_MAJOR_CATEGORY_NO_X' THEN ATTRIBVALUE END) OSM_MAJOR_CATEGORY_NO , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_MAJOR_CATEGORY_X' THEN ATTRIBVALUE END) OSM_MAJOR_CATEGORY , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_CATEGORY_X' THEN ATTRIBVALUE END) OSM_CATEGORY , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_SUB_CATEGORY_NO_X' THEN ATTRIBVALUE END) OSM_SUB_CATEGORY_NO , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_SUB_CATEGORY_X' THEN ATTRIBVALUE END) OSM_SUB_CATEGORY , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_BRAND_X' THEN ATTRIBVALUE END) OSM_BRAND , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_UNIT_PRICE_X' THEN ATTRIBVALUE END) OSM_UNIT_PRICE , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_VENDOR_STK_NBR_X' THEN ATTRIBVALUE END) OSM_VENDOR_STK_NBR , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_VENDOR_PACK_COST_X' THEN ATTRIBVALUE END) OSM_VENDOR_PACK_COST , MAX(CASE WHEN AGP.ATTRIBNAME = 'OSM_WHSE_PACK_COST_X' THEN ATTRIBVALUE END) OSM_WHSE_PACK_COST FROM ( SELECT AG.RETAILER_KEY, AG.VENDOR_KEY, CONCAT(ATTR_GROUP, '_X') ATTRIBNAME, D.ITEM_KEY, D.ATTRIBVALUE FROM {self._meta_schema}.ATTRIBUTEGROUP AG JOIN {self._dim_schema}.{itemTable} D ON D.VENDOR_KEY = AG.VENDOR_KEY AND D.ATTRIBNAME = AG.ATTR_NAME {self._active_status} WHERE AG.ATTR_GROUP IN ('ITEM_GROUP','VENDOR_PACK_QTY','ITEM_DESCRIPTION','OSM_ITEM_NBR' ,'OSM_ITEM_STATUS','OSM_ITEM_TYPE','OSM_WHSE_PACK_QTY','OSM_MAJOR_CATEGORY_NO' ,'OSM_MAJOR_CATEGORY','OSM_CATEGORY','OSM_SUB_CATEGORY_NO','OSM_SUB_CATEGORY','OSM_BRAND', 'OSM_UNIT_PRICE','OSM_VENDOR_STK_NBR','OSM_VENDOR_PACK_COST','OSM_WHSE_PACK_COST') ) AGP GROUP BY AGP.RETAILER_KEY, AGP.VENDOR_KEY, AGP.ITEM_KEY ) AGPG ON AGPG.RETAILER_KEY = CC.RETAILER_KEY AND AGPG.VENDOR_KEY = PV.VENDOR_KEY AND AGPG.ITEM_KEY =PV.ITEM_KEY WHERE (CC.RETAILER_KEY, CC.VENDOR_KEY, PV.ITEM_KEY) NOT IN (SELECT RETAILER_KEY, VENDOR_KEY, ITEM_KEY FROM {self._dim_schema}.PRODUCT_DEFAULT_FILTER) """.format(self=self, itemTable=_src_item_table) dct_sync_data = self.context.copy() dct_sync_data["source_dw"] = self._source_dw dct_sync_data["source_config"] = self._source_dw.config.json_data[ "configs"] dct_sync_data["target_dw_schema"] = self._common_schema dct_sync_data["target_dw_table"] = _target_stage_table dct_sync_data["target_column"] = col dct_sync_data["source_sql"] = sql self._logger.debug(sql) sync_data(dct_sync_data) # sync tall table(not for CIS). if self._str_vendor_filter and not cis_source: self._product_columns = self._dct_table_column["PRODUCT"] col = " VENDOR_KEY, ITEM_KEY, UPC, ATTRIBNAME, ATTRIBVALUE, ACTIVE " sql = ( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ {col} FROM {self._dim_schema}.PRODUCT WHERE ATTRIBNAME " "IN ({self._product_columns}) {self._str_vendor_filter}". format(self=self, col=col)) dct_sync_data["target_dw_table"] = "STAGE_PRODUCT" dct_sync_data["target_column"] = col dct_sync_data["source_sql"] = sql self._logger.debug(sql) sync_data(dct_sync_data) def _export_store_data(self, hub_id, cis_source=False): """ Export store data to common schema table stage_<table_name> from source schema <table_name> :param hub_id: :return: """ _target_store_table = "STAGE_{}".format(self._dim_store) _src_store_table = "STORE" if cis_source: _target_store_table = "STAGE_{}_CIS".format(self._dim_store) _src_store_table = "STAGE_{}".format(self._store_cis_src_tbl) # Creating related staging table for CIS source in HUB dw. # Since CIS source tables are external tables, can NOT be used to join other tables directly. sql = """ DROP TABLE IF EXISTS {dimSchema}.{stgStoreTable}; CREATE TABLE {dimSchema}.{stgStoreTable} AS SELECT RETAILER_KEY, VENDOR_KEY, STORE_KEY, STORE_ID AS STOREID, ATTRIBUTE_NAME as ATTRIBNAME, ATTRIBUTE_VALUE as ATTRIBVALUE, 'T' as ACTIVE FROM {dimSchema}.{srcStoreTable} WHERE 0=0 {filters}; """.format(dimSchema=self._dim_schema, stgStoreTable=_src_store_table, srcStoreTable=self._store_cis_src_tbl, filters=self._str_vendor_retailer_filter) self._logger.info(sql) self._source_dw.execute(sql) col = ( " RETAILER_KEY, VENDOR_KEY, STORE_KEY, STORE_ID, MARKET_CLUSTER, STATE, CITY, ZIP, OSM_REGION, " "OSM_DISTRICT, RSI_ALERT_TYPE, RETAILER_NAME, RSI_BANNER, PRIME_DC, STORE_NAME" ) sql = """ SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ CC.RETAILER_KEY, CC.VENDOR_KEY, SR.STORE_KEY, SR.STOREID, AGSG.MARKET_CLUSTER, SR.STATE, CASE WHEN SR.CITY IS NULL AND STATE IS NULL THEN 'N/A' WHEN SR.CITY IS NOT NULL AND STATE IS NULL THEN CONCAT(SR.CITY, ', N/A') WHEN SR.CITY IS NULL AND STATE IS NOT NULL THEN CONCAT('N/A, ' , SR.STATE) WHEN SR.CITY IS NOT NULL AND STATE IS NOT NULL THEN CONCAT(CONCAT(SR.CITY, ', '), SR.STATE) END CITY, SR.ZIP, AGSG.OSM_REGION, AGSG.OSM_DISTRICT, COALESCE(SR.RSI_ALERT_TYPE, '0') RSI_ALERT_TYPE, CC.RETAILER_NAME, coalesce(AGSG.RSI_BANNER, 'unmapped') AS RSI_BANNER, AGSG.PRIME_DC, AGSG.STORE_NAME FROM (SELECT DISTINCT RETAILER_KEY, VENDOR_KEY, RETAILER_NAME FROM {self._meta_schema}.CONFIG_CUSTOMERS WHERE VENDOR_NAME != 'Alert' {self._str_vendor_retailer_filter}) CC JOIN ( SELECT D.RETAILER_KEY, D.STORE_KEY, D.STOREID, MAX(CASE WHEN D.ATTRIBNAME = 'STATE' THEN D.ATTRIBVALUE END) STATE, MAX(CASE WHEN D.ATTRIBNAME = 'CITY' THEN D.ATTRIBVALUE END) CITY, MAX(CASE WHEN D.ATTRIBNAME = 'ZIP' THEN D.ATTRIBVALUE END) ZIP, MAX(CASE WHEN D.ATTRIBNAME = 'RSI_ALERT_TYPE' THEN D.ATTRIBVALUE END) RSI_ALERT_TYPE FROM {self._dim_schema}.{storeTable} D JOIN {self._dim_schema}.RETAILER R ON R.RETAILER_KEY = D.RETAILER_KEY WHERE STORE_KEY NOT IN (SELECT STORE_KEY FROM {self._dim_schema}.STORES_GLOBAL_FILTER) {self._active_status} GROUP BY D.RETAILER_KEY, D.STORE_KEY, D.STOREID ) SR ON SR.RETAILER_KEY = CC.RETAILER_KEY LEFT JOIN ( SELECT AGS.RETAILER_KEY, AGS.VENDOR_KEY, AGS.STORE_KEY, MAX(CASE WHEN AGS.ATTRIBNAME = 'MARKET_CLUSTER_X' THEN ATTRIBVALUE END) MARKET_CLUSTER, MAX(CASE WHEN AGS.ATTRIBNAME = 'OSM_REGION_X' THEN ATTRIBVALUE END) OSM_REGION, MAX(CASE WHEN AGS.ATTRIBNAME = 'OSM_DISTRICT_X' THEN ATTRIBVALUE END) OSM_DISTRICT, MAX(CASE WHEN AGS.ATTRIBNAME = 'RSI_BANNER_X' THEN ATTRIBVALUE END) RSI_BANNER, MAX(CASE WHEN AGS.ATTRIBNAME = 'PRIME_DC_X' THEN ATTRIBVALUE END) PRIME_DC, MAX(CASE WHEN AGS.ATTRIBNAME = 'SAR_STORE_NAME_X' THEN ATTRIBVALUE END) STORE_NAME FROM ( SELECT AG.RETAILER_KEY, AG.VENDOR_KEY, CONCAT(AG.ATTR_GROUP, '_X') ATTRIBNAME, D.STORE_KEY, D.ATTRIBVALUE FROM {self._meta_schema}.ATTRIBUTEGROUP AG JOIN {self._dim_schema}.{storeTable} D ON D.RETAILER_KEY = AG.RETAILER_KEY AND D.ATTRIBNAME = AG.ATTR_NAME {self._active_status} WHERE AG.ATTR_GROUP IN ('RSI_BANNER','PRIME_DC', 'MARKET_CLUSTER', 'OSM_REGION', 'OSM_DISTRICT', 'SAR_STORE_NAME') ) AGS GROUP BY AGS.RETAILER_KEY, AGS.VENDOR_KEY, AGS.STORE_KEY ) AGSG ON AGSG.RETAILER_KEY = SR.RETAILER_KEY AND AGSG.VENDOR_KEY = CC.VENDOR_KEY AND AGSG.STORE_KEY = SR.STORE_KEY WHERE (CC.RETAILER_KEY, CC.VENDOR_KEY, SR.STORE_KEY) NOT IN (SELECT RETAILER_KEY, VENDOR_KEY, STORE_KEY FROM {self._dim_schema}.STORES_DEFAULT_FILTER); """.format(self=self, storeTable=_src_store_table) dct_sync_data = self.context.copy() dct_sync_data["source_dw"] = self._source_dw dct_sync_data["source_config"] = self._source_dw.config.json_data[ "configs"] dct_sync_data["target_dw_schema"] = self._common_schema dct_sync_data["target_dw_table"] = _target_store_table dct_sync_data["target_column"] = col dct_sync_data["source_sql"] = sql self._logger.debug(sql) sync_data(dct_sync_data) # sync tall table(not for CIS) if self._str_retailer_filter and not cis_source: self._store_columns = self._dct_table_column["STORE"] col = " RETAILER_KEY, STORE_KEY, STOREID, ATTRIBNAME, ATTRIBVALUE, ACTIVE " sql = ( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ {col} FROM {self._dim_schema}.STORE WHERE ATTRIBNAME " "IN ({self._store_columns}) {self._str_retailer_filter}". format(self=self, col=col)) dct_sync_data["target_dw_table"] = "STAGE_STORE" dct_sync_data["target_column"] = col dct_sync_data["source_sql"] = sql self._logger.debug(sql) sync_data(dct_sync_data) def _load_product_data_cis(self, hub_id): """ Sync data from CIS source. if the same item_key exists in both CIS and NXG HUBs, the CIS one should override the one in NXG side. :param hub_id: :return: """ # dump CIS data into cis staging table: stage_dim_product_cis self._export_product_data(hub_id=hub_id, cis_source=True) # Below part is moved from method self._export_product_data. Put it here when all product data are ready. # Override nxg items with CIS if there are duplicated items on both NXG and CIS sql = """ DELETE FROM {cmnSchema}.STAGE_{itemTable} WHERE (vendor_key, retailer_key, item_key) IN (SELECT vendor_key, retailer_key, item_key FROM {cmnSchema}.STAGE_{itemTable}_CIS); INSERT INTO {cmnSchema}.STAGE_{itemTable} SELECT * FROM {cmnSchema}.STAGE_{itemTable}_CIS; """.format(itemTable=self._dim_product, cmnSchema=self._common_schema) self._logger.info(sql) self._dw.execute(sql) # updating column OSM_MAJOR_CATEGORY when NXG and CIS sources are all done. sql = """ DROP TABLE IF EXISTS TEMP_OSM_SUB_CATEGORY_CONSISTENCY; CREATE LOCAL TEMP TABLE IF NOT EXISTS TEMP_OSM_SUB_CATEGORY_CONSISTENCY ON COMMIT PRESERVE ROWS AS /*+ DIRECT, LABEL(GX_IRIS_SYNC_DIM_DATA)*/ SELECT distinct OSM_SUB_CATEGORY_NO,MAX(OSM_SUB_CATEGORY) as OSM_SUB_CATEGORY FROM {self._common_schema}.{self._dim_product} GROUP BY OSM_SUB_CATEGORY_NO; DROP TABLE IF EXISTS TEMP_OSM_MAJOR_CATEGORY_CONSISTENCY; CREATE LOCAL TEMP TABLE IF NOT EXISTS TEMP_OSM_MAJOR_CATEGORY_CONSISTENCY ON COMMIT PRESERVE ROWS AS /*+ DIRECT, LABEL(GX_IRIS_SYNC_DIM_DATA)*/ SELECT distinct OSM_MAJOR_CATEGORY_NO,MAX(OSM_MAJOR_CATEGORY) as OSM_MAJOR_CATEGORY FROM {self._common_schema}.{self._dim_product} GROUP BY OSM_MAJOR_CATEGORY_NO; """.format(self=self) self._logger.info(sql) self._dw.execute(sql) sql = (""" SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ ANALYZE_STATISTICS('{self._common_schema}.STAGE_{self._dim_product}'); INSERT /*+ DIRECT, LABEL(GX_IRIS_SYNC_DIM_DATA)*/ INTO TEMP_OSM_SUB_CATEGORY_CONSISTENCY SELECT a.OSM_SUB_CATEGORY_NO, MAX(a.OSM_SUB_CATEGORY) AS OSM_SUB_CATEGORY FROM {self._common_schema}.STAGE_{self._dim_product} a LEFT JOIN TEMP_OSM_SUB_CATEGORY_CONSISTENCY b on a.OSM_SUB_CATEGORY_NO = b.OSM_SUB_CATEGORY_NO WHERE a.OSM_SUB_CATEGORY_NO IS NOT NULL AND a.OSM_SUB_CATEGORY_NO <> '' AND b.OSM_SUB_CATEGORY_NO IS NULL GROUP BY a.OSM_SUB_CATEGORY_NO; UPDATE /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ {self._common_schema}.STAGE_{self._dim_product} SET OSM_SUB_CATEGORY = b.OSM_SUB_CATEGORY FROM (SELECT * FROM TEMP_OSM_SUB_CATEGORY_CONSISTENCY) b WHERE {self._common_schema}.STAGE_{self._dim_product}.OSM_SUB_CATEGORY_NO = b.OSM_SUB_CATEGORY_NO; /*UPDATE {self._common_schema}.STAGE_{self._dim_product} SET ITEM_GROUP = B.ITEM_GROUP FROM $schemaName.OLAP_ITEM B WHERE {self._common_schema}.STAGE_{self._dim_product}.ITEM_KEY = B.ITEM_KEY;*/ INSERT /*+ DIRECT, LABEL(GX_IRIS_SYNC_DIM_DATA)*/ INTO TEMP_OSM_MAJOR_CATEGORY_CONSISTENCY SELECT a.OSM_MAJOR_CATEGORY_NO, MAX(a.OSM_MAJOR_CATEGORY) AS OSM_MAJOR_CATEGORY FROM {self._common_schema}.STAGE_{self._dim_product} a LEFT JOIN TEMP_OSM_MAJOR_CATEGORY_CONSISTENCY b on a.OSM_MAJOR_CATEGORY_NO = b.OSM_MAJOR_CATEGORY_NO WHERE a.OSM_MAJOR_CATEGORY_NO IS NOT NULL AND a.OSM_MAJOR_CATEGORY_NO <> '' AND b.OSM_MAJOR_CATEGORY_NO IS NULL GROUP BY a.OSM_MAJOR_CATEGORY_NO; UPDATE /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ {self._common_schema}.STAGE_{self._dim_product} SET OSM_MAJOR_CATEGORY = b.OSM_MAJOR_CATEGORY FROM (SELECT * FROM TEMP_OSM_MAJOR_CATEGORY_CONSISTENCY) b WHERE {self._common_schema}.STAGE_{self._dim_product}.OSM_MAJOR_CATEGORY_NO = b.OSM_MAJOR_CATEGORY_NO; """.format(self=self)) self._logger.info(sql) self._dw.execute(sql) def _load_store_data_cis(self, hub_id): """ Sync data from CIS source. # if the same store_key exists in CIS and NXG HUBs, the CIS one should override the one in NXG side. :param hub_id: :return: """ # dump CIS data into cis staging table: stage_dim_store_cis self._export_store_data(hub_id=hub_id, cis_source=True) # # Override nxg stores with CIS if there are duplicated items on both NXG and CIS sql = """ DELETE FROM {cmnSchema}.STAGE_{storeTable} WHERE (vendor_key, retailer_key, store_key) IN (SELECT vendor_key, retailer_key, store_key FROM {cmnSchema}.STAGE_{storeTable}_CIS ); INSERT INTO {cmnSchema}.STAGE_{storeTable} SELECT * FROM {cmnSchema}.STAGE_{storeTable}_CIS; """.format(storeTable=self._dim_store, cmnSchema=self._common_schema) self._logger.info(sql) self._dw.execute(sql) def _swap_data(self): """ Switch partition from stage table to final table. :return: """ sql = ("SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ DISTINCT VENDOR_KEY " "FROM {self._common_schema}.STAGE_{self._dim_product}".format( self=self)) self._logger.debug(sql) for row in self._dw.query(sql): self._dw.switch_partition( schema_name=self._common_schema, table_name=self._dim_product, partition_name=row[0], stage_schema_name=self._common_schema, stage_table_name='STAGE_{self._dim_product}'.format(self=self)) sql = ("SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ DISTINCT VENDOR_KEY " "FROM {self._common_schema}.STAGE_PRODUCT".format(self=self)) self._logger.debug(sql) for row in self._dw.query(sql): self._dw.switch_partition(schema_name=self._common_schema, table_name='PRODUCT', partition_name=row[0], stage_schema_name=self._common_schema, stage_table_name='STAGE_PRODUCT') sql = ( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ DISTINCT RETAILER_KEY " "FROM {self._common_schema}.STAGE_{self._dim_store}".format( self=self)) self._logger.debug(sql) for row in self._dw.query(sql): self._dw.switch_partition( schema_name=self._common_schema, table_name=self._dim_store, partition_name=row[0], stage_schema_name=self._common_schema, stage_table_name='STAGE_{self._dim_store}'.format(self=self)) sql = ( "SELECT /*+ LABEL(GX_IRIS_SYNC_DIM_DATA)*/ DISTINCT RETAILER_KEY " "FROM {self._common_schema}.STAGE_STORE".format(self=self)) self._logger.debug(sql) for row in self._dw.query(sql): self._dw.switch_partition(schema_name=self._common_schema, table_name='STORE', partition_name=row[0], stage_schema_name=self._common_schema, stage_table_name='STAGE_STORE') def _process_sync_dim_data(self): """ Process to sync dim data :return: """ try: self._get_column() self._recreate_stage_table() for hub_id in self._hub_ids: try: self._dim_schema = "DIM_{hub_id}".format(hub_id=hub_id) self._meta_schema = "METADATA_{hub_id}".format( hub_id=hub_id) self._logger.info( "Sync dim data start hub_id {hub_id}".format( hub_id=hub_id)) self._get_source_config(hub_id) self._export_calendar_data(hub_id) self._get_vendor_retailer_from_hub(hub_id) if not self._str_vendor_retailer_filter: self._logger.warning( "hub_id {} does not have any matched product and store data" .format(hub_id)) continue self._export_product_data(hub_id) self._export_store_data(hub_id) # handle CIS source. self._load_product_data_cis(hub_id) self._load_store_data_cis(hub_id) self._logger.info( "Sync hub_id {hub_id} dim data to stage table.".format( hub_id=hub_id)) except Exception as msg: self._logger.warning(msg) continue self._swap_data() # self._truncate_stage_table() self._logger.info("Sync dim data done from hubs {hub_ids}".format( hub_ids=self._hub_ids)) self._analyze_table() except Exception as msg: self._logger.error(msg) finally: pass def process(self): try: self._process_sync_dim_data() except Exception as e: self._logger.error(e) finally: if hasattr(self, "_dw"): self._dw.close_connection() if hasattr(self, "_source_dw"): self._source_dw.close_connection() if hasattr(self, "_db"): self._db.close_connection()
class AFMConfigToTable(object): """ :input: afm_config.json under same folder. Note: this config file name is hardcoded. :output: below 4 tables. AFM_RULE_SET AFM_RULES AFM_RETAILER_RULE AFM_SVR_RULE :usage: python AFMConfigToTable.py :description: This is a temp solution for loading AFM configuration data from json format into AFM rule tables. Since the frontend UI is not yet working. Refer to above json string for the format template. When the frontend finished, this module will be also retired. There are only 4 tables involved here. See output tables list. """ def __init__(self, meta, params, logger=None, filename="afm_config.json"): """ :param meta: common config :param params: used by calling REST API :param filename: only used for local testing """ self.meta = meta self.params = params self.filename = filename self._cap = Capacity(meta=self.meta) self.app_conn = MSOperation(meta=self.meta) self._logger = logger if logger else Logger(log_level="debug", target="console", vendor_key=-1, retailer_key=-1, sql_conn=None) self.scd_url = "{0}/scd/process".format( self.meta["api_osa_bundle_str"]) def get_data(self): # json_data = json.loads(afm_config_data) with open(self.filename, 'r') as fp: json_data = json.load(fp=fp) return json_data def insert_data(self): # _json_config_data = self.get_data() # for local testing _json_config_data = self.params # getting data from params directly instead of reading from config file. _rule_type = _json_config_data["ruleType"].upper() _cycle_key = _json_config_data["cycleKey"] _vendor_key = _json_config_data["vendorKey"] _retailer_key = _json_config_data["retailerKey"] _owner = _json_config_data["owner"] _rule_set_name = None try: if _rule_type not in ("RETAILER", "SVR"): self._logger.warning( "ruleType should be only either RETAILER or SVR! " "Please check config file: afm_config.json.") exit(1) if _rule_type == "RETAILER": _rule_set_name = _owner + '_' + self._cap.get_retailer_name_by_key( retailer_key=_retailer_key) elif _rule_type == "SVR": _rule_set_name = _owner + '_' + self._cap.get_retailer_name_by_key( retailer_key=_retailer_key ) + '_' + self._cap.get_vendor_name_by_key( vendor_key=_vendor_key) rule_set_data = { "RULE_SET_NAME": _rule_set_name, "ENGINE_PROVIDER_NAME": "AFM", "DATA_PROVIDER_NAME": "AHOLD", "OWNER": _owner, "ITEM_SCOPE": _json_config_data["itemScope"], "STORE_SCOPE": _json_config_data["storeScope"], "TYPES_LIST": _json_config_data["typeList"], "ENABLED": "T", "CREATED_BY": _json_config_data["createdBy"], "UPDATED_BY": _json_config_data["createdBy"] } self._logger.info(rule_set_data) _rule_set_sql = """ INSERT INTO AFM_RULE_SET ( [RULE_SET_NAME], [ENGINE_PROVIDER_NAME], [DATA_PROVIDER_NAME], [OWNER], [ITEM_SCOPE], [STORE_SCOPE], [TYPES_LIST], [ENABLED], [CREATED_BY], [CREATED_DATE], [UPDATED_BY], [UPDATED_DATE] ) OUTPUT inserted.RULE_SET_ID VALUES( '{RULE_SET_NAME}', '{ENGINE_PROVIDER_NAME}', '{DATA_PROVIDER_NAME}', '{OWNER}', '{ITEM_SCOPE}', '{STORE_SCOPE}', '{TYPES_LIST}', '{ENABLED}', '{CREATED_BY}', GETDATE(), '{UPDATED_BY}', GETDATE() ) """.format(**rule_set_data) self._logger.info("SQL for AFM_RULE_SET table is: %s" % _rule_set_sql) _rule_set_id = self.app_conn.query_scalar(_rule_set_sql) self._logger.info("Generated rule set id is: %s" % _rule_set_id) rules_data = _json_config_data["rules"] for rule in rules_data: rule_data = { "RULE_ID": rule["ruleId"], "RULE_SET_ID": _rule_set_id, "SUB_LEVEL_METRICS": rule["subLevelMetrics"], "PARAMETER1": rule["parameter1"], "PARAMETER2": rule["parameter2"], "PARAMETER3": rule["parameter3"], "ENABLED": "T", "CREATED_BY": _json_config_data["createdBy"], "CREATED_DATE": "", "UPDATED_BY": _json_config_data["createdBy"], "UPDATED_DATE": "" } self._logger.debug(rule_data) _rules_sql = """ INSERT INTO AFM_RULES ( [RULE_ID], [RULE_SET_ID], [SUB_LEVEL_METRICS], [PARAMETER1], [PARAMETER2], [PARAMETER3], [ENABLED], [CREATED_BY], [CREATED_DATE], [UPDATED_BY], [UPDATED_DATE] ) VALUES ( {RULE_ID}, {RULE_SET_ID}, '{SUB_LEVEL_METRICS}', '{PARAMETER1}', '{PARAMETER2}', '{PARAMETER3}', '{ENABLED}', '{CREATED_BY}', getdate(), '{UPDATED_BY}', getdate() )""".format(**rule_data) self._logger.info("SQL for AFM_RULES table is: %s" % _rules_sql) self.app_conn.execute(_rules_sql) if _rule_type == "RETAILER": _sql = """SELECT COUNT(*) FROM AFM_RETAILER_RULE WHERE cycle_key = {cycleKey}""".format(cycleKey=_cycle_key) _exist = self.app_conn.query_scalar(_sql) if _exist != 0: _update_sql = "UPDATE AFM_RETAILER_RULE SET owner='{owner}', rule_set_id = {rule_set_id} " \ "WHERE cycle_key = {cycleKey}; "\ .format(cycleKey=_cycle_key, owner=_owner, rule_set_id=_rule_set_id) self._logger.info( "Sql for updating table AFM_RETAILER_RULE is: %s" % _update_sql) # calling scd api to execute update/delete statements _body = { "sql": _update_sql, "actioner": "ben.wu", "log_detail": True, "batch_size": 100, "db_type": "MSSQL", "table_schema": "(COMMON)" } # resp = requests.post(self.scd_url, data=json.dumps(_body)) resp = requests.post(self.scd_url, json=_body) if resp.status_code != requests.codes.ok: self._logger.warning("The response result is: %s" % resp.json()) self._logger.error( "Calling API failed with body: %s. Refer to API: %s" % (_body, self.scd_url)) else: _insert_sql = """ INSERT INTO AFM_RETAILER_RULE(cycle_key, owner, rule_set_id) VALUES({0}, '{1}', {2});""".format(_cycle_key, _owner, _rule_set_id) self._logger.info( "SQL for inserting table AFM_RETAILER_RULE is: %s" % _insert_sql) self.app_conn.execute(_insert_sql) if _rule_type == "SVR": _sql = """ SELECT COUNT(*) FROM AFM_SVR_RULE WHERE vendor_key = {vendorKey} AND retailer_key = {retailerKey} """.format(vendorKey=_vendor_key, retailerKey=_retailer_key) _exist = self.app_conn.query_scalar(_sql) if _exist != 0: _update_sql = """ UPDATE AFM_SVR_RULE SET owner = '{owner}', rule_set_id = {rule_set_id} WHERE vendor_key = {vendorKey} AND retailer_key = {retailerKey} AND cycle_key = {cycleKey} """.format(owner=_owner, rule_set_id=_rule_set_id, vendorKey=_vendor_key, retailerKey=_retailer_key, cycleKey=_cycle_key) self._logger.info( "SQL for updating table AFM_SVR_RULE is: %s" % _update_sql) # calling scd api to execute update/delete statements _body = { "sql": _update_sql, "actioner": "ben.wu", "log_detail": True, "batch_size": 100, "db_type": "MSSQL", "table_schema": "(COMMON)" } # resp = requests.post(self.scd_url, data=json.dumps(_body)) resp = requests.post(self.scd_url, json=_body) if resp.status_code != requests.codes.ok: self._logger.warning("The response result is: %s" % resp.text) self._logger.error( "Calling API failed with body: %s. Refer to API: %s" % (_body, self.scd_url)) else: _insert_sql = """ INSERT INTO AFM_SVR_RULE(vendor_key, retailer_key, owner, rule_set_id, cycle_key) VALUES({0}, {1}, '{2}', {3}, {4}); """.format(_vendor_key, _retailer_key, _owner, _rule_set_id, _cycle_key) self._logger.info( "SQL for inserting table AFM_SVR_RULE is: %s" % _insert_sql) self.app_conn.execute(_insert_sql) # insert related schedule. self.gen_schedule() self._logger.info( "Insert AFM config data completed. Please check rule_set_id: %s in related tables." % _rule_set_id) except Exception as e: self._logger.warning( "WARNING: Error found. Please fix it and re-run this script.") self._logger.warning(e) raise finally: if self.app_conn: self.app_conn.close_connection() def get_job_def_id(self, job_name): _job_name = job_name _job_url = self.meta[ "api_schedule_str"] # http://engv3dstr2.eng.rsicorp.local/common _headers = { #"tokenid": "eyJhbGciOiJIUzI1NiJ9.eyJjb29raWVWYWx1ZSI6IkFRSUM1d00yTFk0U2Zjd1ZidkJILXZOWFhEYS1HQm1ETlVpd240dWtMSzBsNEJjLipBQUpUU1FBQ01ERUFBbE5MQUJNek16azRPVEkyTXpFNU5EZzJNemMwTnpZeUFBSlRNUUFBKiIsInVzZXJJZCI6ImJlbi53dUByc2ljb3JwLmxvY2FsIiwiY29va2llTmFtZSI6InJzaVNzb05leHRHZW4iLCJzdGF0dXMiOiJzdWNjZXNzIiwiaWF0IjoxNTM0ODE1NTAxfQ.Hbv_wcsEqmUBFTy64BTf15nWC94fsFTfmt3LZMq24Ag", "content-type": "application/json" } _job_def_url = _job_url + '/schedule/jobdefinitions' self._logger.info("URL to find the job definition is: %s " % _job_def_url) res = requests.get(url=_job_def_url, headers=_headers, verify=False) if res.text == "invalid token": self._logger.warning( "WARNING: Please update the tokenid manually. " "Then rerun this script again!!!") exit(1) x = [dct["id"] for dct in res.json() if dct["jobDefName"] == _job_name] if not x: self._logger.info( "There is no job id found for job: %s. You can refer to API: %s" % (_job_name, _job_def_url)) exit(1) # returning job definition id return x[0] def gen_schedule(self): self._logger.info( "Inserting related schedule schedule for OSARetailerAFM.") _rule_type = self.params["ruleType"].upper() # only creating schedule for RETAILER rule. if _rule_type == "RETAILER": _job_name = "OSARetailerAFM" _job_schedule_id = self.get_job_def_id(_job_name) _cycle_key = self.params["cycleKey"] _group_name = "{0}:{1}".format(_job_schedule_id, _cycle_key) _schedule_name = "{0}:{1}".format(_group_name, _job_name) sch_params = dict( creater="*****@*****.**", groupName=_group_name, jobDefinitionId=_job_schedule_id, # parametersContext="", # priority=1, # scheduleExpression="", scheduleName=_schedule_name, scheduleType="EVENT") loader = scheduler.ScheduleParamsToTable(self.meta, sch_params) loader.load_data() # OSMAlerting schedule will take care of SVR rule. if _rule_type == "SVR": pass
def __init__(self, meta, params=None, init_flag=False, logger=None): """ # sync feedback data from RDP side to IRIS(OSA) side by incremental via event_key. # 1, only sync those vendor&retailer which applied OSA Service. # 2, for Those new vendor&retailer, copy all historical data when initialization. :param meta: [mandatory] config data from config.properties file :param params: 2 cases here. depends on whether sync rdp feedback for whole RDP or new customer. see below. 1, rdp_id: if rdp_id was given, then sync all data for this given RDP. otherwise, sync data from all related RDPs. Noted: rdp_id will be passed when calling this service via REST API. 2, vendor_key: mandatory only when init_flag is True. retailer_key: mandatory only when init_flag is True Noted: These 2 parameters will not be passed from REST API but called directly by deploy scripts. :param init_flag: if init_flag is True: then only sync feedback data for given vendor & retailer. This is used when introducing new customer. if init_flat is False: sync all customers' data from RDP periodically(e.g. sync daily). :param logger: """ self.meta = meta self._params = {} if params is None else params self._rdp_id = self._params.get("rdpId", None) self._fact_type = 'fdbk' self._init_flag = init_flag self._vendor_key = self._params.get("vendor_key", None) self._retailer_key = self._params.get("retailer_key", None) self._debug = self._params.get('debug', 'N') self._default_rdp = "RDP_AUX" self._log_file = './log/sync_fdbk_%s_%s.log' % ( self._rdp_id, datetime.datetime.now().strftime('%Y%m%d')) self.logger = logger if logger else Logger(log_level="debug", target="console|file", vendor_key=-1, retailer_key=-1, log_file=self._log_file, sql_conn=None) self.osa_app_conn = MSOperation(meta=self.meta, logger=self.logger) self.osa_dw_conn = DWOperation(meta=self.meta, logger=self.logger) self.max_event_key = None # we already know feedback table name of RDP self.source_table_rdp = "DS_FACT_FEEDBACK" # source table in RDP side. self.staging_import_table_osa = "STAGE_FACT_FEEDBACK_RDP" # used to store sync data from RDP table (same structure as table DS_FACT_FEEDBACK) self.target_table_osa = "FACT_FEEDBACK" # final table in OSA side self.capacity = Capacity(meta=meta) self.dct_sync_data = copy.deepcopy( self.meta) # required for calling sync_data module self.dct_sync_data[ "meta"] = self.meta # required for calling sync_data module self.dct_sync_data["target_osa_conn"] = self.osa_dw_conn self.dct_sync_data["target_dw_schema"] = self.meta[ 'db_conn_vertica_common_schema'] self.dct_sync_data["target_dw_table"] = self.staging_import_table_osa self.dct_sync_data["logger"] = self.logger # [True|False(default)] True: direct connection between Vertica clusters. False: using vsql. self.dct_sync_data["dw_conn_vertica"] = False # self.dct_sync_data["dw_conn_vertica"] = True self.transfer = TransferData(dct_sync_data=self.dct_sync_data)
class Feedback(object): def __init__(self, meta, params=None, init_flag=False, logger=None): """ # sync feedback data from RDP side to IRIS(OSA) side by incremental via event_key. # 1, only sync those vendor&retailer which applied OSA Service. # 2, for Those new vendor&retailer, copy all historical data when initialization. :param meta: [mandatory] config data from config.properties file :param params: 2 cases here. depends on whether sync rdp feedback for whole RDP or new customer. see below. 1, rdp_id: if rdp_id was given, then sync all data for this given RDP. otherwise, sync data from all related RDPs. Noted: rdp_id will be passed when calling this service via REST API. 2, vendor_key: mandatory only when init_flag is True. retailer_key: mandatory only when init_flag is True Noted: These 2 parameters will not be passed from REST API but called directly by deploy scripts. :param init_flag: if init_flag is True: then only sync feedback data for given vendor & retailer. This is used when introducing new customer. if init_flat is False: sync all customers' data from RDP periodically(e.g. sync daily). :param logger: """ self.meta = meta self._params = {} if params is None else params self._rdp_id = self._params.get("rdpId", None) self._fact_type = 'fdbk' self._init_flag = init_flag self._vendor_key = self._params.get("vendor_key", None) self._retailer_key = self._params.get("retailer_key", None) self._debug = self._params.get('debug', 'N') self._default_rdp = "RDP_AUX" self._log_file = './log/sync_fdbk_%s_%s.log' % ( self._rdp_id, datetime.datetime.now().strftime('%Y%m%d')) self.logger = logger if logger else Logger(log_level="debug", target="console|file", vendor_key=-1, retailer_key=-1, log_file=self._log_file, sql_conn=None) self.osa_app_conn = MSOperation(meta=self.meta, logger=self.logger) self.osa_dw_conn = DWOperation(meta=self.meta, logger=self.logger) self.max_event_key = None # we already know feedback table name of RDP self.source_table_rdp = "DS_FACT_FEEDBACK" # source table in RDP side. self.staging_import_table_osa = "STAGE_FACT_FEEDBACK_RDP" # used to store sync data from RDP table (same structure as table DS_FACT_FEEDBACK) self.target_table_osa = "FACT_FEEDBACK" # final table in OSA side self.capacity = Capacity(meta=meta) self.dct_sync_data = copy.deepcopy( self.meta) # required for calling sync_data module self.dct_sync_data[ "meta"] = self.meta # required for calling sync_data module self.dct_sync_data["target_osa_conn"] = self.osa_dw_conn self.dct_sync_data["target_dw_schema"] = self.meta[ 'db_conn_vertica_common_schema'] self.dct_sync_data["target_dw_table"] = self.staging_import_table_osa self.dct_sync_data["logger"] = self.logger # [True|False(default)] True: direct connection between Vertica clusters. False: using vsql. self.dct_sync_data["dw_conn_vertica"] = False # self.dct_sync_data["dw_conn_vertica"] = True self.transfer = TransferData(dct_sync_data=self.dct_sync_data) def _populate_source_config(self, source_config): self.logger.debug("The source config is: %s" % source_config) _src_config = {} if os.name == 'nt': _src_config["temp_file_path"] = "d:" elif os.name == 'posix': _src_config["temp_file_path"] = "/tmp" # Getting user account from config.properties file first. if self.meta.get("db_conn_vertica_rdp_username"): _src_config["dw.etluser.id"] = self.meta.get( "db_conn_vertica_rdp_username") if self.meta.get("db_conn_vertica_rdp_password"): _src_config["dw.etluser.password"] = self.meta.get( "db_conn_vertica_rdp_password") else: _pmp_pwd = get_password( username=self.meta.get("db_conn_vertica_rdp_username"), meta=self.meta) # The pwd should be encrypted in order to: 1, align with else part, 2, pass it to db.sync_data module _src_config["dw.etluser.password"] = Crypto().encrypt(_pmp_pwd) # if not configed then get them directly from RDP config. else: _src_config["dw.etluser.id"] = source_config.get("dw.etluser.id") # the pwd is encrypted _src_config["dw.etluser.password"] = source_config.get( "dw.etluser.password") # required info for calling sync_data module. _src_config["dw.server.name"] = source_config.get("dw.server.name") _src_config["dw.db.name"] = source_config.get("dw.db.name") _src_config["dw.db.portno"] = source_config.get("dw.db.portno", 5433) _src_config["dw.schema.name"] = source_config.get("dw.schema.name") self.logger.debug("srouce config is: %s" % _src_config) self.dct_sync_data["source_config"] = _src_config # Create the connection to RDP Vertica Cluster. which is the source Vertica cluster rdp_meta = copy.deepcopy(self.meta) tmp_rdp_meta = { 'db_conn_vertica_servername': _src_config["dw.server.name"], 'db_conn_vertica_port': _src_config["dw.db.portno"], 'db_conn_vertica_dbname': _src_config["dw.db.name"], 'db_conn_vertica_username': _src_config["dw.etluser.id"], 'db_conn_vertica_password': _src_config["dw.etluser.password"], 'db_conn_vertica_password_encrypted': "true" } rdp_meta.update(tmp_rdp_meta) self.logger.debug("rdp config is: %s" % rdp_meta) rdp_connection = DWOperation(meta=rdp_meta) self.dct_sync_data["source_dw"] = rdp_connection def main_process(self): try: # if not introducing new customer and _rdp_id was given, # then we will sync all feedback data from given RDP for registered users. if self._init_flag is False and self._rdp_id: try: rdp_config = Config(meta=self.meta, hub_id=self._rdp_id).json_data if not rdp_config['configs']: raise Warning( "There is no configs returned for RDP: %s." "Please check if this RDP registered in CP with below URL." "%s/properties/rdps?factType=fdbk" % (self._rdp_id, self.meta["api_config_str"])) # exit(StepStatus.SUCCESS.value) _rdp_schema = rdp_config['configs'].get('dw.schema.name') self.logger.info("Started to sync data from rdp: %s" % _rdp_schema) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process & update variables table. self.process_data() sql = """ IF NOT EXISTS(SELECT * FROM VARIABLES WHERE VARIABLE_NAME = '{eventType}') INSERT INTO VARIABLES (VARIABLE_NAME, VARIABLE_VALUE, PREVIOUS_VALUE, INSERT_TIME, UPDATE_TIME) VALUES ('{eventType}', '{value}', '', getdate(), getdate()) ELSE UPDATE VARIABLES SET PREVIOUS_VALUE = VARIABLE_VALUE, VARIABLE_VALUE = '{value}',UPDATE_TIME = getdate() WHERE VARIABLE_NAME = '{eventType}' """.format(eventType=_rdp_schema, value=self.max_event_key) self.logger.info(sql) self.osa_app_conn.execute(sql) self.logger.info("Data sync done for RDP: %s" % _rdp_schema) except Exception as e: self.logger.warning(e) raise # exit(StepStatus.SUCCESS.value) # exit(0) otherwise Docker container will fail. # Else we will get all RDPs from REST API: http://10.172.36.75/config/properties/rdps?factType=fdbk # There could be multi RDPs(e.g. for SVR & WM). if so, loop all RDPs elif self._init_flag is False and self._rdp_id is None: try: rdp_configs = Config( meta=self.meta, rdp_info=True, rdp_fact_type=self._fact_type).json_data if not rdp_configs: raise Warning( "No feedback related RDP found." "Please check if any data returned from below URL." "%s/properties/rdps?factType=fdbk" % (self.meta["api_config_str"])) # exit(StepStatus.SUCCESS.value) for rdp_config in rdp_configs: _rdp_schema = rdp_config['configs'].get( 'dw.schema.name') self.logger.info("Started to sync data from rdp: %s" % _rdp_schema) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process & update variables table. self.process_data() sql = """ IF NOT EXISTS(SELECT * FROM VARIABLES WHERE VARIABLE_NAME = '{eventType}') INSERT INTO VARIABLES (VARIABLE_NAME, VARIABLE_VALUE, PREVIOUS_VALUE, INSERT_TIME, UPDATE_TIME) VALUES ('{eventType}', '{value}', '', getdate(), getdate()) ELSE UPDATE VARIABLES SET PREVIOUS_VALUE = VARIABLE_VALUE, VARIABLE_VALUE = '{value}',UPDATE_TIME = getdate() WHERE VARIABLE_NAME = '{eventType}' """.format(eventType=_rdp_schema, value=self.max_event_key) self.logger.info(sql) self.osa_app_conn.execute(sql) self.logger.info("Data sync done for RDP: %s" % _rdp_schema) except Exception as e: self.logger.warning(e) raise elif self._init_flag is True: if self._vendor_key is None or self._retailer_key is None: self.logger.warning( "vendor_key and retailer_key are required when initilize feedback for new customer" ) raise ValueError # getting fdbk related rdps. try: rdp_configs = Config( meta=self.meta, rdp_info=True, rdp_fact_type=self._fact_type).json_data if not rdp_configs: self.logger.warning( "No feedback related RDP found." "Please check if any data returned from below URL." "%s/properties/rdps?factType=fdbk" % (self.meta["api_config_str"])) exit(StepStatus.SUCCESS.value) fdbk_rdps = [ str(rdp_config["rdpId"]).upper() for rdp_config in rdp_configs ] # change table name in case conflict with normal sync process self.dct_sync_data[ "target_dw_table"] = "{0}_{1}_{2}".format( self.staging_import_table_osa, self._vendor_key, self._retailer_key) _silo_config = Config( meta=self.meta, vendor_key=self._vendor_key, retailer_key=self._retailer_key).json_data _silo_type = _silo_config['configs'].get( 'etl.silo.type', 'SVR') _rdp_id = _silo_config['configs'].get('rdp.db.name') # RDP_AUX is default rdp id for feedback etl on PRODUCTION. # 1, if there is no RDP for given silo. then exit. if not _rdp_id or str(_rdp_id).strip() == '': self.logger.warning( "There is no RDP silo configed for the given vendor:%s " "and retailer:%s. So no need to sync feedback." % (self._vendor_key, self._retailer_key)) exit(StepStatus.SUCCESS.value) # 2, Getting configed RDP list, and check if there are feedback related RDPs. _tmp_rdp_lst = str(_rdp_id).upper().split(sep=",") _rdp_lst = [_tmp.strip() for _tmp in _tmp_rdp_lst] # common_rdps is RDP silo configed for syncing feedback data for given silo(vendor&retailer) common_rdps = list(set(_rdp_lst).intersection(fdbk_rdps)) if common_rdps is None: self.logger.warning( "There is no RDP silo configed for the given vendor:%s " "and retailer:%s. So no need to sync feedback." % (self._vendor_key, self._retailer_key)) exit(StepStatus.SUCCESS.value) # If there is 1 or more than 1 feedback related rdps configed, then loop them to sync feedback data, # Normally, there should be only 1. or no feedback rdp configed. for common_rdp in common_rdps: _rdp_id = common_rdp self.logger.info( "Started to sync data from rdp: %s for given vendor:%s and retailer:%s. " % (_rdp_id, self._vendor_key, self._retailer_key)) # if RDP is not RDP_AUX, Won't exit but log a warning. if _rdp_id != self._default_rdp: self.logger.warning( "Please be noted: The RDP is:%s. It is not RDP_AUX." % _rdp_id) # WM silos are also following above logic. # all hosted silo are ultilizing RDP_AUX to transfer feedback data. not sure about Walmart. # if str(_silo_type).upper() in ["WMSSC", "WMCAT", "SASSC", "WMINTL"]: # _rdp_id = self._default_rdp # WM rdp is RDP_AUX as well? rdp_config = Config(meta=self.meta, hub_id=_rdp_id).json_data if not rdp_config['configs']: self.logger.warning( "There is no configs for RDP: %s. Please check following URL:" "%s/properties/%s/%s" % (_rdp_id, self.meta["api_config_str"], _rdp_id, _rdp_id)) exit(StepStatus.SUCCESS.value) _rdp_schema = rdp_config['configs'].get( 'dw.schema.name') self.logger.info( "Started to init feedback data from rdp: %s for " "given vendor:%s and retailer:%s " % (_rdp_schema, self._vendor_key, self._retailer_key)) # self.dct_sync_data["source_config"] = rdp_config['configs'] self._populate_source_config(rdp_config['configs']) self.initialize() _flag = self.load_data() if _flag: # if no data, then no need to process. self.process_data() self.logger.info("Data sync done for RDP: %s" % _rdp_id) except Exception as e: self.logger.warning(e) self.logger.warning( "Please check if any warning or error messages when doing the initialization!" ) finally: if self.osa_app_conn: self.osa_app_conn.close_connection() if self.osa_dw_conn: self.osa_dw_conn.close_connection() def initialize(self): """ Create local temp tables , and DDLs required to process this fact type :return: """ self.logger.info("Initialize...") # recreate this table for every RDP. no need to truncate any longer. # sql = "TRUNCATE TABLE {cmnSchema}.{targetTable}"\ # .format(cmnSchema=self.dct_sync_data["target_dw_schema"], targetTable=self.staging_import_table_osa) # self.logger.info(sql) # self.osa_dw.execute(sql) sql = """ --Store data from RDP table. DROP TABLE IF EXISTS {cmnSchema}.{importTable}; CREATE TABLE {cmnSchema}.{importTable} ( EVENT_KEY int NOT NULL, RETAILER_KEY int, VENDOR_KEY int, STORE_VISIT_DATE date, PERIOD_KEY int NOT NULL, TYPE varchar(1), TYPE_DATE varchar(10), ALERT_ID int, ALERT_TYPE varchar(64), MERCHANDISER_STORE_NUMBER varchar(512), STORE_ID varchar(512), MERCHANDISER_UPC varchar(512), INNER_UPC varchar(512), MERCHANDISER varchar(100), STORE_REP varchar(1000), SOURCE varchar(1000), BEGIN_STATUS varchar(255), ACTION varchar(255), FEEDBACK_DESCRIPTION varchar(255), FEEDBACK_HOTLINEREPORTDATE date, FEEDBACK_ISININVENTORY varchar(5), ZIP_CODE varchar(64), ARTS_CHAIN_NAME varchar(255), UPC_STATUS varchar(255), MSI varchar(255) ) UNSEGMENTED ALL NODES; """.format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(sql) self.osa_dw_conn.execute(sql) def load_data(self): """ # Load data from RDP table ds_fact_feedback to local temp tables. There is an column event_key which is incremental for all customers in ds_fact_feedback table. we can save the snapshot of this column to variable table, and do the incremental every time based on this column. There are few cases here: 1, Routinely, There will be a scheduled job to sync the whole feedback data for valid customers from related RDP silo. And save the snapshot of the event_key from previous loading for next incremental loading. 2, if on-boarding a new vendor & retailer customer. Getting rdp_event_key from variable for related RDP silo. (rdp_event_key is from previous loading) and then sync feedback data from related RDP silo only for this given customer when event_key < rdp_event_key. Then case1 will take care the rest of feedback data. :return: """ rdp_schema = self.dct_sync_data["source_config"].get('dw.schema.name') # rdp_aux.ds_fact_feedback source_table = "{rdpSchema}.{rdptableName}"\ .format(rdpSchema=rdp_schema, rdptableName=self.source_table_rdp) # common.stage_fact_feedback_rdp target_table = "{dwSchema}.{importTable}"\ .format(dwSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info( "Ready to load Data from {srouceTable} to {targetTable}".format( targetTable=target_table, srouceTable=source_table)) insert_columns = " EVENT_KEY, RETAILER_KEY, VENDOR_KEY, STORE_VISIT_DATE, PERIOD_KEY, TYPE, TYPE_DATE," \ " ALERT_ID, ALERT_TYPE, MERCHANDISER_STORE_NUMBER, STORE_ID, MERCHANDISER_UPC, INNER_UPC," \ " MERCHANDISER, STORE_REP, SOURCE, BEGIN_STATUS, ACTION, FEEDBACK_DESCRIPTION," \ " FEEDBACK_HOTLINEREPORTDATE, FEEDBACK_ISININVENTORY, ZIP_CODE, ARTS_CHAIN_NAME, UPC_STATUS, MSI " try: self.logger.info( "Getting the previous Event_key from last run for incremental load." ) _event_sql = "SELECT VARIABLE_VALUE FROM variables " \ "WHERE VARIABLE_NAME = '{rdpName}'".format(rdpName=rdp_schema) self.logger.info(_event_sql) event_key = self.osa_app_conn.query_scalar(_event_sql) self.logger.info( "Getting customer info which only applied OSA services as filter" ) sql = "SELECT DISTINCT retailer_key, vendor_key FROM AP_ALERT_CYCLE_MAPPING " \ "UNION " \ "SELECT DISTINCT retailer_key, vendor_key FROM AP_ALERT_CYCLE_RC_MAPPING" self.logger.info(sql) results = self.osa_app_conn.query(sql) if not results: raise Warning( "There is no data in table AP_ALERT_CYCLE_MAPPING. Please check sql: %s" % sql) # exit(StepStatus.SUCCESS.value) user_filters = [ 'SELECT ' + str(result.retailer_key) + ',' + str(result.vendor_key) for result in results ] user_filter_str = ' UNION ALL '.join(user_filters) self.logger.info("Customer filters are: %s" % user_filter_str) # incremental filter from RDP table where_sql = "EVENT_KEY > {eventKey} AND SOURCE != 'ARIA' " \ "AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter})"\ .format(eventKey=event_key, userFilter=user_filter_str) # TODO2DONE: how to set default value? use -1 # copy all if there is no value in variables table. if not event_key: self.logger.warning( "There is no value set in variables table for RDP:{name}, " "So copy the whole table".format(name=rdp_schema)) where_sql = " SOURCE != 'ARIA' AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter})"\ .format(eventKey=event_key, userFilter=user_filter_str) event_key = -1 # check if this is the first run. if self._init_flag is True: if event_key == -1: # event_key is None self.logger.warning( "There is no event_key logged in variables table for the given RDP: %s." "So Let's wait for the routine job to sync the whole rdp feedback data" % rdp_schema) return False self.logger.info("Generating init feedback filters") where_sql = "EVENT_KEY <= {eventKey} AND SOURCE != 'ARIA' " \ "AND (RETAILER_KEY, VENDOR_KEY) in ({userFilter}) " \ "AND RETAILER_KEY={retailerKey} AND VENDOR_KEY={vendorKey} "\ .format(eventKey=event_key, userFilter=user_filter_str, retailerKey=self._retailer_key, vendorKey=self._vendor_key) self.logger.debug("The filters are: %s" % where_sql) # form the fetch query from RDP and then Insert into the target table fetch_query = """ SELECT /*+ label(GX_IRIS_SYNCFEEDBACK)*/ {insertQuery} FROM {sourceTable} WHERE {whereSql} """.format(insertQuery=insert_columns, sourceTable=source_table, whereSql=where_sql) self.logger.info("fetch_query is : %s" % fetch_query) self.logger.info( ">>Loading {factType} Data from event_key:{eventKey} start at: {timestamp}<<" .format(factType=self._fact_type, eventKey=event_key, timestamp=datetime.datetime.now())) self.dct_sync_data["target_column"] = insert_columns self.dct_sync_data["source_sql"] = fetch_query row_count = self.transfer.transfer_data( dct_sync_data=self.dct_sync_data) self.logger.info( ">>Done loaded {cnt} rows from event_key:{eventKey} completed at: {timestamp}<<" .format(cnt=row_count, factType=self._fact_type, eventKey=event_key, timestamp=datetime.datetime.now())) # if no data transfered, then update variables with previous value. sql = "SELECT /*+ label(GX_IRIS_SYNCFEEDBACK)*/ nvl(max(event_key), {oldEventKey}) " \ "FROM {schemaName}.{importTable} "\ .format(schemaName=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"], oldEventKey=event_key) self.logger.info(sql) self.max_event_key = self.osa_dw_conn.query_scalar(sql) # max_event_key = -1 # testing purpose if self.max_event_key == -1: self.logger.warning( "There is no feedback data in RDP table: {0}".format( source_table)) return False return True except Exception as e: self.logger.warning(e) raise finally: pass def process_data(self): """ after load_data part completes. sync data from temp table to related schemas. :return: """ try: self.logger.info("Processing feedback start...") # loop retailer to insert feedback data sql = "SELECT DISTINCT retailer_key " \ "FROM {cmnSchema}.{importTable}"\ .format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(sql) retailers = self.osa_dw_conn.query(sql) if retailers.rowcount == 0: self.logger.warning( "There is no data in table {cmnSchema}.{importTable}." "It could be no incremental data. Please check fetch_query against RDP database" .format(cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"])) for retailer in retailers: retailer_key = retailer.retailer_key osa_schema = self.capacity.get_retailer_schema_name( retailer_key) # Finally, run the sql msg = "Processing fdbk data within retailer {retailerKey}:{retailerName}"\ .format(retailerKey=retailer_key, retailerName=osa_schema) self.logger.info(msg) # Normally, There should NOT be duplicated alert_id transfered by incremental. # But should consider this case here. Delete existing alertid from target table # TODO: delete could have performance issue. consider using switch partition delete_sql = "DELETE FROM {osaSchema}.{targetTable} " \ "WHERE alert_id IN (SELECT alert_id FROM {cmnSchema}.{importTable} )"\ .format(targetTable=self.target_table_osa, osaSchema=osa_schema, cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(delete_sql) self.osa_dw_conn.execute(delete_sql) # inserting feedback data into final table fact_feedback from processed table. sql = """ INSERT INTO {osaSchema}.{targetTable} (EVENT_KEY, RETAILER_KEY, VENDOR_KEY, STORE_VISITED_PERIOD_KEY, PERIOD_KEY, ALERT_ID, STORE_KEY, MERCHANDISER_STORE_NUMBER, STORE_ID, ITEM_KEY, MERCHANDISER_UPC, UPC, MERCHANDISER, STORE_REP, SOURCE, BEGIN_STATUS, ACTION, FEEDBACK_DESCRIPTION, ON_HAND_PHYSICAL_COUNT, ON_HAND_CAO_COUNT ) SELECT stage.EVENT_KEY, stage.RETAILER_KEY, stage.VENDOR_KEY, TO_CHAR(stage.STORE_VISIT_DATE, 'YYYYMMDD')::int AS STORE_VISITED_PERIOD_KEY, stage.PERIOD_KEY, stage.ALERT_ID, store.STORE_KEY AS STORE_KEY, stage.MERCHANDISER_STORE_NUMBER, COALESCE(store.STORE_ID , alert.STOREID, stage.STORE_ID) AS STORE_ID, item.ITEM_KEY AS ITEM_KEY, stage.MERCHANDISER_UPC, COALESCE(item.UPC, alert.UPC, stage.INNER_UPC, stage.MERCHANDISER_UPC) AS UPC, stage.MERCHANDISER, stage.STORE_REP, stage.SOURCE, stage.BEGIN_STATUS, stage.ACTION, stage.FEEDBACK_DESCRIPTION, 0 AS ON_HAND_PHYSICAL_COUNT, 0 AS ON_HAND_CAO_COUNT FROM {cmnSchema}.{importTable} stage LEFT JOIN {osaSchema}.FACT_PROCESSED_ALERT alert ON stage.alert_id = alert.alert_id AND alert.issuanceid = 0 AND alert.retailer_key = {retailerKey} AND stage.vendor_key = alert.vendor_key INNER JOIN {cmnSchema}.DIM_PRODUCT item ON item.retailer_key = {retailerKey} AND alert.vendor_key = item.vendor_key AND item.item_key = alert.item_key INNER JOIN {cmnSchema}.DIM_STORE store ON store.retailer_key = {retailerKey} AND alert.vendor_key = store.vendor_key AND store.store_key = alert.store_key WHERE stage.retailer_key = {retailerKey} """.format(osaSchema=osa_schema, targetTable=self.target_table_osa, cmnSchema=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"], retailerKey=retailer_key) self.logger.info( "SQL used to load data to related schema. %s" % sql) self.osa_dw_conn.execute(sql) self.logger.info("Processing feedback ended...") except Exception as e: self.logger.warning("Process data for RDP {0} failed: {1}".format( self._rdp_id, e)) raise finally: if self._debug.upper() == 'N': _drop_sql = "DROP TABLE IF EXISTS {schemaName}.{importTable};" \ .format(schemaName=self.dct_sync_data["target_dw_schema"], importTable=self.dct_sync_data["target_dw_table"]) self.logger.info(_drop_sql) self.osa_dw_conn.execute(_drop_sql)
class Scd(object): def __init__(self, sql, actioner, log_detail=True, batch_size=100, meta={}, db_type=None, table_schema=None, logger=None): self.meta = meta self.actioner = actioner self.log_detail = log_detail self.batch_size = batch_size self.logger = logger if logger else Logger(log_level="info", vendor_key=-1, retailer_key=-1, module_name="scd") self.sql = sql self.raw_sql_splitted = self.sql.split(' ') self.raw_sql_splitted_trimed = [ str for str in self.raw_sql_splitted if str != '' ] self.db_type = db_type self.table_schema = table_schema self.logger.info('[SQL] %s' % self.sql) self.logger.info('[SQL splitted] %s' % self.raw_sql_splitted) self.mssql = MSOperation(meta=self.meta) self.vtc = None self.saved_records_count = 0 def set_insert_delete_update(self): cmd = self.raw_sql_splitted_trimed[0].upper() if cmd in ('DELETE', 'UPDATE', 'INSERT'): self.cmd = cmd else: raise RuntimeError('%s is not supported.' % cmd) def locate_keyword(self, keyword, sql_splitted=[]): # find the location of the keyword firstly appears in the sql # e.g. locate WHERE # UPDATE TEST SET COL = ' WHERE ' WHERE ID=1, the first ' WHERE ' is not a keyword, so return the index of the 2nd WHERE keyword = keyword.upper() base_seq_splitted = sql_splitted if sql_splitted else self.raw_sql_splitted # if sql_splitted is specified, locate keyword in sql_splitted, otherwise locate keyword in raw_sql_splitted index_keyword = [ k for k, v in enumerate(base_seq_splitted) if keyword in v.upper() ] for ind in index_keyword: count_of_single_quote_ahead_of_where = sum([ base_seq_splitted[i].count("'") for i in range(0, ind) if "'" in base_seq_splitted[i] ]) i = base_seq_splitted[ind].upper().index(keyword) count_of_single_quote_ahead_of_where += base_seq_splitted[ind][ 0:i].count("'") if count_of_single_quote_ahead_of_where % 2 == 0: # the KEYWORD we want to capture is the one has n*2 "'" ahead of it return ind return -1 # KEYWORD not found def locate_keyword_combination( self, keywords): #--mainly for parsing merge, may need to be updated # find the location of the first keyword, and to check if 2nd, 3rd keywords are just following the 1st keyword in the SQL # if match all the keywords, return the index of the first keyword # else return -1 keywords_list = [ w.upper().strip() for w in keywords.split(' ') if w.strip() != '' ] sql_splitted = [*self.raw_sql_splitted] base_ind = 0 self.logger.info('Locating %s' % keywords_list) while True: self.logger.info('Locating 1st keyword "%s"' % keywords_list[0]) ind = self.locate_keyword(keywords_list[0], sql_splitted) if ind == -1: self.logger.warning('Keyword not found.') return -1 following = [ v.upper().strip() for v in sql_splitted[ind + 1:] if v.strip() != '' ] self.logger.info('Getting following %s', following) if following[0:len(keywords_list) - 1] == keywords_list[1:]: self.logger.info('Keywords matched!') return base_ind + ind else: self.logger.warning('Keywords not matched.') sql_splitted = sql_splitted[ind + 1:] base_ind += ind def get_merge_where( self ): #--TODO, not done yet, the sql gramma "update from" for vertica and sqlserver are different, need more time to work on this index_using = self.locate_keyword('USING') index_on = self.locate_keyword('ON') index_when_match = self.locate_keyword_combination('WHEN MATCHED THEN') index_when_not_match = self.locate_keyword_combination( 'WHEN NOT MATCHED THEN') def get_where_condition(self): index_where = self.locate_keyword('WHERE') return '' if index_where == -1 else ' '.join( self.raw_sql_splitted[index_where:]) def get_table_name(self): self.set_insert_delete_update() return self.raw_sql_splitted_trimed[1].upper( ) if self.cmd == 'UPDATE' else self.raw_sql_splitted_trimed[2].upper() def get_raw_table_name(self, table_name): return table_name.split('.')[-1] def get_table_def(self, raw_table_name): sql = "SELECT ID, TABLE_NAME, PK, DB_TYPE, TABLE_SCHEMA FROM META_SCD_TABLE_DEF WHERE TABLE_NAME='%s'" % raw_table_name if self.db_type: sql += " AND DB_TYPE='%s'" % self.db_type if self.table_schema: sql += " AND TABLE_SCHEMA='%s'" % self.table_schema rs = self.mssql.query(sql) if len(rs) == 0: raise ValueError( '[TABLE_NAME:%s, DB_TYPE:%s, TABLE_SCHEMA:%s] The table is not found.' % (raw_table_name, db_type, table_schema)) elif len(rs) > 1: self.logger.info('|ID|TABLE_NAME|DB_TYPE|TABLE_SCHEMA|') for r in rs: self.logger.info( '|%s|%s|%s|%s|' % (r.ID, r.TABLE_NAME, r.DB_TYPE, r.TABLE_SCHEMA)) raise ValueError( '[TABLE_NAME:%s, DB_TYPE:%s, TABLE_SCHEMA:%s] More than one records returned for the table.' % (raw_table_name, db_type, table_schema)) else: if rs[0].DB_TYPE == 'MSSQL': self.target_conn = self.mssql else: self.vtc = DWOperation(meta=self.meta) self.target_conn = self.vtc return rs[0] def get_mssql_table_ddl_def(self, db_name, table_name, table_schema): ddl_sql = '''SELECT COLUMN_NAME, DATA_TYPE FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = N'%s' AND TABLE_SCHEMA = N'%s' ORDER BY ORDINAL_POSITION ''' % (db_name, table_name, table_schema) rs = self.mssql.query(ddl_sql) #return dict([('"%s"'%r.COLUMN_NAME if ' ' in r.COLUMN_NAME else r.COLUMN_NAME, r.DATA_TYPE) for r in rs]) return dict([(r.COLUMN_NAME, r.DATA_TYPE) for r in rs]) def get_vertica_table_ddl_def(self, table_name, table_schema): ddl_sql = '''SELECT COLUMN_NAME, DATA_TYPE FROM COLUMNS WHERE TABLE_NAME = '%s' AND TABLE_SCHEMA = '%s' ORDER BY ORDINAL_POSITION ''' % (table_name, table_schema) rs = self.vtc.query(ddl_sql) #return dict([('"%s"'%r.COLUMN_NAME if ' ' in r.COLUMN_NAME else r.COLUMN_NAME, r.DATA_TYPE) for r in rs]) return dict([(r.COLUMN_NAME, r.DATA_TYPE) for r in rs]) def save_to_db(self, table_def, recs): pk = json.loads(table_def.PK) batch_sqls = [] columns = 'TABLE_DEF_ID, PK_VALUE, ACTION, RECORD, UPDATE_TIME, ACTIONER' if self.log_detail else 'TABLE_DEF_ID, PK_VALUE, ACTION, UPDATE_TIME, ACTIONER' counter = 0 now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") for rec in recs: pk_dict = collections.OrderedDict() for k in pk: pk_dict[k] = rec[k] pk_value = json.dumps(pk_dict).replace("'", "''") if self.log_detail: batch_sqls.append( '''SELECT %s, '%s', '%s', '%s', '%s', '%s' ''' % (table_def.ID, pk_value, self.cmd, json.dumps(rec).replace( "'", "''"), now, self.actioner)) else: batch_sqls.append( '''SELECT %s, '%s', '%s', '%s', '%s' ''' % (table_def.ID, pk_value, self.cmd, now, self.actioner)) counter += 1 if len(batch_sqls) % self.batch_size == 0: self.mssql.execute( '''INSERT INTO META_SCD_CHANGE_LOG(%s) %s''' % (columns, ' UNION ALL '.join(batch_sqls))) batch_sqls = [] self.logger.info('%s has been logged.' % counter) if len(batch_sqls) > 0: self.mssql.execute('''INSERT INTO META_SCD_CHANGE_LOG(%s) %s''' % (columns, ' UNION ALL '.join(batch_sqls))) self.logger.info('%s has been logged.' % counter) @Timer() def save_data_to_be_changed(self, table_def, where_condition): if table_def.DB_TYPE == 'MSSQL': table_schema = 'dbo' if table_def.TABLE_SCHEMA == '(COMMON)' else table_def.TABLE_SCHEMA table_ddl_def = self.get_mssql_table_ddl_def( self.meta['db_conn_mssql_dbname'], table_def.TABLE_NAME, table_schema) else: # vertica table_schema = self.meta[ 'db_conn_vertica_common_schema'] if table_def.TABLE_SCHEMA == '(COMMON)' else table_def.TABLE_SCHEMA table_ddl_def = self.get_vertica_table_ddl_def( table_def.TABLE_NAME, table_schema) if not self.log_detail: # only dump primary key columns pk = json.loads(table_def.PK) table_ddl_def = dict([(c, table_ddl_def[c]) for c in table_ddl_def if c in pk]) self.logger.info(table_ddl_def) query_columns = ','.join([ '"%s"' % c if ' ' in c else c for c in list(table_ddl_def.keys()) ]) pull_data_sql = 'SELECT %s FROM %s.%s %s' % ( query_columns, table_schema, table_def.TABLE_NAME, where_condition) self.logger.info(pull_data_sql) rs = self.target_conn.query(pull_data_sql) recs = [] for r in rs: rec = {} for c in list(table_ddl_def.keys()): #rec[c] = str(eval("r."+c)) if 'TIME' not in table_ddl_def[c].upper() and 'DATE' not in table_ddl_def[c].upper() else eval("r."+c).strftime("%Y-%m-%d %H:%M:%S") rec[c] = str(eval('r.__getattribute__("%s")' % c)) recs.append(rec.copy()) self.saved_records_count = len(recs) self.logger.info('%s records will be saved.' % self.saved_records_count) self.save_to_db(table_def, recs) @Timer() def execute_sql(self): self.target_conn.execute(self.sql) def main(self): try: try: table_name = self.get_table_name() raw_table_name = self.get_raw_table_name(table_name) table_def = self.get_table_def(raw_table_name) saved = False if self.cmd != 'INSERT': where_condition = self.get_where_condition() self.save_data_to_be_changed(table_def, where_condition) saved = True except Exception as e: self.logger.warning(traceback.format_exc()) self.logger.warning( 'Failed to save the to-be-changed data, start to executing sql.' ) self.execute_sql() return '(%(db_type)s)%(table_schema)s.%(table_name)s - impacted %(row_count)s rows, saved %(status)s' % { 'db_type': table_def.DB_TYPE, 'table_schema': table_def.TABLE_SCHEMA, 'table_name': table_def.TABLE_NAME, 'row_count': self.saved_records_count, 'status': 'N/A' if self.cmd == 'INSERT' else 'successfully' if saved else 'unsuccessfully' } finally: if self.mssql: self.mssql.close_connection() if self.vtc: self.vtc.close_connection()