def __init__(self, meta, logger=None, chuck_size=5000000, time_out=3600):
    """
    :param meta: parameters from the config file
    :param logger: logger handler
    :param chuck_size: chunk size (number of rows) used when reading data with Pandas
    :param time_out: timeout in seconds when uploading data to Azure Storage
    """
    self.meta = meta
    self.account_name = self.meta.get("azure_storage_account_name")
    self.container_name = self.meta.get("azure_storage_blob_container")
    self.logger = logger if logger else Logger(log_level="info", target="console",
                                               vendor_key=-1, retailer_key=-1, sql_conn=None)
    self.account_key = get_password(username=self.account_name, meta=self.meta)
    self.blob_service = BlockBlobService(self.account_name, self.account_key)
    self.dw_conn = DWOperation(meta=self.meta)
    self.sql = ""
    self.parq_filename = ""
    self.local_path = ""
    self.chuck_size = chuck_size  # 5,000,000 rows per chunk
    self.time_out = time_out  # upload timeout in seconds
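
# Hedged usage sketch (illustrative, not part of the original module): how the pieces wired
# together above are typically used to push a local file to Azure Blob Storage with the
# legacy (pre-v12) azure-storage SDK. The account, container, and file names below are
# placeholders; only BlockBlobService and the upload-timeout idea come from the
# constructor above.
import os
from azure.storage.blob import BlockBlobService  # legacy SDK that provides BlockBlobService

def upload_file_example(account_name, account_key, container_name, local_file, time_out=3600):
    blob_service = BlockBlobService(account_name, account_key)
    # Upload the local file as a blob named after the file; time_out mirrors the
    # constructor's upload timeout (seconds).
    blob_service.create_blob_from_path(container_name,
                                       blob_name=os.path.basename(local_file),
                                       file_path=local_file,
                                       timeout=time_out)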
def __init__(self, logger=None, meta={}):
    self.meta = meta
    self.server_name = meta['db_conn_redis_servername']
    self.port = meta['db_conn_redis_port']
    self.db = meta['db_conn_redis_dbname']
    self.password = get_password(username=meta["db_conn_redis_pmpname"], meta=meta)
    self.__connection = None
    self._logger = logger if logger else Logger(log_level="info",
                                                vendor_key=-1, retailer_key=-1)
def _populate_source_config(self, source_config):
    self.logger.debug("The source config is: %s" % source_config)
    _src_config = {}
    if os.name == 'nt':
        _src_config["temp_file_path"] = "d:"
    elif os.name == 'posix':
        _src_config["temp_file_path"] = "/tmp"

    # Get the user account from the config.properties file first.
    if self.meta.get("db_conn_vertica_rdp_username"):
        _src_config["dw.etluser.id"] = self.meta.get("db_conn_vertica_rdp_username")
        if self.meta.get("db_conn_vertica_rdp_password"):
            _src_config["dw.etluser.password"] = self.meta.get("db_conn_vertica_rdp_password")
        else:
            _pmp_pwd = get_password(username=self.meta.get("db_conn_vertica_rdp_username"),
                                    meta=self.meta)
            # The password must stay encrypted in order to: 1) align with the else branch,
            # 2) be passed to the db.sync_data module.
            _src_config["dw.etluser.password"] = Crypto().encrypt(_pmp_pwd)
    # If not configured, get the credentials directly from the RDP config.
    else:
        _src_config["dw.etluser.id"] = source_config.get("dw.etluser.id")
        # The password is already encrypted.
        _src_config["dw.etluser.password"] = source_config.get("dw.etluser.password")

    # Required info for calling the sync_data module.
    _src_config["dw.server.name"] = source_config.get("dw.server.name")
    _src_config["dw.db.name"] = source_config.get("dw.db.name")
    _src_config["dw.db.portno"] = source_config.get("dw.db.portno", 5433)
    _src_config["dw.schema.name"] = source_config.get("dw.schema.name")
    self.logger.debug("The source config is: %s" % _src_config)
    self.dct_sync_data["source_config"] = _src_config

    # Create the connection to the RDP Vertica cluster, which is the source Vertica cluster.
    rdp_meta = copy.deepcopy(self.meta)
    tmp_rdp_meta = {
        'db_conn_vertica_servername': _src_config["dw.server.name"],
        'db_conn_vertica_port': _src_config["dw.db.portno"],
        'db_conn_vertica_dbname': _src_config["dw.db.name"],
        'db_conn_vertica_username': _src_config["dw.etluser.id"],
        'db_conn_vertica_password': _src_config["dw.etluser.password"],
        'db_conn_vertica_password_encrypted': "true"
    }
    rdp_meta.update(tmp_rdp_meta)
    self.logger.debug("The RDP config is: %s" % rdp_meta)
    rdp_connection = DWOperation(meta=rdp_meta)
    self.dct_sync_data["source_dw"] = rdp_connection
def __init__(self, meta={}, logger=None):
    self._connection = None
    self._channel = None
    self._password = get_password(username=meta["mq_pmpname"], meta=meta)
    self.meta = meta
    self._exchange = meta['mq_exchange_name']
    self._exchange_type = 'fanout'
    if not os.path.exists(meta['mq_ca_certs']):
        raise RuntimeError("%s doesn't exist." % meta['mq_ca_certs'])
    if not os.path.exists(meta['mq_key_file']):
        raise RuntimeError("%s doesn't exist." % meta['mq_key_file'])
    if not os.path.exists(meta['mq_cert_file']):
        raise RuntimeError("%s doesn't exist." % meta['mq_cert_file'])
    self._app_id = None
    self._body = None
    self._logger = logger if logger else Logger(log_level="info",
                                                vendor_key=-1, retailer_key=-1)
def __init__(self, db_type=None, logger=None, meta={}):
    if not db_type or db_type.upper() not in ('VERTICA', 'MSSQL'):
        raise ValueError("DB type is not specified or is not supported.")
    self.meta = meta
    self.db_type = db_type.lower()
    # Look up connection settings by key directly instead of via eval().
    self.server_name = meta['db_conn_%s_servername' % self.db_type]
    self.port = meta['db_conn_%s_port' % self.db_type]
    self.db_name = meta['db_conn_%s_dbname' % self.db_type]
    self.username = meta['db_conn_%s_username' % self.db_type]
    self.is_pmp_password = False
    if meta.get('db_conn_%s_password' % self.db_type):
        self.password = meta['db_conn_%s_password' % self.db_type]
        if 'db_conn_%s_password_encrypted' % self.db_type in meta:
            self.password_encrypted = meta['db_conn_%s_password_encrypted' % self.db_type]
        else:
            raise ValueError(
                'db_conn_%(db_type)s_password_encrypted is missing in meta; it must be '
                'specified if db_conn_%(db_type)s_password is specified.'
                % {'db_type': self.db_type})
    else:
        self.password = get_password(username=self.username, meta=meta)
        self.is_pmp_password = True
        self.password_encrypted = 'false'
    self.odbc_driver = meta['db_driver_%s_odbc' % self.db_type]
    self.sqlalchemy_driver = meta['db_driver_%s_sqlachemy' % self.db_type]
    if self.password_encrypted.lower() == "true":
        self.password = decrypt_code(self.password)
    self._connection = None
    self.__engine = None
    self._logger = logger if logger else Logger(log_level="info",
                                                vendor_key=-1, retailer_key=-1)
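
# Hedged example (illustrative only): the meta keys the constructor above looks up for a
# Vertica connection. All values are placeholders; only the key names come from the code.
# If "db_conn_vertica_password" were omitted, the password would instead be fetched via
# get_password() and treated as unencrypted.
example_vertica_meta = {
    "db_conn_vertica_servername": "vertica.example.com",
    "db_conn_vertica_port": 5433,
    "db_conn_vertica_dbname": "example_db",
    "db_conn_vertica_username": "etl_user",
    "db_conn_vertica_password": "secret",                 # optional; see note above
    "db_conn_vertica_password_encrypted": "false",        # required whenever a password is given
    "db_driver_vertica_odbc": "Vertica",                  # placeholder ODBC driver name
    "db_driver_vertica_sqlachemy": "vertica+vertica_python",  # key spelling kept as in the code
}
# e.g. conn = SomeDbOperation(db_type="vertica", meta=example_vertica_meta)
# (the actual class name is not shown in this excerpt)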
def _process(self, afm_params):
    try:
        _cycle_key = afm_params.get("cycleKey")

        # Read configuration from the meta table in the IRIS MSSQL database.
        # Get all owners (both SVR & RETAILER rules) for the given cycle_key;
        # the delivery file is dumped per owner.
        sql = """
        SELECT d.ID AS DELIVERY_KEY, d.CYCLE_KEY, d.RETAILER_KEY, d.DELIVERY_NAME,
               d.FILTERS, d.DELIMITER, d.OWNER,
               ep.SERVER, ep.EXTRACTION_FOLDER, ep.USERNAME, ep.PASSWORD,
               ep.MAIL_SUBJECT, ep.MAIL_BODY, ep.MAIL_RECPSCC, ep.MAIL_RECPSTO,
               ep.DELIVERY_TYPE
        FROM AP_META_DELIVERIES d
        INNER JOIN AP_META_ENDPOINTS ep
            ON d.ENDPOINT_ID = ep.ID
        WHERE d.cycle_key = {0}
            AND d.ENABLED = 'T'
            AND ep.ENABLED = 'T'
        """.format(_cycle_key)
        self.logger.info(sql)
        meta_rows = self.app_conn.query(sql)
        self.logger.debug("The meta data is: %s" % str(meta_rows))
        if not meta_rows:
            raise Warning("There is no endpoint or delivery configured. Please check the meta table!")

        # There could be multiple owners for the given cycle, each with different filters.
        # This is required by PM, and a separate file is generated for every single row.
        for meta_data in meta_rows:
            # 1. Build the initial source query.
            _delivery_key = meta_data.DELIVERY_KEY
            required_columns, _init_src_query = self._gen_query(delivery_key=_delivery_key)
            if required_columns is None and _init_src_query is None:
                self.logger.warning("No layout configured for delivery key: %s" % _delivery_key)
                continue
            self.logger.info("The initial source query is: %s" % _init_src_query)

            delivery_type = meta_data.DELIVERY_TYPE
            if str.lower(delivery_type) == 'customer':
                meta_data = meta_data._replace(EXTRACTION_FOLDER=self.rsi_folder,
                                               USERNAME=self.rsi_username)
            if meta_data.USERNAME is None:
                self.logger.warning("There is no username configured for delivery key: %s" % _delivery_key)
                continue
            _pmp_pwd = get_password(username=meta_data.USERNAME, meta=self.meta)
            if _pmp_pwd:
                meta_data = meta_data._replace(PASSWORD=_pmp_pwd)
            self.logger.info("Start to dump & deliver for meta: %s" % str(meta_data))

            _src_query = _init_src_query

            # 2. Check whether any filters apply (e.g. alert_type, category).
            # A user might want to dump only the given alert types, so this is configurable.
            # So far, two filter types are supported: alert_type & category.
            # TODO: confirm the filter format with the UI team. Currently filters are configured as JSON,
            # e.g. {"alert_type": "d-void,phantom", "category": "cat1,cat2"}
            _filters_raw = meta_data.FILTERS
            if not _filters_raw or _filters_raw == "":
                self.logger.info("No filters applied.")
            else:
                self.logger.info("The filters are: %s" % _filters_raw)
                _filters = json.loads(str(_filters_raw).lower().strip())

                alert_type_str = _filters.get("alert_type", None)  # e.g. phantom,d-void,shelf oos
                if alert_type_str is not None and str(alert_type_str).strip() != '':
                    alert_type = ','.join("'" + str(ele).strip() + "'"
                                          for ele in str(alert_type_str).split(','))
                    _src_query += " AND type.intervention_name IN ({type})".format(type=alert_type)

                category_str = _filters.get("category", None)
                if category_str is not None and str(category_str).strip() != '':
                    category_type = ','.join("'" + str(ele).strip() + "'"
                                             for ele in str(category_str).split(','))
                    _src_query += " AND Product.OSM_CATEGORY IN ({cat_type})".format(cat_type=category_type)

            # The owner format should be: owner1 or owner1,owner2,...
            _owners = str(meta_data.OWNER)
            if not _owners:
                # Owner is the mandatory filter for every delivery.
                raise ValueError("There is no owner configured in the delivery meta table")
            _owner_in_str = ",".join("'" + ele.strip() + "'" for ele in _owners.split(","))
            _src_query += " AND alert.owner IN ({owner}) ".format(owner=_owner_in_str)

            _final_src_query = """
            SELECT {columns}
            FROM ({query}) x
            ORDER BY rn
            """.format(columns=required_columns, query=_src_query)
            self.logger.info("The final source sql is: %s" % _final_src_query)

            # The delivery file name should be: <delivery_name>_<YYYYMMDD>.<fileExt>,
            # e.g. <delivery_name>_20180101.txt
            curr_folder = os.path.dirname(os.path.realpath(__file__))
            target_filename = meta_data.DELIVERY_NAME + "_" + datetime.datetime.now().strftime('%Y%m%d')
            # The delivery file is dumped to the "<curr_dir>/data" folder temporarily.
            abs_target_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.' + self.file_ext
            zip_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.zip'

            # Get the data delimiter, e.g. ','
            delimiter = str(meta_data.DELIMITER).strip()
            if len(delimiter) != 1:
                raise ValueError("The delimiter should be a single character")

            # Start dumping data from the source db.
            self.dumper = dd.DumpData(context=self.dumper_context)
            self.logger.info("Dumping data into file: %s" % abs_target_filename)
            _dump_flag = self.dumper.dump_data(src_sql=_final_src_query,
                                               output_file=abs_target_filename,
                                               delimiter=delimiter)
            self.logger.debug("The dump flag is: %s" % _dump_flag)

            # Dumping alerts succeeded.
            if _dump_flag is True:
                self.logger.info("Dumping data is done!")

                # Check the zip flag.
                if self.zip_flag:
                    _flat_file_size = round(os.path.getsize(abs_target_filename) / 1024 / 1024)
                    self.logger.debug("The flat file size is: %s" % _flat_file_size)
                    self.logger.info("Zipping file: %s" % abs_target_filename)
                    with zipfile.ZipFile(zip_filename, 'w') as z:
                        z.write(abs_target_filename, os.path.basename(abs_target_filename))
                    abs_target_filename = zip_filename
                    self.logger.info("The zip file name is: %s" % abs_target_filename)

                # Start sending the data file.
                self.logger.info("Start uploading the delivery file to the destination folder!")
                self.sender = sd.SendData(context=self.dumper_context)
                self.sender.delivery_file(meta_data=meta_data, src_file=abs_target_filename)
            else:
                self.logger.warning("There is no data returned or dumping data failed. "
                                    "Please refer to the previous log for the related source query.")

        self.logger.info("Alert delivery process is done")

    except Warning:
        raise
    except Exception:
        raise
    finally:
        if self.vertica_conn:
            self.vertica_conn.close_connection()
        if self.app_conn:
            self.app_conn.close_connection()
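
# Hedged illustration (a simplified restatement of the filter handling above, not an original
# helper): how a FILTERS value such as {"alert_type": "d-void,phantom", "category": "cat1,cat2"}
# is expanded into the extra predicates appended to the source query.
import json

def build_filter_predicates(filters_raw):
    predicates = []
    if not filters_raw:
        return predicates
    filters = json.loads(str(filters_raw).lower().strip())
    alert_type_str = filters.get("alert_type")
    if alert_type_str and str(alert_type_str).strip():
        in_list = ','.join("'" + ele.strip() + "'" for ele in str(alert_type_str).split(','))
        predicates.append(" AND type.intervention_name IN ({0})".format(in_list))
    category_str = filters.get("category")
    if category_str and str(category_str).strip():
        in_list = ','.join("'" + ele.strip() + "'" for ele in str(category_str).split(','))
        predicates.append(" AND Product.OSM_CATEGORY IN ({0})".format(in_list))
    return predicates

# build_filter_predicates('{"alert_type": "d-void,phantom"}')
# -> [" AND type.intervention_name IN ('d-void','phantom')"]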
def sync_data(dct_sync_data):
    """Copy data from the source DW to the target DW.

    If dw_conn_vertica is True, a direct Vertica-to-Vertica connection is used to export the
    data; otherwise the data is exported with vsql, gzipped, and loaded with COPY.

    :param dct_sync_data: dict that must contain source_config, source_dw, the target meta
                          (db_conn_vertica_* keys and meta), target_dw_schema, target_dw_table,
                          target_column and source_sql
    :return:
    """
    dct_sync_data["sep"] = os.path.sep
    dct_sync_data["temp_file_path"] = dct_sync_data["source_config"].get('temp_file_path', '/tmp')
    dct_sync_data["source_dw_server"] = dct_sync_data["source_config"].get('dw.server.name')
    dct_sync_data["source_dw_name"] = dct_sync_data["source_config"].get('dw.db.name')
    dct_sync_data["source_dw_port"] = dct_sync_data["source_config"].get('dw.db.portno', '5433')
    dct_sync_data["source_dw_user"] = dct_sync_data["source_config"].get('dw.etluser.id')
    dct_sync_data["source_dw_password"] = dct_sync_data["source_config"].get('dw.etluser.password')
    dct_sync_data["source_dw_password_decrypt"] = Crypto().decrypt(
        dct_sync_data["source_config"].get('dw.etluser.password'))
    dct_sync_data["source_dw_schema"] = dct_sync_data["source_config"].get("dw.schema.name")
    dct_sync_data["target_dw_server"] = dct_sync_data["db_conn_vertica_servername"]
    dct_sync_data["target_dw_name"] = dct_sync_data["db_conn_vertica_dbname"]
    dct_sync_data["target_dw_port"] = dct_sync_data["db_conn_vertica_port"]
    dct_sync_data["target_dw_user"] = dct_sync_data["db_conn_vertica_username"]
    dct_sync_data["target_dw_password_decrypt"] = get_password(
        dct_sync_data['db_conn_vertica_username'], dct_sync_data["meta"])

    if dct_sync_data["dw_conn_vertica"]:
        # Export directly between Vertica clusters via CONNECT TO VERTICA / EXPORT TO VERTICA.
        script = (
            "CONNECT TO VERTICA {target_dw_name} USER {target_dw_user} PASSWORD '{target_dw_password_decrypt}' "
            "ON '{target_dw_server}', {target_dw_port}; "
            "EXPORT TO VERTICA {target_dw_name}.{target_dw_schema}.{target_dw_table}({target_column}) "
            "AS {source_sql};"
            "DISCONNECT {target_dw_name}".format(**dct_sync_data))
        dct_sync_data["logger"].debug(script)
        dct_sync_data["source_dw"].execute(script)
    else:
        # Dump the source query to a gzipped flat file with vsql, then COPY it into the target.
        with open('{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.sql'.format(**dct_sync_data),
                  'w', encoding='UTF-8') as file:
            file.write(dct_sync_data["source_sql"])
        dct_sync_data["target_column"] = dct_sync_data["target_column"].replace('"', '\\"')
        script = (
            "vsql -h {source_dw_server} -d {source_dw_name} -p {source_dw_port} "
            "-U {source_dw_user} -w {source_dw_password_decrypt} -At -F \"|\" "
            "-f {temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.sql "
            "| gzip > {temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.gz; "
            "vsql -h {target_dw_server} -d {target_dw_name} -p {target_dw_port} "
            "-U {target_dw_user} -w {target_dw_password_decrypt} -c \"COPY "
            "{target_dw_schema}.{target_dw_table}({target_column}) FROM LOCAL "
            "'{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.gz' GZIP "
            "DELIMITER E'|' NO ESCAPE REJECTMAX 1000 "
            "REJECTED DATA '{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.reject' "
            "EXCEPTIONS '{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.exception' "
            "DIRECT STREAM NAME 'GX_OSA_SYNC_DATA_COPYCMD' --enable-connection-load-balance\"; "
            "".format(**dct_sync_data))
        dct_sync_data["logger"].debug(script)
        result = subprocess.run(script, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode == 0:
            dct_sync_data["logger"].info(
                "{target_dw_schema}.{target_dw_table} data load done".format(**dct_sync_data))
        else:
            err = "Sync data error. {}".format(result.stdout)
            dct_sync_data["logger"].error(err)
            raise Exception(err)
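
# Hedged example (illustrative only): the minimal shape of the dct_sync_data dict that
# sync_data() above reads. All values are placeholders; the key names are the ones accessed
# inside the function. "source_dw" must expose execute(), and "logger" must expose
# debug()/info()/error().
example_dct_sync_data = {
    "source_config": {
        "dw.server.name": "rdp-vertica.example.com",
        "dw.db.name": "rdp_db",
        "dw.db.portno": "5433",
        "dw.etluser.id": "rdp_etl",
        "dw.etluser.password": "<encrypted>",   # decrypted with Crypto().decrypt()
        "dw.schema.name": "rdp_schema",
        "temp_file_path": "/tmp",
    },
    "db_conn_vertica_servername": "target-vertica.example.com",
    "db_conn_vertica_dbname": "target_db",
    "db_conn_vertica_port": 5433,
    "db_conn_vertica_username": "target_etl",
    "meta": {},                     # config dict handed to get_password()
    "dw_conn_vertica": True,        # True => EXPORT TO VERTICA path; False => vsql + COPY path
    "source_dw": None,              # DWOperation-style connection to the source cluster
    "target_dw_schema": "target_schema",
    "target_dw_table": "EXAMPLE_TABLE",                           # hypothetical table
    "target_column": "COL1, COL2",                                # hypothetical column list
    "source_sql": "SELECT COL1, COL2 FROM rdp_schema.SRC_TABLE",  # hypothetical query
    "logger": None,                 # logger instance
}
# sync_data(example_dct_sync_data)  # with dw_conn_vertica=True this would run the EXPORT branch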