Example 1
    def __init__(self, meta, logger=None, chuck_size=5000000, time_out=3600):
        """
        :param meta:  Parameters from config file
        :param logger:  logger handler
        :param chuck_size: chunk size (number of rows) used when reading data with pandas
        :param time_out: timeout, in seconds, when uploading data to Azure Storage
        """

        self.meta = meta
        self.account_name = self.meta.get("azure_storage_account_name")
        self.container_name = self.meta.get("azure_storage_blob_container")
        self.logger = logger if logger else Logger(log_level="info",
                                                   target="console",
                                                   vendor_key=-1,
                                                   retailer_key=-1,
                                                   sql_conn=None)

        self.account_key = get_password(username=self.account_name,
                                        meta=self.meta)
        self.blob_service = BlockBlobService(self.account_name,
                                             self.account_key)
        self.dw_conn = DWOperation(meta=self.meta)

        self.sql = ""
        self.parq_filename = ""
        self.local_path = ""
        self.chuck_size = chuck_size  # 5,000,000 rows per chunk
        self.time_out = time_out  # timeout in seconds
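
A minimal usage sketch for the constructor above, assuming a meta dict loaded from the config file that carries the Azure Storage entries it reads; the class name AzureUploader is a placeholder, since the snippet does not show the real one:

# Hypothetical usage; "AzureUploader" stands in for the class this __init__ belongs to.
meta = {
    "azure_storage_account_name": "myaccount",       # placeholder value
    "azure_storage_blob_container": "mycontainer",   # placeholder value
}
uploader = AzureUploader(meta=meta)  # defaults: 5,000,000-row chunks, 3600 s upload timeout
small_batches = AzureUploader(meta=meta, chuck_size=1000000, time_out=600)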
Example 2
    def __init__(self, logger=None, meta={}):
        self.meta = meta
        self.server_name = meta['db_conn_redis_servername']
        self.port = meta['db_conn_redis_port']
        self.db = meta['db_conn_redis_dbname']
        self.password = get_password(username=meta["db_conn_redis_pmpname"],
                                     meta=meta)
        self.__connection = None
        self._logger = logger if logger else Logger(
            log_level="info", vendor_key=-1, retailer_key=-1)
Example 3
    def _populate_source_config(self, source_config):
        self.logger.debug("The source config is: %s" % source_config)

        _src_config = {}
        if os.name == 'nt':
            _src_config["temp_file_path"] = "d:"
        elif os.name == 'posix':
            _src_config["temp_file_path"] = "/tmp"

        # Getting user account from config.properties file first.
        if self.meta.get("db_conn_vertica_rdp_username"):
            _src_config["dw.etluser.id"] = self.meta.get(
                "db_conn_vertica_rdp_username")
            if self.meta.get("db_conn_vertica_rdp_password"):
                _src_config["dw.etluser.password"] = self.meta.get(
                    "db_conn_vertica_rdp_password")
            else:
                _pmp_pwd = get_password(
                    username=self.meta.get("db_conn_vertica_rdp_username"),
                    meta=self.meta)
                # The password must be encrypted so that it (1) matches the else branch and (2) can be passed to the db.sync_data module.
                _src_config["dw.etluser.password"] = Crypto().encrypt(_pmp_pwd)
        # If the user account is not configured, get the credentials directly from the RDP source config.
        else:
            _src_config["dw.etluser.id"] = source_config.get("dw.etluser.id")
            # This password is already encrypted.
            _src_config["dw.etluser.password"] = source_config.get(
                "dw.etluser.password")

        # required info for calling sync_data module.
        _src_config["dw.server.name"] = source_config.get("dw.server.name")
        _src_config["dw.db.name"] = source_config.get("dw.db.name")
        _src_config["dw.db.portno"] = source_config.get("dw.db.portno", 5433)
        _src_config["dw.schema.name"] = source_config.get("dw.schema.name")

        self.logger.debug("srouce config is: %s" % _src_config)
        self.dct_sync_data["source_config"] = _src_config

        # Create the connection to the RDP Vertica cluster, which is the source Vertica cluster.
        rdp_meta = copy.deepcopy(self.meta)
        tmp_rdp_meta = {
            'db_conn_vertica_servername': _src_config["dw.server.name"],
            'db_conn_vertica_port': _src_config["dw.db.portno"],
            'db_conn_vertica_dbname': _src_config["dw.db.name"],
            'db_conn_vertica_username': _src_config["dw.etluser.id"],
            'db_conn_vertica_password': _src_config["dw.etluser.password"],
            'db_conn_vertica_password_encrypted': "true"
        }
        rdp_meta.update(tmp_rdp_meta)
        self.logger.debug("rdp config is: %s" % rdp_meta)
        rdp_connection = DWOperation(meta=rdp_meta)
        self.dct_sync_data["source_dw"] = rdp_connection
Example 4
    def __init__(self, meta={}, logger=None):

        self._connection = None
        self._channel = None
        self._password = get_password(username=meta["mq_pmpname"], meta=meta)
        self.meta = meta
        self._exchange = meta['mq_exchange_name']
        self._exchange_type = 'fanout'

        if not os.path.exists(meta['mq_ca_certs']):
            raise RuntimeError("%s doesn't exist." % meta['mq_ca_certs'])
        if not os.path.exists(meta['mq_key_file']):
            raise RuntimeError("%s doesn't exist." % meta['mq_key_file'])
        if not os.path.exists(meta['mq_cert_file']):
            raise RuntimeError("%s doesn't exist." % meta['mq_cert_file'])
        
        self._app_id = None
        self._body = None
        self._logger = logger if logger else Logger(log_level="info", vendor_key=-1, retailer_key=-1)
Example 5
    def __init__(self, db_type=None, logger=None, meta={}):

        if not db_type or db_type.upper() not in ('VERTICA', 'MSSQL'):
            raise ValueError("DB type is not specified or is not supported.")

        self.meta = meta
        self.db_type = db_type.lower()
        self.server_name = meta['db_conn_%s_servername' % self.db_type]
        self.port = meta['db_conn_%s_port' % self.db_type]
        self.db_name = meta['db_conn_%s_dbname' % self.db_type]
        self.username = meta['db_conn_%s_username' % self.db_type]
        self.is_pmp_password = False
        if meta.get('db_conn_%s_password' % self.db_type):
            self.password = meta['db_conn_%s_password' % self.db_type]
            if 'db_conn_%s_password_encrypted' % self.db_type in meta:
                self.password_encrypted = meta[
                    'db_conn_%s_password_encrypted' % self.db_type]
            else:
                raise ValueError(
                    'db_conn_%(db_type)s_password_encrypted is missing in meta; '
                    'it must be specified when db_conn_%(db_type)s_password is specified.'
                    % {'db_type': self.db_type})
        else:
            self.password = get_password(username=self.username, meta=meta)
            self.is_pmp_password = True
            self.password_encrypted = 'false'
        self.odbc_driver = meta['db_driver_%s_odbc' % self.db_type]
        self.sqlalchemy_driver = meta['db_driver_%s_sqlachemy' % self.db_type]

        if self.password_encrypted.lower() == "true":
            self.password = decrypt_code(self.password)

        self._connection = None
        self.__engine = None
        self._logger = logger if logger else Logger(
            log_level="info", vendor_key=-1, retailer_key=-1)
Example 6
    def _process(self, afm_params):
        try:
            _cycle_key = afm_params.get("cycleKey")

            # Read configuration from the meta table under IRIS MSSQL.
            # Get all owners (covering both SVR & RETAILER rules) for the given cycle_key;
            # the delivery file will be dumped per owner.
            sql = """
            SELECT d.ID AS DELIVERY_KEY, d.CYCLE_KEY, d.RETAILER_KEY, d.DELIVERY_NAME, d.FILTERS, 
                   d.DELIMITER, d.OWNER, ep.SERVER, ep.EXTRACTION_FOLDER, ep.USERNAME, ep.PASSWORD,
                   ep.MAIL_SUBJECT, ep.MAIL_BODY, ep.MAIL_RECPSCC, ep.MAIL_RECPSTO, ep.DELIVERY_TYPE 
            FROM AP_META_DELIVERIES d 
            INNER JOIN AP_META_ENDPOINTS ep
            ON d.ENDPOINT_ID = ep.ID 
            WHERE d.cycle_key = {0}
            AND d.ENABLED = 'T' AND ep.ENABLED = 'T' 
            """.format(_cycle_key)
            self.logger.info(sql)
            meta_rows = self.app_conn.query(sql)
            self.logger.debug("The meta data is: %s" % str(meta_rows))
            if not meta_rows:
                raise Warning(
                    "There is no endpoint or delivery configured. Please check the meta table!"
                )

            # There could be multiple owners for the given cycle, each with different filters.
            # This is required by PM, so we need to generate a separate file for every single row.
            for meta_data in meta_rows:
                # 1. Get the initial source query
                _delivery_key = meta_data.DELIVERY_KEY
                required_columns, _init_src_query = self._gen_query(
                    delivery_key=_delivery_key)
                if required_columns is None and _init_src_query is None:
                    self.logger.warning(
                        "Seems no layout configed for delivery key: %s" %
                        _delivery_key)
                    continue
                self.logger.info("The initial source query is: %s" %
                                 _init_src_query)

                delivery_type = meta_data.DELIVERY_TYPE
                if str.lower(delivery_type) == 'customer':
                    meta_data = meta_data._replace(
                        EXTRACTION_FOLDER=self.rsi_folder,
                        USERNAME=self.rsi_username)

                if meta_data.USERNAME is None:
                    self.logger.warning(
                        "There is no username configed for delivery key: %s" %
                        _delivery_key)
                    continue

                _pmp_pwd = get_password(username=meta_data.USERNAME,
                                        meta=self.meta)
                if _pmp_pwd:
                    meta_data = meta_data._replace(PASSWORD=_pmp_pwd)

                self.logger.info("Start to dump & delivery for meta: %s" %
                                 str(meta_data))
                _src_query = _init_src_query

                # 2. Check whether any filters apply (e.g. alert_type, category).
                # Users might want to dump only certain alert types, so this should be configurable.
                # So far we support two types of filters: alert_type & category.
                # TODO: confirm the filter format with the UI team. Currently filters are configured in JSON format,
                # e.g. {"alert_type": "d-void,phantom", "category": "cat1,cat2"}
                _filters_raw = meta_data.FILTERS
                if not _filters_raw or _filters_raw == "":
                    self.logger.info("No filters applied.")
                else:
                    self.logger.info("The filters are: %s" % _filters_raw)
                    _filters = json.loads(str(_filters_raw).lower().strip())
                    alert_type_str = _filters.get(
                        "alert_type", None)  # e.g. phantom,d-void,shelf oos
                    if alert_type_str is not None and str(
                            alert_type_str).strip() != '':
                        alert_type = ','.join(
                            "'" + str(ele).strip() + "'"
                            for ele in str(alert_type_str).split(','))
                        _src_query += " AND type.intervention_name IN ({type})".format(
                            type=alert_type)

                    category_str = _filters.get("category", None)
                    if category_str is not None and str(
                            category_str).strip() != '':
                        category_type = ','.join(
                            "'" + str(ele).strip() + "'"
                            for ele in str(category_str).split(','))
                        _src_query += " AND Product.OSM_CATEGORY IN ({cat_type})".format(
                            cat_type=category_type)

                # The owner format should be like: owner1 or owner1,owner2,...
                _owners = str(meta_data.OWNER)
                if not _owners:
                    # Owner is a mandatory filter for every delivery.
                    raise ValueError(
                        "There is no owner configured in the delivery meta table")

                _owner_in_str = ",".join("'" + ele.strip() + "'"
                                         for ele in _owners.split(","))
                _src_query += " AND alert.owner IN ({owner}) ".format(
                    owner=_owner_in_str)

                _final_src_query = """
                SELECT {columns} FROM ({query}) x ORDER BY rn
                """.format(columns=required_columns, query=_src_query)

                self.logger.info("The final source sql is: %s" %
                                 _final_src_query)

                # delivery file name should be: <delivery_name>_<YYYYMMDD>.<fileExt>. e.g. <delivery_name>_20180101.txt
                curr_folder = os.path.dirname(os.path.realpath(__file__))
                target_filename = meta_data.DELIVERY_NAME + "_" + datetime.datetime.now(
                ).strftime('%Y%m%d')

                # delivery file will be dumped to "<curr_dir>/data" folder temporarily.
                abs_target_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.' + self.file_ext
                zip_filename = curr_folder + os.sep + "data" + os.sep + target_filename + '.zip'

                # Getting data delimiter. e.g. ','
                delimiter = str(meta_data.DELIMITER).strip()
                if len(delimiter) != 1:
                    raise ValueError("delimiter should be 1 char")

                # start to dump data
                self.dumper = dd.DumpData(context=self.dumper_context)

                # dump data from source db
                self.logger.info("Dumping data into file: %s" %
                                 abs_target_filename)
                _dump_flag = self.dumper.dump_data(
                    src_sql=_final_src_query,
                    output_file=abs_target_filename,
                    delimiter=delimiter)
                self.logger.debug("The dump flag is: %s" % _dump_flag)

                # dump alerts succeeded.
                if _dump_flag is True:
                    self.logger.info("Dumping data is done!")

                    # check the zip flag
                    if self.zip_flag:
                        _flat_file_size = round(
                            os.path.getsize(abs_target_filename) / 1024 / 1024)
                        self.logger.debug("The flat file size is: %s" %
                                          _flat_file_size)
                        self.logger.info("zipping file: %s" %
                                         abs_target_filename)
                        with zipfile.ZipFile(zip_filename, 'w') as z:
                            z.write(abs_target_filename,
                                    os.path.basename(abs_target_filename))

                        abs_target_filename = zip_filename
                        self.logger.info("The zip file name is: %s" %
                                         abs_target_filename)

                    # start to send data file
                    self.logger.info(
                        "Start uploading the delivery file to the destination folder!")
                    self.sender = sd.SendData(context=self.dumper_context)
                    self.sender.delivery_file(meta_data=meta_data,
                                              src_file=abs_target_filename)

                else:
                    self.logger.warning(
                        "There is no data returned or dump data failed. "
                        "Please refer to previous log to get the related source query."
                    )

            self.logger.info("Alert delivery process is done")

        except Warning:
            raise

        except Exception:
            raise

        finally:
            if self.vertica_conn:
                self.vertica_conn.close_connection()
            if self.app_conn:
                self.app_conn.close_connection()
Example 7
def sync_data(dct_sync_data):
    """Get data from source to target. If dw_conn_vertica is Ture, it will create connect to vertica to export data
    :param dct_sync_data: The dict need source_config, source_dw, target meta, arget_dw_schema, target_dw_table,
                          target_column, source_sql
    :return:
    """
    dct_sync_data["sep"] = os.path.sep
    dct_sync_data["temp_file_path"] = dct_sync_data["source_config"].get(
        'temp_file_path', '/tmp')
    dct_sync_data["source_dw_server"] = dct_sync_data["source_config"].get(
        'dw.server.name')
    dct_sync_data["source_dw_name"] = dct_sync_data["source_config"].get(
        'dw.db.name')
    dct_sync_data["source_dw_port"] = dct_sync_data["source_config"].get(
        'dw.db.portno', '5433')
    dct_sync_data["source_dw_user"] = dct_sync_data["source_config"].get(
        'dw.etluser.id')
    dct_sync_data["source_dw_password"] = dct_sync_data["source_config"].get(
        'dw.etluser.password')
    dct_sync_data["source_dw_password_decrypt"] = Crypto().decrypt(
        dct_sync_data["source_config"].get('dw.etluser.password'))
    dct_sync_data["source_dw_schema"] = dct_sync_data["source_config"].get(
        "dw.schema.name")
    dct_sync_data["target_dw_server"] = dct_sync_data[
        "db_conn_vertica_servername"]
    dct_sync_data["target_dw_name"] = dct_sync_data["db_conn_vertica_dbname"]
    dct_sync_data["target_dw_port"] = dct_sync_data["db_conn_vertica_port"]
    dct_sync_data["target_dw_user"] = dct_sync_data["db_conn_vertica_username"]
    dct_sync_data["target_dw_password_decrypt"] = get_password(
        dct_sync_data['db_conn_vertica_username'], dct_sync_data["meta"])
    if dct_sync_data["dw_conn_vertica"]:
        script = (
            "CONNECT TO VERTICA {target_dw_name} USER {target_dw_user} PASSWORD '{target_dw_password_decrypt}' "
            "ON '{target_dw_server}', {target_dw_port}; "
            "EXPORT TO VERTICA {target_dw_name}.{target_dw_schema}.{target_dw_table}({target_column}) "
            "AS {source_sql};"
            "DISCONNECT {target_dw_name}".format(**dct_sync_data))
        dct_sync_data["logger"].debug(script)
        dct_sync_data["source_dw"].execute(script)
    else:
        with open(
                '{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.sql'
                .format(**dct_sync_data),
                'w',
                encoding='UTF-8') as file:
            file.write(dct_sync_data["source_sql"])
        dct_sync_data["target_column"] = dct_sync_data[
            "target_column"].replace('"', '\\"')
        script = (
            "vsql -h {source_dw_server} -d {source_dw_name} -p {source_dw_port} "
            "-U {source_dw_user} -w {source_dw_password_decrypt} -At -F \"|\" "
            "-f {temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.sql "
            "| gzip > {temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.gz; "
            "vsql -h {target_dw_server} -d {target_dw_name} -p {target_dw_port} "
            "-U {target_dw_user} -w {target_dw_password_decrypt} -c \"COPY "
            "{target_dw_schema}.{target_dw_table}({target_column}) FROM LOCAL "
            "'{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.gz' GZIP "
            "DELIMITER E'|' NO ESCAPE REJECTMAX 1000 "
            "REJECTED DATA '{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.reject' "
            "EXCEPTIONS '{temp_file_path}{sep}{source_dw_schema}_{target_dw_table}.exception' "
            "DIRECT STREAM NAME 'GX_OSA_SYNC_DATA_COPYCMD' --enable-connection-load-balance\"; "
            "".format(**dct_sync_data))
        dct_sync_data["logger"].debug(script)
        result = subprocess.run(script,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        if result.returncode == 0:
            dct_sync_data["logger"].info(
                "{target_dw_schema}.{target_dw_table} data load done".format(
                    **dct_sync_data))
        else:
            err = "Sync data error. {}".format(result.stdout)
            dct_sync_data["logger"].error(err)
            raise Exception(err)
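
A sketch of the dct_sync_data dict the function consumes, inferred from the keys read above; the values are placeholders, and the source_config, rdp_connection, meta, and logger objects are assumed to come from the earlier examples:

# Inferred structure of dct_sync_data; every value below is illustrative.
dct_sync_data = {
    "source_config": source_config,      # see _populate_source_config in Example 3
    "source_dw": rdp_connection,         # connection to the source Vertica cluster
    "meta": meta,                        # config dict, used for the PMP password lookup
    "db_conn_vertica_servername": "target-vertica.example.com",
    "db_conn_vertica_dbname": "osa",
    "db_conn_vertica_port": "5433",
    "db_conn_vertica_username": "etl_user",
    "dw_conn_vertica": True,             # True: EXPORT TO VERTICA; False: vsql dump + gzip + COPY
    "target_dw_schema": "osa_schema",
    "target_dw_table": "FACT_ALERTS",
    "target_column": "col_a, col_b",
    "source_sql": "SELECT col_a, col_b FROM src_schema.src_table",
    "logger": logger,
}
sync_data(dct_sync_data)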