Example 1
    def _consumer_func(self, payload, *args, **kwargs):
        mc_fi_db = FiMarketingCloud(self.writer)
        inserted = mc_fi_db.insert_many(values=payload['product_data'],
                                        fi_id=f"fi_{payload['fi_id']}",
                                        on_duplicate_key_update=True)
        logger.info(f"{inserted} records inserted in FI {payload['fi_id']}")
        return payload['fi_id'], inserted
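
For context, a consumer function like this is presumably registered against a job type so the ProducerConsumer can dispatch matching payloads to it (see Example 8). A minimal sketch under that assumption; the registration method and importer instance are illustrative names, not the actual API, though the payload keys match what _consumer_func reads:

    # Hypothetical wiring; register_consumer and importer are illustrative.
    pc = ProducerConsumer(max_consumers=4)
    pc.register_consumer("product_data", importer._consumer_func)

    payload = {
        "fi_id": 42,
        "product_data": [{"product": "checking", "status": "active"}],
    }
    pc._work_queue.put({"job_type": "product_data", "payload": payload})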
Example 2
    def run(self):
        """
        `run` spawns a thread per producer that is registered and threads matching the max_consumers config value.

        The producer threads are spawned and passed the configuring args/kwargs during thread registration.

        The consumer threads are spawned and passed no args/kwargs. These are picked up from the self.consumers
        dictionary by the self._consumer method as it can't be predicted what work will need to be done.

        `run` will wait until all threads have completed their work before finally closing out by logging the number
        of unhandled errors that were raised.

        :return:
        """
        for name in self._producers:
            self.start_producer(name)

        for num in range(self.max_consumers):
            thread = threading.Thread(name=f"consumer_{num}",
                                      target=self._consumer)
            self.threads.append(thread)
            thread.start()

        for t in self.threads:
            t.join()

        if not self._work_queue.empty:
            logger.error(
                f"Work queue was not empty when it should be for producers {list(self._producers)}"
            )
        logger.info(
            f"completed work, {len(self.thread_exception_list)} exceptions were encountered."
        )
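
A sketch of how run might be driven end to end, assuming hypothetical registration methods consistent with the dispatch logic shown in the other examples:

    # Hypothetical end-to-end wiring; method names are illustrative.
    pc = ProducerConsumer(max_consumers=2)
    pc.register_producer("fetch_products", producer_func)  # spawned by run()
    pc.register_consumer("product_data", consumer_func)    # dispatched by job_type
    pc.run()  # blocks until all producers finish and the queue drains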
Example 3
    def set_access_token(self, context: MarketingCloudClientContext):
        """
        set_access_token will reach out to the auth url and fetch the access token for the session, then set it to be
        available to this instance of the class.

        :param context: MarketingCloudClientContext object
        :return:
        """
        with self.access_token_lock:
            logger.info("Getting access token.")

            body = {
                "grant_type": "client_credentials",
                "client_id": context.AUTH_ID,
                "client_secret": context.SECRETS.get(context.AUTH_SECRET_KEY)
            }

            r = requests.post(context.AUTH_URL, data=body)

            if r.status_code != 200:
                logger.error(f"Failed getting access token, status code {r.status_code}.")
                return

            data = r.json()

            self.access_token = data["access_token"]
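
The request body is a standard OAuth2 client_credentials grant. A minimal sketch of the context attributes this method reads, using a dataclass stand-in with placeholder values (the real MarketingCloudClientContext may differ):

    from dataclasses import dataclass, field

    # Illustrative stand-in; attribute names match what set_access_token reads,
    # values are placeholders.
    @dataclass
    class MarketingCloudClientContext:
        AUTH_URL: str = "https://example.auth.marketingcloudapis.com/v2/token"
        AUTH_ID: str = "my-client-id"
        AUTH_SECRET_KEY: str = "mc_client_secret"
        SECRETS: dict = field(default_factory=lambda: {"mc_client_secret": "***"})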
Example 4
    def fetch_product_data(self, context):
        """
        fetch_product_data fetches from Marketing Cloud, the previous days updates. In the event that more records exist
        that were not returned in the first call, calling this method again will return the next results until there
        are no more results to fetch.

        :param context: MarketingCloudClientContext object
        :return (list, bool): results from the call and the value of HasMoreRows if it exists
        """
        retries = 0
        success = False
        content = dict()
        while retries <= RETRY_LIMIT:
            response = self._fetch_product_data(context)

            if response.status_code == 200:
                try:
                    content = response.json()
                except json.decoder.JSONDecodeError:
                    # The main reason this happens is an invalid access token causing the
                    # endpoint to return nothing parsable in the content.

                    # It could also be an error returned as the content, but we cannot log
                    # the content because it may include sensitive information.

                    # If another thread is already refreshing the access token, block until
                    # it finishes; otherwise refresh it here.
                    if self.access_token_lock.locked():
                        with self.access_token_lock:
                            pass
                    else:
                        self.set_access_token(context)
                    retries += 1
                    continue
                else:
                    success = True
                    break
            else:
                logger.error(response.text)
                logger.info(f"Failed to fetch product data from {context.GET_PRODUCT_DATA_URL}")
                break

        if not success:
            logger.error("Error fetching product data from marketing cloud")

        self.request_id = content.get("RequestID", None)
        has_more_rows = content.get('HasMoreRows', False)
        if not has_more_rows:
            self.request_id = None
        return content.get('Results', []), has_more_rows
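
Since the method surfaces HasMoreRows, callers are expected to loop until it comes back False. A minimal pagination sketch (client and context construction omitted):

    # Drain every page of product data.
    all_results = []
    has_more_rows = True
    while has_more_rows:
        results, has_more_rows = client.fetch_product_data(context)
        all_results.extend(results)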
Example 5
    def insert_many(self, values=None, fi_id=None, on_duplicate_key_update=False):
        """
        Inserts the given list of dicts, each containing a record to be inserted.

        :param values: list of dicts, one per record
        :param fi_id: identifier of the FI the records belong to
        :param on_duplicate_key_update: when True, update existing rows on key collision
        """
        values = values or []
        affected_rows = 0
        sql, insert_values = self._build_sql(values=values,
                                             fi_id=fi_id,
                                             on_duplicate_key_update=on_duplicate_key_update)

        if insert_values:
            affected_rows = self._connection.insert_with_sql(sql, insert_values)

        logger.info(f"{len(values)} records received to be processed, {affected_rows} records inserted into the DB.")

        return affected_rows
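
_build_sql is not shown here; for MySQL it presumably renders a multi-row INSERT with an optional ON DUPLICATE KEY UPDATE clause and a matching flat sequence of parameters. A hedged sketch of what it might return for two records (table, columns, and update list are illustrative):

    # Illustrative only; the real statement depends on _build_sql.
    sql = (
        "INSERT INTO fi_42 (product, status) VALUES (%s, %s), (%s, %s) "
        "ON DUPLICATE KEY UPDATE status = VALUES(status)"
    )
    insert_values = ("checking", "active", "savings", "closed")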
Example 6
    def run(self):
        """
        run is responsible for the flow WorkManager. It iterates as long as there is work being processed
        and will shutdown when either the work is complete or no work has been picked up.

        If new work is found while the ProducerConsumer is still running, run will add it to the work being
        done. Otherwise, if ProducerConsumer is finishing up, run will return the work to the queue.

        If no work is found, the loop should break.

        :return:
        """
        logger.info('WorkManager has started...')
        while True:
            work = self.work_queue.get()
            if work is not None:
                if not self.producer_consumer:
                    self.build_producer_consumer()
                self.register_producer_for_work(work)
                if self.producer_consumer_thread and self.producer_consumer_thread.is_alive():
                    if not self.producer_consumer.event_manager.is_set():
                        event = self.producer_consumer.event_manager.get(work, None)
                        if event is None or event.is_set():
                            self.producer_consumer.start_producer(work)
                        else:
                            logger.warning(
                                f"Somehow received {work} work while already processing that work"
                            )
                    else:
                        logger.warning(
                            "ProducerConsumer thread is already shutting down, replacing work in queue"
                        )
                        self.work_queue.reset_work(work)
                        break
                else:
                    self.start_producer_consumer()
            if self.producer_consumer_thread is None:
                break
            elif not self.producer_consumer_thread.is_alive():
                logger.info(
                    "ProducerConsumer is dead, shutting down WorkManager")
                break
            logger.debug(
                f"Waiting for {self.time_to_wait} seconds before checking for more work")
            sleep(self.time_to_wait)
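
A sketch of how this loop might be fed, assuming hypothetical constructor arguments and a work queue that yields unit-of-work names (both are illustrative):

    # Hypothetical wiring; WorkManager's real constructor may differ.
    manager = WorkManager(work_queue=work_queue, time_to_wait=10)
    work_queue.put("fi_42")  # picked up on the next loop iteration
    manager.run()            # returns when no work remains or ProducerConsumer dies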
Example 7
def _get_mysql_connection(dict_cursor=False, unbuffered=False, **kwargs):
    """
    Obtain a `pymysql.Connection <pymysql.connections.Connection>` instance
    for the given connection parameters.

    :param bool dict_cursor: use `~pymysql.cursors.DictCursor` for cursors,
        so that cursor will return records as dicts rather than tuples.
        Shorthand for `cursorclass = pymysql.cursors.DictCursor`.

    :param bool unbuffered:
        use `~pymysql.cursors.SSCursor`,
        which does not buffer all response data in memory.
        The downside is that it cannot report the number of records
        and can navigate them only forwards.
        This option can be combined with ``dict_cursor``,
        which results in `~pymysql.cursors.SSDictCursor`.

    :param kwargs: additional attributes to pass to `pymysql`.
        Common examples are ``password`` or ``database``.
    """
    import pymysql
    logger.info('Obtaining database connection...')

    params = dict()
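    # Select the cursor class by name: Cursor, DictCursor, SSCursor or SSDictCursor.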
    params['cursorclass'] = getattr(
        pymysql.cursors, '{}{}Cursor'.format(
            'SS' if unbuffered else '',
            'Dict' if dict_cursor else '',
        ))
    params.update(kwargs)

    if not all(params.get(k) for k in ('host', 'user', 'password')):
        raise ValueError(
            'Host, user and password are mandatory and should be provided in kwargs'
        )

    print_params = params.copy()
    print_params['password'] = '******'

    logger.debug(
        'Connecting to DB using these credentials (password masked): %s',
        print_params)
    conn = pymysql.connect(**params)
    logger.debug('Obtained DB connection: %s', conn)

    return conn
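
Example usage (credentials are placeholders); with dict_cursor=True, rows come back as dicts:

    conn = _get_mysql_connection(
        dict_cursor=True,
        host="db.example.com",
        user="etl",
        password="s3cret",
        database="marketing",
    )
    with conn.cursor() as cur:
        cur.execute("SELECT 1 AS ok")
        print(cur.fetchone())  # {'ok': 1}
    conn.close()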
Example 8
    def _consumer(self):
        """
        _consumer reads the work queue and spawns work based on the job_type argument received from the queue. The
        self._consumers keys are matched against job type to determine which consumer function should be used
        for the consumption of the attached payload.

        If the consumer function raises an unhandled exception, _consumer will log it and move on to the next item
        in the queue. This happens until the queue is drained and the event_manager has been notified by all producers
        that there is no more work to be processed.

        :return:
        """
        while not self._work_queue.empty or not self.event_manager.is_set():
            try:
                job = self._work_queue.get()
                func = self._consumers.get(job["job_type"])

                func["func"](job["payload"], *func['args'], **func['kwargs'])

                self._work_queue.task_done()
            except queue.Empty:
                # This handles a race condition between threads where the queue became empty after entering
                # the current loop iteration.
                if self.event_manager.is_set():
                    # if nothing in queue and producer signalled we are done,
                    # then exit out of loop to stop thread
                    break
                sleep(1)
            except Exception as e:
                name = threading.current_thread().name
                logger.error(
                    f"Thread {name} encountered an unhandled exception in consumer function: {e}",
                    exc_info=True)
                with self._exception_list_lock:
                    self.thread_exception_list.append(dict(name=name, err=e))
                self._work_queue.task_done()
                continue  # move on with processing the queue

        logger.info(f"Finished {threading.current_thread().name}")
Example 9
    def _producer(self, producer_func, producer_args, producer_kwargs):
        """
        _producer takes a function that receives a queue object as the first argument and any number of args or kwargs.
        The provided function populates the queue with work that will be consumed by the consumer_func asynchronously.
        _producer will wait until the producer_func returns and them close out the thread and notify the event manager
        of completion of the work. If the producer function fails to handle an exception, the thread will be killed and
        no further work will be processed by the producer.
        """
        try:
            # completes when the source the function is pulling from is empty
            producer_func(self._work_queue, *producer_args, **producer_kwargs)
        except Exception as e:
            name = threading.current_thread().name
            logger.error(
                f"Thread {name} encountered an unhandled exception in producer function: {e}",
                exc_info=True)
            with self._exception_list_lock:
                self.thread_exception_list.append(dict(name=name, err=e))

        # notify consuming threads we are done
        logger.info(f"Finished {threading.current_thread().name}")
        self.event_manager.set(threading.current_thread().name)
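
A minimal producer function matching the contract described above: it takes the work queue as its first argument, enqueues jobs in the shape _consumer expects, and returns once its source is exhausted (fetch_batches is a hypothetical helper):

    def example_producer_func(work_queue, batch_size=100):
        # Enqueue one job per batch; returning ends the producer thread.
        for batch in fetch_batches(batch_size):
            work_queue.put({"job_type": "product_data", "payload": batch})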
Example 10
    def _put_item(self, key: str) -> Tuple[bool, Optional[str]]:
        if not Path(self.context.PATH).exists():
            logger.debug("Creating queue path")
            Path(self.context.PATH).mkdir(parents=True)
        if Path(self.context.PATH, f'{key}.lock').exists():
            logger.info("key exists in queue and is locked")
            return False, ErrorMessages.LOCKED_EXISTS
        path = Path(self.context.PATH, key)
        if path.exists():
            logger.info("key exists in queue and is unlocked")
            return False, ErrorMessages.UNLOCKED_EXISTS
        try:
            logger.info(f"attempting to create item for {key} in queue")
            path.touch()
        except Exception as e:
            logger.exception("Something unexpected happened when writing the file")
            return False, str(e)
        return True, None
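
A usage sketch, assuming a hypothetical file_queue owner of this method; on success it leaves an empty marker file at <context.PATH>/<key>, while a sibling <key>.lock file marks a claimed item:

    # Illustrative caller; file_queue is an assumed name for the owning instance.
    ok, err = file_queue._put_item("fi_42")
    if not ok:
        logger.warning(f"Could not enqueue fi_42: {err}")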
Example 11
    def _producer_func(self, work_queue: ThreadSafeQueueInterface, *args, **kwargs):
        start_time = datetime.now()
        has_more_rows = True
        self.client.set_access_token(self.context)
        while has_more_rows:
            result, has_more_rows = self.client.fetch_product_data(self.context)
            payload_map = dict()
            logger.info(f'Received {len(result)} records from Marketing Cloud')
            for item in result:
                self._update_map(item, payload_map)
            logger.info(f'Records grouped into {len(payload_map)} FIs')
            for key, data in payload_map.items():
                payload = dict(fi_id=key, product_data=data['product_data'])
                self.add_to_queue(work_queue, payload)
        end_time = datetime.now()
        logger.info(f'ProductDataProducer completed in {(end_time - start_time).seconds} seconds')
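
For reference, the shape of payload_map that _update_map builds can be inferred from how payloads are assembled above; the keys and records below are illustrative:

    payload_map = {
        "42": {"product_data": [{"product": "checking", "status": "active"}]},
        "43": {"product_data": [{"product": "savings", "status": "closed"}]},
    }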