def run(self):
    """Scanner main loop: wake up when messages arrive on the scanner queue,
    or at least once every `timeout` minutes, then run maintenance and the
    scanner itself. Swallows and logs all exceptions.
    """
    timeout = self._get_timeout()
    last_run_time = self._get_utc_now() - timedelta(minutes=timeout)
    while self._should_run:
        msgs = []
        try:
            msgs = self.scanner_queue.get_messages_from_queue()
        except Exception:
            # if the SQS queue fails, throttle the retry
            log_exception(
                "Exception in fetching messages from queue: "
                + str(self.scanner_queue.get_queue_name())
            )
            time.sleep(self.scanner_queue.get_wait_time())

        try:
            if len(msgs) > 0 or \
                    self._get_utc_now() - last_run_time >= timedelta(minutes=timeout):
                if len(msgs) > 0:
                    self.scanner_queue.delete_message_batch_from_queue(msgs)
                self.run_maint()
                self.run_scanner()
                self.scanner_queue.clear()
                last_run_time = self._get_utc_now()
        except Exception:
            log_exception("Exception in running scanner")

        if self._run_once:
            break
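# A minimal sketch of driving a single scan pass, e.g. from a test. The class
# name `Scanner` and its constructor are assumptions; only the attributes used
# by run() above (_should_run, _run_once) come from the original code.
scanner = Scanner(scanner_queue=my_queue, db=my_db)   # hypothetical constructor
scanner._should_run = True
scanner._run_once = True    # exit the while loop after one iteration
scanner.run()               # drain pending messages, then run_maint() + run_scanner()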
def _get_max_complete_date(self, job):
    """ Return the max_complete_date from AWS.

    :param job: instance of ScheduledJob
    :type job: ScheduledJob
    :returns: the max_complete_date, or None if it cannot be determined
    """
    job_dict = job.get(s3_path=None)
    # s3_path sample: s3://bucket_name/logs/log_name/
    s3_path = job_dict.get('s3_path')
    if s3_path is None:
        return None

    bucket_name, prefix = parse_s3_path(s3_path)
    prefix_list = prefix.split("/")
    # the prefix may or may not end with a trailing slash
    if prefix_list[-1] != '':
        log_name = prefix_list[-1]
    else:
        log_name = prefix_list[-2]

    try:
        log_data = get_log_meta_data(bucket_name, log_name)
        return get_deep(log_data, ['log', 'max_complete_date'], None)
    except Exception:
        log_exception(
            "Exception in running scanner when getting max_complete_date"
            " in s3 path: " + s3_path
        )
        return None
def get_messages_from_queue(self):
    """ Fetch messages from the SQS queue whose name was passed in during
    this object's construction. Boto exceptions are not handled here; they
    are logged and re-raised.

    :rtype: a list of SQS messages, or None
    """
    try:
        msgs = self._queue.get_messages(
            num_messages=self._num_messages_to_fetch,
            wait_time_seconds=self._wait_time_secs)
        return msgs
    except (BotoClientError, BotoServerError):
        log_exception(
            "Boto exception in fetching messages from SQS, for queue name: "
            + self._queue.id)
        raise
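# A minimal sketch of polling this wrapper, mirroring how the scanner and the
# worker use it: treat a Boto failure as transient and back off before retrying.
# `queue_wrapper` stands in for an instance of this class and `handle` is a
# hypothetical per-message handler; both are assumptions for illustration only.
while True:
    try:
        msgs = queue_wrapper.get_messages_from_queue() or []
    except (BotoClientError, BotoServerError):
        # already logged and re-raised by the wrapper; throttle and retry
        time.sleep(queue_wrapper.get_wait_time())
        continue
    for msg in msgs:
        handle(msg)
    if msgs:
        queue_wrapper.delete_message_batch_from_queue(msgs)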
def run_scanner(self):
    """ Fetch relevant jobs from the DB table and create ET or L work.
    Does not throw any exceptions out.
    """
    jobs = self._fetch_jobs_for_work()
    for job in jobs:
        try:
            if self._action_pending(job) or self._should_process_job(job):
                self._create_work_for_job(job)
            else:
                log("Skipping job since there was no processing needed: {0}".format(
                    job2log(job)
                ))
        except Exception:
            log_exception(
                "Caught an exception in processing a job. Ignoring"
                " entry: {0}".format(job2log(job)))
def get_connection(cls, table_object_name):
    """ Return a cached table connection wrapper for the given table object
    name, creating and caching it on first use.
    """
    if table_object_name not in cls._connection_dict:
        if cls._region_conn is None:
            cls._region_conn = get_dynamodb_connection()
        table_properties = cls._TABLE_NAME_TO_PROPERTIES[table_object_name]
        avro_schema = get_avro_schema(table_properties['avro_schema'])
        table_name = read_string(table_properties['physical_id_key'])
        table = Table(table_name, connection=cls._region_conn)
        try:
            results = table.describe()
            raw_indexes = results['Table'].get('GlobalSecondaryIndexes', [])
            table.global_indexes = introspect_global_indexes(raw_indexes)
        except Exception:
            log_exception("Table Connection Failed")
        cls._connection_dict[table_object_name] = table_properties['class'](
            table, avro_schema)
    return cls._connection_dict[table_object_name]
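# A minimal sketch of what _TABLE_NAME_TO_PROPERTIES might look like, inferred
# only from the keys get_connection reads ('avro_schema', 'physical_id_key',
# 'class'). Every name and value below is an illustrative assumption, not the
# real configuration.
_TABLE_NAME_TO_PROPERTIES = {
    'ScheduledJobs': {
        'avro_schema': 'schema/scheduled_jobs.avsc',          # passed to get_avro_schema
        'physical_id_key': 'dynamodb.scheduled_jobs_table',   # resolved via read_string
        'class': ScheduledJobs,   # wrapper class constructed with (table, avro_schema)
    },
}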
def _maint_paused_jobs(self, now):
    """ Maintenance pass over paused jobs: reset jobs whose cancellation was
    requested, and resume jobs whose pause flag has been cleared.
    """
    lsd_field = 'et_last_successful_date'
    kwargs = dict((action, None) for action in JOBS_ETL_ACTIONS)
    kwargs[lsd_field] = None
    get_jobs = getattr(self.db, 'get_jobs_with_et_status')
    jobs = get_jobs(JOBS_ETL_STATUS_PAUSED)
    for job in jobs:
        # if a key is set to None or does not exist, assume it is set to 1
        # to avoid resuming non-conforming jobs.
        result_dict = job.get(**kwargs)
        pause_requested = result_dict.get('pause_requested', 1)
        cancel_requested = result_dict.get('cancel_requested', 1)
        lsd = result_dict.get(lsd_field, None)
        if cancel_requested != 0:
            log('cancel requested for paused job {0}, resetting...'.format(job2log(job)))
            self._update_job_status(job, JOBS_ETL_STATUS_EMPTY)
        elif pause_requested == 0:
            # job is paused but pause_requested is 0 => resume to IDLE,
            # or SUCCESS if it can't resume right away
            ready_to_process = self._should_process_job(job)
            new_job_status = JOBS_ETL_STATUS_EMPTY \
                if ready_to_process or lsd is None else JOBS_ETL_STATUS_SUCCESS
            log('resetting paused job {0} to {1}'.format(job2log(job), new_job_status))
            self._update_job_status(job, new_job_status)

            if ready_to_process:
                additional_info = "Job will start immediately."
            else:
                additional_info = "Job will start when new data is available."
            job_dict = job.get(**self.DEFAULT_KEYS_TO_FETCH_FROM_JOB)
            if job_dict['contact_emails'] is not None:
                job_dict['contact_emails'] = list(job_dict['contact_emails'])
            try:
                self.emailer.mail_result("resumed", job_dict, additional_info)
                log("Sent emails to: {0}".format(job_dict['contact_emails']))
            except Exception:
                log_exception("Exception in sending emails of job: " + str(job_dict))
def _run_complete_callback(self, job, run_id, step, results):
    """ Callback invoked when a single run is done.

    :param job: current job
    :type job: WorkerJob
    :param run_id: a unique run identifier
    :type run_id: string
    :param step: step associated with the run execution
    :type step: string
    :param results: run results
    :type results: list
    """
    self._cond.acquire()
    try:
        if len(results) != 1:
            raise ValueError("len(results) != 1, {0}".format(results))
        log("done: {0}, {1}, {2}".format(run_id, step, results[0]['status']))
        job.all_results[run_id].extend(results)
        job.runs_done += 1
        job.runs_in_flight -= 1
        if job.runs_in_flight < 0:
            raise ValueError(
                "runs_in_flight < 0 ({0} < 0)".format(job.runs_in_flight))
        if job.is_waiting is True:
            self._cond.notify(n=1)
        self.etl_helper.etl_step_complete(job.msg_dict, run_id, step, results[0])
        job.run_complete(run_id, step, results)
    except:
        # if the callback dies, et_pool stops working
        log_exception('_run_complete_callback')
    finally:
        self._cond.release()
def worker_extended(inqueue, outqueue, initializer=None, initargs=(), maxtasks=None):
    """ This worker is mostly copied from multiprocessing.pool.

    :param inqueue: input queue to fetch commands to execute
    :type inqueue: SimpleQueue
    :param outqueue: output queue to place results after a command is executed
    :type outqueue: SimpleQueue
    :param initializer: a function to perform custom initialization
    :type initializer: function pointer
    :param initargs: initialization arguments to pass to the initializer
    :type initargs: list
    :param maxtasks: not used, solely for 2.6, 2.7 compatibility
    :type maxtasks: int
    """
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    while 1:
        try:
            task = get()
        except (EOFError, IOError):
            log_exception('worker got EOFError or IOError -- exiting')
            break
        except:
            log_exception('unknown exception')
            break

        if task is None:
            break

        job, i, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            log_exception('exception in {0}({1}, {2})'.format(func, args, kwds))
            result = (False, e)
        put((job, i, result))
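# A minimal sketch of the (job, i, func, args, kwds) task tuple worker_extended
# expects, running the worker in a child process the way multiprocessing.pool
# does. The queue wiring and the _square helper are illustrative assumptions.
from multiprocessing import Process
from multiprocessing.queues import SimpleQueue

def _square(x):
    return x * x

inq, outq = SimpleQueue(), SimpleQueue()
proc = Process(target=worker_extended, args=(inq, outq))
proc.start()

inq.put(('job-0', 0, _square, (7,), {}))   # one task: (job, i, func, args, kwds)
inq.put(None)                              # sentinel: tells the worker to exit

print(outq.get())                          # ('job-0', 0, (True, 49))
proc.join()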
def run(self):
    """Main entry point for the worker. Queries an SQS queue for messages
    and performs the appropriate action on each message received.

    Swallows all exceptions and logs them.
    """
    queue_name = str(self._get_queue_name())
    sqs = self._get_sqs_wrapper(queue_name, JSONMessage)

    scanner_queue_name = str(self._get_scanner_queue_name())
    scanner_sqs = self._get_sqs_wrapper(scanner_queue_name, JSONMessage)
    dummy_message = {"message": "dummy"}    # TODO: make this message meaningful

    while not self._stop_requested:    # Loop forever while this variable is set.
        try:    # Main try-except
            for msg in sqs.get_messages_from_queue():
                msg_body = msg.get_body()
                log({
                    "status": "new message",
                    "queue": queue_name,
                    "msg": msg_body,
                })

                results = None
                final_status = JOBS_ETL_STATUS_ERROR
                lsd = None
                extra_info = None
                try:
                    self._update_scheduled_jobs_on_etl_start(msg_body)
                    # safe to delete the message; if the worker dies, the
                    # scanner will resubmit it
                    sqs.delete_message_from_queue(msg)

                    try:
                        # Execute etl
                        results, action_dict = self._process_msg(msg)

                        # Parse results
                        final_status, lsd, extra_info = \
                            parse_results(results, msg_body['end_date'])
                        if final_status != JOBS_ETL_STATUS_COMPLETE:
                            if action_dict['delete_requested']:
                                final_status = JOBS_ETL_STATUS_DELETED
                            elif action_dict['cancel_requested']:
                                final_status = JOBS_ETL_STATUS_CANCELLED
                            elif action_dict['pause_requested']:
                                final_status = JOBS_ETL_STATUS_PAUSED

                        log({
                            "status": "processed message OK",
                            "queue": queue_name,
                            "msg": msg_body,
                            "results": results,
                            "job status": final_status,
                            "last OK date": lsd,
                        })
                    except Exception:
                        final_status = JOBS_ETL_STATUS_ERROR
                        log_exception(
                            "Exception in processing msg from queue: "
                            + queue_name + " msg body: " + str(msg_body)
                        )

                    if final_status != JOBS_ETL_STATUS_DELETED:
                        self._update_scheduled_jobs_on_etl_complete(
                            msg_body, final_status, lsd
                        )
                        scanner_sqs.write_message_to_queue(dummy_message)

                    try:
                        self.emailer.mail_result(
                            final_status, msg_body, additional_info=extra_info
                        )
                        log("Sent emails to: " + str(msg_body['contact_emails']))
                    except Exception:
                        log_exception(
                            "Exception in sending emails of job: " + str(msg_body)
                        )
                except Exception:
                    log_exception(
                        "Failed to update scheduled jobs on etl"
                        " start/complete, msg body: " + str(msg_body)
                    )
        except Exception:    # end of main try-except
            log_exception(
                "Exception in fetching messages from queue: " + queue_name
            )
            # if the SQS queue fails, throttle the retry
            time.sleep(sqs.get_wait_time())

        if self._run_once:
            break

    self._stop_requested = False