def _start_process(self, start_timeperiod, end_timeperiod, arguments):
    try:
        start_dt = time_helper.synergy_to_datetime(QUALIFIER_HOURLY, start_timeperiod)
        sqoop_slice_starttime = start_dt.strftime(SqoopDriver.SQOOP_DATE_FORMAT)

        end_dt = time_helper.synergy_to_datetime(QUALIFIER_HOURLY, end_timeperiod)
        sqoop_slice_endtime = end_dt.strftime(SqoopDriver.SQOOP_DATE_FORMAT)

        sink_path = ProcessContext.get_sink(self.process_name)

        self.logger.info('start: %s {' % self.process_name)
        p = psutil.Popen([settings['bash_shell'],
                          settings['sqoop_command'],
                          str(sqoop_slice_starttime),
                          str(sqoop_slice_endtime),
                          sink_path + '/' + start_timeperiod],
                         close_fds=True,
                         cwd=settings['process_cwd'],
                         stdin=PIPE,
                         stdout=PIPE,
                         stderr=PIPE)
        self.cli_process = p
        self.logger.info('Started %s with pid = %r' % (self.process_name, p.pid))
    except Exception:
        self.logger.error('Exception on starting: %s' % self.process_name, exc_info=True)
    finally:
        self.logger.info('}')
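
# A minimal follow-up sketch, not part of the original driver: psutil.Popen
# mirrors the subprocess.Popen API, so the Sqoop process spawned above can be
# reaped with poll()/communicate(). The method name _poll_process and its
# True/False "still running" contract are illustrative assumptions.
def _poll_process(self):
    returncode = self.cli_process.poll()
    if returncode is None:
        return True     # the Sqoop slice is still being exported
    stdout, stderr = self.cli_process.communicate()
    if stderr:
        self.logger.warn('stderr from %s: %s' % (self.process_name, stderr))
    self.logger.info('%s exited with code %r' % (self.process_name, returncode))
    return False
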
def create_unit_of_work(process_name, start_id, end_id, timeperiod='INVALID_TIMEPERIOD',
                        state=unit_of_work.STATE_REQUESTED, creation_at=None, uow_id=None):
    """ method creates and returns a unit_of_work """
    try:
        source_collection = ProcessContext.get_source(process_name)
        target_collection = ProcessContext.get_sink(process_name)
    except KeyError:
        source_collection = None
        target_collection = None

    uow = UnitOfWork()
    uow.timeperiod = timeperiod
    uow.start_timeperiod = timeperiod
    uow.end_timeperiod = timeperiod
    uow.start_id = start_id
    uow.end_id = end_id
    uow.source = source_collection
    uow.sink = target_collection
    uow.state = state
    # a default argument of datetime.utcnow() would be evaluated only once, at
    # function definition time; compute the timestamp per call instead
    uow.created_at = creation_at if creation_at is not None else datetime.utcnow()
    uow.process_name = process_name
    uow.number_of_retries = 0
    if uow_id is not None:
        uow.document['_id'] = uow_id
    return uow
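
# Self-contained demo (illustrative only) of why the original signature's
# creation_at=datetime.utcnow() default was a bug: Python evaluates default
# arguments once, when the def statement runs, so every unit_of_work would
# have reused the timestamp of module import.
if __name__ == '__main__':
    import time
    from datetime import datetime

    def stamped(created_at=datetime.utcnow()):      # evaluated once, at definition
        return created_at

    def stamped_fixed(created_at=None):             # evaluated on every call
        return created_at if created_at is not None else datetime.utcnow()

    first = stamped()
    time.sleep(1)
    assert stamped() == first           # stale: the same timestamp is reused
    assert stamped_fixed() > first      # fresh timestamp on each call
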
def compute_scope_of_processing(self, process_name, start_timeperiod, end_timeperiod, job_record):
    """method reads the source collection and identifies the slice for processing"""
    source_collection_name = ProcessContext.get_source(process_name)
    target_collection_name = ProcessContext.get_sink(process_name)

    start_id = self.ds.highest_primary_key(source_collection_name, start_timeperiod, end_timeperiod)
    end_id = self.ds.lowest_primary_key(source_collection_name, start_timeperiod, end_timeperiod)

    uow = UnitOfWork()
    uow.timeperiod = start_timeperiod
    uow.start_id = str(start_id)
    uow.end_id = str(end_id)
    uow.start_timeperiod = start_timeperiod
    uow.end_timeperiod = end_timeperiod
    uow.created_at = datetime.utcnow()
    uow.source = source_collection_name
    uow.sink = target_collection_name
    uow.state = unit_of_work.STATE_REQUESTED
    uow.process_name = process_name
    uow.number_of_retries = 0
    uow_id = self.uow_dao.insert(uow)

    # notify the worker queue that a new unit_of_work is ready for processing
    mq_request = WorkerMqRequest()
    mq_request.process_name = process_name
    mq_request.unit_of_work_id = uow_id

    publisher = self.publishers.get(process_name)
    publisher.publish(mq_request.document)
    publisher.release()

    msg = 'Published: UOW %r for %r in timeperiod %r.' % (uow_id, process_name, start_timeperiod)
    self._log_message(INFO, process_name, job_record, msg)
    return uow
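
# A possible refactor (sketch, not the project's code): the field-by-field
# UnitOfWork construction above duplicates create_unit_of_work from the
# earlier listing; delegating to it would keep UOW assembly in one place.
# Only end_timeperiod differs and needs overriding:
#
#     uow = create_unit_of_work(process_name, str(start_id), str(end_id),
#                               timeperiod=start_timeperiod)
#     uow.end_timeperiod = end_timeperiod   # the helper defaults it to timeperiod
#     uow_id = self.uow_dao.insert(uow)
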
def __init__(self, process_name):
    """@param process_name: id of the process the worker will be performing"""
    super(AbstractMqWorker, self).__init__(process_name)

    self.queue_source = ProcessContext.get_source(self.process_name)
    self.queue_sink = ProcessContext.get_sink(self.process_name)
    self.consumer = None
    self._init_mq_consumer()

    self.main_thread = None
    self.performance_ticker = None
    self._init_performance_ticker(self.logger)

    msg_suffix = 'in Production Mode'
    if settings['under_test']:
        msg_suffix = 'in Testing Mode'
    self.logger.info('Started %s %s' % (self.process_name, msg_suffix))
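
# Minimal subclass sketch (illustrative): concrete workers supply the message
# handler that the consumer initialized above will invoke. The handler name
# _mq_callback and the message.body attribute are assumptions about
# AbstractMqWorker's interface, not confirmed by this listing.
class EchoWorker(AbstractMqWorker):
    def _mq_callback(self, message):
        # log the raw message body and do nothing else
        self.logger.info('received: %r' % message.body)
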