コード例 #1
0
ファイル: sqoop_driver.py プロジェクト: NMerch/scheduler
    def _start_process(self, start_timeperiod, end_timeperiod, arguments):
        """Spawn the configured sqoop command as a child process for the given slice.

        Both timeperiods are converted with the hourly qualifier and rendered
        using SqoopDriver.SQOOP_DATE_FORMAT. The child is launched through the
        shell named in settings['bash_shell'] and receives, in order: the sqoop
        command, the formatted slice start, the formatted slice end and the
        output location (the process sink joined with start_timeperiod).

        On success the process handle is stored in self.cli_process.
        Any exception is logged and NOT re-raised.

        :param start_timeperiod: lower boundary of the slice (synergy timeperiod string)
        :param end_timeperiod: upper boundary of the slice (synergy timeperiod string)
        :param arguments: not used by this method
        """
        try:
            # format both slice boundaries, start first, end second
            slice_bounds = []
            for boundary in (start_timeperiod, end_timeperiod):
                boundary_dt = time_helper.synergy_to_datetime(QUALIFIER_HOURLY, boundary)
                slice_bounds.append(boundary_dt.strftime(SqoopDriver.SQOOP_DATE_FORMAT))
            slice_start, slice_end = slice_bounds

            output_root = ProcessContext.get_sink(self.process_name)

            self.logger.info('start: %s {' % self.process_name)
            command_line = [
                settings['bash_shell'],
                settings['sqoop_command'],
                str(slice_start),
                str(slice_end),
                output_root + '/' + start_timeperiod,
            ]
            child = psutil.Popen(command_line,
                                 close_fds=True,
                                 cwd=settings['process_cwd'],
                                 stdin=PIPE,
                                 stdout=PIPE,
                                 stderr=PIPE)
            self.cli_process = child
            self.logger.info('Started %s with pid = %r' % (self.process_name, child.pid))
        except Exception:
            self.logger.error('Exception on starting: %s' % self.process_name, exc_info=True)
        finally:
            # closes the '{' bracket opened in the log by the 'start: ...' message
            self.logger.info('}')
コード例 #2
0
ファイル: base_fixtures.py プロジェクト: NMerch/scheduler
def create_unit_of_work(process_name,
                        start_id,
                        end_id,
                        timeperiod='INVALID_TIMEPERIOD',
                        state=unit_of_work.STATE_REQUESTED,
                        creation_at=None,
                        uow_id=None):
    """ method creates and returns unit_of_work

    The unit_of_work is only built in memory; nothing is persisted here.

    :param process_name: name of the process the unit_of_work belongs to;
        also used to look up source and sink via ProcessContext
    :param start_id: value assigned to uow.start_id
    :param end_id: value assigned to uow.end_id
    :param timeperiod: value assigned to uow.timeperiod, uow.start_timeperiod
        and uow.end_timeperiod alike
    :param state: value assigned to uow.state
    :param creation_at: value assigned to uow.created_at;
        when None (default), datetime.utcnow() is taken at call time
    :param uow_id: if not None, stored as '_id' in the uow document
    :return: populated UnitOfWork instance
    """
    # A `datetime.utcnow()` default in the signature would be evaluated only once,
    # at function definition time, and then shared by every call; resolve it per call instead
    if creation_at is None:
        creation_at = datetime.utcnow()

    try:
        source_collection = ProcessContext.get_source(process_name)
        target_collection = ProcessContext.get_sink(process_name)
    except KeyError:
        # process is not registered in the ProcessContext: leave source and sink empty
        source_collection = None
        target_collection = None

    uow = UnitOfWork()
    uow.timeperiod = timeperiod
    uow.start_timeperiod = timeperiod
    uow.end_timeperiod = timeperiod
    uow.start_id = start_id
    uow.end_id = end_id
    uow.source = source_collection
    uow.sink = target_collection
    uow.state = state
    uow.created_at = creation_at
    uow.process_name = process_name
    uow.number_of_retries = 0

    if uow_id is not None:
        uow.document['_id'] = uow_id

    return uow
コード例 #3
0
    def compute_scope_of_processing(self, process_name, start_timeperiod, end_timeperiod, job_record):
        """method reads collection and identify slice for processing

        Steps performed:
        1. queries the data source for the primary keys bounding the slice
           of the source collection within [start_timeperiod, end_timeperiod]
        2. builds a unit_of_work in STATE_REQUESTED and inserts it via self.uow_dao
        3. publishes a WorkerMqRequest carrying the new unit_of_work id
        4. logs the publication via self._log_message

        :param process_name: name of the process the unit_of_work is created for
        :param start_timeperiod: lower boundary of the slice; also stored as uow.timeperiod
        :param end_timeperiod: upper boundary of the slice
        :param job_record: job record passed through to self._log_message
        :return: the UnitOfWork instance that was inserted
        """
        source_collection_name = ProcessContext.get_source(process_name)
        target_collection_name = ProcessContext.get_sink(process_name)

        # NOTE(review): start_id comes from highest_primary_key and end_id from lowest_primary_key,
        # which looks inverted; kept as-is - confirm against the data source implementation
        start_id = self.ds.highest_primary_key(source_collection_name, start_timeperiod, end_timeperiod)
        end_id = self.ds.lowest_primary_key(source_collection_name, start_timeperiod, end_timeperiod)

        uow = UnitOfWork()
        uow.timeperiod = start_timeperiod
        uow.start_id = str(start_id)
        uow.end_id = str(end_id)
        uow.start_timeperiod = start_timeperiod
        uow.end_timeperiod = end_timeperiod
        uow.created_at = datetime.utcnow()
        uow.source = source_collection_name
        uow.sink = target_collection_name
        uow.state = unit_of_work.STATE_REQUESTED
        uow.process_name = process_name
        uow.number_of_retries = 0
        uow_id = self.uow_dao.insert(uow)

        mq_request = WorkerMqRequest()
        mq_request.process_name = process_name
        mq_request.unit_of_work_id = uow_id

        publisher = self.publishers.get(process_name)
        try:
            publisher.publish(mq_request.document)
        finally:
            # hand the publisher back even when publish() raises,
            # otherwise the acquired publisher is never released
            publisher.release()

        msg = 'Published: UOW %r for %r in timeperiod %r.' % (uow_id, process_name, start_timeperiod)
        self._log_message(INFO, process_name, job_record, msg)
        return uow
コード例 #4
0
    def __init__(self, process_name):
        """Set up the worker: resolve its queues, create the MQ consumer and the performance ticker.

        @param process_name: id of the process, the worker will be performing
        """
        super(AbstractMqWorker, self).__init__(process_name)

        # source and sink are both resolved from the ProcessContext by process name
        self.queue_source = ProcessContext.get_source(self.process_name)
        self.queue_sink = ProcessContext.get_sink(self.process_name)

        # placeholder is declared first; _init_mq_consumer is called right after it
        self.consumer = None
        self._init_mq_consumer()

        self.main_thread = None

        # same pattern as for the consumer: declare the attribute, then call the initializer
        self.performance_ticker = None
        self._init_performance_ticker(self.logger)

        mode_description = 'in Testing Mode' if settings['under_test'] else 'in Production Mode'
        self.logger.info('Started %s %s' % (self.process_name, mode_description))