Beispiel #1
0
def reset_incomplete_runs():
    """
    Reset the runs that were left unfinished for this worker.

    Runs in the ``in_queue`` or ``started`` state, filtered on the
    configured ``api_key`` of this worker, are fetched from the API. For each
    of them a warning is logged and both ``enqueue_dts`` and ``start_dts``
    are patched back to ``None``.

    """
    logger.info('Cleaning up incomplete runs')

    def _runs_in_state(state):
        # one API query per state, limited to the runs of this worker
        return Run.get_list(
            config.get('job_runner_worker', 'run_resource_uri'),
            params={
                'state': state,
                'worker__api_key': config.get('job_runner_worker', 'api_key'),
            }
        )

    # collect everything first, patching only starts after both queries
    leftovers = []
    for state in ('in_queue', 'started'):
        leftovers.extend(_runs_in_state(state))

    cleared_timestamps = {'enqueue_dts': None, 'start_dts': None}
    for run in leftovers:
        logger.warning('Run {0} was left incomplete'.format(run.resource_uri))
        run.patch(dict(cleared_timestamps))
Beispiel #2
0
    def _get_json_data(self):
        """
        GET the resource and return the JSON data of the response.

        The URL is ``api_base_url`` joined with the resource path of this
        instance; the request is signed with :class:`HmacAuth`.

        :return:
            The ``json`` attribute of the ``requests`` response.

        :raises:
            :exc:`!RequestException` on ``requests`` error.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other non-200 response.

        """
        section = 'job_runner_worker'
        url = urlparse.urljoin(
            config.get(section, 'api_base_url'), self._resource_path)
        credentials = HmacAuth(
            config.get(section, 'api_key'), config.get(section, 'secret'))

        response = requests.get(
            url,
            auth=credentials,
            headers={'content-type': 'application/json'},
            verify=False,
        )

        status = response.status_code
        if status == 200:
            return response.json

        error_message = 'Server returned {0} - {1}'.format(
            status, response.content)
        if 500 <= status <= 599:
            raise RequestServerError(error_message)
        raise RequestClientError(error_message)
Beispiel #3
0
    def post(self, attributes={}):
        """
        POST the given ``attributes`` as JSON to the resource.

        :param attributes:
            A ``dict`` which is serialized with ``json.dumps`` and sent as
            the request body. Optional.

        :raises:
            :exc:`!RequestException` on ``requests`` error.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other non-201 response.

        """
        # NOTE: the default dict is shared between calls, but it is only
        # serialized below and never mutated.
        section = 'job_runner_worker'
        url = urlparse.urljoin(
            config.get(section, 'api_base_url'), self._resource_path)
        credentials = HmacAuth(
            config.get(section, 'api_key'), config.get(section, 'secret'))

        response = requests.post(
            url,
            auth=credentials,
            headers={'content-type': 'application/json'},
            data=json.dumps(attributes),
            verify=False,
        )

        status = response.status_code
        if status == 201:
            return

        error_message = 'Server returned {0} - {1}'.format(
            status, response.content)
        if 500 <= status <= 599:
            raise RequestServerError(error_message)
        raise RequestClientError(error_message)
Beispiel #4
0
def _handle_enqueue_action(message, run_queue, event_queue):
    """
    Handle the ``'enqueue'`` action.

    The run referenced by ``message['run_id']`` is claimed by patching its
    ``enqueue_dts`` and ``worker``, pushed on ``run_queue`` and announced
    with an ``'enqueued'`` event on ``event_queue``. When the run already
    has an ``enqueue_dts``, or the worker lookup does not return exactly one
    worker, only a warning is logged.
    """
    run_uri = '{0}{1}/'.format(
        config.get('job_runner_worker', 'run_resource_uri'),
        message['run_id'])
    run = Run(run_uri)

    worker_list = Worker.get_list(
        config.get('job_runner_worker', 'worker_resource_uri'))

    if run.enqueue_dts:
        logger.warning(
            'Was expecting that run: {0} was not in queue yet'.format(run.id))
        return

    if len(worker_list) != 1:
        logger.warning('API returned multiple workers, expected one')
        return

    claim = {
        'enqueue_dts': datetime.now(utc).isoformat(' '),
        # set the worker so we know which worker of the pool claimed the
        # run
        'worker': worker_list[0].resource_uri,
    }
    run.patch(claim)
    run_queue.put(run)

    enqueued_event = {
        'event': 'enqueued',
        'run_id': run.id,
        'kind': 'run'
    }
    event_queue.put(json.dumps(enqueued_event))
Beispiel #5
0
    def post(self, attributes={}):
        """
        Create the resource by POSTing ``attributes`` as a JSON body.

        :param attributes:
            A ``dict`` with the attributes to send; it is serialized with
            ``json.dumps``. Optional.

        :raises:
            :exc:`!RequestException` on ``requests`` error.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other response that is not a
            201.

        """
        # NOTE: the shared default dict is safe here because it is only
        # read (serialized), never modified.
        base_url = config.get('job_runner_worker', 'api_base_url')
        target = urlparse.urljoin(base_url, self._resource_path)

        api_key = config.get('job_runner_worker', 'api_key')
        secret = config.get('job_runner_worker', 'secret')

        response = requests.post(
            target,
            auth=HmacAuth(api_key, secret),
            headers={'content-type': 'application/json'},
            data=json.dumps(attributes),
            verify=False,
        )

        code = response.status_code
        if code != 201:
            is_server_error = 500 <= code <= 599
            error_cls = (
                RequestServerError if is_server_error else RequestClientError)
            raise error_cls(
                'Server returned {0} - {1}'.format(code, response.content))
def _get_subscriber(zmq_context):
    """
    Create a ``zmq.SUB`` socket on ``zmq_context`` for the broadcaster.

    The socket is connected to the configured
    ``broadcaster_server_hostname`` / ``broadcaster_server_port`` and
    subscribed to ``master.broadcast.<api_key>``.
    """
    section = 'job_runner_worker'
    subscriber = zmq_context.socket(zmq.SUB)

    endpoint = 'tcp://{0}:{1}'.format(
        config.get(section, 'broadcaster_server_hostname'),
        config.get(section, 'broadcaster_server_port'),
    )
    subscriber.connect(endpoint)

    topic = 'master.broadcast.{0}'.format(config.get(section, 'api_key'))
    subscriber.setsockopt(zmq.SUBSCRIBE, topic)
    return subscriber
Beispiel #7
0
def _get_subscriber(zmq_context):
    """
    Return a new subscriber socket created from the given ``zmq_context``.

    The ``zmq.SUB`` socket connects over TCP to the configured broadcaster
    host and port and subscribes to the ``master.broadcast.`` prefix
    followed by the configured ``api_key``.
    """
    def _setting(option):
        return config.get('job_runner_worker', option)

    subscriber = zmq_context.socket(zmq.SUB)

    hostname = _setting('broadcaster_server_hostname')
    port = _setting('broadcaster_server_port')
    subscriber.connect('tcp://{0}:{1}'.format(hostname, port))

    api_key = _setting('api_key')
    subscriber.setsockopt(
        zmq.SUBSCRIBE, 'master.broadcast.{0}'.format(api_key))
    return subscriber
def _get_subscriber(zmq_context):
    """
    Build the subscriber connection for the given ``zmq_context``.

    A ``zmq.SUB`` socket is opened, connected to the broadcaster address
    taken from the configuration, and subscribed to
    ``master.broadcast.<api_key>``.
    """
    section = "job_runner_worker"
    subscriber = zmq_context.socket(zmq.SUB)

    address = "tcp://{0}:{1}".format(
        config.get(section, "broadcaster_server_hostname"),
        config.get(section, "broadcaster_server_port"),
    )
    subscriber.connect(address)

    subscription = "master.broadcast.{0}".format(
        config.get(section, "api_key"))
    subscriber.setsockopt(zmq.SUBSCRIBE, subscription)
    return subscriber
Beispiel #9
0
    def get_list(cls, resource_path, params={}):
        """
        Return a list of models for ``resource_path``.

        When the response ``meta`` contains a non-empty ``next`` value, the
        following page is fetched recursively (without ``params``) and
        appended to the result.

        :param resource_path:
            The path of the resource.

        :param params:
            A ``dict`` containing optional request params. Optional.

        :return:
            A ``list`` of class instances.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other non-200 response.

        """
        section = 'job_runner_worker'
        url = urlparse.urljoin(
            config.get(section, 'api_base_url'), resource_path)
        credentials = HmacAuth(
            config.get(section, 'api_key'), config.get(section, 'secret'))

        response = requests.get(
            url,
            auth=credentials,
            params=params,
            headers={'content-type': 'application/json'},
            verify=False,
        )

        status = response.status_code
        if status != 200:
            error_message = 'Server returned {0} - {1}'.format(
                status, response.content)
            if 500 <= status <= 599:
                raise RequestServerError(error_message)
            raise RequestClientError(error_message)

        payload = response.json
        output = [
            cls(obj_dict['resource_uri'], obj_dict)
            for obj_dict in payload['objects']
        ]

        meta = payload['meta']
        if 'next' in meta and meta['next']:
            output.extend(cls.get_list(meta['next']))

        return output
def _handle_ping_action(message):
    """
    Handle the ``'ping'`` action.

    Workers are looked up by the configured ``api_key``. When exactly one
    worker is returned, its ``ping_response_dts`` is patched with
    ``datetime.now(utc)``; otherwise a warning is logged. The content of
    ``message`` is not used.
    """
    section = 'job_runner_worker'
    matches = Worker.get_list(
        config.get(section, 'worker_resource_uri'),
        params={'api_key': config.get(section, 'api_key')}
    )

    if len(matches) != 1:
        logger.warning('Workers by api_key query resulted in multiple results')
        return

    matches[0].patch({
        'ping_response_dts': datetime.now(utc).isoformat(' '),
    })
Beispiel #11
0
    def get_list(cls, resource_path, params={}):
        """
        Fetch ``resource_path`` and return its objects as class instances.

        Each item of the ``objects`` list in the response is wrapped in an
        instance of ``cls``. If the ``meta`` of the response holds a
        non-empty ``next`` value, that page is requested as well (without
        ``params``) and its instances are appended.

        :param resource_path:
            The path of the resource.

        :param params:
            A ``dict`` containing optional request params. Optional.

        :return:
            A ``list`` of class instances.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other non-200 response.

        """
        base_url = config.get('job_runner_worker', 'api_base_url')
        target = urlparse.urljoin(base_url, resource_path)

        api_key = config.get('job_runner_worker', 'api_key')
        secret = config.get('job_runner_worker', 'secret')

        response = requests.get(
            target,
            auth=HmacAuth(api_key, secret),
            params=params,
            headers={'content-type': 'application/json'},
            verify=False,
        )

        code = response.status_code
        if code != 200:
            is_server_error = 500 <= code <= 599
            error_cls = (
                RequestServerError if is_server_error else RequestClientError)
            raise error_cls(
                'Server returned {0} - {1}'.format(code, response.content))

        data = response.json
        instances = [
            cls(item['resource_uri'], item) for item in data['objects']]

        meta = data['meta']
        if 'next' in meta and meta['next']:
            instances.extend(cls.get_list(meta['next']))

        return instances
Beispiel #12
0
def publish(zmq_context, event_queue, exit_queue):
    """
    Publish enqueued events to the WebSocket server.

    Events are taken from ``event_queue`` and sent as a two-part message
    (``'worker.event'``, event). When no event is pending, ``exit_queue``
    is checked; if it holds an item the function returns, otherwise it
    sleeps for half a second. The publisher socket is always closed when
    the function is left, either through termination or an exception.

    :param zmq_context:
        An instance of ``zmq.Context``.

    :param event_queue:
        A ``Queue`` instance for events to broadcast.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting event publisher')

    publisher = zmq_context.socket(zmq.PUB)

    # the loop is only left through ``return`` or an exception, so the
    # socket has to be closed in a ``finally`` clause to be closed at all
    try:
        publisher.connect('tcp://{0}:{1}'.format(
            config.get('job_runner_worker', 'ws_server_hostname'),
            config.get('job_runner_worker', 'ws_server_port'),
        ))

        while True:
            try:
                event = event_queue.get(block=False)
            except Empty:
                pass
            else:
                logger.debug('Sending event: {0}'.format(event))
                publisher.send_multipart(['worker.event', event])
                # drain pending events before looking at the exit queue
                continue

            try:
                exit_queue.get(block=False)
            except Empty:
                pass
            else:
                logger.info('Terminating event publisher')
                return

            time.sleep(0.5)
    finally:
        publisher.close()
Beispiel #13
0
def publish(zmq_context, event_queue, exit_queue):
    """
    Publish enqueued events to the WebSocket server.

    Each event from ``event_queue`` is sent as a multipart message with
    ``'worker.event'`` as first part. The exit queue is only consulted when
    there is no event to send. The publisher socket is closed whenever the
    function is left, including termination and errors.

    :param zmq_context:
        An instance of ``zmq.Context``.

    :param event_queue:
        A ``Queue`` instance for events to broadcast.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting event publisher')

    publisher = zmq_context.socket(zmq.PUB)

    # a ``close()`` placed after the endless loop would never be reached
    # (the loop exits with ``return``), hence the ``try`` / ``finally``
    try:
        address = 'tcp://{0}:{1}'.format(
            config.get('job_runner_worker', 'ws_server_hostname'),
            config.get('job_runner_worker', 'ws_server_port'),
        )
        publisher.connect(address)

        while True:
            try:
                event = event_queue.get(block=False)
            except Empty:
                event = None
                has_event = False
            else:
                has_event = True

            if has_event:
                logger.debug('Sending event: {0}'.format(event))
                publisher.send_multipart(['worker.event', event])
                continue

            try:
                exit_queue.get(block=False)
            except Empty:
                # nothing to send and no exit request: wait a bit
                time.sleep(0.5)
                continue

            logger.info('Terminating event publisher')
            return
    finally:
        publisher.close()
Beispiel #14
0
def _handle_kill_action(message, kill_queue, event_queue):
    """
    Handle the ``'kill'`` action.

    The kill-request referenced by ``message["kill_request_id"]`` gets its
    ``enqueue_dts`` patched, is pushed on ``kill_queue`` and announced with
    an ``"enqueued"`` event on ``event_queue``. If it already has an
    ``enqueue_dts``, only a warning is logged.
    """
    resource_uri = "{0}{1}/".format(
        config.get("job_runner_worker", "kill_request_resource_uri"),
        message["kill_request_id"],
    )
    kill_request = KillRequest(resource_uri)

    if kill_request.enqueue_dts:
        logger.warning(
            "Was expecting that kill: {0} was not in queue yet".format(
                message["kill_request_id"]
            )
        )
        return

    kill_request.patch({"enqueue_dts": datetime.now(utc).isoformat(" ")})
    kill_queue.put(kill_request)

    enqueued_event = {
        "event": "enqueued",
        "kill_request_id": kill_request.id,
        "kind": "kill_request",
    }
    event_queue.put(json.dumps(enqueued_event))
Beispiel #15
0
def _handle_enqueue_action(message, run_queue, event_queue):
    """
    Handle the ``'enqueue'`` action.

    Claims the run referenced by ``message["run_id"]``: ``enqueue_dts`` and
    ``worker`` are patched on the run, the run is put on ``run_queue`` and
    an ``"enqueued"`` event is put on ``event_queue``. A run which already
    has an ``enqueue_dts``, or a worker lookup that does not return exactly
    one worker, results in a logged warning only.
    """
    section = "job_runner_worker"

    run_uri = "{0}{1}/".format(
        config.get(section, "run_resource_uri"), message["run_id"])
    run = Run(run_uri)

    worker_list = Worker.get_list(config.get(section, "worker_resource_uri"))

    if run.enqueue_dts:
        logger.warning(
            "Was expecting that run: {0} was not in queue yet".format(run.id))
        return

    if len(worker_list) != 1:
        logger.warning("API returned multiple workers, expected one")
        return

    claim = {
        "enqueue_dts": datetime.now(utc).isoformat(" "),
        # set the worker so we know which worker of the pool claimed the
        # run
        "worker": worker_list[0].resource_uri,
    }
    run.patch(claim)
    run_queue.put(run)

    enqueued_event = {"event": "enqueued", "run_id": run.id, "kind": "run"}
    event_queue.put(json.dumps(enqueued_event))
Beispiel #16
0
    def _get_json_data(self):
        """
        Request the resource and hand back the JSON data of the response.

        :return:
            The ``json`` attribute of the ``requests`` response.

        :raises:
            :exc:`!RequestException` on ``requests`` error.

        :raises:
            :exc:`.RequestServerError` on 5xx response.

        :raises:
            :exc:`.RequestClientError` on any other response that is not a
            200.

        """
        base_url = config.get('job_runner_worker', 'api_base_url')
        target = urlparse.urljoin(base_url, self._resource_path)

        api_key = config.get('job_runner_worker', 'api_key')
        secret = config.get('job_runner_worker', 'secret')

        response = requests.get(
            target,
            auth=HmacAuth(api_key, secret),
            headers={'content-type': 'application/json'},
            verify=False,
        )

        code = response.status_code
        if code != 200:
            is_server_error = 500 <= code <= 599
            error_cls = (
                RequestServerError if is_server_error else RequestClientError)
            raise error_cls(
                'Server returned {0} - {1}'.format(code, response.content))

        return response.json
Beispiel #17
0
def _handle_ping_action(message):
    """
    Handle the ``'ping'`` action.

    When the worker lookup returns exactly one worker, it is patched with
    the ping response time, the version of ``job_runner_worker`` and the
    configured number of ``concurrent_jobs``. Otherwise a warning is logged.
    The content of ``message`` is not used.
    """
    section = "job_runner_worker"
    worker_list = Worker.get_list(config.get(section, "worker_resource_uri"))

    if len(worker_list) != 1:
        logger.warning("API returned multiple workers, expected one")
        return

    worker = worker_list[0]
    ping_response = {
        "ping_response_dts": datetime.now(utc).isoformat(" "),
        "worker_version": job_runner_worker.__version__,
        "concurrent_jobs": config.getint(section, "concurrent_jobs"),
    }
    worker.patch(ping_response)
Beispiel #18
0
def _handle_ping_action(message):
    """
    Handle the ``'ping'`` action.

    Patches the single worker returned by the API with
    ``ping_response_dts``, ``worker_version`` and ``concurrent_jobs``. If the
    API does not return exactly one worker, a warning is logged and nothing
    is patched. The content of ``message`` is not used.
    """
    worker_list = Worker.get_list(
        config.get('job_runner_worker', 'worker_resource_uri'))

    if len(worker_list) != 1:
        logger.warning('API returned multiple workers, expected one')
        return

    worker = worker_list[0]
    now = datetime.now(utc).isoformat(' ')
    version = job_runner_worker.__version__
    concurrent_jobs = config.getint('job_runner_worker', 'concurrent_jobs')

    worker.patch({
        'ping_response_dts': now,
        'worker_version': version,
        'concurrent_jobs': concurrent_jobs,
    })
def _handle_enqueue_action(message, run_queue, event_queue):
    """
    Handle the ``'enqueue'`` action.

    The run referenced by ``message['run_id']`` gets its ``enqueue_dts``
    patched, is put on ``run_queue`` and announced with an ``'enqueued'``
    event on ``event_queue``. A run that already has an ``enqueue_dts`` only
    triggers a warning.
    """
    run_uri = '{0}{1}/'.format(
        config.get('job_runner_worker', 'run_resource_uri'),
        message['run_id'],
    )
    run = Run(run_uri)

    if run.enqueue_dts:
        logger.warning(
            'Was expecting that run: {0} was not in queue yet'.format(run.id))
        return

    run.patch({'enqueue_dts': datetime.now(utc).isoformat(' ')})
    run_queue.put(run)

    enqueued_event = {'event': 'enqueued', 'run_id': run.id, 'kind': 'run'}
    event_queue.put(json.dumps(enqueued_event))
Beispiel #20
0
def _handle_kill_action(message, kill_queue, event_queue):
    """
    Handle the ``'kill'`` action.

    Marks the kill-request referenced by ``message['kill_request_id']`` as
    enqueued (by patching ``enqueue_dts``), puts it on ``kill_queue`` and
    puts an ``'enqueued'`` event on ``event_queue``. A kill-request which
    already has an ``enqueue_dts`` only results in a warning.
    """
    resource_uri = '{0}{1}/'.format(
        config.get('job_runner_worker', 'kill_request_resource_uri'),
        message['kill_request_id'])
    kill_request = KillRequest(resource_uri)

    if kill_request.enqueue_dts:
        warning = 'Was expecting that kill: {0} was not in queue yet'.format(
            message['kill_request_id'])
        logger.warning(warning)
        return

    enqueue_dts = datetime.now(utc).isoformat(' ')
    kill_request.patch({'enqueue_dts': enqueue_dts})
    kill_queue.put(kill_request)

    enqueued_event = {
        'event': 'enqueued',
        'kill_request_id': kill_request.id,
        'kind': 'kill_request'
    }
    event_queue.put(json.dumps(enqueued_event))
def _handle_kill_action(message, kill_queue, event_queue):
    """
    Handle the ``'kill'`` action.

    When the kill-request referenced by ``message['kill_request_id']`` has
    no ``enqueue_dts`` yet, it is patched with one, pushed on ``kill_queue``
    and an ``'enqueued'`` event is pushed on ``event_queue``. Otherwise a
    warning is logged.
    """
    section = 'job_runner_worker'
    kill_request = KillRequest('{0}{1}/'.format(
        config.get(section, 'kill_request_resource_uri'),
        message['kill_request_id'],
    ))

    already_enqueued = bool(kill_request.enqueue_dts)
    if already_enqueued:
        logger.warning(
            'Was expecting that kill: {0} was not in queue yet'.format(
                message['kill_request_id']))
        return

    kill_request.patch({'enqueue_dts': datetime.now(utc).isoformat(' ')})
    kill_queue.put(kill_request)

    event = json.dumps({
        'event': 'enqueued',
        'kill_request_id': kill_request.id,
        'kind': 'kill_request'
    })
    event_queue.put(event)
Beispiel #22
0
def enqueue_actions(zmq_context, run_queue, kill_queue, event_queue,
                    exit_queue):
    """
    Handle incoming actions sent by the broadcaster.

    Messages are received without blocking from the subscriber socket.
    Messages addressed exactly to ``master.broadcast.<api_key>`` are decoded
    as JSON and dispatched on their ``action`` (``'enqueue'``, ``'kill'`` or
    ``'ping'``). When nothing was received for longer than the configured
    ``reconnect_after_inactivity`` seconds, the subscriber is re-created.
    The subscriber socket is closed whenever the function is left, either
    through termination or an exception.

    :param zmq_context:
        An instance of ``zmq.Context``.

    :param run_queue:
        An instance of ``Queue`` for pushing the runs to.

    :param kill_queue:
        An instance of ``Queue`` for pushing the kill-requests to.

    :param event_queue:
        An instance of ``Queue`` for pushing events to.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting enqueue loop')
    subscriber = _get_subscriber(zmq_context)

    # the loop is only left through ``return`` or an exception, so the
    # socket has to be closed in a ``finally`` clause to be closed at all
    try:
        expected_address = 'master.broadcast.{0}'.format(
            config.get('job_runner_worker', 'api_key'))

        last_activity_dts = datetime.utcnow()
        reconnect_after_inactivity = config.getint(
            'job_runner_worker', 'reconnect_after_inactivity')

        while True:
            try:
                exit_queue.get(block=False)
                logger.info('Termintating enqueue loop')
                return
            except Empty:
                pass

            try:
                address, content = subscriber.recv_multipart(zmq.NOBLOCK)
                last_activity_dts = datetime.utcnow()
            except zmq.ZMQError:
                # this is needed in case the ZMQ publisher is load-balanced
                # and the loadbalancer dropped the connection to the
                # backend, but not the connection to our side. without this
                # work-around, zmq will think that all is well, and we won't
                # receive anything anymore
                delta = datetime.utcnow() - last_activity_dts
                if delta > timedelta(seconds=reconnect_after_inactivity):
                    logger.warning(
                        'There was not activity for {0}, reconnecting'
                        ' to publisher'.format(delta))
                    subscriber.close()
                    # random delay before the new connection is set up
                    time.sleep(random.randint(1, 10))
                    subscriber = _get_subscriber(zmq_context)
                    last_activity_dts = datetime.utcnow()
                else:
                    time.sleep(0.5)
                continue

            # since zmq is subscribed to everything that starts with the
            # given prefix, we have to do a double check to make sure this
            # is an exact match.
            if not address == expected_address:
                continue

            logger.debug('Received [{0}]: {1}'.format(address, content))
            message = json.loads(content)

            if message['action'] == 'enqueue':
                _handle_enqueue_action(message, run_queue, event_queue)

            elif message['action'] == 'kill':
                _handle_kill_action(message, kill_queue, event_queue)

            elif message['action'] == 'ping':
                _handle_ping_action(message)
    finally:
        # always closes the *current* subscriber, also after a reconnect
        subscriber.close()
Beispiel #23
0
def execute_run(run_queue, event_queue, exit_queue):
    """
    Execute runs from the ``run_queue``.

    For every run the job script is written to a temporary file and started
    through the interpreter named in its shebang line. The combined
    stdout / stderr output (or the traceback, when the run could not be
    executed) is truncated and stored as run log, after which the run is
    patched with ``return_dts`` and ``return_success`` and a ``'returned'``
    event is pushed. The temporary script is removed afterwards, also when
    reporting the result fails.

    A run is only reported as successful when the process was started and
    its return code is ``0``.

    :param run_queue:
        An instance of ``Queue`` to consume run instances from.

    :param event_queue:
        An instance of ``Queue`` to push events to.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting run executer')

    while True:
        try:
            exit_queue.get(block=False)
            logger.info('Termintating run executer')
            return
        except Empty:
            pass

        try:
            run = run_queue.get(block=False)
        except Empty:
            time.sleep(0.5)
            continue

        # If *anything goes wrong* we want to have feedback bubling up to
        # the master server, including email sent and dashboard updated.
        # From a user POV, a job not run is a failure.
        # Hence the catchall try.
        did_run = False
        file_path = None
        sub_proc = None

        logger.info('Starting run {0}'.format(run.resource_uri))
        run.patch({'start_dts': datetime.now(utc).isoformat(' ')})
        event_queue.put(json.dumps(
            {'event': 'started', 'run_id': run.id, 'kind': 'run'}))

        try:
            try:
                file_desc, file_path = tempfile.mkstemp(
                    dir=config.get('job_runner_worker', 'script_temp_path')
                )
                # seems there isn't support to open file descriptors
                # directly in utf-8 encoding
                os.fdopen(file_desc).close()

                # the context manager closes the file also when the write
                # fails
                with codecs.open(file_path, 'w', 'utf-8') as file_obj:
                    file_obj.write(run.job.script_content.replace('\r', ''))

                # get shebang from content of the script
                shebang = run.job.script_content.split('\n', 1)[0]
                if not shebang.startswith('#!'):
                    raise Exception(
                        'The first line of the job to run needs to '
                        'start with a shebang (#!). The current first line '
                        'is: "{0}"'.format(shebang))
                executable = "{0} {1}".format(
                    shebang.replace('#!', ''), file_path)

                sub_proc = subprocess.Popen(
                    shlex.split(executable),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT
                )

                run.patch({'pid': sub_proc.pid})
                did_run = True
                out, err = sub_proc.communicate()
            except Exception:
                logger.exception(
                    'The run failed to complete because of an error')
                # format_exc() takes a ``limit``, not the exception; it
                # formats the exception currently being handled
                out = ('[job runner worker] Could not execute job: ' +
                       traceback.format_exc())

            log_output = _truncate_log(out)

            logger.info('Run {0} ended'.format(run.resource_uri))
            run.reload()
            run_log = run.run_log

            if run_log:
                # handles the rare case when a job alread has a log, but was
                # restarted (because the return_dts was never set)
                run_log.patch({
                    'content': log_output,
                })
            else:
                run_log = RunLog(
                    config.get('job_runner_worker', 'run_log_resource_uri'))
                run_log.post({
                    'run': '{0}{1}/'.format(
                        config.get('job_runner_worker', 'run_resource_uri'),
                        run.id
                    ),
                    'content': log_output
                })

            # a return code of ``None`` means the process was started but
            # never waited for successfully, which is not a success either
            return_success = bool(did_run and sub_proc.returncode == 0)
            run.patch({
                'return_dts': datetime.now(utc).isoformat(' '),
                'return_success': return_success,
            })
            event_queue.put(json.dumps(
                {'event': 'returned', 'run_id': run.id, 'kind': 'run'}))
        finally:
            # never leave the job script behind, not even when reporting
            # the result to the API raised
            if file_path:
                os.remove(file_path)
def enqueue_actions(
        zmq_context, run_queue, kill_queue, event_queue, exit_queue):
    """
    Handle incoming actions sent by the broadcaster.

    The subscriber socket is polled without blocking. A message whose
    address equals ``master.broadcast.<api_key>`` is parsed as JSON and
    handed to the handler of its ``action`` (``'enqueue'``, ``'kill'`` or
    ``'ping'``). After more than ``reconnect_after_inactivity`` seconds
    without a message, a new subscriber is created. The subscriber socket is
    closed whenever the function is left, including termination and errors.

    :param zmq_context:
        An instance of ``zmq.Context``.

    :param run_queue:
        An instance of ``Queue`` for pushing the runs to.

    :param kill_queue:
        An instance of ``Queue`` for pushing the kill-requests to.

    :param event_queue:
        An instance of ``Queue`` for pushing events to.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting enqueue loop')
    subscriber = _get_subscriber(zmq_context)

    # a ``close()`` placed after the endless loop would never be reached
    # (the loop exits with ``return``), hence the ``try`` / ``finally``
    try:
        expected_address = 'master.broadcast.{0}'.format(
            config.get('job_runner_worker', 'api_key'))

        last_activity_dts = datetime.utcnow()
        reconnect_after_inactivity = config.getint(
            'job_runner_worker', 'reconnect_after_inactivity')

        while True:
            try:
                exit_queue.get(block=False)
                logger.info('Termintating enqueue loop')
                return
            except Empty:
                pass

            try:
                address, content = subscriber.recv_multipart(zmq.NOBLOCK)
                last_activity_dts = datetime.utcnow()
            except zmq.ZMQError:
                # this is needed in case the ZMQ publisher is load-balanced
                # and the loadbalancer dropped the connection to the
                # backend, but not the connection to our side. without this
                # work-around, zmq will think that all is well, and we won't
                # receive anything anymore
                delta = datetime.utcnow() - last_activity_dts
                inactive_too_long = delta > timedelta(
                    seconds=reconnect_after_inactivity)

                if not inactive_too_long:
                    time.sleep(0.5)
                    continue

                logger.warning(
                    'There was not activity for {0}, reconnecting'
                    ' to publisher'.format(delta)
                )
                subscriber.close()
                # random delay before the new connection is set up
                time.sleep(random.randint(1, 10))
                subscriber = _get_subscriber(zmq_context)
                last_activity_dts = datetime.utcnow()
                continue

            # since zmq is subscribed to everything that starts with the
            # given prefix, we have to do a double check to make sure this
            # is an exact match.
            if not address == expected_address:
                continue

            logger.debug('Received [{0}]: {1}'.format(address, content))
            message = json.loads(content)

            if message['action'] == 'enqueue':
                _handle_enqueue_action(message, run_queue, event_queue)

            elif message['action'] == 'kill':
                _handle_kill_action(message, kill_queue, event_queue)

            elif message['action'] == 'ping':
                _handle_ping_action(message)
    finally:
        # always closes the *current* subscriber, also after a reconnect
        subscriber.close()
Beispiel #25
0
def execute_run(run_queue, event_queue, exit_queue):
    """
    Execute runs from the ``run_queue``.

    For every run the job script is written to a temporary file and started
    with the command taken from the first line of the script (with ``#!``
    removed). The combined stdout / stderr output, or the ``OSError``
    message when the process could not be started, is truncated and posted
    as run log. The run is then patched with ``return_dts`` and
    ``return_success`` and a ``'returned'`` event is pushed. The temporary
    script is always removed, also when an error propagates.

    A run is only reported as successful when the process was started and
    its return code is ``0``.

    :param run_queue:
        An instance of ``Queue`` to consume run instances from.

    :param event_queue:
        An instance of ``Queue`` to push events to.

    :param exit_queue:
        An instance of ``Queue`` to consume from. If this queue is not empty,
        the function needs to terminate.

    """
    logger.info('Starting run executer')

    while True:
        try:
            exit_queue.get(block=False)
            logger.info('Termintating run executer')
            return
        except Empty:
            pass

        try:
            run = run_queue.get(block=False)
        except Empty:
            time.sleep(0.5)
            continue

        file_desc, file_path = tempfile.mkstemp(
            dir=config.get('job_runner_worker', 'script_temp_path')
        )

        # from here on the temporary script exists; the ``finally`` clause
        # makes sure it is removed, whatever happens in between
        try:
            # seems there isn't support to open file descriptors directly in
            # utf-8 encoding
            os.fdopen(file_desc).close()

            # the context manager closes the file also when the write fails
            with codecs.open(file_path, 'w', 'utf-8') as file_obj:
                file_obj.write(run.job.script_content.replace('\r', ''))

            # get shebang from content of the script
            shebang = run.job.script_content.split('\n', 1)[0]
            executable = shebang.replace('#!', '').split()
            executable.append(file_path)

            logger.info('Starting run {0}'.format(run.resource_uri))
            did_run = False
            sub_proc = None
            try:
                run.patch({'start_dts': datetime.now(utc).isoformat(' ')})

                sub_proc = subprocess.Popen(
                    executable,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)

                event_queue.put(json.dumps(
                    {'event': 'started', 'run_id': run.id, 'kind': 'run'}))

                run.patch({'pid': sub_proc.pid})
                did_run = True
                out, err = sub_proc.communicate()
            except OSError as e:
                out = 'Could not execute job: ' + str(e)
                event_queue.put(json.dumps(
                    {'event': 'started', 'run_id': run.id, 'kind': 'run'}))

            log_output = _truncate_log(out)

            logger.info('Run {0} ended'.format(run.resource_uri))
            run_log = RunLog(
                config.get('job_runner_worker', 'run_log_resource_uri'))
            run_log.post({
                'run': '{0}{1}/'.format(
                    config.get('job_runner_worker', 'run_resource_uri'),
                    run.id
                ),
                'content': log_output
            })

            # a return code of ``None`` means the process was started but
            # never waited for successfully, which is not a success either
            return_success = bool(did_run and sub_proc.returncode == 0)
            run.patch({
                'return_dts': datetime.now(utc).isoformat(' '),
                'return_success': return_success,
            })
            event_queue.put(json.dumps(
                {'event': 'returned', 'run_id': run.id, 'kind': 'run'}))
        finally:
            os.remove(file_path)