Example #1
0
    def connect_to_master(self, master_url=None):
        """
        Notify the master that this slave exists.

        :param master_url: The URL of the master service. If none specified, defaults to localhost:43000.
        :type master_url: str | None
        """
        self.is_alive = True
        self._master_url = master_url or 'localhost:43000'
        self._master_api = UrlBuilder(self._master_url)
        connect_url = self._master_api.url('slave')
        data = {
            'slave': '{}:{}'.format(self.host, self.port),
            'num_executors': self._num_executors,
        }
        response = self._network.post(connect_url, data=data)
        self._slave_id = int(response.json().get('slave_id'))
        self._logger.info('Slave {}:{} connected to master on {}.', self.host,
                          self.port, self._master_url)

        # We disconnect from the master before build_teardown so that the master stops sending subjobs. (Teardown
        # callbacks are executed in the reverse order that they're added, so we add the build_teardown callback first.)
        UnhandledExceptionHandler.singleton().add_teardown_callback(
            self._do_build_teardown_and_reset, timeout=30)
        UnhandledExceptionHandler.singleton().add_teardown_callback(
            self._disconnect_from_master)
    def _validate_successful_deployment(self, master_service_url, slaves_to_validate):
        """
        Poll the master's /slaves endpoint until either timeout or until all of the slaves have registered with
        the master.

        Throws exception upon timeout or API response error.

        :param master_service_url: the hostname:port for the running master service
        :type master_service_url: str
        :param slaves_to_validate: the list of slave hostnames (no ports) to deploy to
        :type slaves_to_validate: list[str]
        """
        master_api = UrlBuilder(master_service_url, BuildRunner.API_VERSION)
        slave_api_url = master_api.url('slave')
        network = Network()

        def all_slaves_registered():
            return len(self._registered_slave_hostnames(slave_api_url, network)) == len(slaves_to_validate)

        if not wait_for(
                boolean_predicate=all_slaves_registered,
                timeout_seconds=self._SLAVE_REGISTRY_TIMEOUT_SEC,
                poll_period=1,
                exceptions_to_swallow=(requests.RequestException, requests.ConnectionError)
        ):
            try:
                registered_slaves = self._registered_slave_hostnames(slave_api_url, network)
                non_registered_slaves = self._non_registered_slaves(registered_slaves, slaves_to_validate)
            except ConnectionError:
                self._logger.error('Error contacting {} on the master.'.format(slave_api_url))
                raise SystemExit(1)

            self._logger.error('Slave registration timed out after {} sec, with slaves {} missing.'.format(
                self._SLAVE_REGISTRY_TIMEOUT_SEC, ','.join(non_registered_slaves)))
            raise SystemExit(1)
 def __init__(self, base_api_url):
     """
     :param base_api_url: The base API url of the service (e.g., 'http://localhost:43000')
     :type base_api_url: str
     """
     self._api = UrlBuilder(base_api_url)
     self._network = Network()
     self._logger = log.get_logger(__name__)
 def test_url_should_generate_correct_url(self):
     host = 'master:9000'
     first, second, third = 'first', 'second', 'third'
     builder = UrlBuilder(host)
     url = builder.url(first, second, third)
     self.assertEqual(
         'http://{}/v1/{}/{}/{}'.format(host, first, second, third), url,
         'Url generated did not match expectation')
Example #5
0
 def __init__(self, slave_url, num_executors):
     """
     :type slave_url: str
     :type num_executors: int
     """
     self.url = slave_url
     self.num_executors = num_executors
     self.id = self._slave_id_counter.increment()
     self._num_executors_in_use = Counter()
     self._network = Network(min_connection_poolsize=num_executors)
     self.current_build_id = None
     self._is_alive = True
     self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
     self._logger = log.get_logger(__name__)
Example #6
0
 def __init__(self, master_url, request_params, secret):
     """
     :param master_url: The url of the master which the build will be executed on
     :type master_url: str
     :param request_params: A dict of request params that will be json-encoded and sent in the build request
     :type request_params: dict
     :type secret: str
     """
     self._master_url = self._ensure_url_has_scheme(master_url)
     self._request_params = request_params
     self._secret = secret
     self._build_id = None
     self._network = Network()
     self._logger = get_logger(__name__)
     self._last_build_status_details = None
     self._master_api = UrlBuilder(master_url, self.API_VERSION)
     self._cluster_master_api_client = ClusterMasterAPIClient(master_url)
Example #7
0
 def __init__(self, slave_url, num_executors, slave_session_id=None):
     """
     :type slave_url: str
     :type num_executors: int
     :type slave_session_id: str
     """
     self.url = slave_url
     self.num_executors = num_executors
     self.id = self._slave_id_counter.increment()
     self._num_executors_in_use = Counter()
     self._network = Network(min_connection_poolsize=num_executors)
     self.current_build_id = None
     self._last_heartbeat_time = datetime.now()
     self._is_alive = True
     self._is_in_shutdown_mode = False
     self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
     self._session_id = slave_session_id
     self._logger = log.get_logger(__name__)
    def get(self, build_id, subjob_id, atom_id):
        """
        :type build_id: int
        :type subjob_id: int
        :type atom_id: int
        """
        max_lines = int(self.get_query_argument('max_lines', 50))
        offset_line = self.get_query_argument('offset_line', None)

        if offset_line is not None:
            offset_line = int(offset_line)

        try:
            response = self._cluster_master.get_console_output(
                build_id,
                subjob_id,
                atom_id,
                Configuration['results_directory'],
                max_lines,
                offset_line
            )
            self.write(response)
            return
        except ItemNotFoundError as e:
            # If the master doesn't have the atom's console output, it's possible it's currently being worked on,
            # in which case the slave that is working on it may be able to provide the in-progress console output.
            build = self._cluster_master.get_build(int(build_id))
            subjob = build.subjob(int(subjob_id))
            slave = subjob.slave

            if slave is None:
                raise e

            api_url_builder = UrlBuilder(slave.url)
            slave_console_url = api_url_builder.url('build', build_id, 'subjob', subjob_id, 'atom', atom_id, 'console')
            query = {'max_lines': max_lines}

            if offset_line is not None:
                query['offset_line'] = offset_line

            query_string = urllib.parse.urlencode(query)
            self.redirect('{}?{}'.format(slave_console_url, query_string))
    def block_until_build_queue_empty(self, timeout=60):
        """
        This blocks until the master's build queue is empty. This data is exposed via the /queue endpoint and contains
        any jobs that are currently building or not yet started. If the queue is not empty before the timeout, this
        method raises an exception.

        :param timeout: The maximum number of seconds to block before raising an exception.
        :type timeout: int
        """
        master_api = UrlBuilder(self._master_url)
        queue_url = master_api.url('queue')

        def is_queue_empty():
            queue_resp = requests.get(queue_url)
            if queue_resp and queue_resp.ok:
                queue_data = queue_resp.json()
                if 'queue' in queue_data and len(queue_data['queue']) == 0:
                    return True
            return False

        if not poll.wait_for(is_queue_empty, timeout, 0.5):
            raise Exception('Master service did not become idle before timeout.')