Example #1
 def function_with_retries(*args, **kwargs):
     for i in range(num_attempts):
         try:
             return_value = function(*args, **kwargs)
         except exceptions as ex:
             if i == num_attempts - 1:
                 raise  # final attempt failed
             log.get_logger(__name__).warning('Call to {} raised {}("{}"). Retrying in {} seconds.',
                                              function.__qualname__, type(ex).__name__, ex, retry_delay)
             time.sleep(retry_delay)
         else:
             return return_value
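The closure above reads function, num_attempts, exceptions, and retry_delay from an enclosing scope, so it only makes sense inside a decorator factory. A minimal sketch of such a factory follows; the name and default values are assumptions, not the project's actual API.

import functools
import time

def retry_on_exception(num_attempts=3, retry_delay=1, exceptions=(Exception,)):
    """Hypothetical decorator factory supplying the free variables used above."""
    def decorator(function):
        @functools.wraps(function)
        def function_with_retries(*args, **kwargs):
            for i in range(num_attempts):
                try:
                    return function(*args, **kwargs)
                except exceptions:
                    if i == num_attempts - 1:
                        raise  # final attempt failed
                    time.sleep(retry_delay)
        return function_with_retries
    return decorator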
Example #2
    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._setup_is_started = False
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once
        self._is_canceled = False

        self._project_type = None
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None  # WIP: Move subjob queues to BuildScheduler class.
        self._finished_subjobs = None
        self._failed_atoms = None
        self._postbuild_tasks_are_finished = False
        self._timing_file_path = None

        self._state_timestamps = {status: None
                                  for status in BuildStatus}   # initialize all timestamps to None
        self._record_state_timestamp(BuildStatus.QUEUED)
Example #3
    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._setup_is_started = False
        self._preparation_coin = SingleUseCoin(
        )  # protects against separate threads calling prepare() more than once
        self._is_canceled = False

        self._project_type = None
        self._build_completion_lock = Lock(
        )  # protects against more than one thread detecting the build's finish

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None  # WIP: Move subjob queues to BuildScheduler class.
        self._finished_subjobs = None
        self._failed_atoms = None
        self._postbuild_tasks_are_finished = False
        self._timing_file_path = None

        self._state_timestamps = {status: None
                                  for status in BuildStatus
                                  }  # initialize all timestamps to None
        self._record_state_timestamp(BuildStatus.QUEUED)
Example #4
    def __init__(self):
        self._logger = get_logger(__name__)

        self._all_slaves_by_url = {}
        self._all_builds_by_id = OrderedDict()  # This is an OrderedDict so we can more easily implement get_queue()
        self._builds_waiting_for_slaves = Queue()

        self._request_queue = Queue()
        self._request_handler = SerialRequestHandler()

        self._request_queue_worker_thread = SafeThread(
            target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
        self._request_queue_worker_thread.start()

        self._slave_allocation_worker_thread = SafeThread(
            target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
        self._slave_allocation_worker_thread.start()

        self._master_results_path = Configuration['results_directory']

        # It's important that idle slaves are only in the queue once so we use OrderedSet
        self._idle_slaves = OrderedSetQueue()

        # Delete all old builds when master starts.  Remove this if/when build numbers are unique across master
        # starts/stops
        if os.path.exists(self._master_results_path):
            shutil.rmtree(self._master_results_path)

        fs.create_dir(self._master_results_path)
Example #5
 def log_app_debug_info_and_force_kill_after_delay():
     time.sleep(seconds)
     logger = log.get_logger(__name__)
     logger.error('ClusterRunner did not exit within {} seconds. App debug info:\n\n{}.',
                  seconds, app_info.get_app_info_string())
     logger.critical('ClusterRunner seems to be hanging unexpectedly. Hard killing the process. Farewell!')
     os._exit(1)
Example #6
 def log_app_debug_info_and_force_kill_after_delay():
     time.sleep(seconds)
     logger = log.get_logger(__name__)
     logger.error('ClusterRunner did not exit within {} seconds. App debug info:\n\n{}.',
                  seconds, app_info.get_app_info_string())
     logger.critical('ClusterRunner seems to be hanging unexpectedly. Sending SIGKILL to self. Farewell!')
     os.kill(os.getpid(), signal.SIGKILL)
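Both variants above are watchdogs: they assume seconds and app_info come from an enclosing scope, and they are meant to run on a daemon thread started just before a graceful shutdown is attempted, so a hang cannot keep the process alive forever. A rough usage sketch, with an assumed thread name:

import threading

watchdog = threading.Thread(
    target=log_app_debug_info_and_force_kill_after_delay,
    name='ForceKillWatchdog',  # assumed name, not from the project
    daemon=True,               # a daemon thread never blocks a clean exit on its own
)
watchdog.start()
# Proceed with the normal shutdown; if it finishes before the watchdog's sleep
# elapses, the process exits cleanly and the hard kill never runs.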
Example #7
    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self.is_alive = True
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        self._idle_executors = Queue(maxsize=num_executors)
        self.executors_by_id = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors_by_id[executor_id] = executor

        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first

        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None
        self._build_teardown_coin = None
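The bounded Queue of executors is the slave's concurrency limit: an executor is checked out to run a subjob and returned afterwards, so at most num_executors subjobs run at once. A toy sketch of that check-out/check-in pattern, not ClusterRunner code; the work callable stands in for the real subjob execution:

from queue import Queue

class ExecutorPoolSketch:
    """Toy illustration of the check-out/check-in pattern implied by _idle_executors."""
    def __init__(self, num_executors):
        self._idle_executors = Queue(maxsize=num_executors)
        for executor_id in range(num_executors):
            self._idle_executors.put(executor_id)  # stand-in for SubjobExecutor objects

    def run_one_subjob(self, work):
        executor = self._idle_executors.get()     # blocks until an executor is free
        try:
            work(executor)                        # placeholder for the real execution call
        finally:
            self._idle_executors.put(executor)    # return it so another subjob can start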
Example #8
    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self.is_alive = True
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        self._idle_executors = Queue(maxsize=num_executors)
        self.executors_by_id = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors_by_id[executor_id] = executor

        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first

        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None
        self._build_teardown_coin = None
        self._base_executor_index = None
Example #9
 def __init__(self, atomizer_dicts):
     """
     :param atomizer_dicts: A list of dicts mapping atomizer env var names to atomizer commands
     :type atomizer_dicts: list[dict[str, str]]
     """
     self._logger = log.get_logger(__name__)
     self._atomizer_dicts = atomizer_dicts
 def __init__(self):
     self._logger = get_logger(__name__)
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._project_preparation_locks = {}
Example #11
    def __init__(self):
        self._logger = get_logger(__name__)
        self._master_results_path = Configuration['results_directory']
        self._slave_registry = SlaveRegistry.singleton()
        self._scheduler_pool = BuildSchedulerPool()
        self._build_request_handler = BuildRequestHandler(self._scheduler_pool)
        self._build_request_handler.start()
        self._slave_allocator = SlaveAllocator(self._scheduler_pool)
        self._slave_allocator.start()

        # The best practice for determining the number of threads to use is
        # the number of threads per core multiplied by the number of physical
        # cores. So for example, with 10 cores, 2 sockets and 2 threads per core, the
        # max would be 40.
        #
        # Currently we use threads for incrementing/decrementing slave executor
        # counts (lock acquisition) and tearing down the slave (network IO). 32 threads should be
        # plenty for these tasks. In the case of heavy load, the bottleneck will be the number
        # of executors, not the time it takes to lock/unlock the executor counts or the number of
        # teardown requests. Tweak the number to find the sweet spot if you feel this is the case.
        self._thread_pool_executor = ThreadPoolExecutor(max_workers=32)

        # Asynchronously delete (but immediately rename) all old builds when master starts.
        # Remove this if/when build numbers are unique across master starts/stops
        if os.path.exists(self._master_results_path):
            fs.async_delete(self._master_results_path)
        fs.create_dir(self._master_results_path)

        # Configure heartbeat tracking
        self._unresponsive_slaves_cleanup_interval = Configuration['unresponsive_slaves_cleanup_interval']
        self._hb_scheduler = sched.scheduler()

        SlavesCollector.register_slaves_metrics_collector(lambda: self._slave_registry.get_all_slaves_by_id().values())
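The sched.scheduler() instance presumably drives a recurring heartbeat check at unresponsive_slaves_cleanup_interval. The function below is only a generic sketch of that self-re-entering pattern, not the project's actual cleanup code:

import sched

def check_heartbeats(scheduler, interval):
    print('checking slave heartbeats')  # placeholder for the real unresponsive-slave cleanup
    # Re-enter the same event so the check repeats every `interval` seconds.
    scheduler.enter(interval, 1, check_heartbeats, (scheduler, interval))

hb_scheduler = sched.scheduler()
hb_scheduler.enter(0, 1, check_heartbeats, (hb_scheduler, 60))
hb_scheduler.run()  # blocks and repeats forever; in the master this would run on a background thread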
Example #12
    def __init__(self, port, host, num_executors=10):
        """
        :param port: The port number the slave service is running on
        :type port: int
        :param host: The hostname at which the slave is reachable
        :type host: str
        :param num_executors: The number of executors this slave should operate with -- this determines how many
            concurrent subjobs the slave can execute.
        :type num_executors: int
        """
        self.port = port
        self.host = host
        self._slave_id = None
        self._num_executors = num_executors
        self._logger = log.get_logger(__name__)

        self._idle_executors = Queue(maxsize=num_executors)
        self.executors = {}
        for executor_id in range(num_executors):
            executor = SubjobExecutor(executor_id)
            self._idle_executors.put(executor)
            self.executors[executor_id] = executor

        self._setup_complete_event = Event()
        self._master_url = None
        self._network = Network(min_connection_poolsize=num_executors)
        self._master_api = None  # wait until we connect to a master first

        self._project_type = None  # this will be instantiated during build setup
        self._current_build_id = None

        UnhandledExceptionHandler.singleton().add_teardown_callback(self._async_teardown_build,
                                                                    should_disconnect_from_master=True)
Example #13
    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self._build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None

        self._error_message = None
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once

        self._project_type = None
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None  # WIP(joey): Move subjob queues to BuildScheduler class.
        self._finished_subjobs = None
        self._failed_atoms = None
        self._postbuild_tasks_are_finished = False  # WIP(joey): Remove and use build state.
        self._timing_file_path = None

        self._state_machine = BuildFsm(
            build_id=self._build_id,
            enter_state_callbacks={
                BuildState.ERROR: self._on_enter_error_state,
                BuildState.CANCELED: self._on_enter_canceled_state,
            }
        )
Example #14
    def __init__(self, build_request):
        """
        :type build_request: BuildRequest
        """
        self._logger = get_logger(__name__)
        self._build_id = self._build_id_counter.increment()
        self.build_request = build_request
        self._artifacts_archive_file = None
        self._build_artifact = None
        """ :type : BuildArtifact"""

        self._error_message = None
        self.is_prepared = False
        self._preparation_coin = SingleUseCoin()  # protects against separate threads calling prepare() more than once

        self._project_type = None
        self._num_slaves_in_use = 0
        self._build_completion_lock = Lock()  # protects against more than one thread detecting the build's finish
        self._num_allocated_executors = 0
        self._max_executors = float('inf')
        self._build_completion_lock = Lock()

        self._all_subjobs_by_id = {}
        self._unstarted_subjobs = None
        self._finished_subjobs = None
        self._postbuild_tasks_are_finished = False
        self._teardowns_finished = False
Example #15
 def __init__(self, build_id, subjob_id, project_type, job_config, atoms):
     """
     :param build_id:
     :type build_id: int
     :param subjob_id:
     :type subjob_id: int
     :param project_type:
     :type project_type: ProjectType
     :param job_config: the job's configuration from clusterrunner.yaml
     :type job_config: JobConfig
     :param atoms: the atom project_type strings
     :type atoms: list[app.master.atom.Atom]
     :return:
     """
     self._logger = get_logger(__name__)
     self._build_id = build_id
     self._subjob_id = subjob_id
     self._project_type = project_type  # todo: Unused; remove.
     self.job_config = job_config
     self._atoms = atoms
     self._set_atoms_subjob_id(atoms, subjob_id)
     self._set_atom_state(AtomState.NOT_STARTED)
     self.timings = {
     }  # a dict, atom_ids are the keys and seconds are the values
     self.slave = None  # The slave that had been assigned this subjob. Is None if not started.
Example #16
def _grouped_atoms(atoms, max_executors, timing_file_path, project_directory):
    """
    Return atoms that are grouped for optimal CI performance.

    If a timing file exists, then use the TimeBasedAtomGrouper.
    If not, use the default AtomGrouper (groups each atom into its own subjob).

    :param atoms: all of the atoms to be run this time
    :type atoms: list[app.master.atom.Atom]
    :param max_executors: the maximum number of executors for this build
    :type max_executors: int
    :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job
    :type timing_file_path: str
    :type project_directory: str
    :return: the grouped atoms (a list of lists of atoms)
    :rtype: list[list[app.master.atom.Atom]]
    """
    atom_time_map = None

    if os.path.isfile(timing_file_path):
        with open(timing_file_path, 'r') as json_file:
            try:
                atom_time_map = json.load(json_file)
            except ValueError:
                logger = log.get_logger(__name__)
                logger.warning('Failed to load timing data from existing file {}', timing_file_path)

    if atom_time_map is not None and len(atom_time_map) > 0:
        atom_grouper = TimeBasedAtomGrouper(atoms, max_executors, atom_time_map, project_directory)
    else:
        atom_grouper = AtomGrouper(atoms, max_executors)

    return atom_grouper.groupings()
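From the code above, the timing file is expected to hold a JSON object mapping atom identifiers to runtimes in seconds; any non-empty mapping switches grouping to TimeBasedAtomGrouper. A hypothetical call, where the paths and the atom list are placeholders rather than real project values:

# Assumed timing file contents, inferred from the json.load above:
# {"ATOM_0": 12.4, "ATOM_1": 3.9, "ATOM_2": 41.0}

groupings = _grouped_atoms(
    atoms=all_atoms,                                    # placeholder list[app.master.atom.Atom]
    max_executors=8,
    timing_file_path='/tmp/timings/my_job.timing.json',
    project_directory='/tmp/repos/my_project',
)
# With timing data present, groups are balanced by runtime; without it,
# each atom becomes its own single-atom group.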
Example #17
    def __init__(self,
                 url,
                 build_project_directory='',
                 project_directory='',
                 remote='origin',
                 branch='master',
                 config=None,
                 job_name=None,
                 remote_files=None,
                 atoms_override=None):
        """
        Note: the first line of each parameter docstring will be exposed as command line argument documentation for the
        clusterrunner build client.

        :param url: url to the git repo (e.g. https, ssh)
        :type url: str
        :param build_project_directory: the symlinked directory of where PROJECT_DIR should end up being set to
        :type build_project_directory: str
        :param project_directory: path within the repo that contains clusterrunner.yaml
        :type project_directory: str
        :param remote: The git remote name to fetch from
        :type remote: str
        :param branch: The git branch name on the remote to fetch
        :type branch: str
        :param config: a yaml string representing the project_type's config
        :type config: str|None
        :param job_name: a list of job names we intend to run
        :type job_name: list [str] | None
        :param remote_files: dictionary mapping of output file to URL
        :type remote_files: dict[str, str] | None
        :param atoms_override: The list of overridden atoms (if specified, will not run atomizer).
        :type atoms_override: list[str] | None
        """
        super().__init__(config, job_name, remote_files, atoms_override)
        self._url = url
        self._remote = remote
        self._branch = branch
        self._repo_directory = self.get_full_repo_directory(self._url)
        self._timing_file_directory = self.get_timing_file_directory(self._url)
        self._local_ref = None
        self._logger = log.get_logger(__name__)

        # We explicitly set the repo directory to 700 so we don't inadvertently expose the repo to access by other users
        fs.create_dir(self._repo_directory, self.DIRECTORY_PERMISSIONS)
        fs.create_dir(self._timing_file_directory, self.DIRECTORY_PERMISSIONS)
        fs.create_dir(os.path.dirname(build_project_directory))

        # Create a symlink from the generated build project directory to the actual project directory.
        # This is done in order to switch between the master's and the slave's copies of the repo while not
        # having to do something hacky in order to use the master's generated atoms on the slaves.
        actual_project_directory = os.path.join(self._repo_directory,
                                                project_directory)
        try:
            os.unlink(build_project_directory)
        except FileNotFoundError:
            pass

        os.symlink(actual_project_directory, build_project_directory)
        self.project_directory = build_project_directory
Example #18
 def __init__(self, base_api_url):
     """
     :param base_api_url: The base API url of the service (e.g., 'http://localhost:43000')
     :type base_api_url: str
     """
     self._api = UrlBuilder(base_api_url)
     self._network = Network()
     self._logger = log.get_logger(__name__)
 def initialize(self, route_node=None, cluster_master=None):
     """
     :type route_node: RouteNode
     :type cluster_master: app.master.cluster_master.ClusterMaster
     """
     self._logger = log.get_logger(__name__)
     self._cluster_master = cluster_master
     super().initialize(route_node)
Example #20
 def __init__(self, raw_yaml_contents):
     """
     :param raw_yaml_contents: Raw string contents of project clusterrunner.yaml file
     :type raw_yaml_contents: string
     """
     self._job_configs = None
     self._logger = log.get_logger(__name__)
     self._raw_yaml_contents = raw_yaml_contents
Example #21
 def __init__(self, raw_yaml_contents):
     """
     :param raw_yaml_contents: Raw string contents of project clusterrunner.yaml file
     :type raw_yaml_contents: string
     """
     self._job_configs = None
     self._logger = log.get_logger(__name__)
     self._raw_yaml_contents = raw_yaml_contents
Example #22
 def __init__(self, build_artifact_dir):
     """
      :param build_artifact_dir: absolute path to the build artifact (e.g. '/var/clusterrunner/artifacts/20')
     :type build_artifact_dir: str
     """
     self._logger = get_logger(__name__)
     self.build_artifact_dir = build_artifact_dir
     self._failed_commands = None
Example #23
    def rsa_key(host):
        """
        :param host: The host whose RSA key we want to retrieve
        :type host: str
        :return: the rsa key string, without the 'ssh-rsa' prefix. Returns None if ssh-keyscan fails.
        :rtype: str|None
        """
        proc = subprocess.Popen('ssh-keyscan -t rsa {}'.format(host), shell=True, stdout=PIPE, stderr=PIPE)
        output, error = proc.communicate()

        if proc.returncode != 0:
            log.get_logger(__name__).error('Failed to get rsa string with output: {}, error: {}'.format(output, error))
            return None

        line = output.decode("utf-8")
        # We want the string to the right of, and not including, the 'ssh-rsa' string.
        return line.split('ssh-rsa', 1)[-1].strip()
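Assuming rsa_key is exposed as a static method, the returned text is the part of a known_hosts entry that follows 'ssh-rsa'. A hypothetical usage sketch; the host and the file path are placeholders:

host = 'build-slave-01.example.com'  # placeholder host
key = rsa_key(host)
if key is not None:
    with open('/tmp/known_hosts', 'a') as known_hosts:
        known_hosts.write('{} ssh-rsa {}\n'.format(host, key))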
Example #24
 def __init__(self, base_api_url):
     """
     :param base_api_url: The base API url of the service (e.g., 'http://localhost:43000')
     :type base_api_url: str
     """
     self._api = UrlBuilder(self._ensure_url_has_scheme(base_api_url))
     self._network = Network()
     self._logger = log.get_logger(__name__)
Example #25
 def __init__(self, executor_id):
     """
     :type executor_id: int
     """
     self.id = executor_id
     self._project_type = None
     self._logger = log.get_logger(__name__)
     self._current_build_id = None
     self._current_subjob_id = None
Example #26
 def __init__(self, session_dir, docker_process):
     """
     :type session_dir: TemporaryDirectory
     :type docker_process: Popen
     """
     self._session_dir = session_dir
     self._docker_process = docker_process
     self._logger = log.get_logger(__name__)
     self._logger.debug('Started docker session, pid: {}', self._docker_process.pid)
Example #27
 def __init__(self, executor_id):
     """
     :type executor_id: int
     """
     self.id = executor_id
     self._project_type = None
     self._logger = log.get_logger(__name__)
     self._current_build_id = None
     self._current_subjob_id = None
Example #28
 def __init__(self):
     self._logger = get_logger(__name__)
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop,
         name='RequestHandlerLoop',
         daemon=True)
     self._project_preparation_locks = {}
Example #29
 def __init__(self, build_request_handler):
     """
     :type build_request_handler: BuildRequestHandler
     """
     self._logger = get_logger(__name__)
     self._build_request_handler = build_request_handler
     self._idle_slaves = OrderedSetQueue()
     self._allocation_thread = SafeThread(
         target=self._slave_allocation_loop, name='SlaveAllocationLoop', daemon=True)
Example #30
 def __init__(self, session_dir, docker_process):
     """
     :type session_dir: TemporaryDirectory
     :type docker_process: Popen
     """
     self._session_dir = session_dir
     self._docker_process = docker_process
     self._logger = log.get_logger(__name__)
     self._logger.debug('Started docker session, pid: {}',
                        self._docker_process.pid)
Example #31
    def __init__(self, min_connection_poolsize=DEFAULT_POOLSIZE):
        """
        :param min_connection_poolsize: The minimum connection pool size for this instance
        :type min_connection_poolsize: int
        """
        self._session = requests.Session()
        self._logger = get_logger(__name__)

        poolsize = max(min_connection_poolsize, DEFAULT_POOLSIZE)
        self._session.mount('http://', HTTPAdapter(pool_connections=poolsize, pool_maxsize=poolsize))
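The mount call swaps in an HTTPAdapter whose connection pool is at least as large as the requested minimum, so the pool never shrinks below requests' default. A standalone sketch of the same idea; the minimum of 25 is an arbitrary example value:

import requests
from requests.adapters import HTTPAdapter, DEFAULT_POOLSIZE

session = requests.Session()
poolsize = max(25, DEFAULT_POOLSIZE)  # never shrink below requests' default pool size
session.mount('http://', HTTPAdapter(pool_connections=poolsize, pool_maxsize=poolsize))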
Example #32
    def __init__(self, min_connection_poolsize=DEFAULT_POOLSIZE):
        """
        :param min_connection_poolsize: The minimum connection pool size for this instance
        :type min_connection_poolsize: int
        """
        self._logger = get_logger(__name__)
        self._session = None

        self._poolsize = max(min_connection_poolsize, DEFAULT_POOLSIZE)
        self.reset_session()
Example #33
 def log_app_debug_info_and_force_kill_after_delay():
     time.sleep(seconds)
     logger = log.get_logger(__name__)
     logger.error(
         'ClusterRunner did not exit within {} seconds. App debug info:\n\n{}.',
         seconds, app_info.get_app_info_string())
     logger.critical(
         'ClusterRunner seems to be hanging unexpectedly. Sending SIGKILL to self. Farewell!'
     )
     os.kill(os.getpid(), signal.SIGKILL)
Example #34
    def __init__(self, min_connection_poolsize=DEFAULT_POOLSIZE):
        """
        :param min_connection_poolsize: The minimum connection pool size for this instance
        :type min_connection_poolsize: int
        """
        self._logger = get_logger(__name__)
        self._session = None

        self._poolsize = max(min_connection_poolsize, DEFAULT_POOLSIZE)
        self.reset_session()
Example #35
 def log_app_debug_info_and_force_kill_after_delay():
     time.sleep(seconds)
     logger = log.get_logger(__name__)
     logger.error(
         'ClusterRunner did not exit within {} seconds. App debug info:\n\n{}.',
         seconds, app_info.get_app_info_string())
     logger.critical(
         'ClusterRunner seems to be hanging unexpectedly. Hard killing the process. Farewell!'
     )
     os._exit(1)
Example #36
 def __init__(self, scheduler_pool):
     """
     :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._project_preparation_locks = {}
Example #37
 def __init__(self, filename=None):
     """
     :param filename: The name of the logfile
     :type filename: str | None
     """
     self.filename = filename
     self.logging_disabled = filename is None
     self._analytics_logger = None
     self._event_id_generator = Counter()
     self._log_cache = collections.deque()
     self._logger = log.get_logger(__name__)
Example #38
 def __init__(self, filename=None):
     """
     :param filename: The name of the logfile
     :type filename: str | None
     """
     self.filename = filename
     self.logging_disabled = filename is None
     self._analytics_logger = None
     self._event_id_generator = Counter()
     self._log_cache = collections.deque()
     self._logger = log.get_logger(__name__)
Example #39
 def __init__(self, build_request_handler):
     """
     :type build_request_handler: BuildRequestHandler
     """
     self._logger = get_logger(__name__)
     self._build_request_handler = build_request_handler
     self._idle_slaves = OrderedSetQueue()
     self._allocation_thread = SafeThread(
         target=self._slave_allocation_loop,
         name='SlaveAllocationLoop',
         daemon=True)
 def __init__(self, scheduler_pool):
     """
     :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._idle_slaves = OrderedSetQueue()
     self._allocation_thread = SafeThread(
         target=self._slave_allocation_loop,
         name='SlaveAllocationLoop',
         daemon=True)
 def __init__(self, scheduler_pool):
     """
     :type scheduler_pool: BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop, name='RequestHandlerLoop', daemon=True)
     self._project_preparation_locks = {}
     self._subjob_calculator = SubjobCalculator()
Example #42
    def __init__(self, min_connection_poolsize=DEFAULT_POOLSIZE):
        """
        :param min_connection_poolsize: The minimum connection pool size for this instance
        :type min_connection_poolsize: int
        """
        self._session = requests.Session()
        self._logger = get_logger(__name__)

        poolsize = max(min_connection_poolsize, DEFAULT_POOLSIZE)
        self._session.mount(
            'http://',
            HTTPAdapter(pool_connections=poolsize, pool_maxsize=poolsize))
Example #43
 def __init__(self, scheduler_pool):
     """
     :type scheduler_pool: app.master.build_scheduler_pool.BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop,
         name='RequestHandlerLoop',
         daemon=True)
     self._project_preparation_locks = {}
Example #44
    def __init__(self, url, build_project_directory='', project_directory='', remote='origin', branch='master',
                 hash='FETCH_HEAD', config=None, job_name=None, remote_files=None, atoms_override=None):
        """
        Note: the first line of each parameter docstring will be exposed as command line argument documentation for the
        clusterrunner build client.

        :param url: url to the git repo (e.g. https, ssh)
        :type url: str
        :param build_project_directory: the symlinked directory of where PROJECT_DIR should end up being set to
        :type build_project_directory: str
        :param project_directory: path within the repo that contains clusterrunner.yaml
        :type project_directory: str
        :param remote: The git remote name to fetch from
        :type remote: str
        :param branch: The git branch name on the remote to fetch
        :type branch: str
        :param hash: The hash to reset hard on. If hash is not set, we use the FETCH_HEAD of <branch>.
        :type hash: str
        :param config: a yaml string representing the project_type's config
        :type config: str|None
        :param job_name: a list of job names we intend to run
        :type job_name: list [str] | None
        :param remote_files: dictionary mapping of output file to URL
        :type remote_files: dict[str, str] | None
        :param atoms_override: The list of overridden atoms (if specified, will not run atomizer).
        :type atoms_override: list[str] | None
        """
        super().__init__(config, job_name, remote_files, atoms_override)
        self._url = url
        self._remote = remote
        self._branch = branch
        self._hash = hash
        self._repo_directory = self.get_full_repo_directory(self._url)
        self._timing_file_directory = self.get_timing_file_directory(self._url)
        self._local_ref = None
        self._logger = log.get_logger(__name__)

        # We explicitly set the repo directory to 700 so we don't inadvertently expose the repo to access by other users
        fs.create_dir(self._repo_directory, self.DIRECTORY_PERMISSIONS)
        fs.create_dir(self._timing_file_directory, self.DIRECTORY_PERMISSIONS)
        fs.create_dir(os.path.dirname(build_project_directory))

        # Create a symlink from the generated build project directory to the actual project directory.
        # This is done in order to switch between the master's and the slave's copies of the repo while not
        # having to do something hacky in order to use the master's generated atoms on the slaves.
        actual_project_directory = os.path.join(self._repo_directory, project_directory)
        try:
            os.unlink(build_project_directory)
        except FileNotFoundError:
            pass

        os.symlink(actual_project_directory, build_project_directory)
        self.project_directory = build_project_directory
Example #45
    def __init__(self, build_id, enter_state_callbacks):
        """
        :type build_id: int
        :type enter_state_callbacks: dict[BuildState, callable]
        """
        self._logger = log.get_logger(__name__)
        self._build_id = build_id
        self._transition_timestamps = {state: None for state in BuildState}   # initialize all timestamps to None
        self._fsm = self._create_state_machine()

        for build_state, callback in enter_state_callbacks.items():
            self._register_enter_state_callback(build_state, callback)
    def __init__(self):
        super().__init__()
        self._handling_lock = Lock()
        self._teardown_callback_stack = LifoQueue()  # we execute callbacks in the reverse order that they were added
        self._logger = log.get_logger(__name__)
        self._handled_exceptions = Queue()

        # Set up a handler to be called when process receives SIGTERM.
        # Note: this will raise if called on a non-main thread, but we should NOT work around that here. (That could
        # prevent the teardown handler from ever being registered!) Calling code should be organized so that this
        # singleton is only ever initialized on the main thread.
        signal.signal(signal.SIGTERM, self._application_teardown_signal_handler)
        signal.signal(signal.SIGINT, self._application_teardown_signal_handler)
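The LIFO stack above pairs with the add_teardown_callback call shown in Example #12: callbacks registered later run first during teardown. A rough sketch of registering one, assuming the method also accepts a plain callable with no extra keyword arguments; the helper name and body are placeholders:

def cleanup_sketch():
    print('running teardown cleanup')  # placeholder cleanup work

UnhandledExceptionHandler.singleton().add_teardown_callback(cleanup_sketch)
# Callbacks are pushed onto the LifoQueue, so this one would run before any
# callback registered earlier if the process is torn down by SIGTERM/SIGINT.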
Example #47
 def __init__(self, config=None, job_name=None, remote_files=None):
     """
     :param config: A yaml string representing a cluster_runner.yaml file
     :type config: str | None
     :type job_name: str | None
      :param remote_files: key-value pairs where the key is the output_file and the value is the url
     :type remote_files: dict[str, str] | None
     """
     self._logger = get_logger(__name__)
     self.project_directory = ''
     self._config = config
     self._job_name = job_name
     self._remote_files = remote_files if remote_files else {}
Example #48
    def rsa_key(host):
        """
        :param host: The host whose RSA key we want to retrieve
        :type host: str
        :return: the rsa key string, without the 'ssh-rsa' prefix. Returns None if ssh-keyscan fails.
        :rtype: str|None
        """
        proc = subprocess.Popen('ssh-keyscan -t rsa {}'.format(host),
                                shell=True,
                                stdout=PIPE,
                                stderr=PIPE)
        output, error = proc.communicate()

        if proc.returncode != 0:
            log.get_logger(__name__).error(
                'Failed to get rsa string with output: {}, error: {}'.format(
                    output, error))
            return None

        line = output.decode("utf-8")
        # We want the string to the right of, and not including, the 'ssh-rsa' string.
        return line.split('ssh-rsa', 1)[-1].strip()
Example #49
 def __init__(self, host, username, executable_path):
     """
     :param host: the fully qualified hostname of the host to deploy to
     :type host: str
     :param username: the user who is executing this process and whose ssh credentials will be used
     :type username: str
     :param executable_path: the path to the clusterrunner executable on the remote host
     :type executable_path: str
     """
     self._logger = get_logger(__name__)
     self.host = host
     self._username = username
     self._executable_path = executable_path
     self._shell_client = ShellClientFactory.create(host, username)
Example #50
 def __init__(self, slave_url, num_executors):
     """
     :type slave_url: str
     :type num_executors: int
     """
     self.url = slave_url
     self.num_executors = num_executors
     self.id = self._slave_id_counter.increment()
     self._num_executors_in_use = Counter()
     self._network = Network(min_connection_poolsize=num_executors)
     self.current_build_id = None
     self._is_alive = True
     self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
     self._logger = log.get_logger(__name__)
Example #51
 def __init__(self, slave_url, num_executors):
     """
     :type slave_url: str
     :type num_executors: int
     """
     self.url = slave_url
     self.num_executors = num_executors
     self.id = self._slave_id_counter.increment()
     self._num_executors_in_use = Counter()
     self._network = Network(min_connection_poolsize=num_executors)
     self.current_build_id = None
     self.is_alive = True
     self._slave_api = UrlBuilder(slave_url, self.API_VERSION)
     self._logger = log.get_logger(__name__)
Example #52
    def __init__(self, build_id, enter_state_callbacks):
        """
        :type build_id: int
        :type enter_state_callbacks: dict[BuildState, callable]
        """
        self._logger = log.get_logger(__name__)
        self._build_id = build_id
        self._transition_timestamps = {state: None
                                       for state in BuildState
                                       }  # initialize all timestamps to None
        self._fsm = self._create_state_machine()

        for build_state, callback in enter_state_callbacks.items():
            self._register_enter_state_callback(build_state, callback)
 def __init__(self, scheduler_pool):
     """
     :type scheduler_pool: BuildSchedulerPool
     """
     self._logger = get_logger(__name__)
     self._scheduler_pool = scheduler_pool
     self._builds_waiting_for_slaves = Queue()
     self._request_queue = Queue()
     self._request_queue_worker_thread = SafeThread(
         target=self._build_preparation_loop,
         name='RequestHandlerLoop',
         daemon=True)
     self._project_preparation_locks = {}
     self._subjob_calculator = SubjobCalculator()
Example #54
    def __init__(self, config=None, job_name=None, remote_files=None):
        """
        :param config: A yaml string representing a clusterrunner.yaml file
        :type config: str | None
        :type job_name: str | None
        :param remote_files: key-value pairs where the key is the output_file and the value is the url
        :type remote_files: dict[str, str] | None
        """
        self.project_directory = ''
        self._config = config
        self._job_name = job_name
        self._remote_files = remote_files if remote_files else {}

        self._logger = log.get_logger(__name__)
        self._kill_event = Event()
Example #55
    def __init__(self, build):
        """
        :type build: Build
        """
        self._logger = get_logger(__name__)
        self._build = build

        job_config = build.project_type.job_config()
        self._max_executors = job_config.max_executors
        self._max_executors_per_slave = job_config.max_executors_per_slave

        self._slaves_allocated = []
        self._num_executors_allocated = 0
        self._num_executors_in_use = 0
        self._subjob_assignment_lock = Lock(
        )  # prevents subjobs from being skipped
Example #56
    def __init__(self):
        self._logger = get_logger(__name__)
        self._master_results_path = Configuration['results_directory']
        self._all_slaves_by_url = {}
        self._all_builds_by_id = OrderedDict()
        self._build_request_handler = BuildRequestHandler()
        self._build_request_handler.start()
        self._slave_allocator = SlaveAllocator(self._build_request_handler)
        self._slave_allocator.start()

        # Asynchronously delete (but immediately rename) all old builds when master starts.
        # Remove this if/when build numbers are unique across master starts/stops
        if os.path.exists(self._master_results_path):
            fs.async_delete(self._master_results_path)

        fs.create_dir(self._master_results_path)
Example #57
    def __init__(self):
        super().__init__()
        self._handling_lock = Lock()
        self._teardown_callback_stack = LifoQueue(
        )  # we execute callbacks in the reverse order that they were added
        self._logger = log.get_logger(__name__)
        self._handled_exceptions = Queue()
        self._teardown_callback_raised_exception = False

        # Set up handlers to be called when the application process receives certain signals.
        # Note: this will raise if called on a non-main thread, but we should NOT work around that here. (That could
        # prevent the teardown handler from ever being registered!) Calling code should be organized so that this
        # singleton is only ever initialized on the main thread.
        signal.signal(signal.SIGTERM,
                      self._application_teardown_signal_handler)
        signal.signal(signal.SIGINT, self._application_teardown_signal_handler)
        signal.signal(self.SIGINFO, self._application_info_dump_signal_handler)
Example #58
 def __init__(self, master_url, request_params, secret):
     """
     :param master_url: The url of the master which the build will be executed on
     :type master_url: str
     :param request_params: A dict of request params that will be json-encoded and sent in the build request
     :type request_params: dict
     :type secret: str
     """
     self._master_url = self._ensure_url_has_scheme(master_url)
     self._request_params = request_params
     self._secret = secret
     self._build_id = None
     self._network = Network()
     self._logger = get_logger(__name__)
     self._last_build_status_details = None
     self._master_api = UrlBuilder(master_url, self.API_VERSION)
     self._cluster_master_api_client = ClusterMasterAPIClient(master_url)