Esempio n. 1
0
 def test_shutdown(self):
     """Repository shutdown calls stop() once on each cluster obtained."""
     created = []
     for addr in ('a', 'b', 'c'):
         created.append(MesosClusterRepository.get_cluster(addr))
     # One cluster object is constructed per distinct address.
     assert_equal(self.cluster_cls.call_count, 3)

     MesosClusterRepository.shutdown()

     for mesos_cluster in created:
         assert_equal(mesos_cluster.stop.call_count, 1)
Esempio n. 2
0
 def test_shutdown(self):
     """After shutdown(), every fetched cluster has been stopped once."""
     addresses = ['a', 'b', 'c']
     fetched = list(map(MesosClusterRepository.get_cluster, addresses))
     # Three different addresses -> three constructor calls.
     assert_equal(self.cluster_cls.call_count, 3)

     MesosClusterRepository.shutdown()

     for fetched_cluster in fetched:
         assert_equal(fetched_cluster.stop.call_count, 1)
Esempio n. 3
0
    def restore_state(self, action_runner):
        """Retrieve the persisted state and apply it to Mesos and the Jobs.

        The state watcher is asked for the state of the current job names.
        The 'mesos_state' entry goes to MesosClusterRepository and the
        'job_state' entry (together with action_runner) goes to the jobs
        collection; each defaults to an empty dict when absent. Metadata is
        saved through the state watcher afterwards.
        """
        log.info('restoring')
        job_names = self.jobs.get_names()
        restored = self.state_watcher.restore(job_names)

        mesos_state = restored.get('mesos_state', {})
        MesosClusterRepository.restore_state(mesos_state)

        job_state = restored.get('job_state', {})
        self.jobs.restore_state(job_state, action_runner)

        self.state_watcher.save_metadata()
Esempio n. 4
0
File: mcp.py Progetto: Yelp/Tron
    def restore_state(self, action_runner):
        """Load persisted state via the state watcher and apply it.

        'mesos_state' is applied to MesosClusterRepository first, then
        'job_state' is applied to the configured jobs using action_runner.
        Missing entries fall back to an empty dict. Finally the state
        watcher's metadata is saved.
        """
        log.info('restoring')
        saved = self.state_watcher.restore(self.jobs.get_names())

        # Mesos cluster state is restored before the jobs.
        MesosClusterRepository.restore_state(
            saved.get('mesos_state', {}),
        )
        self.jobs.restore_state(
            saved.get('job_state', {}),
            action_runner,
        )

        self.state_watcher.save_metadata()
Esempio n. 5
0
 def _handle_shutdown(self, sig_num, stack_frame):
     """Stop the reactor, shut down MCP and the Mesos clusters, then exit.

     Schedules reactor.stop, then polls reactor.running in 0.1 second
     steps until it clears or the wait counter exceeds 5. After that,
     self.mcp (if set) and MesosClusterRepository are shut down.

     :param sig_num: signal identifier, used in the log and exit messages
     :param stack_frame: not used by this handler
     :raises SystemExit: always, after the shutdown steps have run
     """
     log.info(f"Shutdown requested via {str(sig_num)}")
     reactor.callLater(0, reactor.stop)

     poll_interval = 0.1
     max_wait = 5
     elapsed = 0
     while reactor.running:
         if elapsed > max_wait:
             log.error("timed out waiting for reactor shutdown")
             break
         time.sleep(poll_interval)
         elapsed += poll_interval

     if self.mcp:
         self.mcp.shutdown()
     MesosClusterRepository.shutdown()

     raise SystemExit(f"Terminating on signal {str(sig_num)}")
Esempio n. 6
0
 def _handle_shutdown(self, sig_num, stack_frame):
     """Stop the reactor, shut down MCP and Mesos, then terminate.

     Schedules reactor.stop and polls reactor.running in 0.1 second steps
     until it clears or the wait counter exceeds 5. Then self.mcp (if set)
     and MesosClusterRepository are shut down, and both arguments are
     forwarded to self.context.terminate.

     :param sig_num: signal identifier; logged and passed to terminate
     :param stack_frame: passed through to terminate unchanged
     """
     log.info("Shutdown requested via %s" % sig_num)
     reactor.callLater(0, reactor.stop)

     step = 0.1
     limit = 5
     elapsed = 0
     while reactor.running:
         if elapsed <= limit:
             time.sleep(step)
             elapsed += step
             continue
         log.error("timed out waiting for reactor shutdown")
         break

     if self.mcp:
         self.mcp.shutdown()
     MesosClusterRepository.shutdown()

     self.context.terminate(sig_num, stack_frame)
Esempio n. 7
0
 def _handle_shutdown(self, sig_num, stack_frame):
     """Run the shutdown sequence and exit the process via SystemExit.

     reactor.stop is scheduled, then reactor.running is polled every
     0.1 seconds; once the wait counter exceeds 5 an error is logged and
     polling stops. self.mcp (when set) and MesosClusterRepository are
     shut down before SystemExit is raised.

     :param sig_num: signal identifier, used in the log and exit messages
     :param stack_frame: not used by this handler
     :raises SystemExit: always
     """
     log.info(f"Shutdown requested via {str(sig_num)}")
     reactor.callLater(0, reactor.stop)

     tick = 0.1
     timeout = 5
     waited_so_far = 0
     while reactor.running:
         if waited_so_far <= timeout:
             time.sleep(tick)
             waited_so_far += tick
             continue
         log.error("timed out waiting for reactor shutdown")
         break

     if self.mcp:
         self.mcp.shutdown()
     MesosClusterRepository.shutdown()
     raise SystemExit(f"Terminating on signal {str(sig_num)}")
Esempio n. 8
0
    def submit_command(self):
        """Create a Mesos task from this run's settings and submit it.

        When the cluster returns no task (Mesos is disabled), self.fail(None)
        is called and nothing is submitted.

        :returns: the submitted task, or None when no task was created
        """
        output_serializer = filehandler.OutputStreamSerializer(
            self.output_path,
        )
        cluster = MesosClusterRepository.get_cluster()
        task_spec = dict(
            action_run_id=self.id,
            command=self.command,
            cpus=self.cpus,
            mem=self.mem,
            constraints=self.constraints,
            docker_image=self.docker_image,
            docker_parameters=self.docker_parameters,
            env=self.env,
            extra_volumes=self.extra_volumes,
            serializer=output_serializer,
        )
        mesos_task = cluster.create_task(**task_spec)
        if not mesos_task:  # Mesos is disabled
            self.fail(None)
            return None

        self.mesos_task_id = mesos_task.get_mesos_id()

        # Start watching first: submitting may itself cause a transition.
        self.watch(mesos_task)
        cluster.submit(mesos_task)
        return mesos_task
Esempio n. 9
0
    def recover(self):
        """Recover a Mesos run using its saved task ID.

        Nothing happens unless the state machine allows a transition to
        'running'. The run is marked failed-unknown when there is no saved
        task ID, or when no task can be created for that ID (Mesos disabled
        or invalid ID). Otherwise the task is watched, handed to the cluster
        for recovery, and the run's exit status and end time are cleared
        before transitioning to 'running'.

        :returns: the recovered task, or None if recovery was not possible
        """
        if not self.machine.check('running'):
            # The trailing space matters: adjacent literals are concatenated
            # as-is, and without it the state was fused with "to running".
            log.error(f'{self} unable to transition from {self.machine.state} '
                      'to running for recovery')
            return

        if self.mesos_task_id is None:
            log.error(f'{self} no task ID, cannot recover')
            self.fail_unknown()
            return

        log.info(f'{self} recovering Mesos run')

        serializer = filehandler.OutputStreamSerializer(self.output_path)
        mesos_cluster = MesosClusterRepository.get_cluster()
        task = self._create_mesos_task(
            mesos_cluster,
            serializer,
            self.mesos_task_id,
        )
        if not task:
            log.warning(f'{self} cannot recover, Mesos is disabled or '
                        f'invalid task ID {self.mesos_task_id!r}')
            self.fail_unknown()
            return

        # Watch before recovering so no task event is missed.
        self.watch(task)
        mesos_cluster.recover(task)

        # Reset status
        self.exit_status = None
        self.end_time = None
        self.transition_and_notify('running')

        return task
Esempio n. 10
0
    def test_configure(self):
        """configure() updates existing clusters and seeds later ones."""
        existing = []
        for addr in ('d', 'e'):
            existing.append(MesosClusterRepository.get_cluster(addr))

        volume = mock.Mock()
        options = mock.Mock(
            master_port=5000,
            secret='/dev/null',
            principal="fake-principal",
            role='tron',
            enabled=False,
            default_volumes=[volume],
            dockercfg_location='auth',
            offer_timeout=1000,
        )
        secret_patch = mock.patch(
            'tron.mesos.get_secret_from_file',
            autospec=True,
            return_value='test-secret',
        )
        with secret_patch:
            MesosClusterRepository.configure(options)

        expected_volume = volume._asdict.return_value
        # Task settings expected on both existing and newly created clusters.
        task_settings = dict(
            default_volumes=[expected_volume],
            dockercfg_location='auth',
            offer_timeout=1000,
        )
        for mesos_cluster in existing:
            mesos_cluster.set_enabled.assert_called_once_with(False)
            mesos_cluster.configure_tasks.assert_called_once_with(
                **task_settings
            )

        # A cluster fetched afterwards must be built with the same settings
        MesosClusterRepository.get_cluster('f')
        self.cluster_cls.assert_called_with(
            mesos_address='f',
            mesos_master_port=5000,
            secret='test-secret',
            principal="fake-principal",
            mesos_role='tron',
            framework_id=None,
            enabled=False,
            **task_settings
        )
Esempio n. 11
0
    def test_configure(self):
        """Configured options reach existing clusters and new ones alike."""
        first = MesosClusterRepository.get_cluster('d')
        second = MesosClusterRepository.get_cluster('e')

        fake_volume = mock.Mock()
        option_values = dict(
            master_port=5000,
            secret='/dev/null',
            principal="fake-principal",
            role='tron',
            enabled=False,
            default_volumes=[fake_volume],
            dockercfg_location='auth',
            offer_timeout=1000,
        )
        options = mock.Mock(**option_values)
        with mock.patch(
            'tron.mesos.get_secret_from_file',
            autospec=True,
            return_value='test-secret',
        ):
            MesosClusterRepository.configure(options)

        expected_volume = fake_volume._asdict.return_value
        for configured in (first, second):
            configured.set_enabled.assert_called_once_with(False)
            configured.configure_tasks.assert_called_once_with(
                default_volumes=[expected_volume],
                dockercfg_location='auth',
                offer_timeout=1000,
            )

        # The next cluster requested is constructed from the same settings
        MesosClusterRepository.get_cluster('f')
        expected_init = dict(
            mesos_address='f',
            mesos_master_port=5000,
            secret='test-secret',
            principal="fake-principal",
            mesos_role='tron',
            framework_id=None,
            enabled=False,
            default_volumes=[expected_volume],
            dockercfg_location='auth',
            offer_timeout=1000,
        )
        self.cluster_cls.assert_called_with(**expected_init)
Esempio n. 12
0
    def submit_command(self):
        """Create a Mesos task for this run and submit it to the cluster.

        When no task is created (Mesos is disabled), the run is failed with
        EXIT_MESOS_DISABLED and nothing is submitted.

        :returns: the submitted task, or None when no task was created
        """
        output_serializer = filehandler.OutputStreamSerializer(
            self.output_path,
        )
        cluster = MesosClusterRepository.get_cluster()
        mesos_task = self._create_mesos_task(cluster, output_serializer)
        if not mesos_task:  # Mesos is disabled
            self.fail(self.EXIT_MESOS_DISABLED)
            return None

        self.mesos_task_id = mesos_task.get_mesos_id()

        # Start watching first: submitting may itself cause a transition.
        self.watch(mesos_task)
        cluster.submit(mesos_task)
        return mesos_task
Esempio n. 13
0
    def submit_command(self):
        """Build a Mesos task via _create_mesos_task and submit it.

        :returns: the submitted task; None after failing the run with
            EXIT_MESOS_DISABLED when no task could be created
        """
        stream_serializer = filehandler.OutputStreamSerializer(
            self.output_path,
        )
        repo_cluster = MesosClusterRepository.get_cluster()
        new_task = self._create_mesos_task(repo_cluster, stream_serializer)

        if new_task:
            self.mesos_task_id = new_task.get_mesos_id()
            # Watch before submitting, since submit may trigger a transition
            self.watch(new_task)
            repo_cluster.submit(new_task)
            return new_task

        # No task was created: Mesos is disabled
        self.fail(self.EXIT_MESOS_DISABLED)
        return None
Esempio n. 14
0
    def recover(self):
        """Recover a Mesos run using its saved task ID.

        Logs an error and returns when there is no saved task ID or when the
        state machine cannot transition to 'running'. If the cluster returns
        no task for the saved ID (Mesos disabled or invalid ID), the run is
        marked failed-unknown. Otherwise the task is watched, handed to the
        cluster for recovery, and the run's exit status and end time are
        cleared before transitioning to 'running'.

        :returns: the recovered task, or None if recovery was not possible
        """
        if self.mesos_task_id is None:
            log.error(f'{self} no task ID, cannot recover')
            return

        if not self.machine.check('running'):
            # The trailing space matters: adjacent literals are concatenated
            # as-is, and without it the state was fused with "to running".
            log.error(
                f'{self} unable to transition from {self.machine.state} '
                'to running for recovery'
            )
            return

        log.info(f'{self} recovering Mesos run')

        serializer = filehandler.OutputStreamSerializer(self.output_path)
        mesos_cluster = MesosClusterRepository.get_cluster()
        task = mesos_cluster.create_task(
            action_run_id=self.id,
            command=self.command,
            cpus=self.cpus,
            mem=self.mem,
            constraints=self.constraints,
            docker_image=self.docker_image,
            docker_parameters=self.docker_parameters,
            env=self.env,
            extra_volumes=self.extra_volumes,
            serializer=serializer,
            task_id=self.mesos_task_id,
        )
        if not task:
            log.warning(
                f'{self} cannot recover, Mesos is disabled or '
                f'invalid task ID {self.mesos_task_id!r}'
            )
            self.fail_unknown()
            return

        # Watch before recovering so no task event is missed.
        self.watch(task)
        mesos_cluster.recover(task)

        # Reset status
        self.exit_status = None
        self.end_time = None
        self.transition_and_notify('running')

        return task
Esempio n. 15
0
    def _kill_mesos_task(self):
        """Ask the Mesos cluster to kill this run's task and report on it.

        A kill is attempted even when the action is not active; a notice is
        included in the report in that case.

        :returns: newline-joined status messages describing what happened
        """
        report = []
        if not self.is_active:
            report.append(f'Action is {self.state}, not running. Continuing anyway.')

        cluster = MesosClusterRepository.get_cluster()
        if self.mesos_task_id is None:
            report.append("Error: Can't find task id for the action.")
            return '\n'.join(report)

        report.append(f"Sending kill for {self.mesos_task_id}...")
        if cluster.kill(self.mesos_task_id):
            report.append("Sent! It can take up to docker_stop_timeout (current setting is 2 mins) to stop.")
        else:
            report.append("Error while sending kill request. Please try again.")

        return '\n'.join(report)
Esempio n. 16
0
    def recover(self):
        """Recover a Mesos run using its saved task ID.

        Nothing happens unless the state machine allows a transition to
        'running'. The run is marked failed-unknown when there is no saved
        task ID, or when no task can be created for that ID (Mesos disabled
        or invalid ID). Otherwise the task is watched, handed to the cluster
        for recovery, and the run's exit status and end time are cleared
        before transitioning to 'running'.

        :returns: the recovered task, or None if recovery was not possible
        """
        if not self.machine.check('running'):
            # The trailing space matters: adjacent literals are concatenated
            # as-is, and without it the state was fused with "to running".
            log.error(
                f'{self} unable to transition from {self.machine.state} '
                'to running for recovery'
            )
            return

        if self.mesos_task_id is None:
            log.error(f'{self} no task ID, cannot recover')
            self.fail_unknown()
            return

        log.info(f'{self} recovering Mesos run')

        serializer = filehandler.OutputStreamSerializer(self.output_path)
        mesos_cluster = MesosClusterRepository.get_cluster()
        task = self._create_mesos_task(
            mesos_cluster,
            serializer,
            self.mesos_task_id,
        )
        if not task:
            log.warning(
                f'{self} cannot recover, Mesos is disabled or '
                f'invalid task ID {self.mesos_task_id!r}'
            )
            self.fail_unknown()
            return

        # Watch before recovering so no task event is missed.
        self.watch(task)
        mesos_cluster.recover(task)

        # Reset status
        self.exit_status = None
        self.end_time = None
        self.transition_and_notify('running')

        return task
Esempio n. 17
0
    def _kill_mesos_task(self):
        """Send a kill request for this run's Mesos task.

        The request is sent even if the action is not active (a notice is
        added to the output). Without a task ID no request is made.

        :returns: the collected status messages joined with newlines
        """
        lines = []
        if not self.is_active:
            lines.append(
                f'Action is {self.state}, not running. Continuing anyway.'
            )

        cluster = MesosClusterRepository.get_cluster()
        if self.mesos_task_id is None:
            lines.append("Error: Can't find task id for the action.")
        else:
            lines.append(f"Sending kill for {self.mesos_task_id}...")
            kill_sent = cluster.kill(self.mesos_task_id)
            outcome = (
                "Sent! It can take up to docker_stop_timeout (current setting is 2 mins) to stop."
                if kill_sent else
                "Error while sending kill request. Please try again."
            )
            lines.append(outcome)

        return '\n'.join(lines)
Esempio n. 18
0
 def test_get_cluster_repeated_mesos_address(self):
     """Requesting the same address twice constructs only one cluster."""
     address = 'master-a.com'
     initial = MesosClusterRepository.get_cluster(address)
     repeat = MesosClusterRepository.get_cluster(address)

     assert_equal(initial, repeat)
     assert_equal(self.cluster_cls.call_count, 1)
Esempio n. 19
0
 def test_get_cluster_repeated_mesos_address(self):
     """A repeated address returns an equal cluster without a new build."""
     address = 'master-a.com'
     first, second = [
         MesosClusterRepository.get_cluster(address)
         for _ in range(2)
     ]
     assert_equal(first, second)
     # The cluster class was instantiated only for the first request.
     assert_equal(self.cluster_cls.call_count, 1)