Example #1
    def _execute(self):
        """Executes an actor and yields the results when its finished.

        raises: gen.Return(True)
        """

        elb = yield self._find_elb(name=self.option('name'))

        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Still waiting for %s to become healthy' % self.option('name'),
            seconds=30)
        while True:
            healthy = yield self._is_healthy(elb, count=self.option('count'))

            if healthy is True:
                self.log.info('ELB is healthy.')
                break

            # In dry mode, fake it
            if self._dry:
                self.log.info('Pretending that ELB is healthy.')
                break

            # Not healthy :( continue looping
            self.log.debug('Retrying in 3 seconds.')
            yield utils.tornado_sleep(3)

        utils.clear_repeating_log(repeating_log)

        raise gen.Return()
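
Every example in this listing brackets a polling loop with utils.create_repeating_log and utils.clear_repeating_log. The sketch below shows one way such helpers could be built on Tornado's PeriodicCallback; it is an illustrative assumption, not the actual kingpin.utils source (which, judging by the tests further down, also accepts seconds=0 to fire on every IO loop iteration).

# Illustrative sketch only; the real kingpin.utils implementation may differ.
import itertools

from tornado import ioloop

_REPEATING_LOGS = {}           # registry of id -> PeriodicCallback
_LOG_IDS = itertools.count(1)  # simple id generator


def create_repeating_log(log_method, message, seconds=30):
    """Calls log_method(message) every `seconds` until cleared."""
    log_id = next(_LOG_IDS)
    # PeriodicCallback takes its interval in milliseconds. This sketch does
    # not handle the seconds=0 "fire on every IO loop iteration" case that
    # the tests below exercise.
    callback = ioloop.PeriodicCallback(
        lambda: log_method(message), seconds * 1000)
    callback.start()
    _REPEATING_LOGS[log_id] = callback
    return log_id


def clear_repeating_log(log_id):
    """Stops the periodic logger created by create_repeating_log()."""
    _REPEATING_LOGS.pop(log_id).stop()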
Example #2
    def _run_task(self, task_definition_name):
        """Runs a task on ECS given a task definition's family and revision.

        A task can result in multiple running tasks,
        depending on count and sidekick tasks.

        Args:
            task_definition_name: Task Definition string

        Returns:
            list: task ARNs.
        """
        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Waiting for task to be found...',
            seconds=30)

        while True:
            response = yield self.thread(
                self.ecs_conn.run_task,
                cluster=self.option('cluster'),
                taskDefinition=task_definition_name,
                count=self.option('count'))

            if not response['failures']:
                break
            # Error on non-missing failures.
            self._handle_failures(response['failures'], self.FAILURE_MISSING)
            yield gen.sleep(2)

        utils.clear_repeating_log(repeating_log)

        self.log.info('Scheduled task {}.'.format(task_definition_name))
        tasks = [t['taskArn'] for t in response['tasks']]
        raise gen.Return(tasks)
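
These coroutines hand results back by raising gen.Return(...); the caller receives that value as the result of its own yield. A minimal, hypothetical caller of _run_task (the actor object and the IOLoop driver here are assumptions, not part of the examples) could look like this:

from tornado import gen, ioloop


@gen.coroutine
def schedule(actor, task_definition_name):
    # The value passed to gen.Return() inside _run_task becomes the result
    # of this yield expression.
    task_arns = yield actor._run_task(task_definition_name)
    raise gen.Return(task_arns)


# Driving the coroutine on the current IOLoop, assuming an already
# constructed `actor` object:
# arns = ioloop.IOLoop.current().run_sync(
#     lambda: schedule(actor, 'my-task-family:3'))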
Example #3
    def _wait_for_deployment_update(self, service_name, task_definition_name):
        """Wait's for a service's primary deployment to be updated.

        Args:
            service_name: Service name to wait for.
            task_definition_name: Expected Task Definition string.
        """
        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Waiting for primary deployment to be updated to %s '
            'for service with name %s...' % (task_definition_name,
                                             service_name),
            seconds=30)

        while True:
            try:
                service = yield self._describe_service(service_name)
            except ServiceNotFound as e:
                self.log.info('Service Not Found: %s' % e.message)
                yield gen.sleep(2)
                continue

            primary_deployment = self._get_primary_deployment(service)
            if primary_deployment:
                self.log.info('Primary deployment is %s.' %
                              self._arn_to_name(
                                  primary_deployment['taskDefinition']))
                if self._is_task_in_deployment(
                        primary_deployment, task_definition_name):
                    self.log.info('Primary deployment updated.')
                    break
            yield gen.sleep(2)

        utils.clear_repeating_log(repeating_log)
Example #4
    def _run_task(self, task_definition_name):
        """Runs a task on ECS given a task definition's family and revision.

        A task can result in multiple running tasks,
        depending on count and sidekick tasks.

        Args:
            task_definition_name: Task Definition string

        Returns:
            list: task ARNs.
        """
        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Waiting for task to be found...',
            seconds=30)

        while True:
            response = yield self.api_call(
                self.ecs_conn.run_task,
                cluster=self.option('cluster'),
                taskDefinition=task_definition_name,
                count=self.option('count'))

            if not response['failures']:
                break
            # Error on non-missing failures.
            self._handle_failures(response['failures'], self.FAILURE_MISSING)
            yield gen.sleep(2)

        utils.clear_repeating_log(repeating_log)

        self.log.info('Scheduled task {}.'.format(task_definition_name))
        tasks = [t['taskArn'] for t in response['tasks']]
        raise gen.Return(tasks)
Example #5
    def test_repeating_log(self):
        logger = mock.Mock()  # used for tracking

        # Repeat this message 10 times per second
        logid = utils.create_repeating_log(logger.info, 'test', seconds=0.1)
        yield utils.tornado_sleep(0.45)  # Simulate a process taking 0.4-0.5 seconds
        utils.clear_repeating_log(logid)
        self.assertEquals(logger.info.call_count, 4)

        # Let's make sure that we don't keep looping our log message.
        yield utils.tornado_sleep(0.2)
        self.assertEquals(logger.info.call_count, 4)
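
The bare yield statements in the test above only work under Tornado's coroutine-aware test machinery; the surrounding test class is not shown in these snippets. A hypothetical harness, assuming tornado.testing and a kingpin.utils import path, might look like:

# Hypothetical harness for the repeating-log test above; the project's real
# test class and import paths are not shown in these snippets.
import mock  # Python 2 style, matching the snippets; or: from unittest import mock

from tornado import testing

from kingpin import utils  # assumed import path for the helpers used above


class TestRepeatingLog(testing.AsyncTestCase):

    @testing.gen_test
    def test_repeating_log(self):
        logger = mock.Mock()  # used for tracking

        # Repeat this message 10 times per second.
        log_id = utils.create_repeating_log(logger.info, 'test', seconds=0.1)
        yield utils.tornado_sleep(0.45)
        utils.clear_repeating_log(log_id)
        self.assertEqual(logger.info.call_count, 4)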
Example #6
    def test_repeating_log(self):
        logger = mock.Mock()  # used for tracking

        # Repeat this message 10 times per second
        # seconds=0 instructs Tornado to invoke this log on every IO loop
        # Below we yield gen.moment to allow IO loop iterations.
        # We do N+1 loops and check N count.
        logid = utils.create_repeating_log(logger.info, 'test', seconds=0)
        yield gen.moment
        yield gen.moment
        yield gen.moment
        yield gen.moment
        yield gen.moment

        utils.clear_repeating_log(logid)
        self.assertEqual(logger.info.call_count, 4)

        # Let's make sure that we don't keep looping our log message.
        yield gen.moment
        yield gen.moment
        self.assertEqual(logger.info.call_count, 4)
Example #7
    def test_repeating_log(self):
        logger = mock.Mock()  # used for tracking

        # Repeat this message 10 times per second
        # seconds=0 instructs Tornado to invoke this log on every IO loop
        # Below we yield gen.moment to allow IO loop iterations.
        # We do N+1 loops and check N count.
        logid = utils.create_repeating_log(logger.info, 'test', seconds=0)
        yield gen.moment
        yield gen.moment
        yield gen.moment
        yield gen.moment
        yield gen.moment

        utils.clear_repeating_log(logid)
        self.assertEquals(logger.info.call_count, 4)

        # Let's make sure that we don't keep looping our log message.
        yield gen.moment
        yield gen.moment
        self.assertEquals(logger.info.call_count, 4)
Example #8
    def _wait_until_stable(self, delay=3):
        """Poll and wait until an ElastiGroup has stabalized.

        Upon group creation, most of the instances will be in a "biding" state.
        This method watches the list of instances and waits until they are all
        in the 'fulfilled' state.
        """
        group_id = self._group['group']['id']

        # We use the repeating_log to let the user know we're still monitoring
        # things, while not flooding them every time we make an API call. We
        # give them a message every 30s, but make an API call every 3 seconds
        # to check the status.
        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Waiting for ElastiGroup to become stable',
            seconds=30)

        while True:
            response = yield self._get_group_status(group_id)

            # Find any nodes that are waiting for spot instance requests to be
            # fulfilled.
            pending = [
                i for i in response['response']['items']
                if i['status'] == 'pending-evaluation'
            ]
            fulfilled = [
                i['instanceId'] for i in response['response']['items']
                if i['status'] == 'fulfilled' and i['instanceId'] is not None
            ]

            if len(pending) < 1:
                self.log.info('All instance requests fulfilled: %s' %
                              ', '.join(fulfilled))
                break

            yield gen.sleep(delay)

        utils.clear_repeating_log(repeating_log)
Example #9
    def _wait_until_stable(self, delay=3):
        """Poll and wait until an ElastiGroup has stabalized.

        Upon group creation, most of the instances will be in a "biding" state.
        This method watches the list of instances and waits until they are all
        in the 'fulfilled' state.
        """
        group_id = self._group['group']['id']

        # We use the repeating_log to let the user know we're still monitoring
        # things, while not flooding them every time we make an API call. We
        # give them a message every 30s, but make an API call every 3 seconds
        # to check the status.
        repeating_log = utils.create_repeating_log(
            self.log.info,
            'Waiting for ElastiGroup to become stable',
            seconds=30)

        while True:
            response = yield self._get_group_status(group_id)

            # Find any nodes that are waiting for spot instance requests to be
            # fulfilled.
            pending = [i for i in response['response']['items']
                       if i['status'] == 'pending-evaluation']
            fulfilled = [i['instanceId'] for i in response['response']['items']
                         if i['status'] == 'fulfilled' and i['instanceId'] is
                         not None]

            if len(pending) < 1:
                self.log.info('All instance requests fulfilled: %s' %
                              ', '.join(fulfilled))
                break

            yield gen.sleep(delay)

        utils.clear_repeating_log(repeating_log)
Example #10
    def wait_for_task(self,
                      task,
                      task_name=None,
                      sleep=5,
                      loc_log=log,
                      instance=None):
        """Monitors a RightScale task for completion.

        RightScale tasks are provided as URLs that we can query for the
        run-status of the task. This method repeatedly queries a task for
        completion (every 5 seconds), and returns when the task has finished.

        TODO: Add a task-timeout option.

        Note: This is a completely retryable operation in the event that an
        intermittent network connection causes any kind of a connection
        failure.

        Args:
            task: RightScale Task resource object.
            task_name: Human-readable name of the task to be executed.
            sleep: Interval, in seconds, for the 'still waiting' log message;
                status checks run every min(sleep, 5) seconds.
            loc_log: logging.getLogger() object to be used to log task status.
                    This is useful when this API call is called from a Kingpin
                    actor, and you want to use the actor's specific logger.
                    If nothing is passed, the local `log` object is used.
            instance: RightScale instance object on which the task is executed.

        Returns:
            bool: success status
        """

        if not task:
            # If there is no task to wait on - don't wait!
            raise gen.Return(True)

        timeout_id = None
        if task_name:
            timeout_id = utils.create_repeating_log(
                loc_log.info, 'Still waiting on %s' % task_name, seconds=sleep)

        # Tracking when the tasks start so we can search by date later
        # RightScale expects the time to be a string in UTC
        now = datetime.utcnow()
        tasks_start = now.strftime('%Y/%m/%d %H:%M:%S +0000')

        while True:
            # Get the task status
            output = yield self._get_task_info(task)
            summary = output.soul['summary']
            stamp = datetime.now()

            if 'success' in summary or 'completed' in summary:
                status = True
                break

            if 'failed' in summary:
                status = False
                break

            loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                          (output.path, output.soul['summary'], stamp))

            yield utils.tornado_sleep(min(sleep, 5))

        loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                      (output.path, output.soul['summary'], stamp))

        if timeout_id:
            utils.clear_repeating_log(timeout_id)

        if status is True:
            raise gen.Return(True)

        if not instance:
            raise gen.Return(status)

        # If something failed we want to find out why -- get audit logs

        # Contact RightScale for audit logs of this instance.
        now = datetime.utcnow()
        tasks_finish = now.strftime('%Y/%m/%d %H:%M:%S +0000')

        loc_log.error('Task failed. Instance: "%s".' % instance.soul['name'])

        audit_logs = yield self.get_audit_logs(
            instance=instance,
            start=tasks_start,
            end=tasks_finish,
            match='failed')

        # Print every audit log that was obtained (may be 0)
        [loc_log.error(l) for l in audit_logs]

        if not audit_logs:
            loc_log.error('No audit logs for %s' % instance)

        loc_log.debug('Task finished, return value: %s, summary: %s' %
                      (status, summary))

        raise gen.Return(status)
Example #11
    def wait_for_task(self,
                      task,
                      task_name=None,
                      sleep=5,
                      loc_log=log,
                      instance=None):
        """Monitors a RightScale task for completion.

        RightScale tasks are provided as URLs that we can query for the
        run-status of the task. This method repeatedly queries a task for
        completion (every 5 seconds), and returns when the task has finished.

        TODO: Add a task-timeout option.

        Note: This is a completely retryable operation in the event that an
        intermittent network connection causes any kind of a connection
        failure.

        Args:
            task: RightScale Task resource object.
            task_name: Human-readable name of the task to be executed.
            sleep: Interval, in seconds, for the 'still waiting' log message;
                status checks run every min(sleep, 5) seconds.
            loc_log: logging.getLogger() object to be used to log task status.
                    This is useful when this API call is called from a Kingpin
                    actor, and you want to use the actor's specific logger.
                    If nothing is passed, the local `log` object is used.
            instance: RightScale instance object on which the task is executed.

        Returns:
            bool: success status
        """

        if not task:
            # If there is no task to wait on - don't wait!
            raise gen.Return(True)

        timeout_id = None
        if task_name:
            timeout_id = utils.create_repeating_log(loc_log.info,
                                                    'Still waiting on %s' %
                                                    task_name,
                                                    seconds=sleep)

        # Tracking when the tasks start so we can search by date later
        # RightScale expects the time to be a string in UTC
        now = datetime.utcnow()
        tasks_start = now.strftime('%Y/%m/%d %H:%M:%S +0000')

        while True:
            # Get the task status
            output = yield self._get_task_info(task)
            summary = output.soul['summary'].lower()
            stamp = datetime.now()

            if 'success' in summary or 'completed' in summary:
                status = True
                break

            if 'failed' in summary:
                status = False
                break

            loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                          (output.path, output.soul['summary'], stamp))

            yield utils.tornado_sleep(min(sleep, 5))

        loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                      (output.path, output.soul['summary'], stamp))

        if timeout_id:
            utils.clear_repeating_log(timeout_id)

        if status is True:
            raise gen.Return(True)

        if not instance:
            raise gen.Return(status)

        # If something failed we want to find out why -- get audit logs

        # Contact RightScale for audit logs of this instance.
        now = datetime.utcnow()
        tasks_finish = now.strftime('%Y/%m/%d %H:%M:%S +0000')

        loc_log.error('Task failed. Instance: "%s".' % instance.soul['name'])

        audit_logs = yield self.get_audit_logs(instance=instance,
                                               start=tasks_start,
                                               end=tasks_finish,
                                               match='failed')

        # Print every audit log that was obtained (may be 0)
        [loc_log.error(l) for l in audit_logs]

        if not audit_logs:
            loc_log.error('No audit logs for %s' % instance)

        loc_log.debug('Task finished, return value: %s, summary: %s' %
                      (status, summary))

        raise gen.Return(status)