def _stopInstance(self):
        """
        Shutdown the slave and then stop the instance.

        We need to do both, to avoid the following sequence:
        - Shutdown instance
        - Pending build triggers new instance
        - When new slave connects, duplicate slave detection kicks in, causing
          the original slave to disconnect. That disconnect triggers the new
          slave instance to shutdown.
        - Loop.

        https://clusterhq.atlassian.net/browse/FLOC-1938
        """
        with start_action(
                action_type="ondemand_slave:stop_instance",
                slave=self.slavename,
                ).context():
            with start_action(
                    action_type="ondemand_slave:shutdown"
                    ).context():
                d = DeferredContext(self.shutdown())
                timeoutDeferred(reactor, d, 60)
                d = d.addActionFinish()
            d = DeferredContext(d)
            d.addBoth(lambda _: self.instance_booter.stop())
            d.addActionFinish()
Example #2
def bg(wait, command, raweliot, targets):
    """Run the default oam operation on targets"""
    if raweliot:
        eliot.to_file(sys.stdout)
    else:
        # eliottree.render_tasks(sys.stdout.write, tasks, colorize=True) #py3
        eliot.add_destination(render_stdout)
    procs = []
    if not targets:
        targets = ['localhost']
    with eliot.start_task(action_type='run_ops', targets=targets):
        with eliot.start_action(action_type='start_ops', targets=targets):
            for server in targets:
                if wait:
                    cmd = FG_CMD.format(server, command)
                else:
                    cmd = BG_CMD.format(server, SESSION_NAME, command)
                logging.debug('%s start, cmd: %s', server, cmd)
                with eliot.start_action(action_type='start_process', target=server, cmd=cmd):
                    procs.append(subprocess.Popen(cmd, shell=True))
        finished = set()
        with eliot.start_action(action_type='wait_finishes', targets=targets):
            while len(finished) != len(procs):
                for index, server in enumerate(procs):
                    logging.debug('looping at %s %d', targets[index], len(finished))
                    if index not in finished and server.poll() is not None:
                        eliot.Message.log(message_type='finish', target=targets[index])
                        finished.add(index)
                time.sleep(1)
        with eliot.start_action(action_type='wait_terminations', targets=targets):
            for index, server in enumerate(procs):
                with eliot.start_action(action_type='wait_process', target=targets[index]):
                    server.wait()
                    logging.debug('%s finish, returncode=%d', targets[index], server.returncode)
Example #3
def _run_task(rule_name, master_id):
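    # Reset eliot's destination list so this task logs only to the
    # per-master file added on the next line.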
    del Logger._destinations._destinations[:]
    to_file(open(os.path.join(eliot_log_path, master_id), "ab"))

    with start_task(action_type="invenio_checker:supervisor:_run_task",
                    master_id=master_id) as eliot_task:
        from .models import CheckerRule
        # cleanup_failed_runs()

        redis_master = None

        def cleanup_session():
            print 'Cleaning up'
            if redis_master is not None:
                redis_master.zap()

        def sigint_hook(rcv_signal, frame):
            cleanup_session()

        def except_hook(type_, value, tback):
            cleanup_session()
            reraise(type_, value, tback)

        signal.signal(signal.SIGINT, sigint_hook)
        signal.signal(signal.SIGTERM, sigint_hook)
        sys.excepthook = except_hook

        with start_action(action_type='create master'):
            eliot_task_id = eliot_task.serialize_task_id()
            redis_master = RedisMaster(master_id, eliot_task_id, rule_name)

        with start_action(action_type='create subtasks'):
            rules = CheckerRule.from_ids((rule_name,))
            bundles = rules_to_bundles(rules, redis_master.all_recids)

            subtasks = []
            errback = handle_error.s()
            for rule, rule_chunks in bundles.iteritems():
                for chunk in rule_chunks:
                    task_id = uuid()
                    redis_master.workers_append(task_id)
                    eliot_task_id = eliot_task.serialize_task_id()
                    RedisWorker(task_id, eliot_task_id, chunk)
                    subtasks.append(run_test.subtask(args=(rule.filepath,
                                                           redis_master.master_id,
                                                           task_id),
                                                     task_id=task_id,
                                                     link_error=[errback]))

            Message.log(message_type='registered subtasks', value=str(redis_master.workers))

        with start_action(action_type='run chord'):
            redis_master.status = StatusMaster.running
            header = subtasks
            callback = handle_results.subtask(link_error=[handle_errors.s(redis_master.master_id)])
            my_chord = chord(header)
            result = my_chord(callback)
            redis_master.status = StatusMaster.running
Example #4
def check_links(urls):
    with start_action(action_type="check_links", urls=urls):
        for url in urls:
            try:
                with start_action(action_type="download", url=url):
                    response = requests.get(url)
                    response.raise_for_status()
            except Exception as e:
                raise ValueError(str(e))
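A minimal driver for the example above, assuming it lives in a module with eliot and requests available; to_file is eliot's standard destination helper:

import sys
from eliot import to_file

to_file(sys.stdout)  # emit the action tree as JSON lines on stdout
check_links(["https://example.com"])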
Example #5
 def g():
     Message.log(message_type="a")
     with start_action(action_type="confounding-factor"):
         Message.log(message_type="b")
         yield None
         with start_action(action_type="double-confounding-factor"):
             yield None
             Message.log(message_type="c")
         Message.log(message_type="d")
     Message.log(message_type="e")
Example #6
def alice_invite(reactor, alice, temp_dir, request):
    node_dir = join(temp_dir, 'alice')

    with start_action(action_type=u"integration:alice:magic_folder:create"):
        # FIXME XXX by the time we see "client running" in the logs, the
        # storage servers aren't "really" ready to roll yet (uploads fairly
        # consistently fail if we don't hack in this pause...)
        import time ; time.sleep(5)
        proto = _CollectOutputProtocol()
        reactor.spawnProcess(
            proto,
            sys.executable,
            [
                sys.executable, '-m', 'allmydata.scripts.runner',
                'magic-folder', 'create',
                '--poll-interval', '2',
                '--basedir', node_dir, 'magik:', 'alice',
                join(temp_dir, 'magic-alice'),
            ]
        )
        pytest_twisted.blockon(proto.done)

    with start_action(action_type=u"integration:alice:magic_folder:invite") as a:
        proto = _CollectOutputProtocol()
        reactor.spawnProcess(
            proto,
            sys.executable,
            [
                sys.executable, '-m', 'allmydata.scripts.runner',
                'magic-folder', 'invite',
                '--basedir', node_dir, 'magik:', 'bob',
            ]
        )
        pytest_twisted.blockon(proto.done)
        invite = proto.output.getvalue()
        a.add_success_fields(invite=invite)

    with start_action(action_type=u"integration:alice:magic_folder:restart"):
        # before magic-folder works, we have to stop and restart (this is
        # crappy for the tests -- can we fix it in magic-folder?)
        try:
            alice.signalProcess('TERM')
            pytest_twisted.blockon(alice.exited)
        except ProcessExitedAlready:
            pass
        with start_action(action_type=u"integration:alice:magic_folder:magic-text"):
            magic_text = 'Completed initial Magic Folder scan successfully'
            pytest_twisted.blockon(_run_node(reactor, node_dir, request, magic_text))
    return invite
Example #7
    def __init__(self, case):
        super(SubscriptionConvergence, self).__init__()

        case.clear_logs()

        self.case = case
        self.path = FilePath(mkdtemp().decode("utf-8"))
        self.domain = u"s4.example.com"
        self.bucket = u"s4"
        self.database = SubscriptionDatabase.from_directory(
            self.path, self.domain, self.bucket,
        )

        # Track which subscriptions have had these resources created in
        # Kubernetes (by Kubernetes).  Once they've been created, we expect
        # them to continue to exist until the subscription is deactivated.
        self.has_replicaset = set()
        self.has_pod = set()

        self.subscription_client = memory_client(self.database.path, self.domain)
        self.kubernetes = memory_kubernetes()
        self.kube_model = self.kubernetes.model
        self.kube_client = KubeClient(k8s=self.kubernetes.client())
        self.aws_region = FakeAWSServiceRegion(
            access_key="access_key_id",
            secret_key="secret_access_key",
        )
        self.action = start_action(action_type=u"convergence-test")
Example #8
def get_metadata_path(path):
    """
    Requests a metadata path from the metadata server available within GCE.

    The metadata server is a good way to query information about the
    currently running instance and the project it is in. It is also the
    mechanism used to inject ssh public keys and service account session
    tokens into the VM.

    :param unicode path: The path on the metadata server to query.

    :returns unicode: The resulting value from the metadata server.
    """
    with start_action(
        action_type=u"flocker:node:agents:gce:get_metadata_path",
        path=path
    ) as action:
        timeout_sec = 3
        r = requests.get(_METADATA_SERVER + path,
                         headers=_METADATA_HEADERS,
                         timeout=timeout_sec)
        if r.status_code != 200:
            raise ValueError("Did not get success result from metadata server "
                             "for path {}, instead got {}.".format(
                                 path, r.status_code))
        action.add_success_fields(response=r.text)
        return r.text
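A hedged usage sketch; the metadata path below is illustrative, not taken from the source:

hostname = get_metadata_path(u"instance/hostname")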
Example #9
def main():
    with start_action(action_type="mypackage:main"):
        for i in [1, 3]:
            try:
                do_a_thing(i)
            except ValueError:
                LEGACY_LOGGER.info("Number {} was rejected.".format(i))
Example #10
    def test_concurrent_generators(self, logger):
        @eliot_friendly_generator_function
        def g(which):
            Message.log(message_type="{}-a".format(which))
            with start_action(action_type=which):
                Message.log(message_type="{}-b".format(which))
                yield
                Message.log(message_type="{}-c".format(which))
            Message.log(message_type="{}-d".format(which))

        g.debug = True  # output yielded messages

        gens = [g("1"), g("2")]
        with start_action(action_type="the-action"):
            while gens:
                for g in gens[:]:
                    try:
                        next(g)
                    except StopIteration:
                        gens.remove(g)

        assert_expected_action_tree(
            self,
            logger,
            "the-action",
            [
                "1-a",
                {"1": ["1-b", "yielded", "1-c"]},
                "2-a",
                {"2": ["2-b", "yielded", "2-c"]},
                "1-d",
                "2-d",
            ],
        )
Example #11
        def api_clean_state(
            name, configuration_method, state_method, delete_method,
        ):
            """
            Clean entities from the cluster.

            :param unicode name: The name of the entities to clean.
            :param configuration_method: The function to obtain the configured
                entities.
            :param state_method: The function to get the current entities.
            :param delete_method: The method to delete an entity.

            :return: A `Deferred` that fires when the entities have been
                deleted.
            """
            context = start_action(
                action_type=u"acceptance:cleanup_" + name,
            )
            with context.context():
                get_items = DeferredContext(configuration_method())

                def delete_items(items):
                    return gather_deferreds(list(
                        delete_method(item)
                        for item in items
                    ))
                get_items.addCallback(delete_items)
                get_items.addCallback(
                    lambda ignored: loop_until(
                        reactor, lambda: state_method().addCallback(
                            lambda result: [] == result
                        )
                    )
                )
                return get_items.addActionFinish()
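An illustrative invocation; the client methods below are hypothetical stand-ins for the real configuration/state APIs:

d = api_clean_state(
    u"datasets",
    client.list_datasets_configuration,
    client.list_datasets_state,
    lambda dataset: client.delete_dataset(dataset[u"dataset_id"]),
)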
Example #12
    def take_over(self, upstream, header):
        """
        Begin actively proxying between this protocol and ``upstream``.

        :param Protocol: The upstream connection involved in this proxying
            operation.  It will be abused somewhat.  Read the implementation.

        :param bytes header: Any data that should be sent downstream before
            engaging the proxy.

        :return Deferred: A ``Deferred`` that fires when this protocol's
            connection is lost.  This should be tightly coupled to loss of the
            upstream protocol's connection.
        """
        self.done = Deferred()

        self._proxied_connections.inc()

        peer = self.transport.getPeer()
        a = start_action(
            action_type=u"grid-router:proxy:take-over",
            to=(peer.host, peer.port),
        )
        with a:
            self.transport.write(header)

            upstream.dataReceived = self.transport.write
            upstream.connectionLost = self._upstream_connection_lost

            self.dataReceived = upstream.transport.write
            self.upstream = upstream
            self.upstream.transport.resumeProducing()
            return self.done
Example #13
 def method_with_retry(*a, **kw):
     name = _callable_repr(method)
     action_type = _TRY_UNTIL_SUCCESS
     with start_action(action_type=action_type, function=name):
         return _poll_until_success_returning_result(
             should_retry, steps, sleep, method, a, kw
         )
Example #14
    def stop(self):
        """
        Stop the scenario from being maintained by stopping all the
        loops that may be executing.

        :return Deferred[Optional[Dict[unicode, Any]]]: Scenario metrics.
        """
        self.is_started = False
        if self.monitor_loop.running:
            self.monitor_loop.stop()

        if self.loop.running:
            self.loop.stop()

        outstanding_requests = self.rate_measurer.outstanding()

        if outstanding_requests > 0:
            msg = (
                "There are {num_requests} outstanding requests. " "Waiting {num_seconds} seconds for them to complete."
            ).format(num_requests=outstanding_requests, num_seconds=self.timeout)
            Message.log(key="outstanding_requests", value=msg)

        with start_action(action_type=u"flocker:benchmark:scenario:stop", scenario="request_load"):

            def no_outstanding_requests():
                return self.rate_measurer.outstanding() == 0

            scenario_stopped = loop_until(self.reactor, no_outstanding_requests, repeat(1))
            timeout(self.reactor, scenario_stopped, self.timeout)
            scenario = DeferredContext(scenario_stopped)

            def handle_timeout(failure):
                failure.trap(CancelledError)
                msg = ("Force stopping the scenario. " "There are {num_requests} outstanding requests").format(
                    num_requests=outstanding_requests
                )
                Message.log(key="force_stop_request", value=msg)

            scenario.addErrback(handle_timeout)

            def scenario_cleanup(ignored):
                """
                Call the scenario cleanup, wrapped in an eliot start action
                so we can see the logs if something goes wrong within the
                cleanup.

                :return Deferred: fires once the cleanup has completed.
                """
                with start_action(action_type=u"flocker:benchmark:scenario:cleanup", scenario="request_load"):
                    return self.request.run_cleanup()

            scenario.addBoth(scenario_cleanup)

            def return_metrics(_ignore):
                return self.rate_measurer.get_metrics()

            scenario.addCallback(return_metrics)

            return scenario.addActionFinish()
Example #15
    def test_yield_inside_nested_actions(self, logger):
        @eliot_friendly_generator_function
        def g():
            Message.log(message_type="a")
            with start_action(action_type="confounding-factor"):
                Message.log(message_type="b")
                yield None
                with start_action(action_type="double-confounding-factor"):
                    yield None
                    Message.log(message_type="c")
                Message.log(message_type="d")
            Message.log(message_type="e")

        g.debug = True  # output yielded messages

        with start_action(action_type="the-action"):
            list(g())

        assert_expected_action_tree(
            self,
            logger,
            "the-action",
            [
                "a",
                {
                    "confounding-factor": [
                        "b",
                        "yielded",
                        {"double-confounding-factor": ["yielded", "c"]},
                        "d",
                    ]
                },
                "e",
            ],
        )
Example #16
def proxy(upstream, endpoint, header):
    """
    Establish a new connection to ``endpoint`` and begin proxying between that
    connection and ``upstream``.

    :param IProtocol upstream: A connected protocol.  All data received by
        this protocol from this point on will be sent along to another newly
        established connection.

    :param IStreamClientEndpoint endpoint: An endpoint to use to establish a
        new connection.  All data received over this connection will be sent
        along to the upstream connection.

    :param bytes header: Some extra data to write to the new downstream
        connection before proxying begins.
    """
    def failed(reason):
        upstream.transport.resumeProducing()
        upstream.transport.abortConnection()
        return reason

    upstream.transport.pauseProducing()

    peer = upstream.transport.getPeer()
    action = start_action(
        action_type=u"grid-router:proxy",
        **{u"from": (peer.host, peer.port)}
    )
    with action.context():
        d = DeferredContext(endpoint.connect(Factory.forProtocol(_Proxy)))
        d.addCallbacks(
            lambda downstream: DeferredContext(downstream.take_over(upstream, header)),
            failed,
        )
        return d.addActionFinish()
Example #17
    def create_node(self, name, distribution, metadata={}):
        size = self._default_size
        disk_size = 8

        with start_action(
            action_type=u"flocker:provision:aws:create_node",
            name=name,
            distribution=distribution,
            image_size=size,
            disk_size=disk_size,
            metadata=metadata,
        ):

            metadata = metadata.copy()
            metadata["Name"] = name

            disk1 = EBSBlockDeviceType()
            disk1.size = disk_size
            disk1.delete_on_termination = True
            diskmap = BlockDeviceMapping()
            diskmap["/dev/sda1"] = disk1

            images = self._connection.get_all_images(filters={"name": IMAGE_NAMES[distribution]})
            # Retry several times, no sleep between retries is needed.
            instance = poll_until(
                lambda: self._get_node(images[0].id, size, diskmap, metadata), repeat(0, 10), lambda x: None
            )
            return AWSNode(name=name, _provisioner=self, _instance=instance, distribution=distribution)
Example #18
    def test_nested_generators(self, logger):
        @eliot_friendly_generator_function
        def g(recurse):
            with start_action(action_type="a-recurse={}".format(recurse)):
                Message.log(message_type="m-recurse={}".format(recurse))
                if recurse:
                    set(g(False))
                else:
                    yield

        g.debug = True  # output yielded messages

        with start_action(action_type="the-action"):
            set(g(True))

        assert_expected_action_tree(
            self,
            logger,
            "the-action",
            [
                {
                    "a-recurse=True": [
                        "m-recurse=True",
                        {"a-recurse=False": ["m-recurse=False", "yielded"]},
                    ]
                }
            ],
        )
Example #19
    def _stop_container(self, container_name):
        """Attempt to stop the given container.

        There is a race condition between a process dying and
        Docker noticing that fact:

        https://github.com/docker/docker/issues/5165#issuecomment-65753753

        If we get an error indicating that this race condition happened,
        return False. This means the caller should try again. If we *do*
        successfully stop the container, return True.

        :raise APIError: If the container failed to stop for some unknown
            reason.
        :return: True if we stopped the container, False otherwise.

        """
        try:
            with start_action(
                action_type='flocker:docker:container_stop',
                container=container_name
            ):
                self._client.stop(container_name)
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # If the container doesn't exist, we swallow the error,
                # since this method is supposed to be idempotent.
                return True
            elif e.response.status_code == INTERNAL_SERVER_ERROR:
                # Docker returns this if the process had died, but
                # hasn't noticed it yet.
                return False
            else:
                raise
        return True
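A sketch of how a caller might honour the True/False retry contract described in the docstring; the helper and its names are hypothetical:

import time

def stop_container_with_retry(docker, container_name, attempts=10):
    # Retry while _stop_container reports the died-but-unnoticed race.
    for _ in range(attempts):
        if docker._stop_container(container_name):
            return
        time.sleep(0.1)  # give Docker time to notice the dead process
    raise Exception("container %s did not stop" % (container_name,))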
Example #20
 def g(which):
     Message.log(message_type="{}-a".format(which))
     with start_action(action_type=which):
         Message.log(message_type="{}-b".format(which))
         yield
         Message.log(message_type="{}-c".format(which))
     Message.log(message_type="{}-d".format(which))
Example #21
def run_process(command, *args, **kwargs):
    """
    Run a child process, capturing its stdout and stderr.

    :param list command: An argument list to use to launch the child process.

    :raise CalledProcessError: If the child process has a non-zero exit status.

    :return: A ``_ProcessResult`` instance describing the result of the child
         process.
    """
    kwargs["stdout"] = PIPE
    kwargs["stderr"] = STDOUT
    action = start_action(
        action_type="run_process", command=command, args=args, kwargs=kwargs)
    with action:
        process = Popen(command, *args, **kwargs)
        output = process.stdout.read()
        status = process.wait()
        result = _ProcessResult(command=command, output=output, status=status)
        # TODO: We should be using a specific logging type for this.
        Message.new(
            command=result.command,
            output=result.output,
            status=result.status,
        ).write()
        if result.status:
            raise _CalledProcessError(
                returncode=status, cmd=command, output=output,
            )
    return result
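A hedged usage example; the command and expected values are illustrative:

result = run_process(["echo", "hello"])
assert result.status == 0
assert result.output == b"hello\n"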
Example #22
    def _run_nodes(self, count, image_id, size, diskmap):
        """
        Create an AWS instance with the given parameters.

        Return either a boto.ec2.instance object, or None if the instance
        could not be created.
        """
        with start_action(
            action_type=u"flocker:provision:aws:create_node:run_nodes",
            instance_count=count,
        ):
            reservation = self._connection.run_instances(
                image_id,
                min_count=1,
                max_count=count,
                key_name=self._keyname,
                instance_type=size,
                security_groups=self._security_groups,
                block_device_map=diskmap,
                placement=self._zone,
                # On some operating systems a tty is required for sudo.
                # Since AWS systems have a non-root user as the login,
                # disable this so we can use sudo with conch.
            )
            return reservation.instances
Example #23
 def get_subscription(self, subscription_id):
     with start_action(action_type=u"subscription-database:get-subscription") as a:
         path = self._subscription_path(subscription_id)
         state = loads(path.getContent())
         loader = getattr(self, "_load_{}".format(state["version"]))
         a.add_success_fields(subscription=state)
         return loader(state)
Example #24
def sample(operation, metric, name):
    """
    Perform sampling of the operation.

    :param IOperation operation: An operation to perform.
    :param IMetric metric: A quantity to measure.
    :param int name: Identifier for individual sample.
    :return: Deferred firing with a sample. A sample is a dictionary
        containing a ``success`` boolean.  If ``success is True``, the
        dictionary also contains a ``value`` for the sample measurement.
        If ``success is False``, the dictionary also contains a
        ``reason`` for failure.
    """
    with start_action(action_type=u'flocker:benchmark:sample', sample=name):
        sampling = DeferredContext(maybeDeferred(operation.get_probe))

        def run_probe(probe):
            probing = metric.measure(probe.run)
            probing.addCallback(
                lambda measurement: dict(success=True, value=measurement)
            )
            probing.addCallback(bypass, probe.cleanup)

            return probing
        sampling.addCallback(run_probe)

        # Convert an error running the probe into a failed sample.
        def convert_to_result(failure):
            return dict(success=False, reason=failure.getTraceback())
        sampling.addErrback(convert_to_result)

        return sampling.addActionFinish()
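Illustrative shapes of the dictionary the returned Deferred fires with, per the docstring above (values invented):

success_sample = {"success": True, "value": 1.73}
failure_sample = {"success": False, "reason": "Traceback (most recent call last): ..."}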
Example #25
    def _get_attached_to(self, blockdevice_id):
        """
        Determines the instance a blockdevice is attached to.

        :param unicode blockdevice_id: The blockdevice_id of the blockdevice to
            query.

        :returns unicode: The name of the instance.

        :raises UnknownVolume: If there is no volume with the given id in the
            cluster.
        :raises UnattachedVolume: If the volume is not attached to any
            instance.
        """
        with start_action(
            action_type=u"flocker:node:agents:gce:get_attached_to",
            blockdevice_id=blockdevice_id
        ) as action:
            try:
                disk = self._get_gce_volume(blockdevice_id)
            except HttpError as e:
                if e.resp.status == 404:
                    raise UnknownVolume(blockdevice_id)
                else:
                    raise
            attached_to = _extract_attached_to(disk)
            if not attached_to:
                raise UnattachedVolume(blockdevice_id)
            action.add_success_fields(attached_to=attached_to)
            return attached_to
Example #26
    def create_volume(self, dataset_id, size):
        """
        Create a block device using the ICinderVolumeManager.
        The cluster_id and dataset_id are stored as metadata on the volume.

        See:

        http://docs.rackspace.com/cbs/api/v1.0/cbs-devguide/content/POST_createVolume_v1__tenant_id__volumes_volumes.html
        """
        metadata = {
            CLUSTER_ID_LABEL: unicode(self.cluster_id),
            DATASET_ID_LABEL: unicode(dataset_id),
        }
        action_type = u"blockdevice:cinder:create_volume"
        with start_action(action_type=action_type):
            # There could be difference between user-requested and
            # Cinder-created volume sizes due to several reasons:
            # 1) Round off from converting user-supplied 'size' to 'GiB' int.
            # 2) Cinder-specific size constraints.
            # XXX: Address size mismatch (see
            # https://clusterhq.atlassian.net/browse/FLOC-1874).
            requested_volume = self.cinder_volume_manager.create(
                size=Byte(size).to_GiB().value,
                metadata=metadata,
            )
            Message.new(blockdevice_id=requested_volume.id).write()
            created_volume = wait_for_volume(
                volume_manager=self.cinder_volume_manager,
                expected_volume=requested_volume,
            )
        return _blockdevicevolume_from_cinder_volume(
            cinder_volume=created_volume,
        )
Example #27
    def create_volume(self, dataset_id, size):
        """
        Create a block device using the ICinderVolumeManager.
        The cluster_id and dataset_id are stored as metadata on the volume.

        See:

        http://docs.rackspace.com/cbs/api/v1.0/cbs-devguide/content/POST_createVolume_v1__tenant_id__volumes_volumes.html
        """
        metadata = {
            CLUSTER_ID_LABEL: unicode(self.cluster_id),
            DATASET_ID_LABEL: unicode(dataset_id),
        }
        action_type = u"blockdevice:cinder:create_volume"
        with start_action(action_type=action_type):
            requested_volume = self.cinder_volume_manager.create(
                size=Byte(size).to_GB().value,
                metadata=metadata,
            )
            Message.new(blockdevice_id=requested_volume.id).write()
            created_volume = wait_for_volume(
                volume_manager=self.cinder_volume_manager,
                expected_volume=requested_volume,
            )
        return _blockdevicevolume_from_cinder_volume(
            cinder_volume=created_volume,
        )
Example #28
 def do_reboot(_):
     with start_action(
         action_type=u"flocker:provision:aws:reboot",
         instance_id=self._instance.id,
     ):
         self._instance.reboot()
         _wait_until_running(self._instance)
Example #29
 def g(recurse):
     with start_action(action_type="a-recurse={}".format(recurse)):
         Message.log(message_type="m-recurse={}".format(recurse))
         if recurse:
             set(g(False))
         else:
             yield
Example #30
def detach_destroy_volumes(api):
    """
    Detach and destroy all volumes known to this API.
    If we fail to detach a volume for any reason,
    sleep for 1 second and retry until we hit CLEANUP_RETRY_LIMIT.
    This facilitates best-effort cleanup of the volume
    environment after each test run, so that future runs
    are not impacted.
    """
    volumes = api.list_volumes()
    retry = 0
    action_type = u"agent:blockdevice:cleanup:details"
    with start_action(action_type=action_type):
        while retry < CLEANUP_RETRY_LIMIT and len(volumes) > 0:
            for volume in volumes:
                try:
                    if volume.attached_to is not None:
                        api.detach_volume(volume.blockdevice_id)
                    api.destroy_volume(volume.blockdevice_id)
                except Exception:
                    write_traceback(_logger)

            time.sleep(1.0)
            volumes = api.list_volumes()
            retry += 1

        if len(volumes) > 0:
            Message.new(u"agent:blockdevice:failedcleanup:volumes",
                        volumes=volumes).write()
Example #31
 def _a_b_test(self, logger, g):
     with start_action(action_type=u"the-action"):
         self.assertIs(
             None,
             self.successResultOf(g()),
         )
     assert_expected_action_tree(
         self,
         logger,
         u"the-action",
         [
             u"a",
             u"yielded",
             u"b",
         ],
     )
Example #32
 def _notify_success(self, details):
     from sys import stdout, stderr
     a = start_action(
         action_type=u"signup:send-confirmation",
         subscription=attr.asdict(details),
     )
     with a.context():
         d = DeferredContext(
             self.send_signup_confirmation(
                 details.customer_email,
                 details.external_introducer_furl,
                 None,
                 stdout,
                 stderr,
             ))
         return d.addActionFinish()
Example #33
 def create_configmap(self):
     with start_action(action_type="create_configmap"):
         namespace = self.user.namespace
         cfgmap = self.cfg_map
         api = self.core_api
         try:
             self.log.info(
                 "Attempting to create configmap in {}".format(namespace))
             api.create_namespaced_config_map(namespace, cfgmap)
         except ApiException as e:
             if e.status != 409:
                 estr = "Create configmap failed: {}".format(e)
                 self.log.exception(estr)
                 raise
             else:
                 self.log.info("Configmap already exists.")
Example #34
 def test_logs_as_detail(self):
     """
     Captured logs are available as details on the fixture.
     """
     fixture = CaptureEliotLogs()
     fixture.setUp()
     try:
         with start_action(action_type=u"foo"):
             pass
         details = fixture.getDetails()
     finally:
         fixture.cleanUp()
     self.assertThat(
         details[fixture.LOG_DETAIL_NAME].as_text(),
         Equals(_eliottree(fixture.logs)),
     )
Example #35
    async def _process_batch(self, global_ctx, curr_batch):
        bf = datetime.datetime.now()

        action_args = dict(action_type="execute_batch",
                           batch_size=len(curr_batch),
                           worker_name=self.worker_name)
        with start_action(**action_args):
            await self.func(self, global_ctx, curr_batch)

        af = datetime.datetime.now()
        duration = round((af - bf).total_seconds())
        print(
            f"BATCH COMPLETE: {self.worker_name}.{self.funcname}: {len(curr_batch)} profiles, took {duration}s"
        )

        await self._postprocess_batch(global_ctx, curr_batch)
Example #36
 def version(self):
     """
     Issue a I{GET} for the Kubernetes server version.
     """
     action = start_action(
         action_type=u"network-client:version",
     )
     with action.context():
         url = self.kubernetes.base_url.child(u"version")
         d = DeferredContext(self._get(url))
         d.addCallback(check_status, (OK,), self.model)
         d.addCallback(readBody)
         d.addCallback(loads)
         d.addCallback(log_response_object, action)
         d.addCallback(self.model.version_type.create)
         return d.addActionFinish()
Example #37
    def wrapper(self, *args, **kwargs):

        serializable_args = tuple(_ensure_encodeable(a) for a in args)
        serializable_kwargs = {}
        for kwarg in kwargs:
            serializable_kwargs[kwarg] = _ensure_encodeable(kwargs[kwarg])

        context = start_action(
            action_type=label,
            args=serializable_args, kwargs=serializable_kwargs,
        )
        with context.context():
            d = DeferredContext(function(self, *args, **kwargs))
            d.addCallback(log_result, context)
            d.addActionFinish()
            return d.result
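For context, a sketch of the decorator factory such a wrapper typically lives in; the name log_method and the closure layout are assumptions, not confirmed by the source:

def log_method(label):
    """Hypothetical factory; `label` and `function` are the closure
    variables the wrapper above reads."""
    def decorator(function):
        def wrapper(self, *args, **kwargs):
            pass  # body as shown above
        return wrapper
    return decorator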
Example #38
def configure_sqlalchemy(db_settings, testing=False):
    with start_action(action_type="configure_sqlalchemy",
                      sqlalchemy_url=db_settings.uri) as ctx:

        if db_settings.enable_statement_history:
            connect_args = {
                "connection_factory": make_debug_connection_factory()
            }
        else:
            connect_args = {}

        engine = create_engine(db_settings.uri, connect_args=connect_args)
        Session.configure(bind=engine)
        zope.sqlalchemy.register(Session, keep_session=testing)
        db_metadata.bind = engine
Example #39
 def replace(self, obj):
     """
     Issue a I{PUT} to replace an existing object with a new one.
     """
     action = start_action(action_type=u"network-client:replace", )
     with action.context():
         url = self.kubernetes.base_url.child(*object_location(obj))
         document = iobject_to_raw(obj)
         Message.log(submitted_object=document)
         d = DeferredContext(self._put(url, document))
         d.addCallback(check_status, (OK, ))
         d.addCallback(readBody)
         d.addCallback(loads)
         d.addCallback(log_response_object, action)
         d.addCallback(iobject_from_raw)
         return d.addActionFinish()
Example #40
 def connection(self, action_type: str) -> Iterator[Connection]:
     with el.start_action(action_type=action_type,
                          url=self.db_url,
                          driver=self.driver,
                          user=self.user):
         with TheJVM.borrow(self.classpath) as jvm:
             jvm.java.lang.Class.forName(self.driver)
             conn = jvm.java.sql.DriverManager.getConnection(
                 self.db_url, self.user, self.__password)
             try:
                 # avoid: JdbcUtils: Requested isolation level 1 is not supported;
                 #        falling back to default isolation level 2
                 conn.setTransactionIsolation(2)
                 yield conn
             finally:
                 conn.close()
Example #41
 def list(self, kind):
     """
     Issue a I{GET} to retrieve objects of a given kind.
     """
     action = start_action(
         action_type=u"network-client:list",
         kind=kind.kind,
         apiVersion=kind.apiVersion,
     )
     with action.context():
         url = self.kubernetes.base_url.child(*collection_location(kind))
         d = DeferredContext(self._get(url))
         d.addCallback(check_status, (OK, ))
         d.addCallback(readBody)
         d.addCallback(lambda body: iobject_from_raw(loads(body)))
         return d.addActionFinish()
Example #42
 def _namemap_to_json(self):
     with start_action(action_type="_namemap_to_json"):
         modmap = {}
         nm = self._name_to_manifest
         rm = self._results_map
         for k in nm:
             dt = nm[k].get("updated")
             dstr = None
             if dt:
                 dstr = self._serialize_datetime(dt)
             else:
                 dstr = rm[k].get("last_updated")
             ihash = nm[k].get("hash")
             if ihash and dstr:
                 modmap[k] = {"updated": dstr, "hash": ihash}
         return json.dumps(modmap, sort_keys=True, indent=4)
Example #43
 def create(self, obj):
     """
     Issue a I{POST} to create the given object.
     """
     action = start_action(action_type=u"network-client:create", )
     with action.context():
         url = self.kubernetes.base_url.child(*collection_location(obj))
         document = iobject_to_raw(obj)
         Message.log(submitted_object=document)
         d = DeferredContext(self._post(url, document))
         d.addCallback(check_status, (CREATED, ))
         d.addCallback(readBody)
         d.addCallback(loads)
         d.addCallback(log_response_object, action)
         d.addCallback(iobject_from_raw)
         return d.addActionFinish()
Example #44
 def check_convergence(self, database, config, kube, aws):
     with start_action(action_type=u"check-convergence"):
         subscriptions = sorted(
             database.list_active_subscription_identifiers())
         Message.log(active_subscriptions=subscriptions)
         checks = {
             self.check_configmaps,
             self.check_deployments,
             self.check_replicasets,
             self.check_pods,
             self.check_service,
             self.check_route53,
         }
         k8s_state = self.kubernetes._state
         for check in checks:
             check(database, config, subscriptions, k8s_state, aws)
Example #45
    def wrapped():
        kwargs = {}
        try:
            if request.files:
                kwargs['request.files'] = {
                    key: f.filename for key, f in request.files.items()
                }
        except Exception:
            pass

        try:
            if request.form.get('text'):
                kwargs['request.form["text"]'] = request.form['text']
        except Exception:
            pass

        with start_action(action_type='http', path=request.path, **kwargs):
            return func()
Example #46
def find_document(name: str, department: str) -> Document:
    with start_action(action_type="find_document",
                      name=name,
                      department=department) as action:
        document = session.query(Document).filter_by(
            name=name).join(SubjectArea).join(Department).filter_by(
                name=department).one_or_none()

        if document is None:
            raise ValueError(
                f"Document {name} in department {department} not found!"
            )

        action.add_success_fields(document_id=document.id)

    return document
Example #47
    def test_yield_none(self, logger):
        @eliot_friendly_generator_function
        def g():
            Message.log(message_type=u"hello")
            yield
            Message.log(message_type=u"goodbye")

        with start_action(action_type=u"the-action"):
            list(g())

        assert_expected_action_tree(
            self,
            logger,
            u"the-action",
            [u"hello", u"yielded", u"goodbye"],
        )
Example #48
File: gce.py Project: teazj/flocker
def wait_for_operation_async(reactor, compute, operation, timeout_steps):
    """
    Fires a deferred once a GCE operation is complete, or the timeout passes.

    This function will poll the operation until it reaches state 'DONE' or
    times out, and then returns the final operation resource dict.

    :param reactor: The twisted ``IReactorTime`` provider to use to schedule
        delays.
    :param compute: The GCE compute python API object.
    :param operation: A dict representing a pending GCE operation resource.
        This can be either a zone or a global operation.
    :param timeout_steps: Iterable of times in seconds to wait until timing out
        the operation.

    :returns Deferred: A Deferred firing with the concluded GCE operation
        resource or calling its errback if it times out.
    """
    poller = _create_poller(operation)

    eliot_action = start_action(
        action_type=u"flocker:node:agents:gce:wait_for_operation_async",
        operation=operation
    )

    # Apologies for the hand-rolled eliot action manipulation; Eliot's
    # interactions with Deferreds are confusing.
    with eliot_action.context():
        def finished_operation_result():
            latest_operation = poller.poll(compute)
            if latest_operation['status'] == 'DONE':
                return latest_operation
            return None

        operation_deferred = loop_until(
            reactor,
            finished_operation_result,
            timeout_steps,
        )

    def conclude_operation(final_operation):
        eliot_action.add_success_fields(final_operation=final_operation)
        eliot_action.finish()
        return final_operation

    operation_deferred.addCallback(conclude_operation)
    return operation_deferred
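A hedged usage sketch; compute and operation come from the GCE API client, and repeat(1, 120) polls once a second for up to two minutes:

from itertools import repeat

d = wait_for_operation_async(reactor, compute, operation, repeat(1, 120))
d.addCallback(lambda final: final['status'])  # 'DONE' on success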
Example #49
 def _authenticate_to_repo(self, resp):
     with start_action(action_type="_authenticate_to_repo"):
         self.logger.warning("Authentication Required.")
         self.logger.warning("Headers: {}".format(resp.headers))
         self.logger.warning("Body: {}".format(resp.text))
         magicheader = resp.headers['Www-Authenticate']
         if magicheader[:7] == "Bearer ":
             hd = {}
             hl = magicheader[7:].split(",")
             for hn in hl:
                 il = hn.split("=")
                 kk = il[0]
                 vv = il[1].replace('"', "")
                 hd[kk] = vv
             if (not hd or "realm" not in hd or "service" not in hd
                     or "scope" not in hd):
                 return None
             endpoint = hd["realm"]
             del hd["realm"]
             # We need to glue in authentication for DELETE, and that alas
             #  means a userid and password.
             r_user = os.getenv("IMAGE_REAPER_USER")
             r_pw = os.getenv("IMAGE_REAPER_PASSWORD")
             auth = None
             if r_user and r_pw:
                 auth = (r_user, r_pw)
                 self.logger.warning("Added Basic Auth credentials")
             headers = {
                 "Accept": ("application/vnd.docker.distribution." +
                            "manifest.v2+json")
             }
             self.logger.warning("Requesting auth scope {}".format(
                 hd["scope"]))
             tresp = requests.get(endpoint,
                                  headers=headers,
                                  params=hd,
                                  json=True,
                                  auth=auth)
             jresp = tresp.json()
             authtok = jresp.get("token")
             if authtok:
                 self.logger.info("Received an auth token.")
                 self.logger.warning("{}".format(authtok))
                 return {"Authorization": "Bearer {}".format(authtok)}
             else:
                 self.logger.error("No auth token: {}".format(jresp))
         return {}
Example #50
def _retry_exception_async(reactor, f, steps=(0.1,) * 10):
    """
    Retry a function if it raises an exception.

    :return: Deferred that fires with whatever the function returns or the
        last raised exception if the function never succeeds.
    """
    # Any failure is recorded and converted to False so that loop_until keeps
    # trying.  Any success is recorded and converted to True so that
    # loop_until completes even if the result evaluates to False.
    # If loop_until() succeeds then the recorded result is returned, otherwise
    # the last recorded failure is returned.
    saved_failure = [None]
    saved_result = [None]

    def handle_success(result):
        saved_result[0] = result
        return True

    def handle_failure(failure):
        Message.log(
            message_type=(
                u"flocker:provision:libcloud:retry_exception:got_exception"
            ),
        )
        write_failure(failure)
        saved_failure[0] = failure
        return False

    def make_call():
        d = maybeDeferred(f)
        d = DeferredContext(d)
        d.addCallbacks(handle_success, errback=handle_failure)
        return d.result

    action = start_action(
        action_type=u"flocker:provision:libcloud:retry_exception",
        function=function_serializer(f),
    )
    with action.context():
        d = loop_until(reactor, make_call, steps)
        d = DeferredContext(d)
        d.addCallbacks(
            lambda _: saved_result[0],
            errback=lambda _: saved_failure[0],
        )
        return d.addActionFinish()
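A hedged usage sketch; flaky_call stands in for any zero-argument callable that sometimes raises:

def flaky_call():
    return some_client.get_status()  # hypothetical client

d = _retry_exception_async(reactor, flaky_call)
d.addCallback(lambda status: Message.log(message_type=u"status", value=status))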
Example #51
def _converge_logic(actual, config, subscriptions, k8s, aws):
    convergers = [
        _converge_service,
        _converge_configmaps,
        _converge_deployments,
        _converge_replicasets,
        _converge_pods,
        _converge_route53_customer,
        _converge_route53_infrastructure,
    ]

    jobs = []
    for converger in convergers:
        with start_action(action_type=converger.func_name):
            jobs.extend(converger(actual, config, subscriptions, k8s, aws))

    return jobs
Example #52
def _execute_converge_output(jobs):
    if not jobs:
        return succeed(None)

    a = start_action(action_type=u"execute-converge-step")
    with a.context():
        job = jobs.pop(0)
        d = DeferredContext(job())
        d.addErrback(write_failure)
        d = d.addActionFinish()

    if jobs:
        # Capture whatever action context is active now and make sure it is
        # also active when we get back here to process the next job.
        DeferredContext(d).addCallback(
            lambda ignored: _execute_converge_output(jobs), )
    return d
Example #53
    def run_query(self, query_str, realtime=False, span=None, callback=None):
        settings = self.settings
        timeout = self.query_timeout

        with start_action(action_type=self.state_key, query=query_str):

            @on_event(query_str,
                      settings,
                      span=span,
                      realtime=realtime,
                      timeout=timeout)
            def handle_events(event, callback, *args, **kwargs):
                event_data = event.get("data", {})
                result = event_data.get("content", [])
                return callback(result)

            return handle_events(callback)
Example #54
def provision_subscription(smclient, details):
    """
    Create the subscription state in the SubscriptionManager service.

    :param SubscriptionDetails details:
    """
    def created(details):
        d = _wait_for_service(details.subscription_id)
        d.addCallback(lambda ignored: details)
        return d

    a = start_action(action_type=u"signup:provision-subscription")
    with a.context():
        d = DeferredContext(smclient.create(details.subscription_id,
                                            details), )
        d.addCallback(created)
        return d.addActionFinish()
Example #55
 def options_from_form(self, formdata=None):
     """Get user selections."""
     with start_action(action_type="options_from_form"):
         options = None
         if formdata:
             options = {}
             if "kernel_image" in formdata and formdata["kernel_image"]:
                 options["kernel_image"] = formdata["kernel_image"][0]
             if "size" in formdata and formdata["size"]:
                 options["size"] = formdata["size"][0]
             if "image_tag" in formdata and formdata["image_tag"]:
                 options["image_tag"] = formdata["image_tag"][0]
             if "clear_dotlocal" in formdata and formdata["clear_dotlocal"]:
                 options["clear_dotlocal"] = True
             if "enable_debug" in formdata and formdata["enable_debug"]:
                 options["enable_debug"] = True
         return options
Example #56
def run(statement, settings, timeout=None, **kwargs):
    with start_action(action_type="query.run", statement=statement):
        live_settings = settings["live"]

        channels = start(statement, settings, timeout=timeout, **kwargs)

        logging.debug(f"Results channel is {channels}")

        live_url = live_settings["url"]
        results_url = f"{live_url}/cometd"

        events_queue = Queue()
        process = Process(target=watch,
                          args=(results_url, channels, events_queue))
        process.start()

    return process, events_queue
Example #57
 def _build_pod_spec(self, img, node):
     with start_action(action_type="_build_pod_spec"):
         spec = client.V1PodSpec(
             containers=[
                 client.V1Container(
                     command=self.command,
                     image=img,
                     image_pull_policy="Always",
                     name=self._podname_from_image(img),
                     security_context=client.V1PodSecurityContext(
                         run_as_user=self.args.uid)
                 )
             ],
             restart_policy="Never",
             node_name=node
         )
         return spec
Example #58
 def _collect_start_stop_settings(self, settings):
     """Collect the settings from the UI related to the Start/Stop points.
     """
     with start_action(action_type="start_stop_settings"):
         quit = False
         start_pt = self.ui.start_pt.value()
         settings.start_pt = start_pt
         Message.log(start=start_pt)
         if not self.ui.stop_pt_checkbox.isChecked():
             stop_pt = self.ui.stop_pt.value()
             settings.stop_pt = stop_pt
             if start_pt >= stop_pt:
                 self._tell_start_greater_than_stop()
                 quit = True
         Message.log(stop=settings.stop_pt)
         Message.log(quit=quit)
         return settings, quit
Example #59
 def start_single_pod(self, spec):
     """Run a pod, with a single container, on a particular node.
     (Assuming that the pod is itself tied to a node in the pod spec.)
     This has the effect of pulling the image for that pod onto that
     node.  The run itself is unimportant.  It returns the name of the
     created pod.
     """
     with start_action(action_type="start_single_pod"):
         v1 = self.client
         name = self._derive_pod_name(spec)
         pod = client.V1Pod(spec=spec,
                            metadata=client.V1ObjectMeta(name=name))
         name = spec.containers[0].name
         self.logger.debug("Running pod %s" % name)
         made_pod = v1.create_namespaced_pod(self.namespace, pod)
         podname = made_pod.metadata.name
         return podname
Example #60
 def ensure_namespaced_config_maps(self):
     with start_action(action_type="ensure_namespaced_config_maps"):
         namespace = self.namespace
         api = self.parent.api
         cm_map = self.def_lab_config_maps()
         for cm in cm_map:
             try:
                 self.log.info((("Attempting to create configmap {} " +
                                 "in {}").format(cm, namespace)))
                 api.create_namespaced_config_map(namespace, cm_map[cm])
             except ApiException as e:
                 if e.status != 409:
                     estr = "Create configmap failed: {}".format(e)
                     self.log.exception(estr)
                     raise
                 else:
                     self.log.info("Configmap already exists.")