Code Example #1
    def start(self, collection, docker, ping, database_name):
        """Launches a cAdvisor container on the instance."""
        options = self.options
        volumes = {
            '/': {'bind': '/rootfs', 'ro': True},
            '/var/run': {'bind': '/var/run', 'ro': False},
            '/sys': {'bind': '/sys', 'ro': True},
            '/var/lib/docker': {'bind': '/var/lib/docker', 'ro': True}
        }

        logger.debug("cAdvisor: Writing stats to %s" % database_name)
        command_args = " ".join([
            "-storage_driver=influxdb",
            "-log_dir=/",
            "-storage_driver_db=%s" % quote(database_name),
            "-storage_driver_host=%s:%d" % (quote(options.host),
                                            options.port),
            "-storage_driver_user=%s" % quote(options.user),
            "-storage_driver_password=%s" % quote(options.password),
            "-storage_driver_secure=%d" % options.secure,
            # TODO: Calculate based on the run time.
            "-storage_driver_buffer_duration=5s"
        ])
        yield docker.run_containers(collection, self.info.name,
                                    None, command_args, volumes,
                                    ports={8080: 8080})

        yield self.wait(collection, ping)
Code Example #2
 def _run_complete(self, session, mgr, future):
     logger.debug('Run Plan completed')
     try:
         response = future.result()
         logger.debug("Run response of: %s", response)
     except Exception:
         logger.error("Run raised an exception", exc_info=True)
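Note: a callback with this (session, mgr, future) signature is typically pre-bound with functools.partial and attached to a Tornado future via IOLoop.add_future, which then invokes it with the finished future as its only argument. A minimal, self-contained sketch of that wiring; the run_plan coroutine and the None placeholders for session and mgr are assumptions for illustration, not loads-broker code:

import functools

from tornado import gen, ioloop


@gen.coroutine
def run_plan():
    # Stand-in for the real run coroutine (assumed for this sketch).
    return "done"


def _run_complete(session, mgr, future):
    # Same shape as the snippet above.
    print("Run response of:", future.result())


@gen.coroutine
def main():
    future = run_plan()
    # add_future calls the callback with only the future, so the leading
    # arguments must be pre-bound.
    ioloop.IOLoop.current().add_future(
        future, functools.partial(_run_complete, None, None))
    yield future
    yield gen.moment  # let the attached callback fire before exiting


ioloop.IOLoop.current().run_sync(main)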
Code Example #3
 def run(instance, tries=0):
     dns = getattr(instance.state, "dns_server", [])
     docker = instance.state.docker
     added_env = "\n".join([
         "HOST_IP=%s" % instance.instance.ip_address,
         "STATSD_HOST=%s" % instance.instance.private_ip_address,
         "STATSD_PORT=8125"])
     if env:
         _env = env + "\n" + added_env
     else:
         _env = added_env
     _env = self.substitute_names(_env, _env)
     container_env = _env.split("\n")
     container_args = self.substitute_names(command_args, _env)
     try:
         return docker.run_container(
             container_name, container_env, container_args,
             volumes, ports, dns=dns)
     except Exception as exc:
         logger.debug("Exception with run_container: %s", exc)
         if tries > 3:
             logger.debug("Giving up on running container.")
             return False
         docker.stop_container(container_name)
         return run(instance, tries=tries+1)
Code Example #4
    async def wait(self, collection, interval=60, timeout=600):
        """Waits till docker is available on every instance in the
        collection."""
        end = time.time() + timeout

        not_responded = self.not_responding_instances(collection)

        def get_container(inst):
            try:
                inst.state.docker.get_containers()
                inst.state.docker.responded = True
            except DOCKER_RETRY_EXC:
                logger.debug("Docker not ready yet on %s",
                             str(inst.instance.id))
            except Exception as exc:
                logger.debug("Got exception on %s: %r", str(inst.instance.id),
                             exc)

        # Attempt to fetch until they've all responded
        while not_responded and time.time() < end:
            await gen.multi(
                [collection.execute(get_container, x) for x in not_responded])

            # Update the not_responded
            not_responded = self.not_responding_instances(collection)

            if not_responded:
                await collection.wait(interval)

        # Prune the non-responding
        logger.debug("Pruning %d non-responding instances.",
                     len(not_responded))
        await collection.remove_instances(not_responded)
Code Example #5
    def wait(self, collection, interval=5, timeout=600):
        """Waits till docker is available on every instance in the
        collection."""
        end = time.time() + timeout

        not_responded = self.not_responding_instances(collection)

        def get_container(inst):
            try:
                inst.state.docker.get_containers()
                inst.state.docker.responded = True
            except Exception:
                pass

        # Attempt to fetch until they've all responded
        while not_responded and time.time() < end:
            yield [collection.execute(get_container, x) for x in
                   not_responded]

            # Update the not_responded
            not_responded = self.not_responding_instances(collection)

            if not_responded:
                yield collection.wait(interval)

        # Prune the non-responding
        logger.debug("Pruning %d non-responding instances.",
                     len(not_responded))
        collection.remove_instances(not_responded)
Code Example #6
    def get_amis(region):
        logger.debug("Working in %s" % region)
        try:
            conn = connect_to_region(
                region,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
                port=port, is_secure=is_secure)

            filters = {}
            if owner_id is not None and use_filters:
                filters["owner-id"] = owner_id

            images = conn.get_all_images(filters=filters)

            # After sorting by name, the last two images are the PVM and
            # HVM AMIs
            # what is this 899.4 ??? XXX
            # images = sorted([x for x in images if "899.4" in x.name],
            #                key=lambda x: x.name)[-2:]
            images = sorted(images, key=lambda x: x.name)[-2:]
            AWS_AMI_IDS[region] = {x.virtualization_type: x for x in images}
            logger.debug("%s populated" % region)
        except Exception as exc:
            logger.exception('Could not get all images in %s' % region)
            errors.append(exc)
Code Example #7
File: aws.py Project: alex/loads-broker
    def _recover(self):
        """Recover allocated instances from EC2."""
        recovered_instances = defaultdict(list)

        # Recover every region at once
        instancelist = yield [self._recover_region(x) for x in AWS_REGIONS]

        logger.debug("Found %s instances to recover.",
                     sum(map(len, instancelist)))

        for instances in instancelist:
            for instance in instances:
                tags = instance.tags

                # If this has been 'pending' too long, we put it in the main
                # instance pool for later reaping
                if not available_instance(instance):
                    self._instances[instance.region.name].append(instance)
                    continue

                if tags.get("RunId") and tags.get("Uuid"):
                    # Put allocated instances into a recovery pool separate
                    # from unallocated
                    inst_key = (tags["RunId"], tags["Uuid"])
                    recovered_instances[inst_key].append(instance)
                else:
                    self._instances[instance.region.name].append(instance)
        self._recovered = recovered_instances
Code Example #8
def setup_database(session, db_file):
    """Helper function to setup the initial database based off a json
    file"""
    logger.debug("Verifying database setup.")
    with open(db_file) as fp:
        data = json.load(fp, object_pairs_hook=OrderedDict)

    # Verify the project exists
    project = session.query(Project).filter_by(name=data["name"]).first()
    if not project:
        project = Project(name=data["name"])
        session.add(project)
        session.commit()

    logger.debug("Project ID: %s", project.uuid)

    # Key plans by name to look them up quickly if they exist
    existing = {plan.name: plan for plan in project.plans}

    # Verify every strategy exists
    for plan in data["plans"]:
        ex_plan = existing.get(plan["name"])
        if ex_plan:
            logger.debug("Found plan: %s, UUID: %s", ex_plan.name,
                         ex_plan.uuid)
            continue
        new_plan = Plan.from_json(plan)
        project.plans.append(new_plan)
        session.commit()

        logger.debug("Added plan: %s, UUID: %s", new_plan.name, new_plan.uuid)
    logger.debug("Finished database setup.")
Code Example #9
def makedirs(sftp, dirname, mode=511):  # mode 511 == 0o777
    """Creates a directory with the given dirname and mode on a remote server,
    including any intermediate-level directories."""

    if not dirname:
        raise OSError('Missing directory name')

    dirnames = deque([dirname])
    while True:
        dirname, basename = os.path.split(dirname)
        if not basename:
            dirname, basename = os.path.split(dirname)
        if not dirname or not basename:
            break
        dirnames.appendleft(dirname)

    for dirname in dirnames:
        try:
            attrs = sftp.stat(dirname)
        except OSError:
            logger.debug("Creating directory %s..." % dirname)
            sftp.mkdir(dirname, mode)
            continue

        if not stat.S_ISDIR(attrs.st_mode):
            raise OSError("%s exists and is not a directory" % dirname)
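A hedged usage sketch for the helper above, assuming paramiko supplies the SFTP session; the host name and credentials are placeholders, and makedirs is assumed to have os, stat, and collections.deque in scope where it is defined:

import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect("host.example.com", username="core")  # placeholder host
sftp = client.open_sftp()

# Creates /home/core/heka/logs plus any missing parent directories.
# The snippet's default mode=511 is simply 0o777 written in decimal.
makedirs(sftp, "/home/core/heka/logs", mode=0o755)

sftp.close()
client.close()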
Code Example #10
File: broker.py Project: alex/loads-broker
    def _start_set(self, setlink):
        if setlink.collection.started:
            return
        setlink.collection.started = True

        # Start cadvisor
        database_name = "%s-cadvisor" % self.run.id
        logger.debug("Starting up cadvisor on the hosts")
        yield self.helpers.cadvisor.start(
            setlink.collection, self.helpers.docker, self.helpers.ping,
            database_name)

        # Start heka
        yield self.helpers.heka.start(setlink.collection,
                                      self.helpers.docker,
                                      self.helpers.ping)

        # Startup local DNS if needed
        if self._use_dns:
            yield self.helpers.dns.start(setlink.collection, self._dns_map)

        # Startup the testers
        yield self.helpers.docker.run_containers(
            setlink.collection,
            container_name=setlink.meta.container_name,
            env=setlink.meta.environment_data,
            command_args=setlink.meta.additional_command_args,
            local_dns=self._use_dns
        )
Code Example #13
    def start(self, collection, docker, ping, database_name):
        """Launches Heka containers on all instances."""
        if not self.options:
            logger.debug("Heka not configured")
            return

        config_file = HEKA_CONFIG_TEMPLATE.substitute(
            remote_addr=join_host_port(self.options.host, self.options.port),
            remote_secure=self.options.secure and "true" or "false",
            influx_addr=join_host_port(self.influx.host, self.influx.port),
            influx_db=database_name)

        volumes = {'/home/core/heka': {'bind': '/heka', 'ro': False}}
        ports = {(8125, "udp"): 8125, 4352: 4352}

        # Upload heka config to all the instances
        def upload_files(inst):
            with StringIO(config_file) as fl:
                self.sshclient.upload_file(inst.instance, fl,
                                           "/home/core/heka/config.toml")
        yield collection.map(upload_files)

        logger.debug("Launching Heka...")
        yield docker.run_containers(collection, self.info.name,
                                    None, "hekad -config=/heka/config.toml",
                                    volumes=volumes, ports=ports)

        def ping_heka(inst):
            health_url = "http://%s:4352/" % inst.instance.ip_address
            yield ping.ping(health_url)
        yield collection.map(ping_heka)
Code Example #14
File: aws.py Project: alex/loads-broker
    def wait_for_running(self, interval=5, timeout=600):
        """Wait for all the instances to be running. Instances unable
        to load will be removed."""
        def update_state(inst):
            try:
                inst.instance.update()
            except Exception:
                # Updating state can fail, it happens
                pass

        end_time = time.time() + timeout

        pending = self.pending_instances()

        while time.time() < end_time and pending:
            # Update the state of all the pending instances
            yield [self.execute(update_state, inst) for inst in pending]

            pending = self.pending_instances()

            # Wait if there's pending to check again
            if pending:
                yield self.wait(interval)

        # Remove everything that isn't running by now
        dead = self.dead_instances() + self.pending_instances()

        # Don't wait for the future that kills them
        logger.debug("Removing %d dead instances that wouldn't run.",
                     len(dead))
        self.remove_instances(dead)
        return True
Code Example #15
    async def is_done(self, docker) -> bool:
        """Determine if finished or pending termination"""
        # If we haven't been started, we can't be done
        if not self.step_record.started_at:
            return False

        # If we're already stopped, then we're obviously done
        if self.ec2_collection.finished:
            return True

        run = self.step_record.run
        container_name = run.interpolate(
            self.step.container_name, self.step.environment_data)

        # If the collection has no instances running the container, it's done
        instances_running = await docker.is_running(
            self.ec2_collection,
            container_name,
            prune=self.step.prune_running
        )
        if not instances_running:
            inst_info = []
            for inst, info in self._instance_debug_info().items():
                inst_info.append(inst)
                inst_info.append(pformat(info))
            logger.debug("No instances running, collection done.")
            logger.debug("Instance information:\n%s", '\n'.join(inst_info))
            return True

        # Remove instances that stopped responding
        await self.ec2_collection.remove_dead_instances()

        # Otherwise return whether we should be stopped
        return self.step_record.should_stop()
Code Example #16
    async def _is_done(self, setlink):
        """Given a StepRecordLink, determine if the collection has
        finished or should be terminated."""
        # If we haven't been started, we can't be done
        if not setlink.step_record.started_at:
            return False

        # If we're already stopped, then we're obviously done
        if setlink.ec2_collection.finished:
            return True

        # If the collection has no instances running the container, it's done
        docker = self.helpers.docker
        container_name = setlink.step.container_name
        instances_running = await docker.is_running(
            setlink.ec2_collection,
            container_name,
            prune=setlink.step.prune_running)
        if not instances_running:
            inst_info = []
            for inst, info in self._instance_debug_info(setlink).items():
                inst_info.append(inst)
                inst_info.append(pformat(info))
            logger.debug("No instances running, collection done.")
            logger.debug("Instance information:\n%s", '\n'.join(inst_info))
            return True

        # Remove instances that stopped responding
        await setlink.ec2_collection.remove_dead_instances()

        # Otherwise return whether we should be stopped
        return setlink.step_record.should_stop()
Code Example #17
        def load(instance, tries=0):
            docker = instance.state.docker

            has_container = docker.has_image(container_name)
            if has_container:
                return

            if container_url:
                client = self.sshclient.connect(instance.instance)
                try:
                    output = docker.import_container(client, container_url)
                finally:
                    client.close()
            else:
                output = docker.pull_container(container_name)

            if not docker.has_image(container_name):
                if tries > 3:
                    logger.debug("Can't load container, retries exceeded.")
                    return False

                logger.debug("Unable to load container: %s. Retrying.",
                             output)
                return load(instance, tries+1)
            return output
Code Example #18
File: broker.py Project: alex/loads-broker
 def _print_status(self):
     while True:
         if not len(self._runs):
             logger.debug("Status: No runs in progress.")
         for uuid, mgr in self._runs.items():
             run = mgr.run
             logger.debug("Run state for %s: %s - %s", run.uuid,
                          status_to_text(mgr.state), mgr.state_description)
         yield gen.Task(self.loop.add_timeout, time.time() + 10)
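The pause at the bottom uses the old gen.Task/add_timeout idiom; on Tornado 4.1 and later the same ten-second sleep is usually written with gen.sleep. A standalone sketch, bounded so it terminates (the broker's loop runs forever):

from tornado import gen, ioloop


@gen.coroutine
def print_status():
    for _ in range(3):  # bounded for the sketch
        print("Status: No runs in progress.")
        # Equivalent to: yield gen.Task(loop.add_timeout, time.time() + 10)
        yield gen.sleep(10)


ioloop.IOLoop.current().run_sync(print_status)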
Code Example #19
 def get_container(inst):
     try:
         inst.state.docker.get_containers()
         inst.state.docker.responded = True
     except DOCKER_RETRY_EXC:
         logger.debug("Docker not ready yet on %s",
                      str(inst.instance.id))
     except Exception as exc:
         logger.debug("Got exception on %s: %r", str(inst.instance.id),
                      exc)
Code Example #20
File: aws.py Project: alex/loads-broker
    def initialize(self):
        """Fully initialize the AWS pool and dependencies, recover existing
        instances, etc.

        :returns: A future that will require the loop running to retrieve.

        """
        logger.debug("Pulling CoreOS AMI info...")
        populate_ami_ids(self.access_key, self.secret_key, port=self.port,
                         owner_id=self.owner_id)
        return self._recover()
Code Example #21
    def initialize(self):
        """Fully initialize the AWS pool and dependencies, recover existing
        instances, etc.

        :returns: A future that will require the loop running to retrieve.

        """
        logger.debug("Pulling CoreOS AMI info...")
        populate_ami_ids(self.access_key, self.secret_key, port=self.port,
                         owner_id=self.owner_id, use_filters=self.use_filters)
        return self._recover()
Code Example #22
File: util.py Project: alex/loads-broker
 def ___retry(*args, **kw):
     last_exc = None
     for attempt in range(1, attempts + 1):
         try:
             return func(*args, **kw)
         except Exception as exc:
             logger.debug('Failed (%d/%d)' % (attempt, attempts),
                          exc_info=True)
             last_exc = exc
     # failed on every attempt; re-raise the last exception
     raise last_exc
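___retry above is the inner closure of a retry decorator; func and attempts are free variables supplied by the enclosing scope. A minimal sketch of a plausible enclosing factory, using only what the closure itself references (the name retry and the default of 3 attempts are assumptions):

import functools
import logging

logger = logging.getLogger(__name__)


def retry(attempts=3):
    """Assumed factory: call func up to `attempts` times before giving up."""
    def wrapper(func):
        @functools.wraps(func)
        def ___retry(*args, **kw):
            last_exc = None
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kw)
                except Exception as exc:
                    logger.debug('Failed (%d/%d)' % (attempt, attempts),
                                 exc_info=True)
                    last_exc = exc
            raise last_exc
        return ___retry
    return wrapper

Applied as @retry(attempts=5) above any flaky callable.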
Code Example #23
 def has_container(instance):
     try:
         all_containers = instance.state.docker.get_containers()
     except Exception:
         if prune:
             msg = ("Lost contact with a container on %s, "
                    "marking dead.")
             logger.debug(msg % instance.id)
             instance.state.nonresponsive = True
         return not prune
     return any(container_name in cont["Image"]
                for cont in all_containers.values())
Code Example #24
    async def _get_steps(self):
        """Request all the step instances needed from the pool

        This is a separate method as both the recover run and new run
        will need to run this identically.

        """
        logger.debug('Getting steps & collections')
        steps = self.run.plan.steps
        collections = await gen.multi([
            self._pool.request_instances(self.run.uuid,
                                         s.uuid,
                                         count=s.instance_count,
                                         inst_type=s.instance_type,
                                         region=s.instance_region,
                                         plan=self.run.plan.name,
                                         owner=self.run.owner,
                                         run_max_time=s.run_delay +
                                         s.run_max_time) for s in steps
        ])

        try:
            # First, set up some dicts, all keyed by step.uuid
            steps_by_uuid = {x.uuid: x for x in steps}
            step_records_by_uuid = {
                x.step.uuid: x
                for x in self.run.step_records
            }

            # Link the step/step_record/ec2_collection under a single
            # StepRecordLink tuple
            for coll in collections:
                step = steps_by_uuid[coll.uuid]
                step_record = step_records_by_uuid[coll.uuid]
                setlink = StepRecordLink(step_record, step, coll)
                self._set_links.append(setlink)

        except Exception:
            # Ensure we return collections if something bad happened
            logger.error("Got an exception in runner, returning instances",
                         exc_info=True)

            try:
                await gen.multi(
                    [self._pool.release_instances(x) for x in collections])
            except Exception:
                logger.error("Wat? Got an error returning instances.",
                             exc_info=True)

            # Clear out the setlinks to make sure they aren't cleaned up
            # again
            self._set_links = []
Code Example #25
    async def _initialize(self):
        # Initialize all the collections, this needs to always be done
        # just in case we're recovering
        await self._get_steps()

        # Skip if we're running
        if self.state == RUNNING:
            return

        # Wait for the collections to come up
        self.state_description = "Waiting for running instances."
        await gen.multi(
            [x.ec2_collection.wait_for_running() for x in self._set_links])

        # Setup docker on the collections
        docker = self.helpers.docker
        await gen.multi([
            docker.setup_collection(x.ec2_collection) for x in self._set_links
        ])

        # Wait for docker on all the collections to come up
        self.state_description = "Waiting for docker"
        await gen.multi([
            docker.wait(x.ec2_collection, timeout=360) for x in self._set_links
        ])

        # Pull the base containers we need (for heka)
        self.state_description = "Pulling base container images"

        for container in self.base_containers:
            logger.debug("Pulling base container " + container.name)
            await gen.multi([
                docker.load_containers(x.ec2_collection, container.name,
                                       container.url) for x in self._set_links
            ])

        logger.debug("Pulling containers for this step.")
        # Pull the appropriate containers for every collection
        self.state_description = "Pulling step images"
        await gen.multi([
            docker.load_containers(x.ec2_collection, x.step.container_name,
                                   x.step.container_url)
            for x in self._set_links
        ])

        self.state_description = ""

        self.run.state = RUNNING
        self.run.started_at = datetime.utcnow()
        self._db_session.commit()
        log_threadid("Now running.")
Code Example #26
File: aws.py Project: alex/loads-broker
    def _region_conn(self, region=None):
        if region in self._conns:
            return self._conns[region]

        # Setup a connection
        logger.debug("Requesting connection for region: %s", region)
        conn = yield self._executor.submit(
            connect_to_region, region,
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            port=self.port, is_secure=self.is_secure)

        self._conns[region] = conn
        logger.debug("Returning connection for region: %s", region)
        return conn
Code Example #27
File: aws.py Project: alex/loads-broker
    def request_instances(self, run_id, uuid, count=1, inst_type="t1.micro",
                          region="us-west-2"):
        """Allocate a collection of instances.

        :param run_id: Run ID for these instances
        :param uuid: UUID to use for this collection
        :param count: How many instances to allocate
        :param inst_type: EC2 instance type the instances should be
        :param region: EC2 region to allocate the instances in
        :returns: Collection of allocated instances
        :rtype: :ref:`EC2Collection`

        """
        if region not in AWS_REGIONS:
            raise LoadsException("Unknown region: %s" % region)

        # First attempt to recover instances for this run/uuid
        instances = self._locate_recovered_instances(run_id, uuid)
        remaining_count = count - len(instances)

        # Add any more remaining that should be used
        instances.extend(
            self._locate_existing_instances(remaining_count, inst_type, region)
        )

        conn = yield self._region_conn(region)

        # Determine if we should allocate more instances
        num = count - len(instances)
        if num > 0:
            new_instances = yield self._allocate_instances(
                conn, num, inst_type, region)
            logger.debug("Allocated instances: %s", new_instances)
            instances.extend(new_instances)

        # Tag all the instances
        if self.use_filters:
            yield self._executor.submit(
                conn.create_tags,
                [x.id for x in instances],
                {
                    "Name": "loads-%s" % self.broker_id,
                    "Project": "loads",
                    "RunId": run_id,
                    "Uuid": uuid
                }
            )
        return EC2Collection(run_id, uuid, conn, instances, self._loop)
Code Example #28
def main(sysargs=None):
    """Parses arguments and starts up the loads-broker.

    This daemon runs in the foreground.

    """
    args, parser = _parse(sysargs)
    set_logger(debug=args.debug)
    loop = tornado.ioloop.IOLoop.instance()

    if args.aws_endpoints is not None:
        os.environ['BOTO_ENDPOINTS'] = args.aws_endpoints

    # an empty string means we don't filter by owner id
    # we translate this to None
    aws_owner_id = args.aws_owner_id or None
    aws_access_key = os.environ.get('AWS_ACCESS_KEY_ID')
    aws_secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

    heka_options = HekaOptions(args.heka_host, args.heka_port,
                               args.heka_secure)

    if args.no_influx:
        influx_options = None
    else:
        influx_options = InfluxOptions(args.influx_host, args.influx_port,
                                       args.influx_user, args.influx_password,
                                       args.influx_secure)

    application.broker = Broker(args.name,
                                loop,
                                args.database,
                                args.ssh_key,
                                heka_options,
                                influx_options,
                                aws_port=args.aws_port,
                                aws_owner_id=aws_owner_id,
                                aws_use_filters=not args.aws_skip_filters,
                                aws_access_key=aws_access_key,
                                aws_secret_key=aws_secret_key,
                                initial_db=args.initial_db)

    logger.debug('Listening on port %d...' % args.port)
    application.listen(args.port)
    try:
        loop.start()
    except KeyboardInterrupt:
        logger.debug('Bye')
Code Example #31
File: __init__.py Project: alex/loads-broker
def main(sysargs=None):
    args, parser = _parse(sysargs)
    set_logger(debug=args.debug)

    c = Client(args.host, args.port, args.scheme)

    if not hasattr(args, 'func'):
        args.func = _COMMANDS['info']

    args.func = args.func(c.session, c.root)

    try:
        res = args.func(args)
        print(json.dumps(res))
    except requests.exceptions.ConnectionError as e:
        logger.debug('Cannot connect => ' + str(e))
Code Example #32
 def ___retry(*args, **kw):
     attempt = 0
     while True:
         attempt += 1
         try:
             result = func(*args, **kw)
         except Exception as exc:
             if (on_exception is None or not on_exception(exc) or
                     attempt == attempts):
                 logger.debug('Failed (%d/%d)' % (attempt, attempts),
                              exc_info=True)
                 raise
         else:
             if (on_result is None or not on_result(result) or
                     attempt == attempts):
                 return result
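A hedged usage sketch for this hook-based variant: on_exception is asked whether a raised exception is retryable, on_result whether a returned value should be retried, and any falsy answer ends the loop. The retry(...) signature below is an assumption inferred from the closure's free variables (func, attempts, on_exception, on_result):

calls = {"count": 0}


def is_transient(exc):
    # Truthy means "retry this exception"; falsy re-raises immediately.
    return isinstance(exc, ConnectionError)


def bad_result(result):
    # Truthy means "retry this result"; falsy accepts it.
    return result is None


# `retry` is the assumed enclosing factory (see the sketch after the
# earlier ___retry example).
@retry(attempts=4, on_exception=is_transient, on_result=bad_result)
def fetch():
    calls["count"] += 1
    if calls["count"] < 3:
        raise ConnectionError("flaky network")
    return {"status": "ok"}


print(fetch())  # succeeds on the third call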
Code Example #33
File: aws.py Project: alex/loads-broker
    def __init__(self, broker_id, access_key=None, secret_key=None,
                 key_pair="loads", security="loads", max_idle=600,
                 user_data=None, io_loop=None, port=None,
                 owner_id="595879546273", use_filters=True):
        self.owner_id = owner_id
        self.use_filters = use_filters
        self.broker_id = broker_id
        self.access_key = access_key
        self.secret_key = secret_key
        self.max_idle = max_idle
        self.key_pair = key_pair
        self.security = security
        self.user_data = user_data
        self._instances = defaultdict(list)
        self._tag_filters = {"tag:Name": "loads-%s" % self.broker_id,
                             "tag:Project": "loads"}
        self._conns = {}
        self._recovered = {}
        self._executor = concurrent.futures.ThreadPoolExecutor(15)
        self._loop = io_loop or tornado.ioloop.IOLoop.instance()
        self.port = port
        # see https://github.com/boto/boto/issues/2617
        if port is not None:
            self.is_secure = port == 443
        else:
            self.is_secure = True

        # Asynchronously initialize ourself when the pool runs
        self._loop.add_future(
            self.initialize(),
            lambda x: logger.debug("Finished initializing. %s", x.result())
        )

        self.ready = Future()
Code Example #34
File: aws.py Project: alex/loads-broker
    def _recover_region(self, region):
        """Recover all the instances in a region"""
        conn = yield self._region_conn(region)
        logger.debug("Requesting instances for %s", region)

        if self.use_filters:
            filters = self._tag_filters
        else:
            filters = {}

        instances = yield self._executor.submit(
            conn.get_only_instances,
            filters=filters)

        logger.debug("Finished requesting instances for %s", region)
        return instances
Code Example #35
    def safe_run_container(self, name: str, *args, **kwargs) -> Any:
        """Call run_container until it succeeds

        Max of 5 tries w/ attempts to stop potential zombie
        containers.

        """
        for i in range(5):
            try:
                return self.run_container(name, *args, **kwargs)
            except Exception as exc:
                logger.debug("Exception with run_container (%s)",
                             name, exc_info=True)
                if i == 4:
                    logger.debug("Giving up on running container.")
                    raise
                self.stop_container(name)
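A hedged usage sketch; `docker` stands for an instance of the class that defines safe_run_container, and the container name and arguments are placeholders. Positional and keyword arguments are forwarded verbatim to run_container; after five failed tries the last exception propagates, and each failure first calls stop_container to clear a potential zombie:

docker.safe_run_container(
    "example/loadtest:latest",    # placeholder container name
    "./loadtest.sh",              # placeholder forwarded argument
    env={"STATSD_PORT": "8125"},  # placeholder forwarded keyword
)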
Code Example #36
File: broker.py Project: alex/loads-broker
    def _is_done(self, setlink):
        """Given a ContainerSetLink, determine if the collection has
        finished or should be terminated."""
        # If we haven't been started, we can't be done
        if not setlink.running.started_at:
            return False

        # If the collection has no instances running the container, it's done
        docker = self.helpers.docker
        container_name = setlink.meta.container_name
        instances_running = yield docker.is_running(setlink.collection,
                                                    container_name)
        if not instances_running:
            logger.debug("No instances running, collection done.")
            return True

        # Otherwise return whether we should be stopped
        return setlink.running.should_stop()
Code Example #37
File: broker.py Project: kitcambridge/loads-broker
    def _initialize(self):
        # Initialize all the collections, this needs to always be done
        # just in case we're recovering
        yield self._get_container_sets()

        # Skip if we're running
        if self.state == RUNNING:
            return

        # Wait for the collections to come up
        self.state_description = "Waiting for running instances."
        yield [x.collection.wait_for_running() for x in self._set_links]

        # Setup docker on the collections
        docker = self.helpers.docker
        yield [docker.setup_collection(x.collection) for x in self._set_links]

        # Wait for docker on all the collections to come up
        self.state_description = "Waiting for docker"
        yield [docker.wait(x.collection, timeout=120) for x in self._set_links]

        logger.debug("Pulling base containers: heka/cadvisor")

        # Pull the base containers we need (for heka / cadvisor)
        self.state_description = "Pulling base container images"

        for container in self.base_containers:
            yield [docker.load_containers(x.collection, container.name,
                                          container.url) for x in
                   self._set_links]

        logger.debug("Pulling containers for this set.")
        # Pull the appropriate containers for every collection
        self.state_description = "Pulling container set images"
        yield [docker.load_containers(x.collection, x.meta.container_name,
                                      x.meta.container_url) for x in
               self._set_links]

        self.state_description = ""

        self.run.state = RUNNING
        self.run.started_at = datetime.utcnow()
        self._db_session.commit()
        log_threadid("Now running.")
Code Example #38
        def run(instance, tries=0):
            dns = getattr(instance.state, "dns_server", [])
            docker = instance.state.docker
            rinstance = instance.instance

            extra = [("HOST_IP", rinstance.ip_address),
                     ("PRIVATE_IP", rinstance.private_ip_address),
                     ("STATSD_HOST", rinstance.private_ip_address),
                     ("STATSD_PORT", "8125")]
            extra_env = env.copy()
            extra_env.update(extra)
            _env = {
                self.substitute_names(k, extra_env):
                self.substitute_names(v, extra_env)
                for k, v in extra_env.items()
            }

            if command is None:
                _command = None
            else:
                _command = self.substitute_names(command, _env)

            _volumes = {}
            for host, volume in volumes.items():
                binding = volume.copy()
                binding["bind"] = self.substitute_names(
                    binding.get("bind", host), _env)
                _volumes[self.substitute_names(host, _env)] = binding

            try:
                return docker.run_container(name,
                                            _command,
                                            env=_env,
                                            volumes=_volumes,
                                            ports=ports,
                                            dns=dns,
                                            pid_mode=pid_mode)
            except Exception as exc:
                logger.debug("Exception with run_container: %s", exc)
                if tries > 3:
                    logger.debug("Giving up on running container.")
                    return False
                docker.stop_container(name)
                return run(instance, tries=tries + 1)
Code Example #39
 async def _start_step_containers(self, docker):
     """Startup the testers"""
     # XXX: run env should more likely override step env
     run = self.step_record.run
     env = run.environment_data or {}
     env.update(self.step.environment_data)
     env['CONTAINER_ID'] = self.step.uuid
     logger.debug("Starting step: %s", self.ec2_collection.uuid)
     container_name = run.interpolate(
         self.step.container_name, self.step.environment_data)
     await docker.run_containers(
         self.ec2_collection,
         container_name,
         self.step.additional_command_args,
         env=env,
         ports=self.step.port_mapping or {},
         volumes=self.step.volume_mapping or {},
         delay=self.step.node_delay,
     )
Code Example #40
    async def _start_base_containers(self, helpers, dns_map, influxdb_options):
        # Reload sysctl because coreos doesn't reload this right
        await helpers.ssh.reload_sysctl(self.ec2_collection)

        # Start Watcher
        await helpers.watcher.start(self.ec2_collection, helpers.docker)

        if self.is_monitored:
            await helpers.telegraf.start(
                self.ec2_collection,
                helpers.docker,
                influxdb_options,
                step=self.step.name,
                type_=self.step.docker_series
            )

        # Startup local DNS if needed
        if self.ec2_collection.local_dns:
            logger.debug("Starting up DNS")
            await helpers.dns.start(self.ec2_collection, dns_map)
Code Example #41
    async def remove_instances(self, ec2_instances):
        """Remove an instance entirely."""
        if not ec2_instances:
            return

        instances = [i.instance for i in ec2_instances]
        for inst in ec2_instances:
            self.instances.remove(inst)

        instance_ids = [x.id for x in instances]

        try:
            # Remove the tags
            await self.execute(self.conn.create_tags, instance_ids,
                               {"RunId": "", "Uuid": ""})
        except Exception:
            logger.debug("Error detagging instances, continuing.",
                         exc_info=True)

        try:
            logger.debug("Terminating instances %s" % str(instance_ids))
            # Nuke them
            await self.execute(self.conn.terminate_instances, instance_ids)
        except Exception:
            logger.debug("Error terminating instances.", exc_info=True)
Code Example #42
File: broker.py Project: kitcambridge/loads-broker
    def _start_set(self, setlink):
        if setlink.collection.started:
            return
        setlink.collection.started = True

        # Start cadvisor
        database_name = "%s-cadvisor" % self.run.uuid
        logger.debug("Starting up cadvisor on the hosts")
        yield self.helpers.cadvisor.start(
            setlink.collection, self.helpers.docker, self.helpers.ping,
            database_name)

        # Start heka
        yield self.helpers.heka.start(setlink.collection,
                                      self.helpers.docker,
                                      self.helpers.ping,
                                      self.run.uuid)

        # Startup local DNS if needed
        if setlink.collection.local_dns:
            logger.debug("Starting up DNS")
            yield self.helpers.dns.start(setlink.collection, self._dns_map)

        # Startup the testers
        env = "\n".join([dict2str(self.run_env),
                         setlink.meta.environment_data,
                         "CONTAINER_ID=%s" % setlink.meta.uuid])
        logger.debug("Starting container set: %s", setlink.collection.uuid)
        yield self.helpers.docker.run_containers(
            setlink.collection,
            container_name=setlink.meta.container_name,
            env=env,
            command_args=setlink.meta.additional_command_args,
            ports=setlink.meta.port_mapping or {}
        )
Code Example #43
    def new_run(cls,
                run_helpers,
                db_session,
                pool,
                io_loop,
                plan_uuid,
                run_uuid=None,
                additional_env=None,
                owner=None):
        """Create a new run manager for the given strategy name

        This creates a new run for this strategy and initializes it.

        :param db_session: SQLAlchemy database session
        :param pool: AWS EC2Pool instance to allocate from
        :param io_loop: A tornado io loop
        :param plan_uuid: The strategy UUID to use for this run
        :param run_uuid: Use the provided run_uuid instead of generating one
        :param additional_env: Additional env args to use in container set
                               interpolation
        :param owner: Owner name to associate with the run

        :returns: New RunManager in the process of being initialized,
                  along with a future tracking the run.

        """
        # Create the run for this manager
        logger.debug('Starting a new run manager')
        run = Run.new_run(db_session, plan_uuid, owner)
        if run_uuid:
            run.uuid = run_uuid
        db_session.add(run)
        db_session.commit()

        log_threadid("Committed new session.")

        run_manager = cls(run_helpers, db_session, pool, io_loop, run)
        if additional_env:
            run_manager.run_env.update(additional_env)
        future = gen.convert_yielded(run_manager.start())
        return run_manager, future
Code Example #45
File: broker.py Project: alex/loads-broker
    def _cleanup(self, exc=False):
        if exc:
            # Ensure we try and shut them down
            logger.debug("Exception occurred, ensure containers terminated.",
                         exc_info=True)
            try:
                yield [self._stop_set(s) for s in self._set_links]
            except Exception:
                logger.error("Le sigh, error shutting down instances.",
                             exc_info=True)

        # Ensure we always release the collections we used
        logger.debug("Returning collections")

        try:
            yield [self._pool.release_instances(x.collection)
                   for x in self._set_links]
        except Exception:
            logger.error("Embarassing, error returning instances.",
                         exc_info=True)

        self._set_links = []
Code Example #46
File: broker.py Project: alex/loads-broker
    def _run(self):
        # Skip if we're not running
        if self.state != RUNNING:
            return

        while True:
            if self.abort:
                logger.debug("Aborted, exiting run loop.")
                break

            stop = yield self._check_containers()
            if stop:
                break

            # Now we sleep for a bit
            yield gen.Task(self._loop.add_timeout, time.time() +
                           self.sleep_time)

        # We're done running, time to terminate
        self.run.state = TERMINATING
        self.run.completed_at = datetime.utcnow()
        self._db_session.commit()
Code Example #47
    async def _run(self):
        # Skip if we're not running
        if self.state != RUNNING:
            return

        # Main run loop
        while True:
            if self.abort:
                logger.debug("Aborted, exiting run loop.")
                break

            stop = await self._check_steps()
            if stop:
                break

            # Now we sleep for a bit
            await gen.Task(self._loop.add_timeout,
                           time.time() + self.sleep_time)

        # We're done running, time to terminate
        self.run.state = TERMINATING
        self.run.completed_at = datetime.utcnow()
        self._db_session.commit()
Code Example #48
    async def start(self, collection, docker):
        """Launches Heka containers on all instances."""
        if not self.options:
            logger.debug("Watcher not configured")
            return

        bind = {'bind': '/var/run/docker.sock', 'ro': False}
        volumes = {'/var/run/docker.sock': bind}
        ports = {}
        env = {
            'AWS_ACCESS_KEY_ID': self.options['AWS_ACCESS_KEY_ID'] or "",
            'AWS_SECRET_ACCESS_KEY': self.options['AWS_SECRET_ACCESS_KEY']
            or ""
        }

        logger.debug("Launching Watcher...")
        await docker.run_containers(collection,
                                    self.info.name,
                                    "python ./watch.py",
                                    env=env,
                                    volumes=volumes,
                                    ports=ports,
                                    pid_mode="host")
Code Example #49
    async def _start_step(self, setlink):
        setlink.ec2_collection.started = True

        # Reload sysctl because coreos doesn't reload this right
        await self.helpers.ssh.reload_sysctl(setlink.ec2_collection)

        # Start Watcher
        await self.helpers.watcher.start(setlink.ec2_collection,
                                         self.helpers.docker)

        # Start heka
        await self.helpers.heka.start(setlink.ec2_collection,
                                      self.helpers.docker,
                                      self.helpers.ping,
                                      "db" + self.run.uuid.replace('-', ''),
                                      series=setlink.step.docker_series)

        # Startup local DNS if needed
        if setlink.ec2_collection.local_dns:
            logger.debug("Starting up DNS")
            await self.helpers.dns.start(setlink.ec2_collection, self._dns_map)

        # Startup the testers
        env = self.run_env.copy()
        env.update(setlink.step.environment_data)
        env['CONTAINER_ID'] = setlink.step.uuid
        logger.debug("Starting step: %s", setlink.ec2_collection.uuid)
        await self.helpers.docker.run_containers(
            setlink.ec2_collection,
            setlink.step.container_name,
            setlink.step.additional_command_args,
            env=env,
            ports=setlink.step.port_mapping or {},
            volumes=setlink.step.volume_mapping or {},
            delay=setlink.step.node_delay,
        )
Code Example #50
    async def _cleanup(self, exc=False):
        if exc:
            # Ensure we try and shut them down
            logger.debug("Exception occurred, ensure containers terminated.",
                         exc_info=True)
            try:
                await gen.multi([self._stop_step(s) for s in self._set_links])
            except Exception:
                logger.error("Le sigh, error shutting down instances.",
                             exc_info=True)

        # Ensure we always release the collections we used
        logger.debug("Returning collections")

        try:
            await gen.multi([
                self._pool.release_instances(x.ec2_collection)
                for x in self._set_links
            ])
        except Exception:
            logger.error("Embarassing, error returning instances.",
                         exc_info=True)

        self._set_links = []
Code Example #51
        def load(instance):
            def debug(msg):
                logger.debug("[%s] %s" % (instance.instance.id, msg))

            docker = instance.state.docker

            has_container = docker.has_image(container_name)
            if has_container and "latest" not in container_name:
                return

            if container_url:
                debug("Importing %s" % container_url)
                with self.sshclient.connect(instance.instance) as client:
                    output = docker.import_container(client, container_url)
                    if output:
                        logger.debug(output)
            else:
                debug("Pulling %r" % container_name)
                output = docker.pull_container(container_name)

            if not image_loaded(docker, container_name):
                debug("Docker does not have %s" % container_name)
                return False
            return output
Code Example #52
    async def _recover(self):
        """Recover allocated instances from EC2."""
        recovered_instances = defaultdict(list)

        # Recover every region at once
        instancelist = await gen.multi(
            [self._recover_region(x) for x in AWS_REGIONS])

        logger.debug("Found %s instances to look at for recovery.",
                     sum(map(len, instancelist)))

        allocated = 0
        not_used = 0

        for instances in instancelist:
            for instance in instances:
                # skipping terminated instances
                if instance.state == 'terminated':
                    continue
                tags = instance.tags
                region = instance.region.name
                logger.debug('- %s (%s)' % (instance.id, region))
                # If this has been 'pending' too long, we put it in the main
                # instance pool for later reaping
                if not available_instance(instance):
                    self._instances[region].append(instance)
                    continue

                if tags.get("RunId") and tags.get("Uuid"):
                    # Put allocated instances into a recovery pool separate
                    # from unallocated
                    inst_key = (tags["RunId"], tags["Uuid"])
                    recovered_instances[inst_key].append(instance)
                    allocated += 1
                else:
                    self._instances[region].append(instance)
                    not_used += 1

        logger.debug("%d instances were allocated to a run" % allocated)
        logger.debug("%d instances were not used" % not_used)

        self._recovered = recovered_instances
Code Example #53
    def __init__(self,
                 name,
                 io_loop,
                 sqluri,
                 ssh_key,
                 heka_options,
                 influx_options,
                 aws_port=None,
                 aws_owner_id="595879546273",
                 aws_use_filters=True,
                 aws_access_key=None,
                 aws_secret_key=None,
                 initial_db=None):
        self.name = name
        logger.debug("loads-broker (%s)", self.name)

        self.loop = io_loop
        self._base_env = BASE_ENV.copy()
        self.watcher_options = {
            'AWS_ACCESS_KEY_ID': aws_access_key,
            'AWS_SECRET_ACCESS_KEY': aws_secret_key
        }
        user_data = _DEFAULTS["user_data"]
        if user_data is not None and os.path.exists(user_data):
            with open(user_data) as f:
                user_data = f.read()

        self.influx_options = influx_options

        if influx_options is None:
            self.influx = None
        else:
            influx_args = {
                "host": influx_options.host,
                "port": influx_options.port,
                "username": influx_options.user,
                "password": influx_options.password,
                "database": "loads"
            }

            if influx_options.secure:
                influx_args["ssl"] = True
                influx_args["verify_ssl"] = True

            if InfluxDBClient is None:
                raise ImportError('You need to install the influx lib')
            self.influx = InfluxDBClient(**influx_args)

        self.pool = aws.EC2Pool(self.name,
                                user_data=user_data,
                                io_loop=self.loop,
                                port=aws_port,
                                owner_id=aws_owner_id,
                                use_filters=aws_use_filters,
                                access_key=aws_access_key,
                                secret_key=aws_secret_key)

        # Utilities used by RunManager
        ssh = SSH(ssh_keyfile=ssh_key)
        self.run_helpers = run_helpers = RunHelpers()
        run_helpers.ping = Ping(self.loop)
        run_helpers.docker = Docker(ssh)
        run_helpers.dns = DNSMasq(DNSMASQ_INFO, run_helpers.docker)
        run_helpers.heka = Heka(HEKA_INFO,
                                ssh=ssh,
                                options=heka_options,
                                influx=influx_options)
        run_helpers.watcher = Watcher(WATCHER_INFO,
                                      options=self.watcher_options)
        run_helpers.ssh = ssh

        self.db = Database(sqluri, echo=True)

        # Run managers keyed by uuid
        self._runs = {}

        # Ensure the db is setup
        if initial_db:
            setup_database(self.db.session(), initial_db)
Code Example #54
 def debug(self, msg):
     logger.debug('[uuid:%s] %s' % (self.uuid, msg))
Code Example #55
 def _initialized(self, future):
     # Run the result to ensure we raise an exception if any occurred
     logger.debug("Finished initializing: %s.", future.result())
     self.ready.set_result(True)
Code Example #56
def log_threadid(msg):
    """Log a message, including the thread ID"""
    thread_id = threading.current_thread().ident
    logger.debug("Msg: %s, ThreadID: %s", msg, thread_id)
Code Example #57
    async def request_instances(self,
                                run_id: str,
                                uuid: str,
                                count=1,
                                inst_type="t1.micro",
                                region="us-west-2",
                                allocate_missing=True,
                                plan: Optional[str] = None,
                                owner: Optional[str] = None,
                                run_max_time: Optional[int] = None):
        """Allocate a collection of instances.

        :param run_id: Run ID for these instances
        :param uuid: UUID to use for this collection
        :param count: How many instances to allocate
        :param inst_type: EC2 instance type the instances should be
        :param region: EC2 region to allocate the instances in
        :param allocate_missing:
            If there's insufficient existing instances for this uuid,
            whether existing or new instances should be allocated to the
            collection.
        :param plan: Name of the instances' plan
        :param owner: Owner name of the instances
        :param run_max_time: Maximum expected run-time of instances in
            seconds
        :returns: Collection of allocated instances
        :rtype: :class:`EC2Collection`

        """
        if region not in AWS_REGIONS:
            raise LoadsException("Unknown region: %s" % region)

        # First attempt to recover instances for this run/uuid
        instances = self._locate_recovered_instances(run_id, uuid)
        remaining_count = count - len(instances)

        conn = await self._region_conn(region)

        # If existing/new are not being allocated, the recovered are
        # already tagged, so we're done.
        if not allocate_missing:
            return EC2Collection(run_id, uuid, conn, instances, self._loop)

        # Add any more remaining that should be used
        instances.extend(
            self._locate_existing_instances(remaining_count, inst_type, region)
        )

        # Determine if we should allocate more instances
        num = count - len(instances)
        if num > 0:
            new_instances = await self._allocate_instances(
                conn, num, inst_type, region)
            logger.debug("Allocated instances%s: %s",
                         " (Owner: %s)" % owner if owner else "",
                         new_instances)
            instances.extend(new_instances)

        # Tag all the instances
        if self.use_filters:
            tags = {
                "Name": "loads-{}{}".format(self.broker_id,
                                            "-" + plan if plan else ""),
                "Project": "loads",
                "RunId": run_id,
                "Uuid": uuid,
            }
            if owner:
                tags["Owner"] = owner
            if run_max_time is not None:
                self._tag_for_reaping(tags, run_max_time)

            # Sometimes, we can get instance data back before the AWS
            # API fully recognizes it, so we wait as needed.
            async def tag_instance(instance):
                retries = 0
                while True:
                    try:
                        await self._run_in_executor(
                            conn.create_tags, [instance.id], tags)
                        break
                    except Exception:
                        if retries > 5:
                            raise
                    retries += 1
                    await gen.Task(self._loop.add_timeout, time.time() + 1)
            await gen.multi([tag_instance(x) for x in instances])
        return EC2Collection(run_id, uuid, conn, instances, self._loop)
Code Example #58
 def _set_state(self, state):
     self._state_description = state
     if state:
         logger.debug(state)
Code Example #59
    async def start(self,
                    collection,
                    docker,
                    ping,
                    database_name,
                    series=None):
        """Launches Heka containers on all instances."""
        if not self.options:
            logger.debug("Heka not configured")
            return

        volumes = {
            '/home/core/heka': {
                'bind': '/heka',
                'ro': False
            },
            # '/proc': {'bind': '/proc', 'ro': False}
        }
        ports = {(8125, "udp"): 8125, 4352: 4352}

        series_name = ""
        if series:
            series_name = "%s." % series

        # Upload heka config to all the instances
        def upload_files(inst):
            hostname = "%s%s" % (series_name,
                                 inst.instance.ip_address.replace('.', '_'))
            if self.influx:
                config_file = HEKA_CONFIG_TEMPLATE.substitute(
                    remote_addr=join_host_port(self.options.host,
                                               self.options.port),
                    remote_secure=self.options.secure and "true" or "false",
                    influx_addr=join_host_port(self.influx.host,
                                               self.influx.port),
                    influx_db=database_name,
                    hostname=hostname)
            else:
                config_file = HEKA_NOINFLUX_TEMPLATE.substitute(
                    remote_addr=join_host_port(self.options.host,
                                               self.options.port),
                    remote_secure=self.options.secure and "true" or "false",
                    hostname=hostname)
            with StringIO(config_file) as fl:
                self.sshclient.upload_file(inst.instance, fl,
                                           "/home/core/heka/config.toml")

        await collection.map(upload_files)

        logger.debug("Launching Heka...")
        await docker.run_containers(collection,
                                    self.info.name,
                                    "hekad -config=/heka/config.toml",
                                    volumes=volumes,
                                    ports=ports,
                                    pid_mode="host")

        await gen.multi([
            ping.ping("http://%s:4352/" % inst.instance.ip_address)
            for inst in collection.instances
        ])
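For reference, HEKA_CONFIG_TEMPLATE and HEKA_NOINFLUX_TEMPLATE behave like stdlib string.Template objects: substitute fills the $-placeholders and raises KeyError for any missing name. A toy sketch with an abbreviated stand-in template (the real one is a full hekad TOML config):

from string import Template

# Toy stand-in; placeholder names mirror the substitute() calls above.
toy_template = Template(
    'remote_addr = "$remote_addr"\n'
    'influx_addr = "$influx_addr"\n'
    'influx_db = "$influx_db"\n'
    'hostname = "$hostname"\n')

config_file = toy_template.substitute(
    remote_addr="10.0.0.5:5565",
    influx_addr="10.0.0.6:8086",
    influx_db="db1234",
    hostname="series.10_0_0_7")
print(config_file)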