Example #1
0
        def _monitor_data_watch(data, stat, event):
            """Handle a data change on a single monitor node.

            A deleted node (``DELETED`` event or missing ``stat``) and an
            unparsable payload are both logged and ignored. Otherwise the
            entry for ``name`` in ``state['monitors']`` is replaced with
            values derived from the ``count`` found in the YAML payload.
            """
            node_deleted = event is not None and event.type == 'DELETED'
            if node_deleted or stat is None:
                _LOGGER.info('Removing watch on deleted monitor: %s', name)
                return

            try:
                count = yaml.load(data)['count']
            except Exception:  # pylint: disable=W0703
                # Any malformed payload is reported and otherwise ignored.
                _LOGGER.exception('Invalid monitor: %s', name)
                return

            _LOGGER.info('Reconfigure monitor: %s, count: %s', name, count)

            # Twice the configured count is made available up front and is
            # also the basis of the per-interval rate.
            available = 2.0 * count
            state['monitors'][name] = {
                'count': count,
                'available': available,
                'last_update': time.time(),
                'rate': (available / _INTERVAL)
            }
Example #2
0
    def _watch_finished_snapshots(snapshots):
        """Watch /finished.history nodes.

        Snapshots that are no longer listed are unloaded (their instances
        are dropped from the finished history); newly listed snapshots are
        fetched, decompressed into a temporary SQLite file and merged in.
        The merged mapping is then published as
        ``cell_state.finished_history``.

        :param snapshots:
            Names of the snapshot nodes currently present.
        :return:
            Always ``True``.
        """
        start_time = time.time()
        finished_history = cell_state.finished_history.copy()

        for db_node in sorted(set(loaded_snapshots) - set(snapshots)):
            _LOGGER.info('Unloading snapshot: %s', db_node)
            for instance in loaded_snapshots.pop(db_node):
                finished_history.pop(instance, None)

        for db_node in sorted(set(snapshots) - set(loaded_snapshots)):
            _LOGGER.info('Loading snapshot: %s', db_node)
            loading_start_time = time.time()
            loaded_snapshots[db_node] = []

            compressed, _stat = zkclient.get(z.path.finished_history(db_node))

            # delete=False because sqlite3 reopens the file by name after it
            # has been closed; the outer ``finally`` removes it, including
            # when decompression or the write itself fails.
            db_file = tempfile.NamedTemporaryFile(delete=False, mode='wb')
            try:
                with db_file:
                    db_file.write(zlib.decompress(compressed))

                conn = sqlite3.connect(db_file.name)
                try:
                    cur = conn.cursor()
                    sql = 'SELECT name, data FROM finished ORDER BY timestamp'
                    for instance, payload in cur.execute(sql):
                        # Rows with an empty payload are kept as-is.
                        if payload:
                            payload = yaml.load(payload)
                        finished_history[instance] = payload
                        loaded_snapshots[db_node].append(instance)
                finally:
                    # Release the connection even if a row fails to parse.
                    conn.close()
            finally:
                os.unlink(db_file.name)

            _LOGGER.debug('Loading time: %s', time.time() - loading_start_time)

        cell_state.finished_history = finished_history
        _LOGGER.debug(
            'Loaded snapshots: %d, finished: %d, finished history: %d, '
            'time: %s', len(loaded_snapshots), len(cell_state.finished),
            len(cell_state.finished_history),
            time.time() - start_time)

        return True
Example #3
0
    def schedule(app, manifest, count, env, proid):
        """Schedule app(s) on the cell master.

        The manifest file is loaded, any ``_id`` key is removed, and the
        environment, proid and (when absent) an affinity built from the
        first two dot-separated components of ``app`` are set. The result
        is submitted via ``masterapi.create_apps`` and every returned app
        id is printed.
        """
        with io.open(manifest, 'rb') as manifest_file:
            spec = yaml.load(stream=manifest_file)

        # TODO: should we delete all potential attributes starting
        #                with _ ?
        if '_id' in spec:
            del spec['_id']

        spec['environment'] = env

        if 'affinity' not in spec:
            # TODO: allow custom affinity formats.
            name_parts = app.split('.')
            spec['affinity'] = '{0}.{1}'.format(*name_parts)

        spec['proid'] = proid

        scheduled = masterapi.create_apps(
            context.GLOBAL.zk.conn, app, spec, count, 'admin'
        )
        for app_id in scheduled:
            print(app_id)
Example #4
0
def _update_nodes_change(data):
    """Update local Treadmill Nodes IP IPSet when the globals server list gets
    updated.

    Every server name in the YAML payload is resolved with
    ``socket.gethostbyname``; names that fail to resolve are logged and
    skipped. The resolved addresses are handed to ``iptables.atomic_set``
    for ``iptables.SET_TM_NODES``.
    """
    resolved_ips = []
    for hostname in yaml.load(data):
        try:
            address = socket.gethostbyname(hostname)
        except socket.gaierror:
            _LOGGER.warning('Unable to resolve %r', hostname)
        else:
            resolved_ips.append(address)

    iptables.atomic_set(
        iptables.SET_TM_NODES,
        content=resolved_ips,
        set_type='hash:ip',
        family='inet'
    )
Example #5
0
    def test_normalize_run_once(self):
        """Test missing defaults which cause the app to fail."""
        masterapi.create_apps.side_effect = _create_apps

        # The manifest sets a restart limit but no interval.
        manifest = """
        services:
        - command: /bin/sleep 1m
          name: sleep1m
          restart:
            limit: 0
        memory: 150M
        cpu: 10%
        disk: 100M
        """

        created = self.instance.create('proid.app', yaml.load(manifest))

        # Disable E1126: Sequence index is not an int, slice, or instance
        # pylint: disable=E1126
        restart_policy = created['services'][0]['restart']
        self.assertEqual(restart_policy['interval'], 60)
        self.assertTrue(masterapi.create_apps.called)
Example #6
0
    def top(port, socket, auth, title, modules, config, cors_origin, workers,
            backlog, authz):
        """Run Treadmill API server.

        :param port:
            TCP port to serve on; takes precedence over ``socket``.
        :param socket:
            Passed to ``rest.UdsRestServer`` when no port is given.
        :param auth:
            Forwarded to the REST server as ``auth_type``.
        :param title:
            API title; underscores are replaced with spaces.
        :param modules:
            Names of the API modules to initialise.
        :param config:
            Iterable of ``(module, cfg)`` pairs where ``cfg`` is an open
            stream holding the YAML configuration of ``module``.
        :param cors_origin:
            Forwarded to ``api.init``.
        :param workers:
            Forwarded to the REST server.
        :param backlog:
            Forwarded to the REST server.
        :param authz:
            Forwarded to ``api.init``.
        :raises ``click.UsageError``:
            If a config is supplied for a module not listed in ``modules``.
        """
        context.GLOBAL.zk.add_listener(zkutils.exit_on_lost)

        api_modules = {module: None for module in modules}
        for module, cfg in config:
            # Always release the config stream, including when the module
            # is unknown or its YAML cannot be parsed.
            try:
                if module not in api_modules:
                    raise click.UsageError(
                        'Orphan config: %s, not in: %r' % (module, modules)
                    )
                api_modules[module] = yaml.load(stream=cfg)
            finally:
                cfg.close()

        api_paths = api.init(api_modules, title.replace('_', ' '), cors_origin,
                             authz)

        if port:
            rest_server = rest.TcpRestServer(port, auth_type=auth,
                                             protect=api_paths,
                                             workers=workers,
                                             backlog=backlog)
        # TODO: need to rename that - conflicts with import socket.
        elif socket:
            rest_server = rest.UdsRestServer(socket, auth_type=auth,
                                             workers=workers,
                                             backlog=backlog)
        else:
            click.echo('port or socket must be specified')
            sys.exit(1)

        try:
            rest_server.run()
        except sock.error as sock_err:
            _LOGGER.warning('Socker error: %s', sock_err)
            if sock_err.errno == errno.EADDRINUSE:
                # TODO: hack, but please keep it for now, otherwise on the
                #       setup several master processes run on same server
                #       lookup api (listen on port 8080) is in tight loop.
                time.sleep(5)
Example #7
0
        def on_event(filename, operation, content):
            """Translate an identity-group file event into a message.

            Files outside ``/identity-groups/`` yield ``None``. Otherwise the
            path below the prefix is split on its last ``/`` into the group
            name and the integer identity; a ``None`` operation marks the
            message as ``sow``. Non-empty content is parsed as YAML and
            merged into the message.
            """
            prefix = '/identity-groups/'
            if not filename.startswith(prefix):
                return None

            identity_group, identity = filename[len(prefix):].rsplit('/', 1)

            message = {
                'topic': '/identity-groups',
                'identity-group': identity_group,
                'identity': int(identity),
                'app': None,
                'host': None,
                'sow': operation is None
            }
            if content:
                message.update(yaml.load(content))

            return message
Example #8
0
    def status(self, timeout=30):
        """Query the status of the resource service.

        Connects to ``self.status_sock`` and parses the reply as YAML. While
        the service is not reachable (connection refused or socket missing)
        the call is retried with a doubling backoff.

        :param ``float`` timeout:
            Wait at least timeout seconds for the service to reply.
        :returns:
            The parsed status reply.
        :raises ``ResourceServiceTimeoutError``:
            If the requested service does not come up before timeout.
        :raises ``socket.error``:
            If there is a communication error with the service.
        """
        backoff = 0
        while backoff <= (timeout / 2):
            with contextlib.closing(socket.socket(socket.AF_UNIX,
                                                  type=socket.SOCK_STREAM,
                                                  proto=0)) as status_socket:
                try:
                    status_socket.connect(self.status_sock)
                    # Close the file wrapper explicitly: the socket is only
                    # really released once every makefile() object is closed.
                    with contextlib.closing(
                        status_socket.makefile('r')
                    ) as status_stream:
                        status = yaml.load(stream=status_stream)
                except socket.error as err:
                    if err.errno in (errno.ECONNREFUSED, errno.ENOENT):
                        # Service not up yet: retry after the backoff.
                        status = None
                    else:
                        raise

            if status is not None:
                break

            _LOGGER.info('Waiting for service %r to become available',
                         self.name)
            # Implement a backoff mechanism
            backoff += (backoff or 1)
            time.sleep(backoff)

        else:
            # Loop ended without a reply: the backoff budget is exhausted.
            raise ResourceServiceTimeoutError(
                'Service %r timed out' % (self.name),
            )

        return status
Example #9
0
    def test_cache_placement_data(self):
        """Tests sync of placement data.

        Caching a placed app must produce a cache file carrying the identity
        stored in its placement node.
        """
        # Access to a protected member _synchronize of a client class
        # pylint: disable=W0212
        app_name = 'xxx.app1#1234'
        zk_content = {
            'placement': {
                'test.xx.com': {
                    '.data': """
                        state: up
                        since: 100
                    """,
                    app_name: {
                        '.data': '{identity: 1}\n',
                    },
                }
            },
            'scheduled': {
                app_name: {
                    'affinity': 'app1',
                    'memory': '1G',
                    'disk': '1G',
                    'cpu': '100%',
                    'identity_group': 'xxx.app1',
                },
            }
        }
        self.make_mock_zk(zk_content)
        zk_conn = kazoo.client.KazooClient()
        self.evmgr._hostname = 'test.xx.com'
        self.evmgr._cache(zk_conn, app_name)

        cached_path = os.path.join(self.cache, app_name)
        self.assertTrue(os.path.exists(cached_path))

        with io.open(cached_path) as cached_file:
            cached = yaml.load(stream=cached_file)
            self.assertEqual(cached['identity'], 1)
Example #10
0
def _configure(apis, manifest, appname):
    """Configure a Treadmill app.

    Without a manifest the existing app definition is fetched and printed
    (a missing app propagates ``restclient.NotFoundError``). With a manifest
    path the app is updated if it exists, created otherwise, then fetched
    again and printed.
    """
    app_url = _APP_REST_PATH + appname

    try:
        existing = restclient.get(apis, app_url).json()
    except restclient.NotFoundError:
        # A missing app is only acceptable when we are about to create it.
        if not manifest:
            raise
        existing = None

    if manifest:
        with io.open(manifest, 'rb') as manifest_file:
            app = yaml.load(stream=manifest_file)

        if existing:
            restclient.put(apis, app_url, payload=app)
        else:
            restclient.post(apis, app_url, payload=app)

        # Get new value after update.
        existing = restclient.get(apis, app_url).json()

    cli.out(_FORMATTER(existing))
Example #11
0
def _configure(apis, manifest, appname):
    """Configure a Treadmill app.

    Without a manifest the existing app definition is fetched and printed
    (a missing app propagates ``restclient.NotFoundError``). With a manifest
    stream the app is updated if it exists, created otherwise, and the
    server response is printed.
    """
    app_url = _APP_REST_PATH + appname

    try:
        existing = restclient.get(apis, app_url).json()
    except restclient.NotFoundError:
        # A missing app is only acceptable when we are about to create it.
        if not manifest:
            raise
        existing = None

    if manifest:
        app = yaml.load(stream=manifest)

        # PUT updates an existing app, POST creates a new one.
        submit = restclient.put if existing else restclient.post
        existing = submit(apis, app_url, payload=app).json()

    cli.out(_FORMATTER(existing))
Example #12
0
    def _load_server_info(self, path):
        """Loads the server info from the given path.

        The info is accepted only when it is non-empty, carries a
        ``partition`` matching the ``self._partition`` pattern and
        ``self._add_ldap_connection`` returns a truthy value for it. The
        basename of ``path`` is then stored under ``hostname``. A missing
        file or invalid YAML is logged and yields ``None``.

        :param path:
            The path to the server info
        :return:
            A `dict` representing the server info or None
        :raises ``OSError``:
            If reading fails for a reason other than the file being missing.
        """
        try:
            with io.open(path, 'r') as f:
                server_info = yaml.load(stream=f)

                if not server_info:
                    return None

                if 'partition' not in server_info:
                    return None

                if fnmatch.fnmatch(server_info['partition'], self._partition):
                    if self._add_ldap_connection(server_info):
                        hostname = os.path.basename(path)
                        server_info['hostname'] = hostname
                        _LOGGER.info('Found valid server %r', server_info)
                        return server_info

                _LOGGER.info('Found invalid server %r at path %r', server_info,
                             path)

        except OSError as err:
            _LOGGER.exception('Cannot read server info %r', path)
            # Compare errno by value; identity on ints only works by accident.
            if err.errno != errno.ENOENT:
                raise
        except yaml.YAMLError:
            _LOGGER.exception('Invalid server info YAML %r', path)

        return None
Example #13
0
        def on_event(filename, operation, content):
            """Translate an app-group file event into a message.

            Files outside the ``_SUB_DIR`` directory yield ``None``. The
            message carries the file basename as ``app-group`` and is marked
            ``sow`` when ``operation`` is ``None``. Non-empty content is
            parsed as YAML and merged in, with its ``data`` list of
            ``key=value`` strings turned into a dict.
            """
            if not filename.startswith('{}/'.format(_SUB_DIR)):
                return None

            message = {
                'topic': _TOPIC,
                'app-group': os.path.basename(filename),
                'sow': operation is None,
            }
            if not content:
                return message

            app_group_data = yaml.load(content)
            raw_data = app_group_data.pop('data', [])
            message.update(app_group_data)

            # Split each entry on the first '=' only; values may contain '='.
            message['data'] = {
                key: val
                for key, val in (kv_str.split('=', 1) for kv_str in raw_data)
            }

            return message
Example #14
0
def _render(name, ctx):
    """Render named template.

    The template is looked up in this package, rendered with the attributes
    of ``ctx`` as variables, and the rendered text is parsed as YAML.
    """
    loader = jinja2.PackageLoader(__name__)
    template = jinja2.Environment(loader=loader).get_template(name)
    rendered = template.render(**ctx.__dict__)
    return yaml.load(rendered)
Example #15
0
    def server(approot, register, port, auth, modules, config, title,
               cors_origin, rate_limit_global, rate_limit_module,
               rate_limit_by):
        """Runs nodeinfo server.

        :param approot:
            Root directory used to build the ``appenv.AppEnvironment``.
        :param register:
            When truthy, register the endpoint in ZooKeeper and in the local
            endpoints directory.
        :param port:
            Port requested for the REST server; the port actually reported
            by the server is used for registration.
        :param auth:
            Forwarded to the REST server as ``auth_type``.
        :param modules:
            Names of the API modules to initialise, if any.
        :param config:
            Iterable of ``(module, cfg)`` pairs where ``cfg`` is an open
            stream holding the YAML configuration of ``module``.
        :param title:
            API title; underscores are replaced with spaces.
        :param cors_origin:
            Forwarded to ``api.init``.
        :param rate_limit_global:
            Forwarded to ``_get_rate_limit``.
        :param rate_limit_module:
            Forwarded to ``_get_rate_limit``.
        :param rate_limit_by:
            Forwarded to ``_get_rate_limit``.
        :raises ``click.UsageError``:
            If a config is supplied for a module not listed in ``modules``.
        """
        rate_limit = _get_rate_limit(
            rate_limit_global, rate_limit_module, rate_limit_by
        )

        rest_server = rest.TcpRestServer(port, auth_type=auth,
                                         rate_limit=rate_limit)
        port = rest_server.port

        hostname = sysinfo.hostname()
        hostport = '%s:%s' % (hostname, port)

        if register:
            zkclient = context.GLOBAL.zk.conn
            zkclient.add_listener(zkutils.exit_on_lost)

            appname = 'root.%s#%010d' % (hostname, os.getpid())
            app_pattern = 'root.%s#*' % (hostname)
            path = z.path.endpoint(appname, 'tcp', 'nodeinfo')
            _LOGGER.info('register endpoint: %s %s', path, hostport)
            zkutils.create(zkclient, path, hostport,
                           acl=[zkclient.make_servers_acl()],
                           ephemeral=True)

            # TODO: remove "legacy" endpoint registration once conversion is
            #       complete.
            tm_env = appenv.AppEnvironment(approot)

            endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)
            endpoints_mgr.unlink_all(
                app_pattern, endpoint='nodeinfo', proto='tcp'
            )

            # On Linux endpoint for nodeinfo is a symlink pointing to
            # /proc/{pid}, on Windows it's just a regular file
            owner = '/proc/{}'.format(os.getpid()) if os.name == 'posix' \
                else None

            endpoints_mgr.create_spec(
                appname=appname,
                endpoint='nodeinfo',
                proto='tcp',
                real_port=port,
                pid=os.getpid(),
                port=port,
                owner=owner,
            )

        _LOGGER.info('Starting nodeinfo server on port: %s', port)

        utils.drop_privileges()

        if modules:
            api_modules = {module: None for module in modules}
            for module, cfg in config:
                # Always release the config stream, including when the
                # module is unknown or its YAML cannot be parsed.
                try:
                    if module not in api_modules:
                        raise click.UsageError(
                            'Orphan config: %s, not in: %r' % (module,
                                                               api_modules)
                        )
                    api_modules[module] = yaml.load(stream=cfg)
                finally:
                    cfg.close()

            rest_server.protect = api.init(
                api_modules,
                title.replace('_', ' '),
                cors_origin
            )

        rest_server.run()
Example #16
0
def _install_services(scan_dir, package, src_dir, dst_dir, params, prefix_len,
                      rec=None):
    """Expand services in scan directory and install.

    Walks the resources of ``package`` under ``src_dir``. Sub-directories
    are created under ``dst_dir`` and handed to ``_install``; every ``.yml``
    resource is treated as a service definition, interpolated with
    ``params`` and turned into a supervised service in ``scan_dir``. Other
    resources and the control dir/file entries are skipped. When ``rec`` is
    given, each created destination directory is written to it.
    """
    package_name = package.__name__

    for item in pkg_resources.resource_listdir(package_name, src_dir):
        if item in (_CONTROL_DIR_NAME, _CONTROL_DIR_FILE):
            continue

        resource_path = os.path.join(src_dir, item)

        if pkg_resources.resource_isdir(package_name, resource_path):
            dst_path = os.path.join(dst_dir, resource_path[prefix_len:])

            fs.mkdir_safe(dst_path)
            if rec:
                rec.write('%s\n' % os.path.join(dst_path, ''))

            _install(
                package,
                resource_path,
                dst_dir,
                params,
                prefix_len=prefix_len,
                rec=rec
            )
            continue

        if not resource_path.endswith('.yml'):
            continue

        # Drop the source prefix and the '.yml' suffix.
        dst_path = os.path.join(dst_dir, resource_path[prefix_len:-4])
        name = os.path.basename(dst_path)

        _LOGGER.info('Expand service (%s): %s => %s', name, resource_path,
                     dst_path)

        fs.mkdir_safe(dst_path)
        if rec:
            rec.write('%s\n' % os.path.join(dst_path, ''))

        raw_conf = pkg_resources.resource_string(package_name, resource_path)
        if not raw_conf:
            _LOGGER.warning('Service def was empty: %s', resource_path)
            continue

        service_conf = bootstrap.interpolate_service_conf(
            resource_path, yaml.load(raw_conf.decode('utf8')), name, params
        )

        svc = supervisor.create_service(
            scan_dir,
            service_conf['name'],
            service_conf['command'],
            userid=service_conf['userid'],
            downed=service_conf['downed'],
            environ_dir=service_conf['environ_dir'],
            environ=service_conf['environ'],
            monitor_policy=service_conf['monitor_policy'],
            notification_fd=service_conf['notification_fd'],
            call_before_run=service_conf['call_before_run'],
            call_before_finish=service_conf['call_before_finish'],
            logger_args=service_conf['logger_args'],
            ionice_prio=0,
        )

        for data_file in service_conf['data_dir']:
            permission = 0o755 if data_file['executable'] else 0o644
            fs.write_safe(
                os.path.join(svc.data_dir, data_file['path']),
                # Bind the current entry as a default to avoid late binding.
                lambda f, data_file=data_file: f.write(
                    data_file['content']
                ),
                mode='w',
                permission=permission
            )
Example #17
0
    def _on_created(self, impl, filepath):
        """Private handler for request creation events.

        Loads the request file found under ``filepath``, validates it
        against ``impl.PAYLOAD_SCHEMA`` and passes it to
        ``impl.on_create_request``. The result (or an ``_error`` record when
        processing failed) is written to the reply file.

        :returns:
            ``True`` when a reply without ``_error`` was written; ``False``
            when the event was ignored, the request was invalid and removed,
            the request was not actioned, or the reply carries an error.
        """
        # Avoid triggering on changes to the service directory itself.
        if filepath == self._rsrc_dir:
            return False

        req_id = os.path.basename(filepath)

        # Avoid triggering on temporary files
        if req_id[0] == '.':
            return False

        req_file = os.path.join(filepath, REQ_FILE)
        rep_file = os.path.join(filepath, REP_FILE)

        try:
            with io.open(req_file) as f:
                req_data = yaml.load(stream=f)

        except IOError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTDIR):
                raise

            # No readable request file: drop the whole request entry.
            _LOGGER.exception('Removing invalid request: %r', req_id)
            try:
                fs.rm_safe(filepath)
            except OSError as rm_err:
                if rm_err.errno != errno.EISDIR:
                    raise
                fs.rmtree_safe(filepath)
            return False

        # TODO: We should also validate the req_id format
        with lc.LogContext(_LOGGER, req_id,
                           adapter_cls=lc.ContainerAdapter) as log:

            log.debug('created %r: %r', req_id, req_data)

            try:
                # TODO: We should also validate the req_id format
                utils.validate(req_data, impl.PAYLOAD_SCHEMA)
                res = impl.on_create_request(req_id, req_data)

            except exc.InvalidInputError as err:
                log.error('Invalid request data: %r: %s', req_data, err)
                res = {'_error': {'input': req_data, 'why': str(err)}}

            except Exception as err:  # pylint: disable=W0703
                log.exception('Unable to process request: %r %r:',
                              req_id, req_data)
                res = {'_error': {'input': req_data, 'why': str(err)}}

        if res is None:
            # Request was not actioned
            return False

        def _write_reply(reply_stream):
            """Dump the reply as an explicit YAML document."""
            yaml.dump(
                res, explicit_start=True, explicit_end=True,
                default_flow_style=False, stream=reply_stream
            )

        fs.write_safe(rep_file, _write_reply, mode='w', permission=0o644)

        # Return True if there were no error
        return not res.get('_error', False)
Example #18
0
def _run(apis, count, manifest, memory, cpu, disk, tickets, service,
         restart_limit, restart_interval, endpoint, appname, command):
    """Run Treadmill app.

    Builds an app definition from an optional manifest file plus the
    command line overrides, posts it to ``/instance/<appname>`` and prints
    the id of every created instance.

    :param apis:
        Passed through to ``restclient.post``.
    :param count:
        Number of instances; added as ``?count=`` when truthy.
    :param manifest:
        Optional path to a YAML manifest used as the base definition. An
        empty manifest is treated like no manifest.
    :param memory:
        Override for ``memory``; applied only when the app is non-empty.
    :param cpu:
        Override for ``cpu``; applied only when the app is non-empty.
    :param disk:
        Override for ``disk``; applied only when the app is non-empty.
    :param tickets:
        Stored under ``tickets`` when truthy.
    :param service:
        Name of the service to add or update; defaults to the basename of
        the first word of the command.
    :param restart_limit:
        Restart limit for a newly added service.
    :param restart_interval:
        Restart interval for a newly added service.
    :param endpoint:
        Iterable of ``(name, port)`` pairs stored under ``endpoints``.
    :param appname:
        Name of the app to run.
    :param command:
        Sequence of command words, joined with spaces.
    """
    # too many branches
    #
    # pylint: disable=R0912
    app = {}
    if manifest:
        # NOTE(review): the manifest is user supplied; confirm that `yaml`
        # here is a safe-loading wrapper and not plain PyYAML `yaml.load`.
        with io.open(manifest, 'rb') as fd:
            # An empty manifest parses to None; fall back to an empty app
            # so the updates below do not fail on a NoneType.
            app = yaml.load(stream=fd) or {}

    if endpoint:
        app['endpoints'] = [{
            'name': name,
            'port': port
        } for name, port in endpoint]
    if tickets:
        app['tickets'] = tickets

    if command and not service:
        # Take the basename of the command, always assume / on all
        # platforms.
        service = os.path.basename(shlex.split(command[0])[0])

    services_dict = {svc['name']: svc for svc in app.get('services', [])}
    if service:
        if service not in services_dict:
            services_dict[service] = {
                'name': service,
                'restart': {
                    'limit': restart_limit,
                    'interval': restart_interval,
                }
            }

        if command:
            services_dict[service]['command'] = ' '.join(command)

    if services_dict:
        app['services'] = list(six.itervalues(services_dict))

    if app:
        # Ensure defaults are set.
        if 'memory' not in app:
            app['memory'] = _DEFAULT_MEM
        if 'disk' not in app:
            app['disk'] = _DEFAULT_DISK
        if 'cpu' not in app:
            app['cpu'] = _DEFAULT_CPU

        # Override if requested.
        if memory is not None:
            app['memory'] = str(memory)
        if disk is not None:
            app['disk'] = str(disk)
        if cpu is not None:
            app['cpu'] = str(cpu)

    url = '/instance/' + appname
    if count:
        url += '?count=%d' % count

    response = restclient.post(apis, url, payload=app)
    for instance_id in response.json()['instances']:
        cli.out(instance_id)