예제 #1
0
    def _cache(self, zkclient, app):
        """Cache the scheduled manifest of ``app`` as YAML in <cache>/<app>.

        The manifest is read from the app's scheduled node in Zookeeper, the
        part of the instance name following ``#`` is stored under the
        ``task`` key, and any data found on this host's placement node is
        merged on top before the result is written out.

        A missing Zookeeper node is logged as a warning and nothing is
        written.

        :param zkclient:
            Zookeeper client handed to ``zkutils.get``.
        :param app:
            Instance name; it must contain a ``#``.
        """
        scheduled_path = z.path.scheduled(app)
        placement_path = z.path.placement(self._hostname, app)

        try:
            manifest = zkutils.get(zkclient, scheduled_path)
            # TODO: need a function to parse instance id from name.
            instance_start = app.index('#') + 1
            manifest['task'] = app[instance_start:]

            placement = zkutils.get(zkclient, placement_path)
            if placement is not None:
                manifest.update(placement)

            cache_file = os.path.join(self.tm_env.cache_dir, app)

            def _dump_manifest(stream):
                return yaml.dump(manifest, stream=stream)

            fs.write_safe(
                cache_file,
                _dump_manifest,
                prefix='.%s-' % app,
                mode='w',
                permission=0o644
            )
            _LOGGER.info('Created cache manifest: %s', cache_file)

        except kazoo.exceptions.NoNodeError:
            _LOGGER.warning('App %r not found', app)
예제 #2
0
    def execute(self, data):
        """Record the exit info of a container and bring its service down.

        ``data['id']`` is expected to be ``<unique_name>,<service_name>``.
        The exit details are written as JSON to the ``EXIT_INFO`` file in the
        container service's data directory, then the supervisor is asked to
        put the container into the down state (which will trigger cleanup).

        :param data:
            Mapping with the keys ``id``, ``return_code``, ``signal`` and
            ``timestamp``.
        :returns ``bool``:
            Always ``True``.
        """
        unique_name, service_name = data['id'].split(',')
        container_svc = supervisor.open_service(
            os.path.join(self._tm_env.apps_dir, unique_name)
        )

        _LOGGER.critical('Container down: %r', data)

        def _write_exit_info(stream):
            # Built at write time, exactly when the file is being filled in.
            exit_info = {
                'service': service_name,
                'return_code': data['return_code'],
                'signal': data['signal'],
                'timestamp': data['timestamp']
            }
            return stream.writelines(utils.json_genencode(exit_info))

        exit_info_file = os.path.join(container_svc.data_dir, EXIT_INFO)
        fs.write_safe(
            exit_info_file,
            _write_exit_info,
            mode='w',
            prefix='.tmp',
            permission=0o644
        )

        try:
            supervisor.control_service(
                container_svc.directory,
                supervisor.ServiceControlAction.down
            )
        except subproc.CalledProcessError:
            # Best effort: a failure to bring the service down is only logged.
            _LOGGER.warning('Failed to bring down container: %r', unique_name)

        return True
예제 #3
0
def create(alerts_dir,
           epoch_ts=None,
           instanceid=None,
           summary=None,
           type_=None,
           **alert_data):
    """Create a file in alerts_dir representing the alert.

    The alert is serialized as indented JSON holding the named fields plus
    any extra keyword arguments. The file name is derived from
    ``instanceid`` and ``type_`` by ``_to_filename``.

    :param alerts_dir:
        Directory receiving the alert file.
    :param epoch_ts:
        Alert timestamp; the current time is used when it is falsy.
    :param instanceid:
        Stored under the ``instanceid`` key.
    :param summary:
        Stored under the ``summary`` key.
    :param type_:
        Stored under the ``type_`` key.
    :returns:
        Full path of the alert file.
    """
    alert_data['epoch_ts'] = epoch_ts or time.time()
    alert_data['instanceid'] = instanceid
    alert_data['summary'] = summary
    alert_data['type_'] = type_

    def _write_alert(stream):
        encoded = json.dumps(alert_data, indent=4).encode()
        return stream.write(encoded)

    filename = os.path.join(alerts_dir, _to_filename(instanceid, type_))
    fs.write_safe(
        filename,
        _write_alert,
        prefix='.tmp',
        permission=0o644,
        subdir='.tmp',
    )
    return filename
예제 #4
0
def write(benchmark_result_file, result):
    """Write benchmark result.

    Each device gets its own numbered section holding the device identifier
    followed by one option per metric.

    Sample output file format:
        [device0]
        device = 589d88bd-8098-4041-900e-7fcac18abab3
        write_bps = 314572800
        read_bps = 314572800
        write_iops = 64000
        read_iops = 4000

    :param benchmark_result_file:
        benchmark result file
    :param result:
        {device: {metric: value, }, }
    """
    # NOTE(review): ``SafeConfigParser`` is a deprecated alias of
    # ``ConfigParser`` on Python 3 and is gone in 3.12; kept as-is here
    # because the module supports Python 2 through ``six``.
    config = configparser.SafeConfigParser()
    for index, (device, metrics) in enumerate(six.iteritems(result)):
        section = _DEVICE + six.text_type(index)
        config.add_section(section)
        config.set(section, _DEVICE, device)
        for metric, value in six.iteritems(metrics):
            # Option values must be strings.
            config.set(section, metric, six.text_type(value))

    # ``config.write`` emits text, so the temporary file has to be opened in
    # text mode, as every other text writer using fs.write_safe does.
    fs.write_safe(
        benchmark_result_file,
        config.write,
        mode='w',
        permission=0o644
    )
예제 #5
0
def post(events_dir, event):
    """Post application event to event directory.

    The event file is named ``<now>,<instanceid>,<event_type>,<event_data>``
    and its content is the event payload: nothing when the payload is
    ``None``, the payload itself when it is a string, otherwise its YAML
    dump.

    :param events_dir:
        Directory receiving the event file.
    :param event:
        Object whose ``to_data()`` yields six fields (timestamp, source,
        instance id, event type, event data, payload).
    """
    _LOGGER.debug('post: %s: %r', events_dir, event)

    _ts, _src, instanceid, event_type, event_data, payload = event.to_data()

    name_fields = (time.time(), instanceid, event_type, event_data)
    filename = '%s,%s,%s,%s' % name_fields

    def _emit_payload(stream):
        if payload is None:
            return
        if isinstance(payload, six.string_types):
            stream.write(payload)
            return
        yaml.dump(payload, stream=stream)

    event_file = os.path.join(events_dir, filename)
    fs.write_safe(
        event_file,
        _emit_payload,
        prefix='.tmp',
        mode='w',
        permission=0o644
    )
예제 #6
0
def _write_keytab(fname, data):
    """Safely write keytab data to a file, logging its SHA-1 digest.

    :param fname: Keytab filename.
    :param data: Keytab data in bytes.
    """
    checksum = hashlib.sha1(data).hexdigest()
    _LOGGER.info('Writing %s: %s', fname, checksum)

    def _store(stream):
        return stream.write(data)

    fs.write_safe(fname, _store, prefix='.tmp', mode='wb')
예제 #7
0
 def _write_app_yaml(event, manifest_str):
     """Helper writing the manifest text to the ``event`` file.

     :param event:
         Path of the file to write.
     :param manifest_str:
         Manifest content, written in text mode.
     """
     def _store(stream):
         return stream.write(manifest_str)

     fs.write_safe(event, _store, mode='w')
 def _write_keytab(self, response):
     """Safely write the keytab carried by ``response`` to disk.

     The keytab is stored in ``self.keytab_dir`` under the name returned
     by ``self.peer()`` and handed over to ``(self.uid, self.gid)``.

     :param response:
         Mapping whose ``['result']['keytab_entries']`` holds the
         base64-encoded keytab.
     """
     keytab = os.path.join(self.keytab_dir, self.peer())
     encoded_entries = response['result']['keytab_entries']
     keytab_entries = base64.standard_b64decode(encoded_entries)
     _LOGGER.info('Writing keytab: %s', keytab)

     def _store(stream):
         return stream.write(keytab_entries)

     fs.write_safe(keytab, _store, owner=(self.uid, self.gid))
예제 #9
0
def _dump_entry(base, entry_dn, entry_data):
    """Write an entry under a base directory.

    The comma-separated components of ``entry_dn`` become nested
    directories in reverse order below ``base``; the entry itself is
    written to ``data.ldif`` in the innermost directory.

    :param base:
        Root directory of the dump.
    :param entry_dn:
        Distinguished name of the entry.
    :param entry_data:
        Entry content, rendered by ``_sorted_ldif_entry``.
    """
    components = entry_dn.split(',')
    components.reverse()
    fullpath = os.path.join(base, *(components + ['data.ldif']))

    def _write_ldif(stream):
        return stream.write(_sorted_ldif_entry(entry_dn, entry_data))

    fs.write_safe(
        fullpath,
        _write_ldif,
        mode='w',
        permission=0o644,
    )
예제 #10
0
def _write_keytab(keytab_entries, keytab, owner):
    """Write keytab file owned by the given user.

    An unknown ``owner`` is logged as an error and nothing is written.

    :param keytab_entries:
        Content to write.
    :param keytab:
        Destination file.
    :param owner:
        User name, resolved to uid/gid through ``pwd.getpwnam``.
    """
    try:
        account = pwd.getpwnam(owner)
    except KeyError:
        _LOGGER.error('Invalid user: %s', owner)
        return

    def _store(stream):
        return stream.write(keytab_entries)

    fs.write_safe(
        keytab,
        _store,
        owner=(account.pw_uid, account.pw_gid)
    )
예제 #11
0
def write_keytab(keytab_file, encoded):
    """Safely write encoded keytab data to a file.

    The keytab arrives encoded (via the json gssapiprotocol) and is decoded
    from URL-safe base64 into the real keytab bytes while writing.

    :param keytab_file: Keytab filename.
    :param encoded: Keytab encoded data.
    """
    _LOGGER.debug('Write keytab file %s', keytab_file)

    def _store_decoded(stream):
        return stream.write(base64.urlsafe_b64decode(encoded))

    fs.write_safe(
        keytab_file,
        _store_decoded,
        prefix='.tmp',
        mode='wb'
    )
예제 #12
0
        def _write(self, timeout_at):
            """Set up the watchdog's lease file.

            Creates the parent directory if needed, writes ``self.content``
            to ``self.filename`` and finally sets the file's access and
            modification times to ``timeout_at``.

            :param timeout_at:
                Timestamp applied to the lease file.
            """
            lease_dir, lease_name = os.path.split(self.filename)

            fs.mkdir_safe(lease_dir)

            def _store(stream):
                return stream.write(self.content)

            fs.write_safe(
                self.filename,
                _store,
                prefix='.' + lease_name,
                mode='w',
                permission=0o600
            )
            os.utime(self.filename, (timeout_at, timeout_at))
예제 #13
0
def _copy_keytab(kt_file, keytab, owner):
    """Copy keytab from cache, handing the copy over to ``owner``.

    An unknown ``owner`` is logged as an error and nothing is copied.

    :param kt_file:
        Source keytab, read in binary mode.
    :param keytab:
        Destination file.
    :param owner:
        User name, resolved to uid/gid through ``pwd.getpwnam``.
    """
    try:
        account = pwd.getpwnam(owner)
    except KeyError:
        _LOGGER.error('Invalid user: %s', owner)
        return

    with io.open(kt_file, 'rb') as source:

        def _copy(stream):
            return stream.write(source.read())

        fs.write_safe(
            keytab,
            _copy,
            owner=(account.pw_uid, account.pw_gid)
        )
예제 #14
0
def benchmark(directory,
              volume=BENCHMARK_VOLUME,
              rw_type=BENCHMARK_RW_TYPE,
              job_number=BENCHMARK_JOB_NUMBER,
              thread_number=BENCHMARK_THREAD_NUMBER,
              block_size=BENCHMARK_IOPS_BLOCK_SIZE,
              max_seconds=BENCHMARK_MAX_SECONDS):
    """Use fio to do benchmark.

    A fio job file is generated in ``directory``, fio is run against it with
    terse output redirected to a result file in the same directory, and the
    read/write bandwidth and IOPS figures are extracted from that output.

    :param directory:
        Directory benchmarked; also holds the fio config and result files.
    :param volume:
        Value of the fio ``size`` option.
    :param rw_type:
        Value of the fio ``rw`` option.
    :param job_number:
        Value of the fio ``numjobs`` option.
    :param thread_number:
        Value of the fio ``iodepth`` option.
    :param block_size:
        Value of the fio ``bs`` option.
    :param max_seconds:
        Value of the fio ``runtime`` option.
    :returns:
        ``dict`` of metric name to value; empty when fio exits non-zero.
    """
    result = {}
    config_file = os.path.join(directory, _BENCHMARK_CONFIG_FILE)
    result_file = os.path.join(directory, _BENCHMARK_RESULT_FILE)

    # prepare fio config
    global_options = (
        ('group_reporting', '1'),
        ('unlink', '1'),
        ('time_based', '1'),
        ('direct', '1'),
        ('size', volume),
        ('rw', rw_type),
        ('numjobs', job_number),
        ('iodepth', thread_number),
        ('bs', block_size),
        ('runtime', max_seconds),
    )
    config = configparser.SafeConfigParser()
    config.add_section('global')
    for option, value in global_options:
        config.set('global', option, value)
    config.add_section('drive')
    config.set('drive', 'directory', directory)

    def _write_config(stream):
        return config.write(EqualSpaceRemover(stream))

    # NOTE(review): no ``mode`` is passed, so fs.write_safe opens the file
    # with its default mode - confirm that default suits the text emitted by
    # ``config.write`` through ``EqualSpaceRemover``.
    fs.write_safe(config_file, _write_config)

    # start fio
    fio_cmd = [
        'fio', config_file, '--norandommap', '--minimal', '--output',
        result_file
    ]
    ret = subproc.call(fio_cmd)
    if ret != 0:
        return result

    # parse fio terse result
    # http://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
    with io.open(result_file) as report:
        fields = report.read().split(';')
        # Bandwidth fields are scaled by 1024 (presumably reported in
        # KiB/s - verify against the fio terse format).
        result[Metrics.READ_BPS.value] = int(float(fields[6]) * 1024)
        result[Metrics.READ_IOPS.value] = int(fields[7])
        result[Metrics.WRITE_BPS.value] = int(float(fields[47]) * 1024)
        result[Metrics.WRITE_IOPS.value] = int(fields[48])

    return result
예제 #15
0
def save_app(manifest, container_dir, app_json=STATE_JSON):
    """Save the app manifest as JSON and return it frozen into an object.

    :param manifest:
        Manifest data to persist (with allocated vip and ports).
    :param container_dir:
        Directory receiving the state file.
    :param app_json:
        Name of the state file inside ``container_dir``.
    :returns:
        Result of ``utils.to_obj(manifest)``.
    """
    def _write_state(stream):
        return stream.writelines(utils.json_genencode(manifest))

    # Save the manifest with allocated vip and ports in the state
    fs.write_safe(
        os.path.join(container_dir, app_json),
        _write_state,
        mode='w',
        # chmod for the file to be world readable.
        permission=0o644
    )

    # Freeze the app data into a namedtuple object
    frozen = utils.to_obj(manifest)
    return frozen
예제 #16
0
    def _cache(self, zkclient, app, check_existing=False):
        """Read the manifest and placement data from Zk and store it as YAML in
        <cache>/<app>.

        Nothing is written when the placement node is missing, when the
        scheduled node is missing, or when ``check_existing`` is set and the
        cached file's ctime is not older than the placement node's ctime.

        :param zkclient:
            Zookeeper client handed to ``zkutils``.
        :param ``str`` app:
            Instance name.
        :param ``bool`` check_existing:
            Whether to check if the file already exists and is up to date.
        """
        placement_path = z.path.placement(self._hostname, app)
        try:
            placement_data, placement_meta = zkutils.get_with_metadata(
                zkclient, placement_path
            )
            # Zookeeper ctime is divided by 1000 to compare with stat times.
            placed_at = placement_meta.ctime / 1000.0
        except kazoo.exceptions.NoNodeError:
            _LOGGER.info('Placement %s/%s not found', self._hostname, app)
            return

        cache_file = os.path.join(self.tm_env.cache_dir, app)
        if check_existing:
            try:
                cached_at = os.stat(cache_file).st_ctime
            except FileNotFoundError:
                cached_at = None

            if cached_at and cached_at >= placed_at:
                _LOGGER.info('%s is up to date', cache_file)
                return

        scheduled_path = z.path.scheduled(app)
        try:
            manifest = zkutils.get(zkclient, scheduled_path)
            # TODO: need a function to parse instance id from name.
            instance_start = app.index('#') + 1
            manifest['task'] = app[instance_start:]

            if placement_data is not None:
                manifest.update(placement_data)

            def _dump_manifest(stream):
                return yaml.dump(manifest, stream=stream)

            fs.write_safe(
                cache_file,
                _dump_manifest,
                prefix='.%s-' % app,
                mode='w',
                permission=0o644
            )
            _LOGGER.info('Created cache manifest: %s', cache_file)

        except kazoo.exceptions.NoNodeError:
            _LOGGER.info('App %s not found', app)
예제 #17
0
def create(alerts_dir,
           epoch_ts=None,
           instanceid=None,
           summary=None,
           type_=None,
           **kwargs):
    """Create a file in alerts_dir representing the alert.

    The alert is written as indented JSON holding the named fields plus any
    extra keyword arguments; a falsy ``epoch_ts`` is replaced by the time at
    which the file content is produced. The file name comes from
    ``_to_filename(instanceid, type_)``.
    """
    alert_file = os.path.join(alerts_dir, _to_filename(instanceid, type_))

    def _write_alert(stream):
        alert = {
            'epoch_ts': epoch_ts or time.time(),
            'instanceid': instanceid,
            'summary': summary,
            'type_': type_,
        }
        alert.update(kwargs)
        return stream.write(json.dumps(alert, indent=4).encode())

    fs.write_safe(alert_file, _write_alert, permission=0o644)
예제 #18
0
def flag_aborted(container_dir, why=None, payload=None):
    """Flags container as aborted.

    Called when aborting in failed run step.
    Consumed by cleanup script.

    Writes a JSON document with the keys ``why`` and ``payload`` to the
    ``aborted`` file in the container directory.

    :param container_dir:
        Container directory receiving the ``aborted`` file.
    :param why:
        Abort reason, rendered through ``_why_str``.
    :param payload:
        Optional extra detail; stringified unless it is ``None``.
    """
    payload_text = None if payload is None else str(payload)

    def _write_aborted(stream):
        details = {
            'why': _why_str(why),
            'payload': payload_text
        }
        return stream.writelines(utils.json_genencode(details))

    fs.write_safe(
        os.path.join(container_dir, 'aborted'),
        _write_aborted,
        mode='w',
        permission=0o644
    )
예제 #19
0
    def execute(self, data):
        """Shut down the node by writing a watchdog with the down reason data.

        The watchdog file is named ``Monitor-<prefix><service>`` inside the
        watchdog directory and contains a one-line description of the crash.

        :param data:
            Mapping with the keys ``service``, ``return_code`` and
            ``signal``.
        :returns ``bool``:
            Always ``True``.
        """
        _LOGGER.critical('Node down: %r', data)

        watchdog_name = 'Monitor-{prefix}{service}'.format(
            prefix=self._prefix,
            service=data['service']
        )
        filename = os.path.join(self._watchdog_dir, watchdog_name)

        def _write_reason(stream):
            reason = (
                'Node service {service!r} crashed.'
                ' Last exit {return_code} (sig:{signal}).'
            ).format(
                service=data['service'],
                return_code=data['return_code'],
                signal=data['signal']
            )
            return stream.write(reason)

        fs.write_safe(
            filename,
            _write_reason,
            prefix='.tmp',
            mode='w',
            permission=0o644
        )

        return True
예제 #20
0
    def execute(self, data):
        """Execute the down action.

        Dumps ``data`` as JSON into the ``EXIT_INFO`` file of the container
        service's data directory, then asks the supervisor to bring the
        container service down.

        :returns ``bool``:
            ``True`` - Monitor should keep running.
            ``False`` - Monitor should stop.
        """
        _LOGGER.critical('Container down: %r', data)

        def _write_exit_info(stream):
            return stream.writelines(utils.json_genencode(data))

        exit_info_file = os.path.join(self._container_svc.data_dir, EXIT_INFO)
        fs.write_safe(
            exit_info_file,
            _write_exit_info,
            mode='w',
            prefix='.tmp',
            permission=0o644
        )

        # NOTE: This will take down this container's monitor service as well.
        # NOTE: The supervisor has to be running as we call from inside the
        #       container.
        supervisor.control_service(
            self._container_svc.directory,
            supervisor.ServiceControlAction.down
        )

        return False
예제 #21
0
    def krb5keytab(keytab, sock_path):
        """The client utility to get krb5keytab from the local proxy.

        Sends a JSON request over the proxy socket and, when ``keytab`` is
        given, writes the base64-decoded keytab entries from the reply to
        that file. Failures are reported through ``cli.bad_exit``.

        :param keytab:
            File to write the keytab to; when falsy the request asks the
            server to write the keytab instead.
        :param sock_path:
            Path of the proxy socket; ``_DEFAULT_SOCK_PATH`` when falsy.
        """
        sock_path = sock_path or _DEFAULT_SOCK_PATH

        client = peercredprotocol.PeerCredLineClient(sock_path)
        try:
            client.connect()
            # If we write keytab ourselves, do not ask server to write the
            # file.
            request = {'keytab': not keytab}
            client.write(json.dumps(request).encode('utf8'))

            reply = client.read()
            if not reply:
                cli.bad_exit('Connection closed.')

            response = json.loads(reply.decode())
            if response.get('status') != 'success':
                cli.bad_exit(response.get('why', 'Unknown error'))

            if keytab:
                encoded_entries = response['result']['keytab_entries']
                keytab_entries = base64.standard_b64decode(encoded_entries)
                _LOGGER.info('Writing keytab: %s', keytab)

                def _store(stream):
                    return stream.write(keytab_entries)

                fs.write_safe(keytab, _store)

        except FileNotFoundError:
            cli.bad_exit(
                'Failed connecting to %s, krb5keytab proxy is not running.',
                sock_path
            )
        finally:
            client.disconnect()
0
def configure(tm_env, event, runtime):
    """Creates directory necessary for starting the application.

    This operation is idem-potent (it can be repeated).

    The directory layout is::

        - (treadmill root)/
          - apps/
            - (app unique name)/
              - data/
                - app_start
                - app.json
                - manifest.yml
                - policy.json
                env/
                - TREADMILL_*
                run
                finish
                log/
                - run

    The 'run' script is responsible for creating container environment
    and starting the container.

    The 'finish' script is invoked when container terminates and will
    deallocate any resources (NAT rules, etc) that were allocated for the
    container.

    :param tm_env:
        Environment object providing ``apps_dir`` and ``app_events_dir``.
    :param event:
        Path of the event file describing the app.
    :param runtime:
        Runtime handed to ``app_manifest.load``.
    :returns:
        Directory of the created container service, or ``None`` when the
        event could not be loaded.
    """
    # Load the app from the event
    try:
        manifest_data = app_manifest.load(tm_env, event, runtime)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception('No event to load: %r', event)
        return None

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Generate a unique name for the app
    uniq_name = appcfg.app_unique_name(app)

    # Write the actual container start script
    if os.name == 'nt':
        run_cmd = [sys.executable, '-m', 'treadmill.ms', 'sproc', 'run', '.']
    else:
        run_cmd = ['exec', dist.TREADMILL_BIN, 'sproc', 'run', '../']
    run_script = ' '.join(run_cmd)

    # Create the service for that container
    monitor_policy = {
        'limit': 0,
        'interval': 60
    }
    container_svc = supervisor.create_service(
        tm_env.apps_dir,
        name=uniq_name,
        app_run_script=run_script,
        userid='root',
        downed=False,
        monitor_policy=monitor_policy,
        environ={},
        environment=app.environment
    )
    data_dir = container_svc.data_dir

    # Copy the original event as 'manifest.yml' in the container dir
    manifest_copy = os.path.join(data_dir, 'manifest.yml')
    shutil.copyfile(event, manifest_copy)

    # Store the app.json in the container directory
    def _write_app_json(stream):
        return stream.writelines(utils.json_genencode(manifest_data))

    fs.write_safe(
        os.path.join(data_dir, appcfg.APP_JSON),
        _write_app_json,
        mode='w',
        permission=0o644
    )

    events_dir = tm_env.app_events_dir
    configured = events.ConfiguredTraceEvent(
        instanceid=app.name,
        uniqueid=app.uniqueid
    )
    appevents.post(events_dir, configured)

    return container_svc.directory
예제 #23
0
def _install_services(scan_dir, package, src_dir, dst_dir, params, prefix_len,
                      rec=None):
    """Expand services in scan directory and install.

    Lists the package resources found under ``src_dir``. Sub-directories are
    created below ``dst_dir`` and handed to ``_install``; every ``*.yml``
    resource is treated as a service definition: it is parsed, interpolated
    with ``params`` and turned into a supervised service in ``scan_dir``,
    with the files listed in its ``data_dir`` entry written into the
    service's data directory. Other resources are ignored.

    :param scan_dir:
        Scan directory passed to ``supervisor.create_service``.
    :param package:
        Python package holding the resources (its ``__name__`` is used).
    :param src_dir:
        Resource directory, inside the package, to expand.
    :param dst_dir:
        Destination root directory.
    :param params:
        Parameters passed to ``bootstrap.interpolate_service_conf``.
    :param prefix_len:
        Number of leading characters dropped from a resource path to obtain
        its path relative to ``dst_dir``.
    :param rec:
        Optional writable object; each created directory is recorded in it,
        one per line, with a trailing path separator.
    """
    package_name = package.__name__
    contents = pkg_resources.resource_listdir(package_name, src_dir)

    for item in contents:
        # Control entries are skipped here.
        if item in (_CONTROL_DIR_NAME, _CONTROL_DIR_FILE):
            continue

        resource_path = os.path.join(src_dir, item)
        if pkg_resources.resource_isdir(package_name,
                                        os.path.join(src_dir, item)):
            dst_path = os.path.join(dst_dir, resource_path[prefix_len:])

            fs.mkdir_safe(dst_path)
            if rec:
                # Joining with '' appends a trailing separator, which marks
                # the recorded entry as a directory.
                rec.write('%s\n' % os.path.join(dst_path, ''))

            # Sub-directories are installed through ``_install`` (defined
            # elsewhere), not by recursing into this function.
            _install(
                package,
                os.path.join(src_dir, item),
                dst_dir,
                params,
                prefix_len=prefix_len,
                rec=rec
            )
        elif resource_path.endswith('.yml'):
            # The destination directory is named after the definition file
            # without its 4-character '.yml' suffix.
            dst_path = os.path.join(dst_dir, resource_path[prefix_len:-4])
            name = os.path.basename(dst_path)

            _LOGGER.info('Expand service (%s): %s => %s', name, resource_path,
                         dst_path)

            fs.mkdir_safe(dst_path)
            if rec:
                rec.write('%s\n' % os.path.join(dst_path, ''))

            service_conf_file = pkg_resources.resource_string(
                package_name,
                resource_path
            )

            if not service_conf_file:
                # Note that the destination directory has already been
                # created (and recorded) at this point.
                _LOGGER.warning('Service def was empty: %s', resource_path)
                continue

            # NOTE(review): ``yaml.load`` is called without an explicit
            # Loader; the input is a packaged resource, but consider
            # ``yaml.safe_load`` if definitions can ever be untrusted.
            service_conf = yaml.load(service_conf_file.decode('utf8'))
            service_conf = bootstrap.interpolate_service_conf(
                resource_path, service_conf, name, params)

            svc = supervisor.create_service(
                scan_dir,
                service_conf['name'],
                service_conf['command'],
                userid=service_conf['userid'],
                downed=service_conf['downed'],
                environ_dir=service_conf['environ_dir'],
                environ=service_conf['environ'],
                monitor_policy=service_conf['monitor_policy'],
                notification_fd=service_conf['notification_fd'],
                call_before_run=service_conf['call_before_run'],
                call_before_finish=service_conf['call_before_finish'],
                logger_args=service_conf['logger_args'],
                ionice_prio=0,
            )

            for file in service_conf['data_dir']:
                # Executable entries get 0o755, everything else 0o644.
                permission = 0o644
                if file['executable']:
                    permission = 0o755
                fs.write_safe(
                    os.path.join(svc.data_dir, file['path']),
                    # ``file=file`` binds the current loop item as a default
                    # so the writer does not depend on the loop variable.
                    lambda f, file=file: f.write(
                        file['content']
                    ),
                    mode='w',
                    permission=permission
                )
예제 #24
0
    def _on_created(self, impl, filepath):
        """Private handler for request creation events.

        Loads the request file found under ``filepath``, validates it
        against ``impl.PAYLOAD_SCHEMA``, passes it to
        ``impl.on_create_request`` and writes the outcome (or an ``_error``
        document) to the reply file next to the request.

        :param impl:
            Service implementation providing ``PAYLOAD_SCHEMA`` and
            ``on_create_request``.
        :param filepath:
            Path of the created request directory.
        :returns ``bool``:
            ``True`` when a reply without ``_error`` was written; ``False``
            when the event was ignored, the request was invalid and removed,
            the request was not actioned, or the reply carries an error.
        :raises IOError:
            When the request file cannot be read for a reason other than
            ``ENOENT`` / ``ENOTDIR``.
        """
        # Avoid triggering on changes to the service directory itself.
        if filepath == self._rsrc_dir:
            return False

        req_id = os.path.basename(filepath)

        # Avoid triggering on temporary files
        # NOTE(review): indexing assumes a non-empty basename, i.e. that
        # ``filepath`` never ends with a path separator - confirm.
        if req_id[0] == '.':
            return False

        req_file = os.path.join(filepath, REQ_FILE)
        rep_file = os.path.join(filepath, REP_FILE)

        try:
            with io.open(req_file) as f:
                # NOTE(review): ``yaml.load`` without an explicit Loader on
                # request data; consider ``yaml.safe_load`` if requests can
                # come from untrusted writers.
                req_data = yaml.load(stream=f)

        except IOError as err:
            # A missing request file, or a request path that is not a
            # directory, makes the request invalid: remove it and give up.
            if (err.errno == errno.ENOENT or
                    err.errno == errno.ENOTDIR):
                _LOGGER.exception('Removing invalid request: %r', req_id)
                try:
                    fs.rm_safe(filepath)
                except OSError as rm_err:
                    # The request path is a directory: remove the tree.
                    if rm_err.errno == errno.EISDIR:
                        fs.rmtree_safe(filepath)
                    else:
                        raise
                return False
            raise

        # TODO: We should also validate the req_id format
        with lc.LogContext(_LOGGER, req_id,
                           adapter_cls=lc.ContainerAdapter) as log:

            log.debug('created %r: %r', req_id, req_data)

            try:
                # TODO: We should also validate the req_id format
                utils.validate(req_data, impl.PAYLOAD_SCHEMA)
                res = impl.on_create_request(req_id, req_data)

            except exc.InvalidInputError as err:
                # Invalid input is reported back through the reply file.
                log.error('Invalid request data: %r: %s', req_data, err)
                res = {'_error': {'input': req_data, 'why': str(err)}}

            except Exception as err:  # pylint: disable=W0703
                # Any other failure is also turned into an error reply
                # rather than propagated to the caller.
                log.exception('Unable to process request: %r %r:',
                              req_id, req_data)
                res = {'_error': {'input': req_data, 'why': str(err)}}

        if res is None:
            # Request was not actioned
            return False

        fs.write_safe(
            rep_file,
            lambda f: yaml.dump(
                res, explicit_start=True, explicit_end=True,
                default_flow_style=False, stream=f
            ),
            mode='w',
            permission=0o644
        )

        # Return True if there were no error
        return not bool(res.get('_error', False))
예제 #25
0
def configure(tm_env, event, runtime):
    """Creates directory necessary for starting the application.

    This operation is idem-potent (it can be repeated).

    The directory layout is::

        - (treadmill root)/
          - apps/
            - (app unique name)/
              - data/
                - app_start
                - app.json
                - manifest.yml
                env/
                - TREADMILL_*
                run
                finish
                log/
                - run

    The 'run' script is responsible for creating container environment
    and starting the container.

    The 'finish' script is invoked when container terminates and will
    deallocate any resources (NAT rules, etc) that were allocated for the
    container.

    :param tm_env:
        Environment object providing ``apps_dir``, ``app_events_dir`` and
        ``running_tombstone_dir``.
    :param event:
        Path of the event file describing the app.
    :param runtime:
        Runtime handed to ``load_runtime_manifest``.
    :returns:
        Directory of the created container service, or ``None`` when the
        event file is gone.
    """
    # Load the app from the event
    try:
        manifest_data = load_runtime_manifest(tm_env, event, runtime)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception('No event to load: %r', event)
        return None

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Generate a unique name for the app
    uniq_name = appcfg.app_unique_name(app)

    # Write the actual container start script
    treadmill_path = subproc.resolve('treadmill')
    if os.name == 'nt':
        script_template = '{treadmill}/scripts/treadmill sproc run .'
    else:
        script_template = 'exec {treadmill}/bin/treadmill sproc run ../'
    run_script = script_template.format(treadmill=treadmill_path)

    # Create the service for that container
    monitor_policy = {
        'limit': 0,
        'interval': 60,
        'tombstone': {
            'uds': False,
            'path': tm_env.running_tombstone_dir,
            'id': app.name
        }
    }
    container_svc = supervisor.create_service(
        tm_env.apps_dir,
        name=uniq_name,
        app_run_script=run_script,
        userid='root',
        downed=False,
        monitor_policy=monitor_policy,
        environ={},
        environment=app.environment
    )
    data_dir = container_svc.data_dir

    # Copy the original event as 'manifest.yml' in the container dir
    manifest_copy = os.path.join(data_dir, 'manifest.yml')
    try:
        shutil.copyfile(event, manifest_copy)
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        # File is gone, cleanup.
        shutil.rmtree(container_svc.directory)
        _LOGGER.exception('Event gone: %r', event)
        return None

    # Store the app.json in the container directory
    def _write_app_json(stream):
        return stream.writelines(utils.json_genencode(manifest_data))

    fs.write_safe(
        os.path.join(data_dir, appcfg.APP_JSON),
        _write_app_json,
        mode='w',
        permission=0o644
    )

    events_dir = tm_env.app_events_dir
    configured = events.ConfiguredTraceEvent(
        instanceid=app.name,
        uniqueid=app.uniqueid
    )
    appevents.post(events_dir, configured)

    return container_svc.directory