def _cache(self, zkclient, app):
    """Fetch the app manifest from Zookeeper and cache it as YAML.

    The scheduled node of ``app`` is read, the text following the first
    ``#`` of the instance name is stored under the ``task`` key, the
    placement data of this host (when present) is merged in, and the
    result is dumped to ``<cache_dir>/<app>``.

    A missing Zookeeper node is logged as a warning and otherwise ignored.

    :param zkclient:
        Zookeeper client handed to ``zkutils.get``.
    :param app:
        Instance name; must contain a ``#``.
    """
    scheduled_path = z.path.scheduled(app)
    placement_path = z.path.placement(self._hostname, app)

    try:
        manifest = zkutils.get(zkclient, scheduled_path)

        # TODO: need a function to parse instance id from name.
        hash_pos = app.index('#')
        manifest['task'] = app[hash_pos + 1:]

        placement = zkutils.get(zkclient, placement_path)
        if placement is not None:
            manifest.update(placement)

        cache_path = os.path.join(self.tm_env.cache_dir, app)

        def _dump_manifest(stream):
            return yaml.dump(manifest, stream=stream)

        fs.write_safe(
            cache_path,
            _dump_manifest,
            prefix='.%s-' % app,
            mode='w',
            permission=0o644
        )
        _LOGGER.info('Created cache manifest: %s', cache_path)

    except kazoo.exceptions.NoNodeError:
        _LOGGER.warning('App %r not found', app)
def execute(self, data):
    """Record the exit information and bring the container service down.

    ``data['id']`` is expected to be ``<unique_name>,<service_name>``. The
    exit details are written to the ``EXIT_INFO`` file of the container
    service data directory, then the service is put into the down state,
    which will trigger cleanup. A failure to bring the service down is
    only logged.

    :param data:
        Mapping with the ``id``, ``return_code``, ``signal`` and
        ``timestamp`` keys.
    :returns ``bool``:
        Always ``True``.
    """
    unique_name, service_name = data['id'].split(',')
    container_svc = supervisor.open_service(
        os.path.join(self._tm_env.apps_dir, unique_name)
    )

    _LOGGER.critical('Container down: %r', data)

    exit_info_file = os.path.join(container_svc.data_dir, EXIT_INFO)

    def _write_exit_info(stream):
        exit_info = {
            'service': service_name,
            'return_code': data['return_code'],
            'signal': data['signal'],
            'timestamp': data['timestamp']
        }
        return stream.writelines(utils.json_genencode(exit_info))

    fs.write_safe(
        exit_info_file,
        _write_exit_info,
        mode='w',
        prefix='.tmp',
        permission=0o644
    )

    try:
        supervisor.control_service(
            container_svc.directory,
            supervisor.ServiceControlAction.down
        )
    except subproc.CalledProcessError:
        _LOGGER.warning('Failed to bring down container: %r', unique_name)

    return True
def create(alerts_dir, epoch_ts=None, instanceid=None, summary=None,
           type_=None, **alert_data):
    """Write an alert as an indented JSON file in ``alerts_dir``.

    :param alerts_dir:
        Directory receiving the alert file.
    :param epoch_ts:
        Alert timestamp; the current time is used when it is falsy.
    :param instanceid:
        Instance the alert is about; also used to build the file name.
    :param summary:
        Alert summary text.
    :param type_:
        Alert type; also used to build the file name.
    :param alert_data:
        Any extra fields to store in the alert.
    :returns:
        Full path of the alert file.
    """
    alert_data['epoch_ts'] = epoch_ts or time.time()
    alert_data['instanceid'] = instanceid
    alert_data['summary'] = summary
    alert_data['type_'] = type_

    filename = os.path.join(alerts_dir, _to_filename(instanceid, type_))

    def _write_alert(stream):
        # Serialized as encoded bytes; no text mode is requested below.
        encoded = json.dumps(alert_data, indent=4).encode()
        return stream.write(encoded)

    fs.write_safe(
        filename,
        _write_alert,
        prefix='.tmp',
        permission=0o644,
        subdir='.tmp',
    )

    return filename
def write(benchmark_result_file, result):
    """Write benchmark result.

    Sample output file format::

        [device0]
        device = 589d88bd-8098-4041-900e-7fcac18abab3
        write_bps = 314572800
        read_bps = 314572800
        write_iops = 64000
        read_iops = 4000

    One ``[device<N>]`` section is emitted per entry of ``result``, numbered
    in iteration order.

    :param benchmark_result_file:
        benchmark result file
    :param result:
        {device: {metric: value, }, }
    """
    # SafeConfigParser was removed in Python 3.12; ConfigParser is its
    # replacement. Keep using the old name where it still exists.
    parser_cls = getattr(
        configparser, 'SafeConfigParser', configparser.ConfigParser
    )
    config = parser_cls()

    for index, (device, metrics) in enumerate(six.iteritems(result)):
        section = _DEVICE + six.text_type(index)
        config.add_section(section)
        config.set(section, _DEVICE, device)
        for metric, value in six.iteritems(metrics):
            config.set(section, metric, six.text_type(value))

    # ConfigParser.write() emits text, so request a text-mode file explicitly
    # instead of relying on the default mode of write_safe.
    fs.write_safe(
        benchmark_result_file,
        config.write,
        mode='w',
        permission=0o644
    )
def post(events_dir, event):
    """Publish an application event as a file in the events directory.

    The file is named ``<now>,<instanceid>,<event_type>,<event_data>``. Its
    content is the event payload: nothing when the payload is ``None``, the
    payload itself when it is a string, its YAML dump otherwise.

    :param events_dir:
        Directory receiving the event file.
    :param event:
        Event object exposing ``to_data()``.
    """
    _LOGGER.debug('post: %s: %r', events_dir, event)

    _ts, _src, instanceid, event_type, event_data, payload = event.to_data()

    name_fields = (time.time(), instanceid, event_type, event_data)
    event_file = os.path.join(events_dir, '%s,%s,%s,%s' % name_fields)

    def _emit_payload(stream):
        if payload is None:
            return
        if isinstance(payload, six.string_types):
            stream.write(payload)
            return
        yaml.dump(payload, stream=stream)

    fs.write_safe(
        event_file,
        _emit_payload,
        prefix='.tmp',
        mode='w',
        permission=0o644
    )
def _write_keytab(fname, data):
    """Write raw keytab bytes to ``fname`` through ``fs.write_safe``.

    The SHA-1 digest of the data is logged together with the file name.

    :param fname:
        Keytab filename.
    :param data:
        Keytab data in bytes.
    """
    digest = hashlib.sha1(data).hexdigest()
    _LOGGER.info('Writing %s: %s', fname, digest)

    def _write_data(stream):
        return stream.write(data)

    fs.write_safe(fname, _write_data, prefix='.tmp', mode='wb')
def _write_app_yaml(event, manifest_str):
    """Write the manifest text to the ``event`` file.

    :param event:
        Path of the file to create.
    :param manifest_str:
        Manifest content, written in text mode.
    """
    def _write_manifest(stream):
        return stream.write(manifest_str)

    fs.write_safe(event, _write_manifest, mode='w')
def _write_keytab(self, response):
    """Decode the keytab carried by ``response`` and write it to disk.

    The file is ``<keytab_dir>/<peer>`` and is created with the owner set
    to ``(self.uid, self.gid)``.

    :param response:
        Mapping whose ``['result']['keytab_entries']`` holds the
        base64-encoded keytab.
    """
    keytab = os.path.join(self.keytab_dir, self.peer())
    encoded_entries = response['result']['keytab_entries']
    keytab_entries = base64.standard_b64decode(encoded_entries)

    _LOGGER.info('Writing keytab: %s', keytab)

    def _write_entries(stream):
        return stream.write(keytab_entries)

    fs.write_safe(keytab, _write_entries, owner=(self.uid, self.gid))
def _dump_entry(base, entry_dn, entry_data):
    """Write an LDIF entry as ``data.ldif`` below ``base``.

    The comma-separated components of ``entry_dn`` are turned into nested
    directories in reverse order, so the last DN component sits directly
    under ``base``.

    :param base:
        Root directory of the dump.
    :param entry_dn:
        DN of the entry.
    :param entry_data:
        Entry data handed to ``_sorted_ldif_entry``.
    """
    dn_components = entry_dn.split(',')
    path_parts = [base] + dn_components[::-1] + ['data.ldif']
    fullpath = os.path.join(*path_parts)

    def _write_entry(stream):
        return stream.write(_sorted_ldif_entry(entry_dn, entry_data))

    fs.write_safe(
        fullpath,
        _write_entry,
        mode='w',
        permission=0o644,
    )
def _write_keytab(keytab_entries, keytab, owner):
    """Write keytab entries to a file owned by ``owner``.

    When ``owner`` is not a known user, an error is logged and nothing is
    written.

    :param keytab_entries:
        Keytab content to write.
    :param keytab:
        Destination file.
    :param owner:
        User name looked up with ``pwd.getpwnam``.
    """
    try:
        account = pwd.getpwnam(owner)
    except KeyError:
        _LOGGER.error('Invalid user: %s', owner)
        return

    def _write_entries(stream):
        return stream.write(keytab_entries)

    fs.write_safe(
        keytab,
        _write_entries,
        owner=(account.pw_uid, account.pw_gid)
    )
def write_keytab(keytab_file, encoded):
    """Decode an encoded keytab and write it to ``keytab_file``.

    The keytab is received encoded (via the JSON gssapiprotocol) and is
    turned back into the real keytab with URL-safe base64 decoding while
    the file is being written.

    :param keytab_file:
        Keytab filename.
    :param encoded:
        Keytab encoded data.
    """
    _LOGGER.debug('Write keytab file %s', keytab_file)

    def _write_decoded(stream):
        return stream.write(base64.urlsafe_b64decode(encoded))

    fs.write_safe(keytab_file, _write_decoded, prefix='.tmp', mode='wb')
def _write(self, timeout_at):
    """Create the watchdog lease file and stamp it with ``timeout_at``.

    The parent directory is created when needed, ``self.content`` is
    written with permission ``0o600`` and both the access and modification
    times of the file are set to ``timeout_at``.

    :param timeout_at:
        Timestamp applied to the lease file with ``os.utime``.
    """
    lease_dir, lease_name = os.path.split(self.filename)
    fs.mkdir_safe(lease_dir)

    def _write_content(stream):
        return stream.write(self.content)

    fs.write_safe(
        self.filename,
        _write_content,
        prefix='.' + lease_name,
        mode='w',
        permission=0o600
    )
    os.utime(self.filename, (timeout_at, timeout_at))
def _copy_keytab(kt_file, keytab, owner):
    """Copy a cached keytab to ``keytab`` and give it to ``owner``.

    When ``owner`` is not a known user, an error is logged and nothing is
    copied.

    :param kt_file:
        Source keytab file, read in binary mode.
    :param keytab:
        Destination file.
    :param owner:
        User name looked up with ``pwd.getpwnam``.
    """
    try:
        account = pwd.getpwnam(owner)
    except KeyError:
        _LOGGER.error('Invalid user: %s', owner)
        return

    ownership = (account.pw_uid, account.pw_gid)

    with io.open(kt_file, 'rb') as source:

        def _copy_content(stream):
            return stream.write(source.read())

        fs.write_safe(keytab, _copy_content, owner=ownership)
def benchmark(directory,
              volume=BENCHMARK_VOLUME,
              rw_type=BENCHMARK_RW_TYPE,
              job_number=BENCHMARK_JOB_NUMBER,
              thread_number=BENCHMARK_THREAD_NUMBER,
              block_size=BENCHMARK_IOPS_BLOCK_SIZE,
              max_seconds=BENCHMARK_MAX_SECONDS):
    """Use fio to do benchmark.

    A fio job file is generated in ``directory``, fio is run against it and
    its terse output is parsed.

    :param directory:
        Directory being benchmarked; also receives the fio config and
        result files.
    :param volume:
        Value of the fio ``size`` option.
    :param rw_type:
        Value of the fio ``rw`` option.
    :param job_number:
        Value of the fio ``numjobs`` option.
    :param thread_number:
        Value of the fio ``iodepth`` option.
    :param block_size:
        Value of the fio ``bs`` option.
    :param max_seconds:
        Value of the fio ``runtime`` option.
    :returns:
        Dict keyed by the ``Metrics`` read/write bps/iops values; empty
        when fio exits with a non-zero code.
    """
    result = {}
    config_file = os.path.join(directory, _BENCHMARK_CONFIG_FILE)
    result_file = os.path.join(directory, _BENCHMARK_RESULT_FILE)

    # prepare fio config
    # SafeConfigParser was removed in Python 3.12; ConfigParser is its
    # replacement. Keep using the old name where it still exists.
    parser_cls = getattr(
        configparser, 'SafeConfigParser', configparser.ConfigParser
    )
    config = parser_cls()

    global_section = 'global'
    global_options = (
        ('group_reporting', '1'),
        ('unlink', '1'),
        ('time_based', '1'),
        ('direct', '1'),
        ('size', volume),
        ('rw', rw_type),
        ('numjobs', job_number),
        ('iodepth', thread_number),
        ('bs', block_size),
        ('runtime', max_seconds),
    )
    config.add_section(global_section)
    for option, value in global_options:
        config.set(global_section, option, value)

    drive_section = 'drive'
    config.add_section(drive_section)
    config.set(drive_section, 'directory', directory)

    # NOTE(review): the file is opened with the default mode of write_safe;
    # this presumably relies on EqualSpaceRemover accepting what
    # config.write() emits - confirm.
    fs.write_safe(config_file, lambda f: config.write(EqualSpaceRemover(f)))

    # start fio
    ret = subproc.call([
        'fio', config_file,
        '--norandommap',
        '--minimal',
        '--output', result_file
    ])

    # parse fio terse result
    # http://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
    if ret == 0:
        with io.open(result_file) as fp:
            metric_list = fp.read().split(';')

        # Bandwidth fields are scaled by 1024 (presumably KiB/s to bytes/s,
        # see the terse output format linked above).
        result[Metrics.READ_BPS.value] = int(float(metric_list[6]) * 1024)
        result[Metrics.READ_IOPS.value] = int(metric_list[7])
        result[Metrics.WRITE_BPS.value] = int(float(metric_list[47]) * 1024)
        result[Metrics.WRITE_IOPS.value] = int(metric_list[48])

    return result
def save_app(manifest, container_dir, app_json=STATE_JSON):
    """Persist the app manifest as JSON and return it as a frozen object.

    :param manifest:
        Manifest data (with allocated vip and ports) to save.
    :param container_dir:
        Directory receiving the state file.
    :param app_json:
        Name of the state file inside ``container_dir``.
    :returns:
        The manifest converted with ``utils.to_obj``.
    """
    state_file = os.path.join(container_dir, app_json)

    def _write_state(stream):
        return stream.writelines(utils.json_genencode(manifest))

    # 0o644 keeps the state file world readable.
    fs.write_safe(state_file, _write_state, mode='w', permission=0o644)

    # Freeze the app data into a namedtuple object
    frozen = utils.to_obj(manifest)
    return frozen
def _cache(self, zkclient, app, check_existing=False):
    """Cache the manifest and placement data of ``app`` as YAML.

    The placement node of this host is read first; when it does not exist
    nothing is cached. Otherwise the scheduled manifest is read, the text
    following the first ``#`` of the instance name is stored under
    ``task``, the placement data is merged in and the result is dumped to
    ``<cache_dir>/<app>``.

    :param ``str`` app:
        Instance name.
    :param ``bool`` check_existing:
        Whether to check if the file already exists and is up to date, in
        which case it is left untouched.
    """
    placement_path = z.path.placement(self._hostname, app)
    try:
        placement_data, placement_meta = zkutils.get_with_metadata(
            zkclient, placement_path
        )
        # Scaled by 1/1000 so that it can be compared with st_ctime below.
        placement_time = placement_meta.ctime / 1000.0
    except kazoo.exceptions.NoNodeError:
        _LOGGER.info('Placement %s/%s not found', self._hostname, app)
        return

    cache_path = os.path.join(self.tm_env.cache_dir, app)

    if check_existing:
        try:
            cached_time = os.stat(cache_path).st_ctime
        except FileNotFoundError:
            cached_time = None

        up_to_date = bool(cached_time) and cached_time >= placement_time
        if up_to_date:
            _LOGGER.info('%s is up to date', cache_path)
            return

    scheduled_path = z.path.scheduled(app)
    try:
        manifest = zkutils.get(zkclient, scheduled_path)

        # TODO: need a function to parse instance id from name.
        hash_pos = app.index('#')
        manifest['task'] = app[hash_pos + 1:]

        if placement_data is not None:
            manifest.update(placement_data)

        def _dump_manifest(stream):
            return yaml.dump(manifest, stream=stream)

        fs.write_safe(
            cache_path,
            _dump_manifest,
            prefix='.%s-' % app,
            mode='w',
            permission=0o644
        )
        _LOGGER.info('Created cache manifest: %s', cache_path)

    except kazoo.exceptions.NoNodeError:
        _LOGGER.info('App %s not found', app)
def create(alerts_dir, epoch_ts=None, instanceid=None, summary=None,
           type_=None, **kwargs):
    """Write an alert as an indented JSON file in ``alerts_dir``.

    :param alerts_dir:
        Directory receiving the alert file.
    :param epoch_ts:
        Alert timestamp; the current time is used when it is falsy.
    :param instanceid:
        Instance the alert is about; also used to build the file name.
    :param summary:
        Alert summary text.
    :param type_:
        Alert type; also used to build the file name.
    :param kwargs:
        Any extra fields to store in the alert.
    """
    alert_file = os.path.join(alerts_dir, _to_filename(instanceid, type_))

    def _write_alert(stream):
        alert = dict(
            epoch_ts=epoch_ts or time.time(),
            instanceid=instanceid,
            summary=summary,
            type_=type_,
            **kwargs
        )
        return stream.write(json.dumps(alert, indent=4).encode())

    fs.write_safe(alert_file, _write_alert, permission=0o644)
def flag_aborted(container_dir, why=None, payload=None):
    """Mark a container as aborted.

    Called when aborting in a failed run step; the resulting ``aborted``
    file is consumed by the cleanup script.

    :param container_dir:
        Container directory receiving the ``aborted`` file.
    :param why:
        Abort reason, rendered with ``_why_str``.
    :param payload:
        Optional extra information; converted to ``str`` unless ``None``.
    """
    payload_text = payload if payload is None else str(payload)
    aborted_file = os.path.join(container_dir, 'aborted')

    def _write_reason(stream):
        details = {
            'why': _why_str(why),
            'payload': payload_text
        }
        return stream.writelines(utils.json_genencode(details))

    fs.write_safe(aborted_file, _write_reason, mode='w', permission=0o644)
def execute(self, data):
    """Take the node down by creating a watchdog describing the crash.

    The watchdog file is named ``Monitor-<prefix><service>`` and holds a
    one-line description of the last exit of the service.

    :param data:
        Mapping with the ``service``, ``return_code`` and ``signal`` keys.
    :returns ``bool``:
        Always ``True``.
    """
    _LOGGER.critical('Node down: %r', data)

    watchdog_name = 'Monitor-{prefix}{service}'.format(
        prefix=self._prefix,
        service=data['service']
    )
    watchdog_file = os.path.join(self._watchdog_dir, watchdog_name)

    def _write_reason(stream):
        reason = (
            'Node service {service!r} crashed.'
            ' Last exit {return_code} (sig:{signal}).'
        ).format(
            service=data['service'],
            return_code=data['return_code'],
            signal=data['signal']
        )
        return stream.write(reason)

    fs.write_safe(
        watchdog_file,
        _write_reason,
        prefix='.tmp',
        mode='w',
        permission=0o644
    )

    return True
def execute(self, data):
    """Run the down action for the container.

    ``data`` is saved as JSON in the ``EXIT_INFO`` file of the container
    service data directory, then the container service is brought down.

    :returns ``bool``:
        ``True`` - Monitor should keep running.
        ``False`` - Monitor should stop.
        This action always returns ``False``.
    """
    _LOGGER.critical('Container down: %r', data)

    container_svc = self._container_svc
    exit_info_file = os.path.join(container_svc.data_dir, EXIT_INFO)

    def _write_exit_info(stream):
        return stream.writelines(utils.json_genencode(data))

    fs.write_safe(
        exit_info_file,
        _write_exit_info,
        mode='w',
        prefix='.tmp',
        permission=0o644
    )

    # NOTE: This will take down this container's monitor service as well.
    # NOTE: The supervisor has to be running as we call from inside the
    #       container.
    supervisor.control_service(
        self._container_svc.directory,
        supervisor.ServiceControlAction.down
    )

    return False
def krb5keytab(keytab, sock_path):
    """Client utility requesting a krb5 keytab from the local proxy.

    :param keytab:
        File to write the received keytab to. When empty, the server is
        asked to write the keytab itself.
    :param sock_path:
        Path of the proxy socket; ``_DEFAULT_SOCK_PATH`` when empty.
    """
    sock_path = sock_path or _DEFAULT_SOCK_PATH

    client = peercredprotocol.PeerCredLineClient(sock_path)
    try:
        client.connect()

        # If we write the keytab ourselves, do not ask the server to write
        # the file.
        request = {'keytab': not keytab}
        client.write(json.dumps(request).encode('utf8'))

        reply = client.read()
        if not reply:
            cli.bad_exit('Connection closed.')

        response = json.loads(reply.decode())
        if response.get('status') != 'success':
            cli.bad_exit(response.get('why', 'Unknown error'))

        if keytab:
            encoded_entries = response['result']['keytab_entries']
            keytab_entries = base64.standard_b64decode(encoded_entries)
            _LOGGER.info('Writing keytab: %s', keytab)

            def _write_entries(stream):
                return stream.write(keytab_entries)

            fs.write_safe(keytab, _write_entries)

    except FileNotFoundError:
        cli.bad_exit(
            'Failed connecting to %s, krb5keytab proxy is not running.',
            sock_path
        )
    finally:
        client.disconnect()
def configure(tm_env, event, runtime):
    """Create the container service directory for an application event.

    This operation is idempotent (it can be repeated).

    The steps are:

    - load the manifest from the event;
    - create the supervised service ``<apps_dir>/<app unique name>`` whose
      run script starts ``sproc run``;
    - copy the original event as ``manifest.yml`` into the service data
      directory;
    - store the loaded manifest as ``appcfg.APP_JSON`` next to it;
    - post a ``ConfiguredTraceEvent``.

    The 'run' script is responsible for creating the container environment
    and starting the container.

    :param tm_env:
        Environment object providing ``apps_dir`` and ``app_events_dir``.
    :param event:
        Path of the event file.
    :param runtime:
        Runtime handed to the manifest loader.
    :returns:
        The container service directory, or ``None`` when the event could
        not be loaded.
    """
    # Load the app from the event
    try:
        manifest_data = app_manifest.load(tm_env, event, runtime)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception('No event to load: %r', event)
        return

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Generate a unique name for the app
    uniq_name = appcfg.app_unique_name(app)

    # Write the actual container start script
    run_cmd = (
        [sys.executable, '-m', 'treadmill.ms', 'sproc', 'run', '.']
        if os.name == 'nt' else
        ['exec', dist.TREADMILL_BIN, 'sproc', 'run', '../']
    )
    run_script = ' '.join(run_cmd)

    # Create the service for that container
    monitor_policy = {
        'limit': 0,
        'interval': 60
    }
    container_svc = supervisor.create_service(
        tm_env.apps_dir,
        name=uniq_name,
        app_run_script=run_script,
        userid='root',
        downed=False,
        monitor_policy=monitor_policy,
        environ={},
        environment=app.environment
    )
    data_dir = container_svc.data_dir

    # Copy the original event as 'manifest.yml' in the container dir
    manifest_copy = os.path.join(data_dir, 'manifest.yml')
    shutil.copyfile(event, manifest_copy)

    # Store the app.json in the container directory
    def _write_app_json(stream):
        return stream.writelines(utils.json_genencode(manifest_data))

    fs.write_safe(
        os.path.join(data_dir, appcfg.APP_JSON),
        _write_app_json,
        mode='w',
        permission=0o644
    )

    configured = events.ConfiguredTraceEvent(
        instanceid=app.name,
        uniqueid=app.uniqueid
    )
    appevents.post(tm_env.app_events_dir, configured)

    return container_svc.directory
def _install_services(scan_dir, package, src_dir, dst_dir, params, prefix_len,
                      rec=None):
    """Expand services in scan directory and install.

    Walks the resources of ``package`` found under ``src_dir``:

    - control entries (``_CONTROL_DIR_NAME`` / ``_CONTROL_DIR_FILE``) are
      skipped;
    - sub-directories are created under ``dst_dir`` and handed to
      ``_install``;
    - ``*.yml`` resources are loaded as service definitions, interpolated
      with ``params`` and turned into supervised services under
      ``scan_dir``, including the files listed in their ``data_dir``;
    - any other resource is ignored.

    :param scan_dir:
        Scan directory in which the services are created.
    :param package:
        Python package (module object) holding the resources.
    :param src_dir:
        Resource directory, inside the package, to expand.
    :param dst_dir:
        Destination root directory.
    :param params:
        Parameters used to interpolate the service definitions.
    :param prefix_len:
        Number of leading characters stripped from a resource path to get
        its path relative to ``dst_dir``.
    :param rec:
        Optional file-like object; every directory created here is
        recorded in it, one per line, with a trailing path separator.
    """
    package_name = package.__name__
    contents = pkg_resources.resource_listdir(package_name, src_dir)

    for item in contents:
        if item in (_CONTROL_DIR_NAME, _CONTROL_DIR_FILE):
            continue

        resource_path = os.path.join(src_dir, item)
        if pkg_resources.resource_isdir(package_name,
                                        os.path.join(src_dir, item)):
            # Plain directory: mirror it, then let _install handle its
            # content.
            dst_path = os.path.join(dst_dir, resource_path[prefix_len:])

            fs.mkdir_safe(dst_path)
            if rec:
                rec.write('%s\n' % os.path.join(dst_path, ''))

            _install(
                package,
                os.path.join(src_dir, item),
                dst_dir,
                params,
                prefix_len=prefix_len,
                rec=rec
            )
        elif resource_path.endswith('.yml'):
            # Service definition: the destination is the resource path
            # without its '.yml' suffix, and its basename is the service
            # name used for interpolation.
            dst_path = os.path.join(dst_dir, resource_path[prefix_len:-4])
            name = os.path.basename(dst_path)

            _LOGGER.info('Expand service (%s): %s => %s', name, resource_path,
                         dst_path)

            fs.mkdir_safe(dst_path)
            if rec:
                rec.write('%s\n' % os.path.join(dst_path, ''))

            service_conf_file = pkg_resources.resource_string(
                package_name,
                resource_path
            )

            if not service_conf_file:
                _LOGGER.warning('Service def was empty: %s', resource_path)
                continue

            # NOTE(review): yaml.load is called without an explicit Loader.
            # The input is a package resource, so it is presumably trusted;
            # consider yaml.safe_load if that ever changes.
            service_conf = yaml.load(service_conf_file.decode('utf8'))
            service_conf = bootstrap.interpolate_service_conf(
                resource_path, service_conf, name, params)

            svc = supervisor.create_service(
                scan_dir,
                service_conf['name'],
                service_conf['command'],
                userid=service_conf['userid'],
                downed=service_conf['downed'],
                environ_dir=service_conf['environ_dir'],
                environ=service_conf['environ'],
                monitor_policy=service_conf['monitor_policy'],
                notification_fd=service_conf['notification_fd'],
                call_before_run=service_conf['call_before_run'],
                call_before_finish=service_conf['call_before_finish'],
                logger_args=service_conf['logger_args'],
                ionice_prio=0,
            )

            for file in service_conf['data_dir']:
                # Executable data files get 0o755, all others 0o644.
                permission = 0o644
                if file['executable']:
                    permission = 0o755

                fs.write_safe(
                    os.path.join(svc.data_dir, file['path']),
                    # 'file' is bound as a default argument so the callback
                    # keeps the entry of the current iteration.
                    lambda f, file=file: f.write(
                        file['content']
                    ),
                    mode='w',
                    permission=permission
                )
def _on_created(self, impl, filepath): """Private handler for request creation events. """ # Avoid triggering on changes to the service directory itself. if filepath == self._rsrc_dir: return False req_id = os.path.basename(filepath) # Avoid triggerring on temporary files if req_id[0] == '.': return False req_file = os.path.join(filepath, REQ_FILE) rep_file = os.path.join(filepath, REP_FILE) try: with io.open(req_file) as f: req_data = yaml.load(stream=f) except IOError as err: if (err.errno == errno.ENOENT or err.errno == errno.ENOTDIR): _LOGGER.exception('Removing invalid request: %r', req_id) try: fs.rm_safe(filepath) except OSError as rm_err: if rm_err.errno == errno.EISDIR: fs.rmtree_safe(filepath) else: raise return False raise # TODO: We should also validate the req_id format with lc.LogContext(_LOGGER, req_id, adapter_cls=lc.ContainerAdapter) as log: log.debug('created %r: %r', req_id, req_data) try: # TODO: We should also validate the req_id format utils.validate(req_data, impl.PAYLOAD_SCHEMA) res = impl.on_create_request(req_id, req_data) except exc.InvalidInputError as err: log.error('Invalid request data: %r: %s', req_data, err) res = {'_error': {'input': req_data, 'why': str(err)}} except Exception as err: # pylint: disable=W0703 log.exception('Unable to process request: %r %r:', req_id, req_data) res = {'_error': {'input': req_data, 'why': str(err)}} if res is None: # Request was not actioned return False fs.write_safe( rep_file, lambda f: yaml.dump( res, explicit_start=True, explicit_end=True, default_flow_style=False, stream=f ), mode='w', permission=0o644 ) # Return True if there were no error return not bool(res.get('_error', False))
def configure(tm_env, event, runtime):
    """Create the container service directory for an application event.

    This operation is idempotent (it can be repeated).

    The steps are:

    - load the runtime manifest from the event;
    - create the supervised service ``<apps_dir>/<app unique name>`` whose
      run script starts ``treadmill sproc run`` and whose monitor policy
      carries a tombstone in ``tm_env.running_tombstone_dir``;
    - copy the original event as ``manifest.yml`` into the service data
      directory (the service directory is removed again when the event
      has vanished in the meantime);
    - store the loaded manifest as ``appcfg.APP_JSON`` next to it;
    - post a ``ConfiguredTraceEvent``.

    The 'run' script is responsible for creating the container environment
    and starting the container.

    :param tm_env:
        Environment object providing ``apps_dir``, ``app_events_dir`` and
        ``running_tombstone_dir``.
    :param event:
        Path of the event file.
    :param runtime:
        Runtime handed to the manifest loader.
    :returns:
        The container service directory, or ``None`` when the event is
        gone.
    """
    # Load the app from the event
    try:
        manifest_data = load_runtime_manifest(tm_env, event, runtime)
    except IOError:
        # File is gone. Nothing to do.
        _LOGGER.exception('No event to load: %r', event)
        return None

    # Freeze the app data into a namedtuple object
    app = utils.to_obj(manifest_data)

    # Generate a unique name for the app
    uniq_name = appcfg.app_unique_name(app)

    # Write the actual container start script
    if os.name == 'nt':
        run_template = '{treadmill}/scripts/treadmill sproc run .'
    else:
        run_template = 'exec {treadmill}/bin/treadmill sproc run ../'
    run_script = run_template.format(treadmill=subproc.resolve('treadmill'))

    # Create the service for that container
    tombstone = {
        'uds': False,
        'path': tm_env.running_tombstone_dir,
        'id': app.name
    }
    monitor_policy = {
        'limit': 0,
        'interval': 60,
        'tombstone': tombstone
    }
    container_svc = supervisor.create_service(
        tm_env.apps_dir,
        name=uniq_name,
        app_run_script=run_script,
        userid='root',
        downed=False,
        monitor_policy=monitor_policy,
        environ={},
        environment=app.environment
    )
    data_dir = container_svc.data_dir

    # Copy the original event as 'manifest.yml' in the container dir
    try:
        shutil.copyfile(event, os.path.join(data_dir, 'manifest.yml'))
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        # File is gone, cleanup.
        shutil.rmtree(container_svc.directory)
        _LOGGER.exception('Event gone: %r', event)
        return None

    # Store the app.json in the container directory
    def _write_app_json(stream):
        return stream.writelines(utils.json_genencode(manifest_data))

    fs.write_safe(
        os.path.join(data_dir, appcfg.APP_JSON),
        _write_app_json,
        mode='w',
        permission=0o644
    )

    configured = events.ConfiguredTraceEvent(
        instanceid=app.name,
        uniqueid=app.uniqueid
    )
    appevents.post(tm_env.app_events_dir, configured)

    return container_svc.directory