def yml(self):
    """Render this experiment as a docker-compose (version '3') yaml string.

    Collects the service definitions of every process group and every
    stand-alone process under the top-level 'services' key. The key is
    only added when at least one service exists, preserving the original
    output for an empty experiment.
    """
    di = BeneDict({'version': '3'})
    for pg in self.list_process_groups():
        if 'services' not in di:
            di['services'] = {}
        di['services'].update(pg.yml_dict())
    for p in self.list_processes():
        # Bug fix: 'services' was previously created only inside the
        # process-group loop, so an experiment containing stand-alone
        # processes but no process groups raised KeyError below.
        if 'services' not in di:
            di['services'] = {}
        di['services'][p.name] = p.yml_dict()
    return di.dump_yaml_str()
def __init__(self, name, image):
    """Container config for `image`, pre-seeded with a SYMPHONY_ROLE env var.

    The SYMPHONY_ROLE variable carries the process name so the process
    can discover its own role at runtime.
    """
    super().__init__()
    role_env = [{'name': 'SYMPHONY_ROLE', 'value': name}]
    self.data = BeneDict({
        'name': name,
        'image': image,
        'env': role_env,
    })
    # Volumes are attached later via mount_volume(); the owning pod yml
    # is linked in after construction.
    self.mounted_volumes = []
    self.pod_yml = None
def load(cls, di):
    """Reconstruct an instance from a dict produced by save()."""
    payload = di['data']
    # Construct with name/image, then restore the complete payload so
    # any extra serialized fields survive the round trip.
    instance = cls(di['name'], payload['image'])
    instance.data = BeneDict(payload)
    return instance
def load(cls, di):
    """Rebuild an instance, including its mounted volumes, from a saved dict."""
    # Placeholder name/image; the real values live in the saved payload.
    instance = cls('', '')
    instance.data = BeneDict(di['data'])
    restored = []
    for saved_volume in di['mounted_volumes']:
        restored.append(KubeVolume.load(saved_volume))
    instance.mounted_volumes = restored
    return instance
class DockerServiceYML(DockerConfigYML):
    """Yaml config for one service entry in a docker-compose file."""

    def __init__(self, name, container_image):
        super().__init__()
        self.name = name
        self.data = BeneDict({'image': container_image})

    @classmethod
    def load(cls, di):
        """Rebuild a service config from a dict produced by save()."""
        payload = di['data']
        instance = cls(di['name'], payload['image'])
        instance.data = BeneDict(payload)
        return instance

    def save(self):
        """Serialize to a plain dict (inverse of load())."""
        return {'name': self.name, 'data': self.data}

    def yml(self):
        """Dump this service section as a yaml string."""
        return self.data.dump_yaml_str()

    def set_env(self, name, value):
        """Set one environment variable on the service."""
        if 'environment' not in self.data:
            self.data['environment'] = BeneDict()
        self.data['environment'][name] = value

    def set_envs(self, di):
        """Set several environment variables from a dict."""
        for key, val in di.items():
            self.set_env(key, val)

    def set_hostname(self, hostname):
        """Assign the container hostname."""
        self.data.hostname = hostname

    def set_port(self, port):
        """Append a single port mapping."""
        if 'ports' not in self.data:
            self.data['ports'] = []
        self.data['ports'].append(port)

    def set_ports(self, ports):
        """Append several port mappings."""
        if 'ports' not in self.data:
            self.data['ports'] = []
        self.data['ports'].extend(ports)
def load_config(self):
    """Load the surreal yaml config and propagate identity settings to symphony.

    Raises:
        ValueError: if the surreal config file does not exist.
    """
    config_path = U.get_config_file()
    if not U.f_exists(config_path):
        raise ValueError('Cannot find surreal config file at {}'.format(
            config_path))
    self.config = BeneDict.load_yaml_file(config_path)
    # SymphonyConfig appears to be a singleton-style accessor; push the
    # username and experiment folder into it.
    SymphonyConfig().set_username(self.username)
    SymphonyConfig().set_experiment_folder(self.folder)
def set_env(self, name, value):
    """Set environment variable `name` to `value` on this container.

    Both arguments are coerced to str. An existing entry with the same
    name is updated in place; otherwise a new entry is appended.
    """
    name = str(name)
    value = str(value)
    existing = next(
        (entry for entry in self.data['env'] if entry.name == name), None)
    if existing is not None:
        existing.value = value
    else:
        self.data.env.append(BeneDict({'name': name, 'value': value}))
def config_view(self):
    """Run `kubectl config view` and parse its yaml output.

    Returns:
        BeneDict of context and cluster info.
    """
    stdout, _, _ = runner.run_verbose('kubectl config view',
                                      print_out=False,
                                      raise_on_error=True)
    return BeneDict(load_yaml_str(stdout))
def pod_spec(self):
    """
    Returns a spec to fall under
    Pod:
      spec:
    Emits an emptyDir volume; when use_memory is set the volume is
    backed by tmpfs ('medium': 'Memory').
    """
    empty_dir = {'medium': 'Memory'} if self.use_memory else {}
    return BeneDict({'name': self.name, 'emptyDir': empty_dir})
def pod_spec(self):
    """
    Returns a spec to fall under
    Pod:
      spec:
    Emits a gitRepo volume pinned to a repository and revision.
    """
    git_source = {
        'repository': self.repository,
        'revision': self.revision,
    }
    return BeneDict({'name': self.name, 'gitRepo': git_source})
def pod_spec(self):
    """
    Returns a spec to fall under
    Pod:
      spec:
    Emits a secret volume referencing a named Kubernetes Secret.
    """
    secret_source = {
        'secretName': self.secret_name,
        'defaultMode': self.defaultMode,
    }
    return BeneDict({'name': self.name, 'secret': secret_source})
def pod_spec(self):
    """
    Returns a spec to fall under
    Pod:
      spec:
    Emits an nfs volume pointing at a server export path.
    """
    nfs_source = {
        'server': self.server,
        'path': self.path,
    }
    return BeneDict({'name': self.name, 'nfs': nfs_source})
def pod_spec(self):
    """
    Returns a spec to fall under
    Pod:
      spec:
    Emits a hostPath volume.
    """
    return BeneDict({
        'name': self.name,
        'hostPath': {
            # Bug fix: the Kubernetes hostPath volume source names this
            # field 'type' (e.g. Directory, DirectoryOrCreate); the
            # previous 'hostpath_type' key is not part of the API and
            # would be rejected/ignored by the API server.
            'type': self.hostpath_type,
            'path': self.path,
        },
    })
def __init__(self, name, data):
    """Kubernetes v1 Secret manifest of type Opaque named `name`.

    Args:
        name: metadata.name of the Secret.
        data: mapping placed under the manifest's 'data' field.
            NOTE(review): Kubernetes expects 'data' values to be
            base64-encoded — confirm callers encode before passing.
    """
    self.name = name
    # Removed the original's `self.data = data`: a dead store that was
    # immediately overwritten by the manifest assignment below.
    self.data = BeneDict({
        'apiVersion': 'v1',
        'kind': 'Secret',
        'metadata': {
            'name': name,
        },
        'type': 'Opaque',
        'data': data,
    })
def mount_volume(self, volume, mount_path):
    """Attach a KubeVolume to this container at `mount_path`.

    Records the mount in the container spec and, when this container is
    already bound to a pod yml, registers the volume on the pod too.
    """
    assert isinstance(volume, KubeVolume)
    mounts = self.data.get('volumeMounts', [])
    mounts.append(BeneDict({
        'name': volume.name,
        'mountPath': mount_path,
    }))
    self.data['volumeMounts'] = mounts
    self.mounted_volumes.append(volume)
    # Keep the owning pod's volume list in sync.
    if self.pod_yml is not None:
        self.pod_yml.add_volume(volume)
def load_config(self):
    """Load the surreal yaml config, symphony identity, and docker build settings.

    Raises:
        ValueError: if the surreal config file does not exist.
    """
    config_path = U.get_config_file()
    if not U.f_exists(config_path):
        raise ValueError('Cannot find surreal config file at {}'.format(
            config_path))
    self.config = BeneDict.load_yaml_file(config_path)
    SymphonyConfig().set_username(self.username)
    SymphonyConfig().set_experiment_folder(self.folder)
    # Index optional per-image docker build settings by their name.
    for setting in self.config.get('docker_build_settings', []):
        self.docker_build_settings[setting['name']] = setting
def external_url(self, experiment_name, service_name):
    """Return the 'ip:port' exposed by a LoadBalancer service.

    Raises:
        ValueError: when the service has no ingress ip yet (or does
            not exist in the experiment's namespace).
    """
    res = BeneDict(
        self.query_resources('svc', 'yaml',
                             names=[service_name],
                             namespace=experiment_name))
    balancer = res.status.loadBalancer
    has_external_ip = 'ingress' in balancer and 'ip' in balancer.ingress[0]
    if not has_external_ip:
        raise ValueError('Service {} not found in experiment {}'.format(
            service_name, experiment_name))
    return '{}:{}'.format(balancer.ingress[0].ip, res.spec.ports[0].port)
def setup(self):
    """Initialize config state and register all CLI subcommands.

    Loads the surreal yaml config, then registers the create,
    tensorboard and docker-clean commands plus the NFS-backed
    retrieval commands (videos, config, tensorboard files).
    """
    super().setup()
    # Populated by load_config() when the yaml declares build settings.
    self.docker_build_settings = {}
    self.config = BeneDict()
    self.load_config()
    self._setup_create()
    self._setup_tensorboard()
    self._setup_docker_clean()
    # Secondary nfs related support
    self._setup_get_videos()
    self._setup_get_config()
    self._setup_get_tensorboard()
def __init__(self, name):
    """Skeleton v1 Service manifest; ports and selector are filled in later."""
    super().__init__()
    self.name = name
    manifest = {
        'apiVersion': 'v1',
        'kind': 'Service',
        'metadata': {
            'name': name,
            'labels': {},
        },
        'spec': {
            'ports': [{}],
            'selector': {},
        },
    }
    self.data = BeneDict(manifest)
def filter_data(data, perturbation_type=None, enforce_task_success=True,
                time_window=None):
    """Return a filtered deep copy of `data` as a BeneDict.

    Optionally restricts trials to one stimulation site, to successful
    behavior reports, and crops 'train_rates' along the time axis to
    [low, high) when `time_window` is given.
    """
    block = copy.deepcopy(data)
    if perturbation_type is not None:
        block = U.filter_by(block, 'stim_site', [perturbation_type])
    if enforce_task_success:
        # behavior_report == 1 marks a successful trial
        # (assumption -- TODO confirm against data schema).
        block = U.filter_by(block, 'behavior_report', [1])
    if time_window is not None:
        low, high = time_window
        block['train_rates'] = block['train_rates'][:,
                                                    np.arange(low, high), :]
    return BeneDict(block)
class KubeConfigYML(object):
    """Base class for objects that render a kubernetes yaml manifest."""

    def __init__(self):
        self.data = BeneDict({})

    def set_attr(self, new_config):
        """Merge `new_config` (a dict of fields to update) into the manifest."""
        merge_dict(self.data, new_config)

    def yml(self):
        """Dump yml string for kubernetes launch yml."""
        return self.data.dump_yaml_str()
def __init__(self, name):
    """v1 Pod manifest labelled with symphony_pg=<name>; containers added later."""
    super().__init__()
    manifest = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': name,
            'labels': {'symphony_pg': name},
        },
        'spec': {'containers': []},
    }
    self.data = BeneDict(manifest)
    # Track attached container ymls and guard against duplicate names.
    self.container_ymls = []
    self.container_names = set()
def describe_process_group(self, experiment_name, process_group_name):
    """
    Returns:
        {
            'p1': {'status': 'live', 'timestamp': '11:23'},
            'p2': {'status': 'dead'}
        }
    """
    res = self.query_resources('pod',
                               names=[process_group_name],
                               output_format='json',
                               namespace=experiment_name)
    if not res:
        raise ValueError('Cannot find process_group {} in experiment {}'
                         .format(process_group_name, experiment_name))
    return self._parse_container_statuses(
        BeneDict(res).status.containerStatuses)
def __init__(self, name, port):
    """ClusterIP Service exposing `port`, selecting pods labelled 'service-<name>: bind'."""
    super().__init__(name)
    self.name = name
    self.port = port
    selector_key = 'service-' + name
    self.data = BeneDict({
        'apiVersion': 'v1',
        'kind': 'Service',
        'metadata': {
            'name': name,
            'labels': {},
        },
        'spec': {
            'type': 'ClusterIP',
            'ports': [{'port': port}],
            'selector': {selector_key: 'bind'},
        },
    })
def __init__(self, config_file='~/.dockerly.yml'):
    """Load and validate the dockerly yaml config, caching derived settings.

    Raises:
        FileNotFoundError: when the config file is missing (re-raised
            after printing a hint).
        ValueError: when a config value is left as the '_fill_yours_'
            placeholder.
    """
    try:
        self.config = BeneDict.load_yaml_file(
            os.path.expanduser(config_file))
    except FileNotFoundError:
        print('must specify a config file ' + config_file)
        raise
    required_keys = ['container_root', 'host_root', 'default_image']
    for key in required_keys:
        assert key in self.config, 'config "{}" missing'.format(key)
    # Reject template placeholders left unfilled by the user.
    for k, v in self.config.items():
        if v == '_fill_yours_':
            raise ValueError('please fill in key "{}" in {}'.format(
                k, config_file))
    self.container_root = self.config.container_root
    assert os.path.isabs(self.container_root), \
        'container_root must be an absolute path: ' + self.container_root
    # must use realpath, otherwise relative path will be wrong
    self.host_root = os.path.realpath(
        os.path.expanduser(self.config.host_root))
    self.ports = self.config.ports
    self.default_image = self.config.default_image
    self._docker_exe = 'nvidia-docker' if self.config.nvidia else 'docker'
    self._dry_run = False
def describe_experiment(self, experiment_name):
    """
    Returns:
    {
        'pgroup1': {
            'p1': {'status': 'live', 'timestamp': '11:23'},
            'p2': {'status': 'dead'}
        },
        None: {  # always have all the processes
            'p3_lone': {'status': 'running'}
        }
    }
    """
    all_processes = BeneDict(
        self.query_resources('pod',
                             output_format='json',
                             namespace=experiment_name))
    out = OrderedDict()
    for pod in all_processes.items:
        pod_name = pod.metadata.name
        if 'containerStatuses' in pod.status:  # Pod is created
            container_statuses = self._parse_container_statuses(
                pod.status.containerStatuses)
            # A single container named after its pod is a stand-alone
            # process; those are grouped under the None key.
            if (len(container_statuses) == 1
                    and list(container_statuses.keys())[0] == pod_name):
                # Idiom fix: was `if not None in out` (PEP 8 E713).
                if None not in out:
                    out[None] = OrderedDict()
                out[None][pod_name] = container_statuses[pod_name]
            else:
                out[pod_name] = container_statuses
        else:
            # Pod scheduled but its containers have not started yet.
            out[pod_name] = {
                '~': self._parse_unstarted_pod_statuses(pod.status)
            }
    return out
def __init__(self):
    # Manifest payload; subclasses populate this BeneDict with their
    # kubernetes/docker fields before it is dumped to yaml.
    self.data = BeneDict({})
def set_env(self, name, value):
    """Set one environment variable in this service's 'environment' mapping,
    creating the mapping on first use."""
    if 'environment' not in self.data:
        self.data['environment'] = BeneDict()
    self.data['environment'][name] = value
def __init__(self, name, container_image):
    """Service config named `name` running `container_image`."""
    super().__init__()
    self.name = name
    self.data = BeneDict({'image': container_image})
def create_basic(self, *, settings, experiment_name, algorithm_args,
                 input_args, force, dry_run):
    """Create and launch a basic surreal experiment on the cluster.

    Merges settings (precedence: input_args > settings >
    DEFAULT_SETTING_BASIC), prepares agent/nonagent docker image builds,
    wires up all processes (learner, replay, ps, tensorboard,
    tensorplex, loggerplex, agents, evals), optionally mounts NFS on
    every process, then builds the images and launches the experiment.

    Args:
        settings: dict of experiment settings (lower precedence).
        experiment_name: name for the new experiment.
        algorithm_args: list of CLI args forwarded to the algorithm
            executable; extended in place with derived flags.
        input_args: dict of user-supplied overrides (highest precedence).
        force: forwarded to cluster.launch -- presumably overwrites an
            existing experiment of the same name; TODO confirm.
        dry_run: forwarded to cluster.launch -- presumably skips actual
            submission; TODO confirm.
    """
    settings = _merge_setting_dictionaries(settings,
                                           self.DEFAULT_SETTING_BASIC)
    settings = _merge_setting_dictionaries(input_args, settings)
    settings = BeneDict(settings)
    cluster = self.create_cluster()
    if 'mount_secrets' in self.config:
        secrets = self.config.mount_secrets
    else:
        secrets = None
    exp = cluster.new_experiment(experiment_name, secrets=secrets)
    # Images are tagged with the experiment's final name (exp.name).
    image_builder = SurrealDockerBuilder(
        build_settings=self.docker_build_settings,
        images_requested={
            'agent': {
                'identifier': settings.agent.image,
                'build_config': settings.agent.build_image
            },
            'nonagent': {
                'identifier': settings.nonagent.image,
                'build_config': settings.nonagent.build_image
            },
        },
        tag=exp.name,
        push=True)
    agent_image = image_builder.images_provided['agent']
    nonagent_image = image_builder.images_provided['nonagent']
    # defer to build last, so we don't build unless everything passes
    algorithm_args += [
        "--num-agents",
        str(settings.num_agents * settings.agent_batch),
    ]
    # TODO: restore_functionalities
    if settings.restore_folder is not None:
        algorithm_args += [
            "--restore_folder",
            shlex.quote(settings.restore_folder)
        ]
    experiment_folder = self.get_remote_experiment_folder(exp.name)
    algorithm_args += ["--experiment-folder", str(experiment_folder)]
    algorithm_args += ["--env", str(settings.env)]
    algorithm_args += ["--agent-batch", str(settings.agent_batch)]
    algorithm_args += ["--eval-batch", str(settings.eval_batch)]
    executable = self._find_executable(settings.algorithm)
    cmd_gen = CommandGenerator(num_agents=settings.num_agents,
                               num_evals=settings.num_evals,
                               executable=executable,
                               config_commands=algorithm_args)
    nonagent = exp.new_process_group('nonagent')
    learner = nonagent.new_process(
        'learner',
        container_image=nonagent_image,
        args=[cmd_gen.get_command('learner')])
    # Because learner and everything are bundled together
    # json_path = 'cluster_definition.tf.json'  # always use slash
    # filepath = pkg_resources.resource_filename(__name__, json_path)
    json_path = self.config.cluster_definition
    dispatcher = GKEDispatcher(json_path)
    # We only need to claim resources for learner
    dispatcher.assign_to(learner,
                         process_group=nonagent,
                         **settings.nonagent.scheduling)
    # For dm_control
    learner.set_env('DISABLE_MUJOCO_RENDERING', "1")
    replay = nonagent.new_process('replay',
                                  container_image=nonagent_image,
                                  args=[cmd_gen.get_command('replay')])
    ps = nonagent.new_process('ps',
                              container_image=nonagent_image,
                              args=[cmd_gen.get_command('ps')])
    tensorboard = nonagent.new_process(
        'tensorboard',
        container_image=nonagent_image,
        args=[cmd_gen.get_command('tensorboard')])
    tensorplex = nonagent.new_process(
        'tensorplex',
        container_image=nonagent_image,
        args=[cmd_gen.get_command('tensorplex')])
    loggerplex = nonagent.new_process(
        'loggerplex',
        container_image=nonagent_image,
        args=[cmd_gen.get_command('loggerplex')])
    nonagent.image_pull_policy('Always')
    agents = []
    for i in range(settings.num_agents):
        # Pluralized name when each process hosts a batch of agents.
        if settings.agent_batch == 1:
            agent_name = 'agent-{}'.format(i)
        else:
            agent_name = 'agents-{}'.format(i)
        agent = exp.new_process(agent_name,
                                container_image=agent_image,
                                args=[cmd_gen.get_command(agent_name)])
        agent.image_pull_policy('Always')
        # We only need to claim resources for learner
        dispatcher.assign_to(agent, **settings.agent.scheduling)
        agents.append(agent)
    evals = []
    for i in range(settings.num_evals):
        if settings.eval_batch == 1:
            eval_name = 'eval-{}'.format(i)
        else:
            eval_name = 'evals-{}'.format(i)
        eval_p = exp.new_process(eval_name,
                                 container_image=agent_image,
                                 args=[cmd_gen.get_command(eval_name)])
        dispatcher.assign_to(eval_p, **settings.agent.scheduling)
        eval_p.image_pull_policy('Always')
        evals.append(eval_p)
    setup_network(agents=agents,
                  evals=evals,
                  learner=learner,
                  replay=replay,
                  ps=ps,
                  tensorboard=tensorboard,
                  tensorplex=tensorplex,
                  loggerplex=loggerplex)
    if 'nfs' in self.config:
        print('NFS mounted')
        nfs_server = self.config.nfs.servername
        nfs_server_path = self.config.nfs.fs_location
        nfs_mount_path = self.config.nfs.mount_path
        for proc in exp.list_all_processes():
            proc.mount_nfs(server=nfs_server,
                           path=nfs_server_path,
                           mount_path=nfs_mount_path)
    else:
        print('NFS not mounted')
    # Build/push the images last so nothing is built if any step above fails.
    image_builder.build()
    cluster.launch(exp, force=force, dry_run=dry_run)