def compress_nested_container(u_container):
    """Recursively compress every numpy array inside a nested container.

    Parameters
    ----------
    u_container : dict or list
        Arbitrarily nested combination of dicts and lists whose leaves may
        be np.ndarray objects or plain values.

    Returns
    -------
    A new container with the same structure in which every ndarray leaf has
    been replaced by compress_array(leaf); all other leaves are passed
    through unchanged. (As before, an input that is neither dict nor list
    yields None — callers only pass containers.)
    """
    def _compress_leaf(value):
        # Containers recurse; arrays get compressed; anything else is kept.
        if isinstance(value, (dict, list)):
            return compress_nested_container(value)
        if isinstance(value, np.ndarray):
            return compress_array(value)
        return value

    if isinstance(u_container, dict):
        return {key: _compress_leaf(value)
                for key, value in u_container.items()}
    elif isinstance(u_container, list):
        return [_compress_leaf(value) for value in u_container]
def to_unit(self, V):
    """Map variable values V into the unit hypercube.

    Parameters
    ----------
    V : np.ndarray
        A single data vector (1-D) or a matrix with one row per point,
        columns laid out according to self.variables_meta.

    Returns
    -------
    np.ndarray of the same shape: int/float columns rescaled to [0, 1] via
    self.int_to_unit / self.float_to_unit; enum columns copied through
    unchanged (assumed to already be stored in a 1-hot encoding).
    An empty input returns np.array([]).
    """
    if V.shape[0] == 0:
        return np.array([])
    squeeze = V.ndim == 1
    if squeeze:
        V = V[None, :]  # promote a single point to a 1-row matrix
    U = np.zeros(V.shape)
    for variable in self.variables_meta.values():
        indices = variable['indices']
        vtype = variable['type']
        if vtype in ('int', 'float'):
            # Identical handling for both; only the conversion differs.
            convert = self.int_to_unit if vtype == 'int' else self.float_to_unit
            U[:, indices] = convert(V[:, indices],
                                    variable['min'], variable['max'])
        elif vtype == 'enum':
            for ind in indices:
                U[:, ind] = V[:, ind]  # already 1-hot; no rescaling needed
        else:
            raise Exception("Unknown variable type.")
    return np.squeeze(U) if squeeze else U
def paramify_and_print(self, data_vector, left_indent=0, indent_top_row=False):
    """Pretty-print the parameters encoded in data_vector to stderr.

    Parameters
    ----------
    data_vector : np.ndarray
        1-D data vector; decoded into a params dict via self.paramify().
    left_indent : int
        Number of spaces each row is indented by.
    indent_top_row : bool
        Whether the header row is indented as well.
    """
    params = self.paramify(data_vector)
    indentation = ' ' * left_indent

    if indent_top_row:
        sys.stderr.write(indentation)
    sys.stderr.write('NAME TYPE VALUE\n')
    sys.stderr.write(indentation)
    sys.stderr.write('---- ---- -----\n')

    for param_name, param in params.items():
        # Pick the value conversion matching the parameter type.
        if param['type'] == 'float':
            format_str = '%s%-12.12s %-9.9s %-12f\n'
        elif param['type'] == 'enum':
            format_str = '%s%-12.12s %-9.9s %-12s\n'
        else:
            format_str = '%s%-12.12s %-9.9s %-12d\n'
        for i, value in enumerate(param['values']):
            if i == 0:
                sys.stderr.write(format_str %
                                 (indentation, param_name,
                                  param['type'], value))
            else:
                # BUG FIX: the original passed only 3 values to this
                # 4-slot format string, raising TypeError for any
                # multi-valued parameter. Continuation rows leave the
                # name and type columns blank.
                sys.stderr.write(format_str % (indentation, '', '', value))
def decompress_nested_container(c_container):
    """Recursively decompress a nested container produced by
    compress_nested_container.

    Any dict carrying 'ctype' == COMPRESS_TYPE is treated as a compressed
    array and expanded via decompress_array(); other dicts and lists are
    walked recursively, and plain leaves are passed through unchanged.

    Raises
    ------
    Exception
        If a dict marked as compressed cannot be decompressed.
    """
    def _decompress_leaf(value):
        # Containers recurse; everything else passes through untouched.
        if isinstance(value, (dict, list)):
            return decompress_nested_container(value)
        return value

    if isinstance(c_container, dict):
        if 'ctype' in c_container and c_container['ctype'] == COMPRESS_TYPE:
            try:
                return decompress_array(c_container)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                # TODO: still generic — decompress_array should raise a
                # specific error type we can catch here.
                raise Exception('Container does not contain a valid array.')
        return {key: _decompress_leaf(value)
                for key, value in c_container.items()}
    elif isinstance(c_container, list):
        return [_decompress_leaf(value) for value in c_container]
def tired(db, experiment_name, resources):
    """ return True if no resources are accepting jobs """
    jobs = load_jobs(db, experiment_name)
    # Tired means not a single resource will take more work right now.
    return not any(resource.acceptingJobs(jobs)
                   for _name, resource in items(resources))
def print_dict(d, level=1):
    """Recursively pretty-print a nested dict, indenting by nesting level.

    Leaf values are printed on the same line as their key; nested dicts
    start on a fresh line.
    """
    if isinstance(d, dict):
        if level > 1:
            print("")  # nested dicts begin on their own line
        for k, v in d.items():
            # BUG FIX: this was `print " "*level, k,` in Python 2 — the
            # trailing comma suppressed the newline so the value printed
            # on the same line as the key. The mechanical 2to3 conversion
            # (`print(" " * level, k, )`) lost that; end=" " restores the
            # intended same-line layout.
            print(" " * level, k, end=" ")
            print_dict(v, level=level + 1)
    else:
        print(d)
def variables_config_to_meta(self, variables_config):
    """
    Converts a dict of variable meta-information from a config-file format
    into a format that can be more easily used by bayesopt routines.

    For each variable this records its type, bounds/options, and — the main
    addition — 'indices': the mapping from the variable to its column(s) in
    the matrix representation. int/float occurrences take one column each;
    each enum occurrence takes one column per option (1-hot).

    Returns
    -------
    (variables_meta, num_dims, cardinality)
        variables_meta : OrderedDict of per-variable metadata
        num_dims       : total number of matrix columns
        cardinality    : total number of distinct variable occurrences
    """
    variables_meta = OrderedDict()
    cardinality = 0  # The number of distinct variables
    num_dims = 0     # The number of dimensions in the matrix representation
    for name, variable in variables_config.items():
        cardinality += variable['size']
        vtype = variable['type'].lower()
        vdict = {'type': vtype, 'indices': []}
        if vtype == 'int':
            vdict['min'] = int(variable['min'])
            vdict['max'] = int(variable['max'])
        elif vtype == 'float':
            vdict['min'] = float(variable['min'])
            vdict['max'] = float(variable['max'])
        elif vtype == 'enum':
            vdict['options'] = list(variable['options'])
        else:
            raise Exception("Unknown variable type.")

        # Assign matrix columns for each occurrence of this variable.
        # (range instead of xrange for Python 3 compatibility; the
        # unreachable duplicate type check in the inner loop was dropped —
        # vtype is already validated above.)
        for _ in range(variable['size']):
            if vtype in ('int', 'float'):
                vdict['indices'].append(num_dims)
                num_dims += 1
            else:  # enum: a block of one column per option
                n_options = len(vdict['options'])
                vdict['indices'].append(list(np.arange(n_options) + num_dims))
                num_dims += n_options

        variables_meta[name] = vdict
    return variables_meta, num_dims, cardinality
def __init__(self, tasks_config, variables_config):
    """Build one Task per entry in tasks_config, plus a dummy task, and
    allocate empty data matrices for observations.

    Parameters
    ----------
    tasks_config : dict
        Maps task name -> task options dict; each becomes a Task.
    variables_config : dict
        Variable configuration shared by all tasks; a shallow copy is kept
        on self.

    NOTE(review): relies on self.num_dims being available here — presumably
    a property defined elsewhere on the class; confirm.
    """
    self.tasks = {}
    for task_name, task_options in items(tasks_config):
        self.tasks[task_name] = Task(task_name, task_options, variables_config)
    # Placeholder task used when no real task applies.
    self.dummy_task = Task('dummy', {'type': 'dummy'}, variables_config)

    #TODO: Validate the data
    # Empty (0 x num_dims) matrices that grow as observations arrive.
    # NOTE(review): _values and _costs are allocated num_dims columns like
    # the inputs; if they hold one scalar per observation that width looks
    # wrong — confirm against how rows are appended to them.
    self._inputs = np.zeros((0, self.num_dims))  #np.array([])
    self._pending = np.zeros((0, self.num_dims))  #np.array([])
    self._values = np.zeros((0, self.num_dims))  #np.array([])
    self._costs = np.zeros((0, self.num_dims))  #np.array([])

    self.variables_config = copy.copy(variables_config)
def matlab_launcher(job):
    """Run the job as a Matlab function in a fresh pymatlab session.

    Copies job['params'] into a Matlab struct `params`, calls
    job['function-name'](params), and returns the result as a float.

    Raises
    ------
    Exception
        If pymatlab is not installed.
    """
    try:
        import pymatlab
    except ImportError:
        # Narrowed from a bare `except:` — only a failed import should
        # produce this message.
        raise Exception(
            "Cannot import pymatlab. pymatlab is required for Matlab jobs. It is installable with pip."
        )

    sys.stderr.write("Booting up Matlab...\n")
    session = pymatlab.session_factory()

    # Add directory to the Matlab path.
    session.run("cd('%s')" % os.path.realpath(job['expt_dir']))

    session.run('params = struct()')
    for name, param in items(job['params']):
        vals = param['values']
        # Use dtype=float explicitly: ints would become int64 on the
        # Matlab side, which tends to break Matlab scripts because
        # Matlab values are usually doubles.
        session.putvalue('params_%s' % name, np.array(vals, dtype=float))
        # pymatlab cannot put a value directly into a struct, so stage it
        # in a scalar variable and copy it over.
        session.run("params.%s = params_%s" % (name, name))

    sys.stderr.write('Running function %s\n' % job['function-name'])

    # Execute the function and fetch the result.
    session.run('result = %s(params)' % job['function-name'])
    result = session.getvalue('result')
    # TODO: this only works for single-task right now
    result = float(result)
    sys.stderr.write("Got result %s\n" % (result))

    del session
    return result
def parse_resources_from_config(config):
    """Parse the config dict and return a dictionary of resource objects keyed by resource name"""
    if "resources" in config:
        # Explicit resources: build one resource object per entry.
        resources = dict()
        for resource_name, resource_opts in items(config["resources"]):
            task_names = parse_tasks_in_resource_from_config(config, resource_name)
            resources[resource_name] = resource_factory(resource_name,
                                                        task_names,
                                                        resource_opts)
        return resources

    # No explicit resources: everything runs on a single default resource.
    default_resource_name = 'Main'
    task_names = parse_tasks_in_resource_from_config(config, default_resource_name)
    return {default_resource_name:
            resource_factory(default_resource_name, task_names, config)}
def vectorify(self, params):
    """Convert a paramify()-style dict back into a flat data vector.

    Parameters
    ----------
    params : dict
        Maps variable name -> {'type': ..., 'values': ...}, as produced by
        paramify().

    Returns
    -------
    np.ndarray of length self.num_dims. int/float values are written into
    their columns directly; each enum value sets a single 1 inside that
    occurrence's one-hot column block.
    """
    v = np.zeros(self.num_dims)
    for name, param in params.items():
        indices = self.variables_meta[name]['indices']
        ptype = param['type']
        if ptype in ('int', 'float'):
            v[indices] = param['values']
        elif ptype == 'enum':
            options = self.variables_meta[name]['options']
            for i, ind in enumerate(indices):
                # ind is the one-hot column block for occurrence i; light
                # the column corresponding to the chosen option.
                offset = options.index(param['values'][i])
                v[ind[0] + offset] = 1
        else:
            raise Exception('Unknown parameter type.')
    return v
def create_task():
    """Build a small random Task over int/float/enum variables.

    Creates a 3-variable config (2 ints, 3 floats, 2 enum occurrences over
    3 options), draws 10 random input rows that satisfy each variable's
    constraints, pairs them with random values, and returns the Task.
    """
    task_name = "mytask"
    task_type = "OBJECTIVE"
    variables_config = OrderedDict([
        ('X', {"type": "INT", "size": 2, "min": -1, "max": 10}),
        ('Y', {"type": "FLOAT", "size": 3, "min": -0.003, "max": 1e-1}),
        ('Z', {"type": "ENUM", "size": 2, "options": ["one", "two", "three"]}),
    ])
    # NOTE(review): called with a single argument even though
    # variables_config_to_meta is written as (self, variables_config) —
    # presumably it is a staticmethod on Task; confirm.
    variables_meta, num_dims, cardinality = Task.variables_config_to_meta(
        variables_config)

    # Create a set of inputs that satisfies the constraints of each variable.
    # (range instead of xrange for Python 3 compatibility.)
    X = np.zeros((10, num_dims))
    for i in range(10):
        for name, variable in variables_meta.items():
            indices = variable['indices']
            if variable['type'] == 'int':
                X[i, indices] = np.random.randint(variable['min'],
                                                  variable['max'] + 1,
                                                  len(indices))
            elif variable['type'] == 'float':
                X[i, indices] = np.random.rand(len(indices)) * (
                    variable['max'] - variable['min']) + variable['min']
            elif variable['type'] == 'enum':
                for ind in indices:
                    # Pick one option per occurrence and one-hot encode it.
                    cat = np.random.randint(len(ind))
                    X[i, ind[cat]] = 1

    y = np.random.randn(10)
    return Task(task_name, task_type, variables_config, data=X, values=y)
def from_unit(self, U):
    """Map points from the unit hypercube back into native variable values.

    Inverse of to_unit. U is either a single point (1-D) or a matrix with
    one row per point, columns laid out per self.variables_meta. Returns an
    array of the same shape: int/float columns are rescaled from [0, 1]
    back to [min, max] via self.unit_to_int / self.unit_to_float, and each
    enum column block is re-encoded as a strict 1-hot vector (argmax wins).
    An empty input returns np.array([]).
    """
    if U.shape[0] == 0:
        return np.array([])
    # Promote a single point to a 1-row matrix; squeeze back at the end.
    if U.ndim == 1:
        U = U[None, :]
        squeeze = True
    else:
        squeeze = False
    V = np.zeros(U.shape)
    for name, variable in items(self.variables_meta):
        indices = variable['indices']
        if variable['type'] == 'int':
            vals = U[:, indices]
            # NOTE: asserts are stripped under `python -O`; this is a
            # sanity check on the config, not input validation.
            assert (
                variable['max'] - variable['min'] > 0.0
            ), 'Your specified min (%f) for the variable %s must be less than the max (%f)' % (
                variable['min'], name, variable['max'])
            V[:, indices] = self.unit_to_int(vals, variable['min'],
                                             variable['max'])
        elif variable['type'] == 'float':
            vals = U[:, indices]
            assert (
                variable['max'] - variable['min'] > 0.0
            ), 'Your specified min (%f) for the variable %s must be less than the max (%f)' % (
                variable['min'], name, variable['max'])
            V[:, indices] = self.unit_to_float(vals, variable['min'],
                                               variable['max'])
        elif variable['type'] == 'enum':
            for ind in indices:
                # This is a bit more complicated than to_unit because
                # the values might come from the unit hypercube, meaning
                # that U might not have a 1-hot encoding. Set exactly one
                # 1 per row, at the argmax column of this enum block.
                v = np.zeros(V[:, ind].shape)
                v[np.arange(v.shape[0]), U[:, ind].argmax(1)] = 1
                V[:, ind] = v
        else:
            raise Exception("Unknown variable type: %s" % variable['type'])
    if squeeze:
        V = np.squeeze(V)
    return V
def python_launcher(job):
    """Run a Python job: import the experiment's main file and call
    main(job_id, params).

    Converts job['params'] from its JSON form into numpy arrays (float/int)
    or plain values (enum), imports job['main-file'] from job['expt_dir'],
    and returns whatever its main() returns.
    """
    sys.stderr.write("Running python job.\n")

    # Add directory to the system path.
    sys.path.append(os.path.realpath(job['expt_dir']))

    # Change into the directory, remembering where we came from. The old
    # `os.chdir('..')` restore was wrong whenever expt_dir was more than
    # one level deep or an absolute path, and was skipped entirely if the
    # job raised.
    previous_dir = os.getcwd()
    os.chdir(job['expt_dir'])
    sys.stderr.write("Changed into dir %s\n" % (os.getcwd()))

    try:
        # Convert the JSON object into useful parameters.
        params = {}
        for name, param in items(job['params']):
            vals = param['values']
            ptype = param['type'].lower()
            if ptype == 'float':
                params[name] = np.array(vals)
            elif ptype == 'int':
                params[name] = np.array(vals, dtype=int)
            elif ptype == 'enum':
                params[name] = vals
            else:
                raise Exception("Unknown parameter type.")

        # Load up this module and run it.
        main_file = job['main-file']
        if main_file.endswith('.py'):
            main_file = main_file[:-3]
        sys.stderr.write('Importing %s.py\n' % main_file)
        module = __import__(main_file)
        sys.stderr.write('Running %s.main()\n' % main_file)
        result = module.main(job['id'], params)
    finally:
        # Always change back out, even if the job raised.
        os.chdir(previous_dir)

    # TODO: add dict capability
    sys.stderr.write("Got result %s\n" % (result))
    return result
def parse_tasks_in_resource_from_config(config, resource_name):
    """parse the config dict and return a list of task names that use the given resource name"""
    # If the user did not explicitly specify tasks, then we have to assume
    # the single task runs on all resources
    # TODO: THIS IS VERY DANGEROUS, BECAUSE THE TASK MIGHT NOT NAMED MAIN
    # NEED TO HAVE A CONFIG PARSING SECTION OF THE CODE!!!
    if "tasks" not in config:
        return ['main']
    # A task without a "resources" entry is assumed to run on every
    # resource; otherwise it runs only where listed.
    return [
        task_name
        for task_name, task_config in config["tasks"].items()
        if "resources" not in task_config
        or resource_name in task_config["resources"]
    ]
def paramify(self, data_vector):
    """Convert a flat 1-D data vector into a params dict.

    Inverse of vectorify. For each variable in self.variables_meta the
    result maps name -> {'type': ..., 'values': ...}; int/float values are
    taken straight from the vector's columns, enum values are decoded from
    their one-hot blocks by argmax.

    Raises
    ------
    Exception
        If data_vector is not 1-D.
    """
    if data_vector.ndim != 1:
        raise Exception('Input to paramify must be a 1-D array.')

    params = {}
    for name, vdict in self.variables_meta.items():
        indices = vdict['indices']
        entry = {'type': vdict['type']}
        if vdict['type'] in ('int', 'float'):
            entry['values'] = data_vector[indices]
        elif vdict['type'] == 'enum':
            # One decoded option per occurrence (one-hot block) of the enum.
            entry['values'] = [vdict['options'][data_vector[ind].argmax(0)]
                               for ind in indices]
        else:
            raise Exception('Unknown parameter type.')
        params[name] = entry
    return params
def main():
    """Top-level Spearmint dispatch loop.

    Loads options and resources, initializes the configured chooser,
    connects to MongoDB, then loops forever: for each resource that is
    accepting jobs, asks the chooser for a suggestion, dispatches it, and
    records its status in the database. Sleeps between rounds when no
    resource will take more work. Never returns.
    """
    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    while True:
        for resource_name, resource in items(resources):
            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            # resource is not full, we might wait because of cost incurred
            # Note: I chose to fill up one resource and them move on to the next
            # You could also do it the other way, by changing "while" to "if" here
            while resource.acceptingJobs(jobs):
                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db,
                                               expt_dir, options, resource_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(list(resources.values()), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs
        # already finished by the time this point is reached)
        # NOTE(review): the collapsed source does not show whether this check
        # sits inside the resource loop or at the outer while level; placed
        # at the outer level here, matching its comment — confirm.
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
def values(self):
    """return a dictionary of the task values keyed by task name"""
    values_by_task = {}
    for task_name, task in items(self.tasks):
        values_by_task[task_name] = task.values
    return values_by_task