예제 #1
0
def compress_nested_container(u_container):
    """Return a copy of a nested dict/list with every numpy array compressed.

    Dicts and lists are walked recursively.  Each ``np.ndarray`` leaf is
    replaced by ``compress_array(leaf)``; every other leaf is kept as is.
    An input that is neither a dict nor a list yields ``None``.
    """

    def _pack(element):
        # Containers recurse, arrays are compressed, anything else passes through.
        if isinstance(element, (dict, list)):
            return compress_nested_container(element)
        if isinstance(element, np.ndarray):
            return compress_array(element)
        return element

    if isinstance(u_container, dict):
        return {name: _pack(element) for name, element in items(u_container)}
    if isinstance(u_container, list):
        return [_pack(element) for element in u_container]
    return None
예제 #2
0
    def to_unit(self, V):
        """Convert inputs from their native ranges to the unit representation.

        ``V`` is either a single 1-D point or a 2-D array with one point per
        row; the result has the same shape as ``V``.  Columns are converted
        per variable according to ``self.variables_meta``:

        * 'int' columns go through ``self.int_to_unit`` with the variable's
          min and max,
        * 'float' columns go through ``self.float_to_unit`` likewise,
        * 'enum' columns are copied unchanged.

        Returns an empty array when ``V`` has length 0 along its first axis.
        Raises ``Exception`` if a variable has an unrecognised type.
        """
        if V.shape[0] == 0:
            return np.array([])

        # Work on a 2-D view so the column indexing below is uniform.
        single_point = V.ndim == 1
        if single_point:
            V = V[None, :]

        U = np.zeros(V.shape)
        for name, variable in items(self.variables_meta):
            indices = variable['indices']
            if variable['type'] == 'int':
                vals = V[:, indices]
                U[:, indices] = self.int_to_unit(vals, variable['min'],
                                                 variable['max'])
            elif variable['type'] == 'float':
                vals = V[:, indices]
                U[:, indices] = self.float_to_unit(vals, variable['min'],
                                                   variable['max'])
            elif variable['type'] == 'enum':
                for ind in indices:
                    # Assumed to already be stored in a 1-hot encoding
                    U[:, ind] = V[:, ind]
            else:
                raise Exception("Unknown variable type.")

        # Drop only the axis that was added above.  np.squeeze would also
        # remove the remaining axis of a single-column input and hand back a
        # 0-d array instead of the 1-D shape the caller passed in.
        if single_point:
            U = U[0]

        return U
예제 #3
0
    def paramify_and_print(self,
                           data_vector,
                           left_indent=0,
                           indent_top_row=False):
        """Write a NAME / TYPE / VALUE table for ``data_vector`` to stderr.

        ``data_vector`` is converted with ``self.paramify``; every value of
        every parameter is printed on its own row.  The parameter name and
        type appear only on the first row of each parameter.

        left_indent    -- number of spaces placed before each table row.
        indent_top_row -- when false, the header row is written without the
                          indentation (the underline row and all data rows
                          are always indented).
        """
        params = self.paramify(data_vector)
        indentation = ' ' * left_indent

        if indent_top_row:
            sys.stderr.write(indentation)
        sys.stderr.write('NAME          TYPE       VALUE\n')
        sys.stderr.write(indentation)
        sys.stderr.write('----          ----       -----\n')

        for param_name, param in items(params):

            if param['type'] == 'float':
                format_str = '%s%-12.12s  %-9.9s  %-12f\n'
            elif param['type'] == 'enum':
                format_str = '%s%-12.12s  %-9.9s  %-12s\n'
            else:
                format_str = '%s%-12.12s  %-9.9s  %-12d\n'

            for i, value in enumerate(param['values']):
                if i == 0:
                    row = (indentation, param_name, param['type'], value)
                else:
                    # Continuation rows leave the name AND type columns
                    # blank; the format string always needs four values.
                    row = (indentation, '', '', value)
                sys.stderr.write(format_str % row)
예제 #4
0
def decompress_nested_container(c_container):
    """Return a copy of a nested dict/list with compressed arrays restored.

    A dict whose 'ctype' entry equals ``COMPRESS_TYPE`` is treated as a
    compressed array and passed to ``decompress_array``.  Other dicts and
    lists are walked recursively; non-container leaves are kept as is.
    An input that is neither a dict nor a list yields ``None``.

    Raises ``Exception`` when a dict tagged as a compressed array cannot be
    decompressed.
    """
    if isinstance(c_container, dict):
        if 'ctype' in c_container and c_container['ctype'] == COMPRESS_TYPE:
            try:
                return decompress_array(c_container)
            except Exception:
                # TODO: still a very generic catch; narrow it once the
                # failure modes of decompress_array are known.  Catching
                # Exception (not a bare except) at least lets
                # KeyboardInterrupt and SystemExit propagate untouched.
                raise Exception(
                    'Container does not contain a valid array.'
                )
        else:
            udict = {}
            for key, value in items(c_container):
                if isinstance(value, (dict, list)):
                    udict[key] = decompress_nested_container(value)
                else:
                    udict[key] = value

            return udict
    elif isinstance(c_container, list):
        ulist = []
        for value in c_container:
            if isinstance(value, (dict, list)):
                ulist.append(decompress_nested_container(value))
            else:
                ulist.append(value)

        return ulist
예제 #5
0
def tired(db, experiment_name, resources):
    """
    Report whether every resource is refusing new jobs.

    The experiment's jobs are loaded once with ``load_jobs`` and offered to
    each resource's ``acceptingJobs``; the result is True only if none of
    them accepts.
    """
    jobs = load_jobs(db, experiment_name)
    return not any(
        resource.acceptingJobs(jobs) for _, resource in items(resources))
예제 #6
0
def print_dict(d, level=1):
    """Print a nested dict, one key per line, padded according to depth.

    Non-dict values are printed directly.  For nested dicts (level > 1) an
    empty line is printed before the keys.
    """
    if not isinstance(d, dict):
        print(d)
        return

    if level > 1:
        print("")
    padding = "  " * level
    for key, child in items(d):
        print(padding, key)
        print_dict(child, level=level + 1)
예제 #7
0
    def variables_config_to_meta(self, variables_config):
        """
        Build per-variable metadata from the config-file description.

        For every variable the result records its lower-cased type, its
        bounds ('int' / 'float') or its options ('enum'), and 'indices': the
        matrix column(s) assigned to each of its ``size`` entries.  An int or
        float entry takes one column; an enum entry takes one column per
        option and is stored as a list of those columns.

        Returns ``(variables_meta, num_dims, cardinality)`` where
        ``num_dims`` is the total number of matrix columns and
        ``cardinality`` is the sum of the variables' sizes.

        Raises ``Exception`` for a type other than int, float or enum.
        """
        variables_meta = OrderedDict()
        cardinality = 0  # Sum of the 'size' fields
        num_dims = 0  # Next free column in the matrix representation

        for name, variable in items(variables_config):
            cardinality += variable['size']
            kind = variable['type'].lower()
            vdict = {'type': kind, 'indices': []}

            if kind == 'int':
                vdict['min'] = int(variable['min'])
                vdict['max'] = int(variable['max'])
            elif kind == 'float':
                vdict['min'] = float(variable['min'])
                vdict['max'] = float(variable['max'])
            elif kind == 'enum':
                vdict['options'] = list(variable['options'])
            else:
                raise Exception("Unknown variable type.")

            # The type was validated above, so only two layouts remain.
            for _ in xrange(variable['size']):
                if kind == 'enum':
                    width = len(list(variable['options']))
                    vdict['indices'].append(
                        list(np.arange(width) + num_dims))
                    num_dims += width
                else:
                    vdict['indices'].append(num_dims)
                    num_dims += 1

            variables_meta[name] = vdict

        return variables_meta, num_dims, cardinality
예제 #8
0
    def __init__(self, tasks_config, variables_config):
        """Create one ``Task`` per configured task plus a 'dummy' task.

        tasks_config     -- mapping of task name to that task's options.
        variables_config -- variable description handed to every ``Task``;
                            a shallow copy is kept on the instance.

        The input, pending, value and cost stores all start as empty arrays
        with zero rows and ``self.num_dims`` columns.
        """
        self.tasks = {
            task_name: Task(task_name, task_options, variables_config)
            for task_name, task_options in items(tasks_config)
        }

        self.dummy_task = Task('dummy', {'type': 'dummy'}, variables_config)

        # TODO: Validate the data
        self._inputs = np.zeros((0, self.num_dims))
        self._pending = np.zeros((0, self.num_dims))
        self._values = np.zeros((0, self.num_dims))
        self._costs = np.zeros((0, self.num_dims))

        self.variables_config = copy.copy(variables_config)
예제 #9
0
def matlab_launcher(job):
    """Run a job as a Matlab function through pymatlab and return its result.

    ``job`` must provide 'expt_dir' (Matlab changes into it), 'params' (a
    mapping of name to a dict with a 'values' entry) and 'function-name'
    (the Matlab function to call).  The parameters are passed to the
    function as fields of a single ``params`` struct.

    Returns the function's result converted to ``float``.
    Raises ``Exception`` if pymatlab cannot be imported.
    """
    try:
        import pymatlab
    except ImportError:
        # Only a failed import means "pymatlab is missing"; any other error
        # raised while importing is left to propagate with its own type.
        raise Exception(
            "Cannot import pymatlab. pymatlab is required for Matlab jobs. It is installable with pip."
        )

    sys.stderr.write("Booting up Matlab...\n")
    session = pymatlab.session_factory()

    # Change Matlab's working directory to the experiment directory.
    session.run("cd('%s')" % os.path.realpath(job['expt_dir']))

    session.run('params = struct()')
    for name, param in items(job['params']):
        vals = param['values']

        # Force dtype=float: integer inputs would otherwise become int64,
        # which tends to break Matlab scripts that expect doubles.
        session.putvalue('params_%s' % name, np.array(vals, dtype=float))
        # Workaround: the value cannot be put directly into a struct field,
        # so it is stored in a temporary variable and copied over.
        session.run("params.%s = params_%s" % (name, name))

    sys.stderr.write('Running function %s\n' % job['function-name'])

    # Execute the function
    session.run('result = %s(params)' % job['function-name'])

    # Get the result
    result = session.getvalue('result')

    # TODO: this only works for single-task right now
    result = float(result)
    sys.stderr.write("Got result %s\n" % (result))

    del session

    return result
예제 #10
0
def parse_resources_from_config(config):
    """Build the resource objects described by ``config``, keyed by name.

    With a "resources" section, one resource is created per entry from that
    entry's own options.  Without one, a single resource named 'Main' is
    created from the whole config.  The task names for each resource come
    from ``parse_tasks_in_resource_from_config``.
    """
    if "resources" in config:
        # Resources were specified explicitly.
        return {
            resource_name: resource_factory(
                resource_name,
                parse_tasks_in_resource_from_config(config, resource_name),
                resource_opts)
            for resource_name, resource_opts in items(config["resources"])
        }

    # No explicit resources: fall back to a single default one.
    default_resource_name = 'Main'
    default_tasks = parse_tasks_in_resource_from_config(
        config, default_resource_name)
    default_resource = resource_factory(default_resource_name, default_tasks,
                                        config)
    return {default_resource_name: default_resource}
예제 #11
0
    def vectorify(self, params):
        """Flatten a params dict into a vector of length ``self.num_dims``.

        'int' and 'float' values are written to the columns listed for the
        variable in ``self.variables_meta``.  For 'enum', each value sets a
        single 1 inside its column group, at the position of that value in
        the variable's options list.

        Raises ``Exception`` for any other parameter type.
        """
        v = np.zeros(self.num_dims)
        for name, param in items(params):
            meta = self.variables_meta[name]
            indices = meta['indices']
            kind = param['type']

            if kind in ('int', 'float'):
                v[indices] = param['values']
            elif kind == 'enum':
                options = meta['options']
                for i, ind in enumerate(indices):
                    # ind[0] is the first column of this entry's group.
                    v[ind[0] + options.index(param['values'][i])] = 1
            else:
                raise Exception('Unknown parameter type.')

        return v
예제 #12
0
def create_task():
    """Build a ``Task`` named "mytask" populated with 10 random data points.

    Three variables are configured: X (2 ints in [-1, 10]), Y (3 floats in
    [-0.003, 0.1]) and Z (2 enums over "one"/"two"/"three").  Each input row
    is drawn to satisfy those constraints; the target values are standard
    normal draws.
    """
    task_name = "mytask"
    task_type = "OBJECTIVE"

    variables_config = OrderedDict([
        ('X', {"type": "INT", "size": 2, "min": -1, "max": 10}),
        ('Y', {"type": "FLOAT", "size": 3, "min": -0.003, "max": 1e-1}),
        ('Z', {"type": "ENUM", "size": 2, "options": ["one", "two", "three"]}),
    ])

    variables_meta, num_dims, cardinality = Task.variables_config_to_meta(
        variables_config)

    # Fill every row with values that respect each variable's constraints.
    X = np.zeros((10, num_dims))
    for row in xrange(10):
        for name, variable in items(variables_meta):
            columns = variable['indices']
            kind = variable['type']
            if kind == 'int':
                # randint's upper bound is exclusive, hence max + 1.
                X[row, columns] = np.random.randint(
                    variable['min'], variable['max'] + 1, len(columns))
            elif kind == 'float':
                span = variable['max'] - variable['min']
                X[row, columns] = (
                    np.random.rand(len(columns)) * span + variable['min'])
            elif kind == 'enum':
                for group in columns:
                    # Switch on exactly one column per enum entry.
                    chosen = np.random.randint(len(group))
                    X[row, group[chosen]] = 1

    y = np.random.randn(10)

    return Task(task_name, task_type, variables_config, data=X, values=y)
예제 #13
0
    def from_unit(self, U):
        """Convert inputs from the unit representation to their native ranges.

        ``U`` is either a single 1-D point or a 2-D array with one point per
        row; the result has the same shape as ``U``.  Columns are converted
        per variable according to ``self.variables_meta``:

        * 'int' columns go through ``self.unit_to_int`` with the variable's
          min and max,
        * 'float' columns go through ``self.unit_to_float`` likewise,
        * each 'enum' column group becomes a 1-hot row with the 1 at the
          position of the group's largest entry.

        Returns an empty array when ``U`` has length 0 along its first axis.
        Raises ``AssertionError`` if an int/float variable's max is not
        greater than its min, and ``Exception`` for an unknown type.
        """
        if U.shape[0] == 0:
            return np.array([])

        # Work on a 2-D view so the column indexing below is uniform.
        single_point = U.ndim == 1
        if single_point:
            U = U[None, :]

        V = np.zeros(U.shape)
        for name, variable in items(self.variables_meta):
            indices = variable['indices']
            if variable['type'] == 'int':
                vals = U[:, indices]
                assert (
                    variable['max'] - variable['min'] > 0.0
                ), 'Your specified min (%f) for the variable %s must be less than the max (%f)' % (
                    variable['min'], name, variable['max'])
                V[:, indices] = self.unit_to_int(vals, variable['min'],
                                                 variable['max'])
            elif variable['type'] == 'float':
                vals = U[:, indices]
                assert (
                    variable['max'] - variable['min'] > 0.0
                ), 'Your specified min (%f) for the variable %s must be less than the max (%f)' % (
                    variable['min'], name, variable['max'])
                V[:, indices] = self.unit_to_float(vals, variable['min'],
                                                   variable['max'])
            elif variable['type'] == 'enum':
                for ind in indices:
                    # The values might come straight from the unit
                    # hypercube, so U might not be 1-hot encoded here:
                    # pick the largest entry of each group instead.
                    v = np.zeros(V[:, ind].shape)
                    v[np.arange(v.shape[0]), U[:, ind].argmax(1)] = 1
                    V[:, ind] = v
            else:
                raise Exception("Unknown variable type: %s" % variable['type'])

        # Drop only the axis that was added above.  np.squeeze would also
        # remove the remaining axis of a single-column input and hand back a
        # 0-d array instead of the 1-D shape the caller passed in.
        if single_point:
            V = V[0]

        return V
예제 #14
0
def python_launcher(job):
    """Import a job's main file and run its ``main(job_id, params)``.

    ``job`` must provide 'expt_dir' (added to ``sys.path`` and used as the
    working directory while the job runs), 'params' (a mapping of name to a
    dict with 'type' and 'values'), 'main-file' (module file name, with or
    without '.py') and 'id'.

    Parameter values are handed to ``main`` as numpy arrays for 'float' and
    'int' (the latter with an integer dtype) and unchanged for 'enum'.

    Returns whatever ``main`` returns.  The working directory in effect
    before the call is restored afterwards, even if the job raises.
    Raises ``Exception`` for an unknown parameter type.
    """
    sys.stderr.write("Running python job.\n")

    # Make the experiment directory importable (once per process).
    expt_path = os.path.realpath(job['expt_dir'])
    if expt_path not in sys.path:
        sys.path.append(expt_path)

    # Remember where we started: chdir('..') only gets back there when
    # expt_dir happens to be a direct child of the starting directory.
    original_dir = os.getcwd()
    os.chdir(job['expt_dir'])
    try:
        sys.stderr.write("Changed into dir %s\n" % (os.getcwd()))

        # Convert the JSON object into useful parameters.
        params = {}
        for name, param in items(job['params']):
            vals = param['values']
            kind = param['type'].lower()

            if kind == 'float':
                params[name] = np.array(vals)
            elif kind == 'int':
                params[name] = np.array(vals, dtype=int)
            elif kind == 'enum':
                params[name] = vals
            else:
                raise Exception("Unknown parameter type.")

        # Load up this module and run
        main_file = job['main-file']
        if main_file.endswith('.py'):
            main_file = main_file[:-3]
        sys.stderr.write('Importing %s.py\n' % main_file)
        module = __import__(main_file)
        sys.stderr.write('Running %s.main()\n' % main_file)
        result = module.main(job['id'], params)
    finally:
        # Change back out, whether or not the job succeeded.
        os.chdir(original_dir)

    # TODO: add dict capability

    # Wrap in a 1-tuple so a tuple-valued result is printed rather than
    # being unpacked as several format arguments.
    sys.stderr.write("Got result %s\n" % (result,))

    return result
예제 #15
0
def parse_tasks_in_resource_from_config(config, resource_name):
    """List the names of the tasks that run on ``resource_name``.

    Without a "tasks" section the single task is assumed to be called 'main'
    and to run on every resource.  Otherwise a task is included when it
    names no resources at all, or when ``resource_name`` is among the ones
    it names.
    """
    # TODO: THIS IS VERY DANGEROUS, BECAUSE THE TASK MIGHT NOT BE NAMED MAIN.
    # NEED TO HAVE A CONFIG PARSING SECTION OF THE CODE!!!
    if "tasks" not in config:
        return ['main']

    # A task without its own "resources" entry is assumed to run everywhere.
    return [
        task_name
        for task_name, task_config in items(config["tasks"])
        if "resources" not in task_config
        or resource_name in task_config["resources"]
    ]
예제 #16
0
    def paramify(self, data_vector):
        """Turn a 1-D data vector into a dict of named parameters.

        Each entry of the result maps a variable name to
        ``{'type': ..., 'values': ...}``.  For 'int' and 'float' the values
        are the vector entries at the variable's columns.  For 'enum' they
        are the option names selected by the largest entry of each column
        group.

        Raises ``Exception`` if ``data_vector`` is not 1-D or a variable has
        an unknown type.
        """
        if data_vector.ndim != 1:
            raise Exception('Input to paramify must be a 1-D array.')

        params = {}
        for name, vdict in items(self.variables_meta):
            indices = vdict['indices']
            kind = vdict['type']

            if kind in ('int', 'float'):
                values = data_vector[indices]
            elif kind == 'enum':
                values = [
                    vdict['options'][data_vector[ind].argmax(0)]
                    for ind in indices
                ]
            else:
                raise Exception('Unknown parameter type.')

            params[name] = {'type': kind, 'values': values}

        return params
예제 #17
0
def main():
    """Run the scheduling loop: suggest jobs and dispatch them to resources.

    Reads the options and experiment directory from ``get_options``, builds
    the resources, imports the chooser module named by options['chooser']
    from ``spearmint.choosers`` and connects to the MongoDB at
    options['database']['address'].  It then loops forever: every resource
    is filled with suggested jobs while it accepts them, and the loop sleeps
    for options['polling-time'] (default 5) whenever ``tired`` reports True.
    This function has no exit path of its own.
    """
    options, expt_dir = get_options()

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    # Fall back to a placeholder name when the config does not give one.
    experiment_name     = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)        
    db         = MongoDB(database_address=db_address)
    
    while True:

        for resource_name, resource in items(resources):

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the 
            #       resource is not full, we might wait because of cost incurred
            # Note: one resource is filled up before moving on to the next.
            # You could also do it the other way, by changing "while" to "if" here

            while resource.acceptingJobs(jobs):

                # Load jobs from DB 
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)
                
                # Remove any broken jobs from pending.
                remove_broken_jobs(db, jobs, experiment_name, resources)

                # Get a suggestion for the next job
                suggested_job = get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)
    
                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

                # Set the status of the job appropriately (successfully submitted or not)
                # A process id of None is treated as a failed dispatch.
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                # Reload so the loop condition and the status print see the job just saved.
                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(list(resources.values()), jobs)

        # If no resources are accepting jobs, sleep
        # (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached)
        if tired(db, experiment_name, resources):
            time.sleep(options.get('polling-time', 5))
예제 #18
0
 def values(self):
     """Collect each task's ``values`` attribute into a dict keyed by task name."""
     values_by_task = {}
     for task_name, task in items(self.tasks):
         values_by_task[task_name] = task.values
     return values_by_task