Example #1
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, validates/normalizes the result into a task-name -> value dict,
    and saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n" %
                     (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                result = {task_name: np.nan for task_name in job['tasks']}
            elif len(job['tasks']) == 1:
                # Only one named task and result is a scalar: wrap it.
                result = {job['tasks'][0]: result}
            else:
                result = {DEFAULT_TASK_NAME: result}
        else:
            if "objective" in result and "constraints" in result:
                # Flatten {"objective": ..., "constraints": [...]} into one
                # flat dict of named tasks.
                result_new = {DEFAULT_TASK_NAME: result["objective"]}
                for i, constraint in enumerate(result["constraints"]):
                    result_new['%s%d' % (DEFAULT_CONSTRAINT_NAME, i)] = constraint
                result = result_new

        # Extra keys in the result dict are fine; every job task must be
        # present, possibly excusing the special 'NaN' pseudo-task.
        if not set(job['tasks']).issubset(set(result.keys())):
            if not set(job['tasks']).issubset(
                    set(result.keys()).union(['NaN'])):
                raise Exception(
                    "Result task names %s did not match job task names %s." %
                    (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        sys.stderr.flush()
        sys.stdout.flush()
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n" %
                         (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" %
                         (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #2
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, normalizes the result into a task-name -> value dict, and
    saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                # zip form kept (not a dict comprehension) for very old
                # interpreters; see the sibling launch() variants.
                result = dict(zip(job['tasks'], [np.nan] * len(job['tasks'])))
            elif len(job['tasks']) == 1:  # Only one named job
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
def main():
    """Run the Spearmint main optimization loop from the command line.

    Parses command-line options, loads the experiment configuration,
    connects to MongoDB, then repeatedly: fits the chooser to completed
    jobs, saves the current recommendation, suggests a new job, and
    dispatches it to an accepting resource. Terminates once every resource
    (or any single task) has completed its maximum number of jobs.
    """

    parser = optparse.OptionParser(usage="usage: %prog [options] directory")

    parser.add_option("--config", dest="config_file",
                      help="Configuration file name.",
                      type="string", default="config.json")
    parser.add_option("--no-output", action="store_true",
                      help="Do not create output files.")
    parser.add_option("--repeat", dest="repeat",
                      help="Used for repeating the same experiment many times.",
                      type="int", default="-1")

    (commandline_kwargs, args) = parser.parse_args()

    # Read in the config file
    expt_dir = os.path.realpath(args[0])
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, commandline_kwargs.config_file)
    experiment_name = options["experiment-name"]

    # Special advanced feature for repeating the same experiment many times
    if commandline_kwargs.repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name,
                                                 commandline_kwargs.repeat)

    if not commandline_kwargs.no_output:  # if we want output
        if commandline_kwargs.repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, commandline_kwargs.repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output', options["experiment-name"])
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if commandline_kwargs.repeat < 0:
            # Log to a file in the output directory (skipped for repeats).
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])

    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    # Maximum iterations: the environment variable takes precedence over the
    # config file, which takes precedence over the default.
    # (Fixed: these lines were tab-indented, which is a SyntaxError under
    # Python 3 and fragile under Python 2; also '== None' -> 'is None'.)
    # NOTE(review): maxiterations is computed but never used below - TODO confirm.
    if os.getenv('SPEARMINT_MAX_ITERATIONS') is None and 'max_iterations' not in options:
        maxiterations = DEFAULT_MAX_ITERATIONS
    elif os.getenv('SPEARMINT_MAX_ITERATIONS') is not None:
        maxiterations = int(os.getenv('SPEARMINT_MAX_ITERATIONS'))
    else:
        maxiterations = options['max_iterations']

    # Set random seed for reproducibility (both numpy's RNG and seed()).
    if 'random_seed' in options:
        np.random.seed(int(options['random_seed']))
        seed(int(options['random_seed']))

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I could chose to fill up one resource and them move on to the next ("if")
            # You could also do it the other way, by changing "if" to "while" here

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                optim_start_time = time.time()

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                # Compute the best value so far, a.k.a. the "recommendation"
                recommendation = chooser.best()

                # Save the recommendation in the DB
                numComplete_by_task = {task_name: task.numComplete(jobs)
                                       for task_name, task in tasks.iteritems()}

                db.save({'num_complete': resource.numComplete(jobs),
                         'num_complete_tasks': numComplete_by_task,
                         'params': input_space.paramify(recommendation['model_model_input']),
                         'objective': recommendation['model_model_value'],
                         'params_o': None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                         'obj_o': recommendation['obser_obser_value'],
                         'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                         'obj_om': recommendation['obser_model_value']},
                        experiment_name, 'recommendations', {'id': len(jobs)})

                # Get the decoupling groups
                task_couplings = {task_name: tasks[task_name].options["group"]
                                  for task_name in resource.tasks}

                logging.info('\nGetting suggestion for %s...\n' % (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(task_couplings, optim_start_time)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id'          : len(jobs) + 1,
                    'params'      : input_space.paramify(suggested_input),
                    'expt_dir'    : options['main_file_path'],
                    'tasks'       : suggested_tasks,
                    'resource'    : resource_name,
                    'main-file'   : resource.main_file,
                    'language'    : options['tasks'][suggested_task]['language'],
                    'status'      : 'new',
                    'submit time' : time.time(),
                    'start time'  : None,
                    'end time'    : None
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address,
                                                      expt_dir, output_directory)

                # Print the current time
                logging.info('Current time: %s' % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Fixed: this previously read job['id'] where 'job' is an
                    # undefined name, raising NameError on dispatch failure.
                    logging.info('Job %s failed -- check output file for details.' % suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        # (all()/any() replace the equivalent reduce(lambda ...) forms.)
        if all(r.maxCompleteReached(jobs) for r in resources.values()) or \
           any(t.maxCompleteReached(jobs) for t in tasks.values()):
            # Do all this extra work just to save the final recommendation -- would be ok to delete everything
            # in here and just "return"
            sys.stdout.write('\n')
            jobs = load_jobs(db, experiment_name)
            tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)
            hypers = db.load(experiment_name, 'hypers')
            hypers = chooser.fit(tasks, hypers)
            if hypers:
                db.save(hypers, experiment_name, 'hypers')
            recommendation = chooser.best()

            numComplete_by_task = {task_name: task.numComplete(jobs)
                                   for task_name, task in tasks.iteritems()}
            db.save({'num_complete': resource.numComplete(jobs),
                     'num_complete_tasks': numComplete_by_task,
                     'params': input_space.paramify(recommendation['model_model_input']),
                     'objective': recommendation['model_model_value'],
                     'params_o': None if recommendation['obser_obser_input'] is None else input_space.paramify(recommendation['obser_obser_input']),
                     'obj_o': recommendation['obser_obser_value'],
                     'params_om': None if recommendation['obser_model_input'] is None else input_space.paramify(recommendation['obser_model_input']),
                     'obj_om': recommendation['obser_model_value']},
                    experiment_name, 'recommendations', {'id': len(jobs)})
            logging.info('Maximum number of jobs completed. Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write('Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Example #4
0
def launch(db_address, experiment_name, job_id):
    """
    Launch the job with the given id.

    Loads the job from the database, runs the launcher matching the job's
    language, normalizes the result into a task-name -> value dict, and
    saves the final status ('complete' or 'broken') back to the database.
    """

    db = MongoDB(database_address=db_address)
    job = db.load(experiment_name, 'jobs', {'id': job_id})

    # Record the start time immediately so observers see the job as running.
    start_time = time.time()
    job['start time'] = start_time
    db.save(job, experiment_name, 'jobs', {'id': job_id})

    sys.stderr.write("Job launching after %0.2f seconds in submission.\n"
                     % (start_time - job['submit time']))

    success = False

    try:
        language = job['language'].lower()
        if language == 'matlab':
            result = matlab_launcher(job)
        elif language == 'python':
            result = python_launcher(job)
        elif language == 'shell':
            result = shell_launcher(job)
        elif language == 'mcr':
            result = mcr_launcher(job)
        else:
            raise Exception("That language has not been implemented.")

        if not isinstance(result, dict):
            # Returning just NaN means NaN on all tasks.
            if np.isnan(result):
                # zip form kept (not a dict comprehension) for very old
                # interpreters; see the sibling launch() variants.
                result = dict(zip(job['tasks'], [np.nan] * len(job['tasks'])))
            elif len(job['tasks']) == 1:  # Only one named job
                result = {job['tasks'][0]: result}
            else:
                result = {'main': result}

        if set(result.keys()) != set(job['tasks']):
            # list() so the py3 dict_keys view renders readably in the message.
            raise Exception("Result task names %s did not match job task names %s."
                            % (list(result.keys()), job['tasks']))

        success = True
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate; any other failure marks the job broken below.
        import traceback
        traceback.print_exc()
        sys.stderr.write("Problem executing the function\n")
        print(sys.exc_info())

    end_time = time.time()

    if success:
        sys.stderr.write("Completed successfully in %0.2f seconds. [%s]\n"
                         % (end_time - start_time, result))

        job['values'] = result
        job['status'] = 'complete'
        job['end time'] = end_time

    else:
        sys.stderr.write("Job failed in %0.2f seconds.\n" % (end_time - start_time))

        # Update metadata.
        job['status'] = 'broken'
        job['end time'] = end_time

    db.save(job, experiment_name, 'jobs', {'id': job_id})
Example #5
0
def main(filter=None):
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Loads an experiment's jobs from MongoDB, fits the chooser, and
    optionally (--clean) marks pending jobs as broken and (--table)
    prints or (--csv) saves a table of observed results.

    NOTE(review): the 'filter' parameter shadows the builtin and is never
    used in this function body - TODO confirm whether callers rely on it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--clean', action='store_true', help='remove broken jobs')
    parser.add_argument('--table', action='store_true', help='print table')
    parser.add_argument('--csv', action='store_true', help='save table as csv')
    parser.add_argument('--d', type=int, help='sort by distance from dth smallest result')
    parser.add_argument('--name', help='experiment name', default=None)
    # parse_known_args: unrecognized args are forwarded to get_options below.
    args, unknown = parser.parse_known_args()

    options, expt_dir = get_options(unknown)
    # print "options:"
    # print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    # print "chooser", chooser
    # --name overrides the config-file experiment name.
    if args.name:
        experiment_name = args.name
    else:
        experiment_name     = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    # sys.stderr.write('Using database at %s.\n' % db_address)
    db         = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    print len(jobs), 'jobs found'
    # print jobs

    # remove_broken_jobs: mark any still-'pending' jobs as broken in the DB.
    if args.clean:
        for job in jobs:
            if job['status'] == 'pending':
                sys.stderr.write('Broken job %s detected.\n' % job['id'])
                job['status'] = 'broken'
                db.save(job, experiment_name, 'jobs', {'id' : job['id']})

    # print "resources:", resources
    # print_dict(resources)
    # Python-2 idiom: take an arbitrary (first) resource from the dict.
    resource = resources.itervalues().next()

    task_options = {task: options["tasks"][task] for task in resource.tasks}
    # print "task_options:"
    # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, experiment_name, resource.tasks)
    hypers = load_hypers(db, experiment_name)
    chooser.fit(task_group, hypers, task_options)
    # lp is unused below; x (the best input, in unit space) is used as the
    # default distance target for --d.
    lp, x = chooser.best()

    if args.table:
        os.chdir(unknown[0])
        out_file = open('results.csv', 'w') if args.csv else sys.stdout

        # get the observed points
        task = task_group.tasks.itervalues().next()
        idata = task.valid_normalized_data_dict
        inputs = idata["inputs"]
        # Pair each input with [paramified values, unit-space vector].
        inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs)
        vals = idata["values"]
        # Undo standardization so the table shows original-scale objectives.
        vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals]

        out_file.write('\n%10s' % 'result')
        lengths = [10]
        for name, vdict in task.variables_meta.iteritems():
            name = '%10s' % name
            out_file.write(',' + name)
            lengths.append(len(name))
        out_file.write('\n')

        # One %-format field per column, %.4f for numeric, %s for enums.
        line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) +
            ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])])

        # Sort rows by result; with --d, re-sort by distance from the dth
        # smallest result (or from the best input x when d < 0).
        points = sorted(zip(vals, inputs), key=lambda r: r[0])
        if args.d is not None:
            target = x
            if args.d >= 0:
                target = points[args.d][1][1]
            points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target))
        for i, point in enumerate(points):
            subs = [point[0]] + [d['values'][0] for d in point[1][0]]
            out_file.write(line_template % tuple(subs) + '\n')
        # NOTE(review): when --csv is not given this closes sys.stdout -
        # presumably acceptable at end of script; verify against callers.
        out_file.close()
Example #6
0
def main(expt_dir, config_file="config.json", no_output=False, repeat=-1):
    """Run the Spearmint main optimization loop for one experiment.

    Parameters
    ----------
    expt_dir : str
        Directory containing the experiment configuration.
    config_file : str
        Configuration file name inside ``expt_dir``.
    no_output : bool
        If True, create no output files or file logging.
    repeat : int
        If >= 0, run as one of several repeats of the same experiment.

    Repeatedly fits the chooser, stores recommendations, suggests and
    dispatches jobs, and terminates when all resources are finished, any
    task is finished, or the maximum experiment time is reached.
    """
    if not os.path.isdir(expt_dir):
        raise Exception("Cannot find directory %s" % expt_dir)

    options = parse_config_file(expt_dir, config_file)
    experiment_name = options["experiment_name"]

    # Special advanced feature for repeating the same experiment many times
    if repeat >= 0:
        experiment_name = repeat_experiment_name(experiment_name, repeat)

    if not no_output:  # if we want output
        if repeat >= 0:
            output_directory = repeat_output_dir(expt_dir, repeat)
        else:
            output_directory = os.path.join(expt_dir, 'output')
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)

        if repeat < 0:
            # Log to a file in the output directory (skipped for repeats).
            rootLogger = logging.getLogger()
            fileHandler = logging.FileHandler(
                os.path.join(output_directory, 'main.log'))
            fileHandler.setFormatter(logFormatter)
            fileHandler.setLevel(logLevel)
            rootLogger.addHandler(fileHandler)
    else:
        output_directory = None

    input_space = InputSpace(options["variables"])

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(input_space, options)

    # Connect to the database
    db_address = options['database']['address']
    db = MongoDB(database_address=db_address)

    overall_start_time = time.time()
    db.save({'start-time': overall_start_time}, experiment_name, 'start-time')

    # Initialized here so the "end-all" recommendation pass below cannot hit
    # an unbound 'hypers' if termination fires before any job was suggested.
    hypers = None

    waiting_for_results = False  # for printing purposes only
    while True:

        for resource_name, resource in resources.iteritems():

            jobs = load_jobs(db, experiment_name)
            # resource.printStatus(jobs)

            # If the resource is currently accepting more jobs
            # TODO: here cost will eventually also be considered: even if the
            #       resource is not full, we might wait because of cost incurred
            # Note: I could chose to fill up one resource and them move on to the next ("if")
            # You could also do it the other way, by changing "if" to "while" here

            # Remove any broken jobs from pending
            # note: make sure to do this before the acceptingJobs() condition is checked
            remove_broken_jobs(db, jobs, experiment_name, resources)

            if resource.acceptingJobs(jobs):

                if waiting_for_results:
                    logging.info('\n')
                waiting_for_results = False

                # Load jobs from DB
                # (move out of one or both loops?) would need to pass into load_tasks
                jobs = load_jobs(db, experiment_name)

                # Print out a list of broken jobs
                print_broken_jobs(jobs)

                # Get a suggestion for the next job
                tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                              input_space)

                # Special case when coupled and there is a NaN task-- what to do with NaN task when decoupled??
                if 'NaN' in tasks and 'NaN' not in resource.tasks:
                    resource.tasks.append('NaN')

                # Load the model hypers from the database.
                hypers = db.load(experiment_name, 'hypers')

                # "Fit" the chooser - give the chooser data and let it fit the model(s).
                # NOTE: even if we are only suggesting for 1 task, we need to fit all of them
                # because the acquisition function for one task depends on all the tasks
                hypers = chooser.fit(tasks, hypers)

                if hypers:
                    logging.debug('GP covariance hyperparameters:')
                print_hypers(hypers, input_space, options)

                # Save the hyperparameters to the database.
                if hypers:
                    db.save(hypers, experiment_name, 'hypers')

                if options['recommendations'] == "during":
                    # Compute the best value so far, a.k.a. the "recommendation"
                    recommendation = chooser.best()

                    # Save the recommendation in the DB if there are more complete jobs than last time
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         time.time() - overall_start_time)

                # Get the decoupling groups
                task_couplings = {
                    task_name: tasks[task_name].options["group"]
                    for task_name in resource.tasks
                }

                logging.info('\nGetting suggestion for %s...\n' %
                             (', '.join(task_couplings.keys())))

                # Get the next suggested experiment from the chooser.
                suggested_input, suggested_tasks = chooser.suggest(
                    task_couplings)
                suggested_task = suggested_tasks[0]  # hack, deal with later

                suggested_job = {
                    'id': len(jobs) + 1,
                    'params': input_space.paramify(suggested_input),
                    'expt_dir': options['main_file_path'],
                    'tasks': suggested_tasks,
                    'resource': resource_name,
                    'main-file': options['tasks'][suggested_task]['main_file'],
                    'language': options['tasks'][suggested_task]['language'],
                    'status': 'new',
                    'submit time': time.time(),
                    'start time': None,
                    'end time': None,
                    'fast update':
                    chooser.fast_update  # just for plotting - not important
                }

                save_job(suggested_job, db, experiment_name)

                # Submit the job to the appropriate resource
                process_id = resource.attemptDispatch(experiment_name,
                                                      suggested_job,
                                                      db_address, expt_dir,
                                                      output_directory)

                # Print the current time
                logging.info(
                    'Current time: %s' %
                    datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

                # Set the status of the job appropriately (successfully submitted or not)
                if process_id is None:
                    suggested_job['status'] = 'broken'
                    # Fixed: this previously read job['id'] where 'job' is an
                    # undefined name, raising NameError on dispatch failure.
                    logging.info(
                        'Job %s failed -- check output file for details.' %
                        suggested_job['id'])
                    save_job(suggested_job, db, experiment_name)
                else:
                    suggested_job['status'] = 'pending'
                    suggested_job['proc_id'] = process_id
                    save_job(suggested_job, db, experiment_name)

                jobs = load_jobs(db, experiment_name)

                # Print out the status of the resources
                # resource.printStatus(jobs)
                print_resources_status(resources.values(), jobs)

                if len(set(task_couplings.values())) > 1:  # if decoupled
                    print_tasks_status(tasks.values(), jobs)

                # For debug - print pending jobs
                print_pending_jobs(jobs)

        # Terminate the optimization if all resources are finished (run max number of jobs)
        # or ANY task is finished (just my weird convention)
        jobs = load_jobs(db, experiment_name)
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)
        # all()/any() replace the equivalent reduce(lambda ...) forms.
        terminate_resources = all(
            r.maxCompleteReached(jobs) for r in resources.values())
        terminate_tasks = any(
            t.maxCompleteReached(jobs) for t in tasks.values())
        terminate_maxtime = (time.time() - overall_start_time) >= (
            options['max_time_mins'] * 60.0)

        if terminate_resources or terminate_tasks or terminate_maxtime:

            if terminate_resources:
                logging.info(
                    'Maximum number of jobs completed on all resources.')
            if terminate_tasks:
                logging.info(
                    'Maximum number of jobs reached for at least one task.')
            if terminate_maxtime:
                logging.info(
                    'Maximum total experiment time of %f minutes reached.' %
                    options['max_time_mins'])

            # save rec in DB
            if options['recommendations'] in ("during", "end-one"):
                logging.info('Making final recommendation:')
                recommendation = chooser.best()
                store_recommendation(recommendation,
                                     db,
                                     experiment_name,
                                     tasks,
                                     jobs,
                                     input_space,
                                     time.time() - overall_start_time,
                                     final=True)
            elif options['recommendations'] == "end-all":
                # Re-fit on each prefix of the job history so a
                # recommendation trajectory can be plotted afterwards.
                logging.info('Making recommendations...')
                all_jobs = jobs
                for i in xrange(len(all_jobs)):
                    logging.info('')
                    logging.info(
                        '-------------------------------------------------')
                    logging.info(
                        '     Getting recommendations for iter %d/%d      ' %
                        (i, len(all_jobs)))
                    logging.info(
                        '-------------------------------------------------')
                    logging.info('')

                    jobs = all_jobs[:i + 1]
                    tasks = parse_tasks_from_jobs(jobs, experiment_name,
                                                  options, input_space)
                    hypers = chooser.fit(tasks, hypers)
                    print_hypers(hypers, input_space, options)
                    # get the biggest end time of the jobs
                    end_time = max([job['end time'] for job in jobs])
                    elapsed_time = end_time - overall_start_time

                    recommendation = chooser.best()
                    store_recommendation(recommendation, db, experiment_name,
                                         tasks, jobs, input_space,
                                         elapsed_time)

            logging.info('Have a nice day.')
            return

        # If no resources are accepting jobs, sleep
        if no_free_resources(db, experiment_name, resources):
            # Don't use logging here because it's too much effort to use logging without a newline at the end
            sys.stdout.write(
                'Waiting for results...' if not waiting_for_results else '.')
            sys.stdout.flush()
            # sys.stderr.flush()
            waiting_for_results = True
            time.sleep(options['polling_time'])
        else:
            sys.stdout.write('\n')
Example #7
0
class Spearmint(Printer):
    """Wrapper exposing the Spearmint Bayesian-optimization backend."""

    def __init__(self, config_file, work_dir):
        # Initialize the Printer base with this optimizer's label/color.
        Printer.__init__(self, 'Spearmint', color='grey')
        self.work_dir = work_dir
        print(config_file)  # echo the config path (debug aid)
        self._parse_config_file(config_file)
        # Batch size falls back to 1 when the resource entry is missing.
        try:
            resources = self.param_dict['resources']
            self.batch_size = resources['my-machine']['max-concurrent']
        except KeyError:
            self.batch_size = 1
        # Running history of evaluated parameter vectors and their losses.
        self.all_params = []
        self.all_losses = []

    def rand_gens(self, var_type='float', size=1):
        """Draw ``size`` random samples for the given variable type.

        Only 'float' is supported: uniform draws on [0, 1).

        Raises:
            NotImplementedError: for any other ``var_type``.
        """
        if var_type != 'float':
            raise NotImplementedError
        return np.random.uniform(low=0, high=1, size=size)

    def _parse_config_file(self, config_file):
        """Load the JSON config and record per-variable bookkeeping.

        Populates ``param_dict`` plus ``total_size``, ``var_sizes`` and
        ``var_names`` describing the optimization variables.
        """
        self.json_parser = ParserJSON(file_name=config_file)
        self.json_parser.parse()
        self.param_dict = self.json_parser.param_dict

        # Tally the dimensionality contributed by each variable.
        self.total_size = 0
        self.var_sizes = []
        self.var_names = []
        for name, spec in self.param_dict['variables'].items():
            self.total_size += spec['size']
            self.var_sizes.append(int(spec['size']))
            self.var_names.append(name)

    def _generate_uniform(self, num_samples=10):
        """Sample every variable uniformly and stash the proposals.

        Fills ``self.container`` with per-variable draws and sets
        ``self.proposed`` to an array with one row per sample.
        """
        self.container, self.sampled_params = {}, {}
        rows = []
        for name in self.var_names:
            spec = self.param_dict['variables'][name]
            draws = self.rand_gens(var_type=spec['type'],
                                   size=(spec['size'], num_samples))
            rows.extend(draws)
            self.container[name] = draws
        # Transpose so rows index samples and columns index dimensions.
        self.proposed = np.array(rows).transpose()

    def _parse_observations(self, observations):
        """Convert raw observations into parameter vectors and losses.

        Deduplicates against previously seen parameter vectors (anything
        within an L2 distance of 1e-6 of an old vector is dropped) and
        appends the accepted entries to the running ``all_params`` /
        ``all_losses`` history.

        Args:
            observations: iterable of dicts with per-variable 'samples'
                entries and a 'loss' value.

        Returns:
            (all_params, all_losses): the newly accepted parameter lists
            and their corresponding losses.
        """
        all_params, all_losses = [], []
        for observation in observations:
            params = []
            for var_name in self.var_names:
                params.extend(observation[var_name]['samples'])
            if len(self.all_params) > 0:
                # BUG FIX: `params` and the stored vectors are plain Python
                # lists; subtracting two lists raises TypeError. Convert to
                # numpy arrays so the distance computation is valid.
                new_vec = np.asarray(params)
                min_dist = np.amin([
                    np.linalg.norm(new_vec - np.asarray(old_param))
                    for old_param in self.all_params
                ])
                if min_dist > 1e-6:
                    all_losses.append(observation['loss'])
                    all_params.append(params)
            else:
                # First-ever observation: accept unconditionally.
                all_losses.append(observation['loss'])
                all_params.append(params)
        # Fold the accepted observations into the running history.
        for index, element in enumerate(all_params):
            self.all_params.append(element)
            self.all_losses.append(all_losses[index])
        return all_params, all_losses

    def _create_mongo_instance(self):
        """Start a forked mongod instance on a freshly-created db directory.

        Any previous database directory for this experiment is removed
        first so each run starts from an empty database.
        """
        self.db_path = '%s/db_%s/' % (self.work_dir,
                                      self.param_dict['experiment-name'])
        print(self.db_path)
        # Remove a stale db dir if present. A bare `except:` here would
        # also swallow KeyboardInterrupt/SystemExit; shutil.rmtree raises
        # OSError (incl. FileNotFoundError), so catch exactly that.
        try:
            shutil.rmtree(self.db_path)
        except OSError:
            pass
        os.mkdir(self.db_path)
        # --fork makes the call return while the server keeps running.
        subprocess.call('mongod --fork --logpath %s/mongodb.log --dbpath %s' %
                        (self.db_path, self.db_path),
                        shell=True)

    def _create_spearmint_parameters(self):
        """Build the Spearmint options, chooser, resources and DB handle."""
        self._create_mongo_instance()

        # Load the experiment configuration and the available resources.
        self.options, self.exp_dir = get_options(self.work_dir)
        self.resources = parse_resources_from_config(self.options)

        # Instantiate the chooser module named in the options.
        module_name = 'spearmint.choosers.' + self.options['chooser']
        self.chooser_module = importlib.import_module(module_name)
        self.chooser = self.chooser_module.init(self.options)

        self.experiment_name = self.options.get('experiment-name',
                                                'unnamed_experiment')
        self.db_address = self.options['database']['address']
        self.db = MongoDB(database_address=self.db_address)

    def _sample_parameter_sets(self, num_samples, observations):
        """Seed Spearmint's DB with past observations, then fetch proposals.

        Stores every (deduplicated) observation as a completed job record
        in the forked MongoDB instance, asks the chooser for a suggestion
        per configured resource, and accumulates the suggested vectors in
        ``self.proposed``. Finally shuts the mongod instance down.

        Args:
            num_samples: number of copies of each suggested vector to
                append to ``self.proposed``.
            observations: iterable of observation dicts with per-variable
                'samples' entries and a 'loss' value.
        """
        all_params, all_losses = self._parse_observations(observations)
        self._create_spearmint_parameters()

        # dump all observations in database
        for index, param in enumerate(all_params):
            print('PARAM', param, all_losses[index])
            params = {}
            start_index = 0
            # Slice the flat parameter vector back into per-variable blocks.
            for var_index, var_name in enumerate(self.var_names):
                var_dict = self.param_dict['variables'][var_name]
                params[var_name] = {
                    'type':
                    var_dict['type'],
                    'values':
                    np.array(param[start_index:start_index + var_dict['size']])
                }
                start_index += var_dict['size']
            # Fabricate a job record in Spearmint's schema so the chooser
            # treats the observation as an already-evaluated suggestion.
            job = {
                'id': index + 1,
                'expt_dir': self.work_dir,
                'tasks': ['main'],
                'resource': 'my-machine',
                'main-file': 'main_file.py',
                'language': 'PYTHON',
                'status': 'new',
                'submit time': time.time(),
                'start time': time.time(),
                'end time': None,
                'params': params
            }
            # NOTE(review): presumably this delay keeps 'end time' distinct
            # from 'start time' — confirm whether it is actually required.
            time.sleep(0.1)
            job['values'] = {'main': all_losses[index]}
            job['status'] = 'complete'
            job['end time'] = time.time()

            self.db.save(job, self.experiment_name, 'jobs', {'id': job['id']})

        self.proposed = []
        # Ask the chooser for one suggestion per configured resource.
        for resource_name, resource in self.resources.items():
            print('RUNNING SPEARMINT')
            suggested_job = get_suggestion(self.chooser, resource.tasks,
                                           self.db, self.exp_dir, self.options,
                                           resource_name)
            print('DONE')
            # Flatten the suggested per-variable values into one vector.
            vector = []
            for var_name in self.var_names:
                vector.extend(suggested_job['params'][var_name]['values'])
            vector = np.array(vector)
            # The same suggested vector is repeated num_samples times.
            for index in range(num_samples):
                self.proposed.append(vector)

        print('PROPOSED', self.proposed)
        # Shut the forked mongod instance back down.
        subprocess.call(
            'mongod --shutdown --logpath %s/mongodb.log --dbpath %s' %
            (self.db_path, self.db_path),
            shell=True)

    def choose(self, num_samples=None, observations=None):
        """Propose the next parameter set(s) to evaluate.

        With observations, runs Spearmint to suggest new points; without
        them, draws a single uniform random sample.

        Args:
            num_samples: how many proposals to produce; defaults to
                ``self.batch_size`` when falsy.
            observations: past evaluations to condition on, or None/empty
                for the initial uniform draw.

        Returns:
            ``self.proposed`` — the proposed parameter vectors.
        """
        current_dir = os.getcwd()
        os.chdir(self.work_dir)
        # BUG FIX: restore the working directory even if sampling raises;
        # previously an exception left the process chdir'd into work_dir.
        try:
            if not num_samples:
                num_samples = self.batch_size

            if observations:
                self._print('proposing samples')
                self._sample_parameter_sets(num_samples, observations)
            else:
                self._print('choosing uniformly')
                self._generate_uniform(1)
        finally:
            os.chdir(current_dir)

        return self.proposed