Ejemplo n.º 1
0
 def worker(proc_args, out_q, i_proc):
     """Process entry point: run every assigned subtask and queue its result.

     Each element of ``proc_args`` is handed to ``multiply_matrices`` and the
     value it returns is put on ``out_q``. A progress line is printed once at
     start-up and once after every subtask.

     Args:
         proc_args: Sized, iterable collection of subtask arguments.
         out_q: Queue-like object; only its ``put`` method is used.
         i_proc: Not used in this function; the OS pid is reported instead.
     """
     n_tasks = len(proc_args)
     print("Process %i has been assigned %i subtask(s)" % (os.getpid(), n_tasks))

     # Number the tasks from 1 so the counter can be printed directly.
     for task_no, task_args in enumerate(proc_args, 1):
         timer = Stopwatch()
         result = multiply_matrices(task_args)
         out_q.put(result)
         # The timer is read after the put, so queueing time is included.
         seconds = timer.finish(milli=False)
         # "%d" truncates the elapsed value to a whole number.
         print("Process %i needed %d seconds for task %i/%i"
               % (os.getpid(), seconds, task_no, n_tasks))
Ejemplo n.º 2
0
def gridsearch(nhidden, lrate, wcost, momentum, n_in_train_minibatch,
               sampling_steps, n_temperatures, max_time_secs, save_plots):
    """Time-boxed grid search over RbmNetwork hyperparameters.

    One attempt is created for every combination of the candidate values
    (full Cartesian product). Each attempt trains a fresh ``RbmNetwork`` on
    minibatches until ``max_time_secs`` has elapsed, recording the train,
    validation and test error after every trial. Result fields are written
    back into the attempt's dict, and ``print_best`` is finally called with
    all attempts.

    Args:
        nhidden: Iterable of candidate values for the 'nhidden' setting.
        lrate: Iterable of candidate values for the 'lrate' setting.
        wcost: Iterable of candidate values for the 'wcost' setting.
        momentum: Iterable of candidate values for the 'momentum' setting.
        n_in_train_minibatch: Iterable of candidate minibatch sizes.
        sampling_steps: Iterable of candidate 'n_sampling_steps' values.
        n_temperatures: Iterable of candidate 'n_temperatures' values.
        max_time_secs: Training time budget for each attempt, in seconds.
        save_plots: Passed to ``RbmNetwork`` as ``plot``; when true, an
            ``error<attempt_idx>.png`` plot is also saved per attempt.

    Returns:
        None. Progress is printed and results are reported via ``print_best``.
    """
    # Local import so this block does not depend on the file's import header.
    from itertools import product

    params = ['nhidden', 'lrate', 'wcost', 'momentum', 'n_in_train_minibatch',
              'n_sampling_steps', 'n_temperatures']

    # product() varies the right-most iterable fastest, i.e. the same order
    # the equivalent seven nested for-loops would generate.
    attempts = [
        {
            'nhidden': nh,
            'lrate': lr,
            'wcost': wc,
            'momentum': mo,
            'n_in_train_minibatch': nitm,
            'n_sampling_steps': st,
            'n_temperatures': ntemp,
            'should_plot': False,
        }
        for nh, lr, wc, mo, nitm, st, ntemp in product(
            nhidden, lrate, wcost, momentum, n_in_train_minibatch,
            sampling_steps, n_temperatures)
    ]

    nattempts = len(attempts)
    pid = os.getpid()
    total_time_secs = max_time_secs * nattempts
    # NOTE: single-argument print(...) is used throughout; it produces the
    # same output whether print is a statement (Python 2) or a function.
    print('Beginning %i attempts (PID=%s, DT=%s), each for %i secs, ETA = %s' %
          (nattempts, pid, dt_str(), max_time_secs, eta_str(total_time_secs)))
    for attempt in attempts:
        print(attempt)

    # Generate data set
    n_trainpatterns = 5000
    n_validpatterns = 1000
    train_pset, valid_pset, test_pset = create_mnist_patternsets(
        n_trainpatterns=n_trainpatterns, n_validpatterns=n_validpatterns)
    # Use the sizes the pattern sets actually report from here on.
    n_trainpatterns, n_validpatterns, n_testpatterns = \
        train_pset.n, valid_pset.n, test_pset.n
    # One row per pattern for the validation and test sets.
    valid_patterns = np.array(valid_pset.patterns).reshape((n_validpatterns, -1))
    test_patterns = np.array(test_pset.patterns).reshape((n_testpatterns, -1))

    for attempt_idx, attempt in enumerate(attempts):
        # Re-seed so every attempt starts from the same NumPy random state.
        np.random.seed(1)
        t = Stopwatch()
        net = RbmNetwork(np.prod(train_pset.shape),
                         attempt['nhidden'],
                         attempt['lrate'],
                         attempt['wcost'],
                         attempt['momentum'],
                         attempt['n_temperatures'],
                         attempt['n_sampling_steps'],
                         v_shape=train_pset.shape,
                         plot=save_plots)
        train_errors = []
        valid_errors = []
        test_errors = []
        epochnum = 0
        while True:  # train as long as time limit is not exceeded
            # FIX: use this attempt's own minibatch size. The previous code
            # read the leaked grid-loop variable, so every attempt trained
            # with the last value of n_in_train_minibatch.
            minibatch_pset = Minibatch(train_pset,
                                       attempt['n_in_train_minibatch'])
            net.learn_trial(minibatch_pset.patterns)
            # FIX: count the trial as soon as it has run, so the final trial
            # (the one that trips the time limit) is included in the total.
            epochnum += 1

            # calculate errors
            train_errors.append(
                np.mean(net.test_trial(minibatch_pset.patterns)[0]))
            valid_errors.append(np.mean(net.test_trial(valid_patterns)[0]))
            test_errors.append(np.mean(net.test_trial(test_patterns)[0]))

            if t.finish(milli=False) > max_time_secs:
                break

        train_elapsed = t.finish(milli=False)

        # add ATTEMPT keys that will vary each time below
        # this line. the HASH should be the same for this
        # set of parameters every time you run the benchmark
        attempt['hash'] = hash(HashableDict(attempt))
        attempt['attempt_idx'] = attempt_idx
        attempt['pid'] = pid
        attempt['n_train_epochs'] = epochnum
        attempt['train_elapsed'] = train_elapsed
        attempt['dt'] = dt_str()
        # FIX: store the test error (from the last iteration) under
        # 'test_error'; the training error was being stored here before.
        attempt['test_error'] = test_errors[-1]
        attempt['npatterns'] = n_trainpatterns

        if save_plots:
            filename = 'error%i.png' % attempt_idx
            net.save_error_plots(train_errors, valid_errors, test_errors,
                                 filename)

        print('Finished %i of %i: error %.2f in %.1f secs' %
              (attempt_idx + 1, nattempts, attempt['test_error'],
               train_elapsed))
        print(attempt)
        print('--------------------')
        print('')  # blank separator line (a bare print() would emit "()" on Py2)

    print_best(attempts, params)