# Example 1
def collect_n_samples(RIPL, n, mh_iter, ids, max_runtime=600, verbose=True, callback=None, callback_kwargs={}):
    '''Tries to collect n samples before timeout'''
    samples = np.zeros((len(ids), 0))
    experiment_start = time.clock()
    start = time.clock()
    iteration = 0
    for unused in range(n):
        if time.clock() - experiment_start < max_runtime:
            # Sample
            iteration += 1
            RIPL.infer(mh_iter)
            # Record sample
            samples = np.column_stack([samples, [RIPL.report_value(an_id) for an_id in ids]])
            if verbose:
                print 'Iteration %d : Score %s' % (iteration, callback(samples=samples, **callback_kwargs))
        else:
            break
    finish = time.clock()
    time_per_sample = max(finish - start, 0.01) / iteration
    if verbose:
        print '%d iterations : %f seconds : %f seconds per iteration' % (iteration, finish - start, time_per_sample)
    # Compute average ess
    start = time.clock()
    ess = np.mean([(samples.shape[1]) / act.batch_means(samples[i,:]) for i in range(samples.shape[0])])
    if np.isnan(ess) or np.isinf(ess):
        ess = 1
    finish = time.clock()
    time_to_compute_ess = max(finish - start, 0.01)
    if verbose:
        print 'ESS = %f : %f seconds' % (ess, time_to_compute_ess)
    else:
        #print 'ESS = %3.0f' % ess
        pass
    # Finished - return samples and ...TODO
    return {'samples' : samples, 'ess' : ess, 'runtime' : time.clock() - start}
# Example 2
def collect_n_es(RIPL, n, mh_iter, ids, min_samples=None, max_runtime=600, verbose=True):
    '''Tries to collect enough samples of ids to have an ess > n'''
    #### TODO - Note the min time of 0.01 and the 1 percent computation heuristic of 100
    ess = 0
    samples = np.zeros((len(ids), 0))
    trial_iterations = max(n, min_samples)
    experiment_start = time.clock()
    # While samples not large enough and not timed out
    while (np.floor(ess) + 1 < n) and (time.clock() - experiment_start < max_runtime):
        # Perform some iterations, recording how fast the sampler is running
        start = time.clock()
        iteration = 0
        for unused in range(trial_iterations):
            if time.clock() - experiment_start < max_runtime:
                # Sample
                iteration += 1
                RIPL.infer(mh_iter)
                # Record sample
                samples = np.column_stack([samples, [RIPL.report_value(an_id) for an_id in ids]])
        finish = time.clock()
        time_per_sample = max(finish - start, 0.01) / iteration
        if verbose:
            print '%d iterations : %f seconds : %f seconds per iteration' % (iteration, finish - start, time_per_sample)
        # Compute average ess
        start = time.clock()
        ess = np.mean([(samples.shape[1]) / act.batch_means(samples[i,:]) for i in range(samples.shape[0])])
        if np.isnan(ess) or np.isinf(ess):
            ess = 1
        finish = time.clock()
        time_to_compute_ess = max(finish - start, 0.01)
        if verbose:
            print 'ESS = %f : %f seconds' % (ess, time_to_compute_ess)
        else:
            #print 'ESS = %3.0f' % ess
            pass
        # Decide how long to sample for
        samples_per_effective_sample = samples.shape[1] / ess
        estimated_samples_required = samples_per_effective_sample * (n - ess)
        if np.isnan(estimated_samples_required) or np.isinf(estimated_samples_required):
            estimated_samples_required = 1
        else:
            estimated_samples_required = int(np.floor(estimated_samples_required))
        if verbose:
            print 'Estimated samples required %d' % estimated_samples_required
        balance_computation_samples = int(np.floor(100 * time_to_compute_ess / time_per_sample))
        if verbose:
            print '1%% samples %d' % balance_computation_samples
        trial_iterations = min(estimated_samples_required, balance_computation_samples)
        if verbose:
            print 'Sampling for %d' % trial_iterations
    # Finished - return samples and ...TODO
    return {'samples' : samples, 'ess' : ess}
# Example 3
def IRM_HighSchool(fold=1,burn=50,n_samples=100,mh_iter=100,verbose=True):

    # Create RIPL and clear any previous session
    MyRIPL = venture_engine
    MyRIPL.clear()

    # Load high school data set
    data = scipy.io.loadmat("../data/hs/hs_%dof5.mat" % fold, squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing  = list(zip(data['test_i'].flat,  data['test_j'].flat,  data['test_v'].flat))

    # Convenience functions
    MyRIPL.assume('min-2', parse('(lambda (x y) if (< x y) x y)'))
    MyRIPL.assume('max-2', parse('(lambda (x y) if (> x y) x y)'))
    # Instantiate CRP
    MyRIPL.assume('alpha', parse('(uniform-continuous 0.0001 2.0)'))
    MyRIPL.assume('cluster-crp', parse('(CRP/make alpha)'))
    # Create class assignment lookup function
    MyRIPL.assume('node->class', parse('(mem (lambda (nodes) (cluster-crp)))'))
    # Create class interaction probability lookup function
    MyRIPL.assume('classes->parameters', parse('(mem (lambda (class1 class2) (beta 0.5 0.5)))')) 
    MyRIPL.assume('classes->parameters-symmetric', parse('(lambda (class1 class2) (classes->parameters (min-2 class1 class2) (max-2 class1 class2)))')) 
    # Create relation evaluation function
    MyRIPL.assume('p-friends', parse('(lambda (node1 node2) (classes->parameters-symmetric (node->class node1) (node->class node2)))')) 
    MyRIPL.assume('friends', parse('(lambda (node1 node2) (bernoulli (p-friends node1 node2)))')) 

    # Tell Venture about observations
    for (i,j,v) in observed:
        if verbose:
            print 'Observing (%3d, %3d) = %d' % (i, j, v)
        if v:
            MyRIPL.observe(parse('(friends %d %d)' % (i, j)), 'true')
        else:
            MyRIPL.observe(parse('(friends %d %d)' % (i, j)), 'false')
                
    # Tell Venture that we want to predict P(unobserved link)
    truth = []
    missing_links = []
    for (i,j,v) in missing:
        if verbose:
            print 'Predicting (%3d, %3d) = %d' % (i, j, v)
        truth.append(int(v))
        missing_links.append(MyRIPL.predict(parse('(p-friends %d %d)' % (i, j))))

    # Perform inference
    for sample_number in range(burn):
        MyRIPL.infer(mh_iter)
        print 'Burn in sample %4d' % (sample_number + 1)
        roc_data = []
        for (true_link, (missing_link, _)) in zip(truth, missing_links):
            roc_data.append((true_link, MyRIPL.report_value(missing_link)))
        print ROCData(roc_data).auc()
    samples = np.zeros((len(truth), 0))
    #ess_samples = []
    for sample_number in range(n_samples):
        MyRIPL.infer(mh_iter)
        print 'Sample %4d' % (sample_number + 1)
        roc_data = []
        for (true_link, (missing_link, _)) in zip(truth, missing_links):
            roc_data.append((true_link, MyRIPL.report_value(missing_link)))
        samples = np.column_stack([samples, [roc_datum[1] for roc_datum in roc_data]])
        ess = (sample_number + 1) / act.batch_means(samples[0,:])
        print ess
        print ROCData(roc_data).auc()

    print 'Fold complete'
    predictions = list(samples.mean(axis=1))
    roc_data = []
    for (true_link, prediction) in zip(truth, predictions):
        roc_data.append((true_link, prediction))
    AUC = ROCData(roc_data).auc()
    print 'AUC = %f' % AUC
    
    return {'truth' : truth, 'predictions' : predictions, 'samples' : samples, 'AUC' : AUC}
# Example 4
def collect_n_samples_before_timeout(RIPL, n, initial_mh_iter, ids, max_runtime=600, verbose=True):
    '''Alters the number of intermediate mh_iter to n samples in max_runtime'''
    #### FIXME - Dangerous - thinning depends on the state! - need to do a trial run!
    #### Maybe it does need linear regression to be awesome?
    #### TODO - start here
    samples = np.zeros((len(ids), 0))
    experiment_start = time.clock()
    start = time.clock()
    iteration = 0
    mh_iter = initial_mh_iter
    mh_sum = 0
    # For a maximum of n iterations
    for unused in range(n):
        if time.clock() - experiment_start < max_runtime:
            # Sample
            iteration += 1
            if (iteration > 10) and (iteration < n):
                # A few samples have been collected, adjust mh_iter to finish on time
                #### TODO - a filtering approach would be more adaptive
                now = time.clock()
                time_elapsed = now - start
                time_remaining = max_runtime - time_elapsed
                time_per_mh_iter = time_elapsed / mh_sum
                samples_remaining = n - iteration
                time_remaining_per_sample = time_remaining / samples_remaining
                mh_iter = max(1, int(round(time_remaining_per_sample / time_per_mh_iter)))
                if verbose:
                    print 'Time elapsed  ', time_elapsed
                    print 'Time remaining', time_remaining
                    print 'Time per mh   ', time_per_mh_iter
                    print 'Samples to go ', samples_remaining
                    print 'Time per samp.', time_remaining
                    print 'Remaining per.', time_remaining_per_sample
                    print 'mh_iter', mh_iter
            RIPL.infer(mh_iter)
            mh_sum += mh_iter
            if verbose:
                print 'Iteration %d' % iteration
            # Record sample
            samples = np.column_stack([samples, [RIPL.report_value(an_id) for an_id in ids]])
        else:
            break
    finish = time.clock()
    time_per_sample = max(finish - start, 0.01) / iteration
    if verbose:
        print '%d iterations : %f seconds : %f seconds per iteration' % (iteration, finish - start, time_per_sample)
    # Compute average ess
    start = time.clock()
    ess = np.mean([(samples.shape[1]) / act.batch_means(samples[i,:]) for i in range(samples.shape[0])])
    if np.isnan(ess) or np.isinf(ess):
        ess = 1
    finish = time.clock()
    time_to_compute_ess = max(finish - start, 0.01)
    if verbose:
        print 'ESS = %f : %f seconds' % (ess, time_to_compute_ess)
    else:
        #print 'ESS = %3.0f' % ess
        pass
    # Finished - return samples and ...TODO
    max_mem = memory()
    return {'samples' : samples, 'ess' : ess, 'max_memory' : max_mem}