コード例 #1
0
ファイル: sample.py プロジェクト: jamesrobertlloyd/church-irm
def estimate_mh_time(RIPL, n, initial_mh_iter, ids, max_runtime=600, verbose=True):
    '''Uses a doubling strategy to try to estimate time per mh iter.

    Repeatedly calls ``RIPL.infer`` and reads back the values of ``ids`` until
    ``max_runtime`` seconds (as measured by ``time.clock``) have passed. The
    number of steps per call is doubled each round; once 10 seconds have
    passed it is also capped so the next call should fit in the time left.

    Arguments:
        RIPL            - object providing ``infer(steps)`` and ``report_value(id)``
        n               - not used by this function
        initial_mh_iter - number of MH steps for the first call (values below 1
                          are treated as 1)
        ids             - ids whose values are read after every call
        max_runtime     - time budget in seconds
        verbose         - print a progress line before every call

    Returns a dict with
        'time_per_mh_iter' - elapsed time divided by the total number of MH
                             steps run (NaN if no steps were run at all)
        'max_memory'       - largest value of ``memory()`` seen
    '''
    experiment_start = time.clock()
    start = time.clock()
    samples = np.zeros((len(ids), 0)) # These are recorded as normal for timing purposes
    iteration = 0
    # At least one step per call: doubling zero never grows, which would leave
    # mh_sum at zero and make every division below fail.
    mh_iter = max(1, initial_mh_iter)
    mh_sum = 0
    max_mem = memory()
    while time.clock() - experiment_start < max_runtime:
        iteration += 1
        if verbose:
            print('Sample %d : trying %d steps' % (iteration, mh_iter))
        RIPL.infer(mh_iter)
        mh_sum += mh_iter
        samples = np.column_stack([samples, [RIPL.report_value(an_id) for an_id in ids]])
        if time.clock() - experiment_start < 10:
            # Not much time has passed - estimates will be unreliable - just double iterations
            mh_iter = mh_iter * 2
        else:
            # Some time has passed - make sure that next sample will not take too long
            now = time.clock()
            time_elapsed = now - start
            time_remaining = max_runtime - time_elapsed
            time_per_mh_iter = time_elapsed / mh_sum
            max_possible_iters = int(round(time_remaining / time_per_mh_iter))
            mh_iter = min(max(1, max_possible_iters), mh_iter * 2)
        max_mem = max(max_mem, memory())
    finish = time.clock()
    if mh_sum:
        time_per_mh_iter = max(finish - start, 0.01) / mh_sum
    else:
        # The loop never ran (max_runtime <= 0), so there is nothing to average
        time_per_mh_iter = float('nan')
    max_mem = max(max_mem, memory())
    return {'time_per_mh_iter' : time_per_mh_iter, 'max_memory' : max_mem}
コード例 #2
0
def cold_start_single_run(data, model_class, exp_params, model_params):
    """Function to be sent to picloud: run one experiment from scratch.

    Builds a model, feeds it the observations, runs a burn-in phase followed
    by a sampling phase, and scores the averaged samples against the truth.

    Args:
        data: mapping with keys "observations" (passed to
            ``model.observe_data``) and "missing" (passed to
            ``model.set_predictions``).
        model_class: callable returning the model; called with ``model_params``
            as keyword arguments.
        exp_params: mapping with keys "n_samples", "intermediate_iter",
            "max_burn_time" and "max_sample_time".
        model_params: keyword arguments for ``model_class``.

    Returns:
        A dict with keys "predictions" (per-link mean of the samples), "ess",
        "AUC", "runtime" (``time.clock`` seconds since entry), "max_memory"
        (largest ``memory()`` value seen from the end of burn-in onwards) and
        "n_samples" (number of sample columns actually collected).
    """
    start = time.clock()
    model = model_class(**model_params)  # Create model
    model.create_RIPL()
    model.observe_data(data["observations"])  # Observe data
    truth, missing_links = model.set_predictions(data["missing"])
    # Burn in - the output of this phase is discarded
    sample.collect_n_samples(
        model.RIPL,
        n=exp_params["n_samples"],
        mh_iter=exp_params["intermediate_iter"],
        ids=missing_links,
        max_runtime=exp_params["max_burn_time"],
        verbose=True,
        callback=return_AUC,
        callback_kwargs={"truth": truth},
    )
    # Collect samples
    max_memory = memory()
    mcmc_output = sample.collect_n_samples(
        model.RIPL,
        n=exp_params["n_samples"],
        mh_iter=exp_params["intermediate_iter"],
        ids=missing_links,
        max_runtime=exp_params["max_sample_time"],
        verbose=True,
        callback=return_AUC,
        callback_kwargs={"truth": truth},
    )
    samples = mcmc_output["samples"]
    n_samples = samples.shape[1]
    sample_ess = mcmc_output["ess"]
    # Record the peak right after sampling; the result must be assigned,
    # otherwise this measurement is silently thrown away.
    max_memory = max(max_memory, memory())
    # Score samples
    predictions = list(samples.mean(axis=1))
    roc_data = list(zip(truth, predictions))
    AUC = ROCData(roc_data).auc()
    max_memory = max(max_memory, memory())

    return {
        "predictions": predictions,
        "ess": sample_ess,
        "AUC": AUC,
        "runtime": time.clock() - start,
        "max_memory": max(max_memory, memory()),
        "n_samples": n_samples,
    }
コード例 #3
0
def IRM(fold=1,burn=50,n_samples=100,mh_iter=10,verbose=True):
    '''Run the IRM link prediction experiment on the synthetic data set.

    Defines the model in the Venture engine, observes the training links
    (in both directions), asks for predictions of the test links, runs a
    burn-in phase and a sampling phase, and scores the averaged samples.

    Arguments:
        fold      - currently unused; only the commented-out high school
                    data set line refers to it
        burn      - number of samples requested for the burn-in phase
        n_samples - number of samples requested for the sampling phase
        mh_iter   - initial number of MH steps between samples
        verbose   - print progress information

    Returns a dict with keys 'truth', 'predictions', 'samples', 'ess',
    'AUC', 'runtime' and 'max_mem'.
    '''
    # Start timing
    started_at = time.clock()

    # Take the shared engine and wipe whatever an earlier session left behind
    engine = venture_engine
    engine.clear()

    # Load the data; the synthetic set is used, the high school set is disabled
    #data = scipy.io.loadmat("../../data/hs/hs_%dof5.mat" % fold, squeeze_me=True)
    data = scipy.io.loadmat("../../data/irm_synth/irm_synth_20.mat", squeeze_me=True)
    observed = list(zip(data['train_i'].flat, data['train_j'].flat, data['train_v'].flat))
    missing = list(zip(data['test_i'].flat, data['test_j'].flat, data['test_v'].flat))
    # Small hand-written alternative:
    #observed = [(1,2,1),(1,3,1),(1,4,1),(2,3,0),(2,5,1)]
    #missing  = [(1,5,1),(2,4,0),(3,4,1),(4,5,1)]

    # Model definition, assumed in exactly this order
    model_definitions = [
        # Convenience functions
        ('min-2', '(lambda (x y) (if (> x y) y x))'),
        ('max-2', '(lambda (x y) (if (> x y) x y))'),
        # CRP built from a uniformly drawn alpha
        ('alpha', '(uniform-continuous 0.0001 2.0)'),
        ('cluster-crp', '(CRP/make alpha)'),
        # Memoised class assignment lookup
        ('node->class', '(mem (lambda (nodes) (cluster-crp)))'),
        # Memoised class interaction probability lookup
        ('classes->parameters', '(mem (lambda (class1 class2) (beta 0.5 0.5)))'),
        # Disabled symmetric variant:
        #('classes->parameters-symmetric', '(lambda (class1 class2) (classes->parameters (min-2 class1 class2) (max-2 class1 class2)))'),
        # Relation evaluation
        ('p-friends', '(lambda (node1 node2) (classes->parameters (node->class node1) (node->class node2)))'),
        ('friends', '(lambda (node1 node2) (bernoulli (p-friends node1 node2)))'),
    ]
    for (symbol, expression) in model_definitions:
        engine.assume(symbol, parse(expression))

    # Tell Venture about observations; every link is observed in both directions
    for (i, j, v) in observed:
        if verbose:
            print('Observing (%3d, %3d) = %d' % (i, j, v))
        outcome = 'true' if v else 'false'
        for (source, target) in ((i, j), (j, i)):
            engine.observe(parse('(friends %d %d)' % (source, target)), outcome)

    # Tell Venture that we want to predict P(unobserved link)
    truth = []
    missing_links = []
    for (i, j, v) in missing:
        if verbose:
            print('Predicting (%3d, %3d) = %d' % (i, j, v))
        truth.append(int(v))
        missing_links.append(engine.predict(parse('(p-friends %d %d)' % (i, j))))
    # Only the first element of each prediction result is needed by the sampler
    link_ids = [an_id for (an_id, _) in missing_links]

    # Burn in (result discarded). Earlier variants used sample.collect_n_es or
    # sample.collect_n_samples here and below instead.
    sample.collect_n_samples_before_timeout(
        engine,
        n=burn,
        initial_mh_iter=mh_iter,
        ids=link_ids,
        max_runtime=60,
        verbose=verbose,
    )
    # Collect samples
    mcmc_output = sample.collect_n_samples_before_timeout(
        engine,
        n=n_samples,
        initial_mh_iter=mh_iter,
        ids=link_ids,
        max_runtime=60,
        verbose=verbose,
    )
    samples = mcmc_output['samples']
    sample_ess = mcmc_output['ess']

    if verbose:
        print('Fold complete')
    # Score the per-link sample means against the held-out truth
    predictions = list(samples.mean(axis=1))
    roc_data = list(zip(truth, predictions))
    AUC = ROCData(roc_data).auc()
    if verbose:
        print('AUC = %f' % AUC)

    max_mem = memory()

    return {
        'truth' : truth,
        'predictions' : predictions,
        'samples' : samples,
        'ess' : sample_ess,
        'AUC' : AUC,
        'runtime' : time.clock() - started_at,
        'max_mem' : max_mem,
    }
コード例 #4
0
ファイル: sample.py プロジェクト: jamesrobertlloyd/church-irm
def collect_n_samples_before_timeout(RIPL, n, initial_mh_iter, ids, max_runtime=600, verbose=True):
    '''Alters the number of intermediate mh_iter to n samples in max_runtime.

    Collects at most ``n`` samples. The first ten samples use
    ``initial_mh_iter`` MH steps each; after that the number of steps per
    sample is re-estimated before every sample so that the remaining samples
    should fill the remaining time. Collection stops early once
    ``max_runtime`` seconds (as measured by ``time.clock``) have passed.

    Arguments:
        RIPL            - object providing ``infer(steps)`` and ``report_value(id)``
        n               - maximum number of samples to collect
        initial_mh_iter - MH steps per sample before adaptation starts
        ids             - ids whose values make up one sample
        max_runtime     - time budget in seconds
        verbose         - print timing information

    Returns a dict with
        'samples'    - array with one row per id and one column per sample
        'ess'        - mean over rows of (number of samples / act.batch_means(row));
                       1 if that is NaN or infinite, or if no samples were collected
        'max_memory' - value of ``memory()`` at the end of the run
    '''
    #### FIXME - Dangerous - thinning depends on the state! - need to do a trial run!
    #### Maybe it does need linear regression to be awesome?
    #### TODO - start here
    samples = np.zeros((len(ids), 0))
    experiment_start = time.clock()
    start = time.clock()
    iteration = 0
    mh_iter = initial_mh_iter
    mh_sum = 0
    # For a maximum of n iterations
    for unused in range(n):
        if time.clock() - experiment_start >= max_runtime:
            break
        # Sample
        iteration += 1
        if 10 < iteration < n:
            # A few samples have been collected, adjust mh_iter to finish on time
            #### TODO - a filtering approach would be more adaptive
            now = time.clock()
            time_elapsed = now - start
            time_remaining = max_runtime - time_elapsed
            # Floor both terms so a clock that has not advanced yet, or a run
            # with zero MH steps so far, cannot cause a division by zero.
            time_per_mh_iter = max(time_elapsed, 0.01) / max(mh_sum, 1)
            samples_remaining = n - iteration
            time_remaining_per_sample = time_remaining / samples_remaining
            mh_iter = max(1, int(round(time_remaining_per_sample / time_per_mh_iter)))
            if verbose:
                # iteration - 1 samples have been recorded at this point
                time_per_sample_so_far = time_elapsed / (iteration - 1)
                for (label, value) in (
                    ('Time elapsed  ', time_elapsed),
                    ('Time remaining', time_remaining),
                    ('Time per mh   ', time_per_mh_iter),
                    ('Samples to go ', samples_remaining),
                    ('Time per samp.', time_per_sample_so_far),
                    ('Remaining per.', time_remaining_per_sample),
                    ('mh_iter', mh_iter),
                ):
                    print('%s %s' % (label, value))
        RIPL.infer(mh_iter)
        mh_sum += mh_iter
        if verbose:
            print('Iteration %d' % iteration)
        # Record sample
        samples = np.column_stack([samples, [RIPL.report_value(an_id) for an_id in ids]])
    finish = time.clock()
    # max(iteration, 1): no sample may have been taken (n == 0 or no time budget)
    time_per_sample = max(finish - start, 0.01) / max(iteration, 1)
    if verbose:
        print('%d iterations : %f seconds : %f seconds per iteration' % (iteration, finish - start, time_per_sample))
    # Compute average ess
    start = time.clock()
    if samples.shape[1] == 0:
        # Nothing to estimate from - use the same fallback as for NaN / inf
        ess = 1
    else:
        ess = np.mean([(samples.shape[1]) / act.batch_means(row) for row in samples])
        if np.isnan(ess) or np.isinf(ess):
            ess = 1
    finish = time.clock()
    time_to_compute_ess = max(finish - start, 0.01)
    if verbose:
        print('ESS = %f : %f seconds' % (ess, time_to_compute_ess))
    # Finished - return samples and ...TODO
    max_mem = memory()
    return {'samples' : samples, 'ess' : ess, 'max_memory' : max_mem}