def regenerate_data(data_model, pct=80., std=1.):
    """Regenerate test data using the specified data generation function.

    Re-executes the ``data`` module, calls its ``generate_<data_model>()``
    function, and then runs ``data.add_sampling_error`` followed by
    ``data.knockout_uniformly_at_random``.

    Parameters:
        data_model: name of the generator to run; must be one of the
            space-separated names in the module-level ``data_gen_models``
            string.
        pct: forwarded as ``pct`` to ``data.knockout_uniformly_at_random``
            (presumably the percentage of rows to knock out -- confirm
            against the ``data`` module).  Defaults to 80.
        std: forwarded as ``std`` to ``data.add_sampling_error``.
            Defaults to 1.

    Raises:
        TypeError: if ``data_model`` is not listed in ``data_gen_models``.
    """
    # NOTE: the docstring above must stay a plain string literal; a
    # '"""...""" % x' expression is not stored as __doc__.
    if data_model not in data_gen_models.split(' '):
        raise TypeError('Unrecognized data model "%s"; must be one of %s'
                        % (data_model, data_gen_models))

    import data
    reload(data)  # re-execute the data module before generating

    # data_model was validated against the whitelist above, so a plain
    # attribute lookup replaces building and eval()-ing source text.
    generate = getattr(data, 'generate_%s' % data_model)
    generate()
    data.add_sampling_error(std=std)
    data.knockout_uniformly_at_random(pct=pct)
Example #2
0
def regenerate_data(data_model, pct=80., std=1.):
    """Regenerate test data using the specified data generation function.

    The ``data`` module is re-executed, its ``generate_<data_model>()``
    function is called, and the result is then passed through
    ``data.add_sampling_error`` and ``data.knockout_uniformly_at_random``.

    Parameters:
        data_model: generator name; it has to appear in the module-level,
            space-separated ``data_gen_models`` string.
        pct: value handed to ``data.knockout_uniformly_at_random`` as
            ``pct`` (presumably a knockout percentage -- confirm against
            the ``data`` module).  Defaults to 80.
        std: value handed to ``data.add_sampling_error`` as ``std``.
            Defaults to 1.

    Raises:
        TypeError: when ``data_model`` is not one of the names in
            ``data_gen_models``.
    """
    # Keep the docstring a bare literal: applying '%' to it turns it into
    # an ordinary expression and leaves the function without a __doc__.
    allowed_models = data_gen_models.split(' ')
    if data_model not in allowed_models:
        raise TypeError(
            'Unrecognized data model "%s"; must be one of %s' % (
                data_model, data_gen_models))

    import data
    reload(data)  # re-execute the data module before generating

    # The name was checked against the whitelist, so look the generator
    # up directly instead of eval()-ing a constructed source string.
    getattr(data, 'generate_%s' % data_model)()
    data.add_sampling_error(std=std)
    data.knockout_uniformly_at_random(pct=pct)
Example #3
0
    import pylab as pl
    import data

    data.age_range = pl.arange(0, 81, 20)
    data.time_range = pl.arange(1980, 2005, 5)
    data.regions = pl.randint(5, 15)

    time.sleep(pl.rand() * 5.)
    t0 = time.time()
    data.generate_fe('test_data/%s.csv' %
                     t0)  # included just to get good test coverage
    data.generate_smooth_gp_re_a('test_data/%s.csv' % t0,
                                 country_variation=True)

    std = 5. * pl.rand(len(pl.csv2rec('test_data/%s.csv' % t0)))
    pct = 90.

    print data.age_range, data.time_range, data.regions, pl.mean(std), pct

    data.add_sampling_error('test_data/%s.csv' % t0,
                            'test_data/noisy_%s.csv' % t0,
                            std=std)
    data.knockout_uniformly_at_random('test_data/noisy_%s.csv' % t0,
                                      'test_data/missing_noisy_%s.csv' % t0,
                                      pct=pct)

    mod_mc = evaluate_model(
        'gp_re_a',
        'knockout pct=%d, model matches data, has laplace priors, sigma_e = Exp(1)'
        % pct, 'test_data/missing_noisy_%s.csv' % t0, 'test_data/%s.csv' % t0)
    f = open('dev_log.csv', 'a')
    f_csv = csv.writer(f)
    f_csv.writerow(results)
    f.close()

    return mod_mc

if __name__ == '__main__':
    # Script entry point: generate one synthetic data set, add sampling
    # error, knock out part of it, and pass the files to evaluate_model.
    import pylab as pl
    import data

    # Override the data module's age/time grids and region count.
    data.age_range = pl.arange(0, 81, 20)
    data.time_range = pl.arange(1980, 2005, 5)
    data.regions = pl.randint(5, 15)

    # Random pause of up to five seconds before taking the timestamp that
    # names this run's files (presumably to stagger concurrent runs --
    # confirm).
    time.sleep(pl.rand() * 5.)
    t0 = time.time()

    # All files of this run are keyed by the timestamp.
    truth_path = 'test_data/%s.csv' % t0
    noisy_path = 'test_data/noisy_%s.csv' % t0
    holdout_path = 'test_data/missing_noisy_%s.csv' % t0

    data.generate_fe(truth_path)  # included just to get good test coverage
    # Called with the same path as generate_fe above.
    data.generate_smooth_gp_re_a(truth_path, country_variation=True)

    # One random standard deviation in [0, 5) per row of the generated file.
    n_rows = len(pl.csv2rec(truth_path))
    std = 5. * pl.rand(n_rows)
    pct = 90.

    # Single pre-formatted argument: same output as the Python 2 statement
    # form "print a, b, c, d, e".
    print('%s %s %s %s %s' % (data.age_range, data.time_range, data.regions,
                              pl.mean(std), pct))

    data.add_sampling_error(truth_path, noisy_path, std=std)
    data.knockout_uniformly_at_random(noisy_path, holdout_path, pct=pct)

    description = ('knockout pct=%d, model matches data, has laplace priors, '
                   'sigma_e = Exp(1)' % pct)
    mod_mc = evaluate_model('gp_re_a', description, holdout_path, truth_path)