def regenerate_data(data_model, pct=80., std=1.):
    """Regenerate test data using the specified data generation function.

    The allowed model names are the space-separated entries of the
    module-level ``data_gen_models`` string.

    Steps: reload the ``data`` module, call ``data.generate_<data_model>()``,
    then ``data.add_sampling_error`` and ``data.knockout_uniformly_at_random``.

    :param data_model: name of the generator to run; must be one of the
        entries in ``data_gen_models``.
    :param pct: forwarded as ``pct`` to
        ``data.knockout_uniformly_at_random`` (default 80.).
    :param std: forwarded as ``std`` to ``data.add_sampling_error``
        (default 1.).
    :raises TypeError: if ``data_model`` is not an allowed model name.
    :returns: None; the work is done through the ``data`` module.
    """
    allowed_models = data_gen_models.split(' ')
    if data_model not in allowed_models:
        # Call-style raise is valid on both Python 2 and Python 3; the
        # exception type and message are unchanged for existing callers.
        raise TypeError('Unrecognized data model "%s"; must be one of %s'
                        % (data_model, data_gen_models))

    import data
    # Re-execute the data module before generating (``reload`` is the
    # Python 2 builtin this file relies on).
    reload(data)

    # Dispatch by attribute lookup instead of eval()-ing a built string.
    generate = getattr(data, 'generate_%s' % data_model)
    generate()

    data.add_sampling_error(std=std)
    data.knockout_uniformly_at_random(pct=pct)
def regenerate_data(data_model, pct=80., std=1.):
    """Regenerate test data using the specified data generation function.

    The allowed model names are the space-separated entries of the
    module-level ``data_gen_models`` string.

    Steps: reload the ``data`` module, call ``data.generate_<data_model>()``,
    then ``data.add_sampling_error`` and ``data.knockout_uniformly_at_random``.

    :param data_model: name of the generator to run; must be one of the
        entries in ``data_gen_models``.
    :param pct: forwarded as ``pct`` to
        ``data.knockout_uniformly_at_random`` (default 80.).
    :param std: forwarded as ``std`` to ``data.add_sampling_error``
        (default 1.).
    :raises TypeError: if ``data_model`` is not an allowed model name.
    :returns: None; the work is done through the ``data`` module.
    """
    allowed_models = data_gen_models.split(' ')
    if data_model not in allowed_models:
        # Call-style raise is valid on both Python 2 and Python 3; the
        # exception type and message are unchanged for existing callers.
        raise TypeError('Unrecognized data model "%s"; must be one of %s'
                        % (data_model, data_gen_models))

    import data
    # Re-execute the data module before generating (``reload`` is the
    # Python 2 builtin this file relies on).
    reload(data)

    # Dispatch by attribute lookup instead of eval()-ing a built string.
    generate = getattr(data, 'generate_%s' % data_model)
    generate()

    data.add_sampling_error(std=std)
    data.knockout_uniformly_at_random(pct=pct)
# Driver: simulate one data set, add noise, knock out observations, then
# fit and evaluate the 'gp_re_a' model on the result.
import pylab as pl
import data

# Configure the data module's age grid, time grid and region count.
data.age_range = pl.arange(0, 81, 20)
data.time_range = pl.arange(1980, 2005, 5)
data.regions = pl.randint(5, 15)

# Random pause of up to five seconds before taking the timestamp that names
# this run's files (presumably so parallel runs do not collide -- confirm).
time.sleep(pl.rand() * 5.)
t0 = time.time()

# All files of this run share the timestamp in their names.
truth_csv = 'test_data/%s.csv' % t0
noisy_csv = 'test_data/noisy_%s.csv' % t0
missing_csv = 'test_data/missing_noisy_%s.csv' % t0

data.generate_fe(truth_csv)  # included just to get good test coverage
data.generate_smooth_gp_re_a(truth_csv, country_variation=True)

# One random noise level in [0, 5) per row of the generated file.
std = 5. * pl.rand(len(pl.csv2rec(truth_csv)))
pct = 90.

# Report the settings of this run, space-separated on one line.
print('%s %s %s %s %s' % (data.age_range, data.time_range, data.regions,
                          pl.mean(std), pct))

data.add_sampling_error(truth_csv, noisy_csv, std=std)
data.knockout_uniformly_at_random(noisy_csv, missing_csv, pct=pct)

description = ('knockout pct=%d, model matches data, has laplace priors, sigma_e = Exp(1)'
               % pct)
mod_mc = evaluate_model('gp_re_a', description, missing_csv, truth_csv)
# NOTE(review): the five statements below are the tail of a function whose
# `def` line is above the visible part of the file; their original
# indentation cannot be recovered from this view.
# They append `results` as one CSV row to dev_log.csv and return `mod_mc`.
f = open('dev_log.csv', 'a')
f_csv = csv.writer(f)
f_csv.writerow(results)
f.close()
return mod_mc

# Script entry point: simulate one data set, add noise, knock out
# observations, then evaluate the 'gp_re_a' model on the result.
if __name__ == '__main__':
    import pylab as pl
    import data

    # Configure the data module's age grid, time grid and region count.
    data.age_range = pl.arange(0, 81, 20)
    data.time_range = pl.arange(1980, 2005, 5)
    data.regions = pl.randint(5,15)

    # Random pause of up to five seconds before taking the timestamp that
    # names this run's files (presumably to keep parallel runs from
    # colliding -- TODO confirm).
    time.sleep(pl.rand()*5.)
    t0 = time.time()

    # Both generators are given the same path.
    data.generate_fe('test_data/%s.csv'%t0) # included just to get good test coverage
    data.generate_smooth_gp_re_a('test_data/%s.csv'%t0, country_variation=True)

    # One random noise level in [0, 5) per row of the generated file.
    std=5.*pl.rand(len(pl.csv2rec('test_data/%s.csv'%t0)))
    pct=90.

    # Report the settings of this run.
    print data.age_range, data.time_range, data.regions, pl.mean(std), pct

    # Clean file -> noisy file -> noisy file with knocked-out rows.
    data.add_sampling_error('test_data/%s.csv'%t0, 'test_data/noisy_%s.csv'%t0, std=std)
    data.knockout_uniformly_at_random('test_data/noisy_%s.csv'%t0, 'test_data/missing_noisy_%s.csv'%t0, pct=pct)

    # Evaluate using the knocked-out file and the clean file.
    mod_mc = evaluate_model('gp_re_a', 'knockout pct=%d, model matches data, has laplace priors, sigma_e = Exp(1)' % pct, 'test_data/missing_noisy_%s.csv'%t0, 'test_data/%s.csv'%t0)