def gen_new_fake_data(n_rain=50, n_samples=10, rel_noise=2, mean_slope=5):
    """Generate random rainfall and sample series and write them to CSV.

    Files written (via ``arr2csv``):
      * 'dummy_rainfalldata.csv' -- an ``n_rain`` x 1 column of |gauss| values.
      * 'dummy_sampledata.csv'   -- the transposed list of generated samples.
      * 'dummy_params.csv'       -- the shift, slope and constant used for
        each sample (only when ``opt.TEST_FALSE_CASE`` is false).

    When ``opt.TEST_FALSE_CASE`` is true every sample is pure noise.
    Otherwise each sample is noise plus ``slope * rain + constant``, laid
    into the sample at a random offset ``shift``:
      * shift > 0:  sample[shift + i] receives rain[i]
      * shift <= 0: sample[i] receives rain[|shift| + i]
    Positions falling outside either series are ignored.

    Args:
        n_rain: length of the generated rainfall series.
        n_samples: number of sample series to generate.
        rel_noise: multiplier applied to the gaussian noise of each sample.
        mean_slope: value added to |gauss| to obtain each sample's slope.

    NOTE(review): assumes ``gauss(n)`` returns a NumPy array of n draws (the
    TEST_FALSE_CASE branch already needs ``abs(gauss(sz)) * rel_noise`` to
    work element-wise) -- confirm against its definition.
    """
    # The length was previously hard-coded to 50, which ignored ``n_rain``.
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])  # n_rain x 1
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')
    sample_sizes = np.random.randint(n_rain // 4, n_rain * 2, n_samples)

    if opt.TEST_FALSE_CASE:
        # Noise only: samples carry no rainfall signal at all.
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]
    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        # randint's upper bound is exclusive: shifts lie in
        # [-max_shift, max_shift).
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope
        constants = [abs(x) for x in gauss(n_samples)]

        # Flat view of the rainfall column so element-wise sums stay 1-D.
        rain_flat = dummy_rain[:, 0]

        dummy_samples = []
        for idx, shift in enumerate(shifts):
            # Start from pure noise, then add the shifted rainfall signal.
            sample = np.asarray(gauss(sample_sizes[idx]), dtype=float)
            sample = sample * rel_noise
            transformed_rain = slopes[idx] * rain_flat + constants[idx]

            # Overlapping window of the two series for this shift; this adds
            # exactly the elements the old per-element try/except loop did.
            sample_start = max(shift, 0)
            rain_start = max(-shift, 0)
            overlap = min(len(sample) - sample_start, n_rain - rain_start)
            if overlap > 0:
                sample_stop = sample_start + overlap
                rain_stop = rain_start + overlap
                sample[sample_start:sample_stop] += (
                    transformed_rain[rain_start:rain_stop])

            dummy_samples.append([abs(x) for x in sample])

        # The parameters only exist when real (non-noise) samples were built.
        arr2csv([shifts, slopes, constants], 'dummy_params.csv')

    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
def gen_new_fake_data(n_rain=50, n_samples=10, rel_noise=2, mean_slope=5):
    """Generate random rainfall and sample series and write them to CSV.

    Files written (via ``arr2csv``):
      * 'dummy_rainfalldata.csv' -- an ``n_rain`` x 1 column of |gauss| values.
      * 'dummy_sampledata.csv'   -- the transposed list of generated samples.
      * 'dummy_params.csv'       -- the shift, slope and constant used for
        each sample (only when ``opt.TEST_FALSE_CASE`` is false).

    When ``opt.TEST_FALSE_CASE`` is true every sample is pure noise.
    Otherwise each sample is noise plus ``slope * rain + constant``, laid
    into the sample at a random offset ``shift``:
      * shift > 0:  sample[shift + i] receives rain[i]
      * shift <= 0: sample[i] receives rain[|shift| + i]
    Positions falling outside either series are ignored.

    Args:
        n_rain: length of the generated rainfall series.
        n_samples: number of sample series to generate.
        rel_noise: multiplier applied to the gaussian noise of each sample.
        mean_slope: value added to |gauss| to obtain each sample's slope.

    NOTE(review): assumes ``gauss(n)`` returns a NumPy array of n draws (the
    TEST_FALSE_CASE branch already needs ``abs(gauss(sz)) * rel_noise`` to
    work element-wise) -- confirm against its definition.
    """
    # The length was previously hard-coded to 50, which ignored ``n_rain``.
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])  # n_rain x 1
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')
    sample_sizes = np.random.randint(n_rain // 4, n_rain * 2, n_samples)

    if opt.TEST_FALSE_CASE:
        # Noise only: samples carry no rainfall signal at all.
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]
    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        # randint's upper bound is exclusive: shifts lie in
        # [-max_shift, max_shift).
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope
        constants = [abs(x) for x in gauss(n_samples)]

        # Flat view of the rainfall column so element-wise sums stay 1-D.
        rain_flat = dummy_rain[:, 0]

        dummy_samples = []
        for idx, shift in enumerate(shifts):
            # Start from pure noise, then add the shifted rainfall signal.
            sample = np.asarray(gauss(sample_sizes[idx]), dtype=float)
            sample = sample * rel_noise
            transformed_rain = slopes[idx] * rain_flat + constants[idx]

            # Overlapping window of the two series for this shift; this adds
            # exactly the elements the old per-element try/except loop did.
            sample_start = max(shift, 0)
            rain_start = max(-shift, 0)
            overlap = min(len(sample) - sample_start, n_rain - rain_start)
            if overlap > 0:
                sample_stop = sample_start + overlap
                rain_stop = rain_start + overlap
                sample[sample_start:sample_stop] += (
                    transformed_rain[rain_start:rain_stop])

            dummy_samples.append([abs(x) for x in sample])

        # The parameters only exist when real (non-noise) samples were built.
        arr2csv([shifts, slopes, constants], 'dummy_params.csv')

    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
# Data Pre-Processing ############################################################ assert 0 <= opt.MIN_OVERLAP <= len(csv2arr(opt.RAINFALLDATA_FN)) # format and normalize rainfall data rain = csv2arr(opt.RAINFALLDATA_FN, hasheaders=opt.RAIN_DATA_HAS_HEADERS) rain = list(np.array(rain).T[0]) # convert Nx1 array to list rain = [float(x) for x in rain] # read and format sample data and take the transpose # (so now each sample is a row) sampledata = csv2arr(opt.SAMPLEDATA_FN, hasheaders=opt.SAMPLE_DATA_HAS_HEADERS) # take the transpose (so now each sample is a row) sampledata = transpose(sampledata) # convert any strings to floats sampledata = [[float(x) for x in sample if x] for sample in sampledata] ############################################################ # Analysis ############################################################ if not opt.DEBUG_MODE_ON or opt.TEST_DUMMY_RESULTS_LIKE_THEY_ARE_REAL: # Test single sample results if using generated fake data single_sample_results, probs = align(rain, sampledata) print "\nSingle Sample Results:" q = 1 print "# : (p < {}, p, r, shift)".format(opt.ALPHA) digs = int(ceil(log10(len(single_sample_results)))) for idx, res in enumerate(single_sample_results):
# Output file name; if None, defaults to 'summary_data.csv'.
# NOTE(review): the original note says the file lands in the folder
# containing this script, but the bare name is handed to arr2csv unchanged --
# confirm where arr2csv resolves relative paths.
outf = None

#####################################################

# set defaults
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Collect the requested rows from every summary file.  ``data`` holds one
# entry per (file, m, k) combination and ``data_guide`` the matching label.
data = []
data_guide = []
# Sorted so the output column order is reproducible (os.listdir returns
# entries in arbitrary order).
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    if not os.path.isfile(path2summary):
        continue  # sub-directories etc. cannot be parsed as summaries
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # Strip the extension whatever its length (was a fixed ``fn[:-4]``).
    label = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # Blocks are 11 rows apart in ``summary``; the first 3 columns
            # of the selected row are dropped.
            data.append(summary[11 * m + k][3:])
            x = 10 * m + k + 4  # in original csv (not counting blank rows)
            # Label is name_(m-k-x); the stray extra ')' has been removed.
            data_guide.append(label + '_({}-{}-{})'.format(m, k, x))

# One column per collected row, with the labels as the first output row.
data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)
# Output file name; if None, defaults to 'summary_data.csv'.
# NOTE(review): the original note says the file lands in the folder
# containing this script, but the bare name is handed to arr2csv unchanged --
# confirm where arr2csv resolves relative paths.
outf = None

#####################################################

# set defaults
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Collect the requested rows from every summary file.  ``data`` holds one
# entry per (file, m, k) combination and ``data_guide`` the matching label.
data = []
data_guide = []
# Sorted so the output column order is reproducible (os.listdir returns
# entries in arbitrary order).
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    if not os.path.isfile(path2summary):
        continue  # sub-directories etc. cannot be parsed as summaries
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # Strip the extension whatever its length (was a fixed ``fn[:-4]``).
    label = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # Blocks are 11 rows apart in ``summary``; the first 3 columns
            # of the selected row are dropped.
            data.append(summary[11 * m + k][3:])
            x = 10 * m + k + 4  # in original csv (not counting blank rows)
            # Label is name_(m-k-x); the stray extra ')' has been removed.
            data_guide.append(label + '_({}-{}-{})'.format(m, k, x))

# One column per collected row, with the labels as the first output row.
data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)
############################################################ assert 0 <= opt.MIN_OVERLAP <= len(csv2arr(opt.RAINFALLDATA_FN)) # format and normalize rainfall data rain = csv2arr(opt.RAINFALLDATA_FN, hasheaders=opt.RAIN_DATA_HAS_HEADERS) rain = list(np.array(rain).T[0]) # convert Nx1 array to list rain = [float(x) for x in rain] # read and format sample data and take the transpose # (so now each sample is a row) sampledata = csv2arr(opt.SAMPLEDATA_FN, hasheaders=opt.SAMPLE_DATA_HAS_HEADERS) # take the transpose (so now each sample is a row) sampledata = transpose(sampledata) # convert any strings to floats sampledata = [[float(x) for x in sample if x] for sample in sampledata] ############################################################ # Analysis ############################################################ if not opt.DEBUG_MODE_ON or opt.TEST_DUMMY_RESULTS_LIKE_THEY_ARE_REAL: # Test single sample results if using generated fake data single_sample_results, probs = align(rain, sampledata) print "\nSingle Sample Results:" q = 1 print "# : (p < {}, p, r, shift)".format(opt.ALPHA) digs = int(ceil(log10(len(single_sample_results)))) for idx, res in enumerate(single_sample_results):