def subsample(start, end):
    """Return a downsampled price series for the period between start and end.

    Rows whose ``Date`` lies strictly between ``start`` and ``end`` are taken
    from ``aapl_min`` when the period is shorter than a fixed cut-off, and
    from ``aapl`` otherwise.  If ``len(rows) // FACTOR_BASE`` is greater
    than 1 the selection is reduced with ``coarsen`` (``np.mean`` over the
    prices, ``np.min`` over the dates); otherwise it is used as is.

    Side effects:
        Rebinds the module-level ``curr_ds`` and ``details``.

    Args:
        start: Exclusive lower bound of the period; converted with ``int()``.
        end: Exclusive upper bound of the period; converted with ``int()``.

    Returns:
        ``json.jsonify(curr_ds)`` for the freshly built dataset.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when ``start`` or
            ``end`` cannot be converted.
    """
    global curr_ds
    global details

    start = int(start)
    end = int(end)

    # Use the minutes dataset if the selected period is "short" enough
    # (this "short enough" cut-off is just arbitrary for this example).
    # NOTE(review): presumably milliseconds, given the `// 1000` applied
    # before `fromtimestamp` below -- confirm.
    short_period_limit = 43383600000
    if end - start < short_period_limit:
        xs = aapl_min[(aapl_min.Date > start) & (aapl_min.Date < end)]
    else:
        xs = aapl[(aapl.Date > start) & (aapl.Date < end)]

    factor = len(xs) // FACTOR_BASE
    if factor <= 1:
        # Too few rows to be worth coarsening: pass them through unchanged.
        tss = xs.Date
        resampled = xs.Price
    else:
        resampled = coarsen(np.mean, np.asarray(xs.Price), factor)
        tss = coarsen(np.min, np.asarray(xs.Date), factor)

    # NOTE(review): DateFmt holds whatever `fromtimestamp` returns (no
    # explicit string formatting here); serialization is left to jsonify.
    curr_ds = dict(
        Date=list(tss),
        Price=[float(x) for x in resampled],
        DateFmt=[fromtimestamp(x // 1000) for x in tss],
    )

    # An empty selection (e.g. start >= end, or a period outside the data)
    # leaves DateFmt empty; report None instead of raising IndexError.
    date_fmt = curr_ds['DateFmt']
    details = {
        "start": date_fmt[0] if date_fmt else None,
        "end": date_fmt[-1] if date_fmt else None,
        "factor": factor,
        "samples_no": len(tss),
        "original_samples_no": len(xs),
    }

    return json.jsonify(curr_ds)
def subsample(start, end):
    """Return a downsampled price series for the period between start and end.

    Rows whose ``Date`` lies strictly between ``start`` and ``end`` are taken
    from ``aapl_min`` when the period is shorter than a fixed cut-off, and
    from ``aapl`` otherwise.  If ``len(rows) // FACTOR_BASE`` is greater
    than 1 the selection is reduced with ``coarsen`` (``np.mean`` over the
    prices, ``np.min`` over the dates); otherwise it is used as is.

    Side effects:
        Rebinds the module-level ``curr_ds`` and ``details``.

    Args:
        start: Exclusive lower bound of the period; converted with ``int()``.
        end: Exclusive upper bound of the period; converted with ``int()``.

    Returns:
        ``json.jsonify(curr_ds)`` for the freshly built dataset.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when ``start`` or
            ``end`` cannot be converted.
    """
    global curr_ds
    global details

    start = int(start)
    end = int(end)

    # Use the minutes dataset if the selected period is "short" enough
    # (this "short enough" cut-off is just arbitrary for this example).
    # NOTE(review): presumably milliseconds, given the `// 1000` applied
    # before `fromtimestamp` below -- confirm.
    short_period_limit = 43383600000
    if end - start < short_period_limit:
        xs = aapl_min[(aapl_min.Date > start) & (aapl_min.Date < end)]
    else:
        xs = aapl[(aapl.Date > start) & (aapl.Date < end)]

    factor = len(xs) // FACTOR_BASE
    if factor <= 1:
        # Too few rows to be worth coarsening: pass them through unchanged.
        tss = xs.Date
        resampled = xs.Price
    else:
        resampled = coarsen(np.mean, np.asarray(xs.Price), factor)
        tss = coarsen(np.min, np.asarray(xs.Date), factor)

    # NOTE(review): DateFmt holds whatever `fromtimestamp` returns (no
    # explicit string formatting here); serialization is left to jsonify.
    curr_ds = dict(
        Date=list(tss),
        Price=[float(x) for x in resampled],
        DateFmt=[fromtimestamp(x // 1000) for x in tss],
    )

    # An empty selection (e.g. start >= end, or a period outside the data)
    # leaves DateFmt empty; report None instead of raising IndexError.
    date_fmt = curr_ds['DateFmt']
    details = {
        "start": date_fmt[0] if date_fmt else None,
        "end": date_fmt[-1] if date_fmt else None,
        "factor": factor,
        "samples_no": len(tss),
        "original_samples_no": len(xs),
    }

    return json.jsonify(curr_ds)
print ("Simulating new data...") res = map(create_sim_data, haapl.Date, haapl.Low, haapl.High) newdt = pd.DataFrame({'Date': [to_seconds(item[0]) for sublist in res for item in sublist], 'Price': [item[1] for sublist in res for item in sublist]}) print ("Creating new file: data/aapl_minutes_raw.csv") newdt.to_csv('data/aapl_minutes_raw.csv') print ("Done!") aapl_min = pd.read_csv('data/aapl_minutes_raw.csv') # build some data factor = len(aapl) // FACTOR_BASE resampled = coarsen(np.mean, np.asarray(aapl.Price), factor) tss = coarsen(np.min, np.asarray(aapl.Date), factor) ftss = [fromtimestamp(x//1000).strftime("%Y-%m-%d %H:%M:%S") for x in tss] curr_ds = dict( Date=[x for x in tss], DateFmt=[ts for ts in ftss], Price=[float(x) for x in resampled], ) details = { "start": curr_ds['DateFmt'][0], "end": curr_ds['DateFmt'][-1], "factor": factor, "samples_no": len(curr_ds['DateFmt']), "original_samples_no": len(aapl),
print("Simulating new data...") res = map(create_sim_data, haapl.Date, haapl.Low, haapl.High) newdt = pd.DataFrame({ 'Date': [to_seconds(item[0]) for sublist in res for item in sublist], 'Price': [item[1] for sublist in res for item in sublist] }) print("Creating new file: data/aapl_minutes_raw.csv") newdt.to_csv('data/aapl_minutes_raw.csv') print("Done!") aapl_min = pd.read_csv('data/aapl_minutes_raw.csv') # build some data factor = len(aapl) // FACTOR_BASE resampled = coarsen(np.mean, np.asarray(aapl.Price), factor) tss = coarsen(np.min, np.asarray(aapl.Date), factor) ftss = [fromtimestamp(x // 1000).strftime("%Y-%m-%d %H:%M:%S") for x in tss] curr_ds = dict( Date=[x for x in tss], DateFmt=[ts for ts in ftss], Price=[float(x) for x in resampled], ) details = { "start": curr_ds['DateFmt'][0], "end": curr_ds['DateFmt'][-1], "factor": factor, "samples_no": len(curr_ds['DateFmt']), "original_samples_no": len(aapl),