def subsample(start, end):
    """Rebuild the current dataset for the ``(start, end)`` window.

    Rows whose ``Date`` lies strictly between ``start`` and ``end`` are
    selected from ``aapl_min`` when the window is narrower than the threshold
    below, and from ``aapl`` otherwise.  The selection is then reduced with
    ``coarsen`` (mean of ``Price``, minimum of ``Date``) using
    ``len(selection) // FACTOR_BASE`` as the factor, unless that factor is
    0 or 1, in which case the selected rows are used unchanged.

    Side effects: rebinds the module globals ``curr_ds`` (the dataset) and
    ``details`` (summary of the last request).

    Args:
        start: Lower bound of the window (exclusive); anything ``int()``
            accepts.  It is compared directly against the ``Date`` column.
        end: Upper bound of the window (exclusive); same conventions.

    Returns:
        The result of ``json.jsonify(curr_ds)``.
    """
    global curr_ds
    global details

    start = int(start)
    end = int(end)

    # use minutes dataset if timedelta of selected period is "short" enough
    # (note that this "short enough" is just arbitrary for this example)
    source = aapl_min if end - start < 43383600000 else aapl
    xs = source[(source.Date > start) & (source.Date < end)]

    factor = len(xs) // FACTOR_BASE
    if factor <= 1:
        # Factor of 0 or 1: use the selected rows unchanged.
        tss = xs.Date
        resampled = xs.Price
    else:
        resampled = coarsen(np.mean, np.asarray(xs.Price), factor)
        tss = coarsen(np.min, np.asarray(xs.Date), factor)

    # numpy scalars are not plain Python numbers; ``tolist`` converts them to
    # native ints/floats so serialising the payload does not depend on the
    # JSON encoder knowing about numpy types (Price already gets ``float``).
    dates = np.asarray(tss).tolist()
    curr_ds = dict(
        Date=dates,
        Price=[float(x) for x in resampled],
        # NOTE(review): ``x // 1000`` suggests Date is in epoch milliseconds
        # -- confirm against the data files.
        DateFmt=[fromtimestamp(x // 1000) for x in dates],
    )
    formatted = curr_ds['DateFmt']
    details = {
        # An empty window has no first/last sample; report None instead of
        # failing with IndexError.
        "start": formatted[0] if formatted else None,
        "end": formatted[-1] if formatted else None,
        "factor": factor,
        "samples_no": len(dates),
        "original_samples_no": len(xs),
    }
    return json.jsonify(curr_ds)
Esempio n. 2
0
def subsample(start, end):
    """Select, optionally coarsen, and publish the ``(start, end)`` window.

    The rows with ``start < Date < end`` are taken from ``aapl_min`` if the
    window is narrower than the threshold below, otherwise from ``aapl``.
    With ``factor = len(rows) // FACTOR_BASE``, a factor greater than 1 makes
    the rows go through ``coarsen`` (mean for ``Price``, minimum for
    ``Date``); a factor of 0 or 1 leaves them untouched.

    Side effects: rebinds the module globals ``curr_ds`` and ``details``.

    Args:
        start: Exclusive lower bound, converted with ``int()`` and compared
            directly against the ``Date`` column.
        end: Exclusive upper bound, same conventions as ``start``.

    Returns:
        Whatever ``json.jsonify(curr_ds)`` returns for the new dataset.
    """
    start = int(start)
    end = int(end)
    global curr_ds
    global details

    # use minutes dataset if timedelta of selected period is "short" enough
    # (note that this "short enough" is just arbitrary for this example)
    if end - start < 43383600000:
        xs = aapl_min[(aapl_min.Date > start) & (aapl_min.Date < end)]
    else:
        xs = aapl[(aapl.Date > start) & (aapl.Date < end)]

    factor = len(xs) // FACTOR_BASE
    if factor <= 1:
        tss = xs.Date
        resampled = xs.Price
    else:
        resampled = coarsen(np.mean, np.asarray(xs.Price), factor)
        tss = coarsen(np.min, np.asarray(xs.Date), factor)

    # Convert numpy scalars to native Python numbers up front: a JSON encoder
    # that is not numpy-aware cannot serialise e.g. ``numpy.int64``.
    stamps = np.asarray(tss).tolist()
    # NOTE(review): ``// 1000`` suggests Date holds epoch milliseconds --
    # confirm against the data files.
    stamps_fmt = [fromtimestamp(x // 1000) for x in stamps]

    curr_ds = dict(
        Date=stamps,
        Price=[float(x) for x in resampled],
        DateFmt=stamps_fmt,
    )
    # A window that matches no rows yields empty lists; indexing [0] / [-1]
    # would raise IndexError, so the bounds are reported as None in that case.
    details = {
        "start": stamps_fmt[0] if stamps_fmt else None,
        "end": stamps_fmt[-1] if stamps_fmt else None,
        "factor": factor,
        "samples_no": len(stamps),
        "original_samples_no": len(xs),
    }
    return json.jsonify(curr_ds)
    # Build simulated rows from haapl's Date/Low/High columns and write them
    # to the CSV file used for the minutes dataset.
    print("Simulating new data...")
    # Materialise the results: on Python 3 ``map`` returns a one-shot
    # iterator, and ``res`` is traversed twice below (once per column), so
    # the second traversal would otherwise see nothing.
    res = list(map(create_sim_data, haapl.Date, haapl.Low, haapl.High))
    # Each result is a sequence of items; item[0] is passed to to_seconds()
    # for the Date column and item[1] becomes the Price.
    newdt = pd.DataFrame({'Date': [to_seconds(item[0]) for sublist in res for item in sublist],
                          'Price': [item[1] for sublist in res for item in sublist]})
    print("Creating new file: data/aapl_minutes_raw.csv")
    newdt.to_csv('data/aapl_minutes_raw.csv')
    print("Done!")


# Minutes dataset, read from the CSV file when the module is loaded.
aapl_min = pd.read_csv('data/aapl_minutes_raw.csv')


# build some data
# Initial dataset: the whole ``aapl`` table coarsened by ``factor``
# (mean of Price, minimum of Date per coarsened sample).
factor = len(aapl) // FACTOR_BASE
resampled = coarsen(np.mean, np.asarray(aapl.Price), factor)
tss = coarsen(np.min, np.asarray(aapl.Date), factor)
# Human-readable timestamps; Date is divided by 1000 before conversion.
ftss = [
    fromtimestamp(stamp // 1000).strftime("%Y-%m-%d %H:%M:%S")
    for stamp in tss
]

curr_ds = {
    "Date": list(tss),
    "DateFmt": list(ftss),
    "Price": [float(price) for price in resampled],
}

details = {
    "start": curr_ds['DateFmt'][0],
    "end": curr_ds['DateFmt'][-1],
    "factor": factor,
    "samples_no": len(curr_ds['DateFmt']),
    "original_samples_no": len(aapl),
Esempio n. 4
0
    # Simulate rows from haapl's Date/Low/High columns and store them in the
    # CSV file that the minutes dataset is loaded from.
    print("Simulating new data...")
    # ``res`` is iterated once per column below.  On Python 3 ``map`` yields
    # a one-shot iterator, so it is turned into a list first; otherwise the
    # 'Price' comprehension would find it already exhausted.
    res = list(map(create_sim_data, haapl.Date, haapl.Low, haapl.High))
    newdt = pd.DataFrame({
        # item[0] goes through to_seconds() for Date; item[1] is the Price.
        'Date': [to_seconds(item[0]) for sublist in res for item in sublist],
        'Price': [item[1] for sublist in res for item in sublist]
    })
    print("Creating new file: data/aapl_minutes_raw.csv")
    newdt.to_csv('data/aapl_minutes_raw.csv')
    print("Done!")

# Load the minutes dataset from its CSV file at import time.
aapl_min = pd.read_csv('data/aapl_minutes_raw.csv')

# build some data
# The starting dataset is the full ``aapl`` table reduced with ``coarsen``:
# mean of Price and minimum of Date, using ``factor``.
factor = len(aapl) // FACTOR_BASE
resampled = coarsen(np.mean, np.asarray(aapl.Price), factor)
tss = coarsen(np.min, np.asarray(aapl.Date), factor)
# Formatted counterpart of ``tss`` (each value floor-divided by 1000 first).
ftss = [
    fromtimestamp(value // 1000).strftime("%Y-%m-%d %H:%M:%S")
    for value in tss
]

curr_ds = {
    "Date": list(tss),
    "DateFmt": list(ftss),
    "Price": [float(value) for value in resampled],
}

details = {
    "start": curr_ds['DateFmt'][0],
    "end": curr_ds['DateFmt'][-1],
    "factor": factor,
    "samples_no": len(curr_ds['DateFmt']),
    "original_samples_no": len(aapl),