Ejemplo n.º 1
0
def run_data(fname, limit=None, sensors=None):
    """Load per-sensor counts from a CSV file and run them through a model.

    The CSV must have a ``timestamp`` column formatted as
    ``%Y-%m-%d %H:%M:%S``.  Every column after the first one is parsed as
    an integer count.  The counts selected for a row are summed into a
    single ``downstream`` value (never less than 1).  Rows in which any
    selected count is greater than 300 are dropped.

    A model is then built with ``create_upstream_model(max_input, steps)``
    (``steps`` is a module-level name) and fed every kept row whose
    ``downstream`` is below 300, collecting the model's
    ``multiStepBestPredictions`` for each entry of ``steps``.

    Args:
        fname: Path of the CSV file to read.
        limit: Optional ``datetime``.  Processing stops right after the
            first row whose timestamp is greater than or equal to it; that
            row is still passed to the model when its ``downstream`` is
            below 300.  Any value that is not a ``datetime`` instance
            (including ``None``) disables the cut-off.
        sensors: Optional container of ints.  When given, only columns
            whose header, converted with ``int``, is contained in it are
            used.  When ``None``, all columns after the first are used.

    Returns:
        A 6-tuple ``(step_predictions, data, model, it, row_count,
        data_len)``:

        * ``step_predictions`` -- dict mapping each entry of ``steps`` to
          the list of predictions collected for it.
        * ``data`` -- list of ``{'timestamp': ..., 'downstream': ...}``
          dicts, one per kept row.
        * ``model`` -- the object returned by ``create_upstream_model``.
        * ``it`` -- the iterator over ``data`` used by the processing
          loop, left positioned after the last row it consumed so a
          caller can keep iterating from there.
        * ``row_count`` -- number of rows the loop went through without
          hitting the ``limit`` cut-off.
        * ``data_len`` -- ``len(data)``.
    """
    # isinstance (rather than an exact type match) so that datetime
    # subclasses are honoured as a limit instead of being silently ignored.
    has_limit = isinstance(limit, datetime)

    data = []
    # load up the data
    print("Loading Data")
    data_rows = 0
    max_input = 0
    last_row = np.inf
    # NOTE(review): binary mode is what the csv module wants on Python 2;
    # on Python 3 the file would have to be opened in text mode instead.
    with open(fname, 'rb') as infile:
        reader = csv.DictReader(infile)
        fields = reader.fieldnames or []
        # The set of columns to read is the same for every row, so work it
        # out once instead of re-filtering the header on each iteration.
        if sensors is None:
            columns = fields[1:]
        else:
            columns = [x for x in fields[1:] if int(x) in sensors]

        for row in reader:
            dt = datetime.strptime(row['timestamp'], "%Y-%m-%d %H:%M:%S")
            if has_limit and limit > dt:
                # Track the index of the latest row seen before the limit;
                # it is used below to size the progress bar.
                last_row = data_rows
            counts = [int(row[x]) for x in columns]
            # Drop the whole row as soon as one sensor exceeds 300.
            if any(count > 300 for count in counts):
                continue
            downstream = max(1, sum(counts))
            data.append({
                'timestamp': dt,
                'downstream': downstream
            })
            # Only rows that will actually be fed to the model (see the
            # processing loop) contribute to the model's maximum input.
            if downstream < 300:
                max_input = max(max_input, downstream)
            data_rows += 1
    print("Data length", data_rows, "max_input", max_input)
    print("Done\nAnalysing Data")

    # process the data
    model = create_upstream_model(max_input, steps)
    step_predictions = {
        i: [] for i in steps
    }
    row_count = 0
    progress = pyprind.ProgBar(min(last_row, data_rows), width=50, stream=1)
    # An explicit iterator is kept (and returned) so the caller can resume
    # from wherever this loop stops.
    it = iter(data)
    for row in it:
        progress.update()
        if row['downstream'] < 300:
            result = model.run(row)
            for i in steps:
                step_predictions[i].append(
                    result.inferences["multiStepBestPredictions"][i])

        # The row that reaches the limit has already been run above; it is
        # consumed from the iterator but not counted in row_count.
        if has_limit and row['timestamp'] >= limit:
            break

        row_count += 1
    return step_predictions, data, model, it, row_count, len(data)
Ejemplo n.º 2
0
                ('rmse', rmse(stepped_vals, pred_vals))
            ]))
        print(tabulate.tabulate(table, 'keys', 'latex'))

        print("Loading matplotlib")
        import matplotlib.pyplot as plt

        true_y = []
        true_x = []
        pred_y = []
        print("Predicting data rows: {}".format(data_len - row_count))

        progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
        for row in it:
            progress.update()
            preds = model.run(row)
            if row['timestamp'] > datetime(2013, 6, 15):
                break
            true_x.append(row['timestamp'])
            true_y.append(row['downstream'])
            pred_y.append(preds.inferences["multiStepBestPredictions"][1])
        np_tx = np.array(true_x)[1:]
        np_ty = np.array(true_y)[1:]
        np_py = np.array(pred_y)[:-1]
        print("GEH:  ",  geh(np_ty, np_py))
        print("MAPE: ", mape(np_ty, np_py))
        print("RMSE: ", rmse(np_ty, np_py))


        print()
        print("True x:", len(true_x))