/
main.py
133 lines (110 loc) · 3.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import logging
from argparse import ArgumentParser
import gif
import matplotlib.pyplot as plt
from load_data import TIDE_HEIGHT, chunk, process_data
from plot import GPPlot, plot_scatter
from predict import sequential_predictions, train
# Module-level logger used by the command functions in this file.
LOG = logging.getLogger(__name__)
# Configure the root logger so that INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
def plot_scatter_command(args):
    """Draw a scatter plot of the data loaded with ``normalise_data=False``.

    A new figure/axes pair is created with ``plt.subplots()`` and the axes are
    handed to ``plot_scatter`` together with the loaded data.

    Args:
        args: Parsed command-line namespace. ``save_figures`` and ``fig_name``
            are read and forwarded to ``plot_scatter``.
    """
    # Only the first and third values returned by process_data are needed here.
    observed, _, truth, _ = process_data(normalise_data=False)
    _, axes = plt.subplots()
    plot_scatter(
        axes,
        observed,
        truth,
        TIDE_HEIGHT,
        savefig=args.save_figures,
        fig_name=args.fig_name,
    )
def estimate_noise_command(args):
    """Log the standard deviation of the difference between truth and data.

    The true data is first restricted to the rows where the mask returned by
    ``process_data`` is False, then the loaded data is subtracted from it.

    Args:
        args: Parsed command-line namespace (not read by this command).
    """
    observed, _, truth, nan_mask = process_data(normalise_data=False)
    # Negate the mask: keep only the rows it does NOT flag.
    # Presumably the mask flags missing tide-height readings; verify against
    # process_data.
    keep_rows = ~nan_mask.values
    residual = truth.loc[keep_rows] - observed
    LOG.info(f"Estimate of the noise: {residual.std()}")
def train_command(args):
    """Run ``train`` on the data loaded with ``normalise_data=True`` and plot it.

    The mean, variance and predictions returned by ``train`` are passed to a
    ``GPPlot``, which is initialised and drawn. The figure is saved under
    ``args.fig_name`` only when ``args.save_figures`` is set.

    Args:
        args: Parsed command-line namespace. ``save_figures`` and ``fig_name``
            are read.
    """
    observed, query_points, truth, nan_mask = process_data(normalise_data=True)
    # The fourth value returned by train is not used here.
    predictions, mean, var, _ = train(query_points, observed)

    # Select the ground-truth rows where the mask is True (mask is NOT negated).
    # NOTE(review): the previous comment described these as the non-NaN points,
    # while estimate_noise_command negates the same mask before indexing.
    # Confirm which selection is intended here.
    truth_at_mask = truth.loc[nan_mask.values]

    figure = GPPlot(
        observed,
        truth_at_mask,
        mean,
        var,
        [predictions],
        TIDE_HEIGHT,
    )
    figure.init_plot()
    figure.plot()

    if args.save_figures:
        figure.savefig(args.fig_name)
def sequential_prediction_command(args):
    """Predict chunk by chunk and save one plot per chunk as an animated GIF.

    Every chunk yielded by ``chunk(data)`` is passed to
    ``sequential_predictions`` together with the largest index value of the
    full data set. Each result is then drawn with ``GPPlot`` as one frame, and
    all frames are written to ``"<args.fig_name>.gif"``.

    Args:
        args: Parsed command-line namespace. Only ``fig_name`` is read.
    """
    observed, _, truth, _ = process_data(normalise_data=True)
    latest_time = observed.index.max()

    # Run every prediction first, keeping each chunk next to its results so a
    # frame can later be rendered from a single tuple.
    steps = []
    for piece in chunk(observed):
        prediction, mean, var = sequential_predictions(piece, max_time=latest_time)
        steps.append((piece, mean, var, prediction))

    @gif.frame
    def render(step):
        # Draw one frame: the chunk seen so far with its predictive output.
        piece, mean, var, prediction = step
        figure = GPPlot(
            piece,
            truth,
            mean,
            var,
            [prediction],
            TIDE_HEIGHT,
            join=False,
        )
        figure.init_plot()
        figure.plot()

    frames = [render(step) for step in steps]
    gif.save(frames, f"{args.fig_name}.gif", duration=60, unit="s", between="startend")
def main(args):
    """Parse ``args``, run the selected sub-command and optionally show plots.

    Args:
        args: Command-line arguments without the program name, e.g.
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse for ``--help``, for invalid arguments,
            and when no sub-command is given.
    """
    parser = ArgumentParser(
        description="Plot a scatter plot of data or fit a GP and plot the result"
    )
    parser.add_argument(
        "--save-figures", action="store_true", help="Save the generated figures as pdf"
    )
    parser.add_argument(
        "--quiet", action="store_true", help="Do not show any of the generated plots"
    )
    parser.add_argument("--fig-name", default="plot", help="A name for the plot")

    # (sub-command name, help text, handler), registered in this order.
    commands = (
        (
            "plot_scatter",
            "Generates a scatter plot of the data",
            plot_scatter_command,
        ),
        (
            "train",
            "Trains a GP and optimises its hyperparameters on all the data",
            train_command,
        ),
        (
            "sequential_prediction",
            "Generates predictions for a GP on a sequential basis",
            sequential_prediction_command,
        ),
        (
            "estimate_noise",
            "Estimate the noise in the data",
            estimate_noise_command,
        ),
    )
    subparsers = parser.add_subparsers()
    for name, help_text, command_func in commands:
        subparsers.add_parser(name, help=help_text).set_defaults(func=command_func)

    parsed_args = parser.parse_args(args)

    # Sub-commands are optional for argparse, so without one the namespace has
    # no ``func`` attribute. Report that as a usage error instead of letting
    # an AttributeError escape.
    selected = getattr(parsed_args, "func", None)
    if selected is None:
        parser.error(
            "a command is required; use --help to list the available commands"
        )
    selected(parsed_args)

    if not parsed_args.quiet:
        plt.show()
# Script entry point: when the file is executed directly, pass the
# command-line arguments (without the program name) to main().
if __name__ == "__main__":
    import sys

    cli_arguments = sys.argv[1:]
    main(cli_arguments)