def close(self, *args, **kwds):
    r"""
    Return the time series of all historical closing prices for this
    stock.

    If no arguments are given, the last acquired historical data is
    used. Otherwise, the data is fetched from Google Finance.

    INPUT:

    - ``startdate`` -- string (default: ``'Jan+1,+1900'``)

    - ``enddate`` -- string (default: current date)

    - ``histperiod`` -- string (``'daily'`` or ``'weekly'``)

    OUTPUT:

    A time series -- close price data.

    EXAMPLES:

    You can directly obtain close data as follows::

        sage: finance.Stock('vmw').close(startdate='Jan+1,+2008', enddate='Feb+1,+2008')  # optional -- internet
        [84.6000, 83.9500, 80.4900, 72.9900, ... 83.0000, 54.8700, 56.4200, 56.6700, 57.8500]

    Or, you can initialize the stock data first and then extract the
    close data::

        sage: c = finance.Stock('vmw')  # optional -- internet
        sage: c.history(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]  # optional -- internet
        [
          1-Feb-08 56.98 58.14 55.06 57.85   2490481,
          4-Feb-08 58.00 60.47 56.91 58.05   1840709,
          5-Feb-08 57.60 59.30 57.17 59.30   1712179,
          6-Feb-08 60.32 62.00 59.50 61.52   2211775,
          7-Feb-08 60.50 62.75 59.56 60.80   1521651
        ]
        sage: c.close()  # optional -- internet
        [57.8500, 58.0500, 59.3000, 61.5200, ... 58.2900, 60.1800, 59.8600, 59.9500, 58.6700]

    Otherwise, :meth:`history` will be called with the default
    arguments, returning a year's worth of data::

        sage: finance.Stock('vmw').close()  # random; optional -- internet
        [57.7100, 56.9900, 55.5500, 57.3300, 65.9900 ... 84.9900, 84.6000, 83.9500, 80.4900, 72.9900]
    """
    from time_series import TimeSeries

    if len(args) != 0:
        return TimeSeries([x.close for x in self.history(*args, **kwds)])

    try:
        return TimeSeries([x.close for x in self.__historical])
    except AttributeError:
        pass

    return TimeSeries([x.close for x in self.history(*args, **kwds)])
def open(self, *args, **kwds):
    r"""
    Return a time series containing historical opening prices for this
    stock.

    If no arguments are given, the last acquired historical data is
    used. Otherwise, the data is fetched from Google Finance.

    INPUT:

    - ``startdate`` -- string (default: ``'Jan+1,+1900'``)

    - ``enddate`` -- string (default: current date)

    - ``histperiod`` -- string (``'daily'`` or ``'weekly'``)

    OUTPUT:

    A time series -- open price data.

    EXAMPLES:

    You can directly obtain open data as follows::

        sage: finance.Stock('vmw').open(startdate='Jan+1,+2008', enddate='Feb+1,+2008')  # optional -- internet
        [83.0500, 85.4900, 84.9000, 82.0000, 81.2500 ... 82.0000, 58.2700, 54.4900, 55.6000, 56.9800]

    Or, you can initialize the stock data first and then extract the
    open data::

        sage: c = finance.Stock('vmw')
        sage: c.google(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]  # optional -- internet
        [
         31-Jan-08 55.60 57.35 55.52 56.67   2591100,
          1-Feb-08 56.98 58.14 55.06 57.85   2473000,
          4-Feb-08 58.00 60.47 56.91 58.05   1816500,
          5-Feb-08 57.60 59.30 57.17 59.30   1709000,
          6-Feb-08 60.32 62.00 59.50 61.52   2191100
        ]
        sage: c.open()  # optional -- internet
        [55.6000, 56.9800, 58.0000, 57.6000, 60.3200 ... 56.5500, 59.3000, 60.0000, 59.7900, 59.2600]

    Otherwise, ``self.google()`` will be called with the default
    arguments, returning a year's worth of data::

        sage: finance.Stock('vmw').open()  # random; optional -- internet
        [52.1100, 60.9900, 59.0000, 56.0500, 57.2500 ... 83.0500, 85.4900, 84.9000, 82.0000, 81.2500]
    """
    from time_series import TimeSeries

    if len(args) != 0:
        return TimeSeries([x.open for x in self.google(*args, **kwds)])

    try:
        return TimeSeries([x.open for x in self.__historical])
    except AttributeError:
        pass

    return TimeSeries([x.open for x in self.google(*args, **kwds)])
def test_eq():
    # generate variants of increasing length; equal data must compare equal
    # regardless of the series name
    variants = [[i // 5 for i in range(100 * j, 199 * j)] for j in range(1, 5)]
    for one in variants:
        for two in variants:
            if one == two:
                assert TimeSeries('a', one) == TimeSeries('b', two)
            else:
                assert TimeSeries('c', one) != TimeSeries('d', two)
def test_cum_sum(source):
    ts = TimeSeries("test", source)
    cs = ts.cumsum()
    assert len(cs.ts) == len(set(source))
    assert cs.vs[-1] == ts.vs.sum()
def engine_func():
    global feature_set, label_set

    get_metric()
    df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    feature_set, label_set = hybrid_data(df_in)

    model = TimeSeries(model=MODEL)
    # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    history = model.train_model(features=feature_set, labels=label_set, epochs=10)
    write_history(history)

    prediction = model.get_prediction(feature_set)
    write_prediction(prediction.tolist())
    model.save_model()

    # Write predictions and scores to disk
    mail_interval = int(time.time())
    train_interval = int(time.time())
    predict_interval = int(time.time())
    get_metric_interval = int(time.time())
    idle_status = False

    while True:
        time_now = int(time.time())

        if time_now - get_metric_interval >= GET_METRIC_INTERVAL:
            get_metric()
            feature_set, label_set = hybrid_data(df_in)

        if time_now - predict_interval >= PREDICT_INTERVAL:
            idle_status = False
            print("Predicting ...")
            prediction = model.get_prediction(feature_set)
            write_prediction(prediction.tolist())
            predict_interval = int(time.time())
        elif time_now - mail_interval >= MAIL_INTERVAL:
            idle_status = False
            print("Sending Email ... ")
            status = mail(TO_ADDRESS, read_prediction())
            print(status)
            mail_interval = int(time.time())
        elif time_now - train_interval >= TRAIN_INTERVAL:
            idle_status = False
            print("Training model ....")
            # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
            history = model.train_model(features=feature_set, labels=label_set, epochs=1)
            write_history(history)
            model.save_model()
            train_interval = int(time.time())
        else:
            if not idle_status:
                print("Engine Idle ...")
                idle_status = True
def test_dt():
    dt_from_ = datetime.datetime(2019, 12, 1)
    dt_to_ = datetime.datetime(2020, 1, 1)
    from_ = dt_from_.timestamp()
    to_ = dt_to_.timestamp()

    orig_grid = 10
    ts_raw = [
        i * orig_grid + from_
        for i in range(-2 * orig_grid, int((to_ - from_) / orig_grid) + 2 * orig_grid)
    ]
    assert ts_raw

    ts = TimeSeries("test", ts_raw)
    assert ts.ts.min() < from_
    assert ts.ts.max() > to_

    time_interval = datetime.timedelta(days=1)
    assert ts[from_:to_:time_interval.total_seconds()] == ts[dt_from_:dt_to_:time_interval]
def to_future_matrix(X, days_predict=5, days_window=5, train_model=None):
    # Note: X is the dataframe in the format produced when first read from Excel.
    # Initialize the time-series model.
    ts_model = TimeSeries(days_window, train_model)

    all_ctry_new_df = pd.DataFrame(columns=[
        "country_id", "date", "cases", "deaths", "cases_14_100k", "cases_100k"
    ])

    country_id_col = X.loc[:, "country_id"].unique()
    for country in country_id_col:
        X_cur = X[X["country_id"] == country].copy(deep=True)
        ctry_df = process_ts_ctry(ts_model, country, X_cur, days_predict,
                                  days_window, train_model)
        all_ctry_new_df = pd.concat([all_ctry_new_df, ctry_df], axis=0)

    return all_ctry_new_df
def test_to_grid(start, stop, count):
    ts = TimeSeries("test", dates_perc)
    sliced = ts[start:stop]
    assert len(sliced.ts)
    assert sliced.sum()

    period = ((stop or sliced.ts.max()) - (start or sliced.ts.min())) / count
    gridded = ts[start:stop:period]
    if start:
        assert gridded.ts.min() == start
    if count != 1 and stop:
        assert gridded.ts.max() == stop
    assert len(gridded.ts) == count
    assert ts[start:stop].sum() == gridded.sum()
def get_filtered_ts(dt_start, dt_end, in_dir, target_month, target_year):
    filtered_ts_list = []
    date_start = (str(target_month).zfill(2) + "/" + str(dt_start.day).zfill(2) +
                  "/" + str(target_year))
    date_end = (str(target_month).zfill(2) + "/" + str(dt_end.day).zfill(2) +
                "/" + str(target_year))

    for server in os.listdir(in_dir):
        print(server)
        for file_name in os.listdir(in_dir + "/" + server + "/"):
            mac = file_name.split(".")[0]
            csv_path = in_dir + "/" + server + "/" + file_name
            ts = TimeSeries(csv_path, target_month, target_year, metric,
                            dt_start, dt_end)
            if filter_ts(ts):
                filtered_ts_list.append(
                    [mac, server, csv_path, date_start, date_end])

    return filtered_ts_list
def correct_signal(self, time, signal, tmin=None, tmax=None, r0=5e-2,
                   correction="bassetbound", window="blackmanharris",
                   impedance=None):
    if tmin is None:
        tmin = time[0]
    if tmax is None:
        tmax = time[-1]
    dt = time[1] - time[0]

    # Restrict the signal to the requested time window.
    mask = np.logical_and(time > tmin, time < tmax)
    sig = signal[mask]
    t = time[mask]
    signal_length = len(sig)

    # Frequency response of the selected correction (DC bin excluded).
    freq = rfftfreq(signal_length, dt)[1:]
    amp = getattr(self, f"amplitude_ratio_{correction}")(freq)
    phase = getattr(self, f"phase_{correction}")(freq)
    if impedance is None:
        impedance = np.ones_like(amp)
    else:
        impedance = getattr(self, f"{impedance}_impedance")(freq, r0)
    response = amp * np.exp(1j * phase) / impedance
    response = np.r_[1, response]  # restore the DC bin with unit response

    # Deconvolve the windowed signal by the response in the frequency domain.
    win = get_window(window, signal_length)
    corrected_signal = irfft(rfft(sig * win) / response, n=signal_length)
    return TimeSeries(corrected_signal, t, name="Corrected")
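# A self-contained sketch of the frequency-domain deconvolution step used in
# correct_signal(): window the signal, divide its spectrum by a response, and
# transform back. The first-order response below is made up for illustration
# only; it is not one of the corrections provided by the class above.
import numpy as np
from scipy.fft import rfft, irfft, rfftfreq
from scipy.signal import get_window

fs = 1000.0                                   # assumed sample rate [Hz]
t_demo = np.arange(0, 1.0, 1.0 / fs)
sig_demo = np.sin(2 * np.pi * 50 * t_demo)    # synthetic 50 Hz test signal

n = len(sig_demo)
freq_demo = rfftfreq(n, 1.0 / fs)[1:]         # skip the DC bin, as above
# Illustrative first-order response with unit DC gain (hypothetical).
response_demo = np.r_[1, 1.0 / (1.0 + 1j * freq_demo / 200.0)]

win_demo = get_window("blackmanharris", n)
corrected_demo = irfft(rfft(sig_demo * win_demo) / response_demo, n=n)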
def plot():
    global ax, fig, serverMac, change_points, ts, label_text, serverMac_id

    id_stringvar.set(str(serverMac_id[serverMac] + 1) + "/" + str(len(serverMac_id)))

    server = serverMac.split("_")[0]
    mac = serverMac.split("_")[1]
    in_file_path = "../../input/" + date_dir + "/" + server + "/" + mac + ".csv"
    in_file_path_cp = "./output/" + date_dir + "/" + server + "/" + mac + ".csv"

    dt_axvline = []
    if os.path.exists(in_file_path_cp):
        df = pd.read_csv(in_file_path_cp)
        for idx, row in df.iterrows():
            dt_axvline.append(row["dt"])
            change_points.append(row["dt"])

    fig.clf()
    ax = fig.add_subplot(111)
    ts = TimeSeries(in_file_path, target_month, target_year, metric)
    plot_procedures.plotax_ts(ax, ts, dt_axvline=dt_axvline, ylim=[-0.01, 1.01])
    canvas.show()
    fig.canvas.mpl_connect('button_press_event', handle_mouse_click)
from time_series import TimeSeries

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Imports for data visualization
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

ts = TimeSeries('dataset/average_temp_india.csv', train_size=0.7)

plt.plot(ts.data.iloc[:, 1].index, ts.data.iloc[:, 1])
plt.gcf().autofmt_xdate()
plt.title("Average Temperature of India (2000-2018)")
plt.xlabel("Time")
plt.ylabel("Temperature (°C)")
plt.show()

model = ExponentialSmoothing(ts.train, trend='additive', seasonal='additive').fit()
prediction = model.predict(start=ts.data.iloc[:, 1].index[0],
                           end=ts.data.iloc[:, 1].index[-1])

"""Brutlag Algorithm"""
PERIOD = 12        # the given time series has seasonal_period=12
GAMMA = 0.3684211  # the seasonality component
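# A minimal sketch of the Brutlag confidence bands these constants feed into,
# computed from the `prediction` and the observed series above. The scale
# factor M and the zero-seeding of the first seasonal cycle are illustrative
# choices, not taken from the original script.
M = 3  # band width scale factor (commonly 2..3); assumed value

series = ts.data.iloc[:, 1]      # observed values, as plotted above
deviations = [0.0] * PERIOD      # seed the first PERIOD deviations with zero
upper, lower = [], []

for i, (y, y_hat) in enumerate(zip(series, prediction)):
    d_prev = deviations[i]       # smoothed deviation one season (PERIOD steps) back
    upper.append(y_hat + M * d_prev)
    lower.append(y_hat - M * d_prev)
    # Brutlag's exponentially smoothed absolute deviation for this step.
    deviations.append(GAMMA * abs(y - y_hat) + (1 - GAMMA) * d_prev)

# A point is flagged as anomalous when it falls outside [lower, upper].
plt.plot(series.values, label="Actual")
plt.plot(prediction.values, label="Predicted")
plt.plot(upper, linestyle="--", label="Upper band")
plt.plot(lower, linestyle="--", label="Lower band")
plt.legend()
plt.show()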
# Imports for data visualization
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld

# Seasonal decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

register_matplotlib_converters()

ts = TimeSeries('dataset/monthly_sales.csv', train_size=0.8)

print("Sales Data\n")
print(ts.data.describe())
print("\nHead and Tail of the time series\n")
print(ts.data.head(5).iloc[:, 1:])
print(ts.data.tail(5).iloc[:, 1:])

# Plot of the raw time series data
plt.plot(ts.data.index, ts.data.sales)
plt.gcf().autofmt_xdate()
date_format = mpld.DateFormatter('%Y-%m')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title("Sales Data Analysis (2013-2016)")
plt.xlabel("Time")
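# The seasonal_decompose import above is not exercised in this excerpt. A
# minimal sketch of how it is typically applied to this monthly series;
# period=12 is an assumption matching one seasonal cycle per year.
decomposition = seasonal_decompose(ts.data.sales, model='additive', period=12)
decomposition.plot()   # trend, seasonal, and residual components
plt.show()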
This code is for a capstone project at the Data Science Institute, 2019.
author: zhuzilin, [email protected]
"""
from time_series import StatFunc, ARMA, TimeSeries
import random
import matplotlib.pyplot as plt
import math

"""
figure 1
2 mixed Gaussians
"""
gauss0 = StatFunc(random.gauss, {"mu": 0, "sigma": 1})
gauss1 = StatFunc(random.gauss, {"mu": 20, "sigma": 1})
ts = TimeSeries([gauss0, gauss1], [100, 1])

x = []
y = []
c = []
for _ in range(1000):
    t, v, color = ts.next()
    x.append(t)
    y.append(v)
    c.append(color)

plt.figure()
ax = plt.gca()
ax.scatter(x, y, c=c)
plt.savefig("example/f1.png")

"""
# pHat has a voltage divider using 120k + 820k resistors
# (mapping 25.85V onto the 3.3V max)
VOLT_DIVIDER = 120.0 / (120.0 + 820.0)

# our sampling time in secs
INTERVAL = 1.0

# which ADC channel
ADC_CHANNEL = 0

if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["voltage"])

if DEBUG:
    print("\nPress CTRL+C to exit.\n")

time.sleep(INTERVAL)  # short pause after ads1015 class creation recommended (??)

try:
    while True:
        t = time.time()
        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER
        if DEBUG:
            print("{0:.3f} {1:5d} {2:.6f}".format(t, value, volts))
        ts.store(t, [volts])
PERIODICITIES = []
SEASONALITY_TIMES = []
TRENDS = []
TREND_TYPES = []
TREND_TIMES = []
CONCEPT_DRIFT = []
CONCEPT_DRIFT_TIMES = []

for filename in to_include:
    print(filename)
    NAMES.append(filename)
    df = pd.read_csv('../datasets/' + filename, header=0)
    ts = TimeSeries(df,
                    timestep=to_include[filename][0],
                    dateformat=to_include[filename][1],
                    name=filename)
    NUMBER_OF_TIME_STEPS.append(ts.get_length())
    TIME_STEP_SIZES.append(ts.get_timestep())
    MINIMUMS.append(ts.get_min())
    MAXIMUMS.append(ts.get_max())
    MEDIANS.append(ts.get_median())
    MEANS.append(ts.get_mean())
    NUMBER_OF_ANOMALIES.append(len(ts.dataframe[ts.dataframe["outlier"] == 1]))
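# A hypothetical follow-up assembling the collected statistics into one summary
# table; the column names mirror the list names above and how the lists are
# eventually consumed is an assumption, not shown in the excerpt.
dataset_summary = pd.DataFrame({
    "name": NAMES,
    "length": NUMBER_OF_TIME_STEPS,
    "timestep": TIME_STEP_SIZES,
    "min": MINIMUMS,
    "max": MAXIMUMS,
    "median": MEDIANS,
    "mean": MEANS,
    "anomalies": NUMBER_OF_ANOMALIES,
})
print(dataset_summary)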
def test_create():
    ts = TimeSeries("test", dates_perc)
    assert len(ts.ts) == len(set(dates_perc)) == len(ts.vs)
    # TMP36 reads 0V at -50C and 2V at +150C
    return 100 * volt - 50


# which channel to read
TARGET_ADC = 0

# our sampling time in secs
INTERVAL = 1.0

if use_hw_spi:
    mcp = Adafruit_MCP3008.MCP3008(spi=SPI.SpiDev(0, 0))
else:
    mcp = Adafruit_MCP3008.MCP3008(clk=CLK, cs=CS, miso=MISO, mosi=MOSI)

ts = TimeSeries(["voltage", "temp"])

print("\nPress CTRL+C to exit.\n")

try:
    while True:
        # read the analog pin
        value = mcp.read_adc(TARGET_ADC)
        volt = voltage(value)
        temp_C = temp(volt)
        temp_F = 9 * temp_C / 5 + 32
        t = time.time()
        form = 't={time:.3f} - val= {volt:.3f} V == {temp:.3f} C / {temp_F:.3f} F'
        print(form.format(time=t, volt=volt, temp=temp_C, temp_F=temp_F))
def main():
    parser = argparse.ArgumentParser()
    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed', type=int, default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz', type=int, default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch', type=int, default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size', type=int, default=128, help='batch size')
    parser.add_argument('--lr', type=float, default=8e-5,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr', type=float, default=1e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr', type=float, default=5e-5,
                        help='min encoder/decoder learning rate for LR '
                             'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr', type=float, default=7e-5,
                        help='min discriminator learning rate for LR '
                             'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=0, help='weight decay')
    parser.add_argument('--overlap', type=float, default=.5,
                        help='kernel overlap')
    parser.add_argument('--no-norm-trans', action='store_true',
                        help='if set, use Gaussian posterior without '
                             'transformation')
    parser.add_argument('--plot-interval', type=int, default=1,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval', type=int, default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix', default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp', type=int, default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae', type=float, default=.2,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss', default='smooth_l1',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--ema', dest='ema', type=int, default=-1,
                        help='start epoch of exponential moving average '
                             '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay', type=float, default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd', type=float, default=1,
                        help='MMD strength for latent variable')

    # squash is off when rescale is off
    parser.add_argument('--squash', dest='squash', action='store_const',
                        const=True, default=True,
                        help='bound the generated time series value using tanh')
    parser.add_argument('--no-squash', dest='squash', action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale', dest='rescale', action='store_const',
                        const=True, default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale', dest='rescale', action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz
    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000, seq_len=200, max_time=1, poisson_rate=50,
            obs_span_rate=.25, save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
    if args.squash:
        squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(
        train_data, train_time, train_mask, label=None, max_time=max_time,
        cconv_ref=cconv_ref, overlap_rate=args.overlap, device=device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset, batch_size=batch_size,
                             shuffle=True, drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    test_loader = DataLoader(train_dataset, batch_size=batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(in_channels, out_channels, max_time, cconv_ref,
                             overlap_rate=args.overlap, kernel_size=args.comp,
                             norm=True).to(device)
    encoder = Encoder(cconv, nz, not args.no_norm_trans).to(device)

    pbigan = PBiGAN(encoder, decoder, args.aeloss).to(device)

    critic_cconv = ContinuousConv1D(in_channels, out_channels, max_time,
                                    cconv_ref, overlap_rate=args.overlap,
                                    kernel_size=args.comp, norm=True).to(device)
    critic = ConvCritic(critic_cconv, nz).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pbigan, args.ema_decay, args.ema)

    optimizer = optim.Adam(pbigan.parameters(), lr=args.lr,
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(), lr=args.dis_lr,
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))
    output_dir = Path('results') / 'toy-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, batch_size, max_time,
                            test_loader, rescaler, output_dir, device)

    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for ((val, idx, mask, _, cconv_graph),
             (_, idx_t, mask_t, index, _)) in zip(train_loader, time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss = pbigan(
                val, idx, mask, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(x_gen, idx_t, mask_t,
                                                       index)

            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward(retain_graph=True)
            critic_optimizer.step()

            G_loss = gan_loss(real, fake, 0, 1)
            mmd_loss = mmd(z_enc, z_gen)
            loss = G_loss + ae_loss * args.ae + mmd_loss * args.mmd

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            if ema:
                ema.apply()
                visualizer.plot(epoch)
                ema.restore()
            else:
                visualizer.plot(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))

        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
from multiprocessing import Process
import matplotlib.pyplot as plt

graph = tf.get_default_graph()
warnings.filterwarnings("ignore")


def write_prediction(prd, tr):
    with open('results.json', 'w+') as file:
        file.write(json.dumps({"prediction": prd.tolist()}))
        file.write(json.dumps({"true": tr.tolist()}))


with open('config.json', 'r+') as f:
    f = json.loads(f.read())
    MAIL_INTERVAL = f['MAIL_INTERVAL']
    TRAIN_INTERVAL = f['TRAIN_INTERVAL']
    PREDICT_INTERVAL = f['PREDICT_INTERVAL']
    TO_ADDRESS = f['TO_ADDRESS']
    MODEL = f['MODEL']
    PREDICT_LEN = f['PREDICT_LEN']
    FEED_LEN = f['FEED_LEN']

model = TimeSeries(model=MODEL)
df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
# history = model.train_model(dataframe=df_in, epochs=1)
# predict = model.actual_vs_predict(df_in)

# plt.plot(predict, color='red')
plt.plot(df_in['AWS/EC2 CPUUtilization'].values, color='blue')
# plt.ylim(0, 100)
plt.show()
def test_sum(source):
    ts = TimeSeries("test", source)
    assert ts.sum() == ts.vs.sum()
def test_getitem():
    ts = TimeSeries("test", dates_perc)
    assert len(ts[1586000000:].ts) == len(set(dates_perc))
    assert len(ts[:1588000000].ts) == len(set(dates_perc))
def main():
    parser = argparse.ArgumentParser()
    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed', type=int, default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz', type=int, default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch', type=int, default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size', type=int, default=128, help='batch size')
    parser.add_argument('--lr', type=float, default=1e-4, help='learning rate')
    parser.add_argument('--min-lr', type=float, default=5e-5,
                        help='min learning rate for LR scheduler. '
                             '-1 to disable annealing')
    parser.add_argument('--plot-interval', type=int, default=10,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval', type=int, default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix', default='pvae',
                        help='prefix of output directory')
    parser.add_argument('--comp', type=int, default=5,
                        help='continuous convolution kernel size')
    parser.add_argument('--sigma', type=float, default=.2,
                        help='standard deviation for Gaussian likelihood')
    parser.add_argument('--overlap', type=float, default=.5,
                        help='kernel overlap')

    # squash is off when rescale is off
    parser.add_argument('--squash', dest='squash', action='store_const',
                        const=True, default=True,
                        help='bound the generated time series value using tanh')
    parser.add_argument('--no-squash', dest='squash', action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale', dest='rescale', action='store_const',
                        const=True, default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale', dest='rescale', action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz
    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000, seq_len=200, max_time=1, poisson_rate=50,
            obs_span_rate=.25, save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
    if args.squash:
        squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(
        train_data, train_time, train_mask, label=None, max_time=max_time,
        cconv_ref=cconv_ref, overlap_rate=args.overlap, device=device)

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True,
        collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    test_batch_size = 64
    test_loader = DataLoader(train_dataset, batch_size=test_batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(
        in_channels, out_channels, max_time, cconv_ref,
        overlap_rate=args.overlap, kernel_size=args.comp, norm=True).to(device)
    encoder = Encoder(nz, cconv).to(device)

    pvae = PVAE(encoder, decoder, sigma=args.sigma).to(device)

    optimizer = optim.Adam(pvae.parameters(), lr=args.lr)
    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)

    path = '{}_{}_{}'.format(args.prefix,
                             datetime.now().strftime('%m%d.%H%M%S'),
                             '_'.join([f'lr_{args.lr:g}']))
    output_dir = Path('results') / 'toy-pvae' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, test_batch_size, max_time,
                            test_loader, rescaler, output_dir, device)

    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for val, idx, mask, _, cconv_graph in train_loader:
            optimizer.zero_grad()
            loss = pvae(val, idx, mask, cconv_graph)
            loss.backward()
            optimizer.step()
            loss_breakdown['loss'] += loss.item()

        if scheduler:
            scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            visualizer.plot(epoch)

        model_dict = {
            'pvae': pvae.state_dict(),
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))

        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
import torch
import torch.nn as nn

from network import Net
from prepare_data import prepare_data
from time_series import TimeSeries
from trainer import Trainer

if __name__ == '__main__':
    time_series_data = prepare_data()

    epoch_num = 100
    batch_size = 4

    dataset = TimeSeries(time_series_data,
                         input_time_interval=365,
                         output_time_interval=7,
                         output_keyword='peak_load')

    net = Net(in_ch=dataset.data_channels, out_ch=dataset.output_time_interval)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=10, verbose=True,
        threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0,
# which ADC channel
ADC_CHANNEL = 0


def temp(volt):
    """Convert analog voltage to temperature in C"""
    # TMP36 reads 0V at -50C and 2V at +150C
    return 100 * volt - 50


if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["volts", "temp"])

if DEBUG:
    print("\nPress CTRL+C to exit.\n")

time.sleep(INTERVAL)  # short pause after ads1015 class creation recommended (??)

try:
    while True:
        t = time.time()
        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER
        temp_C = temp(volts)
        temp_F = 9 * temp_C / 5 + 32
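        # The excerpt is cut off here. An illustrative sketch of how such a
        # sampling loop is commonly finished; the ts.store() call mirrors the
        # voltage-only variant earlier in this collection, and the exact
        # continuation is an assumption, not the original code.
        if DEBUG:
            print("{0:.3f} {1:.3f} V {2:.3f} C / {3:.3f} F".format(t, volts, temp_C, temp_F))
        ts.store(t, [volts, temp_C])
        time.sleep(INTERVAL)
except KeyboardInterrupt:
    # CTRL+C ends the sampling loop.
    print("Sampling stopped.")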
def test_max_min():
    ts = TimeSeries('test', dates_perc)
    assert ts.max() == (1587000000, 3)
    assert ts.min() == (1586000000, 1)
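# The `dates_perc` and `dates_cut` fixtures used by these tests are defined
# outside this excerpt. A hypothetical stand-in consistent with test_create,
# test_getitem and test_max_min (raw epoch timestamps, with duplicates counted
# per unique timestamp) might look like this -- an assumption about the
# fixture's shape, not the original data:
dates_perc_example = [
    1586000000,                          # occurs once  -> min() == (1586000000, 1)
    1586500000, 1586500000,              # occurs twice
    1587000000, 1587000000, 1587000000,  # occurs three times -> max() == (1587000000, 3)
]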
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Bokeh component classes
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Categories map of dropdown values, SQL column, and SQL table (and data source for range_categories)
categories = Categories(sources)

# Bokeh table objects
data_tables = DataTables(sources)

# Bokeh objects for each tab layout
planning_data = PlanningData(custom_title, data_tables)
roi_viewer = ROI_Viewer(sources, custom_title)
mlc_analyzer = MLC_Analyzer(sources, custom_title, data_tables)
time_series = TimeSeries(sources, categories.range, custom_title, data_tables)
correlation = Correlation(sources, categories, custom_title)
regression = Regression(sources, time_series, correlation,
                        categories.multi_var_reg_var_names, custom_title, data_tables)
correlation.add_regression_link(regression)
rad_bio = RadBio(sources, time_series, correlation, regression, custom_title, data_tables)
dvhs = DVHs(sources, time_series, correlation, regression, custom_title, data_tables)
query = Query(sources, categories, dvhs, rad_bio, roi_viewer, time_series,
              correlation, regression, mlc_analyzer, custom_title, data_tables)
dvhs.add_query_link(query)

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Listen for changes to sources
def test_dist():
    ts1 = TimeSeries('test', dates_perc)
    ts2 = TimeSeries('tost', dates_cut)
    assert ts1.sum() != ts2.sum()
    with pytest.raises(Exception):
        dist = ts1.dist(ts2)
def test_div(source, x):
    ts = TimeSeries("test", source)
    assert abs((ts / x).sum() - ts.sum() / x) < 0.001
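# `source` and `x` arrive via fixtures or parametrization defined outside this
# excerpt. A hypothetical conftest-style sketch that would drive test_sum,
# test_div and test_cum_sum; the fixture values are illustrative assumptions.
import pytest


@pytest.fixture(params=[
    [1586000000, 1586000010, 1586000010, 1586000020],   # small burst of events
    list(range(1586000000, 1586000600, 60)),             # one event per minute
])
def source(request):
    return request.param


@pytest.fixture(params=[2, 7, 0.5])
def x(request):
    return request.param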
def sarimax(ts_obj, gaussian_window_size, step_size,
            plot_anomaly_score=False, plot_forecast=False):
    slide_size = 200

    if ts_obj.get_length() >= slide_size:
        # Process the series in fixed-size chunks and stitch the results together.
        n = slide_size
        list_df = [ts_obj.dataframe[i:i + n]
                   for i in range(0, ts_obj.dataframe.shape[0], n)]

        anomaly_scores_list = []
        times_list = []
        forecasts_list = []

        for chunk_df in tqdm(list_df):
            print(ts_obj.name)
            if len(chunk_df) >= slide_size:
                chunk_ts_obj = TimeSeries(chunk_df,
                                          timestep=ts_obj.timestep,
                                          dateformat=ts_obj.dateformat,
                                          name=ts_obj.name)
                # NEED TO SET CHARACTERISTIC OF SEASONALITY ONLY
                chunk_ts_obj.set_seasonality()
                chunk_result = sarimax_mini(chunk_ts_obj, gaussian_window_size,
                                            step_size,
                                            plot_anomaly_score=False,
                                            plot_forecast=False)
                anomaly_scores_list.append(chunk_result["Anomaly Scores"])
                times_list.append(chunk_result["Time"])
                forecasts_list.append(chunk_result["Forecast"])

        # Flatten the per-chunk results.
        anomaly_scores = []
        for sublist in anomaly_scores_list:
            for item in sublist:
                anomaly_scores.append(item)
        forecast = []
        for sublist in forecasts_list:
            for item in sublist:
                forecast.append(item)

        # Pad with zeros so the outputs match the full series length
        # (the final partial chunk is skipped above).
        while len(anomaly_scores) < ts_obj.get_length():
            anomaly_scores.append(0)
        while len(forecast) < ts_obj.get_length():
            forecast.append(0)

        if plot_forecast:
            plt.plot(forecast, alpha=.7, label="Predictions")
            plt.plot(ts_obj.dataframe["value"].values, alpha=.5, label="Data")
            plt.legend()
            plt.show()

        if plot_anomaly_score:
            plt.subplot(211)
            plt.title("Anomaly Scores")
            plt.plot(anomaly_scores)
            plt.ylim([.99, 1])
            plt.subplot(212)
            plt.title("Time Series")
            plt.plot(ts_obj.dataframe["value"].values)
            plt.axvline(ts_obj.get_probationary_index(),
                        color="black", label="probationary line")
            plt.tight_layout()
            plt.show()

        return {
            "Anomaly Scores": np.asarray(anomaly_scores),
            "Time": sum(times_list),
            "Forecast": forecast,
        }
    else:
        return sarimax_mini(ts_obj, gaussian_window_size, step_size,
                            plot_anomaly_score=plot_anomaly_score,
                            plot_forecast=plot_forecast)