Example no. 1
    def close(self, *args, **kwds):
        r"""
        Return the time series of all historical closing prices for this stock.
        If no arguments are given, the last acquired historical data are
        returned. Otherwise, the data are fetched from Google Finance.

        INPUT:

        - ``startdate`` -- string (default: ``'Jan+1,+1900'``)

        - ``enddate`` -- string (default: current date)

        - ``histperiod`` -- string (``'daily'`` or ``'weekly'``)

        OUTPUT:

        A time series -- close price data.

        EXAMPLES:

        You can directly obtain the closing price data like so::

            sage: finance.Stock('vmw').close(startdate='Jan+1,+2008', enddate='Feb+1,+2008')                 # optional -- internet
            [84.6000, 83.9500, 80.4900, 72.9900, ... 83.0000, 54.8700, 56.4200, 56.6700, 57.8500]

        Or, you can initialize stock data first and then extract the Close
        data::

            sage: c = finance.Stock('vmw')  # optional -- internet
            sage: c.history(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]    # optional -- internet
            [
              1-Feb-08 56.98 58.14 55.06 57.85    2490481,
              4-Feb-08 58.00 60.47 56.91 58.05    1840709,
              5-Feb-08 57.60 59.30 57.17 59.30    1712179,
              6-Feb-08 60.32 62.00 59.50 61.52    2211775,
              7-Feb-08 60.50 62.75 59.56 60.80    1521651
            ]
            sage: c.close()    # optional -- internet
            [57.8500, 58.0500, 59.3000, 61.5200, ... 58.2900, 60.1800, 59.8600, 59.9500, 58.6700]

        Otherwise, :meth:`history` will be called with the default
        arguments, returning a year's worth of data::

            sage: finance.Stock('vmw').close()   # random; optional -- internet
            [57.7100, 56.9900, 55.5500, 57.3300, 65.9900 ... 84.9900, 84.6000, 83.9500, 80.4900, 72.9900]
        """

        from time_series import TimeSeries

        if len(args) != 0 or len(kwds) != 0:
            return TimeSeries([x.close for x in self.history(*args, **kwds)])

        # No arguments were given: reuse the cached historical data if available.
        try:
            return TimeSeries([x.close for x in self.__historical])
        except AttributeError:
            pass

        return TimeSeries([x.close for x in self.history(*args, **kwds)])
Example no. 2
    def open(self, *args, **kwds):
        r"""
        Return a time series containing historical opening prices for this
        stock. If no arguments are given, the last acquired historical data
        are returned. Otherwise, the data are fetched from Google Finance.

        INPUT:

        - ``startdate`` -- string (default: ``'Jan+1,+1900'``)

        - ``enddate`` -- string (default: current date)

        - ``histperiod`` -- string (``'daily'`` or ``'weekly'``)

        OUTPUT:

        A time series -- open price data.

        EXAMPLES:

        You can directly obtain the opening price data like so::

            sage: finance.Stock('vmw').open(startdate='Jan+1,+2008', enddate='Feb+1,+2008')                 # optional -- internet
            [83.0500, 85.4900, 84.9000, 82.0000, 81.2500 ... 82.0000, 58.2700, 54.4900, 55.6000, 56.9800]

        Or, you can initialize stock data first and then extract the Open
        data::

            sage: c = finance.Stock('vmw')
            sage: c.google(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]    # optional -- internet
            [
             31-Jan-08 55.60 57.35 55.52 56.67    2591100,
              1-Feb-08 56.98 58.14 55.06 57.85    2473000,
              4-Feb-08 58.00 60.47 56.91 58.05    1816500,
              5-Feb-08 57.60 59.30 57.17 59.30    1709000,
              6-Feb-08 60.32 62.00 59.50 61.52    2191100
            ]
            sage: c.open()    # optional -- internet
            [55.6000, 56.9800, 58.0000, 57.6000, 60.3200 ... 56.5500, 59.3000, 60.0000, 59.7900, 59.2600]

        Otherwise, ``self.google()`` will be called with the default
        arguments, returning a year's worth of data::

            sage: finance.Stock('vmw').open()   # random; optional -- internet
            [52.1100, 60.9900, 59.0000, 56.0500, 57.2500 ... 83.0500, 85.4900, 84.9000, 82.0000, 81.2500]
        """

        from time_series import TimeSeries

        if len(args) != 0 or len(kwds) != 0:
            return TimeSeries([x.open for x in self.google(*args, **kwds)])

        # No arguments were given: reuse the cached historical data if available.
        try:
            return TimeSeries([x.open for x in self.__historical])
        except AttributeError:
            pass

        return TimeSeries([x.open for x in self.google(*args, **kwds)])
Example no. 3
def test_eq():
    # generate variants
    variants = [[i // 5 for i in range(100 * j, 199 * j)] for j in range(1, 5)]

    for one in variants:
        for two in variants:
            if one == two:
                assert TimeSeries('a', one) == TimeSeries('b', two)
            else:
                assert TimeSeries('c', one) != TimeSeries('d', two)
Example no. 4
def test_cum_sum(source):
    ts = TimeSeries("test", source)

    cs = ts.cumsum()
    assert len(cs.ts) == len(set(source))

    assert cs.vs[-1] == ts.vs.sum()
Example no. 5
def engine_func():

    global feature_set, label_set
    get_metric()
    df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    feature_set, label_set = hybrid_data(df_in)
    model = TimeSeries(model=MODEL)
    # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    history = model.train_model(features=feature_set,
                                labels=label_set,
                                epochs=10)
    write_history(history)
    prediction = model.get_prediction(feature_set)
    write_prediction(prediction.tolist())
    model.save_model()

    # Write predictions and scores to disk

    mail_interval = int(time.time())
    train_interval = int(time.time())
    predict_interval = int(time.time())
    get_metric_interval = int(time.time())
    idle_status = False

    while True:
        time_now = int(time.time())

        if time_now - get_metric_interval >= GET_METRIC_INTERVAL:
            get_metric()
            feature_set, label_set = hybrid_data(df_in)
            get_metric_interval = int(time.time())

        if time_now - predict_interval >= PREDICT_INTERVAL:
            idle_status = False
            print("Predicting ...")
            prediction = model.get_prediction(feature_set)
            write_prediction(prediction.tolist())
            predict_interval = int(time.time())

        elif time_now - mail_interval >= MAIL_INTERVAL:
            idle_status = False
            print("Sending Email ... ")
            status = mail(TO_ADDRESS, read_prediction())
            print(status)
            mail_interval = int(time.time())

        elif time_now - train_interval >= TRAIN_INTERVAL:
            idle_status = False
            print("Training model ....")
            # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
            history = model.train_model(features=feature_set,
                                        labels=label_set,
                                        epochs=1)
            write_history(history)
            model.save_model()
            train_interval = int(time.time())

        else:
            if not idle_status:
                print("Engine Idle ...")
                idle_status = True
Example no. 6
def test_dt():
    dt_from_ = datetime.datetime(2019, 12, 1)
    dt_to_ = datetime.datetime(2020, 1, 1)

    from_ = dt_from_.timestamp()
    to_ = dt_to_.timestamp()

    orig_grid = 10

    ts_raw = [
        i * orig_grid + from_
        for i in range(-2 * orig_grid,
                       int((to_ - from_) / orig_grid) + 2 * orig_grid)
    ]

    assert ts_raw

    ts = TimeSeries("test", ts_raw)

    assert ts.ts.min() < from_
    assert ts.ts.max() > to_

    time_interval = datetime.timedelta(days=1)

    assert (ts[from_:to_:time_interval.total_seconds()]
            == ts[dt_from_:dt_to_:time_interval])
Example no. 7
def to_future_matrix(X, days_predict=5, days_window=5, train_model=None):
    # Note: X is the dataframe in the format it has when first read from Excel.
    # Initialize the TS model.
    ts_model = TimeSeries(days_window, train_model)
    all_ctry_new_df = pd.DataFrame(columns=[
        "country_id", "date", "cases", "deaths", "cases_14_100k", "cases_100k"
    ])
    country_id_col = X.loc[:, "country_id"].unique()
    for country in country_id_col:
        X_cur = X[X["country_id"] == country].copy(deep=True)
        ctry_df = process_ts_ctry(ts_model, country, X_cur, days_predict,
                                  days_window, train_model)
        all_ctry_new_df = pd.concat([all_ctry_new_df, ctry_df], axis=0)
    return all_ctry_new_df
Example no. 8
def test_to_grid(start, stop, count):
    ts = TimeSeries("test", dates_perc)

    sliced = ts[start:stop]
    assert len(sliced.ts)
    assert sliced.sum()

    period = ((stop or sliced.ts.max()) - (start or sliced.ts.min())) / count

    gridded = ts[start:stop:period]

    if start:
        assert gridded.ts.min() == start
    if count != 1 and stop:
        assert gridded.ts.max() == stop

    assert len(gridded.ts) == count
    assert ts[start:stop].sum() == gridded.sum()
Example no. 9
def get_filtered_ts(dt_start, dt_end, in_dir, target_month, target_year):
    filtered_ts_list = []

    date_start = str(target_month).zfill(2) + "/" + str(
        dt_start.day).zfill(2) + "/" + str(target_year)
    date_end = str(target_month).zfill(2) + "/" + str(
        dt_end.day).zfill(2) + "/" + str(target_year)

    for server in os.listdir(in_dir):
        print(server)
        for file_name in os.listdir(in_dir + "/" + server + "/"):
            mac = file_name.split(".")[0]
            csv_path = in_dir + "/" + server + "/" + file_name

            ts = TimeSeries(csv_path, target_month, target_year, metric,
                            dt_start, dt_end)
            if filter_ts(ts):
                filtered_ts_list.append(
                    [mac, server, csv_path, date_start, date_end])
    return filtered_ts_list
Example no. 10
    def correct_signal(self, time, signal, tmin=None, tmax=None, r0=5e-2,
                       correction="bassetbound", window="blackmanharris",
                       impedance=None):
        if tmin is None:
            tmin = time[0]
        if tmax is None:
            tmax = time[-1]
        dt = time[1] - time[0]
        # Restrict the signal to the requested time window.
        mask = np.logical_and(time > tmin, time < tmax)
        sig = signal[mask]
        t = time[mask]
        signal_length = len(sig)
        freq = rfftfreq(signal_length, dt)[1:]
        # Build the complex frequency response from the chosen correction model.
        amp = getattr(self, f"amplitude_ratio_{correction}")(freq)
        phase = getattr(self, f"phase_{correction}")(freq)
        if impedance is None:
            impedance = np.ones_like(amp)
        else:
            impedance = getattr(self, f"{impedance}_impedance")(freq, r0)
        response = amp * np.exp(1j * phase) / impedance
        response = np.r_[1, response]
        # Window the signal, deconvolve the response in the frequency domain,
        # and transform back.
        win = get_window(window, signal_length)
        corrected_signal = irfft(rfft(sig * win) / response, n=signal_length)
        return TimeSeries(corrected_signal, t, name="Corrected")
Example no. 11
def plot():
	global ax, fig, serverMac, change_points, ts, label_text, serverMac_id
		
	id_stringvar.set(str(serverMac_id[serverMac] + 1) + "/" + str(len(serverMac_id)))
		
	server = serverMac.split("_")[0]
	mac = serverMac.split("_")[1]
	in_file_path = "../../input/" + date_dir + "/" + server + "/" + mac + ".csv"
	in_file_path_cp = "./output/" + date_dir + "/" + server + "/" + mac + ".csv"

	dt_axvline = []
	if os.path.exists(in_file_path_cp):
		df = pd.read_csv(in_file_path_cp)
		for idx, row in df.iterrows():
			dt_axvline.append(row["dt"])
			change_points.append(row["dt"])

	fig.clf()
	ax = fig.add_subplot(111)

	ts = TimeSeries(in_file_path, target_month, target_year, metric)
	plot_procedures.plotax_ts(ax, ts, dt_axvline = dt_axvline, ylim = [-0.01, 1.01])
	canvas.show()
	fig.canvas.mpl_connect('button_press_event', handle_mouse_click)
Example no. 12
from time_series import TimeSeries

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Imports for data visualization
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

ts = TimeSeries('dataset/average_temp_india.csv', train_size=0.7)

plt.plot(ts.data.iloc[:, 1].index, ts.data.iloc[:, 1])
plt.gcf().autofmt_xdate()
plt.title("Average Temperature of India (2000-2018)")
plt.xlabel("Time")
plt.ylabel("Temparature (°C)")
plt.show()

model = ExponentialSmoothing(ts.train, trend='additive',
                             seasonal='additive').fit()
prediction = model.predict(start=ts.data.iloc[:, 1].index[0],
                           end=ts.data.iloc[:, 1].index[-1])
"""Brutlag Algorithm"""
PERIOD = 12  # The given time series has seasonal_period=12
GAMMA = 0.3684211  # the seasonality component
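
# The snippet stops here before the Brutlag step itself; below is a minimal,
# assumed sketch of the band computation it sets up (not code from the original
# project): the deviation is smoothed with GAMMA and lagged by one season
# (PERIOD), and observations outside the band would be flagged as anomalies.
def brutlag_bands(actual, predicted, period=PERIOD, gamma=GAMMA, scale=3.0):
    """Return (upper, lower) Brutlag confidence bands for a seasonal forecast."""
    deviations, upper, lower = [], [], []
    for t, (y, y_hat) in enumerate(zip(actual, predicted)):
        # Deviation from one season ago (taken as 0 during the first season).
        prev_dev = deviations[t - period] if t >= period else 0.0
        deviations.append(gamma * abs(y - y_hat) + (1 - gamma) * prev_dev)
        upper.append(y_hat + scale * prev_dev)
        lower.append(y_hat - scale * prev_dev)
    return upper, lower

# Illustrative use with the series and forecast computed above:
# upper, lower = brutlag_bands(ts.data.iloc[:, 1].values, prediction.values)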
Example no. 13
from time_series import TimeSeries

# Imports for data visualization
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld

# Seasonal Decompose
from statsmodels.tsa.seasonal import seasonal_decompose

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

register_matplotlib_converters()

ts = TimeSeries('dataset/monthly_sales.csv', train_size=0.8)

print("Sales Data\n")
print(ts.data.describe())

print("\nHead and Tail of the time series\n")
print(ts.data.head(5).iloc[:, 1:])
print(ts.data.tail(5).iloc[:, 1:])

# Plot of raw time series data
plt.plot(ts.data.index, ts.data.sales)
plt.gcf().autofmt_xdate()
date_format = mpld.DateFormatter('%Y-%m')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title("Sales Data Analysis (2013-2016)")
plt.xlabel("Time")
Example no. 14
the code is for a capstone project at the Data Science Institute, 2019
author: zhuzilin, [email protected]
"""
from time_series import StatFunc, ARMA, TimeSeries
import random
import matplotlib.pyplot as plt
import math
"""
figure 1
2 mixed gaussian
"""
gauss0 = StatFunc(random.gauss, {"mu": 0, "sigma": 1})

gauss1 = StatFunc(random.gauss, {"mu": 20, "sigma": 1})

ts = TimeSeries([gauss0, gauss1], [100, 1])

x = []
y = []
c = []
for _ in range(1000):
    t, v, color = ts.next()
    x.append(t)
    y.append(v)
    c.append(color)

plt.figure()
ax = plt.gca()
ax.scatter(x, y, c=c)
plt.savefig("example/f1.png")
"""
Example no. 15
# pHat has a voltage divider using 120k + 820k resistors
# (mapping 25.85V onto the 3.3V max)
VOLT_DIVIDER = 120.0 / (120.0 + 820.0)
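
# A quick sanity check of the divider arithmetic in the comment above (assumed,
# purely illustrative): 25.85 V scaled by 120/(120+820) lands right at the 3.3 V
# ADC limit, so dividing a measured ADC voltage by VOLT_DIVIDER recovers the
# voltage on the battery side of the divider.
assert abs(25.85 * VOLT_DIVIDER - 3.3) < 0.005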

# our sampling time in secs
INTERVAL = 1.0

# which ADC channel
ADC_CHANNEL = 0

if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["voltage"])
if DEBUG:
    print("\nPress CTRL+C to exit.\n")
time.sleep(
    INTERVAL)  # short pause after ads1015 class creation recommended(??)

try:
    while True:
        t = time.time()
        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER

        if DEBUG:
            print("{0:.3f} {1:5d} {2:.6f}".format(t, value, volts))
        ts.store(t, [volts])
Example no. 16
PERIODICITIES = []
SEASONALITY_TIMES = []
TRENDS = []
TREND_TYPES = []
TREND_TIMES = []
CONCEPT_DRIFT = []
CONCEPT_DRIFT_TIMES = []

for filename in to_include:
    print(filename)

    NAMES.append(filename)

    df = pd.read_csv('../datasets/' + filename, header=0)
    ts = TimeSeries(df,
                    timestep=to_include[filename][0],
                    dateformat=to_include[filename][1],
                    name=filename)

    NUMBER_OF_TIME_STEPS.append(ts.get_length())

    TIME_STEP_SIZES.append(ts.get_timestep())

    MINIMUMS.append(ts.get_min())

    MAXIMUMS.append(ts.get_max())

    MEDIANS.append(ts.get_median())

    MEANS.append(ts.get_mean())

    NUMBER_OF_ANOMALIES.append(len(ts.dataframe[ts.dataframe["outlier"] == 1]))
Example no. 17
def test_create():
    ts = TimeSeries("test", dates_perc)

    assert len(ts.ts) == len(set(dates_perc)) == len(ts.vs)
Example no. 18
    # TMP36 reads 0V at -50C and 2V at +150C
    return 100 * volt - 50


# which channel to read
TARGET_ADC = 0

# our sampling time in secs
INTERVAL = 1.0

if use_hw_spi:
    mcp = Adafruit_MCP3008.MCP3008(spi=SPI.SpiDev(0, 0))
else:
    mcp = Adafruit_MCP3008.MCP3008(clk=CLK, cs=CS, miso=MISO, mosi=MOSI)

ts = TimeSeries(["voltage", "temp"])
print("\nPress CTRL+C to exit.\n")

try:
    while True:
        # read the analog pin
        value = mcp.read_adc(TARGET_ADC)
        volt = voltage(value)
        temp_C = temp(volt)
        temp_F = 9 * temp_C / 5 + 32

        t = time.time()

        form = 't={time:.3f} - val= {volt:.3f} V  ==  {temp:.3f} C / {temp_F:.3f} F'
        print(form.format(time=t, volt=volt, temp=temp_C, temp_F=temp_F))
Example no. 19
def main():
    parser = argparse.ArgumentParser()

    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz',
                        type=int,
                        default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch',
                        type=int,
                        default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=8e-5,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr',
                        type=float,
                        default=1e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr',
                        type=float,
                        default=5e-5,
                        help='min encoder/decoder learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr',
                        type=float,
                        default=7e-5,
                        help='min discriminator learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=0, help='weight decay')
    parser.add_argument('--overlap',
                        type=float,
                        default=.5,
                        help='kernel overlap')
    parser.add_argument('--no-norm-trans',
                        action='store_true',
                        help='if set, use Gaussian posterior without '
                        'transformation')
    parser.add_argument('--plot-interval',
                        type=int,
                        default=1,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval',
                        type=int,
                        default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix',
                        default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp',
                        type=int,
                        default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae',
                        type=float,
                        default=.2,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss',
                        default='smooth_l1',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--ema',
                        dest='ema',
                        type=int,
                        default=-1,
                        help='start epoch of exponential moving average '
                        '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay',
                        type=float,
                        default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd',
                        type=float,
                        default=1,
                        help='MMD strength for latent variable')

    # squash is off when rescale is off
    parser.add_argument('--squash',
                        dest='squash',
                        action='store_const',
                        const=True,
                        default=True,
                        help='bound the generated time series value '
                        'using tanh')
    parser.add_argument('--no-squash',
                        dest='squash',
                        action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale',
                        dest='rescale',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale',
                        dest='rescale',
                        action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz

    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000,
            seq_len=200,
            max_time=1,
            poisson_rate=50,
            obs_span_rate=.25,
            save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
        if args.squash:
            squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(train_data,
                               train_time,
                               train_mask,
                               label=None,
                               max_time=max_time,
                               cconv_ref=cconv_ref,
                               overlap_rate=args.overlap,
                               device=device)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    test_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(in_channels,
                             out_channels,
                             max_time,
                             cconv_ref,
                             overlap_rate=args.overlap,
                             kernel_size=args.comp,
                             norm=True).to(device)
    encoder = Encoder(cconv, nz, not args.no_norm_trans).to(device)

    pbigan = PBiGAN(encoder, decoder, args.aeloss).to(device)

    critic_cconv = ContinuousConv1D(in_channels,
                                    out_channels,
                                    max_time,
                                    cconv_ref,
                                    overlap_rate=args.overlap,
                                    kernel_size=args.comp,
                                    norm=True).to(device)
    critic = ConvCritic(critic_cconv, nz).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pbigan, args.ema_decay, args.ema)

    optimizer = optim.Adam(pbigan.parameters(),
                           lr=args.lr,
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(),
                                  lr=args.dis_lr,
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))

    output_dir = Path('results') / 'toy-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, batch_size, max_time,
                            test_loader, rescaler, output_dir, device)
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for ((val, idx, mask, _, cconv_graph),
             (_, idx_t, mask_t, index, _)) in zip(train_loader, time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss = pbigan(
                val, idx, mask, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(x_gen, idx_t, mask_t,
                                                       index)

            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward(retain_graph=True)
            critic_optimizer.step()

            G_loss = gan_loss(real, fake, 0, 1)

            mmd_loss = mmd(z_enc, z_gen)

            loss = G_loss + ae_loss * args.ae + mmd_loss * args.mmd

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            if ema:
                ema.apply()
                visualizer.plot(epoch)
                ema.restore()
            else:
                visualizer.plot(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Example no. 20
from multiprocessing import Process
import matplotlib.pyplot as plt
graph = tf.get_default_graph()
warnings.filterwarnings("ignore")


def write_prediction(prd, tr):
    # Write both arrays as a single JSON object so the file remains valid JSON.
    with open('results.json', 'w+') as file:
        file.write(json.dumps({"prediction": prd.tolist(), "true": tr.tolist()}))


with open('config.json', 'r') as f:
    config = json.loads(f.read())
    MAIL_INTERVAL = config['MAIL_INTERVAL']
    TRAIN_INTERVAL = config['TRAIN_INTERVAL']
    PREDICT_INTERVAL = config['PREDICT_INTERVAL']
    TO_ADDRESS = config['TO_ADDRESS']
    MODEL = config['MODEL']
    PREDICT_LEN = config['PREDICT_LEN']
    FEED_LEN = config['FEED_LEN']
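
# For reference, the config.json read above is assumed (not taken from the
# original project) to look roughly like this, with illustrative values only:
# {
#     "MAIL_INTERVAL": 3600,
#     "TRAIN_INTERVAL": 86400,
#     "PREDICT_INTERVAL": 300,
#     "TO_ADDRESS": "user@example.com",
#     "MODEL": "lstm",
#     "PREDICT_LEN": 12,
#     "FEED_LEN": 60
# }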

model = TimeSeries(model=MODEL)
df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
# history = model.train_model(dataframe=df_in, epochs=1)
# predict = model.actual_vs_predict(df_in)
# plt.plot(predict, color='red')
plt.plot(df_in['AWS/EC2 CPUUtilization'].values, color='blue')
# plt.ylim(0, 100)
plt.show()
Example no. 21
def test_sum(source):
    ts = TimeSeries("test", source)

    assert ts.sum() == ts.vs.sum()
Example no. 22
def test_getitem():
    ts = TimeSeries("test", dates_perc)

    assert len(ts[1586000000:].ts) == len(set(dates_perc))
    assert len(ts[:1588000000].ts) == len(set(dates_perc))
Example no. 23
def main():
    parser = argparse.ArgumentParser()

    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset,
                        help='data file')
    parser.add_argument('--seed', type=int, default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz', type=int, default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch', type=int, default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='batch size')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='learning rate')
    parser.add_argument('--min-lr', type=float, default=5e-5,
                        help='min learning rate for LR scheduler. '
                             '-1 to disable annealing')
    parser.add_argument('--plot-interval', type=int, default=10,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval', type=int, default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix', default='pvae',
                        help='prefix of output directory')
    parser.add_argument('--comp', type=int, default=5,
                        help='continuous convolution kernel size')
    parser.add_argument('--sigma', type=float, default=.2,
                        help='standard deviation for Gaussian likelihood')
    parser.add_argument('--overlap', type=float, default=.5,
                        help='kernel overlap')
    # squash is off when rescale is off
    parser.add_argument('--squash', dest='squash', action='store_const',
                        const=True, default=True,
                        help='bound the generated time series value '
                             'using tanh')
    parser.add_argument('--no-squash', dest='squash', action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale', dest='rescale', action='store_const',
                        const=True, default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale', dest='rescale', action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz

    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000, seq_len=200, max_time=1, poisson_rate=50,
            obs_span_rate=.25, save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
        if args.squash:
            squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(
        train_data, train_time, train_mask, label=None, max_time=max_time,
        cconv_ref=cconv_ref, overlap_rate=args.overlap, device=device)

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        drop_last=True, collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    test_batch_size = 64
    test_loader = DataLoader(train_dataset, batch_size=test_batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(
        in_channels, out_channels, max_time, cconv_ref,
        overlap_rate=args.overlap, kernel_size=args.comp, norm=True).to(device)

    encoder = Encoder(nz, cconv).to(device)

    pvae = PVAE(encoder, decoder, sigma=args.sigma).to(device)

    optimizer = optim.Adam(pvae.parameters(), lr=args.lr)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)

    path = '{}_{}_{}'.format(
        args.prefix, datetime.now().strftime('%m%d.%H%M%S'),
        '_'.join([f'lr_{args.lr:g}']))

    output_dir = Path('results') / 'toy-pvae' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, test_batch_size, max_time,
                            test_loader, rescaler, output_dir, device)
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)
        for val, idx, mask, _, cconv_graph in train_loader:
            optimizer.zero_grad()
            loss = pvae(val, idx, mask, cconv_graph)
            loss.backward()
            optimizer.step()
            loss_breakdown['loss'] += loss.item()

        if scheduler:
            scheduler.step()

        cur_time = time.time()
        tracker.log(
            epoch, loss_breakdown, cur_time - epoch_start, cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            visualizer.plot(epoch)

        model_dict = {
            'pvae': pvae.state_dict(),
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Example no. 24
import torch
import torch.nn as nn

from network import Net
from prepare_data import prepare_data
from time_series import TimeSeries
from trainer import Trainer

if __name__ == '__main__':
    time_series_data = prepare_data()

    epoch_num = 100
    batch_size = 4

    dataset = TimeSeries(time_series_data,
                         input_time_interval=365,
                         output_time_interval=7,
                         output_keyword='peak_load')
    net = Net(in_ch=dataset.data_channels, out_ch=dataset.output_time_interval)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=10,
        verbose=True,
        threshold=0.0001,
        threshold_mode='rel',
        cooldown=0,
        min_lr=0,
Example no. 25
# which ADC channel
ADC_CHANNEL = 0


def temp(volt):
    """Convert analog voltage to temperature in C"""
    # TMP36 reads 0V at -50C and 2V at +150C
    return 100 * volt - 50
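
# Worked example of the linear mapping above (assumed, for illustration only):
# the TMP36 span of -50 C at 0 V to +150 C at 2 V gives a slope of
# 200 C / 2 V = 100 C per volt and an offset of -50 C, hence 100 * volt - 50.
# Spot check: 0.75 V -> 100 * 0.75 - 50 = 25 C.
assert temp(0.75) == 25.0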


if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["volts", "temp"])

if DEBUG:
    print("\nPress CTRL+C to exit.\n")
time.sleep(
    INTERVAL)  # short pause after ads1015 class creation recommended(??)

try:
    while True:
        t = time.time()

        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER
        temp_C = temp(volts)
        temp_F = 9 * temp_C / 5 + 32
Example no. 26
def test_max_min():
    ts = TimeSeries('test', dates_perc)
    assert ts.max() == (1587000000, 3)
    assert ts.min() == (1586000000, 1)
Example no. 27
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Bokeh component classes
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Categories map of dropdown values, SQL column, and SQL table (and data source for range_categories)
categories = Categories(sources)

# Bokeh table objects
data_tables = DataTables(sources)

# Bokeh objects for each tab layout
planning_data = PlanningData(custom_title, data_tables)
roi_viewer = ROI_Viewer(sources, custom_title)
mlc_analyzer = MLC_Analyzer(sources, custom_title, data_tables)
time_series = TimeSeries(sources, categories.range, custom_title, data_tables)
correlation = Correlation(sources, categories, custom_title)
regression = Regression(sources, time_series, correlation,
                        categories.multi_var_reg_var_names, custom_title,
                        data_tables)
correlation.add_regression_link(regression)
rad_bio = RadBio(sources, time_series, correlation, regression, custom_title,
                 data_tables)
dvhs = DVHs(sources, time_series, correlation, regression, custom_title,
            data_tables)
query = Query(sources, categories, dvhs, rad_bio, roi_viewer, time_series,
              correlation, regression, mlc_analyzer, custom_title, data_tables)
dvhs.add_query_link(query)

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Listen for changes to sources
Example no. 28
def test_dist():
    ts1 = TimeSeries('test', dates_perc)
    ts2 = TimeSeries('tost', dates_cut)
    assert ts1.sum() != ts2.sum()
    with pytest.raises(Exception):
        ts1.dist(ts2)
Example no. 29
def test_div(source, x):
    ts = TimeSeries("test", source)

    assert abs((ts / x).sum() - ts.sum() / x) < 0.001
Example no. 30
def sarimax(ts_obj,
            gaussian_window_size,
            step_size,
            plot_anomaly_score=False,
            plot_forecast=False):
    slide_size = 200
    if ts_obj.get_length() >= slide_size:
        n = slide_size
        list_df = [
            ts_obj.dataframe[i:i + n]
            for i in range(0, ts_obj.dataframe.shape[0], n)
        ]

        anomaly_scores_list = []
        times_list = []
        forecasts_list = []
        for chunk_df in tqdm(list_df):
            print(ts_obj.name)
            if len(chunk_df) >= slide_size:
                chunk_ts_obj = TimeSeries(chunk_df,
                                          timestep=ts_obj.timestep,
                                          dateformat=ts_obj.dateformat,
                                          name=ts_obj.name)
                # NEED TO SET CHARACTERISTIC OF SEASONALITY ONLY
                chunk_ts_obj.set_seasonality()
                chunk_result = sarimax_mini(chunk_ts_obj,
                                            gaussian_window_size,
                                            step_size,
                                            plot_anomaly_score=False,
                                            plot_forecast=False)
                anomaly_scores_list.append(chunk_result["Anomaly Scores"])
                times_list.append(chunk_result["Time"])
                forecasts_list.append(chunk_result["Forecast"])

        anomaly_scores = []
        for sublist in anomaly_scores_list:
            for item in sublist:
                anomaly_scores.append(item)

        forecast = []
        for sublist in forecasts_list:
            for item in sublist:
                forecast.append(item)

        while len(anomaly_scores) < ts_obj.get_length():
            anomaly_scores.append(0)

        while len(forecast) < ts_obj.get_length():
            forecast.append(0)

        if plot_forecast:
            plt.plot(forecast, alpha=.7, label="Predictions")
            plt.plot(ts_obj.dataframe["value"].values, alpha=.5, label="Data")
            plt.legend()
            plt.show()

        if plot_anomaly_score:
            plt.subplot(211)
            plt.title("Anomaly Scores")
            plt.plot(anomaly_scores)
            plt.ylim([.99, 1])
            plt.subplot(212)
            plt.title("Time Series")
            plt.plot(ts_obj.dataframe["value"].values)
            plt.axvline(ts_obj.get_probationary_index(),
                        color="black",
                        label="probationary line")
            plt.tight_layout()
            plt.show()

        return {
            "Anomaly Scores": np.asarray(anomaly_scores),
            "Time": sum(times_list),
            "Forecast": forecast
        }

    else:
        return sarimax_mini(ts_obj,
                            gaussian_window_size,
                            step_size,
                            plot_anomaly_score=plot_anomaly_score,
                            plot_forecast=plot_forecast)