Example #1
def test_cum_sum(source):
    ts = TimeSeries("test", source)

    cs = ts.cumsum()
    assert len(cs.ts) == len(set(source))

    assert cs.vs[-1] == ts.vs.sum()
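As a reference for what the assertions above rely on, here is a rough NumPy sketch of a cumulative sum over a (timestamps, values) pair; the field names .ts and .vs come from the test, everything else is an assumption rather than the actual TimeSeries implementation.

import numpy as np

def cumsum_sketch(ts, vs):
    # duplicate timestamps collapse to a single grid point,
    # so len(grid) == len(set(ts)) as the test asserts
    grid, inverse = np.unique(ts, return_inverse=True)
    summed = np.zeros(len(grid))
    np.add.at(summed, inverse, vs)   # sum values that share a timestamp
    # the last cumulative value then equals vs.sum()
    return grid, np.cumsum(summed)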
Example #2
def engine_func():

    global feature_set, label_set
    get_metric()
    df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    feature_set, label_set = hybrid_data(df_in)
    model = TimeSeries(model=MODEL)
    # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    history = model.train_model(features=feature_set,
                                labels=label_set,
                                epochs=10)
    write_history(history)
    prediction = model.get_prediction(feature_set)
    write_prediction(prediction.tolist())
    model.save_model()

    # Write predictions and scores to disk

    mail_interval = int(time.time())
    train_interval = int(time.time())
    predict_interval = int(time.time())
    get_metric_interval = int(time.time())
    idle_status = False

    while True:
        time_now = int(time.time())

        if time_now - get_metric_interval >= GET_METRIC_INTERVAL:
            get_metric()
            feature_set, label_set = hybrid_data(df_in)
            # reset the timer so the metric refresh fires once per interval,
            # matching the other branches below
            get_metric_interval = int(time.time())

        if time_now - predict_interval >= PREDICT_INTERVAL:
            idle_status = False
            print("Predicting ...")
            prediction = model.get_prediction(feature_set)
            write_prediction(prediction.tolist())
            predict_interval = int(time.time())

        elif time_now - mail_interval >= MAIL_INTERVAL:
            idle_status = False
            print("Sending Email ... ")
            status = mail(TO_ADDRESS, read_prediction())
            print(status)
            mail_interval = int(time.time())

        elif time_now - train_interval >= TRAIN_INTERVAL:
            idle_status = False
            print("Training model ....")
            # df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
            history = model.train_model(features=feature_set,
                                        labels=label_set,
                                        epochs=1)
            write_history(history)
            model.save_model()
            train_interval = int(time.time())

        else:
            if not idle_status:
                print("Engine Idle ...")
                idle_status = True
Example #3
    def open(self, *args, **kwds):
        r"""
        Return a time series containing historical opening prices for this
        stock. If no arguments are given, the last acquired historical data
        are returned. Otherwise, the data are fetched from Google Finance.

        INPUT:

        - ``startdate`` -- string, (default: ``'Jan+1,+1900'``)

        - ``enddate`` -- string, (default: current date)

        - ``histperiod`` -- string, (``'daily'`` or ``'weekly'``)

        OUTPUT:

        A time series -- open price data.

        EXAMPLES:

        You can obtain the open data directly as follows::

            sage: finance.Stock('vmw').open(startdate='Jan+1,+2008', enddate='Feb+1,+2008')                 # optional -- internet
            [83.0500, 85.4900, 84.9000, 82.0000, 81.2500 ... 82.0000, 58.2700, 54.4900, 55.6000, 56.9800]

        Or, you can initialize stock data first and then extract the Open
        data::

            sage: c = finance.Stock('vmw')
            sage: c.google(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]    # optional -- internet
            [
             31-Jan-08 55.60 57.35 55.52 56.67    2591100,
              1-Feb-08 56.98 58.14 55.06 57.85    2473000,
              4-Feb-08 58.00 60.47 56.91 58.05    1816500,
              5-Feb-08 57.60 59.30 57.17 59.30    1709000,
              6-Feb-08 60.32 62.00 59.50 61.52    2191100
            ]
            sage: c.open()    # optional -- internet
            [55.6000, 56.9800, 58.0000, 57.6000, 60.3200 ... 56.5500, 59.3000, 60.0000, 59.7900, 59.2600]

        Otherwise, ``self.google()`` will be called with the default
        arguments returning a year's worth of data::

            sage: finance.Stock('vmw').open()   # random; optional -- internet
            [52.1100, 60.9900, 59.0000, 56.0500, 57.2500 ... 83.0500, 85.4900, 84.9000, 82.0000, 81.2500]
        """

        from time_series import TimeSeries

        if len(args) != 0:
            return TimeSeries([x.open for x in self.google(*args, **kwds)])

        try:
            return TimeSeries([x.open for x in self.__historical])
        except AttributeError:
            pass

        return TimeSeries([x.open for x in self.google(*args, **kwds)])
Example #4
    def close(self, *args, **kwds):
        r"""
        Return the time series of all historical closing prices for this stock.
        If no arguments are given, the last acquired historical data are returned.
        Otherwise, the data are fetched from Google Finance.

        INPUT:

        - ``startdate`` -- string, (default: ``'Jan+1,+1900'``)

        - ``enddate`` -- string, (default: current date)

        - ``histperiod`` -- string, (``'daily'`` or ``'weekly'``)

        OUTPUT:

        A time series -- close price data.

        EXAMPLES:

        You can obtain the close data directly as follows::

            sage: finance.Stock('vmw').close(startdate='Jan+1,+2008', enddate='Feb+1,+2008')                 # optional -- internet
            [84.6000, 83.9500, 80.4900, 72.9900, ... 83.0000, 54.8700, 56.4200, 56.6700, 57.8500]

        Or, you can initialize stock data first and then extract the Close
        data::

            sage: c = finance.Stock('vmw')  # optional -- internet
            sage: c.history(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]    # optional -- internet
            [
              1-Feb-08 56.98 58.14 55.06 57.85    2490481,
              4-Feb-08 58.00 60.47 56.91 58.05    1840709,
              5-Feb-08 57.60 59.30 57.17 59.30    1712179,
              6-Feb-08 60.32 62.00 59.50 61.52    2211775,
              7-Feb-08 60.50 62.75 59.56 60.80    1521651
            ]
            sage: c.close()    # optional -- internet
            [57.8500, 58.0500, 59.3000, 61.5200, ... 58.2900, 60.1800, 59.8600, 59.9500, 58.6700]

        Otherwise, :meth:`history` will be called with the default
        arguments returning a year's worth of data::

            sage: finance.Stock('vmw').close()   # random; optional -- internet
            [57.7100, 56.9900, 55.5500, 57.3300, 65.9900 ... 84.9900, 84.6000, 83.9500, 80.4900, 72.9900]
        """

        from time_series import TimeSeries

        if len(args) != 0:
            return TimeSeries([x.close for x in self.history(*args, **kwds)])

        try:
            return TimeSeries([x.close for x in self.__historical])
        except AttributeError:
            pass

        return TimeSeries([x.close for x in self.history(*args, **kwds)])
Example #5
def test_eq():
    # generate variants
    variants = [[i // 5 for i in range(100 * j, 199 * j)] for j in range(1, 5)]

    for one in variants:
        for two in variants:
            if one == two:
                assert TimeSeries('a', one) == TimeSeries('b', two)
            else:
                assert TimeSeries('c', one) != TimeSeries('d', two)
Example #6
 def _create_monthly_ts(self):
     return TimeSeries.create(name=u'Test',
                              date_frame=u'Monthly',
                              is_snapshot=False,
                              dates=[
                                  datetime(2017, 1, 31),
                                  datetime(2017, 2, 28),
                                  datetime(2017, 3, 31),
                                  datetime(2017, 4, 30),
                                  datetime(2017, 5, 31),
                                  datetime(2017, 6, 30),
                                  datetime(2017, 7, 31),
                                  datetime(2017, 8, 31),
                                  datetime(2017, 9, 30),
                                  datetime(2017, 10, 31),
                                  datetime(2017, 11, 30),
                                  datetime(2017, 12, 31),
                              ],
                              values=[
                                  100.0,
                                  200.0,
                                  300.0,
                                  400.0,
                                  500.0,
                                  600.0,
                                  700.0,
                                  800.0,
                                  900.0,
                                  1000.0,
                                  1100.0,
                                  1200.0,
                              ])
Example #7
    def get(self, stock_code, metric_name):
        """Get metric values by metric names.

        Args:
            metric_name: A string representing metric name.

        Returns:
            A map of TimeSeries. Keys are date frames and values are
            corresponding TimeSeries.
        """
        output = {}
        literal_metric_name = self.metric_names.get(metric_name)
        statement_ids = self._get_statement_ids_containing(
            stock_code, literal_metric_name)
        for statement_id in statement_ids:
            date_frame = self.financial_statement_store.get_date_frame(
                statement_id)
            is_snapshot = self.financial_statement_store.get_is_snapshot(
                statement_id)
            results = self._get_by_statement_id(stock_code,
                                                literal_metric_name,
                                                statement_id)
            dates = [entry.statement_date for entry in results]
            values = [entry.metric_value for entry in results]

            output[date_frame] = TimeSeries.create(name=metric_name,
                                                   date_frame=date_frame,
                                                   is_snapshot=is_snapshot,
                                                   dates=dates,
                                                   values=values)
        return output
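A hypothetical call against this accessor could look as follows; the service instance name `metrics` is an assumption, while the stock code '2317' and metric name 'NetProfit' match the mock data shown in Example #28.

# Hypothetical usage of get(); 'metrics' is assumed to be an instance of the class above.
series_by_frame = metrics.get('2317', 'NetProfit')
for date_frame, ts in series_by_frame.items():
    data = ts.get()  # dict with 'name', 'date_frame', 'is_snapshot', 'date', 'value'
    print(date_frame, data['date'][-1], data['value'][-1])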
Example #8
def test_dt():
    dt_from_ = datetime.datetime(2019, 12, 1)
    dt_to_ = datetime.datetime(2020, 1, 1)

    from_ = dt_from_.timestamp()
    to_ = dt_to_.timestamp()

    orig_grid = 10

    ts_raw = [
        i * orig_grid + from_
        for i in range(-2 * orig_grid,
                       int((to_ - from_) / orig_grid) + 2 * orig_grid)
    ]

    assert ts_raw

    ts = TimeSeries("test", ts_raw)

    assert ts.ts.min() < from_
    assert ts.ts.max() > to_

    time_interval = datetime.timedelta(days=1)

    assert ts[from_:to_:time_interval.total_seconds(
    )] == ts[dt_from_:dt_to_:time_interval]
Example #9
    def clean_timeseries(self,
                         attr='values',
                         inplace=True,
                         time_index_name='year',
                         time_index=None,
                         lower=0,
                         upper=None,
                         interpolation_method='missing',
                         extrapolation_method='missing'):
        if time_index is None:
            time_index = cfg.cfgfile.get('case', 'years')
        # use '==' rather than 'is': identity comparison with a string literal is unreliable
        interpolation_method = self.interpolation_method if interpolation_method == 'missing' else interpolation_method
        extrapolation_method = self.extrapolation_method if extrapolation_method == 'missing' else extrapolation_method

        data = getattr(self, attr)
        clean_data = TimeSeries.clean(
            data=data,
            newindex=time_index,
            time_index_name=time_index_name,
            interpolation_method=interpolation_method,
            extrapolation_method=extrapolation_method).clip(lower=lower,
                                                            upper=upper)

        if inplace:
            setattr(self, attr, clean_data)
        else:
            return clean_data
Example #10
    def _collect_price_time_series(self):
        """Parse the requested JSON for daily close time series data 
        and daily average time series data.

        Returns:
            tuple: Ordered pair of TimeSeries built from the daily close and
            daily average dictionaries in the response.
            Keys of those dictionaries are strings representing ms since epoch.
            Values are integers representing the signal (amount of gp).
        r = requests.get(self.GRAPH_URL)
        #dictionary of 2 dictionaries, "daily" and "average"
        response = r.json()
        daily_series = TimeSeries.from_dictionary(response["daily"])
        average_series = TimeSeries.from_dictionary(response["average"])
        return (daily_series, average_series)
Example #11
    def _collect_volume_time_series(self):
        """Parse the OSRS GX tradeable item webpage for trade volume data.

        Returns:
            TimeSeries: Trade volume time series data built from a dictionary
            whose keys are strings in `%Y-%m-%d` format and whose values are
            integers representing the number of units moved by close.
        """
        #fetch the item page as text
        page_as_text = requests.get(self.VOLUME_URL).text

        #search the item page for tags that contain volume information
        volume_tags = re.findall("trade180.push.*", page_as_text)

        volume_series = {}
        #iterate over all the tags just found
        for match in volume_tags:
            tv_pairs = re.findall(r"Date\(.*\), \d+", match)
            #separate the timestamps from volume data
            for pair in tv_pairs:
                t, v = tuple(pair.split(','))
                #remove text surrounding Y/M/D piece of timestamp
                t = t.strip("Date('").strip("')'")
                volume_series[t] = int(v)
        volume_series = TimeSeries.from_dictionary(volume_series)
        return volume_series
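Both collectors above appear to belong to the same class; a hypothetical caller might combine them like this, where 'item' stands in for an assumed instance of that (unnamed) class.

# Hypothetical usage; 'item' is an assumed instance of the collector class above.
daily_series, average_series = item._collect_price_time_series()
volume_series = item._collect_volume_time_series()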
Example #12
 def _create_one_ts(self):
     return TimeSeries.create(name=u'One',
                              date_frame=u'Quarterly',
                              is_snapshot=False,
                              dates=[
                                  datetime(2016, 3, 31),
                                  datetime(2016, 6, 30),
                                  datetime(2016, 9, 30)
                              ],
                              values=[100.0, 200.0, 300.0])
Example #13
 def _create_other_ts(self):
     return TimeSeries.create(name=u'Other',
                              date_frame=u'Quarterly',
                              is_snapshot=False,
                              dates=[
                                  datetime(2016, 3, 31),
                                  datetime(2016, 6, 30),
                                  datetime(2016, 9, 30)
                              ],
                              values=[400.0, 500.0, 600.0])
Example #14
def to_future_matrix(X, days_predict=5, days_window=5, train_model=None):
    # Note: X is the dataframe that follows the format used when first read from Excel
    # initialize the TS model
    ts_model = TimeSeries(days_window, train_model)
    all_ctry_new_df = pd.DataFrame(columns=[
        "country_id", "date", "cases", "deaths", "cases_14_100k", "cases_100k"
    ])
    country_id_col = X.loc[:, "country_id"].unique()
    for country in country_id_col:
        X_cur = X[X["country_id"] == country].copy(deep=True)
        ctry_df = process_ts_ctry(ts_model, country, X_cur, days_predict,
                                  days_window, train_model)
        all_ctry_new_df = pd.concat([all_ctry_new_df, ctry_df], axis=0)
    return all_ctry_new_df
Example #15
 def test_annualize(self):
     one = TimeSeries.create(name=u'Test',
                             date_frame=u'Yearly',
                             is_snapshot=False,
                             dates=[
                                 datetime(2013, 12, 31),
                                 datetime(2014, 12, 31),
                                 datetime(2015, 12, 31)
                             ],
                             values=[100.0, 200.0, 300.0])
     other = TimeSeries.create(name=u'Test',
                               date_frame=u'Quarterly',
                               is_snapshot=False,
                               dates=[
                                   datetime(2016, 3, 31),
                                   datetime(2016, 6, 30),
                                   datetime(2016, 9, 30)
                               ],
                               values=[400.0, 500.0, 600.0])
     actual = one.annualize(other).get()
     expected = {
         'name':
         u'Test',
         'date_frame':
         u'Yearly',
         'is_snapshot':
         False,
         'date': [
             datetime(2013, 12, 31),
             datetime(2014, 12, 31),
             datetime(2015, 12, 31),
             datetime(2016, 12, 31)
         ],
         'value': [100.0, 200.0, 300.0, 2000.0],
     }
     self.assertEqual(actual, expected)
Example #16
    def clean_timeseries(self, attr='values', inplace=True, time_index_name='year',
                         time_index=None, lower=0, upper=None, interpolation_method='missing', extrapolation_method='missing'):
        if time_index is None:
            time_index=cfg.cfgfile.get('case', 'years')
        interpolation_method = self.interpolation_method if interpolation_method == 'missing' else interpolation_method
        extrapolation_method = self.extrapolation_method if extrapolation_method == 'missing' else extrapolation_method
        
        data = getattr(self, attr)
        clean_data = TimeSeries.clean(data=data, newindex=time_index, time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method).clip(lower=lower, upper=upper)

        if inplace:
            setattr(self, attr, clean_data)
        else:
            return clean_data
Example #17
def test_to_grid(start, stop, count):
    ts = TimeSeries("test", dates_perc)

    sliced = ts[start:stop]
    assert len(sliced.ts)
    assert sliced.sum()

    period = ((stop or sliced.ts.max()) - (start or sliced.ts.min())) / count

    gridded = ts[start:stop:period]

    if start:
        assert gridded.ts.min() == start
    if count != 1 and stop:
        assert gridded.ts.max() == stop

    assert len(gridded.ts) == count
    assert ts[start:stop].sum() == gridded.sum()
Example #18
    def test_create(self):
        ts = TimeSeries.create(
            name=u'Test',
            date_frame=u'Quarterly',
            is_snapshot=False,
            dates=[datetime(2016, 6, 30),
                   datetime(2016, 3, 31)],
            values=[100, 200])

        expected = {
            'name': u'Test',
            'date_frame': u'Quarterly',
            'is_snapshot': False,
            'date': [datetime(2016, 3, 31),
                     datetime(2016, 6, 30)],
            'value': [200, 100],
        }
        self.assertEqual(ts.get(), expected)
Example #19
def get_filtered_ts(dt_start, dt_end, in_dir, target_month, target_year):
    filtered_ts_list = []

    date_start = str(target_month).zfill(2) + "/" + str(
        dt_start.day).zfill(2) + "/" + str(target_year)
    date_end = str(target_month).zfill(2) + "/" + str(
        dt_end.day).zfill(2) + "/" + str(target_year)

    for server in os.listdir(in_dir):
        print(server)
        for file_name in os.listdir(in_dir + "/" + server + "/"):
            mac = file_name.split(".")[0]
            csv_path = in_dir + "/" + server + "/" + file_name

            ts = TimeSeries(csv_path, target_month, target_year, metric,
                            dt_start, dt_end)
            if filter_ts(ts):
                filtered_ts_list.append(
                    [mac, server, csv_path, date_start, date_end])
    return filtered_ts_list
Example #20
 def test_accumulate_annually(self):
     ts = TimeSeries.create(
         name=u'Test',
         date_frame=u'Quarterly',
         is_snapshot=False,
         dates=[
             datetime(2015, 3, 31),
             datetime(2015, 6, 30),
             datetime(2015, 9, 30),
             datetime(2015, 12, 31),
             datetime(2016, 3, 31),
             datetime(2016, 6, 30),
             datetime(2016, 9, 30),
             datetime(2016, 12, 31)
         ],
         values=[100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0])
     actual = ts.accumulate_annually().get()
     expected = {
         'name':
         u'Test',
         'date_frame':
         u'Quarterly',
         'is_snapshot':
         False,
         'date': [
             datetime(2015, 3, 31),
             datetime(2015, 6, 30),
             datetime(2015, 9, 30),
             datetime(2015, 12, 31),
             datetime(2016, 3, 31),
             datetime(2016, 6, 30),
             datetime(2016, 9, 30),
             datetime(2016, 12, 31)
         ],
         'value': [
             100.0, 100.0 + 200.0, 100.0 + 200.0 + 300.0,
             100.0 + 200.0 + 300.0 + 400.0, 500.0, 500.0 + 600.0,
             500.0 + 600.0 + 700.0, 500.0 + 600.0 + 700.0 + 800.0
         ],
     }
     self.assertEqual(actual, expected)
Example #21
 def test_yoy(self):
     ts = TimeSeries.create(
         name=u'Test',
         date_frame=u'Quarterly',
         is_snapshot=False,
         dates=[
             datetime(2015, 3, 31),
             datetime(2015, 6, 30),
             datetime(2015, 9, 30),
             datetime(2015, 12, 31),
             datetime(2016, 3, 31),
             datetime(2016, 6, 30),
             datetime(2016, 9, 30),
             datetime(2016, 12, 31)
         ],
         values=[100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0])
     actual = ts.yoy().get()
     expected = {
         'name':
         u'Test',
         'date_frame':
         u'Quarterly',
         'is_snapshot':
         False,
         'date': [
             datetime(2016, 3, 31),
             datetime(2016, 6, 30),
             datetime(2016, 9, 30),
             datetime(2016, 12, 31)
         ],
         'value': [
             (500.0 - 100.0) / 100.0,
             (600.0 - 200.0) / 200.0,
             (700.0 - 300.0) / 300.0,
             (800.0 - 400.0) / 400.0,
         ],
     }
     self.assertEqual(actual, expected)
Example #22
 def correct_signal(self, time, signal, tmin=None, tmax=None, r0=5e-2,
     correction="bassetbound", window="blackmanharris", impedance=None):
     if tmin is None:
         tmin =time[0]
     if tmax is None:
         tmax = time[-1]
     dt = time[1] - time[0]
     mask = np.logical_and(time>tmin, time<tmax)
     sig = signal[mask]
     t = time[mask]
     signal_length = len(sig)
     freq = rfftfreq(signal_length, dt)[1:]
     amp = getattr(self, f"amplitude_ratio_{correction}")(freq)
     phase = getattr(self, f"phase_{correction}")(freq)
     if impedance is None:
         impedance = np.ones_like(amp)
     else:
         impedance = getattr(self, f"{impedance}_impedance")(freq, r0)
     response = amp * np.exp(1j * phase) / impedance
     response = np.r_[1, response]
     win = get_window(window, signal_length)
     corrected_signal = irfft(rfft(sig * win) / response, n=signal_length)
     return TimeSeries(corrected_signal, t, name="Corrected")
Example #23
    def test_copy(self):
        original = TimeSeries.create(
            name=u'Test',
            date_frame=u'Quarterly',
            is_snapshot=False,
            dates=[datetime(2016, 3, 31),
                   datetime(2016, 6, 30)],
            values=[100.0, 200.0])

        original_expected = {
            'name': u'Test',
            'date_frame': u'Quarterly',
            'is_snapshot': False,
            'date': [datetime(2016, 3, 31),
                     datetime(2016, 6, 30)],
            'value': [100.0, 200.0],
        }

        # Make a copy.
        copied = original.copy()

        # Original time series won't be changed even we modify copied one.
        copied.name = u'Copied'
        copied.date_frame = u'Yearly'
        copied.is_snapshot = True
        copied_expected = {
            'name': u'Copied',
            'date_frame': u'Yearly',
            'is_snapshot': True,
            'date': [datetime(2016, 3, 31),
                     datetime(2016, 6, 30)],
            'value': [100.0, 200.0],
        }

        self.assertEqual(original.get(), original_expected)
        self.assertEqual(copied.get(), copied_expected)
Example #24
def plot():
	global ax, fig, serverMac, change_points, ts, label_text, serverMac_id
		
	id_stringvar.set(str(serverMac_id[serverMac] + 1) + "/" + str(len(serverMac_id)))
		
	server = serverMac.split("_")[0]
	mac = serverMac.split("_")[1]
	in_file_path = "../../input/" + date_dir + "/" + server + "/" + mac + ".csv"
	in_file_path_cp = "./output/" + date_dir + "/" + server + "/" + mac + ".csv"

	dt_axvline = []
	if os.path.exists(in_file_path_cp):
		df = pd.read_csv(in_file_path_cp)
		for idx, row in df.iterrows():
			dt_axvline.append(row["dt"])
			change_points.append(row["dt"])

	fig.clf()
	ax = fig.add_subplot(111)

	ts = TimeSeries(in_file_path, target_month, target_year, metric)
	plot_procedures.plotax_ts(ax, ts, dt_axvline = dt_axvline, ylim = [-0.01, 1.01])
	canvas.show()
	fig.canvas.mpl_connect('button_press_event', handle_mouse_click)
Example #25
# pHat has a voltage divider using 120k + 820k resistors
# (mapping 25.85V onto the 3.3V max)
VOLT_DIVIDER = 120.0 / (120.0 + 820.0)
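# sanity check: 3.3 V / VOLT_DIVIDER = 3.3 * (120 + 820) / 120 ≈ 25.85 V full scale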

# our sampling time in secs
INTERVAL = 1.0

# which ADC channel
ADC_CHANNEL = 0

if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["voltage"])
if DEBUG:
    print("\nPress CTRL+C to exit.\n")
time.sleep(
    INTERVAL)  # short pause after ads1015 class creation recommended(??)

try:
    while True:
        t = time.time()
        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER

        if DEBUG:
            print("{0:.3f} {1:5d} {2:.6f}".format(t, value, volts))
        ts.store(t, [volts])
        # assumed completion: pace the sampling loop and let CTRL+C exit cleanly
        time.sleep(INTERVAL)
except KeyboardInterrupt:
    pass
Example #26
import torch
import torch.nn as nn

from network import Net
from prepare_data import prepare_data
from time_series import TimeSeries
from trainer import Trainer

if __name__ == '__main__':
    time_series_data = prepare_data()

    epoch_num = 100
    batch_size = 4

    dataset = TimeSeries(time_series_data,
                         input_time_interval=365,
                         output_time_interval=7,
                         output_keyword='peak_load')
    net = Net(in_ch=dataset.data_channels, out_ch=dataset.output_time_interval)

    optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=10,
        verbose=True,
        threshold=0.0001,
        threshold_mode='rel',
        cooldown=0,
        min_lr=0)
Example #27
from time_series import TimeSeries

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Imports for data visualization
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

ts = TimeSeries('dataset/average_temp_india.csv', train_size=0.7)

plt.plot(ts.data.iloc[:, 1].index, ts.data.iloc[:, 1])
plt.gcf().autofmt_xdate()
plt.title("Average Temperature of India (2000-2018)")
plt.xlabel("Time")
plt.ylabel("Temparature (°C)")
plt.show()

model = ExponentialSmoothing(ts.train, trend='additive',
                             seasonal='additive').fit()
prediction = model.predict(start=ts.data.iloc[:, 1].index[0],
                           end=ts.data.iloc[:, 1].index[-1])
"""Brutlag Algorithm"""
PERIOD = 12  # The given time series has seasonal_period=12
GAMMA = 0.3684211  # the seasonality component
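# A minimal sketch, assuming the standard Brutlag formulation that PERIOD and GAMMA
# set up; this is not necessarily what the original script does. Running deviation:
#   d_t = GAMMA * |y_t - yhat_t| + (1 - GAMMA) * d_(t-PERIOD)
# confidence band: yhat_t +/- M * d_(t-PERIOD), with M an assumed scale factor.
M = 3
series = ts.data.iloc[:, 1]
deviations, upper, lower = [], [], []
for i, (actual, predicted) in enumerate(zip(series, prediction)):
    prev_dev = deviations[i - PERIOD] if i >= PERIOD else 0.0
    dev = GAMMA * abs(actual - predicted) + (1 - GAMMA) * prev_dev
    deviations.append(dev)
    upper.append(predicted + M * prev_dev)
    lower.append(predicted - M * prev_dev)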
Example #28
class MockFinancialStatementEntryStore(object):
    mock_data = {
        '2317': {
            # CapitalIncreaseHistoryService
            'CapitalIncreaseByCash': {
                u'Yearly':
                TimeSeries.create(name=u'CapitalIncreaseByCash',
                                  date_frame=u'Yearly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2005, 12, 31),
                                      datetime(2006, 12, 31),
                                      datetime(2007, 12, 31)
                                  ],
                                  values=[26.44, 27.01, 27.01]),
            },
            'CapitalIncreaseByEarnings': {
                u'Yearly':
                TimeSeries.create(name=u'CapitalIncreaseByEarnings',
                                  date_frame=u'Yearly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2005, 12, 31),
                                      datetime(2006, 12, 31),
                                      datetime(2007, 12, 31)
                                  ],
                                  values=[346.52, 435.51, 547.78]),
            },
            'CapitalIncreaseBySurplus': {
                u'Yearly':
                TimeSeries.create(name=u'CapitalIncreaseBySurplus',
                                  date_frame=u'Yearly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2005, 12, 31),
                                      datetime(2006, 12, 31),
                                      datetime(2007, 12, 31)
                                  ],
                                  values=[36.01, 53.83, 53.83]),
            },
            # DuPontService
            'NetProfit': {
                u'Yearly':
                TimeSeries.create(name=u'NetProfit',
                                  date_frame=u'Yearly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2014, 12, 31),
                                      datetime(2015, 12, 31),
                                      datetime(2016, 12, 31)
                                  ],
                                  values=[132482, 150201, 151357]),
                u'Quarterly':
                TimeSeries.create(name=u'NetProfit',
                                  date_frame=u'Quarterly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2017, 3, 31),
                                      datetime(2017, 6, 30),
                                      datetime(2017, 9, 30)
                                  ],
                                  values=[29207, 14919, 19665]),
            },
            'Assets': {
                u'Yearly':
                TimeSeries.create(name=u'Assets',
                                  date_frame=u'Yearly',
                                  is_snapshot=True,
                                  dates=[
                                      datetime(2014, 12, 31),
                                      datetime(2015, 12, 31),
                                      datetime(2016, 12, 31)
                                  ],
                                  values=[2312461, 2462715, 2308300]),
                u'Quarterly':
                TimeSeries.create(name=u'Assets',
                                  date_frame=u'Quarterly',
                                  is_snapshot=True,
                                  dates=[
                                      datetime(2017, 3, 31),
                                      datetime(2017, 6, 30),
                                      datetime(2017, 9, 30)
                                  ],
                                  values=[2332342, 2457578, 2762655]),
            },
            'Equity': {
                u'Yearly':
                TimeSeries.create(name=u'Equity',
                                  date_frame=u'Yearly',
                                  is_snapshot=True,
                                  dates=[
                                      datetime(2014, 12, 31),
                                      datetime(2015, 12, 31),
                                      datetime(2016, 12, 31)
                                  ],
                                  values=[984677, 1060391, 1133789]),
                u'Quarterly':
                TimeSeries.create(name=u'Equity',
                                  date_frame=u'Quarterly',
                                  is_snapshot=True,
                                  dates=[
                                      datetime(2017, 3, 31),
                                      datetime(2017, 6, 30),
                                      datetime(2017, 9, 30)
                                  ],
                                  values=[1183505, 1132860, 1156638]),
            },
            'Sales': {
                u'Yearly':
                TimeSeries.create(name=u'Sales',
                                  date_frame=u'Yearly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2014, 12, 31),
                                      datetime(2015, 12, 31),
                                      datetime(2016, 12, 31)
                                  ],
                                  values=[4213172, 4482146, 4358733]),
                u'Quarterly':
                TimeSeries.create(name=u'Sales',
                                  date_frame=u'Quarterly',
                                  is_snapshot=False,
                                  dates=[
                                      datetime(2017, 3, 31),
                                      datetime(2017, 6, 30),
                                      datetime(2017, 9, 30)
                                  ],
                                  values=[975044, 922412, 1078892]),
            }
        }
    }

    def get(self, stock_code, metric_name):
        if stock_code in self.mock_data and metric_name in self.mock_data[
                stock_code]:
            return self.mock_data[stock_code][metric_name]

        raise ValueError(u'Could not get mock data: stock_code={stock_code} metric_name={metric_name}' \
            .format(stock_code=stock_code, metric_name=metric_name))
Example #29
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Bokeh component classes
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Categories map of dropdown values, SQL column, and SQL table (and data source for range_categories)
categories = Categories(sources)

# Bokeh table objects
data_tables = DataTables(sources)

# Bokeh objects for each tab layout
planning_data = PlanningData(custom_title, data_tables)
roi_viewer = ROI_Viewer(sources, custom_title)
mlc_analyzer = MLC_Analyzer(sources, custom_title, data_tables)
time_series = TimeSeries(sources, categories.range, custom_title, data_tables)
correlation = Correlation(sources, categories, custom_title)
regression = Regression(sources, time_series, correlation,
                        categories.multi_var_reg_var_names, custom_title,
                        data_tables)
correlation.add_regression_link(regression)
rad_bio = RadBio(sources, time_series, correlation, regression, custom_title,
                 data_tables)
dvhs = DVHs(sources, time_series, correlation, regression, custom_title,
            data_tables)
query = Query(sources, categories, dvhs, rad_bio, roi_viewer, time_series,
              correlation, regression, mlc_analyzer, custom_title, data_tables)
dvhs.add_query_link(query)

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Listen for changes to sources
Example #30
def main():
    parser = argparse.ArgumentParser()

    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz',
                        type=int,
                        default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch',
                        type=int,
                        default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=8e-5,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr',
                        type=float,
                        default=1e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr',
                        type=float,
                        default=5e-5,
                        help='min encoder/decoder learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr',
                        type=float,
                        default=7e-5,
                        help='min discriminator learning rate for LR '
                        'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=0, help='weight decay')
    parser.add_argument('--overlap',
                        type=float,
                        default=.5,
                        help='kernel overlap')
    parser.add_argument('--no-norm-trans',
                        action='store_true',
                        help='if set, use Gaussian posterior without '
                        'transformation')
    parser.add_argument('--plot-interval',
                        type=int,
                        default=1,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval',
                        type=int,
                        default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix',
                        default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp',
                        type=int,
                        default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae',
                        type=float,
                        default=.2,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss',
                        default='smooth_l1',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--ema',
                        dest='ema',
                        type=int,
                        default=-1,
                        help='start epoch of exponential moving average '
                        '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay',
                        type=float,
                        default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd',
                        type=float,
                        default=1,
                        help='MMD strength for latent variable')

    # squash is off when rescale is off
    parser.add_argument('--squash',
                        dest='squash',
                        action='store_const',
                        const=True,
                        default=True,
                        help='bound the generated time series value '
                        'using tanh')
    parser.add_argument('--no-squash',
                        dest='squash',
                        action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale',
                        dest='rescale',
                        action='store_const',
                        const=True,
                        default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale',
                        dest='rescale',
                        action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz

    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000,
            seq_len=200,
            max_time=1,
            poisson_rate=50,
            obs_span_rate=.25,
            save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
    rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
        if args.squash:
            squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(train_data,
                               train_time,
                               train_mask,
                               label=None,
                               max_time=max_time,
                               cconv_ref=cconv_ref,
                               overlap_rate=args.overlap,
                               device=device)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    test_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(in_channels,
                             out_channels,
                             max_time,
                             cconv_ref,
                             overlap_rate=args.overlap,
                             kernel_size=args.comp,
                             norm=True).to(device)
    encoder = Encoder(cconv, nz, not args.no_norm_trans).to(device)

    pbigan = PBiGAN(encoder, decoder, args.aeloss).to(device)

    critic_cconv = ContinuousConv1D(in_channels,
                                    out_channels,
                                    max_time,
                                    cconv_ref,
                                    overlap_rate=args.overlap,
                                    kernel_size=args.comp,
                                    norm=True).to(device)
    critic = ConvCritic(critic_cconv, nz).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pbigan, args.ema_decay, args.ema)

    optimizer = optim.Adam(pbigan.parameters(),
                           lr=args.lr,
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(),
                                  lr=args.dis_lr,
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix, datetime.now().strftime('%m%d.%H%M%S'))

    output_dir = Path('results') / 'toy-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')

    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, batch_size, max_time,
                            test_loader, rescaler, output_dir, device)
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for ((val, idx, mask, _, cconv_graph),
             (_, idx_t, mask_t, index, _)) in zip(train_loader, time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss = pbigan(
                val, idx, mask, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(x_gen, idx_t, mask_t,
                                                       index)

            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward(retain_graph=True)
            critic_optimizer.step()

            G_loss = gan_loss(real, fake, 0, 1)

            mmd_loss = mmd(z_enc, z_gen)

            loss = G_loss + ae_loss * args.ae + mmd_loss * args.mmd

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            if ema:
                ema.apply()
                visualizer.plot(epoch)
                ema.restore()
            else:
                visualizer.plot(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
Example #31
from time_series import TimeSeries  # assumed import, matching the usage in Example #27

# Imports for data visualization
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld

# Seasonal Decompose
from statsmodels.tsa.seasonal import seasonal_decompose

# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

register_matplotlib_converters()

ts = TimeSeries('dataset/monthly_sales.csv', train_size=0.8)

print("Sales Data\n")
print(ts.data.describe())

print("\nHead and Tail of the time series\n")
print(ts.data.head(5).iloc[:, 1:])
print(ts.data.tail(5).iloc[:, 1:])

# Plot of raw time series data
plt.plot(ts.data.index, ts.data.sales)
plt.gcf().autofmt_xdate()
date_format = mpld.DateFormatter('%Y-%m')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title("Sales Data Analysis (2013-2016)")
plt.xlabel("Time")