Example #1
        data = pd.Series([fixed_rate], index=[dt.date.min])
        IndexProvider.__init__(self, data)
        self._data = data
        self._index_name = self._data.index.name
        if self._index_name == 'Date':
            self._o_dates = [d.toordinal() for d in self._data.index]

    def index_value(self, date):
        # A fixed-rate provider: the same value is returned for any date.
        return self._data[0]


# Example
if __name__ == '__main__':
    # Test with a time series
    start_date = dt.date(2014, 11, 11)
    dates = pd.date_range(start_date, periods=120, freq="MS").date
    index_values = np.random.randn(120)
    index_data = pd.TimeSeries(index_values, index=dates)
    inv_index = IndexProvider(index_data)
    test_date = dt.date(2015, 1, 1)
    print(inv_index.index_value(test_date))

    # Test with a DataFrame
    start_date = dt.date(2014, 11, 11)
    dates = pd.date_range(start_date, periods=120, freq="MS").date
    index_values = np.random.randn(360).reshape(120, 3)
    index_data = pd.DataFrame(index_values, index=dates)
    inv_index = IndexProvider(index_data)
    test_date = dt.date(2015, 1, 1)
    print(inv_index.index_value(test_date))
Example #2
    def test_TimeSeries_deprecation(self):

        # pd.TimeSeries deprecation, GH #10890
        with tm.assert_produces_warning(FutureWarning):
            pd.TimeSeries(1, index=date_range('20130101', periods=3))
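
pd.TimeSeries was a plain alias of pd.Series, deprecated in pandas 0.17 (GH #10890) and removed in later releases, so on a current pandas every snippet on this page ports by swapping the constructor name. A minimal sketch:

import pandas as pd

# Same construction as the test above, with the surviving constructor.
s = pd.Series(1, index=pd.date_range('20130101', periods=3))
print(s)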
Example #3
    def finalise(self):
        self.data = pd.TimeSeries(index=self.period, data=self._data)
Example #4
# Getting started
# ---------------

import pandas
import statsmodels.api as sm

data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.

from datetime import datetime
dates = sm.tsa.datetools.dates_from_range('1700', length=len(data.endog))

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pandas.TimeSeries(data.endog, index=dates)

# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
pandas_ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)

# Let's do some out-of-sample prediction
pred = pandas_ar_res.predict(start='2005', end='2015')
print(pred)

# Using explicit dates
# --------------------

ar_model = sm.tsa.AR(data.endog, dates=dates, freq='A')
ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)
pred = ar_res.predict(start='2005', end='2015')
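
On recent statsmodels releases sm.tsa.AR itself has been removed in favour of AutoReg; a rough sketch of the equivalent fit (not the original API, and method='mle' has no direct counterpart):

from statsmodels.tsa.ar_model import AutoReg

endog = pandas.Series(data.endog, index=dates)
autoreg_res = AutoReg(endog, lags=9).fit()
pred = autoreg_res.predict(start='2005', end='2015')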
Example #5
import numpy
import pandas
from datetime import datetime

import py
import serializer


simple_tests = [
#   type: expected before/after repr
    4,
    3.25,
    [1,2,3],
    ["123", "hello"],
    (1,2,3),
    {(1,2,3): 32},
    datetime(2014,1,1),
    numpy.array([datetime(2014,1,1)]),
    pandas.date_range(datetime(2014,1,1), periods=12),
    pandas.DataFrame({"col1": pandas.TimeSeries(datetime(2014,1,1))})
]

@py.test.mark.parametrize("obj", simple_tests)
def test_simple(obj):
    j = serializer.data_to_json(obj)
    back = serializer.json_to_data(j)
    try:
        assert back == obj
    except ValueError:
        # numpy/pandas comparisons are elementwise and cannot collapse to a
        # single bool, so fall back to checking every element.
        assert all(back == obj)
Example #6
])
def test_numpy_array_handler(arr_before):
    buf = dumps(arr_before)
    arr_after = loads(buf)
    assert_equal(arr_before, arr_after)


def test_nested_array():
    data_before = {"1": np.array([1, 2])}
    buf = dumps(data_before)
    data_after = loads(buf)
    assert_equal(data_before["1"], data_after["1"])


@pytest.mark.parametrize('ts_before', [
    pd.TimeSeries([1, 2, 3], index=[0, 1, 2]),
    pd.TimeSeries([1., 2., 3.], pd.date_range(
        '1970-01-01', periods=3, freq='S')),
    pd.TimeSeries([1., 2., 3.], pd.date_range(
        '1970-01-01', periods=3, freq='D')),
])
def test_pandas_timeseries_handler(ts_before):
    buf = dumps(ts_before)
    ts_after = loads(buf)
    assert_series_equal_strict(ts_before, ts_after)


@pytest.mark.parametrize(
    'index_before',
    [
        pd.Index([0, 1, 2]),
Example #7
def butter_bandpass_filter(data, lowcut=0.1, highcut=20.0, fs=512.0, order=5):
    # `lfilter` comes from scipy.signal; `butter_bandpass` (defined elsewhere)
    # returns the (b, a) filter coefficients.
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    return pd.TimeSeries(lfilter(b, a, data), index=data.index.copy())
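
The helper it calls is not shown; a minimal sketch following the usual SciPy cookbook recipe (the original may differ):

from scipy.signal import butter

def butter_bandpass(lowcut, highcut, fs, order=5):
    # Normalise the cutoff frequencies by the Nyquist frequency.
    nyq = 0.5 * fs
    return butter(order, [lowcut / nyq, highcut / nyq], btype='band')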
Example #8
def null_transformer(data, genome, loci, prediction_steps):
    """This prediction model assumes tomorrow will be entirely flat."""
    return pd.TimeSeries(data=data["Load"][:-prediction_steps].mean(),
                         index=data.index[-prediction_steps:])
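
A hypothetical usage sketch, assuming a DataFrame with a 'Load' column on a DatetimeIndex (genome and loci are unused by the body; on modern pandas, swap the function's pd.TimeSeries call for pd.Series first):

import numpy as np
import pandas as pd

idx = pd.date_range('2014-01-01', periods=48, freq='H')
frame = pd.DataFrame({'Load': np.random.rand(48)}, index=idx)
# The last 24 steps are forecast as the mean of everything before them,
# i.e. an entirely flat prediction.
forecast = null_transformer(frame, genome=None, loci=None, prediction_steps=24)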
Example #9
def test_pandas_endog():
    # Test various types of pandas endog inputs (e.g. TimeSeries, etc.)
    # `check_endog` and `kwargs` are defined earlier in the test module.

    # Example (failure): pandas.Series, no dates
    endog = pd.Series([1., 2.])
    # raises error due to no dates
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Example : pandas.Series
    dates = pd.date_range(start='1980-01-01', end='1981-01-01', freq='AS')
    endog = pd.Series([1., 2.], index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example : pandas.Series, string datatype
    endog = pd.Series(['a'], index=dates)
    # raises error due to direct type casting check in Statsmodels base classes
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Example : pandas.TimeSeries
    endog = pd.TimeSeries([1., 2.], index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example : pandas.DataFrame with 1 column
    endog = pd.DataFrame({'a': [1., 2.]}, index=dates)
    mod = check_endog(endog, **kwargs)
    mod.filter([])

    # Example (failure): pandas.DataFrame with 2 columns
    endog = pd.DataFrame({'a': [1., 2.], 'b': [3., 4.]}, index=dates)
    # raises error because 2-columns means k_endog=2, but the design matrix
    # set in **kwargs is shaped (1,1)
    assert_raises(ValueError, check_endog, endog, **kwargs)

    # Check behavior of the link maintained between passed `endog` and
    # `mod.endog` arrays
    endog = pd.DataFrame({'a': [1., 2.]}, index=dates)
    mod = check_endog(endog, **kwargs)
    assert_equal(mod.endog.base is not mod.data.orig_endog, True)
    assert_equal(mod.endog.base is not endog, True)
    assert_equal(mod.data.orig_endog.values.base is not endog, True)
    endog.iloc[0, 0] = 2
    # there is no link to mod.endog
    assert_equal(mod.endog, np.r_[1, 2].reshape(2, 1))
    # there remains a link to mod.data.orig_endog
    assert_allclose(mod.data.orig_endog, endog)

    # Example : pandas.DataFrame with 2 columns
    # Update kwargs for k_endog=2
    kwargs2 = {
        'k_states': 1,
        'design': [[1], [0.]],
        'obs_cov': [[1, 0], [0, 1]],
        'transition': [[1]],
        'selection': [[1]],
        'state_cov': [[1]],
        'initialization': 'approximate_diffuse'
    }
    endog = pd.DataFrame({'a': [1., 2.], 'b': [3., 4.]}, index=dates)
    mod = check_endog(endog, k_endog=2, **kwargs2)
    mod.filter([])
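
For reference, the (1, 1)-shaped `kwargs` these univariate checks rely on are not shown; judging from kwargs2 above they presumably look like this (an assumed reconstruction, not the original):

kwargs = {
    'k_states': 1,
    'design': [[1.]],        # (1, 1) design: univariate observation
    'obs_cov': [[1.]],
    'transition': [[1.]],
    'selection': [[1.]],
    'state_cov': [[1.]],
    'initialization': 'approximate_diffuse'
}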
Example #10
    plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
    plt.show()
    
    plot_num += 1
    r = np.sqrt(plc['CTLSOLUTION1']**2 + plc['CTLSOLUTION2']**2)
    ax = r.plot()
    ax.set_ylabel('arcsec')
    #ax.set_ybound(2800,3200)
    ax.set_title('CTL Solution period ' + str(timeperiod_num))
    plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
    plt.show()
    
    fit = np.polyfit(r.index.astype(np.int64), r.values, 1)
    ylin = fit[0] * r.index.astype(np.int64) + fit[1]

    fit = pandas.TimeSeries(ylin, index=r.index)
    df = pandas.DataFrame(r)
    df[1] = fit
    df.columns = ['data', 'fit']

    plot_num += 1
    ax = df.plot()
    ax.set_ylabel('arcsec')
    #ax.set_ybound(2800,3200)
    ax.set_title('CTL Solution period ' + str(timeperiod_num))
    plt.savefig(fig_filename + str(timeperiod_num) + '_' + str(plot_num))
    plt.show()
    
    diff = df['data'] - df['fit']

    plot_num += 1
Example #11
    def get_time_series(self):
        values, dates = self.get_data()
        return pd.TimeSeries(values, index=dates)
Example #12
        fig.tight_layout()
        return fig


if __name__ == "__main__":
    import numpy as np
    from statsmodels.tsa.arima_process import ArmaProcess
    np.random.seed(123)
    ar = [1, .35, .8]
    ma = [1, .8]
    arma = ArmaProcess(ar, ma, nobs=100)
    # isstationary and isinvertible are properties, not methods.
    assert arma.isstationary
    assert arma.isinvertible
    y = arma.generate_sample()
    dates = pd.date_range("1/1/1990", periods=len(y), freq='M')
    ts = pd.TimeSeries(y, index=dates)

    xpath = "/home/skipper/src/x12arima/x12a"

    try:
        results = x13_arima_analysis(xpath, ts)
    except Exception as exc:
        print("Caught exception:", exc)

    results = x13_arima_analysis(xpath, ts, log=False)

    # import pandas as pd
    # seas_y = pd.read_csv("usmelec.csv")
    # seas_y = pd.TimeSeries(seas_y["usmelec"].values,
    #                        index=pd.DatetimeIndex(seas_y["date"], freq="MS"))
    # results = x13_arima_analysis(xpath, seas_y)
Example #13
#fill the dataframe for shares to add (for each trade) of each symbol (df_trade_matrix)
#do this by iterating through the orders (df_orders) and filling the number of shares for that particular symbol and date
for date, row in df_orders.iterrows():
    if row['action'] == 'Buy':
        shares_add = int(row['volume'])
    elif row['action'] == 'Sell':
        shares_add = -1 * int(row['volume'])
    else:
        continue  # bare `next` was a no-op; skip unrecognised actions explicitly
    symbol = row['sym']
    df_trade_matrix.loc[date][symbol] += float(shares_add)

## STEP 4
# create a timeseries for CASH - tells you what your CASH VALUE is (starting cash + any buy/sell you've made)
#df_cash = pd.DataFrame( s.zeros(len(ls_dt_unique)), ls_dt_unique, columns=['CASH'])
ts_cash = pd.TimeSeries(0.0, ldt_timestamps)
ts_cash[0] = i_start_cash

# for each order, subtract the cash used in that trade
# need to multiply volume*price
df_close = d_data['close']

for date, row in df_trade_matrix.iterrows():
    ##    for sym in df_trade_matrix.columns:
    ##        price = df_close[sym].ix[date]
    ##        print price, sym
    ##        s_cash.loc[date] -= price * df_trade_matrix.loc[date][sym] #update the cash SPENT
    ##use dot product - faster than nested for loop (commented above this line)

    cash = np.dot(row.values.astype(float), df_close.ix[date].values)
    ts_cash[date] -= cash
Example #14
def _hourbyhour_ar_ga_with_lags(data, genome, loci, prediction_steps, lags_2d):
    ar_col = data.columns.tolist().index('Load')
    start = max(0, len(data) - genome[loci.hindsight] - prediction_steps)
    prediction, _ = vector_ar(data[start:].values, lags_2d, prediction_steps,
                              [ar_col])
    return pd.TimeSeries(data=prediction, index=data[-prediction_steps:].index)
def queue_to_series(a, freq="s"):
    t = pd.date_range(end=datetime.now(), freq=freq, periods=len(a))
    return pd.TimeSeries(a, index=t)
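
A hypothetical usage sketch: any sized container works, since only len(a) is used to build the timestamps ending at now.

from collections import deque

q = deque(maxlen=60)
q.extend(range(10))
series = queue_to_series(q, freq="s")  # one point per second, ending now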
Example #16
offset_end_date = '2015-08-25'

asset1 = Quandl.get('WIKI/MMM',
                    trim_start=offset_start_date,
                    trim_end=offset_end_date,
                    authtoken=AUTHTOKEN)['Adj. Close'].pct_change()[1:]
treasury_ret = Quandl.get('FRED/DTB3',
                          trim_start=start_date,
                          trim_end=end_date,
                          authtoken=AUTHTOKEN)['VALUE'].pct_change()[1:]
bench = Quandl.get('YAHOO/INDEX_GSPC',
                   trim_start=start_date,
                   trim_end=end_date,
                   authtoken=AUTHTOKEN)['Adjusted Close'].pct_change()[1:]

constant = pd.TimeSeries(np.ones(len(asset1.index)), index=asset1.index)
df = pd.DataFrame({
    'R1': asset1,
    'SPY': bench,
    'RF': treasury_ret,
    'Constant': constant
})
df = df.dropna()

OLS_model = regression.linear_model.OLS(df['R1'], df[['SPY', 'RF',
                                                      'Constant']])
fitted_model = OLS_model.fit()
b_SPY = fitted_model.params['SPY']
b_RF = fitted_model.params['RF']
a = fitted_model.params['Constant']
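
The excerpt's imports and date variables are not shown; it presumably ran against the legacy Quandl client with a preamble roughly like this (the token and elided dates are placeholders, not the original values):

import numpy as np
import pandas as pd
import Quandl  # legacy package, before the lower-case `quandl` rename
from statsmodels import regression

AUTHTOKEN = '...'          # placeholder
start_date = '...'         # elided in the excerpt
end_date = '...'           # elided in the excerpt
offset_start_date = '...'  # elided in the excerpt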
Example #17
def plot_acf_pacf(df0):  # Doesn't converge on different ads
    dframe = df0.copy()
    pivoted = dframe.pivot('date', 'ad', 'shown')
    pivoted.index = pd.to_datetime(pivoted.index)
    for ad in pivoted.columns:
        # for ad in ['ad_group_1']:
        print("Processing ad #%s" % ad.split('_')[2])
        subset = pivoted[np.isfinite(pivoted[ad])]
        time_series_initial = pd.TimeSeries(subset[ad].ravel(),
                                            index=pd.to_datetime(subset.index))
        time_series = np.log(time_series_initial)
        filename = out_dir_plots + 'AutoCorrPlots_' + ad + '.png'
        if not os.path.exists(filename):
            # print time_series
            pa = sm.tsa.pacf(time_series)
            acf = sm.tsa.acf(time_series)
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.plot(acf)
            z = stats.norm.ppf(0.99)
            n = time_series.shape[0]
            ax1.axhline(y=z / np.sqrt(n), linestyle='--', color='red')
            ax1.axhline(y=-z / np.sqrt(n), linestyle='--', color='red')
            ax1.set_ylabel('Auto-Corr Func.')
            ax1.set_title(ad)
            ax2.plot(pa)
            ax2.axhline(y=z / np.sqrt(n), linestyle='--', color='red')
            ax2.axhline(y=-z / np.sqrt(n), linestyle='--', color='red')
            ax2.set_ylabel('Partial Auto-Corr Func.')
            plt.savefig(filename)
            plt.close()

        filename = out_dir_plots + 'Prediction_' + ad + '.png'
        if not os.path.exists(filename):
            try:
                # Most of the plots show a 1 peak at lag =1 for ACF and for PACF --> model with params p=1, q=0
                # res10 = sm.tsa.ARMA(time_series, (1, 0)).fit()
                # res71 = sm.tsa.ARMA(time_series, (7, 1)).fit()
                # res81 = sm.tsa.ARMA(time_series, (8, 1)).fit()
                res51 = sm.tsa.ARMA(time_series, (5, 1)).fit()
                # res121 = sm.tsa.ARMA(time_series, (12, 1)).fit()
                fig, ax = plt.subplots()
                ax = time_series.ix['2015-10-01':].plot(ax=ax)
                # fig = res10.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample = False)
                # fig = res20.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample = False)
                # fig = res11.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample = False)
                # fig = res81.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample = False)
                fig = res51.plot_predict('2015-11-23',
                                         '2015-12-16',
                                         dynamic=True,
                                         ax=ax,
                                         plot_insample=False)
                # fig = res121.plot_predict('2015-11-23', '2015-12-16', dynamic=True, ax=ax, plot_insample = False)
                # fig2, ax2 = plt.subplots()
                # y_resid81 = res81.resid
                # y_resid91 = res91.resid
                # y_resid121 = res121.resid
                # y_resid81.plot()
                # y_resid91.plot()
                # y_resid121.plot()
            except ValueError:
                continue

            plt.savefig(filename)
            plt.close()
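
statsmodels also ships ready-made versions of these diagnostics that draw the confidence bands themselves; a sketch of the alternative:

import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
plot_acf(time_series, ax=ax1)
plot_pacf(time_series, ax=ax2)
fig.savefig(filename)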
Example #18
def daily_average(data, genome, loci, prediction_steps):
    start = -prediction_steps - genome[loci.hindsight]
    end = -prediction_steps
    return pd.TimeSeries(data=data["Load"][start:end].mean(),
                         index=data.index[-prediction_steps:])
Example #19
import pandas as pd
import statsmodels.api as sm

# Getting started
# ---------------

data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.

dates = sm.tsa.datetools.dates_from_range('1700', length=len(data.endog))

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pd.TimeSeries(data.endog, index=dates)

# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
pandas_ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)

# Let's do some out-of-sample prediction
pred = pandas_ar_res.predict(start='2005', end='2015')
print(pred)

# Using explicit dates
# --------------------

ar_model = sm.tsa.AR(data.endog, dates=dates, freq='A')
ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)
pred = ar_res.predict(start='2005', end='2015')
Example #20
      (num_hours, smoothness, zscore)
    sys.stdout.flush()
    start_time = time.time()
    # This is the part that takes time
    smoother = _get_smoother()(data, smoothness)
    cleaner = cln.RegressionCleaner(smoother, zscore)
    cleaned, _ = cleaner.get_cleaned_data(
        method=cln.RegressionCleaner.replace_with_bound)
    # Wrap up and plot the result
    end_time = time.time()
    print "Done in %s." % SimpleTimer.period_to_string(start_time, end_time)

    print cleaned
    sys.stdout.flush()
    plt.figure()
    data.plot(style='r', label='Raw load')

    spline = pd.TimeSeries(data=smoother.splev(range(len(cleaned))),
                           index=cleaned.index)
    spline.plot(style='g', label='Smoothing spline')

    # THE SAUSAGE!
    lower, upper = cleaner.get_confidence_interval()
    ax = plt.gca()
    ax.fill_between(cleaned.index, lower, upper, facecolor='g', alpha=0.1)

    cleaned.plot(style='b', label='Cleaned load')
    plt.legend(loc=3)

plt.show()

Example #21
MARKET_DATA_MANAGER = MarketDataManager()

# ---------- See the example on how all these work together ----------------
if __name__ == '__main__':
    from utils.database import pickle_save
    import pandas as pd
    import datetime as dt
    import numpy as np
    from Managers.ScenarioManager import EqBSEngine, ScenarioGenerator, FixRateEngine
    from lib.constants import BDAYS_PER_YEAR

    sample_credit_curve = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2000, 1, 1),
                                          periods=600,
                                          freq='MS').date,
                      data=[0.03] * 600))
    pickle_save(sample_credit_curve,
                'sample_credit_curve',
                db_path=MARKET_DATA_DB)

    MARKET_DATA_MANAGER.reset()

    # =========== test re-set market data date ================
    print(MARKET_DATA_MANAGER.get_index('fake_libor_3m').data)
    MARKET_DATA_MANAGER.market_data_date = dt.date(2008, 1, 1)
    print(MARKET_DATA_MANAGER.get_index('fake_libor_3m').data)

    # ========== test scen gen table =============
    print(MARKET_DATA_MANAGER.scen_gen_table)
    eng = FixRateEngine(0.05)
Example #22
    def score(self, filename, subname=None):
        # `subname` must be a valid sub-challenge (10, 100, 100_multifactorial);
        # the batch is inferred from the file name.

        # if given a list, return the overall score; otherwise just the score for that filename
        if isinstance(filename, str):
            end = self._check_filename(filename)

            assert subname is not None, "If one file provided, subname must be provided e.g., 10"
            subname = subname + "_" + end
            results = self.score_prediction(filename, subname=subname)
            del results['tpr']
            del results['fpr']
            del results['rec']
            del results['prec']
            return results
        elif isinstance(filename, list):
            assert len(filename) == 5, \
                "if a list of filenames is provided, it must contain 5 names"

            results = {}
            for i in [1, 2, 3, 4, 5]:
                tag = subname + "_" + str(i)
                assert tag in filename[i - 1], \
                    "files must be sorted and end in Size10_1, Size10_2, ... Size10_5"
                results['Net%s' % i] = self.score_prediction(filename[i - 1],
                                                             subname=tag)
            df = pd.DataFrame(results).T

            # keep only the important columns
            df = df[['AUROC', 'AUPR', 'p_aupr', 'p_auroc']]
            df = df.astype('float64')

            final_score = -np.mean(np.log10(df[['p_auroc', 'p_aupr']]))

            results = {}
            results['AUPR_SCORE'] = final_score['p_aupr']
            results['AUROC_SCORE'] = final_score['p_auroc']
            overall_score = np.mean(final_score)

            for index in df.index:
                results['%s_AUROC' % index] = df.ix[index]['AUROC']
            for index in df.index:
                results['%s_AUPR' % index] = df.ix[index]['AUPR']

            final_score = 10**-(final_score)
            results['AUPR_PVAL'] = final_score['p_aupr']
            results['AUROC_PVAL'] = final_score['p_auroc']

            results['SCORE'] = overall_score
            results = pd.TimeSeries(results)

            results = results[[
                'SCORE', 'AUPR_PVAL', 'AUPR_SCORE', 'AUROC_PVAL',
                'AUROC_SCORE', 'Net1_AUPR', 'Net2_AUPR', 'Net3_AUPR',
                'Net4_AUPR', 'Net5_AUPR', 'Net1_AUROC', 'Net2_AUROC',
                'Net3_AUROC', 'Net4_AUROC', 'Net5_AUROC'
            ]]
            return results
Example #23
    def compute_quotient_metric(name, num_metric, den_metric):
        series = pd.TimeSeries(data[name])
        num_period = series.resample(resample_period, how=num_metric)
        den_period = series.resample(resample_period, how=den_metric)
        return num_period[shift:] / den_period[:len(num_period) - shift].values
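
On modern pandas the how= argument to resample is gone; the equivalent calls are, roughly:

num_period = series.resample(resample_period).agg(num_metric)
den_period = series.resample(resample_period).agg(den_metric)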
Example #24
try:
    os.remove(dbfilename)
except OSError:
    pass

setup_blank_tables(dbfilename, [
    "CREATE TABLE timeseries (datetime text, code text, price float)",
    "CREATE TABLE static (code text, fullname text)"
])

st_table = staticdata(dbname)
st_table.add("FTSE", "FTSE 100 index")
assert st_table.read("FTSE") == "FTSE 100 index"
st_table.modify("FTSE", "FTSE all share")
assert st_table.read("FTSE") == "FTSE all share"
st_table.delete("FTSE")
assert st_table.read("FTSE") is None

dt_table = tsdata(dbname)

somprices = pd.TimeSeries(range(100), pd.date_range('1/1/2014', periods=100))

dt_table.add("FTSE", somprices)

assert dt_table.read("FTSE").values[-1] == 99.0

## Remove the file so example is clean next time
os.remove(dbfilename)

print "No problems"
Example #25
"""
Example: scikits.statsmodels.tsa.ARMA
"""
import numpy as np
import scikits.statsmodels.api as sm

# Generate some data from an ARMA process
from scikits.statsmodels.tsa.arima_process import arma_generate_sample

arparams = np.array([.75, -.25])
maparams = np.array([.65, .35])

# The conventions of the arma_generate function require that we specify a
# 1 for the zero-lag of the AR and MA parameters and that the AR parameters
# be negated.
arparams = np.r_[1, -arparams]
maparams = np.r_[1, maparams]
nobs = 250
y = arma_generate_sample(arparams, maparams, nobs)

# Now, optionally, we can add some dates information. For this example,
# we'll use a pandas time series.
import pandas
dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs)
y = pandas.TimeSeries(y, index=dates)
arma_mod = sm.tsa.ARMA(y, freq='M')
arma_res = arma_mod.fit(order=(2,2), trend='nc', disp=-1)
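
On current statsmodels the same model is fitted through sm.tsa.ARIMA instead (a sketch, not the scikits-era API; trend='nc' is spelled trend='n' there):

import statsmodels.api as sm

arma_mod = sm.tsa.ARIMA(y, order=(2, 0, 2), trend='n')
arma_res = arma_mod.fit()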
Example #26
    def run_va_model(self):
        raw_input = {"Acct Value": 1344581.6,
                     "Attained Age": 52.8,
                     "ID": "000001",
                     "Issue Age": 45.1,
                     "Issue Date": dt.date(2005, 6, 22),
                     "Initial Date": dt.date(2013, 2, 1),
                     "Maturity Age": 90,
                     "Population": 1,
                     "Riders": dict({}),
                     "ROP Amount": 1038872.0,
                     "Gender": "F",
                     "RPB": 1038872.0,
                     "Free Withdrawal Rate": 0.1,
                     "Asset Names": ["Fund A", "Fund B"],
                     "Asset Values": [1344581.6/2, 1344581.6/2]}

        # For now, we assume the init_date is month begin
        step_per_year = 12
        periods = 360
        init_date = dt.date(2013, 2, 1)
        pricing_date = init_date
        # Set up the investment index
        #credit_rider = isr.InsCreditRateFixed(credit_rate)

        # set up the mutual fund return index
        init_df = [ pd.TimeSeries(data=[100], index=[init_date], name='stock A'),
                    pd.TimeSeries(data=[100], index=[init_date], name='stock B')
                    ]
        eq_index = [ip.IndexProvider(init_df[0], 'stock A'), ip.IndexProvider(init_df[1], 'stock B')]

        # no vol, otherwise randomness will break my test
        sim_engine = EqBSEngine(np.array([0.02, 0.02]), np.array([0.0, 0.0]), corr=np.array([[1., 0.3], [0.3, 1.]]))
        simulator = ScenarioGenerator(eq_index, sim_engine, **{'max_time_step': 5. / BDAYS_PER_YEAR})

        MARKET_DATA_MANAGER.reset()
        MARKET_DATA_MANAGER.setup(init_date)
        MARKET_DATA_MANAGER.index_table['stock A'] = eq_index[0]
        MARKET_DATA_MANAGER.index_table['stock B'] = eq_index[1]
        MARKET_DATA_MANAGER.scen_gen_table['stock A'] = simulator
        MARKET_DATA_MANAGER.scen_gen_table['stock B'] = simulator

        fund_info = {'Fund A':
                         {
                             'Allocations': {
                                 'stock A': 1,
                                 'stock B': 0,
                             },
                             'Management Fee': 0.01,
                             'Description': 'blah blah',
                         },
                     'Fund B':
                         {
                             'Allocations': {
                                 'stock A': 0,
                                 'stock B': 1,
                             },
                             'Management Fee': 0.01,
                             'Description': 'blah blah',
                         },
        }

        credit_rider = isr.InsCreditRateMutualFunds(fund_info=fund_info)

        # Set up non-rider fees
        annual_fee_rate = 0.01
        annual_booking_fee = 100
        mgmt_fee = mif.InsFeeProp(annual_fee_rate, fee_name="Mgmt Fee")
        booking_fee = mif.InsFeeConst(annual_booking_fee, fee_name="Booking Fee")
        fees = [mgmt_fee, booking_fee]

        # Set up rider
        db_rider_fee_rate = 0.005
        db_rider = mir.InsRiderDB(extract_strict(raw_input, "ROP Amount"), db_rider_fee_rate, rider_name="UWL")
        riders = [db_rider]

        # Setup investment index
        inv_index = credit_rider.inv_index(init_date, periods, step_per_year)

        # Setup iteration
        product = InsProduct(riders, fees, inv_index)
        acct = InsAcct(raw_input, product)
        acct_iter = acct.acct_iterator()

        # Setup lapse function and lapse model
        xs = [0]
        ys = [0.0, 0.1]
        shock_func = linear_comp_bounded(1, 0, floor=0.5, cap=1.5)
        lapse_model = LapseDynamic(InsStepFunc(xs, ys), shock_func, rider_name='UWL')

        # Setup surrender charge
        xs = [0]
        ys = [100, 100]
        fixed_charge_func = InsStepFunc(xs, ys)
        xs = [0, 1, 2]
        ys = [0.0, 0.3, 0.2, 0.0]
        pct_charge_func = InsStepFunc(xs, ys)
        surrender_charge = SurrenderCharge(fixed_charge_func, pct_charge_func)

        # Setup mortality function and mortality model
        xs = [x for x in range(0, 100)]
        ys = [0.01] * 100
        ys.append(float('inf'))
        mort_model = InsMortModel(InsStepFunc(xs, ys))

        # Setup VA Model
        model = InsModelVA(acct, lapse_model, mort_model, surrender_charge)
        model_iter = model.create_iterator(pricing_date)

        # model iterator to evolve the model_iter to move forward
        metrics = ['Account Value',
                   'Active Population',
                   'Benefit Base.UWL',
                   'Rider Fee.UWL',
                   'Benefit.UWL',
                   'Fee.Mgmt Fee',
                   'Fee.Booking Fee',
                   'Date',
                   'Attained Age',
                   'Anniv Flag',
                   'Death',
                   'Lapse',
                   'Paid Benefit.UWL',
                   'Surrender Charge',
                   ]
        crv_aggregator = create_curve_aggregator(metrics)

        params = {'pricing date': init_date, 'periods': 60, 'frequency': 'MS'}
        proj_mgr = ProjectionManager(crv_aggregator, model_iter, **params)
        proj_mgr.run()

        df = crv_aggregator.to_dataframe()
        # df[['Rider Fee.UWL', 'Fee.Mgmt Fee', 'Fee.Booking Fee', 'Surrender Charge']].plot(kind='bar', stacked=True)
        return df
Example #27
    import scikits.timeseries as ts
    d1 = ts.Date(year=1700, freq='A')
    #NOTE: have to have yearBegin offset for annual data until parser rewrite
    #should this be up to the user, or should it be done in TSM init?
    #NOTE: not anymore, it's end of year now
    ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
    pandas_dr = pandas.DateRange(start=d1.datetime,
                                 periods=len(sunspots.endog),
                                 timeRule='A@DEC')
    #pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

    dates = np.arange(1700, 1700 + len(sunspots.endog))
    dates = ts.date_array(dates, freq='A')
    #sunspots = pandas.TimeSeries(sunspots.endog, index=dates)

    #NOTE: pandas only does business days for dates it looks like
    import datetime
    dt_dates = np.asarray(
        map(datetime.datetime.fromordinal,
            ts_dr.toordinal().astype(int)))
    sunspots = pandas.TimeSeries(sunspots.endog, index=dt_dates)

    #NOTE: pandas can't handle pre-1900 dates
    mod = AR(sunspots, freq='A')
    res = mod.fit(method='mle', maxlag=9)

    # some data for an example in Box Jenkins
    IBM = np.asarray([460, 457, 452, 459, 462, 459, 463, 479, 493, 490.])
    w = np.diff(IBM)
    theta = .5
Example #28
og_dir = 'processed\\SW\\GATE_daily\\'
for g, g_str in zip(g_sites, g_str_num):
    #--find the records for this gate
    for gg_str in g_str:
        this_gate_files, this_gate_info = [], []
        for f in gate_files:
            fdict = dbu.parse_fname(f)
            if fdict['site'] == g and fdict['strnum'] == gg_str:
                this_gate_files.append(f)
                this_gate_info.append(fdict)
        #print this_gate_files
        p_series = []
        for gf, gi in zip(this_gate_files, this_gate_info):
            if gi['dtype'].upper() == 'BK':
                series, flg = dbu.load_series(g_dir + gf)
                series = dbu.interp_breakpoint(series, flg)
                p_series.append(
                    pandas.TimeSeries(series[:, 1], index=series[:, 0]))
            else:
                #raise TypeError('Only use breakpoint data for gate openings')
                print('non break point record - skipping')

        #--create a full record
        final_p_series = dbu.create_full_record(p_series)
        dbu.save_series(og_dir + g + '.' + str(gg_str) + '.dat',
                        final_p_series)
        print('processed record saved for structure,gate:', str(g), str(gg_str), '\n')
        #break
    #break
Example #29
def marketsim(starting_cash, order_file, out_file):
    dates = []
    symbols = []
    #order_list=[]

    #starting_cash = float(sys.argv[1])
    #order_file = sys.argv[2]
    #out_file = sys.argv[3]

    #step1: read in csv file and remove duplicates
    #see marketsim-guidelines.pdf
    reader = csv.reader(open(order_file, 'r'), delimiter=',')
    for row in reader:
        #ex: 2008, 12, 3, AAPL, BUY, 130
        dates.append(dt.datetime(int(row[0]), int(row[1]), int(row[2])))
        #need int, otherwise get "TypeError: an integer is required"
        symbols.append(row[3])

    #order_list.sort(['date'])

    #remove duplicates
    #set(listWithDuplicates) is an unordered collection without duplicates
    #so it removes the duplicates in listWithDuplicates
    uniqueDates = list(set(dates))
    uniqueSymbols = list(set(symbols))

    #step 2 - read the data like in previous HW and tutorials
    sortedDates = sorted(uniqueDates)
    dt_start = sortedDates[0]
    #End date should be offset by 1 day to
    #read the close for the last date. - see marketsim-guidelines.pdf
    dt_end = sortedDates[-1] + dt.timedelta(days=1)

    dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    ldf_data = dataobj.get_data(ldt_timestamps, uniqueSymbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    #step 3: create dataframe that contains trade matrix
    #see marketsim-guidelines.pdf
    df_trade = np.zeros((len(ldt_timestamps), len(uniqueSymbols)))
    df_trade = pd.DataFrame(df_trade,
                            index=[ldt_timestamps],
                            columns=uniqueSymbols)

    #iterate orders file and fill the number of shares for that
    #symbol and date to create trade matrix

    reader = csv.reader(open(order_file, 'r'), delimiter=',')
    for orderrow in reader:
        order_date = dt.datetime(int(orderrow[0]), int(orderrow[1]),
                                 int(orderrow[2])).date()
        for index, row in df_trade.iterrows():
            if order_date == index.date():
                if orderrow[4] == 'Buy':
                    df_trade.set_value(index, orderrow[3], float(orderrow[5]))
                    #df_trade.ix[index][orderrow[3]] += float(orderrow[5])
                    #print ts_cash[index]
                elif orderrow[4] == "Sell":
                    #df_trade.ix[index][orderrow[3]] -= float(orderrow[5])
                    df_trade.set_value(index, orderrow[3], -float(orderrow[5]))
    print(df_trade)

    #step4: create timeseries containing cash values, all values are 0 initially
    ts_cash = pd.TimeSeries(0.0, index=ldt_timestamps)
    ts_cash[0] = starting_cash
    #for each order in trade matrix, subtract the cash used in that trade
    for index, row in df_trade.iterrows():
        ts_cash[index] -= np.dot(row.values.astype(float),
                                 d_data['close'].ix[index])

    #print 'df_trade',df_trade.head()
    #step5:
    #append '_CASH' into the price date
    df_close = d_data['close']
    df_close['_CASH'] = 1.0

    #append cash time series into the trade matrix
    df_trade['_CASH'] = ts_cash

    #convert to holding matrix
    df_holding = df_trade.cumsum()
    #df_trade = df_trade.cumsum(axis=1)
    #axis=1 means sum over columns
    #see http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.cumsum.html

    #dot product on price (df_close) and holding/trade matrix (df_trade) to
    #calculate portfolio on each date
    ts_fund = np.zeros((len(ldt_timestamps), 1))
    #ts_fund = pd.DataFrame(ts_fund, index=ldt_timestamps, columns='portfolio value')

    ts_fund = df_holding.mul(df_close, axis='columns',
                             fill_value=0).sum(axis=1)
    #better to avoid iterating over rows unless necessary
    #and try to use pandas' vectorized operations
    #for index, row in df_trade.iterrows():
    #        portfolio_value = np.dot(row.values.astype(float), df_close.ix[index].values)
    #        ts_fund[index] = portfolio_value

    #write this to csv
    writer = csv.writer(open(out_file, 'w', newline=''), delimiter=',')
    for row_index in ts_fund.index:
        row_to_enter = [
            row_index.year, row_index.month, row_index.day, ts_fund[row_index]
        ]
        writer.writerow(row_to_enter)

    return out_file
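
A hypothetical invocation (the file names are placeholders, not from the original assignment):

marketsim(1000000.0, 'orders.csv', 'values.csv')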
Example #30

def set_value_at(crv, date, set_value):
    crv.set_value_at(date, set_value)


# ---------
if __name__ == '__main__':
    import pandas as pd
    import datetime as dt
    import numpy as np
    from Infra.IndexProvider import IndexProvider

    curve = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2011, 1, 1),
                                          periods=10,
                                          freq='D').date,
                      data=np.arange(10)))
    spread = IndexProvider(
        pd.TimeSeries(index=pd.date_range(start=dt.date(2011, 1, 1),
                                          periods=3,
                                          freq='3D').date,
                      data=np.arange(3)))
    # parallel_shift(curve, 2)
    # print(curve.data)
    # scale(curve, 1.1)
    # print(curve.data)
    print('curve', curve.data)
    print('spread', spread.data)
    add_spread(curve, spread)
    print(curve.data)
    # set_value_at(curve, dt.date(2011, 1, 2), 10)