예제 #1
0
def benchmark():
    """Time three consecutive ``DataAccess.get_data`` calls for five symbols.

    The data directories are first reset with ``empty_dirs(delete=False)``.
    The same request is then issued three times: with default arguments,
    with ``useCache=False`` and with ``useCache=True``. After each call the
    elapsed CPU time and wall-clock time are printed.
    """
    try:
        # ``time.clock`` was removed in Python 3.8; ``process_time`` is the
        # CPU-time replacement.
        from time import process_time as clock
    except ImportError:  # very old interpreters without process_time
        from time import clock
    from time import time

    da = DataAccess('./data')
    da.empty_dirs(delete=False)

    symbols = ("AAPL", "GLD", "GOOG", "SPY", "XOM")
    start_date = datetime(2008, 1, 1)
    end_date = datetime(2009, 12, 31)
    fields = "Close"

    def timed(label, **get_data_options):
        # Run one get_data call and print its CPU / wall-clock duration.
        print(label)
        t1, t2 = clock(), time()
        # A fresh list per call, as the original passed a new list each time.
        da.get_data(list(symbols), start_date, end_date, fields,
                    **get_data_options)
        t1_f, t2_f = clock(), time()
        print("   ", t1_f - t1, t2_f - t2)

    timed('Directory empty: Download and save 5 stocks')
    timed('Load 5 stocks from .csv', useCache=False)
    timed('Load 5 stocks from serialized', useCache=True)
예제 #2
0
    def __init__(self):
        """Create the data accessor and set default parameters and empty results."""
        self.data_access = DataAccess()

        # Configuration defaults
        self.list = None
        self.market = 'SPY'
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 200
        self.field = 'Adj Close'

        # Result placeholders; every one starts out as None
        self.equities_window = self.equities_estimation = None
        self.market_window = self.market_estimation = None

        self.reg_estimation = None

        self.dr_equities_window = self.dr_equities_estimation = None
        self.dr_market_window = self.dr_market_estimation = None

        self.er = self.ar = self.car = None
예제 #3
0
    def setUpDataAccess(self, delete=False):
        """Point ``DataAccess`` at the ``data`` folder next to this file and reset it.

        Parameters
        ----------
            delete: bool, forwarded to ``empty_cache`` and ``empty_dir``
        """
        this_file = inspect.getfile(inspect.currentframe())
        base_dir = os.path.dirname(os.path.abspath(this_file))
        DataAccess.path = os.path.join(base_dir, 'data')

        accessor = DataAccess()
        self.data_access = accessor
        accessor.empty_cache(delete=delete)
        accessor.empty_dir(delete=delete)
예제 #4
0
    def __init__(self):
        """Create the data accessor and reset cash, price field and result slots."""
        self.da = DataAccess()

        # Configuration defaults
        self.initial_cash = 0
        self.field = 'adjusted_close'

        # Input and result placeholders; all start out as None
        self.trades = self.prices = self.num_of_shares = None
        self.cash = self.equities = self.portfolio = None
예제 #5
0
    def __init__(self):
        """Create the data accessor, an empty search setup and a fresh ``Condition``."""
        self.data_access = DataAccess()

        # Search inputs
        self.symbols = []
        self.start_date = self.end_date = None
        self.field = 'Adj Close'

        # Condition object plus result placeholders
        self.condition = Condition()
        self.matrix = None
        self.num_events = 0

        # Option flag, enabled by default
        self.oneEventPerEquity = True
예제 #6
0
    def setUpDataAccess(self, delete=False):
        """Point ``DataAccess`` at the ``data`` folder next to this file and reset it.

        Parameters
        ----------
            delete: bool, forwarded to ``empty_cache`` and ``empty_dir``
        """
        current_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
        DataAccess.path = os.path.join(os.path.dirname(current_file), 'data')

        accessor = DataAccess()
        self.data_access = accessor
        accessor.empty_cache(delete=delete)
        accessor.empty_dir(delete=delete)
예제 #7
0
    def __init__(self):
        """Create the data accessor and set default parameters and empty results."""
        self.data_access = DataAccess()

        # Configuration defaults
        self.list = None
        self.market = "SPY"
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 200
        self.field = "Adj Close"

        # Result placeholders; every one starts out as None
        self.equities_window = self.equities_estimation = None
        self.market_window = self.market_estimation = None

        self.reg_estimation = None

        self.dr_equities_window = self.dr_equities_estimation = None
        self.dr_market_window = self.dr_market_estimation = None

        self.er = self.ar = self.car = None
예제 #8
0
    def __init__(self):
        """Create the data accessor and set default parameters and empty results."""
        # Data source
        self.data_access = DataAccess()

        # Event description and window sizes
        self.date = self.symbol = None  # date is the date of the event
        self.field = 'adjusted_close'
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Result placeholders; all start out as None
        self.evt_window_data = None
        self.er = self.ar = self.car = None
        self.t_test = self.prob = None
예제 #9
0
    def __init__(self):
        """Create the data accessor and reset cash, price field and result slots."""
        self.da = DataAccess()

        # Configuration defaults
        self.initial_cash = 0
        self.field = 'adjusted_close'

        # Input and result placeholders; all start out as None
        self.trades = self.prices = self.num_of_shares = None
        self.cash = self.equities = self.portfolio = None
예제 #10
0
    def __init__(self, path='./data'):
        """Create a ``DataAccess`` for ``path`` and reset cash and result slots.

        Parameters
        ----------
            path: str, passed unchanged to ``DataAccess``
        """
        self.da = DataAccess(path)

        # Cash counters both start at zero
        self.initial_cash = self.current_cash = 0

        # Input and result placeholders; all start out as None
        self.trades = self.prices = self.own = None
        self.cash = self.equities = self.portfolio = None
예제 #11
0
class PastEvent(SimpleEvent):
    """Study of one symbol around one event date, delegating to ``market_return``."""

    def __init__(self, path="./data"):
        """Create a ``DataAccess`` for ``path`` and set default parameters.

        Parameters
        ----------
            path: str, passed unchanged to ``DataAccess``
        """
        # Data source
        self.da = DataAccess(path)

        # Event description and window sizes
        self.date = self.symbol = None  # date is the date of the event
        self.field = "Adj Close"
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Result placeholders; all start out as None
        self.er = self.ar = self.car = None
        self.t_test = self.prob = None

    def run(self):
        """Load equity and market data around ``self.date`` and run ``market_return``.

        Sets ``self.data``, the period/window boundary attributes and
        overwrites ``self.market`` before calling the parent's
        ``market_return``.
        """
        history_days = self.lookback_days + self.estimation_period
        dates = DateUtils.get_nyse_dates_event(
            self.date, history_days, self.lookforward_days, list=True
        )
        first_day, last_day = dates[0], dates[-1]

        # Inputs consumed by the parent class
        self.data = self.da.get_data(self.symbol, first_day, last_day, self.field)
        # NOTE(review): self.market held the market symbol until here and is
        # replaced by the loaded data, so a second run() would pass that data
        # as the symbol -- confirm this is intended.
        self.market = self.da.get_data(self.market, first_day, last_day, self.field)

        # Boundaries: the estimation period ends where the window starts
        self.start_period = first_day
        boundary = dates[self.estimation_period]
        self.end_period = boundary
        self.start_window = boundary
        self.end_window = last_day

        # Delegate the computation to the parent class
        super().market_return()
예제 #12
0
    def __init__(self):
        """Create the data accessor, an empty search setup and a fresh ``Condition``."""
        self.data_access = DataAccess()

        # Search inputs
        self.symbols = []
        self.start_date = self.end_date = None
        self.field = 'adjusted_close'

        # Condition object plus result placeholders
        self.condition = Condition()
        self.matrix = None
        self.num_events = 0

        # Option flag, enabled by default
        self.oneEventPerEquity = True
예제 #13
0
    def __init__(self, path="./data"):
        """Create a ``DataAccess`` for ``path`` and set default parameters.

        Parameters
        ----------
            path: str, passed unchanged to ``DataAccess``
        """
        # Data source
        self.da = DataAccess(path)

        # Event description and window sizes
        self.date = self.symbol = None  # date is the date of the event
        self.field = "Adj Close"
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Result placeholders; all start out as None
        self.er = self.ar = self.car = None
        self.t_test = self.prob = None
예제 #14
0
    def __init__(self):
        """Create the data accessor and set default parameters and empty results."""
        # Data source
        self.data_access = DataAccess()

        # Event description and window sizes
        self.date = self.symbol = None  # date is the date of the event
        self.field = 'Adj Close'
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Result placeholders; all start out as None
        self.evt_window_data = None
        self.er = self.ar = self.car = None
        self.t_test = self.prob = None
예제 #15
0
class MultipleEvents(object):
    '''
    Event study over a list of events (symbols indexed by event date).

    For every event the prices and daily returns of the equity and of the
    market are cut into an estimation period and an event window. A linear
    regression of equity returns on market returns over the estimation
    period yields the expected return (ER) on the window, from which the
    abnormal return (AR) and cumulative abnormal return (CAR) are derived.
    '''

    def __init__(self):
        '''Create the data accessor and set default parameters and empty results.'''
        self.data_access = DataAccess()

        self.list = None
        self.market = 'SPY'
        self.lookback_days = 20
        self.lookforward_days = 20
        self.estimation_period = 200
        self.field = 'Adj Close'

        # Result
        self.equities_window = None
        self.equities_estimation = None
        self.market_window = None
        self.market_estimation = None

        self.reg_estimation = None

        self.dr_equities_window = None
        self.dr_equities_estimation = None
        self.dr_market_window = None
        self.dr_market_estimation = None

        self.er = None
        self.ar = None
        self.car = None

    @staticmethod
    def _cut(values, start_idx, end_idx, new_index):
        '''Return ``values[start_idx:end_idx]`` relabelled with ``new_index``.'''
        chunk = values[start_idx:end_idx]
        chunk.index = new_index
        return chunk

    def run(self):
        '''
        Assess the events

        |-----200-----|-------20-------|-|--------20--------|
           estimation      lookback   event   lookforward
        (sizes shown are the defaults set in __init__)

        Prerequisites
        -------------
            self.list: symbols indexed by event date
            self.market = 'SPY'
            self.lookback_days = 20
            self.lookforward_days = 20
            self.estimation_period = 200
            self.field = 'Adj Close'

        Results
        -------
            Fills the window/estimation DataFrames (one column per event),
            self.reg_estimation, self.er / self.ar / self.car and their
            row-wise mean_* / std_* series.
        '''
        # 0. Get the dates and Download/Import the data
        symbols = list(set(self.list))
        start_date = self.list.index[0]
        end_date = self.list.index[-1]
        nyse_dates = DateUtils.nyse_dates(
            start=start_date,
            end=end_date,
            lookbackDays=self.lookback_days + self.estimation_period + 1,
            lookforwardDays=self.lookforward_days)

        data = self.data_access.get_data(symbols, nyse_dates[0],
                                         nyse_dates[-1], self.field)
        market = self.data_access.get_data(self.market, nyse_dates[0],
                                           nyse_dates[-1], self.field)

        if len(data.columns) == 1:
            data.columns = symbols
        if len(data) > len(market):
            market = market.reindex(data.index)
            market.columns = [self.field]

        # ffill()/bfill() replace the deprecated fillna(method=...) spelling
        data = data.ffill().bfill()
        market = market.ffill().bfill()

        # 1. Create DataFrames with the data of each event
        windows_indexes = range(-self.lookback_days, self.lookforward_days + 1)
        estimation_indexes = range(
            -self.estimation_period - self.lookback_days, -self.lookback_days)
        self.equities_window = pd.DataFrame(index=windows_indexes)
        self.equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.market_window = pd.DataFrame(index=windows_indexes)
        self.market_estimation = pd.DataFrame(index=estimation_indexes)

        dr_data = Calculator.returns(data)
        dr_market = Calculator.returns(market)
        self.dr_equities_window = pd.DataFrame(index=windows_indexes)
        self.dr_equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.dr_market_window = pd.DataFrame(index=windows_indexes)
        self.dr_market_estimation = pd.DataFrame(index=estimation_indexes)

        # 2. Iterate over the list of events and fill the DataFrames.
        # Index and values are walked together so the lookup is positional
        # and never depends on integer-key fallback of a date-indexed Series.
        for evt_stamp, symbol in zip(self.list.index, self.list):
            evt_date = evt_stamp.to_pydatetime()
            col_name = symbol + ' ' + evt_date.strftime('%Y-%m-%d')
            evt_idx = DateUtils.search_closer_date(evt_date,
                                                   data[symbol].index,
                                                   exact=True)

            # 2.1 Estimation period: positions [start_idx, end_idx) of data
            start_idx = evt_idx - self.lookback_days - self.estimation_period
            end_idx = evt_idx - self.lookback_days
            self.equities_estimation[col_name] = self._cut(
                data[symbol], start_idx, end_idx,
                self.equities_estimation.index)
            self.dr_equities_estimation[col_name] = self._cut(
                dr_data[symbol], start_idx, end_idx,
                self.dr_equities_estimation.index)
            self.market_estimation[col_name] = self._cut(
                market[self.field], start_idx, end_idx,
                self.market_estimation.index)
            self.dr_market_estimation[col_name] = self._cut(
                dr_market, start_idx, end_idx,
                self.dr_market_estimation.index)

            # 2.2 Event window: positions [start_idx, end_idx) of data
            start_idx = evt_idx - self.lookback_days
            end_idx = evt_idx + self.lookforward_days + 1
            self.equities_window[col_name] = self._cut(
                data[symbol], start_idx, end_idx,
                self.equities_window.index)
            self.dr_equities_window[col_name] = self._cut(
                dr_data[symbol], start_idx, end_idx,
                self.dr_equities_window.index)
            self.market_window[col_name] = self._cut(
                market[self.field], start_idx, end_idx,
                self.market_window.index)
            self.dr_market_window[col_name] = self._cut(
                dr_market, start_idx, end_idx,
                self.dr_market_window.index)

        # 3. Calculate the linear regression -> expected return
        self.reg_estimation = pd.DataFrame(
            index=self.dr_market_estimation.columns,
            columns=['Intercept', 'Slope', 'Std Error'])
        self.er = pd.DataFrame(index=self.dr_market_window.index,
                               columns=self.dr_market_window.columns)
        # For each column (event) on the estimation period
        for col in self.dr_market_estimation.columns:
            # 3.1 Calculate the regression
            x = self.dr_market_estimation[col]
            y = self.dr_equities_estimation[col]
            slope, intercept, _r_value, _p_value, slope_std_error = \
                stats.linregress(x, y)
            # .loc writes into the frame itself; the chained
            # frame[column][row] form may write into a temporary copy.
            self.reg_estimation.loc[col, 'Slope'] = slope
            self.reg_estimation.loc[col, 'Intercept'] = intercept
            self.reg_estimation.loc[col, 'Std Error'] = slope_std_error
            # 3.2 Calculate the expected return of each date using the regression
            self.er[col] = intercept + self.dr_market_window[col] * slope

        # 4. Final results
        self.er.columns.name = 'Expected return'
        self.mean_er = self.er.mean(axis=1)
        self.mean_er.name = 'Mean ER'
        self.std_er = self.er.std(axis=1)
        self.std_er.name = 'Std ER'

        self.ar = self.dr_equities_window - self.er
        self.ar.columns.name = 'Abnormal return'
        self.mean_ar = self.ar.mean(axis=1)
        self.mean_ar.name = 'Mean AR'
        self.std_ar = self.ar.std(axis=1)
        self.std_ar.name = 'Std AR'

        self.car = self.ar.apply(np.cumsum)
        self.car.columns.name = 'Cum Abnormal Return'
        self.mean_car = self.car.mean(axis=1)
        self.mean_car.name = 'Mean CAR'
        self.std_car = self.car.std(axis=1)
        # Previously mean_car was renamed a second time and std_car stayed unnamed
        self.std_car.name = 'Std CAR'

    def plot(self, which):
        '''
        Plot the mean of one result with its standard deviation via errorfill

        Parameters
        ----------
            which: str, one of 'car', 'ar' or 'er'

        Raises
        ------
            ValueError: if which is not one of the supported names
        '''
        if which not in ('car', 'ar', 'er'):
            raise ValueError(
                "which must be 'car', 'ar' or 'er', got %r" % (which,))
        mean = getattr(self, 'mean_' + which)
        std = getattr(self, 'std_' + which)
        x = self.mean_car.index.values
        errorfill(x, mean.values, std.values, label=mean.name)
예제 #16
0
import os
from datetime import datetime
from finance.utils import DataAccess

# Option 1: set the FINANCEPATH environment variable before creating DataAccess
os.environ["FINANCEPATH"] = './data'
da = DataAccess()
symbols = ["GOOG", "SPY", "XOM"]
start_date, end_date = datetime(2015, 1, 1), datetime(2017, 12, 31)
fields = 'close'
close = da.get_data(symbols, start_date, end_date, fields)
print(close)

# Option 2: manually set the path on the class; overwrites option 1

DataAccess.path = 'data2'
da = DataAccess()

symbols = ["AAPL", "GLD"]
start_date, end_date = datetime(2015, 1, 1), datetime(2017, 12, 31)
fields = 'close'
close = da.get_data(symbols, start_date, end_date, fields)
print(close)

예제 #17
0
class MarketSimulator(object):
    '''
    Market Simulator

    Needs a list of trades to simulate, options are:
        1. Provide a custom pandas.DataFrame(index=DatetimeIndex):
                        symbol  action    num_of_shares
            2011-01-10   AAPL    Buy           1500
            2011-01-13   AAPL    Sell          1500
            2011-01-13   IBM     Buy           4000
            2011-01-26   GOOG    Buy           1000
        2. Load the trades from a csv file:
            year,month,day,symbol,action,num_of_shares
            2011,1,10,AAPL,Buy,1500
            2011,1,13,AAPL,Sell,1500
            2011,1,13,IBM,Buy,4000
            2011,1,26,GOOG,Buy,1000
        3. Create trades from an event list: usually from EventFinder

    '''
    def __init__(self):
        '''Create the data accessor and reset cash, price field and result slots.'''
        self.da = DataAccess()

        self.initial_cash = 0
        self.field = 'adjusted_close'

        self.trades = None
        self.prices = None
        self.num_of_shares = None
        self.cash = None
        self.equities = None
        self.portfolio = None

    def load_trades(self, file_path):
        '''
        Load trades from a csv file

        csv file example:
        year,month,day,symbol,action,num_of_shares
        2011,1,10,AAPL,Buy,1500
        2011,1,13,AAPL,Sell,1500
        2011,1,13,IBM,Buy,4000
        2011,1,26,GOOG,Buy,1000

        Parameters
        ----------
            file_path: str, path to the csv containing the orders

        Returns
        -------
            Nothing: stores the date-indexed, date-sorted trades in self.trades
        '''
        # 1. Read the .csv file
        trades = pd.read_csv(file_path)

        # 2. Build the index from the (year, month, day) columns; walking the
        #    three columns directly avoids the per-row Series of iterrows()
        dates = [
            datetime(int(year), int(month), int(day))
            for year, month, day in zip(trades['year'], trades['month'],
                                        trades['day'])
        ]
        trades.index = pd.DatetimeIndex(dates)

        # 3. Keep only the columns describing the trade
        trades = trades[['symbol', 'action', 'num_of_shares']]

        # 4. Sort the DataFrame by the index (dates)
        self.trades = trades.sort_index()

    def create_trades_from_event(self,
                                 eventList,
                                 eventDayAction='Buy',
                                 eventDayShares=100,
                                 actionAfter='Sell',
                                 daysAfter=5,
                                 sharesAfter=100,
                                 actionBefore=None,
                                 daysBefore=5,
                                 sharesBefore=100):
        '''
        Creates trades using an event list; usually from the EventFinder.
        Also creates an additional order after each event as defined by the
        user. Orders before the event are not implemented yet, so
        actionBefore, daysBefore and sharesBefore are currently ignored.

        Parameters
        ----------
            eventList: pandas.Series, symbols indexed by event date
            eventDayAction: str, action of the trade placed on the event date
            eventDayShares: int, number of shares of the event-date trade
            actionAfter: str or None, action of the follow-up trade;
                None disables the follow-up trades
            daysAfter: int, offset passed to DateUtils.nyse_add
            sharesAfter: int, number of shares of the follow-up trade

        Returns
        -------
            Nothing: stores the date-sorted trades in self.trades
        '''
        self.trades = pd.DataFrame(
            index=eventList.index,
            columns=['symbol', 'action', 'num_of_shares'])
        self.trades['symbol'] = eventList
        self.trades['action'] = eventDayAction
        self.trades['num_of_shares'] = eventDayShares

        # TODO: Actions BEFORE

        if actionAfter is not None:
            after_dates = []
            after_rows = []
            for idx, row in self.trades.iterrows():
                after_dates.append(
                    DateUtils.nyse_add(idx.to_pydatetime(), daysAfter))
                after_rows.append({
                    'symbol': row['symbol'],
                    'action': actionAfter,
                    'num_of_shares': sharesAfter
                })
            if after_rows:
                # One concat replaces the removed DataFrame.append and avoids
                # copying the whole frame once per event.
                after = pd.DataFrame(after_rows,
                                     index=after_dates,
                                     columns=self.trades.columns)
                self.trades = pd.concat([self.trades, after])

        # DataFrame.sort() no longer exists; sort_index() matches load_trades
        self.trades = self.trades.sort_index()

    def simulate(self):
        '''
        Simulates the trades

        Prerequisites
        -------------
            self.trades: DataFrame of trades sorted by date, see the class
                docstring for the expected columns
            self.initial_cash, self.field

        Returns
        -------
            Nothing: Fills prices, num_of_shares, cash, equities, portfolio
        '''
        # 0.1 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        # Convert from TimeStamp to datetime
        start_date = self.trades.index[0].to_pydatetime()
        end_date = self.trades.index[-1].to_pydatetime()
        self.prices = self.da.get_data(symbols, start_date, end_date,
                                       self.field)
        # 0.2 Init DataFrames
        self.cash = pd.Series(index=self.prices.index,
                              name='Cash',
                              dtype=np.float64)
        self.num_of_shares = pd.DataFrame(index=self.prices.index,
                                          columns=self.prices.columns,
                                          dtype=np.float64)

        # 1. Fill the DataFrames
        current_cash = self.initial_cash
        current_shares = dict.fromkeys(symbols, 0)
        for idx, row in self.trades.iterrows():
            # 1.1 Get info of the row
            symbol = row['symbol']
            action = row['action'].lower()[0:1]  # 'b' for buy, 's' for sell
            num_of_shares = row['num_of_shares']

            # 1.2 Buying stocks spends cash, selling stocks earns cash;
            #     any other action leaves cash and holdings unchanged
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                current_cash = current_cash - cash_change
                current_shares[symbol] = current_shares[symbol] + num_of_shares
            elif action == 's':
                current_cash = current_cash + cash_change
                current_shares[symbol] = current_shares[symbol] - num_of_shares

            # 1.3 Record the running totals on the trade date; .loc replaces
            #     the removed .ix indexer and its chained assignment
            self.cash.loc[idx] = current_cash
            self.num_of_shares.loc[idx, symbol] = current_shares[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        self.prices = self.prices.ffill().bfill()
        self.num_of_shares = self.num_of_shares.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.num_of_shares * self.prices).sum(axis=1)
        self.equities.name = 'Equities value'
        # 3. Get the value of the portfolio = cash + equities_value
        self.portfolio = self.cash + self.equities
        self.portfolio.name = 'Portfolio value'
예제 #18
0
class PastEvent(EventStudy):
    '''
    Analyse a particular equity on a particular date

    Necessary Parameters
    --------------------
        date: datetime
        symbol: str, eg: AAPL

    Optional Parameters
    -------------------
        market: str, default='SPY' - used to assess the event
        lookback_days: int, default=20 - past event window size
        lookforward_days: int, default=20 - future event window size
        estimation_period: int, default=255

    |-----255-----|-------20-------|-|--------20--------|
       estimation      lookback   event   lookforward
    '''

    def __init__(self):
        '''Create the data accessor and set default parameters and empty results.'''
        # Data source
        self.data_access = DataAccess()

        # Event description and window sizes
        self.date = self.symbol = None  # date is the date of the event
        self.field = 'Adj Close'
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Result placeholders; all start out as None
        self.evt_window_data = None
        self.er = self.ar = self.car = None
        self.t_test = self.prob = None

    def run(self):
        '''
        Load the equity and market data around self.date and run market_return

        Sets self.data, self.evt_window_data and the period/window boundary
        attributes, and overwrites self.market, before calling the parent's
        market_return.
        '''
        dates = DateUtils.nyse_dates_event(
            self.date, self.lookback_days, self.lookforward_days,
            self.estimation_period)
        first_day, last_day = dates[0], dates[-1]

        # Inputs of the general market_return study
        self.data = self.data_access.get_data(
            self.symbol, first_day, last_day, self.field)
        # The event window is the last lookback + lookforward + 1 dates
        window_size = self.lookforward_days + self.lookback_days + 1
        window_dates = dates[-window_size:]
        self.evt_window_data = self.data[window_dates[0]:last_day]
        # NOTE(review): self.market held the market symbol until here and is
        # replaced by the loaded data -- confirm a second run() is not expected.
        self.market = self.data_access.get_data(
            self.market, first_day, last_day, self.field)

        # Boundaries: the estimation period ends where the window starts
        self.start_period = first_day
        boundary = dates[self.estimation_period]
        self.end_period = boundary
        self.start_window = boundary
        self.end_window = last_day

        # Delegate the computation to the parent class
        super().market_return()
예제 #19
0
 def setUp1(self):
     """Call ``empty_dirs`` on ``./data`` and keep a fresh ``DataAccess`` in ``self.da``."""
     data_dir = './data'
     DataAccess(data_dir).empty_dirs()
     self.da = DataAccess(data_dir)
예제 #20
0
class MultipleEvents(object):
    """
    Event study over a list of events (symbols indexed by event date).

    For every event the prices and daily returns of the equity and of the
    market are cut into an estimation period and an event window. A linear
    regression of equity returns on market returns over the estimation
    period yields the expected return (ER) on the window, from which the
    abnormal return (AR) and cumulative abnormal return (CAR) are derived.
    """

    def __init__(self):
        """Create the data accessor and set default parameters and empty results."""
        self.data_access = DataAccess()

        self.list = None
        self.market = "SPY"
        self.lookback_days = 20
        self.lookforward_days = 20
        self.estimation_period = 200
        self.field = "Adj Close"

        # Result
        self.equities_window = None
        self.equities_estimation = None
        self.market_window = None
        self.market_estimation = None

        self.reg_estimation = None

        self.dr_equities_window = None
        self.dr_equities_estimation = None
        self.dr_market_window = None
        self.dr_market_estimation = None

        self.er = None
        self.ar = None
        self.car = None

    @staticmethod
    def _cut(values, start_idx, end_idx, new_index):
        """Return ``values[start_idx:end_idx]`` relabelled with ``new_index``."""
        chunk = values[start_idx:end_idx]
        chunk.index = new_index
        return chunk

    def run(self):
        """
        Assess the events

        |-----200-----|-------20-------|-|--------20--------|
           estimation      lookback   event   lookforward
        (sizes shown are the defaults set in __init__)

        Prerequisites
        -------------
            self.list: symbols indexed by event date
            self.market = 'SPY'
            self.lookback_days = 20
            self.lookforward_days = 20
            self.estimation_period = 200
            self.field = 'Adj Close'

        Results
        -------
            Fills the window/estimation DataFrames (one column per event),
            self.reg_estimation, self.er / self.ar / self.car and their
            row-wise mean_* / std_* series.
        """
        # 0. Get the dates and Download/Import the data
        symbols = list(set(self.list))
        start_date = self.list.index[0]
        end_date = self.list.index[-1]
        nyse_dates = DateUtils.nyse_dates(
            start=start_date,
            end=end_date,
            lookbackDays=self.lookback_days + self.estimation_period + 1,
            lookforwardDays=self.lookforward_days,
        )

        data = self.data_access.get_data(symbols, nyse_dates[0], nyse_dates[-1], self.field)
        market = self.data_access.get_data(self.market, nyse_dates[0], nyse_dates[-1], self.field)

        if len(data.columns) == 1:
            data.columns = symbols
        if len(data) > len(market):
            market = market.reindex(data.index)
            market.columns = [self.field]

        # ffill()/bfill() replace the deprecated fillna(method=...) spelling
        data = data.ffill().bfill()
        market = market.ffill().bfill()

        # 1. Create DataFrames with the data of each event
        windows_indexes = range(-self.lookback_days, self.lookforward_days + 1)
        estimation_indexes = range(-self.estimation_period - self.lookback_days, -self.lookback_days)
        self.equities_window = pd.DataFrame(index=windows_indexes)
        self.equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.market_window = pd.DataFrame(index=windows_indexes)
        self.market_estimation = pd.DataFrame(index=estimation_indexes)

        dr_data = Calculator.returns(data)
        dr_market = Calculator.returns(market)
        self.dr_equities_window = pd.DataFrame(index=windows_indexes)
        self.dr_equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.dr_market_window = pd.DataFrame(index=windows_indexes)
        self.dr_market_estimation = pd.DataFrame(index=estimation_indexes)

        # 2. Iterate over the list of events and fill the DataFrames.
        # Index and values are walked together so the lookup is positional
        # and never depends on integer-key fallback of a date-indexed Series.
        for evt_stamp, symbol in zip(self.list.index, self.list):
            evt_date = evt_stamp.to_pydatetime()
            col_name = symbol + " " + evt_date.strftime("%Y-%m-%d")
            evt_idx = DateUtils.search_closer_date(evt_date, data[symbol].index, exact=True)

            # 2.1 Estimation period: positions [start_idx, end_idx) of data
            start_idx = evt_idx - self.lookback_days - self.estimation_period
            end_idx = evt_idx - self.lookback_days
            self.equities_estimation[col_name] = self._cut(
                data[symbol], start_idx, end_idx, self.equities_estimation.index
            )
            self.dr_equities_estimation[col_name] = self._cut(
                dr_data[symbol], start_idx, end_idx, self.dr_equities_estimation.index
            )
            self.market_estimation[col_name] = self._cut(
                market[self.field], start_idx, end_idx, self.market_estimation.index
            )
            self.dr_market_estimation[col_name] = self._cut(
                dr_market, start_idx, end_idx, self.dr_market_estimation.index
            )

            # 2.2 Event window: positions [start_idx, end_idx) of data
            start_idx = evt_idx - self.lookback_days
            end_idx = evt_idx + self.lookforward_days + 1
            self.equities_window[col_name] = self._cut(
                data[symbol], start_idx, end_idx, self.equities_window.index
            )
            self.dr_equities_window[col_name] = self._cut(
                dr_data[symbol], start_idx, end_idx, self.dr_equities_window.index
            )
            self.market_window[col_name] = self._cut(
                market[self.field], start_idx, end_idx, self.market_window.index
            )
            self.dr_market_window[col_name] = self._cut(
                dr_market, start_idx, end_idx, self.dr_market_window.index
            )

        # 3. Calculate the linear regression -> expected return
        self.reg_estimation = pd.DataFrame(
            index=self.dr_market_estimation.columns, columns=["Intercept", "Slope", "Std Error"]
        )
        self.er = pd.DataFrame(index=self.dr_market_window.index, columns=self.dr_market_window.columns)
        # For each column (event) on the estimation period
        for col in self.dr_market_estimation.columns:
            # 3.1 Calculate the regression
            x = self.dr_market_estimation[col]
            y = self.dr_equities_estimation[col]
            slope, intercept, _r_value, _p_value, slope_std_error = stats.linregress(x, y)
            # .loc writes into the frame itself; the chained
            # frame[column][row] form may write into a temporary copy.
            self.reg_estimation.loc[col, "Slope"] = slope
            self.reg_estimation.loc[col, "Intercept"] = intercept
            self.reg_estimation.loc[col, "Std Error"] = slope_std_error
            # 3.2 Calculate the expected return of each date using the regression
            self.er[col] = intercept + self.dr_market_window[col] * slope

        # 4. Final results
        self.er.columns.name = "Expected return"
        self.mean_er = self.er.mean(axis=1)
        self.mean_er.name = "Mean ER"
        self.std_er = self.er.std(axis=1)
        self.std_er.name = "Std ER"

        self.ar = self.dr_equities_window - self.er
        self.ar.columns.name = "Abnormal return"
        self.mean_ar = self.ar.mean(axis=1)
        self.mean_ar.name = "Mean AR"
        self.std_ar = self.ar.std(axis=1)
        self.std_ar.name = "Std AR"

        self.car = self.ar.apply(np.cumsum)
        self.car.columns.name = "Cum Abnormal Return"
        self.mean_car = self.car.mean(axis=1)
        self.mean_car.name = "Mean CAR"
        self.std_car = self.car.std(axis=1)
        # Previously mean_car was renamed a second time and std_car stayed unnamed
        self.std_car.name = "Std CAR"

    def plot(self, which):
        """
        Plot the mean of one result with its standard deviation via errorfill

        Parameters
        ----------
            which: str, one of 'car', 'ar' or 'er'

        Raises
        ------
            ValueError: if which is not one of the supported names
        """
        if which not in ("car", "ar", "er"):
            raise ValueError("which must be 'car', 'ar' or 'er', got %r" % (which,))
        mean = getattr(self, "mean_" + which)
        std = getattr(self, "std_" + which)
        x = self.mean_car.index.values
        errorfill(x, mean.values, std.values, label=mean.name)
예제 #21
0
class FinanceTest(unittest.TestCase):
    '''
    Base TestCase with helpers to compare numpy arrays, pandas Series and
    pandas DataFrames, optionally up to a number of decimal digits.
    '''

    def setUpDataAccess(self, delete=False):
        '''
        Point DataAccess at the 'data' directory next to this file, store a
        new instance on self.data_access and call its empty_cache and
        empty_dir methods.

        Parameters
        ----------
            delete: bool, forwarded to empty_cache and empty_dir
        '''
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        self.data_access = DataAccess()

        self.data_access.empty_cache(delete=delete)
        self.data_access.empty_dir(delete=delete)

    @staticmethod
    def delete_data():
        '''
        Point DataAccess at the 'data' directory next to this file and call
        empty_dirs on a new instance.
        '''
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        data_access = DataAccess()
        data_access.empty_dirs()

    def assertEqual(self, ans, sol, digits=0):
        '''
        Compare two values choosing the comparison from their types.

        Parameters
        ----------
            ans: value obtained
            sol: value expected
            digits: int, default=0 - decimals to compare, 0 means exact
        '''
        if type(ans) is np.ndarray and type(sol) is np.ndarray:
            self.assertArrayEqual(ans, sol, digits)
        elif isinstance(ans, pd.Series) and isinstance(sol, pd.Series):
            # isinstance avoids touching pd.TimeSeries, which does not exist
            # in current pandas; digits is forwarded for every Series
            self.assertSeriesEqual(ans, sol, digits)
        elif type(ans) is pd.DataFrame and type(sol) is pd.DataFrame:
            self.assertFrameEqual(ans, sol, digits)
        elif digits == 0:
            super().assertEqual(ans, sol)
        else:
            super().assertAlmostEqual(ans, sol, digits)

    def assertFloat(self, obj):
        '''Assert that the type of obj is exactly np.float64'''
        self.assertIs(type(obj), np.float64)

    def assertArray(self, obj):
        '''Assert that the type of obj is exactly np.ndarray'''
        self.assertIs(type(obj), np.ndarray)

    def assertArrayEqual(self, ans, sol, digits=0):
        '''Assert that two np.ndarray are equal, or almost equal to digits decimals'''
        self.assertArray(ans)
        self.assertArray(sol)
        if digits == 0:
            np_test.assert_array_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans, sol, digits)

    def assertSeries(self, obj):
        '''Assert that obj is a pandas.Series (or a subclass of it)'''
        self.assertIsInstance(obj, pd.Series)

    def assertSeriesEqual(self, ans, sol, digits=0):
        '''Assert that two Series have the same name and equal (or almost equal) values'''
        self.assertSeries(ans)
        self.assertSeries(sol)
        # Plain unittest comparison: the assertEquals alias no longer exists
        # in Python 3.12 and the override above expects a digits argument
        super().assertEqual(ans.name, sol.name)

        if digits == 0:
            # digits (0) used to be passed positionally, where pandas reads
            # it as check_dtype; the same setting is now explicit
            pd_test.assert_series_equal(ans, sol, check_dtype=False)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)

    def assertFrame(self, obj):
        '''Assert that the type of obj is exactly pandas.DataFrame'''
        self.assertIs(type(obj), pd.DataFrame)

    def assertFrameEqual(self, ans, sol, digits=0):
        '''Assert that two DataFrames have the same columns name and equal (or almost equal) values'''
        self.assertFrame(ans)
        self.assertFrame(sol)
        super().assertEqual(ans.columns.name, sol.columns.name)

        if digits == 0:
            pd_test.assert_frame_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)
예제 #22
0
class PastEventTest(unittest.TestCase):
    '''
    Regression test of PastEvent: AAPL against ^gspc around 2009-01-05,
    compared with previously recorded values.
    '''

    @staticmethod
    def _configured_event():
        '''Create the reference PastEvent with all its parameters set (not run yet)'''
        evt = PastEvent("./data")
        evt.symbol = "AAPL"
        evt.market = "^gspc"
        evt.lookback_days = 10
        evt.lookforward_days = 10
        evt.estimation_period = 252
        evt.date = datetime(2009, 1, 5)
        return evt

    @staticmethod
    def _truncated(series):
        '''First 10 characters of str() of each value of the series'''
        return [str(x)[0:10] for x in series.values.tolist()]

    def setUp0(self):
        '''Empty the ./data directories and then build the event'''
        self.da = DataAccess("./data")
        self.da.empty_dirs()
        self.setUp1()

    def setUp1(self):
        '''Build and run the reference event, stored on self.evt'''
        self.evt = self._configured_event()
        self.evt.run()

    def suite(self):
        suite = unittest.TestSuite()
        suite.addTest(PastEventTest("test_success"))
        return suite

    def test_success(self):
        self.setUp1()

        # NOTE: assertEqual is used instead of the assertEquals alias, which
        # was removed from unittest in Python 3.12

        # Test: Series names
        self.assertEqual(self.evt.er.name, "Expected Return")
        self.assertEqual(self.evt.ar.name, "Abnormal Return")
        self.assertEqual(self.evt.car.name, "Cumulative Abnormal Return")
        self.assertEqual(self.evt.t_test.name, "t test")
        self.assertEqual(self.evt.prob.name, "Probability")

        # Test: Index values
        self.assertEqual(self.evt.er.index[0].to_pydatetime(), datetime(2008, 12, 18))
        self.assertEqual(self.evt.er.index[-1].to_pydatetime(), datetime(2009, 1, 20))

        # Test: Values
        ans_er = [
            "-0.0212234",
            "0.00230225",
            "-0.0184308",
            "-0.0100507",
            "0.00507867",
            "0.00466393",
            "-0.0043450",
            "0.02326106",
            "0.01325643",
            "0.03029057",
            "-0.0051219",
            "0.00706623",
            "-0.0298596",
            "0.00275149",
            "-0.0213603",
            "-0.0225914",
            "0.00115150",
            "-0.0332277",
            "0.00073280",
            "0.00681676",
            "-0.0521229",
        ]
        ans_ar = [
            "0.02416528",
            "0.00412824",
            "-0.0288732",
            "0.01746335",
            "-0.0206124",
            "0.00435256",
            "0.01375131",
            "-0.0269885",
            "-0.0241298",
            "0.03294819",
            "0.04736761",
            "-0.0235995",
            "0.00827612",
            "0.01576086",
            "-0.0014955",
            "0.00142864",
            "-0.0118479",
            "0.00608256",
            "-0.0235514",
            "-0.0193999",
            "0.00188397",
        ]
        ans_car = [
            "0.02416528",
            "0.02829353",
            "-0.0005797",
            "0.01688362",
            "-0.0037288",
            "0.00062375",
            "0.01437507",
            "-0.0126134",
            "-0.0367432",
            "-0.0037950",
            "0.04357256",
            "0.01997299",
            "0.02824911",
            "0.04400998",
            "0.04251447",
            "0.04394312",
            "0.03209521",
            "0.03817778",
            "0.01462634",
            "-0.0047736",
            "-0.0028896",
        ]
        ans_t_test = [
            "0.93826998",
            "1.09855833",
            "-0.0225090",
            "0.65554389",
            "-0.1447789",
            "0.02421877",
            "0.55814377",
            "-0.4897438",
            "-1.4266368",
            "-0.1473509",
            "1.69180043",
            "0.77549544",
            "1.09683395",
            "1.70878414",
            "1.65071766",
            "1.70618799",
            "1.24616708",
            "1.48233614",
            "0.56789979",
            "-0.1853454",
            "-0.1121960",
        ]
        ans_prob = [
            "0.82594716",
            "0.86401961",
            "0.49102096",
            "0.74394118",
            "0.44244271",
            "0.50966094",
            "0.71162689",
            "0.31215756",
            "0.07684230",
            "0.44142751",
            "0.95465798",
            "0.78097652",
            "0.86364300",
            "0.95625452",
            "0.95060188",
            "0.95601345",
            "0.89364846",
            "0.93087456",
            "0.71494849",
            "0.42647903",
            "0.45533397",
        ]
        self.assertEqual(self._truncated(self.evt.er), ans_er)
        self.assertEqual(self._truncated(self.evt.ar), ans_ar)
        self.assertEqual(self._truncated(self.evt.car), ans_car)
        self.assertEqual(self._truncated(self.evt.t_test), ans_t_test)
        self.assertEqual(self._truncated(self.evt.prob), ans_prob)

        # Test: Compare results of equal events
        evt2 = self._configured_event()
        evt2.run()

        for result in ("er", "ar", "car", "t_test", "prob"):
            self.assertListEqual(self._truncated(getattr(self.evt, result)),
                                 self._truncated(getattr(evt2, result)))
예제 #23
0
 def delete_data():
     '''
     Point DataAccess at the 'data' directory located next to this file and
     call empty_dirs on a new DataAccess instance.
     '''
     this_file = inspect.getfile(inspect.currentframe())
     base_dir = os.path.dirname(os.path.abspath(this_file))
     DataAccess.path = os.path.join(base_dir, 'data')
     DataAccess().empty_dirs()
예제 #24
0
class DataAccessTest(unittest.TestCase):
    '''
    Tests of DataAccess.get_data, save and load using the ./data directory.
    '''

    def setUp1(self):
        # One instance is used to call empty_dirs, a second one is kept
        DataAccess('./data').empty_dirs()
        self.da = DataAccess('./data')

    def suite(self):
        tests = unittest.TestSuite()
        for test_name in ('test_get_data', 'test_save_load_custom_name'):
            tests.addTest(DataAccessTest(test_name))
        return tests

    def _check_get_data(self, symbols, field_s, names):
        '''Request 2008-2009 data and check the number of rows and the column names'''
        df = self.da.get_data(symbols, datetime(2008, 1, 1),
                              datetime(2009, 12, 31), field_s)
        self.assertEqual(len(df), 505)
        self.assertEqual(len(df.columns), len(names))
        self.assertEqual(list(df.columns), names)

    def test_get_data(self):
        '''
        Tests the length of row and columns and their names

        Note 1: File downloads are managed by finance.utils.FileManager
                Test for that on FileManagerTest.py
        Note 2: Other tests were done on the benchmark
        '''
        self.setUp1()

        # Single symbol, single field: the column is named after the field
        self._check_get_data("AAPL", "Close", ["Close"])

        # Multiple symbols, single field: columns are named after the symbols
        basket = ["AAPL", "GLD", "GOOG", "SPY", "XOM"]
        self._check_get_data(basket, "Close", basket)

        # Single symbol, multiple fields: columns are named after the fields
        self._check_get_data("AAPL", ["Close", "Volume"], ['Close', 'Volume'])

        # Multiple symbols, multiple fields: one column per symbol and field
        self._check_get_data(
            ["AAPL", "GLD", "GOOG", "SPY", "XOM"],
            ["Close", "Volume"],
            ['AAPL Close', 'AAPL Volume', 'GLD Close', 'GLD Volume',
             'GOOG Close', 'GOOG Volume', 'SPY Close', 'SPY Volume',
             'XOM Close', 'XOM Volume'])

    def test_save_load_custom_name(self):
        '''Save a DataFrame under a custom name and compare it with the loaded one'''
        self.setUp1()

        close = self.da.get_data(["AAPL", "GLD", "GOOG", "SPY", "XOM"],
                                 datetime(2008, 1, 1), datetime(2009, 12, 31),
                                 "Close", save=False)
        self.da.save(close, "customName.data")
        close_loaded = self.da.load("customName.data")

        self.assertEqual(list(close.columns), list(close_loaded.columns))
        self.assertEqual(len(close), len(close_loaded))
예제 #25
0
class MarketSimulator(object):
    '''
    Market Simulator.
    Receives:
        1. Initial cash
        2. List of trades (automatically searches and downloads missing information)
    After simulation:
        portfolio is a pandas.DataFrame with the values of the portfolio on each date

    '''
    def __init__(self, path='./data'):
        self.da = DataAccess(path)

        self.initial_cash = 0
        self.current_cash = 0

        self.trades = None     # orders indexed by date
        self.prices = None     # "Adj Close" of the traded symbols
        self.own = None        # number of shares owned on each date
        self.cash = None       # cash on each date
        self.equities = None   # value of the owned shares on each date
        self.portfolio = None  # cash + equities on each date

    def set_trades(self, trades):
        '''
        Use an already built DataFrame of trades.

        Parameters
        ----------
            trades: pandas.DataFrame indexed by date with the columns
                symbol, action and num_of_shares
        '''
        # simulate() reads the first and last index as the date range, so the
        # trades are kept sorted; mergesort keeps the order of same-day trades
        self.trades = trades.sort_index(kind='mergesort')

    def load_trades(self, file_path):
        '''
        Reads the csv file and parses the data

        Parameters
        ----------
            file_path: str, path to a csv with the columns
                year, month, day, symbol, action, num_of_shares
        '''
        # 1. Read the .csv file
        raw = pd.read_csv(file_path)
        # 2. Build the index from the columns (year, month, day)
        dates = [datetime(int(year), int(month), int(day))
                 for year, month, day in zip(raw['year'], raw['month'], raw['day'])]
        # 3. Keep only the necessary columns
        trades = raw[['symbol', 'action', 'num_of_shares']]
        trades = trades.set_index(pd.DatetimeIndex(dates))
        # 4. Sort the DataFrame by the index (dates); DataFrame.sort() no
        #    longer exists in pandas, and mergesort keeps the file order of
        #    trades made on the same day
        self.trades = trades.sort_index(kind='mergesort')

    def simulate(self, trades=None, ordersIsDataFrame=False):
        '''
        Simulates the trades, fills the DataFrames: cash, equities, portfolio

        Parameters
        ----------
            trades: str or pandas.DataFrame, default=None - path of a csv file
                or, when ordersIsDataFrame is True, a DataFrame of trades.
                If None the trades already loaded are used
            ordersIsDataFrame: bool, default=False
        '''
        # 0. Init the required data
        # 0.1 if trades is not None load them
        if trades is not None:
            if ordersIsDataFrame:
                self.set_trades(trades)
            else:
                # If there is no DataFrame then it is a file to be loaded
                self.load_trades(trades)
        # 0.2 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        start_date = self.trades.index[0].to_pydatetime()  # Convert from Timestamp to datetime
        end_date = self.trades.index[-1].to_pydatetime()
        self.prices = self.da.get_data(symbols, start_date, end_date, "Adj Close")
        # 0.3 Init other DataFrames, dictionaries
        self.cash = pd.DataFrame(index=self.prices.index, columns=['Cash'], dtype=float)
        self.own = pd.DataFrame(index=self.prices.index, columns=self.prices.columns, dtype=float)
        current_stocks = {symbol: 0 for symbol in symbols}
        # 0.4 Set the current cash to the initial cash before starting the simulation
        self.current_cash = self.initial_cash

        # 1. Fill the DataFrames
        for idx, row in self.trades.iterrows():
            # Note: idx is Timestamp, row is Series
            # Note 2: several trades on the same day overwrite the value of
            #         that day; the running totals keep all of them
            symbol = row['symbol']
            action = row['action'].lower()[0:1]
            num_of_shares = row['num_of_shares']

            # Buying spends cash and adds shares, selling does the opposite
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                self.current_cash = self.current_cash - cash_change
                current_stocks[symbol] = current_stocks[symbol] + num_of_shares
            elif action == 's':
                self.current_cash = self.current_cash + cash_change
                current_stocks[symbol] = current_stocks[symbol] - num_of_shares

            # .loc with (row, column) writes into the frames themselves; the
            # removed .ix[idx][symbol] form could write into a temporary copy
            self.cash.loc[idx, 'Cash'] = self.current_cash
            self.own.loc[idx, symbol] = current_stocks[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        # After forward-fill fill with zeros because initial values are still NaN
        self.own = self.own.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.own * self.prices).sum(axis=1)

        # 3. Get the value of the portfolio = cash + equities
        # axis=0 aligns the equities on the dates; a plain "+" would align
        # the Series index with the column names of the cash DataFrame
        self.portfolio = self.cash.add(self.equities, axis=0)
        self.portfolio.columns = ['Portfolio']
예제 #26
0
 def delete_data():
     '''
     Set DataAccess.path to the 'data' directory beside this file, then
     create a DataAccess and call its empty_dirs method.
     '''
     frame = inspect.currentframe()
     module_path = os.path.abspath(inspect.getfile(frame))
     DataAccess.path = os.path.join(os.path.dirname(module_path), 'data')
     accessor = DataAccess()
     accessor.empty_dirs()
예제 #27
0
class PastEvent(EventStudy):
    '''
    Analyse a particular equity on a particular date

    Necessary Parameters
    --------------------
        date: datetime
        symbol: str, eg: AAPL

    Optional Parameters
    -------------------
        market: str, default='SPY' - used to assess the event
        lookback_days: int, default=20 - past event window size
        lookforward_days: int, default=20 - future event window size
        estimation_period: int, default=255

    |-----255-----|-------20-------|-|--------20--------|
       estimation      lookback   event   lookforward
    '''
    def __init__(self):
        # Utils
        self.data_access = DataAccess()

        # Event definition
        self.symbol = None
        self.date = None  # Date of the event
        self.market = "SPY"
        self.field = 'adjusted_close'

        # Sizes of the periods, in days
        self.estimation_period = 255
        self.lookback_days = 20
        self.lookforward_days = 20

        # Results
        self.evt_window_data = None
        self.er = None
        self.ar = None
        self.car = None
        self.t_test = None
        self.prob = None

    def run(self):
        '''
        Load the data of the symbol and of the market for the whole period
        and run the market return method of the parent class.

        Note: self.market holds the market symbol before the call and the
        loaded market data after it.
        '''
        timeline = DateUtils.nyse_dates_event(
            self.date, self.lookback_days, self.lookforward_days,
            self.estimation_period)
        first_day, last_day = timeline[0], timeline[-1]

        # Data of the equity: whole period, and the event window alone
        self.data = self.data_access.get_data(
            self.symbol, first_day, last_day, self.field)
        window_size = self.lookforward_days + self.lookback_days + 1
        window_dates = timeline[-window_size:]
        self.evt_window_data = self.data[window_dates[0]:last_day]

        # Data of the market, replaces the symbol stored on self.market
        self.market = self.data_access.get_data(
            self.market, first_day, last_day, self.field)

        # Parameters of the General market_return Study
        estimation_end = timeline[self.estimation_period]
        self.start_period = first_day
        self.end_period = estimation_end
        self.start_window = estimation_end
        self.end_window = last_day

        # Run the Market Return method
        super().market_return()
예제 #28
0
class FinanceTest(unittest.TestCase):
    '''
    Base TestCase with assertions for numpy arrays, pandas Series and
    pandas DataFrames, exact or up to a number of decimal digits.
    '''

    def setUpDataAccess(self, delete=False):
        '''
        Set DataAccess.path to the 'data' directory next to this file, keep
        a new instance on self.data_access and call its empty_cache and
        empty_dir methods.

        Parameters
        ----------
            delete: bool, forwarded to empty_cache and empty_dir
        '''
        self_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        self.data_access = DataAccess()

        self.data_access.empty_cache(delete=delete)
        self.data_access.empty_dir(delete=delete)

    @staticmethod
    def delete_data():
        '''
        Set DataAccess.path to the 'data' directory next to this file and
        call empty_dirs on a new instance.
        '''
        self_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        data_access = DataAccess()
        data_access.empty_dirs()

    def assertEqual(self, ans, sol, digits=0):
        '''
        Compare two values, dispatching on their types.

        Parameters
        ----------
            ans: value obtained
            sol: value expected
            digits: int, default=0 - decimals to compare, 0 means exact
        '''
        if type(ans) is np.ndarray and type(sol) is np.ndarray:
            self.assertArrayEqual(ans, sol, digits)
        elif isinstance(ans, pd.Series) and isinstance(sol, pd.Series):
            # pd.TimeSeries is gone from current pandas, isinstance covers
            # Series and its subclasses; digits is now always forwarded
            self.assertSeriesEqual(ans, sol, digits)
        elif type(ans) is pd.DataFrame and type(sol) is pd.DataFrame:
            self.assertFrameEqual(ans, sol, digits)
        elif digits == 0:
            super().assertEqual(ans, sol)
        else:
            super().assertAlmostEqual(ans, sol, digits)

    def assertFloat(self, obj):
        '''Assert that the type of obj is exactly np.float64'''
        self.assertIs(type(obj), np.float64)

    def assertArray(self, obj):
        '''Assert that the type of obj is exactly np.ndarray'''
        self.assertIs(type(obj), np.ndarray)

    def assertArrayEqual(self, ans, sol, digits=0):
        '''Assert that two np.ndarray are equal, or almost equal to digits decimals'''
        self.assertArray(ans)
        self.assertArray(sol)
        if digits == 0:
            np_test.assert_array_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans, sol, digits)

    def assertSeries(self, obj):
        '''Assert that obj is a pandas.Series (or a subclass of it)'''
        self.assertIsInstance(obj, pd.Series)

    def assertSeriesEqual(self, ans, sol, digits=0):
        '''Assert that two Series share their name and have equal (or almost equal) values'''
        self.assertSeries(ans)
        self.assertSeries(sol)
        # The assertEquals alias was removed in Python 3.12; the names are
        # compared with the plain unittest assertion
        super().assertEqual(ans.name, sol.name)

        if digits == 0:
            # Passing digits positionally made pandas read it as
            # check_dtype=0; that setting is kept, but spelled out
            pd_test.assert_series_equal(ans, sol, check_dtype=False)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)

    def assertFrame(self, obj):
        '''Assert that the type of obj is exactly pandas.DataFrame'''
        self.assertIs(type(obj), pd.DataFrame)

    def assertFrameEqual(self, ans, sol, digits=0):
        '''Assert that two DataFrames share their columns name and have equal (or almost equal) values'''
        self.assertFrame(ans)
        self.assertFrame(sol)
        super().assertEqual(ans.columns.name, sol.columns.name)

        if digits == 0:
            pd_test.assert_frame_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)
예제 #29
0
class MarketSimulator(object):
    '''
    Market Simulator

    Needs a list of trades to simulate, options are:
        1. Provide a custom pandas.DataFrame(index=DatetimeIndex):
                        symbol  action    num_of_shares
            2011-01-10   AAPL    Buy           1500
            2011-01-13   AAPL    Sell          1500
            2011-01-13   IBM     Buy           4000
            2011-01-26   GOOG    Buy           1000
        2. Load the trades from a csv file:
            year,month,day,symbol,action,num_of_shares
            2011,1,10,AAPL,Buy,1500
            2011,1,13,AAPL,Sell,1500
            2011,1,13,IBM,Buy,4000
            2011,1,26,GOOG,Buy,1000
        3. Create trades from an event list: usually from EventFinder

    '''
    def __init__(self):
        self.da = DataAccess()

        self.initial_cash = 0
        self.field = 'adjusted_close'

        self.trades = None         # orders indexed by date
        self.prices = None         # prices of the traded symbols
        self.num_of_shares = None  # shares owned of each symbol on each date
        self.cash = None           # cash on each date
        self.equities = None       # value of the owned shares on each date
        self.portfolio = None      # cash + equities on each date

    def load_trades(self, file_path):
        '''
        Load trades from a csv file

        csv file example:
        year,month,day,symbol,action,num_of_shares
        2011,1,10,AAPL,Buy,1500
        2011,1,13,AAPL,Sell,1500
        2011,1,13,IBM,Buy,4000
        2011,1,26,GOOG,Buy,1000

        Parameters
        ----------
            file_path: str, path to the csv containing the orders

        '''
        # 1. Read the .csv file
        raw = pd.read_csv(file_path)

        # 2. Build the index from the columns (year, month, day)
        dates = [datetime(int(year), int(month), int(day))
                 for year, month, day in zip(raw['year'], raw['month'], raw['day'])]

        # 3. Keep only the necessary columns
        trades = raw[['symbol', 'action', 'num_of_shares']]
        trades = trades.set_index(pd.DatetimeIndex(dates))

        # 4. Sort by the index (dates); mergesort keeps the file order of
        #    the trades made on the same day
        self.trades = trades.sort_index(kind='mergesort')

    def create_trades_from_event(self, eventList,
                                eventDayAction='Buy', eventDayShares=100,
                                actionAfter='Sell', daysAfter=5, sharesAfter=100,
                                actionBefore=None, daysBefore=5, sharesBefore=100):
        '''
        Creates trades using an event list; usually from the EventFinder.
        Also creates an additional order after each event as defined by the user

        Parameters
        ----------
            eventList: pandas.Series, symbols indexed by the date of the event
            eventDayAction: str, default='Buy' - action on the event date
            eventDayShares: int, default=100 - shares traded on the event date
            actionAfter: str or None, default='Sell' - action of the order
                placed after each event, None to place no such order
            daysAfter: int, default=5 - passed to DateUtils.nyse_add to get
                the date of the order placed after the event
            sharesAfter: int, default=100 - shares of the order after the event
            actionBefore, daysBefore, sharesBefore: not used yet (see TODO)
        '''
        self.trades = pd.DataFrame(index=eventList.index, columns=['symbol', 'action', 'num_of_shares'])
        self.trades['symbol'] = eventList
        self.trades['action'] = eventDayAction
        self.trades['num_of_shares'] = eventDayShares

        # TODO: Actions BEFORE

        if actionAfter is not None and not self.trades.empty:
            # All the follow-up orders are built at once and concatenated;
            # DataFrame.append no longer exists in pandas
            after_dates = [DateUtils.nyse_add(idx.to_pydatetime(), daysAfter)
                           for idx in self.trades.index]
            after = pd.DataFrame({'symbol': self.trades['symbol'].values,
                                  'action': actionAfter,
                                  'num_of_shares': sharesAfter},
                                 index=after_dates, columns=self.trades.columns)
            self.trades = pd.concat([self.trades, after])

        # Same sorting as load_trades (DataFrame.sort() no longer exists)
        self.trades = self.trades.sort_index(kind='mergesort')

    def simulate(self):
        '''
        Simulates the trades stored on self.trades

        Returns
        -------
            Nothing: Fills prices, num_of_shares, cash, equities and portfolio
        '''
        # 0.1 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        start_date = self.trades.index[0].to_pydatetime()  # Convert from Timestamp to datetime
        end_date = self.trades.index[-1].to_pydatetime()  # Convert from Timestamp to datetime
        self.prices = self.da.get_data(symbols, start_date, end_date, self.field)
        # Fill the gaps of the prices before trading: a NaN price on the
        # date of a trade would turn the cash into NaN for the remaining dates
        self.prices = self.prices.ffill().bfill()
        # 0.2 Init DataFrames
        self.cash = pd.Series(index=self.prices.index, name='Cash', dtype=np.float64)
        self.num_of_shares = pd.DataFrame(index=self.prices.index, columns=self.prices.columns, dtype=np.float64)

        # 1. Fill the DataFrames
        current_cash = self.initial_cash
        current_shares = {symbol: 0 for symbol in symbols}
        for idx, row in self.trades.iterrows():
            # 1.1 Get info of the row
            symbol = row['symbol']
            action = row['action'].lower()[0:1]
            num_of_shares = row['num_of_shares']

            # 1.2 Buying stocks spends cash, selling stocks earns cash
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                current_cash = current_cash - cash_change
                current_shares[symbol] = current_shares[symbol] + num_of_shares
            elif action == 's':
                current_cash = current_cash + cash_change
                current_shares[symbol] = current_shares[symbol] - num_of_shares

            # 1.3 Store the running totals on the date of the trade.
            # .loc with (row, column) writes into the frame itself; the
            # removed .ix[idx][symbol] form could write into a temporary copy
            self.cash.loc[idx] = current_cash
            self.num_of_shares.loc[idx, symbol] = current_shares[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        self.num_of_shares = self.num_of_shares.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.num_of_shares * self.prices).sum(axis=1)
        self.equities.name = 'Equities value'
        # 3. Get the value of the portfolio = cash + equities value
        self.portfolio = self.cash + self.equities
        self.portfolio.name = 'Portfolio value'
예제 #30
0
class EventFinder(object):
    '''
    Searches the dates on which the condition is met by each symbol.

    After search():
        matrix: pandas.DataFrame (dates x symbols) with 1 where an event was
            found, reduced to the rows and columns that have events
        list: pandas.Series named 'Equity', one symbol per date with events
        num_events: int, length of list
    '''
    def __init__(self):
        self.data_access = DataAccess()

        self.symbols = []
        self.start_date = None
        self.end_date = None
        self.field = 'Adj Close'

        self.condition = Condition()
        self.matrix = None
        self.list = None  # filled by search()
        self.num_events = 0

        self.oneEventPerEquity = True

    def generate_filename(self):
        '''
        Name used to save/load the matrix: symbols, dates, field,
        condition id and oneEventPerEquity concatenated.
        '''
        return '%s%s%s%s%s%s' % (
            ''.join(self.symbols), self.start_date.strftime('%Y-%m-%d'),
            self.end_date.strftime('%Y-%m-%d'), self.field, self.condition.id,
            str(self.oneEventPerEquity))

    def search(self, oneEventPerEquity=True, useCache=True, save=True):
        '''
        Fills matrix, list and num_events

        Parameters
        ----------
            oneEventPerEquity: bool, default=True - stop at the first event
                of each symbol
            useCache: bool, default=True - use a previously saved matrix if
                data_access.load returns one
            save: bool, default=True - save the matrix after the search
        '''
        self.oneEventPerEquity = oneEventPerEquity
        filename = self.generate_filename()

        # 1. Load the data if requested and available
        #    (the cache is not read at all when it is not going to be used)
        self.matrix = self.data_access.load(filename, '.evt_matrix') if useCache else None

        if self.matrix is None:
            # 2. Data was not loaded
            # 2.1 Get the dates, and Download/Import the data
            nyse_dates = DateUtils.nyse_dates(start=self.start_date,
                                              end=self.end_date)
            data = self.data_access.get_data(self.symbols, nyse_dates[0],
                                             nyse_dates[-1], self.field)
            # Special case
            if len(data.columns) == 1:
                data.columns = self.symbols

            # 2.2 Create and fill the matrix of events
            data = data[self.start_date:self.end_date]
            self.matrix = pd.DataFrame(index=data.index, columns=self.symbols)

            for col, symbol in enumerate(self.symbols):
                # The first row is skipped, so position i of candidates is
                # row i + 1 of the matrix
                candidates = data[symbol][1:]
                for i, item in enumerate(candidates):
                    if self.condition.function(i, item, candidates):
                        # Positional write into the matrix itself; the
                        # chained matrix[symbol][i + 1] form used an integer
                        # key on a date index and could write into a copy
                        self.matrix.iloc[i + 1, col] = 1
                        if oneEventPerEquity:
                            break

        # 3. Calculate other results and save if requested
        # Reduce Matrix: Sum each row and columns: if is greater than 0 there is an event
        self.matrix = self.matrix[self.matrix.fillna(value=0).sum(axis=1) > 0]
        valid_cols = self.matrix.columns[self.matrix.fillna(value=0).sum(
            axis=0) > 0].values
        self.matrix = self.matrix[valid_cols]
        # 3.2 Create list of events: first symbol with an event on each date
        self.list = pd.Series(index=self.matrix.index,
                              name='Equity',
                              dtype=str)
        for idx, row in self.matrix.iterrows():
            self.list.loc[idx] = row[row == 1].index[0]
        # 3.3 Save
        self.num_events = len(self.list)
        if save:
            self.data_access.save(self.matrix, filename, '.evt_matrix')
예제 #31
0
class EventFinder(object):
    """Search equity data for events and record them in a date-by-symbol matrix.

    The caller configures ``symbols``, ``start_date``, ``end_date``, ``field``
    and ``condition`` and then calls :meth:`search`. Results are stored on the
    instance:

    * ``matrix``     -- DataFrame indexed by date with one column per symbol;
                        a cell holds 1 where the condition fired, NaN elsewhere.
                        Rows and columns without any event are dropped.
    * ``list``       -- Series (named ``'Equity'``) mapping each event date to
                        the first symbol that has an event on that date.
    * ``num_events`` -- number of entries in ``list``.
    """

    def __init__(self):
        self.data_access = DataAccess()

        # Search parameters (set by the caller before calling search()).
        self.symbols = []
        self.start_date = None
        self.end_date = None
        self.field = 'adjusted_close'

        self.condition = Condition()

        # Results populated by search().
        self.matrix = None
        self.list = None
        self.num_events = 0

        self.oneEventPerEquity = True

    def generate_filename(self):
        """Return the cache file name that identifies the current search setup.

        The name is the concatenation of the symbols, both dates formatted as
        ``YYYY-MM-DD``, the field, the condition id and the
        ``oneEventPerEquity`` flag.
        """
        return '%s%s%s%s%s%s' % (
            ''.join(self.symbols),
            self.start_date.strftime('%Y-%m-%d'),
            self.end_date.strftime('%Y-%m-%d'),
            self.field,
            self.condition.id,
            str(self.oneEventPerEquity),
        )

    def _build_matrix(self, oneEventPerEquity):
        """Fetch the data and return a fresh (unreduced) event matrix."""
        # Get the dates, and download/import the data.
        nyse_dates = DateUtils.nyse_dates(start=self.start_date, end=self.end_date)
        data = self.data_access.get_data(self.symbols, nyse_dates[0],
                                         nyse_dates[-1], self.field)
        # Special case: a single column comes back without the symbol as name.
        if len(data.columns) == 1:
            data.columns = self.symbols

        data = data.loc[self.start_date:self.end_date]
        matrix = pd.DataFrame(index=data.index, columns=self.symbols)

        for col, symbol in enumerate(self.symbols):
            # The first row is skipped, so position ``i`` in ``series``
            # corresponds to row ``i + 1`` of the matrix.
            series = data[symbol].iloc[1:]
            for i, item in enumerate(series):
                if self.condition.function(i, item, series):
                    # Single positional write: chained ``matrix[symbol][i + 1]``
                    # may assign into a temporary copy and silently do nothing.
                    matrix.iloc[i + 1, col] = 1
                    if oneEventPerEquity:
                        break
        return matrix

    def search(self, oneEventPerEquity=True, useCache=True, save=True):
        """Find the events and store ``matrix``, ``list`` and ``num_events``.

        Parameters
        ----------
        oneEventPerEquity : bool
            If true, stop scanning a symbol after its first event.
        useCache : bool
            If true, try to load a previously saved matrix via
            ``data_access.load`` and only compute it when nothing is returned.
        save : bool
            If true, store the reduced matrix via ``data_access.save``.
        """
        self.oneEventPerEquity = oneEventPerEquity

        # 1. Load the matrix if requested and available
        self.matrix = None
        if useCache:
            self.matrix = self.data_access.load(self.generate_filename(), '.evt_matrix')

        # 2. Matrix was not loaded: compute it
        if self.matrix is None:
            self.matrix = self._build_matrix(oneEventPerEquity)

        # 3. Calculate other results and save if requested
        # 3.1 Reduce matrix: keep only rows and columns that contain an event
        self.matrix = self.matrix[self.matrix.fillna(value=0).sum(axis=1) > 0]
        valid_cols = self.matrix.columns[self.matrix.fillna(value=0).sum(axis=0) > 0].values
        self.matrix = self.matrix[valid_cols]

        # 3.2 Create list of events: first symbol flagged on each event date
        equities = [row[row == 1].index[0] for _, row in self.matrix.iterrows()]
        self.list = pd.Series(equities, index=self.matrix.index,
                              name='Equity', dtype=object)

        # 3.3 Save
        self.num_events = len(self.list)
        if save:
            self.data_access.save(self.matrix, self.generate_filename(), '.evt_matrix')
예제 #32
0
 def setUp0(self):
     """Create a DataAccess on "./data", call its empty_dirs(), then run setUp1()."""
     # NOTE(review): empty_dirs() presumably clears previously stored data so
     # the fixture starts from a clean directory -- confirm against DataAccess.
     self.da = DataAccess("./data")
     self.da.empty_dirs()
     self.setUp1()