def benchmark():
    """Time three consecutive loads of the same five "Close" series.

    The data directories are emptied first (``empty_dirs(delete=False)``), then
    ``DataAccess.get_data`` is called three times with identical symbols and
    dates: once with default arguments, once with ``useCache=False`` and once
    with ``useCache=True``. After each call the elapsed CPU time and elapsed
    wall-clock time are printed.
    """
    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # replacement. Fall back to clock() only on interpreters that lack it.
    try:
        from time import process_time as cpu_clock
    except ImportError:
        from time import clock as cpu_clock
    from time import time

    da = DataAccess('./data')
    da.empty_dirs(delete=False)

    symbols = ["AAPL", "GLD", "GOOG", "SPY", "XOM"]
    start_date = datetime(2008, 1, 1)
    end_date = datetime(2009, 12, 31)
    fields = "Close"

    def timed_load(title, **options):
        # Print the title, run one get_data call, print (cpu, wall) deltas.
        print(title)
        cpu_start, wall_start = cpu_clock(), time()
        da.get_data(symbols, start_date, end_date, fields, **options)
        cpu_end, wall_end = cpu_clock(), time()
        print(" ", cpu_end - cpu_start, wall_end - wall_start)

    timed_load('Directory empty: Download and save 5 stocks')
    timed_load('Load 5 stocks from .csv', useCache=False)
    timed_load('Load 5 stocks from serialized', useCache=True)
def __init__(self):
    """Create the data accessor and set default parameters and empty results."""
    self.data_access = DataAccess()
    self.list = None
    self.market = 'SPY'
    # Same window size on both sides of the event by default.
    self.lookback_days = self.lookforward_days = 20
    self.estimation_period = 200
    self.field = 'Adj Close'

    # Result attributes start out empty.
    for result_name in (
            'equities_window', 'equities_estimation',
            'market_window', 'market_estimation',
            'reg_estimation',
            'dr_equities_window', 'dr_equities_estimation',
            'dr_market_window', 'dr_market_estimation',
            'er', 'ar', 'car'):
        setattr(self, result_name, None)
def setUpDataAccess(self, delete=False):
    """Point DataAccess at the ``data`` folder next to this file and reset it.

    Sets the class-level ``DataAccess.path``, stores a new ``DataAccess`` on
    ``self.data_access`` and calls its ``empty_cache`` and ``empty_dir`` with
    the given ``delete`` flag.
    """
    this_file = inspect.getfile(inspect.currentframe())
    here = os.path.dirname(os.path.abspath(this_file))
    DataAccess.path = os.path.join(here, 'data')

    accessor = DataAccess()
    self.data_access = accessor
    accessor.empty_cache(delete=delete)
    accessor.empty_dir(delete=delete)
def __init__(self):
    """Create the data accessor and set simulator defaults and empty results."""
    self.da = DataAccess()
    self.initial_cash = 0
    self.field = 'adjusted_close'

    # Inputs and results start out empty.
    for empty_name in ('trades', 'prices', 'num_of_shares',
                       'cash', 'equities', 'portfolio'):
        setattr(self, empty_name, None)
def __init__(self):
    """Create the data accessor, a Condition, and default search parameters."""
    self.data_access = DataAccess()
    self.symbols = []
    self.start_date = self.end_date = None
    self.field = 'Adj Close'
    self.condition = Condition()
    self.matrix = None
    self.num_events = 0
    self.oneEventPerEquity = True
def setUpDataAccess(self, delete=False):
    """Point DataAccess at the ``data`` folder next to this file and reset it.

    Sets the class-level ``DataAccess.path``, stores a new ``DataAccess`` on
    ``self.data_access`` and calls its ``empty_cache`` and ``empty_dir`` with
    the given ``delete`` flag.
    """
    source_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
    DataAccess.path = os.path.join(os.path.dirname(source_path), 'data')

    self.data_access = accessor = DataAccess()
    accessor.empty_cache(delete=delete)
    accessor.empty_dir(delete=delete)
def __init__(self):
    """Create the data accessor and set default parameters and empty results."""
    self.data_access = DataAccess()
    self.list = None
    self.market = "SPY"
    # Same window size on both sides of the event by default.
    self.lookback_days = self.lookforward_days = 20
    self.estimation_period = 200
    self.field = "Adj Close"

    # Result attributes start out empty.
    result_names = (
        "equities_window",
        "equities_estimation",
        "market_window",
        "market_estimation",
        "reg_estimation",
        "dr_equities_window",
        "dr_equities_estimation",
        "dr_market_window",
        "dr_market_estimation",
        "er",
        "ar",
        "car",
    )
    for result_name in result_names:
        setattr(self, result_name, None)
def __init__(self):
    """Create the data accessor and set event defaults and empty results."""
    # Utils
    self.data_access = DataAccess()

    # Variables
    self.date = None  # Date of the event
    self.symbol = None
    self.field = 'adjusted_close'
    self.lookback_days = self.lookforward_days = 20
    self.estimation_period = 255
    self.market = "SPY"

    # Results
    for result_name in ('evt_window_data', 'er', 'ar', 'car',
                        't_test', 'prob'):
        setattr(self, result_name, None)
def __init__(self, path='./data'):
    """Create a DataAccess for ``path`` and reset cash and result attributes."""
    self.da = DataAccess(path)
    self.initial_cash = self.current_cash = 0

    # Inputs and results start out empty.
    for empty_name in ('trades', 'prices', 'own',
                       'cash', 'equities', 'portfolio'):
        setattr(self, empty_name, None)
class PastEvent(SimpleEvent):
    """Single-symbol, single-date event whose data is loaded via DataAccess.

    Set ``date`` and ``symbol`` (and optionally the window sizes, ``field``
    and ``market``) and call ``run()``.
    """

    def __init__(self, path="./data"):
        """Create a DataAccess for ``path`` and set defaults and empty results."""
        # Utils
        self.da = DataAccess(path)

        # Variables
        self.date = None  # Date of the event
        self.symbol = None
        self.field = "Adj Close"
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Results
        for result_name in ("er", "ar", "car", "t_test", "prob"):
            setattr(self, result_name, None)

    def run(self):
        """Load equity and market data around the event and run market_return().

        Note that ``self.market`` holds the market symbol on entry and is
        replaced by the loaded market data.
        """
        span = DateUtils.get_nyse_dates_event(
            self.date,
            self.lookback_days + self.estimation_period,
            self.lookforward_days,
            list=True,
        )
        first_day, last_day = span[0], span[-1]

        # Data to the general event
        self.data = self.da.get_data(self.symbol, first_day, last_day, self.field)
        self.market = self.da.get_data(self.market, first_day, last_day, self.field)

        # Parameters of the General Event: the estimation period ends on the
        # same date the event window starts.
        boundary = span[self.estimation_period]
        self.start_period = span[0]
        self.end_period = boundary
        self.start_window = boundary
        self.end_window = span[-1]

        # Run the Market Return method
        super().market_return()
def __init__(self):
    """Create the data accessor, a Condition, and default search parameters."""
    self.data_access = DataAccess()
    self.symbols = []
    self.start_date = self.end_date = None
    self.field = 'adjusted_close'
    self.condition = Condition()
    self.matrix = None
    self.num_events = 0
    self.oneEventPerEquity = True
def __init__(self, path="./data"):
    """Create a DataAccess for ``path`` and set event defaults and empty results."""
    # Utils
    self.da = DataAccess(path)

    # Variables
    self.date = None  # Date of the event
    self.symbol = None
    self.field = "Adj Close"
    self.lookback_days = self.lookforward_days = 20
    self.estimation_period = 255
    self.market = "SPY"

    # Results
    for result_name in ("er", "ar", "car", "t_test", "prob"):
        setattr(self, result_name, None)
def __init__(self):
    """Create the data accessor and set event defaults and empty results."""
    # Utils
    self.data_access = DataAccess()

    # Variables
    self.date = None  # Date of the event
    self.symbol = None
    self.field = 'Adj Close'
    self.lookback_days = self.lookforward_days = 20
    self.estimation_period = 255
    self.market = "SPY"

    # Results
    for result_name in ('evt_window_data', 'er', 'ar', 'car',
                        't_test', 'prob'):
        setattr(self, result_name, None)
class MultipleEvents(object):
    '''
    Event study over several events at once.

    ``self.list`` holds the events: it is indexed by event date and its values
    are the symbols (``run`` reads ``self.list.index`` and iterates its
    values). ``run()`` fills the result attributes; ``plot()`` draws the
    cross-event mean of one of them.
    '''

    def __init__(self):
        self.data_access = DataAccess()
        self.list = None
        self.market = 'SPY'
        self.lookback_days = 20
        self.lookforward_days = 20
        self.estimation_period = 200
        self.field = 'Adj Close'

        # Result
        self.equities_window = None
        self.equities_estimation = None
        self.market_window = None
        self.market_estimation = None
        self.reg_estimation = None
        self.dr_equities_window = None
        self.dr_equities_estimation = None
        self.dr_market_window = None
        self.dr_market_estimation = None
        self.er = None
        self.ar = None
        self.car = None

    @staticmethod
    def _store_slice(target, source, start_idx, end_idx, col_name):
        '''Copy source[start_idx:end_idx] into target[col_name], re-indexed to target.'''
        chunk = source[start_idx:end_idx]
        chunk.index = target.index
        target[col_name] = chunk

    def run(self):
        '''
        Assess the events

        |-----200-----|-------20-------|-|--------20--------|
           estimation      lookback   event   lookforward

        Prerequisites
        -------------
            self.list (events: index = dates, values = symbols)
            self.market = 'SPY'
            self.lookback_days = 20
            self.lookforward_days = 20
            self.estimation_period = 200
            self.field = 'Adj Close'

        Fills, with one column per event ("<symbol> <YYYY-MM-DD>"): the
        price and daily-return frames for the estimation period and the event
        window, the per-event regression (``reg_estimation``), ``er``, ``ar``
        and ``car``, plus their mean/std across events (``mean_*`` / ``std_*``).
        '''
        # 0. Get the dates and Download/Import the data
        symbols = list(set(self.list))
        start_date = self.list.index[0]
        end_date = self.list.index[-1]
        nyse_dates = DateUtils.nyse_dates(
            start=start_date, end=end_date,
            lookbackDays=self.lookback_days + self.estimation_period + 1,
            lookforwardDays=self.lookforward_days)

        data = self.data_access.get_data(symbols, nyse_dates[0],
                                         nyse_dates[-1], self.field)
        market = self.data_access.get_data(self.market, nyse_dates[0],
                                           nyse_dates[-1], self.field)

        if len(data.columns) == 1:
            data.columns = symbols
        if len(data) > len(market):
            market = market.reindex(data.index)
        # NOTE(review): assumes the market frame has exactly one column, which
        # is looked up below as market[self.field].
        market.columns = [self.field]

        # fillna(method=...) is deprecated in pandas; ffill()/bfill() are the
        # direct equivalents.
        data = data.ffill().bfill()
        market = market.ffill().bfill()

        # 1. Create DataFrames with the data of each event
        windows_indexes = range(-self.lookback_days, self.lookforward_days + 1)
        estimation_indexes = range(
            -self.estimation_period - self.lookback_days, -self.lookback_days)

        self.equities_window = pd.DataFrame(index=windows_indexes)
        self.equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.market_window = pd.DataFrame(index=windows_indexes)
        self.market_estimation = pd.DataFrame(index=estimation_indexes)

        dr_data = Calculator.returns(data)
        dr_market = Calculator.returns(market)
        self.dr_equities_window = pd.DataFrame(index=windows_indexes)
        self.dr_equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.dr_market_window = pd.DataFrame(index=windows_indexes)
        self.dr_market_estimation = pd.DataFrame(index=estimation_indexes)

        # 2. Iterate over the list of events and fill the DataFrames.
        # Iterating index and values together avoids integer-position lookups
        # (self.list[i]) on a date-indexed Series.
        for evt_timestamp, symbol in zip(self.list.index, self.list):
            evt_date = evt_timestamp.to_pydatetime()
            col_name = symbol + ' ' + evt_date.strftime('%Y-%m-%d')
            evt_idx = DateUtils.search_closer_date(
                evt_date, data[symbol].index, exact=True)

            # 2.1 Estimation period: positions [start_idx, end_idx) on data
            start_idx = evt_idx - self.lookback_days - self.estimation_period
            end_idx = evt_idx - self.lookback_days
            self._store_slice(self.equities_estimation, data[symbol],
                              start_idx, end_idx, col_name)
            self._store_slice(self.dr_equities_estimation, dr_data[symbol],
                              start_idx, end_idx, col_name)
            self._store_slice(self.market_estimation, market[self.field],
                              start_idx, end_idx, col_name)
            self._store_slice(self.dr_market_estimation, dr_market,
                              start_idx, end_idx, col_name)

            # 2.2 Event window: positions [start_idx, end_idx) on data
            start_idx = evt_idx - self.lookback_days
            end_idx = evt_idx + self.lookforward_days + 1
            self._store_slice(self.equities_window, data[symbol],
                              start_idx, end_idx, col_name)
            self._store_slice(self.dr_equities_window, dr_data[symbol],
                              start_idx, end_idx, col_name)
            self._store_slice(self.market_window, market[self.field],
                              start_idx, end_idx, col_name)
            self._store_slice(self.dr_market_window, dr_market,
                              start_idx, end_idx, col_name)

        # 3. Calculate the linear regression -> expected return
        self.reg_estimation = pd.DataFrame(
            index=self.dr_market_estimation.columns,
            columns=['Intercept', 'Slope', 'Std Error'])
        self.er = pd.DataFrame(index=self.dr_market_window.index,
                               columns=self.dr_market_window.columns)

        # For each column (event) on the estimation period
        for col in self.dr_market_estimation.columns:
            # 3.1 Calculate the regression
            x = self.dr_market_estimation[col]
            y = self.dr_equities_estimation[col]
            slope, intercept, r_value, p_value, slope_std_error = \
                stats.linregress(x, y)
            # .loc writes into the frame itself; chained df[col][row] = v may
            # write to a temporary copy.
            self.reg_estimation.loc[col, 'Slope'] = slope
            self.reg_estimation.loc[col, 'Intercept'] = intercept
            self.reg_estimation.loc[col, 'Std Error'] = slope_std_error
            # 3.2 Calculate the expected return of each date using the regression
            self.er[col] = intercept + self.dr_market_window[col] * slope

        # 4. Final results
        self.er.columns.name = 'Expected return'
        self.mean_er = self.er.mean(axis=1)
        self.mean_er.name = 'Mean ER'
        self.std_er = self.er.std(axis=1)
        self.std_er.name = 'Std ER'

        self.ar = self.dr_equities_window - self.er
        self.ar.columns.name = 'Abnormal return'
        self.mean_ar = self.ar.mean(axis=1)
        self.mean_ar.name = 'Mean AR'
        self.std_ar = self.ar.std(axis=1)
        self.std_ar.name = 'Std AR'

        self.car = self.ar.apply(np.cumsum)
        self.car.columns.name = 'Cum Abnormal Return'
        self.mean_car = self.car.mean(axis=1)
        self.mean_car.name = 'Mean CAR'
        self.std_car = self.car.std(axis=1)
        # Previously this line re-assigned mean_car.name, leaving std_car
        # without a name.
        self.std_car.name = 'Std CAR'

    def plot(self, which):
        '''
        Plot the cross-event mean of one result with its std as error band.

        Parameters
        ----------
            which: str, one of 'car', 'ar', 'er'

        Raises
        ------
            ValueError: if `which` is not one of the supported names
        '''
        series_names = {
            'car': ('mean_car', 'std_car'),
            'ar': ('mean_ar', 'std_ar'),
            'er': ('mean_er', 'std_er'),
        }
        if which not in series_names:
            raise ValueError(
                "which must be one of 'car', 'ar', 'er'; got %r" % (which,))

        mean_attr, std_attr = series_names[which]
        x = self.mean_car.index.values
        mean_series = getattr(self, mean_attr)
        std_series = getattr(self, std_attr)
        errorfill(x, mean_series.values, std_series.values,
                  label=mean_series.name)
import os
from datetime import datetime

from finance.utils import DataAccess

# Option 1: Set the environment variable FINANCEPATH
os.environ["FINANCEPATH"] = './data'
da = DataAccess()

start_date = datetime(2015, 1, 1)
end_date = datetime(2017, 12, 31)
symbols = ["GOOG", "SPY", "XOM"]
fields = 'close'

close = da.get_data(symbols, start_date, end_date, fields)
print(close)

# Option 2: Manually set the path on the class; overrides option 1
DataAccess.path = 'data2'
da = DataAccess()

start_date = datetime(2015, 1, 1)
end_date = datetime(2017, 12, 31)
symbols = ["AAPL", "GLD"]
fields = 'close'

close = da.get_data(symbols, start_date, end_date, fields)
print(close)
class MarketSimulator(object):
    '''
    Market Simulator

    Needs a list of trades to simulate, options are:
        1. Provide a custom pandas.DataFrame(index=DatetimeIndex):
                        symbol  action  num_of_shares
            2011-01-10    AAPL     Buy           1500
            2011-01-13    AAPL    Sell           1500
            2011-01-13     IBM     Buy           4000
            2011-01-26    GOOG     Buy           1000
        2. Load the trades from a csv file:
            year,month,day,symbol,action,num_of_shares
            2011,1,10,AAPL,Buy,1500
            2011,1,13,AAPL,Sell,1500
            2011,1,13,IBM,Buy,4000
            2011,1,26,GOOG,Buy,1000
        3. Create trades from an event list: usually from EventFinder
    '''

    def __init__(self):
        self.da = DataAccess()
        self.initial_cash = 0
        self.field = 'adjusted_close'
        self.trades = None
        self.prices = None
        self.num_of_shares = None
        self.cash = None
        self.equities = None
        self.portfolio = None

    def load_trades(self, file_path):
        '''
        Load trades from a csv file into self.trades

        csv file example:
            year,month,day,symbol,action,num_of_shares
            2011,1,10,AAPL,Buy,1500
            2011,1,13,AAPL,Sell,1500
            2011,1,13,IBM,Buy,4000
            2011,1,26,GOOG,Buy,1000

        Parameters
        ----------
            file_path: str, path to the csv containing the orders
        '''
        # 1. Read the .csv file
        raw = pd.read_csv(file_path)

        # 2. Build the date index from the (year, month, day) columns
        dates = pd.to_datetime(raw[['year', 'month', 'day']])
        raw.index = pd.DatetimeIndex(dates)

        # 3. Keep only the trade columns
        trades = raw[['symbol', 'action', 'num_of_shares']]

        # 4. Sort by date; a stable sort keeps same-day trades in file order
        self.trades = trades.sort_index(kind='mergesort')

    def create_trades_from_event(self, eventList, eventDayAction='Buy',
                                 eventDayShares=100, actionAfter='Sell',
                                 daysAfter=5, sharesAfter=100,
                                 actionBefore=None, daysBefore=5,
                                 sharesBefore=100):
        '''
        Creates trades using an event list; usually from the EventFinder.
        Also creates a follow-up order after each event as defined by the user.

        Parameters
        ----------
            eventList: pandas.Series, index = event dates, values = symbols
            eventDayAction: str, action placed on the event date
            eventDayShares: int, shares traded on the event date
            actionAfter: str or None, action placed `daysAfter` NYSE days
                after each event; None disables the follow-up trades
            daysAfter: int, offset passed to DateUtils.nyse_add
            sharesAfter: int, shares traded on the follow-up trade
            actionBefore, daysBefore, sharesBefore: accepted but not used yet
        '''
        self.trades = pd.DataFrame(
            index=eventList.index,
            columns=['symbol', 'action', 'num_of_shares'])
        self.trades['symbol'] = eventList
        self.trades['action'] = eventDayAction
        self.trades['num_of_shares'] = eventDayShares

        # TODO: Actions BEFORE

        if actionAfter is not None:
            after_dates = [DateUtils.nyse_add(idx.to_pydatetime(), daysAfter)
                           for idx in self.trades.index]
            after_rows = [{'symbol': symbol,
                           'action': actionAfter,
                           'num_of_shares': sharesAfter}
                          for symbol in self.trades['symbol']]
            after = pd.DataFrame(after_rows, index=after_dates,
                                 columns=self.trades.columns)
            # DataFrame.append was removed from pandas; concat is the
            # replacement.
            self.trades = pd.concat([self.trades, after])

        # DataFrame.sort() was removed from pandas; sort_index() is the
        # by-index replacement.
        self.trades = self.trades.sort_index(kind='mergesort')

    def simulate(self):
        '''
        Simulates the trades stored in self.trades, starting from
        self.initial_cash.

        Prices are fetched with self.da.get_data for every traded symbol,
        between the first and the last index value of self.trades.

        Returns
        -------
            Nothing: Fills self.prices, self.num_of_shares, self.cash,
            self.equities and self.portfolio
        '''
        # 0.1 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        # Convert from Timestamp to datetime
        start_date = self.trades.index[0].to_pydatetime()
        end_date = self.trades.index[-1].to_pydatetime()
        self.prices = self.da.get_data(symbols, start_date, end_date,
                                       self.field)

        # 0.2 Init DataFrames
        self.cash = pd.Series(index=self.prices.index, name='Cash',
                              dtype=np.float64)
        self.num_of_shares = pd.DataFrame(index=self.prices.index,
                                          columns=self.prices.columns,
                                          dtype=np.float64)

        # 1. Fill the DataFrames
        current_cash = self.initial_cash
        current_shares = {symbol: 0 for symbol in symbols}
        for idx, row in self.trades.iterrows():
            # 1.1 Get info of the row; only the first letter of the action
            # is significant ('b' = buy, 's' = sell)
            symbol = row['symbol']
            action = row['action'].lower()[0:1]
            num_of_shares = row['num_of_shares']

            # 1.2 Buying stocks spends cash, selling stocks earns cash
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                current_cash = current_cash - cash_change
                current_shares[symbol] = current_shares[symbol] + num_of_shares
            elif action == 's':
                current_cash = current_cash + cash_change
                current_shares[symbol] = current_shares[symbol] - num_of_shares

            # 1.3 Record the running totals on the trade date. `.ix` was
            # removed from pandas, and a single .loc call avoids writing into
            # a temporary row copy.
            self.cash.loc[idx] = current_cash
            self.num_of_shares.loc[idx, symbol] = current_shares[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        self.prices = self.prices.ffill().bfill()
        self.num_of_shares = self.num_of_shares.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.num_of_shares * self.prices).sum(axis=1)
        self.equities.name = 'Equities value'

        # 3. Get the value of the portfolio = cash + equities_value
        self.portfolio = self.cash + self.equities
        self.portfolio.name = 'Portfolio value'
class PastEvent(EventStudy):
    '''
    Analyse a particular equity on a particular date

    Necessary Parameters
    --------------------
        date: datetime
        symbol: str, eg: AAPL

    Optional Parameters
    -------------------
        market: str, default='SPY' - used to assess the event
        lookback_days: int, default=20 - past event window size
        lookforward_days: int, default=20 - future event window size
        estimation_period: int, default=255

    |-----255-----|-------20-------|-|--------20--------|
       estimation      lookback   event   lookforward
    '''

    def __init__(self):
        # Utils
        self.data_access = DataAccess()

        # Variables
        self.date = None  # Date of the event
        self.symbol = None
        self.field = 'Adj Close'
        self.lookback_days = self.lookforward_days = 20
        self.estimation_period = 255
        self.market = "SPY"

        # Results
        for result_name in ('evt_window_data', 'er', 'ar', 'car',
                            't_test', 'prob'):
            setattr(self, result_name, None)

    def run(self):
        '''
        Load the equity and market data around the event and run
        market_return(). self.market holds the market symbol on entry and is
        replaced by the loaded market data.
        '''
        span = DateUtils.nyse_dates_event(self.date,
                                          self.lookback_days,
                                          self.lookforward_days,
                                          self.estimation_period)
        first_day, last_day = span[0], span[-1]

        # Data to the General market_return Study
        self.data = self.data_access.get_data(self.symbol, first_day,
                                              last_day, self.field)
        # The event window is the trailing lookback + event + lookforward dates
        window_len = self.lookforward_days + self.lookback_days + 1
        evt_window_dates = span[-window_len:]
        self.evt_window_data = self.data[evt_window_dates[0]:span[-1]]
        self.market = self.data_access.get_data(self.market, first_day,
                                                last_day, self.field)

        # Parameters of the General market_return Study: the estimation
        # period ends on the same date the event window starts.
        boundary = span[self.estimation_period]
        self.start_period = span[0]
        self.end_period = boundary
        self.start_window = boundary
        self.end_window = span[-1]

        # Run the Market Return method
        super().market_return()
def setUp1(self):
    """Empty the './data' directories, then store a new DataAccess on self.da."""
    cleaner = DataAccess('./data')
    cleaner.empty_dirs()
    self.da = DataAccess('./data')
class MultipleEvents(object):
    """
    Event study over several events at once.

    ``self.list`` holds the events: it is indexed by event date and its values
    are the symbols (``run`` reads ``self.list.index`` and iterates its
    values). ``run()`` fills the result attributes; ``plot()`` draws the
    cross-event mean of one of them.
    """

    def __init__(self):
        self.data_access = DataAccess()
        self.list = None
        self.market = "SPY"
        self.lookback_days = 20
        self.lookforward_days = 20
        self.estimation_period = 200
        self.field = "Adj Close"

        # Result
        self.equities_window = None
        self.equities_estimation = None
        self.market_window = None
        self.market_estimation = None
        self.reg_estimation = None
        self.dr_equities_window = None
        self.dr_equities_estimation = None
        self.dr_market_window = None
        self.dr_market_estimation = None
        self.er = None
        self.ar = None
        self.car = None

    @staticmethod
    def _store_slice(target, source, start_idx, end_idx, col_name):
        """Copy source[start_idx:end_idx] into target[col_name], re-indexed to target."""
        chunk = source[start_idx:end_idx]
        chunk.index = target.index
        target[col_name] = chunk

    def run(self):
        """
        Assess the events

        |-----200-----|-------20-------|-|--------20--------|
           estimation      lookback   event   lookforward

        Prerequisites
        -------------
            self.list (events: index = dates, values = symbols)
            self.market = 'SPY'
            self.lookback_days = 20
            self.lookforward_days = 20
            self.estimation_period = 200
            self.field = 'Adj Close'

        Fills, with one column per event ("<symbol> <YYYY-MM-DD>"): the
        price and daily-return frames for the estimation period and the event
        window, the per-event regression (``reg_estimation``), ``er``, ``ar``
        and ``car``, plus their mean/std across events (``mean_*`` / ``std_*``).
        """
        # 0. Get the dates and Download/Import the data
        symbols = list(set(self.list))
        start_date = self.list.index[0]
        end_date = self.list.index[-1]
        nyse_dates = DateUtils.nyse_dates(
            start=start_date,
            end=end_date,
            lookbackDays=self.lookback_days + self.estimation_period + 1,
            lookforwardDays=self.lookforward_days,
        )

        data = self.data_access.get_data(symbols, nyse_dates[0], nyse_dates[-1], self.field)
        market = self.data_access.get_data(self.market, nyse_dates[0], nyse_dates[-1], self.field)

        if len(data.columns) == 1:
            data.columns = symbols
        if len(data) > len(market):
            market = market.reindex(data.index)
        # NOTE(review): assumes the market frame has exactly one column, which
        # is looked up below as market[self.field].
        market.columns = [self.field]

        # fillna(method=...) is deprecated in pandas; ffill()/bfill() are the
        # direct equivalents.
        data = data.ffill().bfill()
        market = market.ffill().bfill()

        # 1. Create DataFrames with the data of each event
        windows_indexes = range(-self.lookback_days, self.lookforward_days + 1)
        estimation_indexes = range(-self.estimation_period - self.lookback_days, -self.lookback_days)

        self.equities_window = pd.DataFrame(index=windows_indexes)
        self.equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.market_window = pd.DataFrame(index=windows_indexes)
        self.market_estimation = pd.DataFrame(index=estimation_indexes)

        dr_data = Calculator.returns(data)
        dr_market = Calculator.returns(market)
        self.dr_equities_window = pd.DataFrame(index=windows_indexes)
        self.dr_equities_estimation = pd.DataFrame(index=estimation_indexes)
        self.dr_market_window = pd.DataFrame(index=windows_indexes)
        self.dr_market_estimation = pd.DataFrame(index=estimation_indexes)

        # 2. Iterate over the list of events and fill the DataFrames.
        # Iterating index and values together avoids integer-position lookups
        # (self.list[i]) on a date-indexed Series.
        for evt_timestamp, symbol in zip(self.list.index, self.list):
            evt_date = evt_timestamp.to_pydatetime()
            col_name = symbol + " " + evt_date.strftime("%Y-%m-%d")
            evt_idx = DateUtils.search_closer_date(evt_date, data[symbol].index, exact=True)

            # 2.1 Estimation period: positions [start_idx, end_idx) on data
            start_idx = evt_idx - self.lookback_days - self.estimation_period
            end_idx = evt_idx - self.lookback_days
            self._store_slice(self.equities_estimation, data[symbol], start_idx, end_idx, col_name)
            self._store_slice(self.dr_equities_estimation, dr_data[symbol], start_idx, end_idx, col_name)
            self._store_slice(self.market_estimation, market[self.field], start_idx, end_idx, col_name)
            self._store_slice(self.dr_market_estimation, dr_market, start_idx, end_idx, col_name)

            # 2.2 Event window: positions [start_idx, end_idx) on data
            start_idx = evt_idx - self.lookback_days
            end_idx = evt_idx + self.lookforward_days + 1
            self._store_slice(self.equities_window, data[symbol], start_idx, end_idx, col_name)
            self._store_slice(self.dr_equities_window, dr_data[symbol], start_idx, end_idx, col_name)
            self._store_slice(self.market_window, market[self.field], start_idx, end_idx, col_name)
            self._store_slice(self.dr_market_window, dr_market, start_idx, end_idx, col_name)

        # 3. Calculate the linear regression -> expected return
        self.reg_estimation = pd.DataFrame(
            index=self.dr_market_estimation.columns, columns=["Intercept", "Slope", "Std Error"]
        )
        self.er = pd.DataFrame(index=self.dr_market_window.index, columns=self.dr_market_window.columns)

        # For each column (event) on the estimation period
        for col in self.dr_market_estimation.columns:
            # 3.1 Calculate the regression
            x = self.dr_market_estimation[col]
            y = self.dr_equities_estimation[col]
            slope, intercept, r_value, p_value, slope_std_error = stats.linregress(x, y)
            # .loc writes into the frame itself; chained df[col][row] = v may
            # write to a temporary copy.
            self.reg_estimation.loc[col, "Slope"] = slope
            self.reg_estimation.loc[col, "Intercept"] = intercept
            self.reg_estimation.loc[col, "Std Error"] = slope_std_error
            # 3.2 Calculate the expected return of each date using the regression
            self.er[col] = intercept + self.dr_market_window[col] * slope

        # 4. Final results
        self.er.columns.name = "Expected return"
        self.mean_er = self.er.mean(axis=1)
        self.mean_er.name = "Mean ER"
        self.std_er = self.er.std(axis=1)
        self.std_er.name = "Std ER"

        self.ar = self.dr_equities_window - self.er
        self.ar.columns.name = "Abnormal return"
        self.mean_ar = self.ar.mean(axis=1)
        self.mean_ar.name = "Mean AR"
        self.std_ar = self.ar.std(axis=1)
        self.std_ar.name = "Std AR"

        self.car = self.ar.apply(np.cumsum)
        self.car.columns.name = "Cum Abnormal Return"
        self.mean_car = self.car.mean(axis=1)
        self.mean_car.name = "Mean CAR"
        self.std_car = self.car.std(axis=1)
        # Previously this line re-assigned mean_car.name, leaving std_car
        # without a name.
        self.std_car.name = "Std CAR"

    def plot(self, which):
        """
        Plot the cross-event mean of one result with its std as error band.

        Parameters
        ----------
            which: str, one of 'car', 'ar', 'er'

        Raises
        ------
            ValueError: if `which` is not one of the supported names
        """
        series_names = {
            "car": ("mean_car", "std_car"),
            "ar": ("mean_ar", "std_ar"),
            "er": ("mean_er", "std_er"),
        }
        if which not in series_names:
            raise ValueError("which must be one of 'car', 'ar', 'er'; got %r" % (which,))

        mean_attr, std_attr = series_names[which]
        x = self.mean_car.index.values
        mean_series = getattr(self, mean_attr)
        std_series = getattr(self, std_attr)
        errorfill(x, mean_series.values, std_series.values, label=mean_series.name)
class FinanceTest(unittest.TestCase):
    """TestCase base with numpy/pandas-aware equality assertions.

    ``assertEqual`` dispatches on the argument types (ndarray, Series,
    DataFrame) and falls back to the standard unittest comparison otherwise.
    ``digits`` is the number of decimal places to compare; 0 means "use the
    default comparison for that type".
    """

    def setUpDataAccess(self, delete=False):
        """Point DataAccess at the ``data`` folder next to this file and reset it.

        Stores a new ``DataAccess`` on ``self.data_access`` and calls its
        ``empty_cache`` and ``empty_dir`` with the given ``delete`` flag.
        """
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        self.data_access = DataAccess()
        self.data_access.empty_cache(delete=delete)
        self.data_access.empty_dir(delete=delete)

    @staticmethod
    def delete_data():
        """Point DataAccess at the ``data`` folder next to this file and call empty_dirs()."""
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        data_access = DataAccess()
        data_access.empty_dirs()

    def assertEqual(self, ans, sol, digits=0):
        """Assert ``ans`` equals ``sol``, choosing the comparison by type.

        pandas.TimeSeries no longer exists, so it is not referenced here: the
        old ``elif`` on it raised AttributeError before the DataFrame branch
        could be reached.
        """
        if isinstance(ans, np.ndarray) and isinstance(sol, np.ndarray):
            self.assertArrayEqual(ans, sol, digits)
        elif isinstance(ans, pd.Series) and isinstance(sol, pd.Series):
            # digits is forwarded; it used to be dropped for Series.
            self.assertSeriesEqual(ans, sol, digits)
        elif isinstance(ans, pd.DataFrame) and isinstance(sol, pd.DataFrame):
            self.assertFrameEqual(ans, sol, digits)
        elif digits == 0:
            super().assertEqual(ans, sol)
        else:
            super().assertAlmostEqual(ans, sol, digits)

    def assertFloat(self, obj):
        """Assert ``obj`` is exactly a numpy.float64."""
        self.assertIs(type(obj), np.float64)

    def assertArray(self, obj):
        """Assert ``obj`` is a numpy.ndarray."""
        self.assertIsInstance(obj, np.ndarray)

    def assertArrayEqual(self, ans, sol, digits=0):
        """Assert two arrays are equal, or almost equal to ``digits`` decimals."""
        self.assertArray(ans)
        self.assertArray(sol)
        if digits == 0:
            np_test.assert_array_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans, sol, digits)

    def assertSeries(self, obj):
        """Assert ``obj`` is a pandas.Series."""
        self.assertIsInstance(obj, pd.Series)

    def assertSeriesEqual(self, ans, sol, digits=0):
        """Assert two Series have the same name and (almost) equal values."""
        self.assertSeries(ans)
        self.assertSeries(sol)
        # assertEquals was removed in Python 3.12; compare names with the
        # plain unittest assertEqual.
        super().assertEqual(ans.name, sol.name)
        if digits == 0:
            # The old call passed digits (0) positionally, which landed in
            # check_dtype; keep that effective behaviour but say it explicitly.
            pd_test.assert_series_equal(ans, sol, check_dtype=False)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)

    def assertFrame(self, obj):
        """Assert ``obj`` is a pandas.DataFrame."""
        self.assertIsInstance(obj, pd.DataFrame)

    def assertFrameEqual(self, ans, sol, digits=0):
        """Assert two DataFrames have the same columns name and (almost) equal values."""
        self.assertFrame(ans)
        self.assertFrame(sol)
        super().assertEqual(ans.columns.name, sol.columns.name)
        if digits == 0:
            pd_test.assert_frame_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)
class PastEventTest(unittest.TestCase):
    """Regression test of PastEvent for AAPL vs ^gspc around 2009-01-05."""

    def setUp0(self):
        """Empty the './data' directories, then run setUp1()."""
        self.da = DataAccess("./data")
        self.da.empty_dirs()
        self.setUp1()

    def setUp1(self):
        """Build and run the reference event, stored on self.evt."""
        self.evt = self._build_event()

    @staticmethod
    def _build_event():
        """Return a PastEvent configured with the reference parameters, already run."""
        evt = PastEvent("./data")
        evt.symbol = "AAPL"
        evt.market = "^gspc"
        evt.lookback_days = 10
        evt.lookforward_days = 10
        evt.estimation_period = 252
        evt.date = datetime(2009, 1, 5)
        evt.run()
        return evt

    @staticmethod
    def _truncated(series):
        """Return the series values as strings cut to their first 10 characters."""
        return [str(x)[0:10] for x in series.values.tolist()]

    def suite(self):
        suite = unittest.TestSuite()
        suite.addTest(PastEventTest("test_success"))
        return suite

    def test_success(self):
        self.setUp1()

        # assertEquals was removed in Python 3.12; assertEqual is used
        # throughout.

        # Test: Series names
        self.assertEqual(self.evt.er.name, "Expected Return")
        self.assertEqual(self.evt.ar.name, "Abnormal Return")
        self.assertEqual(self.evt.car.name, "Cumulative Abnormal Return")
        self.assertEqual(self.evt.t_test.name, "t test")
        self.assertEqual(self.evt.prob.name, "Probability")

        # Test: Index values
        self.assertEqual(self.evt.er.index[0].to_pydatetime(), datetime(2008, 12, 18))
        self.assertEqual(self.evt.er.index[-1].to_pydatetime(), datetime(2009, 1, 20))

        # Test: Values (compared as strings truncated to 10 characters)
        ans_er = [
            "-0.0212234",
            "0.00230225",
            "-0.0184308",
            "-0.0100507",
            "0.00507867",
            "0.00466393",
            "-0.0043450",
            "0.02326106",
            "0.01325643",
            "0.03029057",
            "-0.0051219",
            "0.00706623",
            "-0.0298596",
            "0.00275149",
            "-0.0213603",
            "-0.0225914",
            "0.00115150",
            "-0.0332277",
            "0.00073280",
            "0.00681676",
            "-0.0521229",
        ]
        ans_ar = [
            "0.02416528",
            "0.00412824",
            "-0.0288732",
            "0.01746335",
            "-0.0206124",
            "0.00435256",
            "0.01375131",
            "-0.0269885",
            "-0.0241298",
            "0.03294819",
            "0.04736761",
            "-0.0235995",
            "0.00827612",
            "0.01576086",
            "-0.0014955",
            "0.00142864",
            "-0.0118479",
            "0.00608256",
            "-0.0235514",
            "-0.0193999",
            "0.00188397",
        ]
        ans_car = [
            "0.02416528",
            "0.02829353",
            "-0.0005797",
            "0.01688362",
            "-0.0037288",
            "0.00062375",
            "0.01437507",
            "-0.0126134",
            "-0.0367432",
            "-0.0037950",
            "0.04357256",
            "0.01997299",
            "0.02824911",
            "0.04400998",
            "0.04251447",
            "0.04394312",
            "0.03209521",
            "0.03817778",
            "0.01462634",
            "-0.0047736",
            "-0.0028896",
        ]
        ans_t_test = [
            "0.93826998",
            "1.09855833",
            "-0.0225090",
            "0.65554389",
            "-0.1447789",
            "0.02421877",
            "0.55814377",
            "-0.4897438",
            "-1.4266368",
            "-0.1473509",
            "1.69180043",
            "0.77549544",
            "1.09683395",
            "1.70878414",
            "1.65071766",
            "1.70618799",
            "1.24616708",
            "1.48233614",
            "0.56789979",
            "-0.1853454",
            "-0.1121960",
        ]
        ans_prob = [
            "0.82594716",
            "0.86401961",
            "0.49102096",
            "0.74394118",
            "0.44244271",
            "0.50966094",
            "0.71162689",
            "0.31215756",
            "0.07684230",
            "0.44142751",
            "0.95465798",
            "0.78097652",
            "0.86364300",
            "0.95625452",
            "0.95060188",
            "0.95601345",
            "0.89364846",
            "0.93087456",
            "0.71494849",
            "0.42647903",
            "0.45533397",
        ]
        self.assertEqual(self._truncated(self.evt.er), ans_er)
        self.assertEqual(self._truncated(self.evt.ar), ans_ar)
        self.assertEqual(self._truncated(self.evt.car), ans_car)
        self.assertEqual(self._truncated(self.evt.t_test), ans_t_test)
        self.assertEqual(self._truncated(self.evt.prob), ans_prob)

        # Test: Compare results of equal events
        evt2 = self._build_event()
        for result_name in ("er", "ar", "car", "t_test", "prob"):
            self.assertListEqual(
                self._truncated(getattr(self.evt, result_name)),
                self._truncated(getattr(evt2, result_name)),
            )
def delete_data():
    '''
    Set DataAccess.path to the "data" folder next to this source file and
    call empty_dirs() on a fresh DataAccess instance.
    '''
    this_file = inspect.getfile(inspect.currentframe())
    base_dir = os.path.dirname(os.path.abspath(this_file))
    DataAccess.path = os.path.join(base_dir, 'data')
    DataAccess().empty_dirs()
class DataAccessTest(unittest.TestCase):
    '''Tests for DataAccess.get_data and the custom-name save/load pair.'''

    def setUp1(self):
        '''Empty the ./data directories and create the instance under test.'''
        DataAccess('./data').empty_dirs()
        self.da = DataAccess('./data')

    def suite(self):
        '''Return a TestSuite with both tests of this class.'''
        tests = unittest.TestSuite()
        for test_name in ('test_get_data', 'test_save_load_custom_name'):
            tests.addTest(DataAccessTest(test_name))
        return tests

    def test_get_data(self):
        '''
        Checks the number of rows and columns, and the column names, for
        every symbol/field combination.

        Note 1: File downloads are managed by finance.utils.FileManager
                Test for that on FileManagerTest.py
        Note 2: Other tests were done on the benchmark
        '''
        self.setUp1()

        first_day = datetime(2008, 1, 1)
        last_day = datetime(2009, 12, 31)

        # (symbols, fields, expected column names), in the original order:
        # single/single, multiple/single, single/multiple, multiple/multiple
        scenarios = [
            ("AAPL", "Close", ["Close"]),
            (["AAPL", "GLD", "GOOG", "SPY", "XOM"], "Close",
             ["AAPL", "GLD", "GOOG", "SPY", "XOM"]),
            ("AAPL", ["Close", "Volume"], ['Close', 'Volume']),
            (["AAPL", "GLD", "GOOG", "SPY", "XOM"], ["Close", "Volume"],
             ['AAPL Close', 'AAPL Volume', 'GLD Close', 'GLD Volume',
              'GOOG Close', 'GOOG Volume', 'SPY Close', 'SPY Volume',
              'XOM Close', 'XOM Volume']),
        ]
        for symbols, field_s, names in scenarios:
            df = self.da.get_data(symbols, first_day, last_day, field_s)
            self.assertEqual(len(df), 505)
            self.assertEqual(len(df.columns), len(names))
            self.assertEqual(list(df.columns), names)

    def test_save_load_custom_name(self):
        '''A frame saved under a custom name loads back with the same shape.'''
        self.setUp1()

        close = self.da.get_data(
            ["AAPL", "GLD", "GOOG", "SPY", "XOM"],
            datetime(2008, 1, 1),
            datetime(2009, 12, 31),
            "Close",
            save=False)
        self.da.save(close, "customName.data")

        close_loaded = self.da.load("customName.data")

        self.assertEqual(list(close.columns), list(close_loaded.columns))
        self.assertEqual(len(close), len(close_loaded))
class MarketSimulator(object):
    '''
    Market Simulator.

    Receives:
        1. Initial cash
        2. List of trades (missing price data is requested through DataAccess)

    After simulation:
        portfolio is a pandas.DataFrame with the value of the portfolio on
        each date
    '''

    def __init__(self, path='./data'):
        self.da = DataAccess(path)

        self.initial_cash = 0
        self.current_cash = 0

        self.trades = None
        self.prices = None
        self.own = None
        self.cash = None
        self.equities = None
        self.portfolio = None

    def set_trades(self, trades):
        '''
        Use an existing DataFrame as the list of trades.

        simulate() calls this method when ordersIsDataFrame is True, but the
        class did not define it. The frame is sorted by its index because
        simulate() reads the first and last index entries as the date range.

        Parameters
        ----------
            trades: pandas.DataFrame indexed by date with the columns
                    symbol, action and num_of_shares
        '''
        self.trades = trades.sort_index()

    def load_trades(self, file_path):
        '''
        Reads the csv file and parses the data.

        Parameters
        ----------
            file_path: path (or anything pandas.read_csv accepts) of a csv
                       with the columns year, month, day, symbol, action,
                       num_of_shares
        '''
        # 1. Read the .csv file
        self.trades = pd.read_csv(file_path)

        # 2. Use the (year, month, day) columns as the index
        dates = [datetime(row['year'], row['month'], row['day'])
                 for _, row in self.trades.iterrows()]
        self.trades.index = pd.DatetimeIndex(dates)

        # 3. Delete unnecessary columns
        self.trades = self.trades[['symbol', 'action', 'num_of_shares']]

        # 4. Sort by the index (dates); DataFrame.sort() no longer exists
        self.trades = self.trades.sort_index()

    def simulate(self, trades=None, ordersIsDataFrame=False):
        '''
        Simulates the trades and fills the DataFrames cash, own, equities and
        portfolio.

        Parameters
        ----------
            trades: None to use the trades already set; otherwise a
                    DataFrame (ordersIsDataFrame=True) or a csv path
            ordersIsDataFrame: bool, how to interpret `trades`
        '''
        # 0. Init the required data
        # 0.1 if trades is not None load them
        if trades is not None:
            if ordersIsDataFrame:
                self.set_trades(trades)
            else:
                # If there is no DataFrame then it is a file to be loaded
                self.load_trades(trades)

        # 0.2 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        # Convert from Timestamp to datetime
        start_date = self.trades.index[0].to_pydatetime()
        end_date = self.trades.index[-1].to_pydatetime()
        self.prices = self.da.get_data(symbols, start_date, end_date,
                                       "Adj Close")

        # 0.3 Init other DataFrames, dictionaries
        self.cash = pd.DataFrame(index=self.prices.index, columns=['Cash'],
                                 dtype=float)
        self.own = pd.DataFrame(index=self.prices.index,
                                columns=self.prices.columns, dtype=float)
        current_stocks = {symbol: 0 for symbol in symbols}

        # 0.4 Start the simulation from the initial cash
        self.current_cash = self.initial_cash

        # 1. Fill the DataFrames
        for idx, row in self.trades.iterrows():
            # Note: idx is Timestamp, row is Series
            # Note 2: several trades on the same day overwrite the previous
            #         value, so the last one of the day is what is stored.
            symbol = row['symbol']
            action = row['action'].lower()[0:1]
            num_of_shares = row['num_of_shares']

            # Buying spends cash and adds shares; selling does the opposite.
            # Any other action leaves both untouched.
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                self.current_cash = self.current_cash - cash_change
                current_stocks[symbol] = current_stocks[symbol] + num_of_shares
            elif action == 's':
                self.current_cash = self.current_cash + cash_change
                current_stocks[symbol] = current_stocks[symbol] - num_of_shares

            # .loc replaces the removed .ix; a single .loc[row, col] call
            # writes into the frame itself instead of into a temporary row.
            self.cash.loc[idx] = self.current_cash
            self.own.loc[idx, symbol] = current_stocks[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        # After forward-fill, rows before the first trade are still NaN
        self.own = self.own.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.own * self.prices).sum(axis=1)

        # 3. Get the value of the portfolio = cash + equities value
        # axis=0 aligns the Series on the dates; a plain `+` would align it
        # on the column labels instead.
        self.portfolio = self.cash.add(self.equities, axis=0)
        self.portfolio.columns = ['Portfolio']
def delete_data():
    '''
    Resolve the directory containing this source file, set DataAccess.path
    to its "data" sub-folder and call empty_dirs() on a new DataAccess.
    '''
    source_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
    DataAccess.path = os.path.join(os.path.dirname(source_path), 'data')
    accessor = DataAccess()
    accessor.empty_dirs()
class PastEvent(EventStudy):
    '''
    Analyse a particular equity on a particular date

    Necessary Parameters
    --------------------
        date: datetime
        symbol: str, eg: AAPL

    Optional Parameters
    -------------------
        market: str, default='SPY' - used to assess the event
        lookback_days: int, default=20 - past event window size
        lookforward_days: int, default=20 - future event window size
        estimation_period: int, default=255

    |-----255-----|-------20-------|-|--------20--------|
       estimation      lookback   event   lookforward
    '''

    def __init__(self):
        # Utils
        self.data_access = DataAccess()

        # Variables
        self.date = None  # Date of the event
        self.symbol = None
        self.market = "SPY"
        self.field = 'adjusted_close'
        self.lookback_days = 20
        self.lookforward_days = 20
        self.estimation_period = 255

        # Results
        self.evt_window_data = None
        self.er = None
        self.ar = None
        self.car = None
        self.t_test = None
        self.prob = None

    def run(self):
        '''
        Fetch the equity and market data for the whole period, fill in the
        period/window boundaries and delegate to EventStudy.market_return().

        NOTE(review): self.market is replaced by the fetched market data, so
        a second call to run() would pass that data (not the symbol) to
        get_data - confirm whether re-running an instance is supported.
        '''
        dates = DateUtils.nyse_dates_event(self.date,
                                           self.lookback_days,
                                           self.lookforward_days,
                                           self.estimation_period)
        first_day = dates[0]
        last_day = dates[-1]

        # Data for the general market_return study
        self.data = self.data_access.get_data(self.symbol, first_day,
                                              last_day, self.field)
        # The event window is the lookback, the event day and the lookforward
        window_length = self.lookforward_days + self.lookback_days + 1
        window_dates = dates[-window_length:]
        self.evt_window_data = self.data[window_dates[0]:last_day]
        self.market = self.data_access.get_data(self.market, first_day,
                                                last_day, self.field)

        # Parameters of the general market_return study: the estimation
        # period ends on the same date the window starts
        boundary = dates[self.estimation_period]
        self.start_period = first_day
        self.end_period = boundary
        self.start_window = boundary
        self.end_window = last_day

        # Run the Market Return method
        super().market_return()
class FinanceTest(unittest.TestCase):
    '''
    TestCase base class with numpy/pandas aware assertions and helpers to
    point DataAccess at the "data" folder next to this file.
    '''

    def setUpDataAccess(self, delete=False):
        '''
        Create self.data_access on the local "data" folder and call its
        empty_cache/empty_dir methods, forwarding `delete` to both.
        '''
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        self.data_access = DataAccess()

        self.data_access.empty_cache(delete=delete)
        self.data_access.empty_dir(delete=delete)

    @staticmethod
    def delete_data():
        '''Call empty_dirs() on a DataAccess set to the local "data" folder.'''
        self_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        DataAccess.path = os.path.join(self_dir, 'data')
        data_access = DataAccess()
        data_access.empty_dirs()

    @staticmethod
    def _series_types():
        '''
        Return the exact types accepted as a "series".

        pandas.TimeSeries no longer exists in current pandas releases, so it
        is looked up defensively instead of being referenced directly.
        '''
        return (pd.Series, getattr(pd, 'TimeSeries', pd.Series))

    def assertEqual(self, ans, sol, digits=0):
        '''
        Type-aware equality check.

        Parameters
        ----------
            ans, sol: values to compare. Two ndarrays, two Series or two
                      DataFrames (exact types, both the same) use the
                      matching specialised assertion; anything else falls
                      back to unittest.
            digits: int, 0 for an exact comparison, otherwise the number of
                    decimal places that must agree
        '''
        ans_type = type(ans)
        same_type = ans_type is type(sol)

        if same_type and ans_type is np.ndarray:
            self.assertArrayEqual(ans, sol, digits)
        elif same_type and ans_type in self._series_types():
            # digits is forwarded for every series type; the plain
            # pandas.Series branch used to drop it silently.
            self.assertSeriesEqual(ans, sol, digits)
        elif same_type and ans_type is pd.DataFrame:
            self.assertFrameEqual(ans, sol, digits)
        elif digits == 0:
            super().assertEqual(ans, sol)
        else:
            super().assertAlmostEqual(ans, sol, digits)

    def assertFloat(self, obj):
        '''Fail unless obj is exactly a numpy.float64.'''
        self.assertIs(type(obj), np.float64)

    def assertArray(self, obj):
        '''Fail unless obj is exactly a numpy.ndarray.'''
        self.assertIs(type(obj), np.ndarray)

    def assertArrayEqual(self, ans, sol, digits=0):
        '''Compare two ndarrays exactly, or to `digits` decimals.'''
        self.assertArray(ans)
        self.assertArray(sol)
        if digits == 0:
            np_test.assert_array_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans, sol, digits)

    def assertSeries(self, obj):
        '''Fail unless obj is exactly one of the accepted series types.'''
        if type(obj) in self._series_types():
            return
        self.assertIs(type(obj), pd.Series)

    def assertSeriesEqual(self, ans, sol, digits=0):
        '''
        Compare two series: names must match; values are compared with
        pandas when digits is 0, otherwise to `digits` decimals with numpy
        (values only).
        '''
        self.assertSeries(ans)
        self.assertSeries(sol)
        self.assertEqual(ans.name, sol.name)

        if digits == 0:
            # `digits` (always 0 here) used to be passed positionally, where
            # pandas reads it as check_dtype. The effective setting is kept
            # and made explicit.
            pd_test.assert_series_equal(ans, sol, check_dtype=False)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)

    def assertFrame(self, obj):
        '''Fail unless obj is exactly a pandas.DataFrame.'''
        self.assertIs(type(obj), pd.DataFrame)

    def assertFrameEqual(self, ans, sol, digits=0):
        '''
        Compare two DataFrames: the name of the columns index must match;
        values are compared with pandas when digits is 0, otherwise to
        `digits` decimals with numpy (values only).
        '''
        self.assertFrame(ans)
        self.assertFrame(sol)
        self.assertEqual(ans.columns.name, sol.columns.name)

        if digits == 0:
            pd_test.assert_frame_equal(ans, sol)
        else:
            np_test.assert_array_almost_equal(ans.values, sol.values, digits)
class MarketSimulator(object):
    '''
    Market Simulator

    Needs a list of trades to simulate, options are:
        1. Provide a custom pandas.DataFrame(index=DatetimeIndex):
                        symbol  action  num_of_shares
            2011-01-10   AAPL    Buy      1500
            2011-01-13   AAPL    Sell     1500
            2011-01-13   IBM     Buy      4000
            2011-01-26   GOOG    Buy      1000
        2. Load the trades from a csv file:
            year,month,day,symbol,action,num_of_shares
            2011,1,10,AAPL,Buy,1500
            2011,1,13,AAPL,Sell,1500
            2011,1,13,IBM,Buy,4000
            2011,1,26,GOOG,Buy,1000
        3. Create trades from an event list: usually from EventFinder
    '''

    def __init__(self):
        self.da = DataAccess()

        self.initial_cash = 0
        self.field = 'adjusted_close'

        self.trades = None
        self.prices = None
        self.num_of_shares = None
        self.cash = None
        self.equities = None
        self.portfolio = None

    def load_trades(self, file_path):
        '''
        Load trades from a csv file

        csv file example:
        year,month,day,symbol,action,num_of_shares
        2011,1,10,AAPL,Buy,1500
        2011,1,13,AAPL,Sell,1500
        2011,1,13,IBM,Buy,4000
        2011,1,26,GOOG,Buy,1000

        Parameters
        ----------
            file_path: str, path to the csv containing the orders
        '''
        # 1. Read the .csv file
        self.trades = pd.read_csv(file_path)

        # 2. Use the (year, month, day) columns as the index
        dates = [datetime(row['year'], row['month'], row['day'])
                 for _, row in self.trades.iterrows()]
        self.trades.index = pd.DatetimeIndex(dates)

        # 3. Delete unnecessary columns
        self.trades = self.trades[['symbol', 'action', 'num_of_shares']]

        # 4. Sort the DataFrame by the index (dates)
        self.trades = self.trades.sort_index()

    def create_trades_from_event(self, eventList,
                                 eventDayAction='Buy', eventDayShares=100,
                                 actionAfter='Sell', daysAfter=5,
                                 sharesAfter=100,
                                 actionBefore=None, daysBefore=5,
                                 sharesBefore=100):
        '''
        Creates trades using an event list; usually from the EventFinder.
        Also creates an additional order after each event as defined by the
        user. The *Before parameters are accepted but not used yet.

        Parameters
        ----------
            eventList: pandas.Series, index=event dates, values=symbols
            eventDayAction: str, action placed on the event date
            eventDayShares: int, shares traded on the event date
            actionAfter: str or None, action placed daysAfter NYSE days
                         later; None disables the follow-up order
            daysAfter: int, offset handed to DateUtils.nyse_add
            sharesAfter: int, shares traded by the follow-up order
        '''
        self.trades = pd.DataFrame(
            index=eventList.index,
            columns=['symbol', 'action', 'num_of_shares'])
        self.trades['symbol'] = eventList
        self.trades['action'] = eventDayAction
        self.trades['num_of_shares'] = eventDayShares

        # TODO: Actions BEFORE

        if actionAfter is not None:
            after_dates = [DateUtils.nyse_add(idx.to_pydatetime(), daysAfter)
                           for idx in self.trades.index]
            if after_dates:
                # All follow-up orders are built and concatenated once;
                # DataFrame.append no longer exists and grew the frame
                # quadratically when called per row.
                after = pd.DataFrame(
                    {'symbol': self.trades['symbol'].values,
                     'action': actionAfter,
                     'num_of_shares': sharesAfter},
                    index=after_dates, columns=self.trades.columns)
                self.trades = pd.concat([self.trades, after])

        self.trades = self.trades.sort_index()

    def simulate(self):
        '''
        Simulates the trades stored in self.trades

        Returns
        -------
            Nothing: fills prices, cash, num_of_shares, equities and
            portfolio
        '''
        # 0.1 Load/Download required data
        symbols = list(set(self.trades['symbol']))
        # Convert from Timestamp to datetime
        start_date = self.trades.index[0].to_pydatetime()
        end_date = self.trades.index[-1].to_pydatetime()
        self.prices = self.da.get_data(symbols, start_date, end_date,
                                       self.field)

        # 0.2 Init DataFrames
        self.cash = pd.Series(index=self.prices.index, name='Cash',
                              dtype=np.float64)
        self.num_of_shares = pd.DataFrame(index=self.prices.index,
                                          columns=self.prices.columns,
                                          dtype=np.float64)

        # 1. Fill the DataFrames
        current_cash = self.initial_cash
        current_shares = {symbol: 0 for symbol in symbols}
        for idx, row in self.trades.iterrows():
            symbol = row['symbol']
            action = row['action'].lower()[0:1]
            num_of_shares = row['num_of_shares']

            # Buying spends cash and adds shares; selling does the opposite.
            # Any other action leaves both untouched.
            cash_change = self.prices.loc[idx, symbol] * num_of_shares
            if action == 'b':
                current_cash = current_cash - cash_change
                current_shares[symbol] = current_shares[symbol] + num_of_shares
            elif action == 's':
                current_cash = current_cash + cash_change
                current_shares[symbol] = current_shares[symbol] - num_of_shares

            # .loc replaces the removed .ix; a single .loc[row, col] call
            # writes into the frame itself instead of into a temporary row.
            self.cash.loc[idx] = current_cash
            self.num_of_shares.loc[idx, symbol] = current_shares[symbol]

        # Fill forward missing values
        self.cash = self.cash.ffill()
        self.prices = self.prices.ffill().bfill()
        self.num_of_shares = self.num_of_shares.ffill().fillna(0)

        # 2. Get the value of the equities
        self.equities = (self.num_of_shares * self.prices).sum(axis=1)
        self.equities.name = 'Equities value'

        # 3. Get the value of the portfolio = cash + equities value
        self.portfolio = self.cash + self.equities
        self.portfolio.name = 'Portfolio value'
class EventFinder(object):
    '''
    Searches price data for dates on which a Condition holds.

    After search():
        matrix: pandas.DataFrame (dates x symbols) with 1 where an event was
                found, reduced to the rows and columns that have events
        list: pandas.Series (index=dates) with the symbol of each event
        num_events: int, length of list
    '''

    def __init__(self):
        self.data_access = DataAccess()

        self.symbols = []
        self.start_date = None
        self.end_date = None
        self.field = 'Adj Close'

        self.condition = Condition()
        self.matrix = None
        self.num_events = 0

        self.oneEventPerEquity = True

    def generate_filename(self):
        '''
        Build the cache name from the search parameters: symbols, start and
        end date (YYYY-MM-DD), field, condition id and oneEventPerEquity,
        concatenated without separators.
        '''
        parts = (
            ''.join(self.symbols),
            self.start_date.strftime('%Y-%m-%d'),
            self.end_date.strftime('%Y-%m-%d'),
            self.field,
            self.condition.id,
            str(self.oneEventPerEquity),
        )
        return '%s%s%s%s%s%s' % parts

    def _build_matrix(self, oneEventPerEquity):
        '''Fetch the data and mark with 1 every date where the condition holds.'''
        # 2.1 Get the dates, and Download/Import the data
        nyse_dates = DateUtils.nyse_dates(start=self.start_date,
                                          end=self.end_date)
        data = self.data_access.get_data(self.symbols, nyse_dates[0],
                                         nyse_dates[-1], self.field)
        # Special case: a single column is renamed to the symbol
        if len(data.columns) == 1:
            data.columns = self.symbols

        # 2.2 Create and fill the matrix of events
        data = data[self.start_date:self.end_date]
        matrix = pd.DataFrame(index=data.index, columns=self.symbols)

        for symbol in self.symbols:
            column = matrix.columns.get_loc(symbol)
            # The first row is skipped, so position i of `values` is row
            # i + 1 of the matrix.
            values = data[symbol][1:]
            for i, item in enumerate(values):
                if self.condition.function(i, item, values):
                    # One positional write on the frame itself; chained
                    # matrix[symbol][i + 1] can end up writing to a copy.
                    matrix.iloc[i + 1, column] = 1
                    if oneEventPerEquity:
                        break
        return matrix

    def search(self, oneEventPerEquity=True, useCache=True, save=True):
        '''
        Find the events and fill matrix, list and num_events.

        Parameters
        ----------
            oneEventPerEquity: bool, stop at the first event of each symbol
            useCache: bool, reuse a previously saved matrix when available
            save: bool, save the resulting matrix through data_access
        '''
        self.oneEventPerEquity = oneEventPerEquity

        # 1. Load the data if requested and available
        self.matrix = None
        if useCache:
            self.matrix = self.data_access.load(self.generate_filename(),
                                                '.evt_matrix')

        # 2. Data was not loaded
        if self.matrix is None:
            self.matrix = self._build_matrix(oneEventPerEquity)

        # 3. Calculate other results and save if requested
        # Reduce Matrix: a row/column whose sum is greater than 0 has events
        self.matrix = self.matrix[self.matrix.fillna(value=0).sum(axis=1) > 0]
        valid_cols = self.matrix.columns[
            self.matrix.fillna(value=0).sum(axis=0) > 0].values
        self.matrix = self.matrix[valid_cols]

        # 3.2 Create list of events (first symbol with an event on each date)
        self.list = pd.Series(index=self.matrix.index, name='Equity',
                              dtype=str)
        for idx, row in self.matrix.iterrows():
            equity = row[row == 1].index[0]
            self.list.loc[idx] = equity

        # 3.3 Save
        self.num_events = len(self.list)
        if save:
            self.data_access.save(self.matrix, self.generate_filename(),
                                  '.evt_matrix')
class EventFinder(object):
    '''
    Searches price data for dates on which a Condition holds.

    After search():
        matrix: pandas.DataFrame (dates x symbols) with 1 where an event was
                found, reduced to the rows and columns that have events
        list: pandas.Series (index=dates) with the symbol of each event
        num_events: int, length of list
    '''

    def __init__(self):
        self.data_access = DataAccess()

        self.symbols = []
        self.start_date = None
        self.end_date = None
        self.field = 'adjusted_close'

        self.condition = Condition()
        self.matrix = None
        self.num_events = 0

        self.oneEventPerEquity = True

    def generate_filename(self):
        '''
        Build the cache name from the search parameters: symbols, start and
        end date (YYYY-MM-DD), field, condition id and oneEventPerEquity,
        concatenated without separators.
        '''
        pieces = [
            ''.join(self.symbols),
            self.start_date.strftime('%Y-%m-%d'),
            self.end_date.strftime('%Y-%m-%d'),
            self.field,
            self.condition.id,
            str(self.oneEventPerEquity),
        ]
        return ''.join('%s' % piece for piece in pieces)

    def _find_events(self, oneEventPerEquity):
        '''Fetch the data and mark with 1 every date where the condition holds.'''
        # 2.1 Get the dates, and Download/Import the data
        nyse_dates = DateUtils.nyse_dates(start=self.start_date,
                                          end=self.end_date)
        data = self.data_access.get_data(self.symbols, nyse_dates[0],
                                         nyse_dates[-1], self.field)
        # Special case: a single column is renamed to the symbol
        if len(data.columns) == 1:
            data.columns = self.symbols

        # 2.2 Create and fill the matrix of events
        data = data[self.start_date:self.end_date]
        events = pd.DataFrame(index=data.index, columns=self.symbols)

        for symbol in self.symbols:
            col_pos = events.columns.get_loc(symbol)
            # The first row is skipped, so position i of `series` is row
            # i + 1 of the matrix.
            series = data[symbol][1:]
            for i, item in enumerate(series):
                if self.condition.function(i, item, series):
                    # One positional write on the frame itself; chained
                    # matrix[symbol][i+1] can end up writing to a copy.
                    events.iloc[i + 1, col_pos] = 1
                    if oneEventPerEquity:
                        break
        return events

    def search(self, oneEventPerEquity=True, useCache=True, save=True):
        '''
        Find the events and fill matrix, list and num_events.

        Parameters
        ----------
            oneEventPerEquity: bool, stop at the first event of each symbol
            useCache: bool, reuse a previously saved matrix when available
            save: bool, save the resulting matrix through data_access
        '''
        self.oneEventPerEquity = oneEventPerEquity

        # 1. Load the data if requested and available
        self.matrix = None
        if useCache:
            self.matrix = self.data_access.load(self.generate_filename(),
                                                '.evt_matrix')

        # 2. Data was not loaded
        if self.matrix is None:
            self.matrix = self._find_events(oneEventPerEquity)

        # 3. Calculate other results and save if requested
        # Reduce Matrix: a row/column whose sum is greater than 0 has events
        self.matrix = self.matrix[self.matrix.fillna(value=0).sum(axis=1) > 0]
        valid_cols = self.matrix.columns[
            self.matrix.fillna(value=0).sum(axis=0) > 0].values
        self.matrix = self.matrix[valid_cols]

        # 3.2 Create list of events (first symbol with an event on each date)
        # dtype=object because the values are symbol strings; without it the
        # Series starts as float and every assignment has to upcast it.
        self.list = pd.Series(index=self.matrix.index, name='Equity',
                              dtype=object)
        for idx, row in self.matrix.iterrows():
            equity = row[row == 1].index[0]
            self.list.loc[idx] = equity

        # 3.3 Save
        self.num_events = len(self.list)
        if save:
            self.data_access.save(self.matrix, self.generate_filename(),
                                  '.evt_matrix')
def setUp0(self):
    '''
    Create the DataAccess for "./data", call empty_dirs() on it and then
    continue with setUp1().
    '''
    data_access = DataAccess("./data")
    self.da = data_access
    data_access.empty_dirs()
    self.setUp1()