def create_sid_table_from_file(filepath):
    """Read the raw ticker file, map tickers -> SIDs, encode the sector
    strings as integers, and persist the result to ZIPLINE_DATA_DIR + SID_FILE.
    """
    register(BUNDLE_NAME, int)  # dummy register so the bundle metadata can be read
    df = pd.read_csv(filepath, index_col="ticker")
    assert df.shape[0] > 10001  # there should be more than 10k tickers

    # drop rows with no usable exchange
    df = df[df.exchange != 'None']
    df = df[df.exchange != 'INDEX']
    sector_codes = df["sector"].map(SECTOR_CODING)

    ticker_to_sid = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    num_sids = max(ticker_to_sid.values()) + 1

    # 1-D array indexed by SID; -1 marks "sector unknown"
    sectors = np.full(num_sids, -1, np.dtype('int64'))
    for ticker, sid in ticker_to_sid.items():
        sectors[sid] = sector_codes.get(ticker, -1)
    print(sectors)

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + SID_FILE, sectors)
def register_tdx(assets=None, minute=False, start=None, overwrite=False, end=None):
    """(Re-)register the 'tdx' bundle on the SHSZ calendar.

    A previously registered 'tdx' bundle is unregistered first.  When *start*
    is given but is not a trading session, it is advanced to the next session.
    """
    try:
        bundles.unregister('tdx')
    except bundles.UnknownBundle:
        pass

    calendar = get_calendar('SHSZ')
    if start and not calendar.is_session(start):
        start = calendar.all_sessions[searchsorted(calendar.all_sessions, start)]

    bundles.register('tdx',
                     partial(tdx_bundle, assets, minute, overwrite),
                     'SHSZ', start, end,
                     minutes_per_day=240)
def create_factor_loadings_files():
    """Create a DataFrame for each risk factor and persist them to HDF5.

    Each DataFrame has dates as the index and Assets as the columns, the
    values being the factor loadings of each equity on each day.  The
    loadings for a given day are computed over the preceding 504 sessions.
    """
    data_dates = ('2008-01-01', '2016-12-31')
    backtest_dates = ('2012-01-04', '2016-12-31')
    pd_data_dates = (pd.to_datetime(data_dates[0], utc=True),
                     pd.to_datetime(data_dates[1], utc=True))

    register('crsp', int)  # dummy register

    # get Assets for all symbols stored
    all_assets = get_all_assets_for_bundle('crsp')
    symbol2asset_map = {asset.symbol: asset for asset in all_assets}

    # Fix: use a context manager (the original leaked the file handle) and
    # materialize the equities list (the original Py2 `map` would be an
    # exhausted iterator on its second use under Python 3).
    with open(EQUITIES_OF_INTEREST_FILE) as fh:
        asset_symbols = [line.strip() for line in fh]
    equities_of_interest = [symbol2asset_map[s] for s in asset_symbols]
    print(equities_of_interest)

    # containers (DataFrame per factor): columns=equities, rows=backtest dates
    trading_days_in_bt, cal = get_trading_days(
        'crsp', backtest_dates[0], backtest_dates[1])
    factor_loadings = {factor: pd.DataFrame(index=trading_days_in_bt,
                                            columns=equities_of_interest)
                       for factor in RISK_FACTORS}

    # for every backtest day, calculate loadings over the trailing 504
    # sessions and pack the results by factor
    cal_list = cal.tolist()
    for day in trading_days_in_bt:
        i = cal_list.index(day)
        print(cal[i - 504], day)
        factor_loading_calc_period = (cal[i - 504], day)
        one_day_exposures = calc_exposures_to_equities(
            equities_of_interest, 'crsp', pd_data_dates,
            factor_loading_calc_period)
        one_day_t = one_day_exposures.transpose()  # contains all risk factors
        print(one_day_t)
        for factor in RISK_FACTORS:
            factor_loadings[factor].loc[
                factor_loading_calc_period[1]] = one_day_t.loc[factor]

    # NOTE(review): pd.Panel was removed in pandas 1.0 -- this requires an
    # old pandas; left unchanged to preserve the on-disk format.
    pd.Panel(factor_loadings).to_hdf(FACTOR_LOADINGS_FILE, 'key', mode='w')
def create_static_table_from_file(filepath):
    """Stores static items to a persisted np array.

    The following static fields are currently persisted (one row each):
    -Sector
    -exchange
    -category
    """
    register(BUNDLE_NAME, int)  # dummy register so the bundle metadata can be read
    df = pd.read_csv(filepath, index_col="ticker")
    assert df.shape[0] > 10001  # there should be more than 10k tickers

    # keep only usable SEP rows
    for excluded in ('None', 'INDEX'):
        df = df[df.exchange != excluded]
    df = df[df.table == 'SEP']

    # integer encodings, ordered to match the rows of the output array
    field_codes = (df['sector'].map(SECTOR_CODING),
                   df['exchange'].map(EXCHANGE_CODING),
                   df['category'].map(CATEGORY_CODING))

    ticker_to_sid = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    num_sids = max(ticker_to_sid.values()) + 1

    # 2-D array indexed by [field, SID]; -1 marks "unknown"
    sectors = np.full((3, num_sids), -1, np.dtype('int64'))
    for ticker, sid in ticker_to_sid.items():
        print(ticker, sid, field_codes[0].get(ticker, -1))
        for row, codes in enumerate(field_codes):
            sectors[row, sid] = codes.get(ticker, -1)

    print(sectors)
    print(sectors[:, -10:])

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + STATIC_FILE, sectors)
from zipline.data.bundles.core import register
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing
from alphacompiler.util.zipline_data_tools import make_pipeline_engine


def str2dt(datestr):
    """Parse a date string into a UTC pandas Timestamp."""
    return pd.to_datetime(datestr, utc=True)


# constants
BUNDLE = 'crsp'
data_dates = ('2015-01-06', '2015-01-30')
backtest_dates = ('2015-01-06', '2015-01-30')
pipeline_data_dates = (pd.to_datetime(data_dates[0], utc=True),
                       pd.to_datetime(data_dates[1], utc=True))

# Step 1. Run Pipeline
# 1.0 dummy bundle register
register(BUNDLE, int)  # dummy register of a bundle

# 1.1 create the pipeline engine
spe = make_pipeline_engine(BUNDLE, pipeline_data_dates)

# 1.2 create your pipeline (this could be more elaborate)
pipe = Pipeline(columns={'Close': USEquityPricing.close.latest},)

# 1.3 run your pipeline with the pipeline engine
stocks = spe.run_pipeline(pipe, str2dt(backtest_dates[0]),
                          str2dt(backtest_dates[1]))
# Fix: the original Python-2 `print stocks` statement is a SyntaxError under
# Python 3; the rest of this file uses the print() function.
print(stocks)
def num_tkrs_in_bundle(bundle_name):
    """Return the number of tickers registered in *bundle_name*."""
    return len(get_ticker_sid_dict_from_bundle(bundle_name))


if __name__ == '__main__':
    fields = ['member']
    dimensions = ['SP500']

    from zipline.data.bundles.core import register
    from alphacompiler.data.loaders.sep_quandl import from_sep_dump

    BUNDLE_NAME = 'sep'
    register(BUNDLE_NAME, from_sep_dump('.'),)

    num_tickers = num_tkrs_in_bundle(BUNDLE_NAME)
    print('number of tickers: ', num_tickers)

    # download the data to /raw, then pack it straight into the zipline data dir
    all_tickers_for_bundle(fields, dimensions, 'sep')
    pack_sparse_data(num_tickers + 1,  # number of tickers in bundle + 1
                     os.path.join(BASE, RAW_FLDR),
                     fields,
                     ZIPLINE_DATA_DIR + FN)
    print("this worked master")
if __name__ == '__main__':
    cal: TradingCalendar = trading_calendars.get_calendar('NYSE')
    start_date = pd.Timestamp('1999-11-1', tz='utc')

    # walk back from yesterday to the most recent NYSE session
    end_date = pd.Timestamp(date.today() - timedelta(days=1), tz='utc')
    while not cal.is_session(end_date):
        end_date -= timedelta(days=1)

    print('ingesting tiingo-data from: ' + str(start_date) + ' to: ' + str(end_date))

    start_time = time.time()
    register('tiingo', tiingo_bundle,
             calendar_name='NYSE',
             start_session=start_date,
             end_session=end_date)
    assets_version = ((), )[0]  # just a weird way to create an empty tuple
    bundles_module.ingest("tiingo",
                          os.environ,
                          assets_versions=assets_version,
                          show_progress=True)
    print("--- %s seconds ---" % (time.time() - start_time))
def create_static_table_from_database(ZIPLINE_DATA_DIR, STATIC_FILE, BUNDLE_NAME,
                                      SECTOR_CODING, EXCHANGE_CODING,
                                      CATEGORY_CODING):
    """Stores static items to a persisted np array.

    The following static fields are currently persisted (one row each):
      0 - Sector
      1 - exchange
      2 - category
      3 - code -> siccode

    Parameters
    ----------
    ZIPLINE_DATA_DIR : path prefix where the .npy file is written
    STATIC_FILE : file name appended to ZIPLINE_DATA_DIR
    BUNDLE_NAME : zipline bundle whose ticker->SID map indexes the array
    SECTOR_CODING, EXCHANGE_CODING, CATEGORY_CODING : dict
        string -> integer encodings for the respective columns
    """
    register(BUNDLE_NAME, int)  # dummy register so the bundle metadata can be read

    query = """SELECT ticker, code, sector, exchange_id, category FROM security WHERE ttable = 'SF1' """
    df = pd.read_sql_query(query, engine)

    # map exchange_id -> exchange name via the exchange/exchange_id relation
    name_ex_id = get_name_exchange_id()
    my_EXCHANGE_CODING = name_ex_id.set_index('id')['name'].to_dict()
    df['exchange'] = df['exchange_id'].map(my_EXCHANGE_CODING)

    # encode the static string columns as integers
    df['sectors_'] = df['sector'].map(SECTOR_CODING)
    df['exchange_'] = df['exchange'].map(EXCHANGE_CODING)
    df['category_'] = df['category'].map(CATEGORY_CODING)
    df['code_'] = df['code'].astype(
        int)  # just multiply the siccode by 10 and get integer
    df = df.fillna(-1)

    ae_d = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    N = max(ae_d.values()) + 1

    # 2-D array indexed by [field, SID]; -1 marks "unknown"
    sectors = np.full((4, N), -1, np.dtype('int64'))

    # Perf fix: the original rescanned the whole DataFrame with a boolean
    # mask four times per ticker (O(rows) each).  Build O(1) lookup dicts
    # once, keeping the FIRST row per ticker to match the original .iloc[0].
    first = df.drop_duplicates(subset='ticker', keep='first').set_index('ticker')
    sector_by_tkr = first['sectors_'].to_dict()
    exchange_by_tkr = first['exchange_'].to_dict()
    category_by_tkr = first['category_'].to_dict()
    code_by_tkr = first['code_'].to_dict()

    for ticker, sid in tqdm(ae_d.items(), total=len(ae_d)):
        # tickers missing from the database keep the -1 pre-fill,
        # matching the original explicit else branch
        sectors[0, sid] = sector_by_tkr.get(ticker, -1)
        sectors[1, sid] = exchange_by_tkr.get(ticker, -1)
        sectors[2, sid] = category_by_tkr.get(ticker, -1)
        sectors[3, sid] = code_by_tkr.get(ticker, -1)

    print(sectors)
    print(sectors[:, -10:])

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + STATIC_FILE, sectors)
    print("this worked master")
# NOTE(review): `eg.exit()` runs at import time, but `eg` is only bound inside
# the __main__ block below -- as ordered this raises NameError.  Preserved
# as-is; confirm the intended placement before moving or removing it.
eg.exit()


def register_tdx(assets=None, minute=False, start=None, overwrite=False, end=None):
    """(Re-)register the 'tdx' bundle on the SHSZ calendar.

    When *start* is given but is not a trading session, it is advanced to
    the next session.
    """
    try:
        bundles.unregister('tdx')
    except bundles.UnknownBundle:
        pass

    calendar = get_calendar('SHSZ')
    if start and not calendar.is_session(start):
        start = calendar.all_sessions[searchsorted(calendar.all_sessions, start)]

    bundles.register('tdx',
                     partial(tdx_bundle, assets, minute, overwrite),
                     'SHSZ', start, end,
                     minutes_per_day=240)


# default registration: all assets, daily bars, no overwrite
bundles.register('tdx', partial(tdx_bundle, None, False, False), minutes_per_day=240)

if __name__ == '__main__':
    eg = Engine(auto_retry=True, multithread=True, thread_num=8)
    with eg.connect():
        symbols = fetch_symbols(eg)
        symbols = symbols[:3]

        data = []
        metas = []
        for symbol in symbols.symbol:
            data.append((int(symbol), fetch_single_equity(eg, symbol)))
            metas.append(get_meta_from_bars(data[-1][1]))
        symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
        splits, dividends = fetch_splits_and_dividends(eg, symbols)