def create_sid_table_from_file(filepath):
    """Read the raw ticker file, map tickers -> SIDs, encode sector
    strings as integers, and persist the result to SID_FILE."""
    # dummy register so the bundle's asset metadata can be queried
    register(
        BUNDLE_NAME,
        int,
    )

    raw = pd.read_csv(filepath, index_col="ticker")
    assert raw.shape[0] > 10001  # there should be more than 10k tickers
    # drop rows with no exchange and drop index (non-equity) rows
    raw = raw[(raw.exchange != 'None') & (raw.exchange != 'INDEX')]

    sector_codes = raw["sector"].map(SECTOR_CODING)

    ticker_to_sid = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    num_sids = max(ticker_to_sid.values()) + 1

    # 1-D array indexed by SID; -1 marks "sector unknown"
    sectors = np.full(num_sids, -1, np.dtype('int64'))

    # fill in the sector code for every Asset in the bundle
    for ticker, sid in ticker_to_sid.items():
        sectors[sid] = sector_codes.get(ticker, -1)
    print(sectors)

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + SID_FILE, sectors)
# Example #2
# 0
def register_tdx(assets=None, minute=False, start=None, overwrite=False, end=None):
    """Re-register the 'tdx' bundle on the SHSZ calendar.

    If ``start`` is not a valid session, it is snapped forward to the
    first session on or after the requested date.
    """
    # drop any previous registration; ignore if none exists yet
    try:
        bundles.unregister('tdx')
    except bundles.UnknownBundle:
        pass
    calendar = get_calendar('SHSZ')
    if start and not calendar.is_session(start):
        sessions = calendar.all_sessions
        start = sessions[searchsorted(sessions, start)]
    bundles.register('tdx', partial(tdx_bundle, assets, minute, overwrite), 'SHSZ', start, end, minutes_per_day=240)
def create_factor_loadings_files():
    """Create one DataFrame per risk factor and persist them to an HDF5 file.

    Each DataFrame is indexed by backtest dates with Assets as columns;
    values are the factor loadings of each equity on each day.
    """
    data_dates = ('2008-01-01', '2016-12-31')
    backtest_dates = ('2012-01-04', '2016-12-31')
    pd_data_dates = (pd.to_datetime(data_dates[0], utc=True),
                     pd.to_datetime(data_dates[1], utc=True))

    register('crsp', int)  # dummy register

    # get Assets for all symbols stored
    all_assets = get_all_assets_for_bundle('crsp')
    symbol2asset_map = {asset.symbol: asset for asset in all_assets}

    # read the symbols of interest; close the file promptly
    with open(EQUITIES_OF_INTEREST_FILE) as f:
        asset_symbols = [line.strip() for line in f]
    # materialize as a list: this sequence is iterated several times
    # below, so a one-shot map() iterator would be exhausted after the
    # first pass (it was consumed by the print alone)
    equities_of_interest = [symbol2asset_map[s] for s in asset_symbols]
    print(equities_of_interest)

    # create container (DataFrame for factor loadings)
    # columns=equities_of_interest, rows=dates
    trading_days_in_bt, cal = get_trading_days(
        'crsp', backtest_dates[0], backtest_dates[1])  # index for dates
    factor_loadings = {}  # dictionary of DataFrames
    for factor in RISK_FACTORS:
        factor_loadings[factor] = pd.DataFrame(index=trading_days_in_bt,
                                               columns=equities_of_interest)

    # for every backtest day, compute loadings over the trailing
    # 504 sessions (~two years) and pack them by factor and date
    cal_list = cal.tolist()
    for day in trading_days_in_bt:
        i = cal_list.index(day)
        print(cal[i - 504], day)
        factor_loading_calc_period = (cal[i - 504], day)

        one_day_exposures = calc_exposures_to_equities(
            equities_of_interest, 'crsp', pd_data_dates,
            factor_loading_calc_period)
        one_day_t = one_day_exposures.transpose()
        print(one_day_t)  # this contains all risk factors

        for factor in RISK_FACTORS:  # pack loadings by factor and date
            factor_loadings[factor].loc[
                factor_loading_calc_period[1]] = one_day_t.loc[factor]

    # create Panel of factor_loadings and save to HDF5 file
    # NOTE(review): pd.Panel was removed in pandas 1.0 -- this requires
    # an older pandas; confirm the pinned version before running
    pd.Panel(factor_loadings).to_hdf(FACTOR_LOADINGS_FILE, 'key', mode='w')
def create_static_table_from_file(filepath):
    """Stores static items to a persisted np array.
    The following static fields are currently persisted.
    -Sector
    -exchange
    -category
    """
    # dummy register so the bundle's asset metadata can be queried
    register(
        BUNDLE_NAME,
        int,
    )

    raw = pd.read_csv(filepath, index_col="ticker")
    assert raw.shape[0] > 10001  # there should be more than 10k tickers
    raw = raw[(raw.exchange != 'None')
              & (raw.exchange != 'INDEX')
              & (raw.table == 'SEP')]

    # integer-coded series per static field, ordered by output row
    field_codes = [raw['sector'].map(SECTOR_CODING),
                   raw['exchange'].map(EXCHANGE_CODING),
                   raw['category'].map(CATEGORY_CODING)]

    ticker_to_sid = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    num_sids = max(ticker_to_sid.values()) + 1

    # 2-D array: one row per field, one column per SID; -1 = missing
    sectors = np.full((3, num_sids), -1, np.dtype('int64'))

    # iterate over Assets in the bundle, and fill in static fields
    for ticker, sid in ticker_to_sid.items():
        print(ticker, sid, field_codes[0].get(ticker, -1))
        for row, codes in enumerate(field_codes):
            sectors[row, sid] = codes.get(ticker, -1)

    print(sectors)
    print(sectors[:, -10:])

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + STATIC_FILE, sectors)
from zipline.data.bundles.core import register
from zipline.pipeline import Pipeline
from zipline.pipeline.data import USEquityPricing

from alphacompiler.util.zipline_data_tools import make_pipeline_engine

def str2dt(datestr):
    """Parse *datestr* into a timezone-aware (UTC) pandas Timestamp."""
    parsed = pd.to_datetime(datestr, utc=True)
    return parsed

# constants
BUNDLE = 'crsp'
data_dates = ('2015-01-06', '2015-01-30')
backtest_dates = ('2015-01-06', '2015-01-30')
pipeline_data_dates = (pd.to_datetime(data_dates[0], utc=True), pd.to_datetime(data_dates[1], utc=True))

# Step 1. Run Pipeline

# 1.0 dummy bundle register
register(BUNDLE, int)  # dummy register of a bundle

# 1.1 create the pipeline engine
spe = make_pipeline_engine(BUNDLE, pipeline_data_dates)

# 1.2 create your pipeline (this could be more elaborate)
pipe = Pipeline(columns={'Close': USEquityPricing.close.latest},)

# 1.3 run your pipeline with the pipeline engine
stocks = spe.run_pipeline(pipe, str2dt(backtest_dates[0]), str2dt(backtest_dates[1]))

# fixed: Python-2 print statement is a SyntaxError under Python 3
print(stocks)
def num_tkrs_in_bundle(bundle_name):
    """Return the number of tickers stored in bundle *bundle_name*."""
    ticker_map = get_ticker_sid_dict_from_bundle(bundle_name)
    return len(ticker_map)


if __name__ == '__main__':

    # fields/dimensions to fetch from the data vendor dump
    fields = ['member']
    dimensions = ['SP500']

    from zipline.data.bundles.core import register
    from alphacompiler.data.loaders.sep_quandl import from_sep_dump

    BUNDLE_NAME = 'sep'
    # register the bundle so its ticker->SID mapping can be read
    register(
        BUNDLE_NAME,
        from_sep_dump('.'),
    )
    num_tickers = num_tkrs_in_bundle(BUNDLE_NAME)
    print('number of tickers: ', num_tickers)

    all_tickers_for_bundle(fields, dimensions,
                           'sep')  # downloads the data to /raw
    pack_sparse_data(
        num_tickers + 1,  # number of tickers in bundle + 1
        os.path.join(BASE, RAW_FLDR),
        fields,
        ZIPLINE_DATA_DIR + FN)  # write directly to the zipline data dir

    print("this worked master")
# Example #7
# 0
if __name__ == '__main__':
    # NYSE calendar decides which days count as trading sessions
    cal: TradingCalendar = trading_calendars.get_calendar('NYSE')

    start_date = pd.Timestamp('1999-11-1', tz='utc')
    # start from yesterday and walk back to the latest trading session
    end_date = pd.Timestamp(date.today() - timedelta(days=1), tz='utc')
    while not cal.is_session(end_date):
        end_date -= timedelta(days=1)

    print('ingesting tiingo-data from: ' + str(start_date) + ' to: ' +
          str(end_date))

    start_time = time.time()

    # register the bundle over the computed session range
    register('tiingo',
             tiingo_bundle,
             calendar_name='NYSE',
             start_session=start_date,
             end_session=end_date)

    assets_version = ()  # no extra asset-db versions requested
    bundles_module.ingest(
        "tiingo",
        os.environ,
        assets_versions=assets_version,
        show_progress=True,
    )

    print("--- %s seconds ---" % (time.time() - start_time))
# Example #8
# 0
def create_static_table_from_database(ZIPLINE_DATA_DIR, STATIC_FILE,
                                      BUNDLE_NAME, SECTOR_CODING,
                                      EXCHANGE_CODING, CATEGORY_CODING):
    """Stores static items to a persisted np array.
    The following static fields are currently persisted.
    -Sector
    -exchange
    -category
    -code ->siccode

    Tickers missing from the database keep the -1 fill value.
    """
    # dummy register so the bundle's ticker->SID mapping can be read
    register(
        BUNDLE_NAME,
        int,
    )

    query = """SELECT ticker, code, sector, exchange_id, category FROM security WHERE ttable = 'SF1' """
    df = pd.read_sql_query(query, engine)

    # add the exchange based on the exchange and exchange_id relation
    # get the exchange and exchange_id relation
    name_ex_id = get_name_exchange_id()
    my_EXCHANGE_CODING = name_ex_id.set_index('id')['name'].to_dict()
    df['exchange'] = df['exchange_id'].map(my_EXCHANGE_CODING)

    # integer-code each static field
    df['sectors_'] = df['sector'].map(SECTOR_CODING)
    df['exchange_'] = df['exchange'].map(EXCHANGE_CODING)
    df['category_'] = df['category'].map(CATEGORY_CODING)
    df['code_'] = df['code'].astype(
        int)  # just multiply the siccode by 10 and get integer

    df = df.fillna(-1)

    ae_d = get_ticker_sid_dict_from_bundle(BUNDLE_NAME)
    N = max(ae_d.values()) + 1

    # create 2-D array to hold data where index = SID; -1 = missing
    sectors = np.full((4, N), -1, np.dtype('int64'))

    # index the frame by ticker once (keeping the first row per ticker,
    # which matches the original `.iloc[0]` selection) so each lookup in
    # the loop is O(1) instead of five full-table scans per ticker
    by_ticker = df.drop_duplicates('ticker').set_index('ticker')

    # iterate over Assets in the bundle, and fill in static fields
    for ticker, sid in tqdm(ae_d.items(), total=len(ae_d)):
        if ticker in by_ticker.index:
            row = by_ticker.loc[ticker]
            sectors[0, sid] = row['sectors_']
            sectors[1, sid] = row['exchange_']
            sectors[2, sid] = row['category_']
            sectors[3, sid] = row['code_']
        # else: array already initialized to -1, nothing to write

    print(sectors)
    print(sectors[:, -10:])

    # finally save the file to disk
    np.save(ZIPLINE_DATA_DIR + STATIC_FILE, sectors)
    print("this worked master")
# Example #9
# 0
    eg.exit()


def register_tdx(assets=None, minute=False, start=None, overwrite=False, end=None):
    """(Re-)register the 'tdx' bundle on the SHSZ trading calendar."""
    try:
        bundles.unregister('tdx')
    except bundles.UnknownBundle:
        # nothing registered yet -- nothing to remove
        pass
    cal = get_calendar('SHSZ')
    if start:
        if not cal.is_session(start):
            # move start forward to the nearest actual session
            idx = searchsorted(cal.all_sessions, start)
            start = cal.all_sessions[idx]
    bundles.register('tdx',
                     partial(tdx_bundle, assets, minute, overwrite),
                     'SHSZ',
                     start,
                     end,
                     minutes_per_day=240)


# default registration: all assets, daily bars only, no overwrite
bundles.register('tdx', partial(tdx_bundle, None, False, False),minutes_per_day=240)

if __name__ == '__main__':
    eg = Engine(auto_retry=True, multithread=True, thread_num=8)
    with eg.connect():
        symbols = fetch_symbols(eg)
        symbols = symbols[:3]
        data = []
        metas = []
        for symbol in symbols.symbol:
            data.append((int(symbol), fetch_single_equity(eg, symbol)))
            metas.append(get_meta_from_bars(data[-1][1]))
        symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
        splits, dividends = fetch_splits_and_dividends(eg, symbols)