Example #1
def main():
    numerai_tickers = pd.DataFrame(numerapi.SignalsAPI().ticker_universe(),
                                   columns=['bloomberg_ticker'])
    public_ticker_map = get_public_numerai_ticker_map()
    unique_bloomberg_tickers = list(
        set(numerai_tickers.bloomberg_ticker.tolist() +
            public_ticker_map.bloomberg_ticker.tolist()))

    print(
        f'Tickers in current Numerai Signals tournament: {len(numerai_tickers)}'
    )
    print(f'Tickers in public numerai mapping: {len(public_ticker_map)}')
    print(
        f'Total number of unique bloomberg tickers: {len(unique_bloomberg_tickers)}'
    )

    ticker_map = pd.DataFrame({
        'bloomberg': unique_bloomberg_tickers,
        'alpha_vantage': None
    })
    ticker_map['ticker'] = ticker_map.bloomberg.str[:-3]
    ticker_map['is_available_on_alpha_vantage'] = ticker_map.bloomberg.apply(
        is_available_on_alpha_vantage)
    ticker_map['alpha_vantage'] = ticker_map.apply(
        get_alpha_vantage_ticker_for_row, axis=1)
    ticker_map.drop(['ticker', 'is_available_on_alpha_vantage'],
                    axis=1,
                    inplace=True)

    ticker_map = ticker_map.merge(public_ticker_map,
                                  how='left',
                                  left_on='bloomberg',
                                  right_on='bloomberg_ticker')
    ticker_map['yahoo'] = ticker_map['yahoo'].fillna(SYMBOL_NOT_FOUND)
    print_ticker_map_stats(ticker_map)
    ticker_map.sort_values('bloomberg', inplace=True)
    ticker_map.to_csv(TICKER_MAP_FNAME,
                      index=False,
                      columns=['bloomberg', 'yahoo', 'alpha_vantage'])
    print(f'done - saved tickers to file {TICKER_MAP_FNAME}')
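
Example #1 calls several helpers that are not shown in the snippet. The sketch below is one plausible shape for them; the public map URL, the SYMBOL_NOT_FOUND and TICKER_MAP_FNAME constants, and the Alpha Vantage SYMBOL_SEARCH lookup are assumptions, not the original implementation (print_ticker_map_stats is omitted).

# assumed helpers for Example #1; names are taken from the snippet, bodies are illustrative
import pandas as pd
import requests

SYMBOL_NOT_FOUND = 'SYMBOL_NOT_FOUND'
TICKER_MAP_FNAME = 'ticker_map.csv'
PUBLIC_MAP_URL = ('https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/'
                  'signals_ticker_map_w_bbg.csv')


def get_public_numerai_ticker_map() -> pd.DataFrame:
    # public Bloomberg <-> Yahoo map (columns include 'bloomberg_ticker' and 'yahoo')
    return pd.read_csv(PUBLIC_MAP_URL)


def is_available_on_alpha_vantage(bloomberg_ticker: str) -> bool:
    # hypothetical availability check: search Alpha Vantage for the bare ticker
    # (the ' US'-style country suffix is dropped, mirroring main() above)
    ticker = bloomberg_ticker[:-3]
    resp = requests.get('https://www.alphavantage.co/query',
                        params={'function': 'SYMBOL_SEARCH',
                                'keywords': ticker,
                                'apikey': 'YOUR_API_KEY'})
    return bool(resp.json().get('bestMatches'))


def get_alpha_vantage_ticker_for_row(row: pd.Series):
    # use the bare ticker when Alpha Vantage knows it, otherwise mark it as missing
    return row['ticker'] if row['is_available_on_alpha_vantage'] else SYMBOL_NOT_FOUND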
Example #2
def download_yfinance(ticker_map: pd.DataFrame):
    napi = numerapi.SignalsAPI()
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name='bloomberg_ticker')
    print(f"Number of eligible tickers: {len(eligible_tickers)}")
    print(f"Number of tickers in map: {len(ticker_map)}")
    yfinance_tickers = eligible_tickers.map(
        dict(zip(ticker_map['bloomberg_ticker'],
                 ticker_map['yahoo']))).dropna()
    bloomberg_tickers = ticker_map['bloomberg_ticker']
    print(f'Number of eligible, mapped tickers: {len(yfinance_tickers)}')

    n = 1000  # chunk row size
    chunk_df = [
        yfinance_tickers.iloc[i:i + n]
        for i in range(0, len(yfinance_tickers), n)
    ]

    concat_dfs = []
    print("Downloading data...")
    for df in chunk_df:
        try:
            # set threads = True for faster performance, but some tickers will fail and the script may hang
            # set threads = False for slower performance, but more tickers will succeed
            temp_df = yfinance.download(df.str.cat(sep=' '),
                                        start='2005-12-01',
                                        threads=True)
            temp_df = temp_df['Adj Close'].stack().reset_index()
            concat_dfs.append(temp_df)
        except Exception:
            # some chunks fail to download or parse; skip them
            pass

    full_data = pd.concat(concat_dfs)

    full_data.columns = ['date', 'ticker', 'price']
    full_data['bloomberg_ticker'] = full_data.ticker.map(
        dict(zip(ticker_map['yahoo'], bloomberg_tickers)))

    return full_data
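
A hedged driver for download_yfinance from Example #2, assuming the same public Bloomberg-to-Yahoo map used in the later examples and an arbitrary output filename:

# illustrative usage only; the map URL and output filename are assumptions
import pandas as pd

ticker_map = pd.read_csv(
    'https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/'
    'signals_ticker_map_w_bbg.csv')
full_data = download_yfinance(ticker_map)
full_data.to_csv('full_data.csv', index=False)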
Example #3
import logging
import os

import joblib
import numerapi
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from dateutil.relativedelta import relativedelta, FR

TARGET_NAME = "target"
PREDICTION_NAME = "signal"
TRAINED_MODEL_PREFIX = './trained_model'

# Pull model id from the "MODEL_ID" environment variable;
# defaults to None, change it to a model id from your Numerai account
MODEL_ID = os.getenv('MODEL_ID', None)
MODEL = GradientBoostingRegressor(subsample=0.1)

napi = numerapi.SignalsAPI()


def download_data(live_data_date):
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")
    logging.info(f"Number of eligible tickers: {len(eligible_tickers)}")

    yfinance_tickers = map_tickers(eligible_tickers, "bloomberg_ticker",
                                   "yahoo")
    logging.info(f"Number of yahoo tickers: {len(yfinance_tickers)}")

    num_days_lag = 5
    if os.path.exists('full_data.csv'):
        full_data = pd.read_csv('full_data.csv')
        quintile_lag, rsi_diff, rsi_diff_abs = get_rsi_feature_names(
Example #4
def main():
    napi = numerapi.SignalsAPI()

    # Numerai Universe
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")
    print(f"Number of eligible tickers : {len(eligible_tickers)}")

    ticker_map = pd.read_csv(
        "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv"
    )

    # ----- Yahoo <-> Bloomberg mapping -----
    yfinance_tickers = eligible_tickers.map(
        dict(zip(ticker_map["bloomberg_ticker"],
                 ticker_map["yahoo"]))).dropna()
    bloomberg_tickers = ticker_map["bloomberg_ticker"]
    print(f"Number of eligible, mapped tickers: {len(yfinance_tickers)}")

    us_ticker_map = ticker_map[ticker_map.bloomberg_ticker.str[-2:] == "US"]
    # tickers = us_ticker_map.yahoo.dropna().values  # US tickers only
    tickers = ticker_map.yahoo.dropna().values  # all mapped tickers

    # ----- Raw data loading and formatting -----
    print(f"using tickers: {len(tickers)}")
    full_data = load_data(tickers,
                          "full_data.csv",
                          threads=LOAD_DATA_IN_PARALLEL)

    full_data["bloomberg_ticker"] = full_data.ticker.map(
        dict(zip(ticker_map["yahoo"], bloomberg_tickers)))

    full_data = full_data[[
        "bloomberg_ticker", "open", "high", "low", "close", "adjusted close"
    ]].sort_index(ascending=True)
    full_data.dropna(inplace=True, axis=0)

    # ----- Merging targets -----
    url = "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_train_val_bbg.csv"
    targets = pd.read_csv(url)

    targets["target"] = targets["target"].astype(np.float16)
    targets["date"] = pd.to_datetime(targets["friday_date"], format="%Y%m%d")
    gc.collect()

    # ----- Generate and select features -----
    full_data = generate_featues(full_data)
    feature_names = [f for f in full_data.columns if "quintile" in f]

    ML_data = pd.merge(
        full_data.reset_index(),
        targets,
        on=["date", "bloomberg_ticker"],
    ).set_index("date")
    print(f"Number of eras in data: {len(ML_data.index.unique())}")

    ML_data = ML_data[ML_data.index.weekday == 4]
    ML_data = ML_data[ML_data.index.value_counts() > 200]

    # ----- Train test split -----
    train_data = ML_data[ML_data["data_type"] == "train"]
    test_data = ML_data[ML_data["data_type"] == "validation"]

    corrs = train_data.groupby(train_data.index).apply(
        lambda x: x[feature_names + [TARGET_NAME]].corr()[TARGET_NAME])
    mean_corr = corrs[feature_names].mean(0)
    print(mean_corr)

    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    print(last_friday)
    date_string = last_friday.strftime("%Y-%m-%d")

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)
    print(len(live_data))
    # ----- Train model -----
    print("Training model...")
    model = GradientBoostingRegressor()
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # ----- Predict test data -----
    train_data[PREDICTION_NAME] = model.predict(train_data[feature_names])
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    diagnostic_df = pd.concat([test_data, live_data])
    diagnostic_df["friday_date"] = diagnostic_df.friday_date.fillna(
        last_friday.strftime("%Y%m%d")).astype(int)
    diagnostic_df["data_type"] = diagnostic_df.data_type.fillna("live")
    diagnostic_df[["bloomberg_ticker", "friday_date", "data_type",
                   "signal"]].reset_index(drop=True).to_csv(
                       "example_signal_alphavantage.csv", index=False)
    print(
        "Submission saved to example_signal_alphavantage.csv. Upload to signals.numer.ai for scores and diagnostics"
    )
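
Example #4 depends on a load_data helper and a LOAD_DATA_IN_PARALLEL flag that are not shown (generate_featues and the RSI feature code are likewise defined elsewhere). Below is a minimal sketch of load_data under the assumption that it downloads daily bars with yfinance and caches them to CSV; the column handling is illustrative, not the original code.

# assumed helper: download OHLCV data with yfinance and cache it to CSV
import os

import pandas as pd
import yfinance

LOAD_DATA_IN_PARALLEL = True  # assumption: simply forwarded to yfinance's threads flag


def load_data(tickers, cache_fname, threads=True):
    if os.path.exists(cache_fname):
        return pd.read_csv(cache_fname, index_col='date', parse_dates=True)
    raw = yfinance.download(' '.join(tickers), start='2005-12-01',
                            threads=threads, group_by='ticker')
    frames = []
    for ticker in set(tickers):
        if ticker not in raw.columns.get_level_values(0):
            continue  # ticker failed to download
        df = raw[ticker].copy()
        df.columns = [c.lower() for c in df.columns]  # open, high, low, close, adj close, ...
        df = df.rename(columns={'adj close': 'adjusted close'})
        df['ticker'] = ticker
        frames.append(df)
    data = pd.concat(frames)
    data.index.name = 'date'
    data.to_csv(cache_fname)
    return data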
Example #5
def main():
    '''Creates example_signal_upload.csv to upload for validation and live data submission'''
    napi = numerapi.SignalsAPI()

    # read in list of active Signals tickers which can change slightly era to era
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name='bloomberg_ticker')
    print(f"Number of eligible tickers: {len(eligible_tickers)}")

    # read in yahoo to bloomberg ticker map, still a work in progress, h/t wsouza
    ticker_map = pd.read_csv(
        'https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv'
    )
    print(f"Number of tickers in map: {len(ticker_map)}")

    # map eligible numerai tickers to yahoo finance tickers
    yfinance_tickers = eligible_tickers.map(
        dict(zip(ticker_map['bloomberg_ticker'],
                 ticker_map['yahoo']))).dropna()
    bloomberg_tickers = ticker_map['bloomberg_ticker']
    print(f'Number of eligible, mapped tickers: {len(yfinance_tickers)}')

    # download data
    n = 1000  # chunk row size
    chunk_df = [
        yfinance_tickers.iloc[i:i + n]
        for i in range(0, len(yfinance_tickers), n)
    ]

    concat_dfs = []
    print("Downloading data...")
    for df in chunk_df:
        try:
            # set threads = True for faster performance, but some tickers will fail and the script may hang
            # set threads = False for slower performance, but more tickers will succeed
            temp_df = yfinance.download(df.str.cat(sep=' '),
                                        start='2005-12-01',
                                        threads=False)
            temp_df = temp_df['Adj Close'].stack().reset_index()
            concat_dfs.append(temp_df)
        except Exception:  # e.g. simplejson.errors.JSONDecodeError
            pass

    full_data = pd.concat(concat_dfs)

    # properly position and clean raw data, after taking adjusted close only
    full_data.columns = ['date', 'ticker', 'price']
    full_data.set_index('date', inplace=True)
    # convert yahoo finance tickers back to numerai tickers
    full_data['bloomberg_ticker'] = full_data.ticker.map(
        dict(zip(ticker_map['yahoo'], bloomberg_tickers)))
    print('Data downloaded.')
    print(
        f"Number of tickers with data: {len(full_data.bloomberg_ticker.unique())}"
    )

    ticker_groups = full_data.groupby('ticker')
    full_data['RSI'] = ticker_groups['price'].transform(lambda x: RSI(x))

    # group by era (date) and create quintile labels within each era, useful for learning relative ranking
    date_groups = full_data.groupby(full_data.index)
    full_data['RSI_quintile'] = date_groups['RSI'].transform(
        lambda group: pd.qcut(group, 5, labels=False, duplicates='drop'))
    full_data.dropna(inplace=True)

    # create lagged features grouped by ticker
    ticker_groups = full_data.groupby('ticker')
    num_days = 5
    # lag 0 is that day's value, lag 1 is yesterday's value, etc
    for day in range(num_days + 1):
        full_data[f'RSI_quintile_lag_{day}'] = ticker_groups[
            'RSI_quintile'].transform(lambda group: group.shift(day))

    # create difference of the lagged features and absolute difference of the lagged features (change in RSI quintile by day)
    for day in range(num_days):
        full_data[f'RSI_diff_{day}'] = full_data[
            f'RSI_quintile_lag_{day}'] - full_data[
                f'RSI_quintile_lag_{day + 1}']
        full_data[f'RSI_abs_diff_{day}'] = np.abs(
            full_data[f'RSI_quintile_lag_{day}'] -
            full_data[f'RSI_quintile_lag_{day + 1}'])

    # define column names of features, target, and prediction
    feature_names = [f'RSI_quintile_lag_{num}' for num in range(num_days)] + [
        f'RSI_diff_{num}' for num in range(num_days)
    ] + [f'RSI_abs_diff_{num}' for num in range(num_days)]
    print(f'Features for training:\n {feature_names}')

    TARGET_NAME = 'target'
    PREDICTION_NAME = 'signal'

    # read in Signals targets
    targets = pd.read_csv('historical_targets.csv')
    targets['date'] = pd.to_datetime(targets['friday_date'], format='%Y%m%d')

    # merge our feature data with Numerai targets
    ML_data = pd.merge(full_data.reset_index(),
                       targets,
                       on=['date', 'bloomberg_ticker']).set_index('date')
    # print(f'Number of eras in data: {len(ML_data.index.unique())}')

    # for training and testing we want clean, complete data only
    ML_data.dropna(inplace=True)
    ML_data = ML_data[ML_data.index.weekday == 4]  # ensure we have only Fridays
    ML_data = ML_data[ML_data.index.value_counts() > 50]  # drop eras with under 50 observations

    # train test split
    train_data = ML_data[ML_data['data_type'] == 'train']
    test_data = ML_data[ML_data['data_type'] == 'validation']

    # train model
    print("Training model...")
    model = GradientBoostingRegressor(subsample=0.1)
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # predict test data
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])

    # predict live data
    # choose data as of most recent friday
    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    date_string = last_friday.strftime('%Y-%m-%d')

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)

    # get data from the day before, for markets that were closed
    # on the most recent friday
    last_thursday = last_friday - timedelta(days=1)  # needs: from datetime import timedelta
    thursday_date_string = last_thursday.strftime('%Y-%m-%d')
    thursday_data = full_data.loc[thursday_date_string]
    # only select tickers that aren't already present in live_data
    thursday_data = thursday_data[
        ~thursday_data.ticker.isin(live_data.ticker.values)].copy()
    thursday_data.dropna(subset=feature_names, inplace=True)

    live_data = pd.concat([live_data, thursday_data])

    print(f"Number of live tickers to submit: {len(live_data)}")
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    # prepare and writeout example file
    diagnostic_df = pd.concat([test_data, live_data])
    diagnostic_df['friday_date'] = diagnostic_df.friday_date.fillna(
        last_friday.strftime('%Y%m%d')).astype(int)
    diagnostic_df['data_type'] = diagnostic_df.data_type.fillna('live')
    diagnostic_df[['bloomberg_ticker', 'friday_date', 'data_type',
                   'signal']].reset_index(drop=True).to_csv(
                       'example_signal_upload.csv', index=False)
    print(
        'Example submission completed. Upload to signals.numer.ai for scores and live submission'
    )
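
Examples #5 and #6 both call an RSI helper that is not defined in the snippets. A minimal sketch matching the call signatures RSI(x) and RSI(x, 14), assuming the usual Wilder smoothing and a default 14-period window:

# assumed helper: Wilder-style RSI over a price series
import pandas as pd


def RSI(prices: pd.Series, interval: int = 14) -> pd.Series:
    delta = prices.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    # Wilder smoothing is an EMA with alpha = 1 / interval
    avg_gain = gain.ewm(alpha=1 / interval, min_periods=interval).mean()
    avg_loss = loss.ewm(alpha=1 / interval, min_periods=interval).mean()
    rs = avg_gain / avg_loss
    return 100 - 100 / (1 + rs)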
Example #6
def main():
    # -----Tickers and mapping-----
    napi = numerapi.SignalsAPI()
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")

    ticker_map = pd.read_csv(
        "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv"
    )
    ticker_map = ticker_map[ticker_map.bloomberg_ticker.isin(eligible_tickers)]

    numerai_tickers = ticker_map["bloomberg_ticker"]
    yfinance_tickers = ticker_map["yahoo"]

    eod_tickers = pd.read_csv(
        "https://s3.amazonaws.com/quandl-production-static/end_of_day_us_stocks/ticker_list.csv"
    )
    print(f"Number of eligible tickers : {len(eligible_tickers)}")

    common_tickers = np.intersect1d(yfinance_tickers.values.astype(str),
                                    eod_tickers["Ticker"].values.astype(str))
    print(
        f"Number of tickers common between EOD and Bloomberg: {len(common_tickers)}"
    )

    # downloads the whole dataset as zip and read data (takes around 1.5min)
    full_data = download_full_and_load(ticker_map,
                                       common_tickers,
                                       f_name="full_EOD.zip")

    # Building a custom feature
    full_data["day_chg"] = full_data["close"] / full_data["open"] - 1
    gc.collect()

    # -----Feature engineering-----
    ticker_groups = full_data.groupby("bloomberg_ticker")

    # RSI
    full_data["close_RSI_14"] = ticker_groups["close"].transform(
        lambda x: RSI(x, 14))
    full_data["close_RSI_21"] = ticker_groups["close"].transform(
        lambda x: RSI(x, 21))
    full_data["day_chg_RSI_14"] = ticker_groups["day_chg"].transform(
        lambda x: RSI(x, 14))
    full_data["day_chg_RSI_21"] = ticker_groups["day_chg"].transform(
        lambda x: RSI(x, 21))

    # SMA
    full_data["close_SMA_14"] = ticker_groups["close"].transform(
        lambda x: x.rolling(14).mean())
    full_data["close_SMA_21"] = ticker_groups["close"].transform(
        lambda x: x.rolling(21).mean())

    indicators = [
        "close_RSI_14", "close_RSI_21", "day_chg_RSI_14", "close_SMA_14",
        "close_SMA_21", "day_chg_RSI_21"
    ]

    full_data.dropna(axis=0, inplace=True)
    del ticker_groups

    # -----Feature engineering: Quintile-----
    date_groups = full_data.groupby(full_data.index)
    print("Quintiling...")
    for indicator in indicators:
        # note: despite the "quintile" name, each era is split into 100 bins (percentiles)
        full_data[f"{indicator}_quintile"] = (
            date_groups[indicator].transform(lambda group: pd.qcut(
                group, 100, labels=False, duplicates="drop")).astype(
                    np.float16))
        gc.collect()

    del date_groups
    gc.collect()

    # -----Feature engineering: Quintile lag-----
    ticker_groups = full_data.groupby("ticker")
    # create lagged features, lag 0 is that day's value, lag 1 is yesterday's value, etc
    print("Calculating lag...")
    for indicator in indicators:
        num_days = 5
        for day in range(num_days + 1):
            full_data[f"{indicator}_quintile_lag_{day}"] = ticker_groups[
                f"{indicator}_quintile"].transform(
                    lambda group: group.shift(day))

        gc.collect()

    full_data.dropna(axis=0, inplace=True)

    del ticker_groups
    gc.collect()
    print("Calculating changes in lag...")
    # create difference of the lagged features (change in RSI quintile by day)
    for indicator in indicators:
        for day in range(0, num_days):
            full_data[f"{indicator}_diff_{day}"] = (
                full_data[f"{indicator}_quintile_lag_{day}"] -
                full_data[f"{indicator}_quintile_lag_{day + 1}"]).astype(
                    np.float16)
            gc.collect()

    # create absolute difference of the lagged features (change in RSI quintile by day)
    for indicator in indicators:
        for day in range(num_days):
            full_data[f"{indicator}_abs_diff_{day}"] = np.abs(
                full_data[f"{indicator}_quintile_lag_{day}"] -
                full_data[f"{indicator}_quintile_lag_{day + 1}"]).astype(
                    np.float16)
        gc.collect()

    TARGET_NAME = "target"
    PREDICTION_NAME = "signal"

    # read in Signals targets
    numerai_targets = "https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_train_val_bbg.csv"
    targets = pd.read_csv(numerai_targets)
    targets["date"] = pd.to_datetime(targets["friday_date"], format="%Y%m%d")

    # merge our feature data with Numerai targets
    ML_data = pd.merge(full_data.reset_index(),
                       targets,
                       on=["date", "bloomberg_ticker"]).set_index("date")
    print(f"Number of eras in data: {len(ML_data.index.unique())}")

    # for training and testing we want clean, complete data only
    ML_data.dropna(inplace=True)
    ML_data = ML_data[ML_data.index.weekday == 4]  # ensure we have only Fridays
    ML_data = ML_data[ML_data.index.value_counts() > 200]  # drop eras with under 200 observations
    feature_names = [
        f for f in ML_data.columns for y in ["lag", "diff"] if y in f
    ]
    print(f"Using {len(feature_names)} features")

    last_friday = datetime.now() + relativedelta(weekday=FR(-1))
    date_string = last_friday.strftime("%Y-%m-%d")

    try:
        live_data = full_data.loc[date_string].copy()
    except KeyError as e:
        print(f"No ticker on {e}")
        live_data = full_data.iloc[:0].copy()
    live_data.dropna(subset=feature_names, inplace=True)

    # get data from the day before, for markets that were closed
    # on the most recent friday
    last_thursday = last_friday - timedelta(days=1)
    thursday_date_string = last_thursday.strftime("%Y-%m-%d")
    thursday_data = full_data.loc[thursday_date_string]
    # only select tickers that aren't already present in live_data
    thursday_data = thursday_data[
        ~thursday_data.ticker.isin(live_data.ticker.values)].copy()
    thursday_data.dropna(subset=feature_names, inplace=True)

    live_data = pd.concat([live_data, thursday_data])

    # train test split
    train_data = ML_data[ML_data["data_type"] == "train"].copy()
    test_data = ML_data[ML_data["data_type"] == "validation"].copy()

    train_data[feature_names] /= 100.0
    test_data[feature_names] /= 100.0
    live_data[feature_names] /= 100.0

    del ML_data
    gc.collect()

    # train model
    print("Training model...")
    model = GradientBoostingRegressor(n_estimators=50)
    model.fit(train_data[feature_names], train_data[TARGET_NAME])
    print("Model trained.")

    # predict test data
    train_data[PREDICTION_NAME] = model.predict(train_data[feature_names])
    test_data[PREDICTION_NAME] = model.predict(test_data[feature_names])

    print(f"Number of live tickers to submit: {len(live_data)}")
    live_data[PREDICTION_NAME] = model.predict(live_data[feature_names])

    # prepare and writeout example file
    diagnostic_df = pd.concat([test_data, live_data])
    diagnostic_df["friday_date"] = diagnostic_df.friday_date.fillna(
        last_friday.strftime("%Y%m%d")).astype(int)
    diagnostic_df["data_type"] = diagnostic_df.data_type.fillna("live")
    diagnostic_df[["bloomberg_ticker", "friday_date", "data_type",
                   "signal"]].reset_index(drop=True).to_csv(
                       "example_quandl_signal_upload.csv", index=False)
    print(
        "Example submission completed. Upload to signals.numer.ai for scores and live submission"
    )
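
Example #6's download_full_and_load is not shown. A rough sketch under two explicit assumptions: the Quandl/EOD bulk zip has already been downloaded to f_name (the real helper fetches it, which presumably needs a Quandl API key), and the zip holds a single CSV with roughly the column layout below; adjust both to the actual data.

# assumed helper: read the EOD bulk zip and map tickers back to Bloomberg symbols
import pandas as pd


def download_full_and_load(ticker_map, common_tickers, f_name="full_EOD.zip"):
    # assumed column layout for the bulk EOD file; the real file may differ
    cols = ["ticker", "date", "open", "high", "low", "close", "volume",
            "dividend", "split", "adj_open", "adj_high", "adj_low",
            "adj_close", "adj_volume"]
    full_data = pd.read_csv(f_name, compression="zip", names=cols)
    full_data = full_data[full_data["ticker"].isin(common_tickers)]
    full_data["date"] = pd.to_datetime(full_data["date"])
    # map EOD (Yahoo-style) tickers back to the Bloomberg tickers Numerai expects
    yahoo_to_bbg = dict(zip(ticker_map["yahoo"], ticker_map["bloomberg_ticker"]))
    full_data["bloomberg_ticker"] = full_data["ticker"].map(yahoo_to_bbg)
    return full_data.set_index("date")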
Example #7
import configparser
import logging
import os

import numerapi
import pandas as pd
from dateutil.relativedelta import relativedelta, FR
from sklearn.ensemble import GradientBoostingRegressor

TARGET_NAME = "target"
PREDICTION_NAME = "signal"
TRAINED_MODEL_PREFIX = './trained_model'

# Define models here as (ID, model instance),
# a model ID of None is submitted as your default model
MODEL_CONFIGS = [
    (None, GradientBoostingRegressor(subsample=0.1)),
    # (YOUR MODEL ID, LinearRegression(n_jobs=10))
    #  etc...
]

if os.getenv('NUMERAI_PUBLIC_ID') and os.getenv('NUMERAI_SECRET_KEY'):
    napi = numerapi.SignalsAPI()

else:
    config = configparser.ConfigParser()
    config.read('../.numerai/.keys')
    # initialize API client
    napi = numerapi.SignalsAPI(
        public_id=config['numerai']['NUMERAI_PUBLIC_ID'],
        secret_key=config['numerai']['NUMERAI_SECRET_KEY'])


def download_data(live_data_date):
    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")
    logging.info(f"Number of eligible tickers: {len(eligible_tickers)}")
Example #8
line_post(notification_message)

try:
    """
    Catboostを呼び出しnumeraisignalsに予測を提出する
    """
    #!pip install numerapi
    #!pip install yfinance
    #!pip install simplejson
    #
    #!pip install catboost

    # Numerai Signals expects Bloomberg tickers; yfinance expects Yahoo Finance tickers.

    # Data acquisition
    napi = numerapi.SignalsAPI()

    eligible_tickers = pd.Series(napi.ticker_universe(),
                                 name="bloomberg_ticker")
    print(f"Number of eligible tickers : {len(eligible_tickers)}")

    print(eligible_tickers.head(10))

    # This file maps Bloomberg tickers to Yahoo Finance tickers, so we can download with yfinance and then rename the tickers back to Bloomberg.

    ticker_map = pd.read_csv(
        'https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv'
    )

    print(len(ticker_map))
def api_fixture():
    api = numerapi.SignalsAPI(verbosity='DEBUG')
    return api
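
# The api_fixture fragment above looks like a pytest fixture with its decorator
# stripped; a self-contained sketch of how it would typically be declared and
# consumed, assuming pytest (the test function is illustrative, not original code):
import numerapi
import pytest


@pytest.fixture
def api_fixture():
    # DEBUG verbosity makes numerapi log each request, handy when debugging tests
    return numerapi.SignalsAPI(verbosity='DEBUG')


def test_ticker_universe_not_empty(api_fixture):
    assert len(api_fixture.ticker_universe()) > 0
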
start_date = args.startDate
end_date = args.endDate

to_date = str(
    int(
        time.mktime(
            datetime.datetime.strptime(end_date, "%d/%m/%Y").timetuple())))
from_date = str(
    int(
        time.mktime(
            datetime.datetime.strptime(start_date, "%d/%m/%Y").timetuple())))

subReddit = 'wallstreetbets'

numerAI = numerapi.SignalsAPI()
eligible_tickers = pd.Series(numerAI.ticker_universe(),
                             name="bloomberg_ticker")
numerAI_ticker_map = pd.read_csv(
    'https://numerai-signals-public-data.s3-us-west-2.amazonaws.com/signals_ticker_map_w_bbg.csv'
)
bb_tickers = numerAI_ticker_map["bloomberg_ticker"]


def get_Pushshift_Data(query, after, before, sub):
    reURL = ('https://api.pushshift.io/reddit/search/submission/'
             f'?title={query}&size=1000&after={after}&before={before}&subreddit={sub}')
    print(reURL)
    r = requests.get(reURL)
    data = json.loads(r.text)