def main(data_path: str = None):
    '''
    Download commodities price history from 
    https://blog.quandl.com/api-for-commodity-data

    Every code in ``quandl_commodities_codes`` is fetched with its own 
    request and stored as ``<data_path>/<code>.json``, where ``/`` 
    in the code is replaced by ``_``.

    Note:
        To download this dataset you need to register at quandl 
        and paste the token to `~/.ml_investment/secrets.json`

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored, 
        OR ``None`` (then ``commodities_data_path`` from 
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        data_path = load_config()['commodities_data_path']

    downloader = QuandlDownloader(sleep_time=0.8)
    for code in tqdm(quandl_commodities_codes):
        endpoint = 'datasets/{}'.format(code)
        # '/' in a code would create a nested folder, so flatten it
        target_file = '{}/{}.json'.format(data_path, code.replace('/', '_'))
        downloader.single_download(endpoint, target_file)
Example #2
0
def main(data_path: str = None):
    '''
    Download daily price bars for base US stocks and indexes. 

    Tickers are downloaded in parallel by a pool of 8 worker processes, 
    one ``_single_ticker_download`` call per ticker. The pool is released 
    once all downloads have finished (or one of them has raised).

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored, 
        OR ``None`` (then ``daily_bars_data_path`` from 
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        config = load_config()
        data_path = config['daily_bars_data_path']

    # Must be assigned before the pool is created.
    # NOTE(review): presumably read by `_single_ticker_download` inside the
    # workers, which would rely on a fork start method -- confirm.
    global _data_path
    _data_path = data_path
    tickers = load_tickers()['base_us_stocks']
    index_tickers = ['SPY', 'TLT', 'QQQ']
    os.makedirs(data_path, exist_ok=True)

    # The context manager terminates the workers on exit; the original
    # never closed the pool and leaked its processes.
    with Pool(8) as pool:
        # Results are discarded: the loop only drains the iterator so that
        # tqdm can show progress and worker exceptions are re-raised here.
        for _ in tqdm(pool.imap(_single_ticker_download,
                                tickers + index_tickers)):
            pass
Example #3
0
def main(data_path: str = None):
    '''
    Download quarterly fundamental data from
    https://www.quandl.com/databases/SF1/data

    The tickers archive is downloaded first. The list of tickers is then 
    read back through ``SF1BaseData`` and used to fetch the ``SF1`` table 
    into ``<data_path>/core_fundamental`` and the ``DAILY`` table 
    into ``<data_path>/daily``.

    Note:
        SF1 is paid, so you need to subscribe 
        and paste the quandl token to `~/.ml_investment/secrets.json`
        ``quandl_api_key``

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored, 
        OR ``None`` (then ``sf1_data_path`` from 
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        data_path = load_config()['sf1_data_path']

    downloader = QuandlDownloader(sleep_time=0.8)
    tickers_archive = '{}/tickers.zip'.format(data_path)
    downloader.zip_download('datatables/SHARADAR/TICKERS?qopts.export=true',
                            tickers_archive)

    # The archive downloaded above is what provides the ticker list
    tickers = SF1BaseData(data_path).load()['ticker'].unique().tolist()

    # (url template, output folder, batch size) for each per-ticker table
    download_jobs = (
        ('datatables/SHARADAR/SF1?ticker={ticker}',
         '{}/core_fundamental'.format(data_path), 10),
        ('datatables/SHARADAR/DAILY?ticker={ticker}',
         '{}/daily'.format(data_path), 5),
    )
    for url_template, out_dir, batch in download_jobs:
        downloader.ticker_download(url_template, tickers,
                                   save_dirpath=out_dir,
                                   skip_exists=False,
                                   batch_size=batch,
                                   n_jobs=4)
Example #4
0
def main(data_path: str = None):
    '''
    Download quarterly and base data from https://finance.yahoo.com

    Uses ``YahooDownloaderV1`` to fetch the quarterly data and then 
    the base data for every ticker in the ``base_us_stocks`` list.

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored, 
        OR ``None`` (then ``yahoo_data_path`` from 
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        data_path = load_config()['yahoo_data_path']

    us_tickers = load_tickers()['base_us_stocks']
    yahoo = YahooDownloaderV1()
    yahoo.download_quarterly_data(data_path, us_tickers)
    yahoo.download_base_data(data_path, us_tickers)
Example #5
0
def main(data_path: str = None):
    '''
    Download quarterly and base data from https://finance.yahoo.com

    Tickers are downloaded in parallel by a pool of 12 worker processes, 
    one ``_single_ticker_download`` call per ticker. The ``quarterly`` and 
    ``base`` sub-folders of ``data_path`` are created beforehand. The pool 
    is released once all downloads have finished (or one of them has raised).

    Parameters
    ----------
    data_path:
        path to the folder in which downloaded data will be stored, 
        OR ``None`` (then ``yahoo_data_path`` from 
        `~/.ml_investment/config.json` is used)
    '''
    if data_path is None:
        config = load_config()
        data_path = config['yahoo_data_path']

    # Must be assigned before the pool is created.
    # NOTE(review): presumably read by `_single_ticker_download` inside the
    # workers, which would rely on a fork start method -- confirm.
    global _data_path
    _data_path = data_path
    tickers = load_tickers()['base_us_stocks']
    os.makedirs('{}/quarterly'.format(data_path), exist_ok=True)
    os.makedirs('{}/base'.format(data_path), exist_ok=True)

    # The context manager terminates the workers on exit; the original
    # never closed the pool and leaked its processes.
    with Pool(12) as pool:
        # Results are discarded: the loop only drains the iterator so that
        # tqdm can show progress and worker exceptions are re-raised here.
        for _ in tqdm(pool.imap(_single_ticker_download, tickers)):
            pass
Example #6
0
import lightgbm as lgbm
import catboost as ctb
from urllib.request import urlretrieve
from ml_investment.utils import load_config, load_tickers
from ml_investment.data_loaders.yahoo import YahooBaseData, YahooQuarterlyData
from ml_investment.data_loaders.daily_bars import DailyBarsData
from ml_investment.features import QuarterlyFeatures, BaseCompanyFeatures, \
                                   FeatureMerger, DailyAggQuarterFeatures, \
                                   QuarterlyDiffFeatures
from ml_investment.targets import DailyAggTarget
from ml_investment.models import TimeSeriesOOFModel, EnsembleModel, LogExpModel
from ml_investment.metrics import median_absolute_relative_error, down_std_norm
from ml_investment.pipelines import Pipeline
from ml_investment.download_scripts import download_yahoo, download_daily_bars

# Project configuration, loaded once when this module is imported.
config = load_config()

# GitHub release asset (a ``.pickle`` file).
# NOTE(review): presumably the pretrained weights for this pipeline -- confirm.
URL = 'https://github.com/fartuk/ml_investment/releases/download/weights/marketcap_down_std_yahoo.pickle'
# NOTE(review): presumably the base name used for saved artifacts -- confirm.
OUT_NAME = 'marketcap_down_std_yahoo'
# NOTE(review): units are not visible here (presumably days) -- confirm.
TARGET_HORIZON = 90
# NOTE(review): the settings below are presumably consumed by the feature
# and model builders further down the file -- confirm against their usage.
MAX_BACK_QUARTER = 2
FOLD_CNT = 5
QUARTER_COUNTS = [1, 2, 4]
COMPARE_QUARTER_IDXS = [1, 4]
# Column name(s) listed as categorical.
CAT_COLUMNS = ["sector"]
QUARTER_COLUMNS = [
    'totalRevenue',
    'netIncome',
    'cash',
    'totalAssets',
    'costOfRevenue',