Example #1
    def filter_by_market(self, filter):
        """
        If another market already exists, then we reapply the conditions of the `StockScreener` so far
        to the new market, and do a *union* (instead of an *intersection*) with the current filtered stocks.

        :param filter:  object of type `Regions`, `Exchanges`, `MarketIndices`, `SIC_Sectors`,
                        `GICS_Sectors`, `SIC_Industries`, or `GICS_Industries`
        :return: list of companies satisfying the conditions so far, in addition to the market filter.

        """

        # TODO DO UNION BETWEEN, AND INTERSECTION WITHIN

        if isinstance(filter, Regions):
            companies_ = macro.companies_in_location(location=filter, date=self.date)

        elif isinstance(filter, Exchanges):
            companies_ = macro.companies_in_exchange(exchange=filter, date=self.date)

        elif isinstance(filter, MarketIndices):
            companies_ = macro.companies_in_index(market_index=filter, date=self.date)

        elif isinstance(filter, (SIC_Sectors, GICS_Sectors)):
            companies_ = macro.companies_in_sector(sector=filter, date=self.date)

        elif isinstance(filter, (SIC_Industries, GICS_Industries)):
            companies_ = macro.companies_in_industry(industry=filter, date=self.date)

        else:
            raise Exception("'filter' doesn't match any Region, Exchange, Market Index, Sector, or Industry")

        self.stocks = list(set(self.stocks).intersection(companies_))
        self.conditions.append((StockScreener.filter_by_market, filter))
        return self.stocks
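
A minimal usage sketch for the filter above (hedged: it assumes the `StockScreener` constructor from Example #3 and enum members such as `Exchanges.NASDAQ` and `GICS_Sectors.INFORMATION_TECHNOLOGY`, which are not shown in these snippets):

# Hypothetical usage -- each call intersects the current stock list with the chosen market
# (the cross-market union described in the docstring is still a TODO above).
screener = StockScreener()                                          # defaults to the S&P 500 universe
screener.filter_by_market(Exchanges.NASDAQ)                         # keep only NASDAQ-listed constituents
screener.filter_by_market(GICS_Sectors.INFORMATION_TECHNOLOGY)      # further narrow by GICS sector
print(screener.stocks)                                              # tickers satisfying all conditions so far
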
def save_stock_prices(stock, start=datetime(1970, 1, 1), end=None):
    end = end or datetime.now()  # resolve 'now' at call time, not once at import time
    if isinstance(stock, config.MarketIndices):
        stock = macro.companies_in_index(market_index=stock, date=end)
    if isinstance(stock, list):
        stock = [stk.replace('.', '-') for stk in stock]
    else:
        stock = [stock.replace('.', '-')]  # wrap a single ticker so the loop iterates tickers, not characters
    for stk in stock:
        df: pd.DataFrame = web.DataReader(stk, data_source='yahoo', start=start, end=end)
        df.index = df.index + timedelta(days=1) - timedelta(seconds=1)  # TODO think about EOD?
        # path = os.path.join(config.STOCK_PRICES_DIR_PATH, '{}.xlsx'.format(stk))
        # excel.save_into_csv(path, df, overwrite_sheet=True)
        path = os.path.join(config.STOCK_PRICES_DIR_PATH, '{}.pkl'.format(stk))
        df.to_pickle(path=path)
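
A hedged usage sketch for the saver above; it only reuses the snippet's own names (`config.MarketIndices`, `config.STOCK_PRICES_DIR_PATH`) and assumes Yahoo Finance is reachable through pandas-datareader:

# Hypothetical usage -- one pickle per ticker lands in config.STOCK_PRICES_DIR_PATH.
save_stock_prices(['AAPL', 'BRK.B'], start=datetime(2015, 1, 1))    # '.' in class tickers becomes '-' for Yahoo
save_stock_prices(config.MarketIndices.DOW_JONES)                   # resolves index constituents first, then saves each
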
Example #3
    def __init__(self, securities_universe=None, date=None):
        '''

        :param securities_universe:     by default, we start with the S&P 500, as that is all the current functionality supports.
                                        It stays constant throughout the `StockScreener`, which is why we
                                        separate it from the `stocks` attribute.
        :param date: by default, now.
        '''

        if date is None:
            date = datetime.now()  # resolve the default at call time rather than at class-definition time

        self.securities_universe = securities_universe  # starting universe

        if securities_universe is None:
            self.stocks = macro.companies_in_index(MarketIndices.SP_500, date=date)
        else:
            self.stocks = securities_universe

        self.date = date
        self.conditions = []
        self.dataframe = pd.DataFrame()
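
A brief construction sketch (hedged; it only reuses names defined in this constructor):

# Hypothetical usage of the constructor above.
default_screener = StockScreener()                                  # universe defaults to current S&P 500 constituents
custom_screener = StockScreener(securities_universe=['AAPL', 'MSFT', 'JNJ'],
                                date=datetime(2020, 1, 1))          # screen an explicit list as of a past date
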
        path = '{}/{}.xlsx'.format(config.FINANCIAL_STATEMENTS_DIR_PATH_EXCEL,
                                   ticker)
        data_preparation_helpers.save_pretty_excel(
            path, financials_dictio=master_dict, with_pickle=save_to_pickle)
        master_dict = data_preparation_helpers.unflatten(
            data_preparation_helpers.flatten_dict(master_dict))
        pprint(master_dict)

    multiple_pickle_path = os.path.join(
        config.FINANCIAL_STATEMENTS_DIR_PATH_PICKLE, 'multiples.pkl')

    try:
        with open(multiple_pickle_path, 'rb') as handle:
            existing_dictio = pickle.load(handle)
    except (FileNotFoundError, EOFError):  # no multiples pickle saved yet; start with an empty dict
        existing_dictio = {}

    with open(multiple_pickle_path, 'wb') as handle:
        existing_dictio.update(multiples_dictio)
        pickle.dump(existing_dictio, handle, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    # path = os.path.join(config.MARKET_TICKERS_DIR_PATH, 'Dow-Jones-Stock-Tickers.xlsx')
    # tickers = data_preparation_helpers.read_df_from_csv(path=path).iloc[0, :]

    tickers = companies_in_index(config.MarketIndices.DOW_JONES)
    scrape_macrotrend(tickers[:1])
    # save_stock_prices(ticker)
        super().__init__(portfolio)

    def solve_weights(self, risk_metric=None, objective=None, leverage=0, long_short_exposure=0):
        pass


class NestedClusteredOptimization(PortfolioAllocationModel):
    def __init__(self, portfolio: Portfolio):
        super().__init__(portfolio)

    def solve_weights(self, risk_metric=None, objective=None, leverage=0, long_short_exposure=0):
        pass


if __name__ == '__main__':
    assets = macro.companies_in_index(MarketIndices.DOW_JONES)
    portfolio = Portfolio(assets=assets)
    portfolio.set_frequency(frequency='M', inplace=True)
    portfolio.slice_dataframe(from_date=datetime(2016, 1, 1), to_date=datetime(2020, 1, 1), inplace=True)
    print(portfolio.df_returns.tail(10))

    MPT = ModernPortfolioTheory(portfolio)
    weights = MPT.solve_weights(use_sharpe=True)
    print(weights)

    market_portfolio = Portfolio(assets='^DJI')
    market_portfolio.set_frequency(frequency='M', inplace=True)
    market_portfolio.slice_dataframe(from_date=datetime(2016, 1, 1), to_date=datetime(2020, 1, 1), inplace=True)

    stats = MPT.markowitz_efficient_frontier(market_portfolio=market_portfolio, plot_assets=True, plot_cal=True)
    pd.set_option('display.max_columns', None)
Example #6
def get_company_meta():
    '''
    Scrape company metadata (name, CIK, SIC industry/sector, location, exchange) from SEC EDGAR and
    join it with the GICS sector and asset-class columns of the iShares ITOT holdings file.
    TODO: Need to do this for all companies ever listed, not only currently listed ones.
    :return: None; the combined DataFrame is saved to Excel and pickle under config.MARKET_DATA_DIR_PATH.
    '''

    init_df = pd.read_csv('https://www.ishares.com/us/products/239724/ishares-core-sp-total-us-stock-market-etf/1467271812596.ajax?fileType=csv&fileName=ITOT_holdings&dataType=fund',
                          skiprows=9, index_col=0)
    tickers = init_df.index.tolist()
    # NOTE: the line below overrides the full ITOT ticker list, limiting the run to Dow Jones constituents
    tickers = companies_in_index(config.MarketIndices.DOW_JONES)
    driver = webdriver.Chrome(ChromeDriverManager().install())

    sic_codes_division = {(1, 9 + 1): 'Agriculture, Forestry, and Fishing',
                          (10, 14 + 1): 'Mining',
                          (15, 17 + 1): 'Construction',
                          (20, 39 + 1): 'Manufacturing',
                          (40, 49 + 1): 'Transportation, Communications, Electric, Gas, And Sanitary Services',
                          (50, 51 + 1): 'Wholesale Trade',
                          (52, 59 + 1): 'Retail Trade',
                          (60, 67 + 1): 'Finance, Insurance, and Real Estate',
                          (70, 89 + 1): 'Services',
                          (90, 99 + 1): 'Public Administration'}

    exchanges_dict = {'AMEX': companies_in_exchange('AMEX'),
                      'NYSE': companies_in_exchange('NYSE'),
                      'NASDAQ': companies_in_exchange('NASDAQ')}

    with open(os.path.join(config.DATA_DIR_PATH, "market_data/country_codes_dictio.pickle"),
              "rb") as f:
        country_codes = pickle.load(f)
    edgar_dict = {}
    for ticker in tickers:
        edgar_dict[ticker] = {}
        try:
            for i in range(2):  # retry once, in case an advertisement pop-up got in the way the first time
                try:
                    # dismiss the feedback/advertisement pop-up if it is present
                    button = driver.find_element_by_xpath("//a[@class='acsCloseButton acsAbandonButton ']")
                    button.click()
                    sleep(1)
                except Exception:
                    pass
                # if nasdaq_df['ETF'].loc[ticker] == 'Y':
                #     driver.get('https://www.sec.gov/edgar/searchedgar/mutualsearch.html')
                #     field = driver.find_element_by_xpath("//input[@id='gen_input']")
                #     field.send_keys(ticker)  # TODO might split ticker from the '$' or '.' (classes)
                #     sleep(1)
                #     field.send_keys(Keys.ENTER)
                #     sleep(1)
                #     if 'No records matched your query' not in driver.page_source:
                #         for t in driver.find_elements_by_xpath("//b[@class='blue']"):  # TODO
                #             if t.text == ticker:
                #                 cik = driver.find_element_by_xpath('').text
                #                 security_type = driver.find_element_by_xpath('').text
                #     break  # still should go to the 'finally' block

                base_url = 'https://www.sec.gov/cgi-bin/browse-edgar?CIK={}'.format(ticker)
                resp = requests.get(base_url).text

                if 'No matching Ticker Symbol' in resp or 'No records matched your query' in resp:
                    driver.get('https://www.sec.gov/edgar/searchedgar/companysearch.html')
                    # html = driver.page_source TODO for new 10-K forms maybe works?
                    input_box = driver.find_element_by_xpath("//input[@id='company']")
                    input_box.send_keys(ticker)
                    html = driver.page_source
                    # wait until the autofill box loads
                    WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
                        (By.XPATH, "//tr[@class='smart-search-hint smart-search-selected-hint']")))
                    element = driver.find_element_by_xpath(
                        "//tr[@class='smart-search-hint smart-search-selected-hint']")
                    if not re.search(r'(\(|[^A-Z]){}([^A-Z]|\))'.format(re.escape(ticker)), element.text):
                        break
                    sleep(1)
                    input_box.send_keys(Keys.ENTER)
                    # wait until company page loads
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "seriesDiv")))
                    resp = requests.get(driver.current_url).text

                soup = BeautifulSoup(resp, 'html.parser')
                # name = soup.find('span', class_='companyName').text.split(' CIK')[0]
                edgar_dict[ticker]['Company Name'] = titlecase(re.compile(r'(.*) CIK#').findall(soup.text)[0])
                edgar_dict[ticker]['CIK'] = re.compile(r'.*CIK#: (\d{10}).*').findall(soup.text)[0]

                ident_info = soup.find('p', class_="identInfo")
                edgar_dict[ticker]['SIC Industry'] = str(ident_info.find('br').previousSibling.split('- ')[-1]).title()
                sic_code = re.search(r'(\d{4})', ident_info.text).group()
                country_code = re.compile(r'.*State location: (..)').findall(soup.text)[0]
                for location_type, code_dict in country_codes.items():
                    if country_code in code_dict:
                        edgar_dict[ticker]['Location'] = location_type + '/' + code_dict[country_code]
                        break

                for exchange, exchange_tickers in exchanges_dict.items():
                    if ticker in exchange_tickers:
                        if 'Exchange' in edgar_dict[ticker]:
                            edgar_dict[ticker]['Exchange'] += '|' + exchange
                        else:
                            edgar_dict[ticker]['Exchange'] = exchange

                for key, value in sic_codes_division.items():
                    if int(sic_code[0]) == 0:
                        if int(sic_code[1]) in range(key[0], key[1]):
                            edgar_dict[ticker]['SIC Sector'] = value
                            break
                    elif int(sic_code[:2]) in range(key[0], key[1]):
                        edgar_dict[ticker]['SIC Sector'] = value
                        break

                break

            # except (TimeoutException, ElementNotInteractableException):
        except Exception:
            # reset to the search page and continue with the next ticker
            driver.get('https://www.sec.gov/edgar/searchedgar/companysearch.html')

    driver.quit()  # close the browser once scraping is finished

    edgar_df = pd.DataFrame.from_dict(edgar_dict, orient='index')
    init_df.rename(columns={'Sector': 'GICS Sector'}, inplace=True)
    init_df = init_df[['GICS Sector', 'Asset Class']]
    df = edgar_df.join(init_df)
    df = df[['Company Name', 'SIC Industry', 'SIC Sector', 'GICS Sector', 'Location', 'CIK', 'Exchange', 'Asset Class']]
    # df = pd.concat([edgar_df, init_df], axis=1)
    path = os.path.join(config.MARKET_DATA_DIR_PATH, 'US-Stock-Market')
    df.to_excel(path+'.xlsx', engine='xlsxwriter')
    df.to_pickle(path=path+'.pkl')
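
The SIC-division lookup inside get_company_meta() keys off the first one or two digits of the four-digit SIC code, falling back to the second digit when the code starts with a zero. A standalone sketch of that mapping, reusing the same range table (the helper name is hypothetical), could look like:

def sic_code_to_division(sic_code, sic_codes_division):
    """Hypothetical helper mirroring the range lookup in get_company_meta()."""
    # '0111' -> second digit 1 -> 'Agriculture, Forestry, and Fishing'; '3674' -> first two digits 36 -> 'Manufacturing'
    leading = int(sic_code[1]) if int(sic_code[0]) == 0 else int(sic_code[:2])
    for (low, high), division in sic_codes_division.items():
        if leading in range(low, high):
            return division
    return None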