def filter_by_market(self, filter):
    """
    Restrict the screener's current stock list to companies in the given
    market grouping, intersecting with the stocks that passed all previous
    conditions, and record the condition for later replay.

    If another market already exists, then we reapply the conditions of the
    `StockScreener` so far to the new market, and do a *union* (instead of an
    *intersection*) with the current filtered stocks.

    :param filter: object of type `Regions`, `Exchanges`, `MarketIndices`,
        `SIC_Sectors`, `GICS_Sectors`, `SIC_Industries`, or `GICS_Industries`
    :return: list of companies satisfying conditions so far in addition to the
        market filter.
    :raises TypeError: if `filter` is none of the supported market types.
    """
    # TODO DO UNION BETWEEN, AND INTERSECTION WITHIN
    if isinstance(filter, Regions):
        companies_ = macro.companies_in_location(location=filter, date=self.date)
    elif isinstance(filter, Exchanges):
        companies_ = macro.companies_in_exchange(exchange=filter, date=self.date)
    elif isinstance(filter, MarketIndices):
        companies_ = macro.companies_in_index(market_index=filter, date=self.date)
    elif isinstance(filter, (SIC_Sectors, GICS_Sectors)):
        companies_ = macro.companies_in_sector(sector=filter, date=self.date)
    elif isinstance(filter, (SIC_Industries, GICS_Industries)):
        companies_ = macro.companies_in_industry(industry=filter, date=self.date)
    else:
        # TypeError (a subclass of Exception) is the idiomatic signal for a bad argument type.
        raise TypeError("'filter' doesn't match any Region, Exchange, Market Index, Sector, or Industry")

    # Intersect with the running result set; note set() does not preserve the original order.
    self.stocks = list(set(self.stocks).intersection(companies_))
    # Record (method, argument) so the screener can reapply conditions later.
    self.conditions.append((StockScreener.filter_by_market, filter))
    return self.stocks
def save_stock_prices(stock, start=datetime(1970, 1, 1), end=None):
    """
    Download daily price history from Yahoo! Finance and pickle each ticker's
    DataFrame under `config.STOCK_PRICES_DIR_PATH` as `<TICKER>.pkl`.

    :param stock: a single ticker string, a list of tickers, or a
        `config.MarketIndices` member (expanded to the index's constituents).
    :param start: first date of history to fetch (default 1970-01-01).
    :param end: last date of history to fetch; defaults to "now" at call time.
    """
    # BUG FIX: `end=datetime.now()` as a default argument is evaluated once at
    # import time, freezing the end date for the life of the process.
    if end is None:
        end = datetime.now()
    if isinstance(stock, config.MarketIndices):
        stock = macro.companies_in_index(market_index=stock, date=end)
    # Yahoo! tickers use '-' where other sources use '.' (e.g. BRK.B -> BRK-B).
    if isinstance(stock, list):
        stock = [stk.replace('.', '-') for stk in stock]
    else:
        # BUG FIX: wrap a lone ticker string in a list; `list('AAPL')` would
        # otherwise iterate over individual characters in the loop below.
        stock = [stock.replace('.', '-')]
    for stk in stock:
        df: pd.DataFrame = web.DataReader(stk, data_source='yahoo', start=start, end=end)
        # Shift daily timestamps to the last second of the trading day.
        df.index = df.index + timedelta(days=1) - timedelta(seconds=1)  # TODO think about EOD?
        path = os.path.join(config.STOCK_PRICES_DIR_PATH, '{}.pkl'.format(stk))
        df.to_pickle(path=path)
def __init__(self, securities_universe=None, date=None):
    '''
    :param securities_universe: by default, we start with the S&P 500, due to
        current functionality. It stays constant throughout the
        `StockScreener`, which is why we separate it from the `stocks`
        attribute.
    :param date: by default, now.
    '''
    # BUG FIX: `date=datetime.now()` as a default argument is evaluated once
    # at import time; resolve "now" at call time instead.
    if date is None:
        date = datetime.now()
    self.securities_universe = securities_universe  # starting universe
    if securities_universe is None:
        self.stocks = macro.companies_in_index(MarketIndices.SP_500, date=date)
    else:
        self.stocks = securities_universe
    self.date = date
    self.conditions = []  # (method, argument) pairs of filters applied so far
    self.dataframe = pd.DataFrame()
    # NOTE(review): this chunk is the tail of a scraping routine whose `def`
    # (and the bindings of `ticker`, `master_dict`, `multiples_dictio`,
    # `save_to_pickle`) lies above the visible region — confirm the
    # indentation level against the full file.
    path = '{}/{}.xlsx'.format(config.FINANCIAL_STATEMENTS_DIR_PATH_EXCEL, ticker)
    data_preparation_helpers.save_pretty_excel(path, financials_dictio=master_dict, with_pickle=save_to_pickle)
    # Flatten then unflatten to canonicalize the nested dictionary's structure.
    master_dict = data_preparation_helpers.unflatten(data_preparation_helpers.flatten_dict(master_dict))
    pprint(master_dict)

    multiple_pickle_path = os.path.join(config.FINANCIAL_STATEMENTS_DIR_PATH_PICKLE, 'multiples.pkl')
    try:
        # Load the existing multiples store, if any, so new entries merge into it.
        with open(multiple_pickle_path, 'rb') as handle:
            existing_dictio = pickle.load(handle)
    except:  # NOTE(review): bare except also masks a corrupt pickle, not just a missing file
        existing_dictio = {}
    with open(multiple_pickle_path, 'wb') as handle:
        existing_dictio.update(multiples_dictio)
        pickle.dump(existing_dictio, handle, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    # path = os.path.join(config.MARKET_TICKERS_DIR_PATH, 'Dow-Jones-Stock-Tickers.xlsx')
    # tickers = data_preparation_helpers.read_df_from_csv(path=path).iloc[0, :]
    tickers = companies_in_index(config.MarketIndices.DOW_JONES)
    scrape_macrotrend(tickers[:1])  # NOTE(review): only the first ticker — looks like a debug run; confirm
    # save_stock_prices(ticker)
        # NOTE(review): the enclosing class's header (and the start of this
        # __init__) lies above the visible region — confirm indentation
        # against the full file.
        super().__init__(portfolio)

    def solve_weights(self, risk_metric=None, objective=None, leverage=0, long_short_exposure=0):
        # TODO: placeholder — not implemented yet; signature mirrors the other allocation models.
        pass


class NestedClusteredOptimization(PortfolioAllocationModel):
    # Placeholder allocation model: solve_weights is not implemented yet.

    def __init__(self, portfolio: Portfolio):
        super().__init__(portfolio)

    def solve_weights(self, risk_metric=None, objective=None, leverage=0, long_short_exposure=0):
        # TODO: placeholder — not implemented yet.
        pass


if __name__ == '__main__':
    # Demo: build a Dow Jones portfolio on monthly returns over 2016–2020.
    assets = macro.companies_in_index(MarketIndices.DOW_JONES)
    portfolio = Portfolio(assets=assets)
    portfolio.set_frequency(frequency='M', inplace=True)
    portfolio.slice_dataframe(from_date=datetime(2016, 1, 1), to_date=datetime(2020, 1, 1), inplace=True)
    print(portfolio.df_returns.tail(10))

    MPT = ModernPortfolioTheory(portfolio)
    weights = MPT.solve_weights(use_sharpe=True)
    print(weights)

    # Benchmark portfolio: the Dow Jones index itself, same window and frequency.
    market_portfolio = Portfolio(assets='^DJI')
    market_portfolio.set_frequency(frequency='M', inplace=True)
    market_portfolio.slice_dataframe(from_date=datetime(2016, 1, 1), to_date=datetime(2020, 1, 1), inplace=True)
    stats = MPT.markowitz_efficient_frontier(market_portfolio=market_portfolio, plot_assets=True, plot_cal=True)
    pd.set_option('display.max_columns', None)
def get_company_meta():
    '''
    Scrape per-company metadata — company name, CIK, SIC industry/sector,
    GICS sector, location, and exchange listings — from SEC EDGAR (via
    requests + a Selenium fallback) plus the iShares ITOT holdings file,
    and save the combined table to Excel and pickle.

    TODO: Need to do this for all companies ever listed, not only current.
    :return:
    '''
    # Full US market constituents from the ITOT ETF holdings CSV
    # (skiprows=9 skips the file's preamble rows).
    init_df = pd.read_csv('https://www.ishares.com/us/products/239724/ishares-core-sp-total-us-stock-market-etf/1467271812596.ajax?fileType=csv&fileName=ITOT_holdings&dataType=fund',
                          skiprows=9, index_col=0)
    tickers = init_df.index.tolist()
    # NOTE(review): the assignment above is immediately overwritten — only Dow
    # Jones tickers are actually scraped; presumably a debugging shortcut.
    tickers = companies_in_index(config.MarketIndices.DOW_JONES)

    driver = webdriver.Chrome(ChromeDriverManager().install())

    # SIC division boundaries: (low, high+1) of the leading SIC digits -> division name.
    sic_codes_division = {(1, 9 + 1): 'Agriculture, Forestry, and Fishing',
                          (10, 14 + 1): 'Mining',
                          (15, 17 + 1): 'Construction',
                          (20, 39 + 1): 'Manufacturing',
                          (40, 49 + 1): 'Transportation, Communications, Electric, Gas, And Sanitary Services',
                          (50, 51 + 1): 'Wholesale Trade',
                          (52, 59 + 1): 'Retail Trade',
                          (60, 67 + 1): 'Finance, Insurance, and Real Estate',
                          (70, 89 + 1): 'Services',
                          (90, 99 + 1): 'Public Administration'}

    # Membership lists used to tag each ticker with the exchange(s) it trades on.
    exchanges_dict = {'AMEX': companies_in_exchange('AMEX'),
                      'NYSE': companies_in_exchange('NYSE'),
                      'NASDAQ': companies_in_exchange('NASDAQ')}

    # Mapping of state/country codes; presumably {region type -> {code -> name}} — confirm.
    with open(os.path.join(config.DATA_DIR_PATH, "market_data/country_codes_dictio.pickle"), "rb") as f:
        country_codes = pickle.load(f)

    edgar_dict = {}
    for ticker in tickers:
        edgar_dict[ticker] = {}
        try:
            for i in range(2):  # just try again if didn't work first time, might be advertisement showed up

                # Dismiss the survey/ad overlay if present; ignore if absent.
                try:
                    button = driver.find_element_by_xpath("//a[@class='acsCloseButton acsAbandonButton ']")
                    button.click()
                    sleep(1)
                except:
                    pass

                # if nasdaq_df['ETF'].loc[ticker] == 'Y':
                #     driver.get('https://www.sec.gov/edgar/searchedgar/mutualsearch.html')
                #     field = driver.find_element_by_xpath("//input[@id='gen_input']")
                #     field.send_keys(ticker)  # TODO might split ticker from the '$' or '.' (classes)
                #     sleep(1)
                #     field.send_keys(Keys.ENTER)
                #     sleep(1)
                #     if 'No records matched your query' not in driver.page_source:
                #         for t in driver.find_elements_by_xpath("//b[@class='blue']"):  # TODO
                #             if t.text == ticker:
                #                 cik = driver.find_element_by_xpath('').text
                #                 security_type = driver.find_element_by_xpath('').text
                #                 break  # still should go to the 'finally' block

                # First attempt: direct CIK lookup by ticker over plain HTTP.
                base_url = 'https://www.sec.gov/cgi-bin/browse-edgar?CIK={}'.format(ticker)
                resp = requests.get(base_url).text
                if 'No matching Ticker Symbol' in resp or 'No records matched your query' in resp:
                    # Fallback: drive EDGAR's company-search autocomplete via Selenium.
                    driver.get('https://www.sec.gov/edgar/searchedgar/companysearch.html')
                    # html = driver.page_source TODO for new 10-K forms maybe works?
                    input_box = driver.find_element_by_xpath("//input[@id='company']")
                    input_box.send_keys(ticker)
                    html = driver.page_source
                    # wait until the autofill box loads
                    WebDriverWait(driver, 10).until(EC.visibility_of_element_located(
                        (By.XPATH, "//tr[@class='smart-search-hint smart-search-selected-hint']")))
                    element = driver.find_element_by_xpath(
                        "//tr[@class='smart-search-hint smart-search-selected-hint']")
                    # Give up on this ticker if the highlighted hint doesn't mention it.
                    if not re.search(r'(\(|[^A-Z]){}([^A-Z]|\))'.format(ticker), element.text):
                        break
                    sleep(1)
                    input_box.send_keys(Keys.ENTER)
                    # wait until company page loads
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "seriesDiv")))
                    resp = requests.get(driver.current_url).text

                # Parse company name, CIK, SIC industry/code and state code out of the page text.
                soup = BeautifulSoup(resp, 'html.parser')
                # name = soup.find('span', class_='companyName').text.split(' CIK')[0]
                edgar_dict[ticker]['Company Name'] = titlecase(re.compile(r'(.*) CIK#').findall(soup.text)[0])
                edgar_dict[ticker]['CIK'] = re.compile(r'.*CIK#: (\d{10}).*').findall(soup.text)[0]
                ident_info = soup.find('p', class_="identInfo")
                edgar_dict[ticker]['SIC Industry'] = str(ident_info.find('br').previousSibling.split('- ')[-1]).title()
                sic_code = re.search(r'(\d{4})', ident_info.text).group()
                country_code = re.compile(r'.*State location: (..)').findall(soup.text)[0]

                # Resolve the two-letter state/country code to "RegionType/Name".
                # NOTE(review): `type` shadows the builtin of the same name.
                for type, code_dict in country_codes.items():
                    if country_code in code_dict.keys():
                        edgar_dict[ticker]['Location'] = type + '/' + code_dict[country_code]
                        break

                # Tag every exchange the ticker is listed on, '|'-joined when multiple.
                # NOTE(review): the loop variable `tickers` shadows the outer
                # `tickers` list; the outer for-loop's iterator was already
                # created so iteration continues, but this is fragile.
                for exchange, tickers in exchanges_dict.items():
                    if ticker in tickers:
                        if 'Exchange' in edgar_dict[ticker].keys():
                            edgar_dict[ticker]['Exchange'] += '|' + exchange
                        else:
                            edgar_dict[ticker]['Exchange'] = exchange

                # Map the SIC code's leading digits into its division ("SIC Sector").
                # A leading '0' means the division key is a single digit.
                for key, value in sic_codes_division.items():
                    if int(sic_code[0]) == 0:
                        if int(sic_code[1]) in range(key[0], key[1]):
                            edgar_dict[ticker]['SIC Sector'] = value
                            break
                    elif int(sic_code[:2]) in range(key[0], key[1]):
                        edgar_dict[ticker]['SIC Sector'] = value
                        break

                break  # success: leave the retry loop

        # except TimeoutException or ElementNotInteractableException:
        except:  # NOTE(review): bare except swallows all scraping errors; ticker left with partial/empty data
            driver.get('https://www.sec.gov/edgar/searchedgar/companysearch.html')

    # Join the scraped EDGAR fields with the GICS sector / asset class from ITOT.
    edgar_df = pd.DataFrame.from_dict(edgar_dict, orient='index')
    init_df.rename(columns={'Sector': 'GICS Sector'}, inplace=True)
    init_df = init_df[['GICS Sector', 'Asset Class']]
    df = edgar_df.join(init_df)
    df = df[['Company Name', 'SIC Industry', 'SIC Sector', 'GICS Sector',
             'Location', 'CIK', 'Exchange', 'Asset Class']]
    # df = pd.concat([edgar_df, init_df], axis=1)
    path = os.path.join(config.MARKET_DATA_DIR_PATH, 'US-Stock-Market')
    df.to_excel(path+'.xlsx', engine='xlsxwriter')
    df.to_pickle(path=path+'.pkl')