def stock_statistics(corpus):
    """Extract key/value statistics from the crawled statistics table.

    corpus: parsed HTML (BeautifulSoup-like) — iterated via ``select("tr")``.
    Returns a dict mapping (normalized) table-header label -> cell text.
    """
    status_messages.status(113)
    # Hoisted out of the row loop and written as raw strings: the original
    # non-raw patterns ("\(", "\,", "\/") emit invalid-escape SyntaxWarnings
    # on modern Python and were re-looked-up on every iteration.
    shares_short_re = re.compile(r"^(?:Aktien (\(Short+)(\)|\,)+).*")
    short_ratio_re = re.compile(r"^(?:Short (% [A-Za-z\/ ]*|Ratio )).*")
    statistics = {}
    for content in corpus.select("tr"):
        key = content.find_next("span").text
        value = content.find_next("span").find_next("td").text
        # Yahoo renders the "shares short" / "short ratio" header labels
        # differently for e.g. US vs. European stocks.  Unify them by trimming
        # the variable tail so the headers are identical for every stock and
        # the per-stock dataframes can later be merged with pandas.
        if shares_short_re.match(key):
            if "," in key:
                key = key.replace(",", ")").split(")")[0] + ") - Vormonat"
            else:
                key = key.split(")")[0] + ") - letzter Stand"
        elif short_ratio_re.match(key):
            key = key.split(" (")[0]
        if key.endswith(" "):
            # Drop the single trailing space some labels carry.
            key = key[:-1]
        statistics[key] = value
    return statistics
def stock_history(corpus):
    """Extract the price and volume history from a crawled history table.

    corpus: parsed HTML (BeautifulSoup-like) — iterated via ``select("tr")``.
    Returns two lists of ``[date, value]`` pairs:
    (date/price pairs, date/volume pairs).
    """
    status_messages.status(111)
    dates, prices, volumes = [], [], []
    for row in corpus.select("tr"):
        # Only full data rows (7 cells) with an actual closing price ("-"
        # marks rows without one, e.g. dividend/split rows).
        if len(row.contents) == 7 and row.contents[4].text != "-":
            dates.append(row.contents[0].text)
            prices.append(row.contents[4].text)
            volumes.append(row.contents[6].text)
    # [1:] drops the table header row.
    dates = converter.to_date_object(dates[1:])
    prices = [converter.to_float(price) for price in prices[1:]]
    # "-" marks a missing volume (mapped to 1); "." is the thousands separator.
    volumes = [int(volume.replace("-", "1").replace(".", ""))
               for volume in volumes[1:]]
    # zip pairs by position like the original index loop, but cannot raise
    # IndexError if the lists ever diverge in length.
    stock_date_and_price = [[day, price] for day, price in zip(dates, prices)]
    stock_date_and_volume = [[day, volume] for day, volume in zip(dates, volumes)]
    return stock_date_and_price, stock_date_and_volume
def stock_dividends(corpus):
    """Extract the dividend history from a crawled history table.

    corpus: parsed HTML (BeautifulSoup-like) — queried via ``find_all``.
    Returns a list of ``[date, dividend]`` pairs; if the stock never paid a
    dividend, a single 1980-01-01/0.0 placeholder entry is returned so
    downstream dataframe code always has at least one row.
    """
    status_messages.status(112)
    dividends = []
    for cell in corpus.find_all("td", class_=regex_dividend):
        raw = cell.text.replace("Dividende", "").replace(" ", "")
        if raw == "":
            # Empty cell: near-zero sentinel so the row survives conversion.
            raw = "0.0001"
        dividends.append(converter.to_float(raw))
    date_texts = [cell.text
                  for cell in corpus.find_all("td", class_=regex_dividend_date)]
    dates = converter.to_date_object(date_texts)
    if dividends:
        # zip pairs by position like the original index loop, but cannot
        # raise IndexError if dates outnumber dividends.
        return [[day, dividend] for day, dividend in zip(dates, dividends)]
    # No dividend history at all: single placeholder entry.
    placeholder_date = datetime.strptime("01.01.1980", "%d.%m.%Y").date()
    return [[placeholder_date, converter.to_float("0.00")]]
def initiate_export(dataframe, custom_filename, custom_folder):
    """Write *dataframe* to ``<repo root>/<custom_folder>/<date>_<custom_filename>``.

    Creates the output folder (and reports status 282) if it does not exist.
    """
    current_date = datetime.now()
    xlsx_root = os.path.join(os.path.dirname(__file__), "..")
    target_dir = os.path.join(xlsx_root, custom_folder)
    # os.path.isdir replaces the original `in os.listdir(...)` scan: it is a
    # direct check and does not mistake a plain file for the output folder.
    if not os.path.isdir(target_dir):
        status_messages.status(282)
        os.mkdir(target_dir)
    # os.path.join instead of manual "/" concatenation (portable separators).
    filename = os.path.join(
        target_dir, str(current_date.date()) + "_" + custom_filename)
    dataframe_to_xlsx(filename, dataframe)
def intitiate_analysis_and_export(stock_current, stock_objects,
                                  override_analysis_only_df_crawling):
    """Register a StockObject for the current stock, build its time-series
    dataframe, and — unless crawling-only mode is requested — render the
    financial analysis, export it to xlsx, and report timing/status.
    """
    stock_objects = add_stock_object(stock_current["name"], stock_objects)
    current = stock_objects[-1]
    import_parsed_data(stock_current, current)
    current.time_series_dataframe = (
        converter.create_time_series_dataframe_based_on_parsed_stock_data(
            current))
    # Guard clause: in crawling-only mode there is nothing left to do.
    if override_analysis_only_df_crawling:
        return
    timer = runtimer.TimeKeeper()
    timer.start()
    render_financial_data(current)
    export_to_xls(current)
    status_messages.status(302)
    timer.show()
def initiate_crawling(stock_current, average_crawling_delay,
                      override_crawling_use_testdata):
    """Populate *stock_current* with statistics, price/volume history
    (weeks/months/years) and dividends — either crawled live from the
    per-stock URLs or taken from ``stocks_testdata``.

    Returns the updated *stock_current* dict.
    """
    stopwatch = runtimer.TimeKeeper()
    stopwatch.start()
    status_messages.crawler_task(stock_current)
    header = create_header()
    cookie = create_cookie()
    stock_current.update(yahoo_ini.get_urls(stock_current["symbol"]))
    periods = ("weeks", "months", "years")
    # NOTE(review): the original `!= True` comparison is kept on purpose —
    # a truthy non-bool flag would switch branches under `not ...`.
    if override_crawling_use_testdata != True:
        html = get_html(stock_current["url_statistics"], header, cookie,
                        average_crawling_delay)
        stock_current["statistics"] = parser.stock_statistics(html)
        # One loop replaces three copy-pasted fetch/assign stanzas.
        for period in periods:
            html = get_html(stock_current["url_" + period], header, cookie,
                            average_crawling_delay)
            (stock_current["stock_price_" + period],
             stock_current["stock_volume_" + period]) = parser.stock_history(html)
            # The unixtime window must advance between the history requests,
            # exactly as in the original sequential code.
            stock_current = yahoo_ini.update_unixtime(stock_current)
        html = get_html(stock_current["url_dividends"], header, cookie,
                        average_crawling_delay)
        stock_current["dividends"] = parser.stock_dividends(html)
    else:
        stock_current["statistics"] = stocks_testdata.get("statistics")
        for period, prefix in zip(periods, ("weekly", "monthly", "yearly")):
            stock_current["stock_price_" + period] = stocks_testdata.get(
                prefix + "_prices")
            stock_current["stock_volume_" + period] = stocks_testdata.get(
                prefix + "_volume")
        stock_current["dividends"] = stocks_testdata.get("dividends")
    status_messages.status(301)
    stopwatch.show()
    return stock_current
def add_stock_object(stock_name, stock_objects):
    """Append a new StockObject for *stock_name* and return the list."""
    status_messages.status(201)
    stock_objects.append(financial_analysis.StockObject(stock_name))
    return stock_objects