def request_html(state: State):
    """
    Scrape the Yahoo Finance key-statistics page for the current ticker.

    The page embeds its data as a JSON object assigned to ``root.App.main``
    inside a ``<script>`` tag. That object is decoded and its
    ``['context']['dispatcher']['stores']`` sub-tree is stored in
    ``state.url`` so the following parsing steps can crawl it.

    On any failure (network error, timeout, missing script tag, malformed
    JSON, missing key) ``state.url`` is set to an empty list and the chain
    still continues with the next step.

    :param state: pipeline state; ``state.ticker.symbol`` is read and
        ``state.url`` is written
    :type state: State
    :return: whatever ``parse_current_price`` returns for the updated state
    """
    try:
        response = requests.get(
            f"https://finance.yahoo.com/quote/{state.ticker.symbol}/key-statistics?p={state.ticker.symbol}",
            # Without a timeout a stalled connection would block the whole run.
            timeout=30,
        )
        soup = BeautifulSoup(response.content, "lxml")
        script = soup.find("script", text=re.compile("root.App.main")).text
        # Raw string: "\s" and "\{" are regex escapes, not string escapes.
        data = loads(
            re.search(r"root.App.main\s+=\s+(\{.*\})", script).group(1))
        state.url = data['context']['dispatcher']['stores']
        LOGGER.info(f"{state.ticker.symbol} | Successfully get URL")
    except Exception:
        # Best effort: a ticker that cannot be scraped must not stop the run.
        state.url = []
        LOGGER.info(f"{state.ticker.symbol} | Could not scrap URL")
    return parse_current_price(state=state)
def filter_unique_ticker(state: State):
    """
    Reduce the combined exchange listing to de-duplicated ticker rows.

    When the extract and load event codes add up to 200, the column names of
    ``state.files.combined_exchanges`` are lower-cased, a fixed set of columns
    is selected, duplicate rows are dropped, and the result is stored in
    ``state.output`` and written to ``combined_exchanges.csv``. Any error
    during that transformation sets ``state.output`` to ``None`` and is
    logged as a warning.

    Otherwise the sample CSV on disk is loaded into ``state.output``.

    :param state: pipeline state (reads ``events`` and ``files``, writes
        ``output`` and ``events.transform_company_list``)
    :type state: State
    :return: None
    """
    status_sum = (state.events.extract_company_list
                  + state.events.load_company_list)

    # Fallback path first: upstream steps did not both succeed.
    if status_sum != 200:
        state.output = pd.read_csv(
            f"{PATH}/data/combined_exchanges_sample.csv")
        LOGGER.warning("Using old company ticker file")
        return

    kept_columns = [
        "symbol", 'name', 'lastsale', 'marketcap', 'ipoyear', 'sector',
        'industry'
    ]
    try:
        exchanges = state.files.combined_exchanges
        exchanges.columns = [str.lower(label) for label in exchanges.columns]
        # Only fully identical rows (over the kept columns) are removed here.
        unique_rows = exchanges[kept_columns].drop_duplicates()
        state.output = unique_rows
        unique_rows.to_csv(f"{PATH}/data/combined_exchanges.csv")
        state.events.transform_company_list = 100
    except Exception as e:
        state.output = None
        LOGGER.warning(f"Could not transform company data , error: {e}")
def parse_current_price(state: State):
    """
    Read the formatted current share price out of the scraped store data.

    The value is looked up at
    ``state.url['QuoteSummaryStore']['financialData']['currentPrice']['fmt']``
    and stored in ``state.ticker.current_price``. If that path does not exist
    (missing key, or ``state.url`` is not a mapping because scraping failed),
    ``state.ticker.current_price`` is set to ``None`` and a warning is logged.

    :param state: pipeline state (reads ``url`` and ``ticker.symbol``, writes
        ``ticker.current_price``)
    :type state: State
    :return: whatever ``parse_current_price_to_book`` returns for the state
    """
    try:
        state.ticker.current_price = state.url['QuoteSummaryStore'][
            'financialData']['currentPrice']['fmt']
    except (KeyError, IndexError, TypeError):
        # A failed nested lookup raises KeyError/TypeError (or IndexError on a
        # list), never ValueError; reset the attribute on the ticker itself so
        # a value from a previous ticker cannot leak through.
        state.ticker.current_price = None
        LOGGER.warning(
            f"{state.ticker.symbol} | Current company price: {state.ticker.current_price}"
        )
    else:
        LOGGER.info(
            f"{state.ticker.symbol} | Current company price: {state.ticker.current_price}"
        )
    return parse_current_price_to_book(state=state)
def parse_current_price_to_book(state: State):
    """
    Read the formatted current price-to-book ratio from the scraped data.

    The value is looked up at ``state.url["QuoteTimeSeriesStore"]
    ["timeSeries"]["trailingPbRatio"][2]["reportedValue"]["fmt"]`` and stored
    in ``state.ticker.current_price_to_book``. If that path does not exist
    (missing key, too few entries, or a non-mapping value on the way),
    ``state.ticker.current_price_to_book`` is set to ``None`` and a warning
    is logged.

    :param state: pipeline state (reads ``url`` and ``ticker.symbol``, writes
        ``ticker.current_price_to_book``)
    :type state: State
    :return: whatever ``parse_current_price_to_book_date`` returns
    """
    try:
        state.ticker.current_price_to_book = state.url["QuoteTimeSeriesStore"][
            "timeSeries"]["trailingPbRatio"][2]["reportedValue"]["fmt"]
    except (KeyError, IndexError, TypeError):
        # A failed nested lookup raises KeyError/IndexError/TypeError, never
        # ValueError; reset the same attribute the success path writes.
        state.ticker.current_price_to_book = None
        LOGGER.warning(
            f"{state.ticker.symbol} | Current price to book Q0: {state.ticker.current_price_to_book}"
        )
    else:
        LOGGER.info(
            f"{state.ticker.symbol} | Current price to book Q0: {state.ticker.current_price_to_book}"
        )
    return parse_current_price_to_book_date(state=state)
def get_ticker_information(state: State):
    """
    Run the financial scrapper for every row of ``state.output``.

    For each row, ``manager_financial_scrapper`` is called with the row's
    ``symbol``; afterwards seven attributes of ``state.ticker`` are copied
    into same-named columns of that row and ``state.event`` is replaced by a
    fresh ``Event``. Once all rows are processed the frame is written to
    ``output.csv``.

    :param state: pipeline state (reads and writes ``output``, reads
        ``ticker``, resets ``event``)
    :type state: State
    :return: the same state object
    """
    # Column name == attribute name on state.ticker; order fixes column order.
    ticker_fields = (
        'current_price',
        'current_price_to_book_date',
        'current_price_to_book',
        'price_to_book_q1',
        'price_to_book_q2',
        'price_to_book_q3',
        'price_to_book_q4',
    )

    for row_label, record in state.output.iterrows():
        manager_financial_scrapper(state=state, symbol=record["symbol"])
        for field in ticker_fields:
            state.output.at[row_label, field] = getattr(state.ticker, field)
        state.event = Event()

    state.output.to_csv(f"{PATH}/data/output.csv")
    return state
from libs.state import State
from libs.logger import BASE_LOGGER
from services.manager import manager as manager_services

# 3rd Party
from dotenv import load_dotenv

# Load environment
load_dotenv()

# Activate logger
logger = BASE_LOGGER.getChild(__name__)


def invoke(state: State):
    """
    Hand the given state over to the services manager.

    :param state: state object forwarded to the manager as ``state``
    :type state: State
    :return: whatever the services manager returns
    """
    outcome = manager_services(state=state)
    return outcome


if __name__ == "__main__":
    logger.info("Initialization of the script")
    initial_state = State()
    invoke(state=initial_state)
def manager(state: "State | None" = None):
    """
    Log the start of the run and execute the ETL.

    :param state: optional pre-built state to run the ETL with; when omitted
        a fresh ``State()`` is created, which is the previous behaviour. The
        parameter lets callers that already hold a state pass it in as
        ``manager(state=...)``.
    :return: None
    """
    LOGGER.info("Start of the script")
    if state is None:
        state = State()
    execute_etl(state=state)