def get_peers(ticker_symbol, page=None):
    """
    Gets the list of Top Peers for a stock as listed on the "Premium Research: Industry Analysis" section
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: a list of the Top Peers as listed on a stock's "Premium Research: Industry Analysis" section
    on its respective zacks page, or None if no peers were found
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    peers = page.xpath(PEERS_XPATH)

    if not peers:
        return None

    # The scraped peer list may include the queried ticker itself; drop it.
    # list.remove raises ValueError when the item is absent -- catch only that
    # (the original bare `except:` could mask unrelated bugs, e.g. a typo).
    try:
        peers.remove(ticker_symbol.upper())
    except ValueError:
        pass

    # Removing the ticker may have emptied the list; treat that as "no peers".
    return peers if peers else None
Beispiel #2
0
def scrape_site(site):
    """
    Scrapes every not-yet-scraped URL listed in links/{site}_urls.txt, appending
    each page's title/text/site to data/{site}/data.json and recording progress
    in data/{site}/scraped_urls.txt so an interrupted run can resume.
    Writes the final dataset to data/{site}/data_all.tsv.
    :param site: key identifying the site; used for file paths and the
    `selectors` lookup passed to scrape_page
    """
    session = HTMLSession()
    with open(f'links/{site}_urls.txt', 'r') as f:
        urls = f.readlines()

    # Strip trailing newlines; a set gives O(1) membership and dedupes.
    urls = {url[:-1] for url in urls}

    save_dir = f'data/{site}/'
    os.makedirs(save_dir, exist_ok=True)

    scraped = []
    if os.path.isfile(save_dir + 'scraped_urls.txt'):
        with open(save_dir + 'scraped_urls.txt', 'r') as f:
            scraped = f.readlines()

    if scraped:
        # Resume: reload the partially-built dataset from the previous run.
        with open(save_dir + 'data.json', 'r') as f:
            data = json.load(f)
    else:
        data = {'title': [], 'text': [], 'site': []}

    scraped = {url[:-1] for url in scraped}

    to_scrape = urls - scraped

    if not to_scrape:
        print(f'{site} scraping completed.')
        return

    def save_progress():
        # Persist the data and the scraped-URL set. URLs must be written
        # newline-terminated so the `url[:-1]` parsing above round-trips;
        # the original `f.writelines(scraped)` wrote them with no separator,
        # corrupting the checkpoint file on the first save.
        with open(save_dir + 'data.json', 'w') as f:
            json.dump(data, f)
        with open(save_dir + 'scraped_urls.txt', 'w') as f:
            f.writelines(url + '\n' for url in scraped)

    print(f'{site} scraping initiated!')
    for i, url in enumerate(tqdm(to_scrape)):
        try:
            title, text = scrape_page(url, selectors[site], session, site)
            data['title'].append(title)
            data['text'].append(text)
            data['site'].append(site)
            scraped.add(url)
        except IndexError:
            # Page structure didn't match the selectors; mark as done so we
            # don't retry a permanently unparsable page.
            scraped.add(url)
        except ValueError:
            # Presumably transient; leave the URL unscraped for a later retry.
            pass
        except Exception as e:
            print(e)
            print(site)
        if i % checkpoint_steps == 0 and i > 0:
            save_progress()

    # Final checkpoint: the original only saved inside the loop, losing the
    # tail of the run (and re-scraping those URLs on the next invocation).
    save_progress()

    data_df = pd.DataFrame(data)
    data_df.to_csv(save_dir + 'data_all.tsv', sep='\t', index=False)
    print(f'{site} scraping completed.')
def get_bullish_sentiment(ticker_symbol, page=None):
    """
    Looks up the bullish sentiment for the target ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: a string of the percentage of bullish sentiment as listed on a stock's StockTwit's page
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    matches = page.xpath(BULLISH_SENTIMENT_XPATH)

    # Guard clause: no match on the page means no sentiment is available.
    if matches:
        return matches[0].replace("\n", "") + " Bullish"
    return None
def get_bullish_sentiment(ticker_symbol, page=None):
    """
    Fetches the bullish sentiment percentage for the given ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: a string of the percentage of bullish sentiment as listed on a stock's StockTwit's page
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    result = page.xpath(BULLISH_SENTIMENT_XPATH)

    # Empty xpath result -> sentiment not listed for this stock.
    return None if not result else result[0].replace("\n", "") + " Bullish"
def get_rating(ticker_symbol, page=None):
    """
    Retrieves the Zack's Rank Rating of the target ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: String of Zack's Rank Rating as listed on a stock's Zacks page
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    hits = page.xpath(RATING_XPATH)

    # First match is the rating; an empty result means none was found.
    return hits[0] if hits else None
def get_sentiment(ticker_symbol, page=None):
    """
    Collects both the bullish and bearish sentiment of the target ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: a tuple of strings containing both the bullish and bearish sentiment as listed on a stock's
    StockTwits page
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    bullish = get_bullish_sentiment(ticker_symbol, page)

    # No bullish reading means the page carries no sentiment data at all.
    if not bullish:
        return None
    return bullish, get_bearish_sentiment(ticker_symbol, page)
def get_sentiment(ticker_symbol, page=None):
    """
    Returns the bullish and bearish sentiment pair for the target ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param page: html tree structure based on the html markup of the scraped website
    :return: a tuple of strings containing both the bullish and bearish sentiment as listed on a stock's
    StockTwits page
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    bullish = get_bullish_sentiment(ticker_symbol, page)

    # Only fetch the bearish half when a bullish reading actually exists.
    return (bullish, get_bearish_sentiment(ticker_symbol, page)) if bullish else None
Beispiel #8
0
def get_all_statistics(ticker_symbol, page=None):
    """
    Collects every financial statistic from the corresponding finviz page
    for the given ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GGOG", "MSFT")
    :param page: HTML tree structure based on the html markup of the scraped page. If one is not passed in the
    function will scrape the page
    :return: a dictionary of all the financial statistics listed on a stock's finviz page, otherwise None
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    # A falsy (empty/missing) table collapses to the explicit None sentinel.
    return get_statistics_table(page) or None
Beispiel #9
0
def get_statistic(ticker_symbol, stat_name, page=None):
    """
    Looks up a single financial statistic from the corresponding finviz page
    given the statistic's name and the ticker symbol.
    :param ticker_symbol: The ticker symbol of the interested stock (e.g., "AAPL", "GOOG", "MSFT")
    :param stat_name: The name of the interested financial statistic (e.g., "P/E", "Price", "Volume").
    An exhaustive list of available financial statistics can be found on a stock's finviz page
    :param page: HTML tree structure based on the html markup of the scraped web page. If one is not passed in the
    function will scrape the page
    :return: the value of the interested financial statistic if it exists, otherwise None
    """
    if page is None:
        page = scrape_page(BASE_URL + ticker_symbol)

    table = get_statistics_table(page)

    # dict.get covers the membership test; a falsy value also maps to None,
    # matching the original `in table.keys() and table[stat_name]` check.
    return table.get(stat_name) or None
Beispiel #10
0
def search():
    """Handle a search form submission: scrape the queried page and render it."""
    query = str(request.form['query'])
    scraped = scrape_page(query)
    return render_template('result.html', data=scraped)
Beispiel #11
0
    "http://www.pollingreport.com/d.htm",
    "http://www.pollingreport.com/e-f.htm",
    "http://www.pollingreport.com/g.htm",
    "http://www.pollingreport.com/h-j.htm",
    "http://www.pollingreport.com/k.htm",
    "http://www.pollingreport.com/l.htm",
    "http://www.pollingreport.com/o.htm",
    "http://www.pollingreport.com/p.htm",
    "http://www.pollingreport.com/r.htm",
    "http://www.pollingreport.com/S-Z.htm",

    # Congressional Job Ratings
    "http://www.pollingreport.com/CongJob1.htm",
    "http://www.pollingreport.com/cong_dem.htm",
    "http://www.pollingreport.com/cong_rep.htm",

    # Party Approval
    "http://www.pollingreport.com/dem.htm",
    "http://www.pollingreport.com/rep.htm",
]

if __name__ == '__main__':

    # Create the output directories up front so the open() calls can't fail.
    os.makedirs("parsed", exist_ok=True)
    os.makedirs("raw", exist_ok=True)

    for page_url in urls:
        # Name each CSV after the last path segment of its source URL.
        out_name = page_url.split("/")[-1] + ".csv"
        with open("parsed/" + out_name, "w", encoding="latin-1") as out_file:
            scrape_page(page_url, out_file)