Example #1
def find_story_from_ticker_date(ticker,
                                date_begin_string,
                                browser,
                                exchange='',
                                date_end_string=''):
    """
    Searches GlobeNewswire by ticker and date.
    :param ticker: Company ticker
    :param date_begin_string: Begin date for search. If there is no end date, this is also the end date.
    :param browser: Browser object
    :param exchange: Exchange on which the ticker is traded
    :param date_end_string: (optional) End date of search
    :return: List of article objects
    """
    # Turn date_string into object for comparison
    date_start_object = normalize_date_return_object(date_begin_string)
    date_start_str = date_start_object.strftime('%Y-%m-%d')

    if date_end_string != '':
        date_end_object = normalize_date_return_object(date_end_string)
        date_end_str = date_end_object.strftime('%Y-%m-%d')
    else:
        date_end_str = date_start_str

    url_page = 1
    all_stories = []
    next_page_button = True
    keyword = ticker
    if exchange:
        keyword = keyword + "," + exchange

    # While there is a "Next Page" button on the results page, keep paginating
    while next_page_button:
        url = 'https://www.globenewswire.com/search/lang/en/exchange/nyse,nasdaq/date/[' + date_start_str + \
              '%2520TO%2520' + date_end_str + ']/keyword/' + keyword + \
              '?page=' + str(url_page)

        search_page_details = get_stories_from_search_page(url, browser)
        # get_stories_from_search_page returns None or [] when nothing is
        # found, so stop paginating rather than iterating over nothing.
        if not search_page_details:
            break
        # Check for the button after the page has loaded, so the check
        # reflects the page just fetched rather than the previous one.
        next_page_button = check_for_next_page_button(browser)

        for story in search_page_details:
            # TODO - add the ability to also ensure the exchange is the same
            tickers_in_story = get_ticker_objects_from_description(
                story.description)
            tickers_only = [i.ticker for i in tickers_in_story]
            if ticker in tickers_only:
                all_stories.append(story)

        url_page += 1
    return all_stories
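
Example #1 depends on a check_for_next_page_button helper that is not shown here. A minimal sketch of what it might look like, assuming GlobeNewswire renders its pagination control as a link whose text contains "Next" (the locator is an assumption, not the project's actual one):

def check_for_next_page_button(browser):
    """
    Return True if the loaded search page has a "Next" pagination link.
    Hypothetical sketch: the XPath below is an assumed locator for the
    GlobeNewswire pagination control.
    """
    next_links = browser.find_elements_by_xpath(
        '//a[contains(text(), "Next")]')
    return len(next_links) > 0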
Example #2
import datetime
from time import sleep


def find_story_from_ticker_two_days(ticker, date_string, browser, exchange=''):
    """
    Finds all stories from a ticker, the date specified, and the date before.
    :param ticker: Company ticker
    :param date_string: Date of event
    :param browser: Browser object
    :param exchange: Exchange of ticker
    :return: Dictionary with a list of stories from the day of the event and a list from the day before.
    """
    date_start_object = normalize_date_return_object(date_string)
    date_str = date_start_object.strftime('%Y-%m-%d')
    day_before_obj = date_start_object + datetime.timedelta(-1)
    day_before_str = day_before_obj.strftime('%Y-%m-%d')
    same_day_stories = []
    prev_day_stories = []

    stories = find_story_from_ticker_date(ticker, day_before_str, browser,
                                          exchange, date_str)
    sleep(1)
    for story in stories:
        if story.date_time.date() == date_start_object.date():
            print('Same day = ' + story.title)
            same_day_stories.append(story)

        if story.date_time.date() == day_before_obj.date():
            print('Prev day = ' + story.title)
            prev_day_stories.append(story)

    return {
        'same_day_stories': same_day_stories,
        'prev_day_stories': prev_day_stories
    }
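
A usage sketch for the two-day search, assuming a local chromedriver and the Selenium 3 API these examples use:

from selenium import webdriver

browser = webdriver.Chrome()  # assumes chromedriver is on the PATH
try:
    result = find_story_from_ticker_two_days('AAPL', '2020-10-05', browser,
                                             exchange='NASDAQ')
    for story in result['same_day_stories']:
        print(story.title)
finally:
    browser.quit()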
Example #3
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def find_min_date_on_search_results_page(url, browser):
    """
    Find the minimum date of all the articles on the page (to determine when to stop paginating)
    :param url: Url of search
    :param browser: Browser object
    :return: Minimum date on the page, or None if it cannot be found
    """
    browser.get(url)
    timeout = 20
    try:
        # Wait until the results section is visible before reading in data.
        WebDriverWait(browser, timeout). \
            until(EC.visibility_of_element_located((By.XPATH, '//*[@id="bw-group-all"]/div/div/div[3]/section')))
        # Original path to wait: '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[last()]/p')))

        # Retrieve min date from page
        min_date_text = browser.find_element_by_xpath(
            '//*[@id="bw-group-all"]/div/div/div[3]/section/'
            'ul/li[last()]/div[1]/time').text

        min_date = normalize_date_return_object(min_date_text)

        return min_date
    except (TimeoutException, NoSuchElementException):
        return None
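
Every example funnels raw date strings through normalize_date_return_object, which is not shown. A minimal sketch, assuming it returns a datetime (so callers can use .date(), .strftime(), and timedelta arithmetic) and accepts both ISO dates and the "October 05, 2020" style that newswire pages display; the format list is an assumption:

import datetime

def normalize_date_return_object(date_string):
    """
    Parse a date string in one of several formats into a datetime object.
    Hypothetical sketch: the format list is assumed from the strings
    these scrapers encounter.
    """
    formats = ('%Y-%m-%d', '%B %d, %Y', '%b %d, %Y', '%m/%d/%Y')
    for fmt in formats:
        try:
            return datetime.datetime.strptime(date_string.strip(), fmt)
        except ValueError:
            continue
    raise ValueError('Unrecognized date format: ' + date_string)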
Example #4
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def get_stories_from_search_page(url, browser):
    """
    Retrieve all articles that exist on a search page
    :param url: URL of search
    :param browser: Browser object
    :return: List of article objects
    """
    browser.get(url)
    timeout = 20

    try:
        if check_for_no_stories(browser):
            print('No stories found: ' + url)
            return None
        # Wait until the last story element is visible before reading in data.
        WebDriverWait(browser, timeout). \
            until(EC.visibility_of_element_located((By.XPATH, '//*[@id="bw-group-all"]/div/div/div[3]/'
                                                              'section/ul/li[last()]/p')))
        # Retrieve date, title, description, and url from each story
        date_elems = browser.find_elements_by_xpath(
            '//*[@id="bw-group-all"]/div/div/div[3]/section/'
            'ul/li[*]/div[1]/time')
        title_elems = browser.find_elements_by_xpath(
            '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/h3/a')
        heading_elems = browser.find_elements_by_xpath(
            '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/p')
        url_elems = browser.find_elements_by_xpath(
            '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/h3/a')

        # Take text from each object and put in lists
        date_text = [elem.text for elem in date_elems]
        title_text = [elem.text for elem in title_elems]
        heading_text = [elem.text for elem in heading_elems]
        urls = [elem.get_attribute('href') for elem in url_elems]

        output = []
        for i, n in enumerate(urls):
            ticker_object_list = get_ticker_objects_from_description(
                heading_text[i])
            if is_english_story(n) and ticker_object_list:
                date = normalize_date_return_object(date_text[i])
                article_object = NewsArticle(date, title_text[i],
                                             ticker_object_list,
                                             heading_text[i], n,
                                             'BusinessWire')
                output.append(article_object)

        return output
    except TimeoutException:
        return []
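
Examples #4, #6, and #8 all construct NewsArticle objects with the same six positional arguments. A plausible sketch of that container, with attribute names assumed from how the objects are read back elsewhere (story.date_time, story.title, story.description):

class NewsArticle:
    """
    Lightweight container for one scraped story.
    Hypothetical sketch: the constructor order matches the calls in these
    examples; the attribute names are assumptions based on usage.
    """
    def __init__(self, date_time, title, tickers, description, url, source):
        self.date_time = date_time
        self.title = title
        self.tickers = tickers
        self.description = description
        self.url = url
        self.source = source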
Example #5
import datetime


def find_story_from_ticker_date(ticker, date_string, browser):
    """
    Pulls stories that match a search for a specific ticker and date combination
    :param ticker: Company ticker
    :param date_string: Date
    :param browser: Browser object
    :return: Dictionary of story object lists from the date searched, as well as the date before.
    """

    # Seed min_date_on_page with today's date so the first loop check passes
    min_date_on_page = datetime.datetime.today()

    # Turn date_string into object for comparison
    date_object_of_event = normalize_date_return_object(date_string)
    date_object_day_before_event = date_object_of_event + datetime.timedelta(
        -1)

    url_page = 1
    same_day_stories = []
    prev_day_stories = []

    # While the minimum date on the results page is on or after the day before the event, keep paginating
    while min_date_on_page >= date_object_day_before_event:
        url = 'https://www.businesswire.com/portal/site/home/search/?searchType=ticker&searchTerm=' \
              + ticker + '&searchPage=' + str(url_page)

        min_date_on_page = find_min_date_on_search_results_page(url, browser)
        if min_date_on_page is None:
            break
        url_page += 1

        if min_date_on_page <= date_object_day_before_event or min_date_on_page == date_object_of_event:
            search_page_details = get_stories_from_search_page(url, browser)

            for story in search_page_details:
                if story.date_time == date_object_of_event:
                    print('Same day = ' + story.title)
                    same_day_stories.append(story)

                if story.date_time == date_object_of_event + datetime.timedelta(
                        -1):
                    print('Prev day = ' + story.title)
                    prev_day_stories.append(story)

    return {
        'same_day_stories': same_day_stories,
        'prev_day_stories': prev_day_stories
    }
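
get_ticker_objects_from_description appears in most of these examples, and Example #1 reads a .ticker attribute off each returned object. A minimal regex-based sketch, assuming descriptions embed tickers in a "(NYSE: ABC)" or "(Nasdaq: ABC)" style; both the pattern and the TickerMention container are assumptions:

import re
from collections import namedtuple

# Hypothetical container; Example #1 only relies on the .ticker attribute.
TickerMention = namedtuple('TickerMention', ['exchange', 'ticker'])

def get_ticker_objects_from_description(description):
    """
    Extract "(EXCHANGE: TICKER)" mentions from a story description.
    Hypothetical sketch: the pattern is an assumed convention for how
    newswire descriptions cite listed companies.
    """
    pattern = re.compile(r'\(([A-Za-z ]+):\s*([A-Z.\-]+)\)')
    return [TickerMention(exchange.strip(), ticker)
            for exchange, ticker in pattern.findall(description)]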
Example #6
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def get_stories_from_search_page(url, browser):
    """
    Returns all stories from the current search page
    :param url: url of search results
    :param browser: Browser object
    :return: List of article objects
    """
    browser.get(url)
    timeout = 20

    try:
        if check_for_no_stories(browser):
            return None

        # Wait until the results elements are visible before reading in data.
        WebDriverWait(browser, timeout). \
            until(EC.visibility_of_element_located((By.XPATH, '/html/body/div[1]/div[2]/div/div[2]/div/div[2]/span')))
        # Retrieve date, title, description, and url from each story
        date_elems = browser.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div/div[*]/div/div[2]/div/span[1]')
        title_elems = browser.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div/div[*]/div/div[2]/a')
        heading_elems = browser.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div/div[*]/div/div[2]/span')
        url_elems = browser.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div/div[*]/div/div[2]/a')
        # Take text from each object and put in lists
        date_text = [elem.text for elem in date_elems]
        title_text = [elem.text for elem in title_elems]
        heading_text = [elem.text for elem in heading_elems]
        urls = [elem.get_attribute('href') for elem in url_elems]

        output = []
        for i, n in enumerate(urls):
            ticker_object_list = get_ticker_objects_from_description(
                heading_text[i])
            if ticker_object_list:
                date = normalize_date_return_object(date_text[i])
                article_object = NewsArticle(date, title_text[i],
                                             ticker_object_list,
                                             heading_text[i], n,
                                             'Globe Newswire')
                output.append(article_object)

        return output
    except TimeoutException:
        return []
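
Examples #4 and #6 bail out early when check_for_no_stories fires (the Example #8 variant takes a source argument first). A hedged sketch of the single-argument version, assuming the results page shows a recognizable "no results" message; both the text and the approach are assumptions:

def check_for_no_stories(browser):
    """
    Return True if the loaded search page reports zero results.
    Hypothetical sketch: matching on visible page text is an assumption;
    the real helper may inspect a specific element instead.
    """
    page_text = browser.find_element_by_tag_name('body').text.lower()
    return 'no results' in page_text or 'no stories' in page_text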
Example #7
import requests


def get_daily_response_iex(ticker, date, token_type='Prod'):
    """
    Gets daily stock market data for a ticker/date
    :param ticker: Company ticker
    :param date: Date
    :param token_type: Prod or Sandbox
    :return: Response from API
    """

    token = get_token_iex(token_type)
    date_object = normalize_date_return_object(date)

    try:
        date_str = date_object.strftime('%Y%m%d')
        url = 'https://cloud.iexapis.com/stable/stock/' + ticker + '/chart/date/' + date_str + \
              '?chartByDay=true&token=' + token
        print(url)
        response = requests.get(url)
        return response
    except requests.exceptions.RequestException:
        # Network-level failures return an empty dict instead of a Response
        return {}
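
A usage sketch for Example #7: with chartByDay=true the IEX Cloud chart endpoint returns a JSON list containing one daily bar, so the close can be read off the first element (the error handling here is an assumption):

response = get_daily_response_iex('AAPL', '2020-10-05')
if response and response.status_code == 200:
    bars = response.json()  # one bar per day when chartByDay=true
    if bars:
        print(bars[0].get('date'), bars[0].get('close'))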
Example #8
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def get_stories_from_search_page(url, source, browser):
    """
    Returns all stories from the current search page
    :param url: url of search results
    :param source: 'gnw' for Globe Newswire, 'bw' for Business Wire
    :param browser: Browser object
    :return: List of article objects
    """
    browser.get(url)
    timeout = 20
    try:
        if check_for_no_stories(source, browser):
            return None

        # If the source is Globe Newswire, use one set of xpaths to find elements
        if source == 'gnw':
            # Wait until the results elements are visible before reading in data.
            WebDriverWait(browser, timeout). \
                until(EC.visibility_of_element_located((By.XPATH, '/html/body/div[1]/div[2]/div/div[2]/div[*]/div[1]/div[2]/span')))
            # Retrieve date, title, description, and url from each story
            date_elems = browser.find_elements_by_xpath(
                '/html/body/div[1]/div[2]/div/div[2]/div[*]/div/div[2]/div/span[1]'
            )
            title_elems = browser.find_elements_by_xpath(
                '/html/body/div[1]/div[2]/div/div[2]/div[*]/div/div[2]/a')
            heading_elems = browser.find_elements_by_xpath(
                '/html/body/div[1]/div[2]/div/div[2]/div[*]/div/div[2]/span')
            url_elems = browser.find_elements_by_xpath(
                '/html/body/div[1]/div[2]/div/div[2]/div[*]/div/div[2]/a')
            source_long = 'Globe Newswire'
        # If the source is Business Wire, use another xpath to find elements
        elif source == 'bw':
            # Wait until the last story element is visible before reading in data.
            WebDriverWait(browser, timeout). \
                until(EC.visibility_of_element_located((By.XPATH, '//*[@id="bw-group-all"]/div/div/div[3]/'
                                                                  'section/ul/li[last()]/p')))
            # Retrieve date, title, description, and url from each story
            date_elems = browser.find_elements_by_xpath(
                '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/div[1]/time'
            )
            title_elems = browser.find_elements_by_xpath(
                '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/h3/a')
            heading_elems = browser.find_elements_by_xpath(
                '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/p')
            url_elems = browser.find_elements_by_xpath(
                '//*[@id="bw-group-all"]/div/div/div[3]/section/ul/li[*]/h3/a')
            source_long = 'Business Wire'
        else:
            # Guard against an unknown source so the element variables
            # below are never referenced unbound.
            raise ValueError('Unknown source: ' + source)

        # Take text from each object and put in lists
        date_text = [elem.text for elem in date_elems]
        title_text = [elem.text.strip() for elem in title_elems]
        heading_text = [elem.text.strip() for elem in heading_elems]
        urls = [elem.get_attribute('href') for elem in url_elems]

        output = []
        for i, n in enumerate(urls):
            if is_english_story(n):
                ticker_object_list = get_ticker_objects_from_description(
                    heading_text[i])
                if ticker_object_list:
                    date = normalize_date_return_object(date_text[i])
                    article_object = NewsArticle(date, title_text[i],
                                                 ticker_object_list,
                                                 heading_text[i], n,
                                                 source_long)
                    output.append(article_object)
        return output

    except TimeoutException:
        print('TimeoutException')
        return None
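
A usage sketch for the two-source variant in Example #8, assuming the 'gnw'/'bw' source codes shown above and a Business Wire search URL built as in Example #5:

from selenium import webdriver

browser = webdriver.Chrome()  # assumes chromedriver is on the PATH
try:
    url = ('https://www.businesswire.com/portal/site/home/search/'
           '?searchType=ticker&searchTerm=AAPL&searchPage=1')
    stories = get_stories_from_search_page(url, 'bw', browser)
    for story in stories or []:  # the function can return None
        print(story.date_time, story.title)
finally:
    browser.quit()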