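# Expected `config.ini` for the StockScraper below (a sketch: the section and
# key names match what __init__ reads; the values are placeholders):
#
#   [Credentials]
#   morningstar_username = your_username
#   morningstar_password = your_password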
import configparser

# Project-local imports (assumed module layout; adjust to the real package):
from scrapers import GoogleScraper, AsxScraper, MorningStarScraper
from exceptions import FieldMissingException


class StockScraper(object):
    """Used to scrape stock data from a variety of sources on the web.

    This class should be used by the client code instead of the specific
    scrapers.
    """

    def __init__(self):
        config = configparser.ConfigParser()
        config.read('config.ini')
        ms_username = config.get('Credentials', 'morningstar_username')
        ms_password = config.get('Credentials', 'morningstar_password')

        # We only want stocks with a market cap above 50,000,000.
        # TODO: add this value to the config file!
        self._google_scraper = GoogleScraper(50000000)
        self._asx_scraper = AsxScraper()
        self._ms_scraper = MorningStarScraper(ms_username, ms_password)
        self._ms_scraper.login()

    def scrape_stock_profiles(self):
        """Scrapes and returns a list of stock profiles from various sources
        on the web.
        """
        stock_profiles = []

        # Using Google stock data as our base, populate all the stock profiles.
        for stock in self._google_scraper.scrape_stock_profiles():
            try:
                stock.sector = self._asx_scraper.sector(stock.symbol)
            except KeyError:
                # The Google list sometimes has delisted companies, so if a
                # sector can't be found, it's probably been delisted.
                continue

            # TODO: add these values into the config file?
            if 'Utilities' in stock.sector \
                    or 'Financ' in stock.sector \
                    or 'Banks' in stock.sector \
                    or 'Real Estate' in stock.sector:
                continue

            # Merge in stock data from MorningStar.
            # Note that we keep Google's 'market cap', as it's more up to date.
            try:
                ms_stock = self._ms_scraper.scrape_stock_profile(stock.symbol)
            except FieldMissingException as e:
                # `ms_stock` is unbound when the scrape fails, so report the
                # symbol from the Google profile instead.
                print(stock.symbol, str(e))
                # If an attribute can't be found, we can't really do anything
                # other than just continue. Some companies don't have 'return
                # on capital' available.
                continue

            stock.return_on_capital = ms_stock.return_on_capital
            stock.ebit = ms_stock.ebit
            stock.total_debt = ms_stock.total_debt
            stock.cash = ms_stock.cash
            stock_profiles.append(stock)

        return stock_profiles
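# Usage sketch (illustrative, not part of the original module). Assumes a
# valid `config.ini` with working MorningStar credentials sits in the
# working directory.
if __name__ == '__main__':
    scraper = StockScraper()
    for profile in scraper.scrape_stock_profiles():
        print(profile.symbol, profile.sector, profile.return_on_capital)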
import argparse
import os

# Project-local imports (assumed module layout; adjust to the real package):
from scrapers import GoogleScraper, BigStockScraper, ShutterStockScraper


# Method of the CLI command class (the enclosing class is elided here).
def execute(self, args):
    binary_path = os.getenv("SCRAPER_CHROME_BINARY_PATH")
    driver_path = os.getenv("SCRAPER_CHROME_DRIVER_PATH")
    if binary_path is None or driver_path is None:
        print(
            "Error: SCRAPER_CHROME_BINARY_PATH and SCRAPER_CHROME_DRIVER_PATH "
            "environment variables must be set to the Selenium Chrome driver "
            "settings."
        )
        return

    # Parse arguments. The trailing positionals take nargs='?' so the
    # documented defaults actually apply when they are omitted.
    parser = argparse.ArgumentParser(
        description="Scrape a website for images using search terms.")
    parser.add_argument(
        "site",
        type=str,
        help="The site to search in. Can be 'bigstock', 'google' or "
             "'shutterstock'.",
    )
    parser.add_argument(
        "searchterm", type=str, help="The terms to search for.")
    parser.add_argument(
        "pagecount",
        type=int,
        nargs="?",
        default=100,
        help="The total number of pages to scrape. Default is 100.",
    )
    parser.add_argument(
        "start_page",
        type=int,
        nargs="?",
        default=1,
        help="The page to start the search on. Default is 1.",
    )
    parser.add_argument(
        "image_size",
        type=str,
        nargs="?",
        default="regular",
        help="The image size that should be downloaded. Can be 'small', "
             "'regular' or 'large'. Default is 'regular'.",
    )
    arguments = parser.parse_args()

    self.site = arguments.site
    self.searchterm = arguments.searchterm
    self.pagecount = arguments.pagecount
    self.start_page = arguments.start_page
    self.image_size = arguments.image_size

    search_options = {
        "searchterm": self.searchterm,
        "pagecount": self.pagecount,
        "start_page": self.start_page,
        "image_size": self.image_size,
    }
    webdriver_options = {
        "chrome_binary_path": binary_path,
        "chrome_driver_path": driver_path,
    }

    # Select the proper scraper based on the received args.
    scraper_classes = {
        "google": GoogleScraper,
        "bigstock": BigStockScraper,
        "shutterstock": ShutterStockScraper,
    }
    scraper_class = scraper_classes.get(self.site)
    if scraper_class is None:
        print(f"Error: no scraper found for website '{self.site}'.")
        return
    self.scraper = scraper_class(self)
    self.scraper.run(search_options, webdriver_options)
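# Usage sketch (the script name and paths are hypothetical; the positional
# arguments mirror the argparse definitions above):
#
#   export SCRAPER_CHROME_BINARY_PATH=/usr/bin/google-chrome
#   export SCRAPER_CHROME_DRIVER_PATH=/usr/local/bin/chromedriver
#   python scrape.py google "sunset beach" 10 1 regular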