Beispiel #1
0
    def __init__(self):
        config = configparser.ConfigParser()
        config.read('config.ini')

        ms_username = config.get('Credentials', 'morningstar_username')
        ms_password = config.get('Credentials', 'morningstar_password')

        # we only want stocks with market cap > 50,000
        # TODO: add this value to the config file!
        self._google_scraper = GoogleScraper(50000000)
        self._asx_scraper = AsxScraper()
        self._ms_scraper = MorningStarScraper(ms_username, ms_password)
        self._ms_scraper.login()
Beispiel #2
0
    def execute(self, args):
        BINARY_PATH = os.getenv("SCRAPER_CHROME_BINARY_PATH")
        DRIVER_PATH = os.getenv("SCRAPER_CHROME_DRIVER_PATH")

        if BINARY_PATH is None or DRIVER_PATH is None:
            print(
                "Error: SCRAPER_CHROME_BINARY_PATH and SCRAPER_CHROME_DRIVER_PATH environment variables must be set to Selenium Chrome Driver settings."
            )
            return

        binary_path = BINARY_PATH
        driver_path = DRIVER_PATH

        # Parse arguments
        parser = argparse.ArgumentParser(
            description="Scrape a website for images using searchterms.")
        parser.add_argument(
            "site",
            type=str,
            help=
            "The site to search in. Can be 'bigstock', 'google' or 'shutterstock'",
        )
        parser.add_argument("searchterm",
                            type=str,
                            help="The terms to search for.")
        parser.add_argument(
            "pagecount",
            type=int,
            default=100,
            help="The total number of pages to scrape. Default is 100.",
        )
        parser.add_argument(
            "start_page",
            type=int,
            default=1,
            help="The page to start the search on. Default is 1.",
        )
        parser.add_argument(
            "image_size",
            type=str,
            default="regular",
            help=
            "The image size that should be downloaded. Can be 'small', 'regular' or 'large'. Default is 'regular'.",
        )

        arguments = parser.parse_args()

        self.site = arguments.site
        self.searchterm = arguments.searchterm
        self.pagecount = arguments.pagecount
        self.start_page = arguments.start_page
        self.image_size = arguments.image_size

        search_options = {
            "searchterm": self.searchterm,
            "pagecount": self.pagecount,
            "start_page": self.start_page,
            "image_size": self.image_size,
        }

        webdriver_options = {
            "chrome_binary_path": binary_path,
            "chrome_driver_path": driver_path,
        }

        # Select proper scraper based on received ards
        if self.site == "google":
            self.scraper = GoogleScraper(self)
            self.scraper.run(search_options, webdriver_options)

        elif self.site == "bigstock":
            self.scraper = BigStockScraper(self)
            self.scraper.run(search_options, webdriver_options)

        elif self.site == "shutterstock":
            self.scraper = ShutterStockScraper(self)
            self.scraper.run(search_options, webdriver_options)

        else:
            print(f"Error: no scraper found for website '{self.site}'.")