def scrape_apps(self):
        """
        Main method of the 'Worker' layer of this project.

        This method starts the distributed working phase which will
        consume urls from the seed database and scrape apps data out
        of the html pages, storing the result into the
        apps_data collection on MongoDB.

        Workflow of each iteration:
          1. Fetch a seed record through the mongo wrapper; the loop ends
             when no record is returned.
          2. Normalize the record's '_id' into an absolute url.
          3. Skip (and dequeue) urls already present in the apps collection.
          4. Download the page; a bad response releases the record and
             moves on to the next one without parsing.
          5. Parse the app data and its related apps, insert the app and
             queue every related url that is not known yet.

        Exits the process with errno.ECONNREFUSED if MongoDB cannot be
        configured. Any other exception raised while handling a record is
        logged and the loop carries on with the next record.
        """

        # Arguments Parsing
        args_parser = self.get_arguments_parser()
        self._args = vars(args_parser.parse_args())

        # Log Handler Configuring
        self._logger = Utils.configure_log(self._args)

        # MongoDB Configuring
        if not Utils.configure_mongodb(self, **self._params):
            self._logger.fatal('Error configuring MongoDB')
            sys.exit(errno.ECONNREFUSED)

        # Making sure indexes exist
        self._mongo_wrapper.ensure_index('IsBusy')
        self._mongo_wrapper.ensure_index('_id',
                                         self._params['apps_collection'])

        # Proxies Loading
        self._proxies = Utils.load_proxies(self._args)

        # if "Debug Http" is set to true, "verify" must be "false"
        self._verify_certificate = not self._args['debug_https']
        self._is_using_proxies = self._proxies is not None

        # Control Variables - Used on the 'retrying logic'
        # NOTE(review): 'retries' only counts failed responses and
        # 'max_retries' is not enforced anywhere in this method - confirm
        # the intended cap policy before relying on either of them.
        retries, max_retries = 0, 8

        parser = html_parser()

        # Loop only breaks when there are no more apps to be processed
        while True:

            # Finds an app to be processed and toggles it's state to 'Busy'
            seed_record = self._mongo_wrapper.find_and_modify()

            if not seed_record:
                break

            try:
                url = seed_record['_id']

                # Do we need to normalize the url ?
                if 'http://' not in url and 'https://' not in url:
                    url = 'https://play.google.com' + url

                self._logger.info('Processing: %s' % url)

                # Is this app processed already ?
                if self._mongo_wrapper.app_processed(
                        url, self._params['apps_collection']):

                    self._logger.info('Duplicated App : %s. Skipped' % url)
                    self._mongo_wrapper.remove_app_from_queue(seed_record)
                    continue

                # Get Request for the App's Page.
                # Headers must go by keyword: the second positional
                # argument of requests.get is 'params' (query string).
                response = requests.get(url,
                                        headers=HTTPUtils.headers,
                                        verify=self._verify_certificate,
                                        proxies=Utils.get_proxy(self))

                # Sanity Checks on Response
                if (not response.text or
                        response.status_code != requests.codes.ok):
                    self._logger.info('Error Opening App Page : %s' % url)

                    retries += 1

                    # Retries logic are different if proxies are being used
                    if self._is_using_proxies:
                        Utils.sleep()

                    # An error page must not be parsed as app data.
                    # Release the record, same as the parsing failure
                    # path below, and move on to the next one.
                    self._mongo_wrapper.toggle_app_busy(
                        url, False, self._params['seed_collection'])
                    continue

                try:
                    # Scraping Data from HTML
                    app = parser.parse_app_data(response.text)

                    # Stamping URL into app model
                    app['Url'] = url
                    app['_id'] = url

                    # Reaching related apps
                    related_apps = parser.parse_related_apps(response.text)

                    if not related_apps:
                        app['RelatedUrls'] = None
                    else:
                        app['RelatedUrls'] = related_apps
                        self._logger.info('Related Apps: %s - %d' %
                                          (url, len(related_apps)))

                    # Inserting data into MongoDB
                    self._mongo_wrapper._insert(
                        app, self._params['apps_collection'])

                    # Re-Feeding seed collection with related-app urls.
                    # A dedicated loop variable keeps 'url' pointing at the
                    # app being processed, which the handler below needs.
                    if app['RelatedUrls']:
                        for related_url in app['RelatedUrls']:
                            already_scraped = self._mongo_wrapper.app_processed(
                                related_url, self._params['apps_collection'])
                            if already_scraped:
                                continue

                            already_queued = self._mongo_wrapper.app_processed(
                                related_url, self._params['seed_collection'])
                            if not already_queued:
                                self._mongo_wrapper.insert_on_queue(
                                    related_url,
                                    self._params['seed_collection'])

                except Exception as exception:
                    self._logger.error(exception)

                    # Toggling app state back to false
                    self._mongo_wrapper.toggle_app_busy(
                        url, False, self._params['seed_collection'])

            except Exception as exception:
                # NOTE(review): a failure before the inner try (e.g. the
                # GET itself raising) is only logged here; the record's
                # busy state is not toggled back on this path.
                self._logger.error(exception)
    def scrape_apps(self):
        """
        Main method of the 'Worker' layer of this project.

        This method starts the distributed working phase which will
        consume urls from the seed database and scrape apps data out
        of the html pages, storing the result into the
        apps_data collection on MongoDB.

        Workflow of each iteration:
          1. Fetch a seed record through the mongo wrapper; the loop ends
             when no record is returned.
          2. Normalize the record's '_id' into an absolute url.
          3. Skip (and dequeue) urls already present in the apps collection.
          4. Download the page; a bad response releases the record and
             moves on to the next one without parsing.
          5. Parse the app data and its related apps, insert the app and
             queue every related url that is not known yet.

        Exits the process with errno.ECONNREFUSED if MongoDB cannot be
        configured. Any other exception raised while handling a record is
        logged and the loop carries on with the next record.
        """

        # Arguments Parsing
        args_parser = self.get_arguments_parser()
        self._args = vars(args_parser.parse_args())

        # Log Handler Configuring
        self._logger = Utils.configure_log(self._args)

        # MongoDB Configuring
        if not Utils.configure_mongodb(self, **self._params):
            self._logger.fatal('Error configuring MongoDB')
            sys.exit(errno.ECONNREFUSED)

        # Making sure indexes exist
        self._mongo_wrapper.ensure_index('IsBusy')
        self._mongo_wrapper.ensure_index('_id',
                                         self._params['apps_collection'])

        # Proxies Loading
        self._proxies = Utils.load_proxies(self._args)

        # if "Debug Http" is set to true, "verify" must be "false"
        self._verify_certificate = not self._args['debug_https']
        self._is_using_proxies = self._proxies is not None

        # Control Variables - Used on the 'retrying logic'
        # NOTE(review): 'retries' only counts failed responses and
        # 'max_retries' is not enforced anywhere in this method - confirm
        # the intended cap policy before relying on either of them.
        retries, max_retries = 0, 8

        parser = html_parser()

        # Loop only breaks when there are no more apps to be processed
        while True:

            # Finds an app to be processed and toggles it's state to 'Busy'
            seed_record = self._mongo_wrapper.find_and_modify()

            if not seed_record:
                break

            try:
                url = seed_record['_id']

                # Do we need to normalize the url ?
                if 'http://' not in url and 'https://' not in url:
                    url = 'https://play.google.com' + url

                self._logger.info('Processing: %s' % url)

                # Is this app processed already ?
                if self._mongo_wrapper.app_processed(
                        url, self._params['apps_collection']):

                    self._logger.info('Duplicated App : %s. Skipped' % url)
                    self._mongo_wrapper.remove_app_from_queue(seed_record)
                    continue

                # Get Request for the App's Page.
                # Headers must go by keyword: the second positional
                # argument of requests.get is 'params' (query string).
                response = requests.get(url,
                                        headers=HTTPUtils.headers,
                                        verify=self._verify_certificate,
                                        proxies=Utils.get_proxy(self))

                # Sanity Checks on Response
                if (not response.text or
                        response.status_code != requests.codes.ok):
                    self._logger.info('Error Opening App Page : %s' % url)

                    retries += 1

                    # Retries logic are different if proxies are being used
                    if self._is_using_proxies:
                        Utils.sleep()

                    # An error page must not be parsed as app data.
                    # Release the record, same as the parsing failure
                    # path below, and move on to the next one.
                    self._mongo_wrapper.toggle_app_busy(
                        url, False, self._params['seed_collection'])
                    continue

                try:
                    # Scraping Data from HTML
                    app = parser.parse_app_data(response.text)

                    # Stamping URL into app model
                    app['Url'] = url
                    app['_id'] = url

                    # Reaching related apps
                    related_apps = parser.parse_related_apps(response.text)

                    if not related_apps:
                        app['RelatedUrls'] = None
                    else:
                        app['RelatedUrls'] = related_apps
                        self._logger.info('Related Apps: %s - %d' %
                                          (url, len(related_apps)))

                    # Inserting data into MongoDB
                    self._mongo_wrapper._insert(
                        app, self._params['apps_collection'])

                    # Re-Feeding seed collection with related-app urls.
                    # A dedicated loop variable keeps 'url' pointing at the
                    # app being processed, which the handler below needs.
                    if app['RelatedUrls']:
                        for related_url in app['RelatedUrls']:
                            already_scraped = self._mongo_wrapper.app_processed(
                                related_url, self._params['apps_collection'])
                            if already_scraped:
                                continue

                            already_queued = self._mongo_wrapper.app_processed(
                                related_url, self._params['seed_collection'])
                            if not already_queued:
                                self._mongo_wrapper.insert_on_queue(
                                    related_url,
                                    self._params['seed_collection'])

                except Exception as exception:
                    self._logger.error(exception)

                    # Toggling app state back to false
                    self._mongo_wrapper.toggle_app_busy(
                        url, False, self._params['seed_collection'])

            except Exception as exception:
                # NOTE(review): a failure before the inner try (e.g. the
                # GET itself raising) is only logged here; the record's
                # busy state is not toggled back on this path.
                self._logger.error(exception)