def __init__(self,
             config,
             query='',
             page_number=1,
             search_engine='google',
             scrape_method='http-async'):
        """Store the request settings and the per-engine values built from them.

        Args:
            config: Settings object; it is passed to the base-URL helper and
                ``config['num_results_per_page']`` is read and converted
                with ``int``.
            query: Stored on ``self.query`` and passed to the GET-parameter
                helper.
            page_number: Stored on ``self.page_number`` and used to compute
                ``self.startRecord``.
            search_engine: Stored on ``self.search_engine_name`` and passed
                to the parser, base-URL and GET-parameter helpers.
            scrape_method: Stored unchanged on ``self.scrape_method``.
        """
        search_type = 'normal'

        # What is being requested.
        self.config, self.query, self.page_number = config, query, page_number
        self.search_engine_name, self.search_type = search_engine, search_type
        self.scrape_method = scrape_method

        # Request bookkeeping defaults.
        self.requested_at, self.requested_by = None, 'localhost'

        # Engine-specific pieces, produced by helpers defined outside this
        # method; the base-URL helper is always given the literal 'http'.
        self.parser = get_parser_by_search_engine(search_engine)
        self.base_search_url = get_base_search_url_by_search_engine(
            config, search_engine, 'http')
        self.params = get_GET_params_for_search_engine(
            query, search_engine, search_type=search_type)
        # `headers` is not a parameter; it is resolved from the enclosing scope.
        self.headers = headers
        self.status = 'successful'

        # Paging: page 1 starts at record 1, page 2 at per_page + 1, and so on.
        per_page = int(config['num_results_per_page'])
        self.num_results_per_page = per_page
        first_record = per_page * (page_number - 1) + 1
        self.startRecord = first_record
        self.stringStartRecord = "&first=" + str(first_record)
Example #2
0
 def __init__(self, query='', page_number=1, search_engine='google', **kwargs):
     """Store the request settings and the per-engine values built from them.

     Args:
         query: Stored on ``self.query`` and passed to the GET-parameter
             helper.
         page_number: Stored unchanged on ``self.page_number``.
         search_engine: Stored on ``self.search_engine_name`` and passed to
             the parser, base-URL and GET-parameter helpers.
         **kwargs: Accepted but not used anywhere in this method.
     """
     search_type = 'normal'

     # What is being requested; the scrape method is fixed for this variant.
     self.query, self.page_number = query, page_number
     self.search_engine_name, self.search_type = search_engine, search_type
     self.scrape_method = 'http-async'

     # Request bookkeeping defaults.
     self.requested_at, self.requested_by = None, ''

     # Engine-specific pieces, produced by helpers defined outside this
     # method; the base-URL helper is always given the literal 'http'.
     self.parser = get_parser_by_search_engine(search_engine)
     self.base_search_url = get_base_search_url_by_search_engine(
         search_engine, 'http')
     self.params = get_GET_params_for_search_engine(
         query, search_engine, search_type=search_type)
     # `headers` is not a parameter; it is resolved from the enclosing scope.
     self.headers = headers
Example #3
0
 def __init__(self, config, query='', page_number=1, search_engine='google', scrape_method='http-async'):
     """Record one search request and resolve its engine-specific values.

     Args:
         config: Stored on ``self.config`` and passed to the base-URL helper.
         query: Stored on ``self.query`` and passed to the GET-parameter
             helper.
         page_number: Stored unchanged on ``self.page_number``.
         search_engine: Stored on ``self.search_engine_name`` and passed to
             the parser, base-URL and GET-parameter helpers.
         scrape_method: Stored unchanged on ``self.scrape_method``.
     """
     engine = search_engine
     kind = 'normal'

     # Caller-supplied request description.
     self.config, self.query = config, query
     self.page_number, self.search_engine_name = page_number, engine
     self.search_type, self.scrape_method = kind, scrape_method

     # Bookkeeping defaults.
     self.requested_at, self.requested_by = None, 'localhost'

     # Values obtained from helpers defined outside this method; 'http' is
     # passed to the base-URL helper as a fixed literal.
     self.parser = get_parser_by_search_engine(engine)
     self.base_search_url = get_base_search_url_by_search_engine(config, engine, 'http')
     self.params = get_GET_params_for_search_engine(query, engine, search_type=kind)
     # `headers` is looked up in the enclosing scope, not passed in.
     self.headers = headers
     self.status = 'successful'
Example #4
0
 def __init__(self, query='', page_number=1, search_engine='google', **kwargs):
     """Record one search request and resolve its engine-specific values.

     Args:
         query: Stored on ``self.query`` and passed to the GET-parameter
             helper.
         page_number: Stored unchanged on ``self.page_number``.
         search_engine: Stored on ``self.search_engine_name`` and passed to
             the parser, base-URL and GET-parameter helpers.
         **kwargs: Accepted but not used in this method.

     TODO: ``**kwargs`` does not seem to be used; check whether any call to
     ``__init__`` passes additional keyword arguments and remove it if not.
     """
     engine = search_engine
     kind = 'normal'

     # Request description; the scrape method is fixed for this variant.
     self.query, self.page_number = query, page_number
     self.search_engine_name, self.search_type = engine, kind
     self.scrape_method = 'http-async'

     # Bookkeeping defaults.
     self.requested_at, self.requested_by = None, 'localhost'

     # Values obtained from helpers defined outside this method; 'http' is
     # passed to the base-URL helper as a fixed literal.
     self.parser = get_parser_by_search_engine(engine)
     self.base_search_url = get_base_search_url_by_search_engine(engine, 'http')
     self.params = get_GET_params_for_search_engine(query, engine, search_type=kind)
     # `headers` is looked up in the enclosing scope, not passed in.
     self.headers = headers
     self.status = 'successful'
Example #5
0
 def __init__(self,
              config,
              query='',
              page_number=1,
              search_engine='google',
              scrape_method='http-async'):
     """Set up the request fields and the values derived from the engine name.

     Args:
         config: Stored on ``self.config`` and passed to the base-URL helper.
         query: Stored on ``self.query`` and passed to the GET-parameter
             helper.
         page_number: Stored unchanged on ``self.page_number``.
         search_engine: Stored on ``self.search_engine_name`` and passed to
             the parser, base-URL and GET-parameter helpers.
         scrape_method: Stored unchanged on ``self.scrape_method``.
     """
     search_type = 'normal'

     # Fields describing the request itself.
     self.config, self.query, self.page_number = config, query, page_number
     self.search_engine_name, self.search_type = search_engine, search_type
     self.scrape_method = scrape_method

     # Fields describing when and by whom the request was made.
     self.requested_at, self.requested_by = None, 'localhost'

     # Fields filled in by helpers that live outside this method; the
     # base-URL helper always receives the literal 'http'.
     self.parser = get_parser_by_search_engine(search_engine)
     self.base_search_url = get_base_search_url_by_search_engine(
         config, search_engine, 'http')
     self.params = get_GET_params_for_search_engine(
         query, search_engine, search_type=search_type)
     # `headers` is not an argument; it is taken from the enclosing scope.
     self.headers = headers
     self.status = 'successful'