def __init__(self, config, query='', page_number=1, search_engine='google', scrape_method='http-async'):
    """Set up the request state for one results page of a search query.

    Args:
        config: Settings object; ``config['num_results_per_page']`` is read
            here and the object is also passed to the base-URL helper.
        query: The search string.
        page_number: Results page to request; page 1 yields start record 1.
        search_engine: Name used to look up the parser, the base search URL
            and the GET parameters.
        scrape_method: Stored unchanged on the instance.
    """
    # Caller-supplied values, stored as given.
    self.config = config
    self.query, self.page_number = query, page_number
    self.search_engine_name = search_engine
    self.scrape_method = scrape_method

    # Fixed defaults for a newly created request.
    self.search_type = 'normal'
    self.requested_at = None
    self.requested_by = 'localhost'

    # Engine-specific lookups, performed in the same order as before.
    engine = self.search_engine_name
    self.parser = get_parser_by_search_engine(engine)
    self.base_search_url = get_base_search_url_by_search_engine(self.config, engine, 'http')
    self.params = get_GET_params_for_search_engine(self.query, engine, search_type=self.search_type)

    # `headers` is not a parameter; it is resolved from the enclosing scope
    # and stored by reference (no copy is made).
    self.headers = headers
    self.status = 'successful'

    # Pagination: index of the first record on the requested page (1-based),
    # plus the same value rendered as a "&first=" string fragment.
    per_page = int(config['num_results_per_page'])
    self.num_results_per_page = per_page
    first_record = per_page * (self.page_number - 1) + 1
    self.startRecord = first_record
    self.stringStartRecord = "&first=" + str(first_record)
def __init__(self, query='', page_number=1, search_engine='google', **kwargs):
    """Initialise the request attributes for a single search query.

    Args:
        query: The search string.
        page_number: Results page to request.
        search_engine: Name used to look up the parser, the base search URL
            and the GET parameters.
        **kwargs: Accepted, but never read inside this constructor.
    """
    # Values taken directly from the arguments.
    self.query = query
    self.page_number = page_number
    self.search_engine_name = engine = search_engine

    # Constants for every new instance; the scrape method is hard-wired here.
    self.search_type = kind = 'normal'
    self.scrape_method = 'http-async'
    self.requested_at = None
    self.requested_by = ''

    # Engine-dependent pieces, resolved in the original call order.
    self.parser = get_parser_by_search_engine(engine)
    self.base_search_url = get_base_search_url_by_search_engine(engine, 'http')
    self.params = get_GET_params_for_search_engine(query, engine, search_type=kind)

    # `headers` comes from the enclosing scope and is stored by reference.
    self.headers = headers
def __init__(self, config, query='', page_number=1, search_engine='google', scrape_method='http-async'):
    """Create the per-request state for a search against one engine.

    Args:
        config: Settings object, stored on the instance and passed on to
            the base-URL helper.
        query: The search string.
        page_number: Results page to request.
        search_engine: Name used to look up the parser, the base search URL
            and the GET parameters.
        scrape_method: Stored unchanged on the instance.
    """
    # Request bookkeeping: nothing requested yet, status starts out successful.
    self.requested_at = None
    self.requested_by = 'localhost'
    self.status = 'successful'
    self.search_type = 'normal'

    # Arguments copied onto the instance.
    self.config = config
    self.query = query
    self.page_number = page_number
    self.search_engine_name = search_engine
    self.scrape_method = scrape_method

    # Helper lookups keyed on the engine name (parser, then URL, then params).
    self.parser = get_parser_by_search_engine(search_engine)
    self.base_search_url = get_base_search_url_by_search_engine(
        config,
        search_engine,
        'http',
    )
    self.params = get_GET_params_for_search_engine(
        query,
        search_engine,
        search_type=self.search_type,
    )

    # `headers` is looked up in the enclosing scope, not passed in.
    self.headers = headers
def __init__(self, query='', page_number=1, search_engine='google', **kwargs):
    """Initialise the request attributes for a single search query.

    Args:
        query: The search string.
        page_number: Results page to request.
        search_engine: Name used to look up the parser, the base search URL
            and the GET parameters.
        **kwargs: Not read inside this constructor.

    TODO: **kwargs doesn't seem to be used; check whether any call to this
    constructor passes additional keyword arguments and remove it if not.
    """
    engine_name = search_engine
    search_kind = 'normal'

    # What was asked for.
    self.query, self.page_number = query, page_number
    self.search_engine_name = engine_name
    self.search_type = search_kind

    # How and by whom; the scrape method is fixed in this constructor.
    self.scrape_method = 'http-async'
    self.requested_at = None
    self.requested_by = 'localhost'

    # Engine-specific helpers, invoked in the original order.
    self.parser = get_parser_by_search_engine(engine_name)
    self.base_search_url = get_base_search_url_by_search_engine(engine_name, 'http')
    self.params = get_GET_params_for_search_engine(
        query, engine_name, search_type=search_kind)

    # `headers` is resolved from the enclosing scope and shared by reference.
    self.headers = headers
    self.status = 'successful'
def __init__(self, config, query='', page_number=1, search_engine='google', scrape_method='http-async'):
    """Build the initial state of a search request.

    Args:
        config: Settings object, kept on the instance and forwarded to the
            base-URL helper.
        query: The search string.
        page_number: Results page to request.
        search_engine: Name used to look up the parser, the base search URL
            and the GET parameters.
        scrape_method: Stored unchanged on the instance.
    """
    # Store the inputs first; the helper calls below use these locals.
    self.config, self.scrape_method = config, scrape_method
    self.query, self.page_number = query, page_number
    self.search_engine_name = search_engine

    # Defaults that do not depend on the arguments.
    normal = 'normal'
    self.search_type = normal
    self.requested_at = None
    self.requested_by = 'localhost'

    # Resolve parser, base URL and GET parameters for the chosen engine.
    parser = get_parser_by_search_engine(search_engine)
    self.parser = parser
    base_url = get_base_search_url_by_search_engine(config, search_engine, 'http')
    self.base_search_url = base_url
    get_params = get_GET_params_for_search_engine(query, search_engine, search_type=normal)
    self.params = get_params

    # `headers` is taken from the enclosing scope (it is not an argument).
    self.headers = headers
    self.status = 'successful'