def search_task():
    """Build a search task from the current Flask request and execute it.

    Reads query/category/pageno/settings parameters from ``request.values``,
    validates them, runs a :class:`Search`, and returns a JSON response with
    ``results``/``suggestions``/``answers``/``infoboxes``.  Validation
    failures return a JSON error payload (status 500, kept as-is for
    backward compatibility with existing API clients).
    """
    task = dict(query='',
                selected_categories=['general'],
                pageno=1,
                settings=get_default_settings())
    # task['method'] = request.method

    # Honour reverse proxies: the first X-Forwarded-For entry is the
    # original client address; fall back to the direct peer otherwise.
    x_forwarded_for = request.headers.getlist("X-Forwarded-For")
    if x_forwarded_for:
        ip = x_forwarded_for[0]
    else:
        ip = request.remote_addr

    task['user_data'] = {
        'method': request.method,
        'ip': ip,
        'ua': request.user_agent
    }

    # Override defaults with whatever the client actually sent.
    if 'query' in request.values:
        task['query'] = request.values['query']
    if 'selected_categories' in request.values:
        task['selected_categories'].append(request.values['selected_categories'])
    # The array form ('selected_categories[]') replaces the list entirely.
    if 'selected_categories[]' in request.values:
        task['selected_categories'] = request.values.getlist('selected_categories[]')
    if 'pageno' in request.values:
        task['pageno'] = request.values['pageno']
    if 'settings' in request.values:
        task['settings'] = request.values['settings']

    if not task['query']:
        return make_response(jsonify({'error': 'query empty'}), 500)

    # pageno arrives as an arbitrary request string; the previous bare
    # int() call raised an unhandled ValueError (traceback to the client)
    # on non-numeric input instead of the intended JSON error below.
    try:
        pageno = int(task['pageno'])
    except (TypeError, ValueError):
        pageno = 0
    if pageno < 1:
        return make_response(jsonify({'error': 'wrong pageno'}), 500)

    try:
        search = Search(task)
    except Exception:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; narrowed to Exception, same JSON error reply.
        return make_response(jsonify(dict(error='task ???')), 500)

    # Plugins may veto the search entirely from the pre_search hook.
    # NOTE(review): post_search is called only when pre_search allowed the
    # search to run — confirm against plugin API expectations.
    if plugins.callAPI('pre_search', task, locals()):
        search.search(task)
        plugins.callAPI('post_search', task, locals())

    return jsonify({'results': search.results,
                    'suggestions': search.suggestions,
                    'answers': search.answers,
                    'infoboxes': search.infoboxes
                    })
def search(self, task):
    """Fan the query out to all selected engines and aggregate the results.

    Builds one HTTP request per eligible engine, fires them via
    ``threaded_requests``, then drains the shared results queue, separating
    suggestions / answers / infoboxes from ordinary results, updating
    per-engine statistics, scoring/deduplicating, and post-processing each
    result (highlighting, HTML stripping, pretty URLs).

    :param task: dict with at least a 'query' key (see search_task)
    :return: ``self``, with results/suggestions/answers/infoboxes populated
    """
    global number_of_searches

    # init vars
    requests = []
    results_queue = Queue()
    results = {}

    # increase number of searches
    number_of_searches += 1

    # set default useragent
    # user_agent = request.headers.get('User-Agent', '')
    user_agent = gen_useragent()

    # start search-reqest for all selected engines
    for selected_engine in self.engines:
        # skip engines not known to the global registry
        if selected_engine['name'] not in engines:
            continue

        engine = engines[selected_engine['name']]

        # if paging is not supported, skip
        if self.pageno > 1 and not engine.paging:
            continue

        # if search-language is set and engine does not
        # provide language-support, skip
        if self.lang != 'all' and not engine.language_support:
            continue

        # set default request parameters
        request_params = default_request_params()
        request_params['headers']['User-Agent'] = user_agent
        request_params['category'] = selected_engine['category']
        request_params['started'] = time()
        request_params['pageno'] = self.pageno

        # engine-pinned language takes precedence over the task language
        if hasattr(engine, 'language') and engine.language:
            request_params['language'] = engine.language
        else:
            request_params['language'] = self.lang

        # try:
        #     # 0 = None, 1 = Moderate, 2 = Strict
        #     request_params['safesearch'] = int(request.cookies.get('safesearch'))
        # except Exception:
        request_params['safesearch'] = settings['search']['safe_search']

        # update request parameters dependent on
        # search-engine (contained in engines folder)
        engine.request(task['query'].encode('utf-8'), request_params)

        if request_params['url'] is None:
            # TODO add support of offline engines
            pass

        # create a callback wrapper for the search engine results
        callback = make_callback(
            selected_engine['name'],
            results_queue,
            engine.response,
            request_params)

        # create dictionary which contain all
        # informations about the request
        request_args = dict(
            headers=request_params['headers'],
            hooks=dict(response=callback),
            cookies=request_params['cookies'],
            timeout=engine.timeout,
            verify=request_params['verify']
        )

        # specific type of request (GET or POST)
        if request_params['method'] == 'GET':
            req = requests_lib.get
        else:
            req = requests_lib.post
            request_args['data'] = request_params['data']

        # ignoring empty urls
        if not request_params['url']:
            continue

        # append request to list
        requests.append((req,
                         request_params['url'],
                         request_args,
                         selected_engine['name']))

    if not requests:
        return self
    # send all search-request
    threaded_requests(requests)

    # Drain the queue the callbacks filled.  The comprehensions below are
    # used purely for their side effects: they iterate over a *copy*
    # (list(engine_results)) and remove suggestion/answer/infobox entries
    # from engine_results in place (remove() returns None, so the trailing
    # `is None` keeps the filter condition truthy).
    while not results_queue.empty():
        engine_name, engine_results = results_queue.get_nowait()

        # TODO type checks
        [self.suggestions.append(x['suggestion'])
         for x in list(engine_results)
         if 'suggestion' in x
         and engine_results.remove(x) is None]

        [self.answers.append(x['answer'])
         for x in list(engine_results)
         if 'answer' in x
         and engine_results.remove(x) is None]

        self.infoboxes.extend(x for x in list(engine_results)
                              if 'infobox' in x
                              and engine_results.remove(x) is None)

        # whatever is left are ordinary results for this engine
        results[engine_name] = engine_results

    # update engine-specific stats
    for engine_name, engine_results in results.items():
        engines[engine_name].stats['search_count'] += 1
        engines[engine_name].stats['result_count'] += len(engine_results)

    # score results and remove duplications
    self.results = score_results(results)

    # merge infoboxes according to their ids
    self.infoboxes = merge_infoboxes(self.infoboxes)

    # update engine stats, using calculated score
    for result in self.results:
        plugins.callAPI('on_result', self.plugins, locals())

        # NOTE(review): the loop variable res_engine is never used — the
        # score is credited to result['engine'] once per entry in
        # result['engines'].  Looks like it should be engines[res_engine];
        # left unchanged here, confirm intent before fixing.
        for res_engine in result['engines']:
            engines[result['engine']] \
                .stats['score_count'] += result['score']

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')

        # enable paging for the response if any contributing engine pages
        if not self.paging and engines[result['engine']].paging:
            self.paging = True

        # keep highlighted HTML copies before stripping tags below
        if 'content' in result:
            result['content_html'] = highlight_content(result['content'],
                                                       self.query.encode('utf-8'))  # noqa
        result['title_html'] = highlight_content(result['title'],
                                                 self.query.encode('utf-8'))

        if result.get('content'):
            result['content'] = html_to_text(result['content']).strip()
        # removing html content and whitespace duplications
        result['title'] = ' '.join(html_to_text(result['title'])
                                   .strip().split())

    # return results, suggestions, answers and infoboxes
    return self