def index():
    """Render ``index.html`` with config variables and connection requests.

    Both values are fetched from the database service and passed to the
    template as ``config`` and ``requests`` respectively.
    """
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    # Dict entries are evaluated in order, so the config is requested first.
    template_context = {
        'config': database.request(path='/sp/db/get_config_variables'),
        'requests': database.request(path='/sp/db/get_connection_requests'),
    }
    return flask.render_template('index.html', **template_context)
def test_post_request(self):
    """POST a sample document to the placeholder API and expect HTTP 201."""
    client = KearchRequester('https://jsonplaceholder.typicode.com',
                             requester_name='test_requester')
    document = {'userId': 1, 'title': 'hello', 'body': 'world'}
    response = client.request(method='POST', path='/posts', payload=document)
    # Echo the decoded body so a failing run shows what the server returned.
    print(response.json())
    assert response.status_code == 201
def retrieve(queries, max_urls):
    """Run a phrase search over ``title`` and ``text`` and return the hits.

    ``queries`` is joined with single spaces into one phrase.  ``max_urls``
    is accepted but not used by this implementation.

    Returns a list of dicts with the keys ``url``, ``title``,
    ``description`` (first 200 characters of the stored text) and ``score``.
    """
    requester = KearchRequester(ELASTIC_HOST, ELASTIC_PORT,
                                conn_type='elastic')
    phrase = ' '.join(queries)
    search_body = {
        'query': {
            'multi_match': {
                'query': phrase,
                'type': 'phrase',
                'fields': ['title', 'text'],
            }
        }
    }
    response = requester.request(
        path='/' + ELASTIC_INDEX + '/' + ELASTIC_TYPE + '/_search?pretty',
        payload=search_body,
        method='POST')

    # A response without the nested 'hits' list is treated as "no results".
    if 'hits' in response and 'hits' in response['hits']:
        found = response['hits']['hits']
    else:
        found = []
    print(found, file=sys.stderr)

    return [
        {
            'url': hit['_source']['url'],
            'title': hit['_source']['title'],
            'description': hit['_source']['text'][0:200],
            'score': hit['_score'],
        }
        for hit in found
    ]
def url_to_webpage(url):
    """Return the ``Webpage`` for *url* if the classifier reports it in-topic.

    The classifier parameter file and the average-document cache file are
    refreshed through ``update_param_file`` before the page is built.

    Returns ``None`` when the page cannot be built, when the classifier
    request raises ``RequesterError``, or when the classifier result is not
    ``IN_TOPIC``.
    """
    print('Start checking parameter files.', file=sys.stderr)
    update_param_file(kearch_classifier.classifier.PARAMS_FILE)
    update_param_file(ave.CACHE_FILE)
    print('End checking parameter files.', file=sys.stderr)

    try:
        w = kearch_classifier.webpage.Webpage(url)
    except kearch_classifier.webpage.WebpageError:
        print('Cannot make webpage of ', url, file=sys.stderr)
        return None

    cl_req = KearchRequester(CLASSIFIER_HOST, CLASSIFIER_PORT)
    payload = {'body_words': w.words, 'title_words': w.title_words}
    try:
        res = cl_req.request(path='/sp/classifier/classify',
                             method='POST', payload=payload)
    except RequesterError as e:
        # Report the failure instead of dropping it silently; the caller
        # still just sees None, as before.
        print('Cannot classify ', url, ':', e, file=sys.stderr)
        return None

    if res['result'] == kearch_classifier.classifier.IN_TOPIC:
        return w
    # The original ended with a bare `None` expression; make the intended
    # return value explicit.
    return None
def crawl_a_page(url):
    """Ask the crawler child to crawl *url* and return its response.

    When ``DEBUG_UNIT_TEST`` is set, no request is made: a fixed sample
    result is returned after a two second pause.  If the request raises
    ``RequesterError`` the error is printed to stderr and an empty dict is
    returned.
    """
    if DEBUG_UNIT_TEST:
        canned = {
            'url': 'www.google.com',
            'title': 'Google is the biggest IT company.',
            'text': 'hello world!',
            'inner_links': ['www.facebook.com'],
            'outer_links': [],
        }
        time.sleep(2)
        return canned

    child = KearchRequester(CRAWLER_CHILD_HOST, CRAWLER_CHILD_PORT)
    try:
        print('requesting /crawl_a_page?url={} ...'.format(url))
        page = child.request(path='/sp/crawler-child/crawl_a_page',
                             params={'url': url},
                             timeout=SP_CHILD_TIMEOUT)
        print('get response /crawl_a_page?url={}'.format(url))
    except RequesterError as err:
        print(err, file=sys.stderr)
        return {}
    return page
def index():
    """Render ``index.html`` with the specialist servers listed by the DB."""
    db = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type="sql")
    servers = db.request(path='/me/db/list_up_sp_servers', method='GET')
    return flask.render_template('index.html', sp_servers=servers)
def test_get_request(self):
    """GET the posts of user 1 from the placeholder API and expect HTTP 200."""
    client = KearchRequester('https://jsonplaceholder.typicode.com',
                             requester_name='test_requester')
    response = client.request(method='GET', path='/posts',
                              params={'userId': 1})
    # Echo the decoded body so a failing run shows what the server returned.
    print(response.json())
    assert response.status_code == 200
def send_a_connection_request(sp_host):
    """POST this server's host name to the specialist gateway at *sp_host*.

    The host name comes from ``get_me_host()``; the gateway's response is
    returned unchanged.
    """
    own_host = get_me_host()
    gateway = KearchRequester(sp_host, SPECIALIST_GATEWAY_PORT)
    return gateway.request(
        path=SP_GATEWAY_BASEURL + 'add_a_connection_request',
        method='POST',
        payload={'me_host': own_host})
def is_connected(me_host):
    """Return True if *me_host* has a truthy entry among connection requests.

    The ``'out'`` mapping is checked first, then ``'in'``; a truthy value for
    the host in either one is treated as "already approved".
    """
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    pending = database.request(path='/sp/db/get_connection_requests')
    # any() evaluates lazily, so 'in' is only consulted when 'out' fails.
    return any(
        me_host in pending[direction] and pending[direction][me_host]
        for direction in ('out', 'in')
    )
def send_a_dump(sp_host, me_host, summary):
    """POST *summary* for *sp_host* to the meta gateway running on *me_host*.

    Returns whatever the gateway request returns.
    """
    body = {'host': sp_host, 'summary': summary}
    gateway = KearchRequester(me_host, META_GATEWAY_PORT)
    return gateway.request(path='/v0/me/gateway/add_a_summary',
                           method='POST', payload=body)
def test_evaluate():
    """Check that ``evaluate_main`` returns a dict keyed by known sp servers.

    Every key must be a string and must appear among the servers listed by
    the database service.
    """
    res = evaluate_main('google')
    assert isinstance(res, dict)

    # The server list does not depend on the key being checked, so fetch it
    # once instead of once per result entry.
    db_req = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    sp_servers = db_req.request(path='/me/db/list_up_sp_servers')

    for k in res:
        assert isinstance(k, str)
        assert k in sp_servers.keys()
def retrieve(sp_host, queries, max_urls):
    """Forward a retrieve request to the specialist gateway at *sp_host*.

    ``queries`` and ``max_urls`` are passed through as GET parameters and the
    gateway's response is returned unchanged.
    """
    gateway = KearchRequester(sp_host, SPECIALIST_GATEWAY_PORT)
    search_params = {'queries': queries, 'max_urls': max_urls}
    return gateway.request(path=SP_GATEWAY_BASEURL + 'retrieve',
                           method='GET', params=search_params)
def retrieve(queries, max_urls):
    """Forward a retrieve request to the specialist query processor.

    ``queries`` and ``max_urls`` are passed through as GET parameters and the
    query processor's response is returned unchanged.
    """
    query_processor = KearchRequester(QUERY_PROCESSOR_HOST,
                                      QUERY_PROCESSOR_PORT)
    search_params = {'queries': queries, 'max_urls': max_urls}
    return query_processor.request(path='/sp/query-processor/retrieve',
                                   method='GET', params=search_params)
def add_new_sp_server(summary):
    """Register a specialist server and mark its outgoing request approved.

    *summary* is sent to ``/me/db/add_new_sp_server``; its ``'sp_host'``
    entry is then used to approve the matching ``'out'`` connection request.

    Both calls carry a payload, so the method is stated explicitly as POST,
    matching the other call sites of these two endpoints that pass
    ``method='POST'``.
    NOTE(review): the requester's default method is not visible here --
    confirm these endpoints were not being reached via another method.

    Returns the response of the ``add_new_sp_server`` call.
    """
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    result = db_req.request(path='/me/db/add_new_sp_server',
                            payload=summary, method='POST')
    sp_host = summary['sp_host']
    db_req.request('/me/db/approve_a_connection_request',
                   payload={'in_or_out': 'out', 'sp_host': sp_host},
                   method='POST')
    return result
def send_a_connection_request():
    """Record an outgoing connection request and announce it to the meta host.

    The target host is read from the submitted form field ``me_host``.  The
    request is first stored in the database, then this server's host name
    and engine name (from the config variables) are POSTed to the meta
    gateway on that host.  Finally the browser is redirected to ``index``.
    """
    me_host = flask.request.form['me_host']
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    database.request(
        path='/sp/db/add_a_connection_request',
        payload={'me_host': me_host, 'in_or_out': 'out', 'scheme': 'http'},
        method='POST')

    config = database.request(path='/sp/db/get_config_variables')
    announcement = {
        'sp_host': config['host_name'],
        'engine_name': config['engine_name'],
        'scheme': 'http',
    }
    gateway = KearchRequester(me_host, ME_GATEWAY_PORT)
    gateway.request(path=ME_GATEWAY_BASEURL + 'add_a_connection_request',
                    payload=announcement, method='POST')
    return flask.redirect(flask.url_for("index"))
def update_config():
    """Store the submitted config values and redirect to ``index``.

    Only the form fields ``connection_policy`` and ``host_name`` are
    forwarded, and only when they are present in the submitted form.
    """
    form = flask.request.form
    changes = {
        field: form[field]
        for field in ('connection_policy', 'host_name')
        if field in form
    }
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    database.request(path='/me/db/set_config_variables',
                     payload=changes, method='POST')
    return flask.redirect(flask.url_for("index"))
def learn_params_for_evaluator():
    """Train the evaluator on the stored summaries and upload its parameters.

    The summaries are fetched from the database service, a fresh
    ``Evaluator`` learns from them and dumps its parameters to
    ``PARAMS_FILE``.  The file content is then base64-encoded and pushed
    back to the database service under the file's name.  The browser is
    finally redirected to ``index``.
    """
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    summaries = db_req.request(path='/me/db/get_sp_summaries')

    e = kearch_evaluator.evaluator.Evaluator()
    e.learn_params(summaries)
    e.dump_params(kearch_evaluator.evaluator.PARAMS_FILE)

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(kearch_evaluator.evaluator.PARAMS_FILE, 'rb') as f:
        bparam = f.read()
    # Base64 makes the binary content representable as text.
    tparam = base64.b64encode(bparam).decode('utf-8')
    params = {'name': kearch_evaluator.evaluator.PARAMS_FILE, 'body': tparam}
    db_req.request(path='/me/db/push_binary_file', params=params)
    return flask.redirect(flask.url_for("index"))
def init_crawl_urls():
    """Push the URLs submitted in the ``urls`` form field to the crawl queue.

    The field is split on newlines and trailing whitespace (such as the
    ``\\r`` of CRLF line endings) is stripped from every entry.  The database
    service's response is returned as JSON.
    """
    form_input = flask.request.form['urls']
    # Build a real list: in Python 3 `map` returns a lazy iterator, which
    # cannot be serialised when it is sent as part of the request payload.
    urls = [line.rstrip() for line in form_input.split('\n')]
    payload = {'urls': urls}
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    ret = db_req.request(path='/sp/db/push_urls_to_queue',
                         payload=payload, method='POST')
    return jsonify(ret)
def approve_a_connection_request():
    """Approve the incoming connection request of the submitted meta host.

    A summary is sent to the host named by the ``me_host`` form field via
    ``send_a_summary`` before the request is marked approved in the
    database.  The browser is then redirected to ``index``.
    """
    me_host = flask.request.form['me_host']
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    send_a_summary(me_host)
    approval = {'in_or_out': 'in', 'me_host': me_host}
    database.request('/sp/db/approve_a_connection_request', payload=approval)
    return flask.redirect(flask.url_for("index"))
def get_result_from_sp(sp_host, query, max_urls):
    """Retrieve results for *query* from *sp_host* via the meta gateway.

    Every returned entry is tagged in place with ``'sp_host'`` so the caller
    can tell which specialist server produced it.

    Returns a new list holding the (mutated) entries.
    """
    gateway = KearchRequester(GATEWAY_HOST, GATEWAY_PORT)
    response = gateway.request(
        path=ME_GATEWAY_BASEURL + 'retrieve',
        params={'sp_host': sp_host, 'queries': query, 'max_urls': max_urls})

    def tag(entry):
        # Mutates the entry itself, exactly like a plain item assignment.
        entry['sp_host'] = sp_host
        return entry

    return [tag(entry) for entry in response]
def add_a_connection_request(me_host, scheme):
    """Handle an incoming connection request from the meta host *me_host*.

    If the configured connection policy is ``'public'`` the database dump is
    sent to *me_host* straight away through ``send_a_dump``.  Otherwise the
    request is stored as an ``'in'`` request together with *scheme*.

    Returns the response of whichever of the two actions was taken.
    """
    db = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    config = db.request(path='/sp/db/get_config_variables', method='GET')

    if config[CONFIG_CONNECTION_POLICY] == 'public':
        sp_host = config[CONFIG_HOST_NAME]
        dump = db.request(path='/sp/db/dump_database', method='GET')
        return send_a_dump(sp_host, me_host, dump)

    # This call carries a payload, so state the method explicitly; the other
    # call site of this endpoint passes method='POST' as well.
    # NOTE(review): the requester's default method is not visible here.
    return db.request(path='/sp/db/add_a_connection_request',
                      payload={'in_or_out': 'in',
                               'me_host': me_host,
                               'scheme': scheme},
                      method='POST')
def update_param_file(filename):
    """Refresh the local copy of *filename* if the database has a newer one.

    The timestamp reported by the database is compared with the one kept in
    the module-level ``timestamp`` mapping.  When the file is unknown
    locally or older, it is pulled, base64-decoded, written to *filename*,
    and the evaluater parameters are reloaded from ``PARAMS_FILE``.
    """
    database = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    stamp_info = database.request(path='/me/db/check_binary_file_timestamp',
                                  params={'name': filename})
    remote_stamp = stamp_info['updated_at']
    print('db:', remote_stamp, file=sys.stderr)

    # Nothing to do when a local timestamp exists and is not older.
    if filename in timestamp and not (timestamp[filename] < remote_stamp):
        return

    timestamp[filename] = remote_stamp
    pulled = database.request(path='/me/db/pull_binary_file',
                              params={'name': filename})
    content = base64.b64decode(pulled['body'].encode())
    with open(filename, 'wb') as out:
        out.write(content)
    evaluater.load_params(kearch_evaluater.evaluater.PARAMS_FILE)
def search():
    """Run the submitted query and render ``result.html``.

    For GET requests the ``query`` argument is split on whitespace,
    re-joined with single spaces, and sent to the query processor together
    with ``MAX_URLS``.  Any other method is redirected to ``index``.
    """
    if flask.request.method != 'GET':
        # url_for() expects an endpoint name, not a template file name; the
        # original 'index.html' could not be resolved to a route.
        return flask.redirect(flask.url_for('index'))

    query = flask.request.args['query']
    queries = query.split()
    kr = KearchRequester(QUERY_PROCESSOR_HOST, QUERY_PROCESSOR_PORT)
    results = kr.request(path='/sp/query-processor/retrieve',
                         method='GET',
                         params={'queries': ' '.join(queries),
                                 'max_urls': MAX_URLS})
    return flask.render_template('result.html', results=results, query=query)
def update_password():
    """Store a new password hash for the currently logged-in user.

    The form fields ``password`` and ``password_again`` must match;
    otherwise the request is aborted with status 500 and a message.  The
    SHA-512 hex digest of the password is sent to the database service and
    its response is returned as JSON.
    """
    password = flask.request.form['password']
    password_again = flask.request.form['password_again']
    if password != password_again:
        r = {'message': 'Passwords do not match.'}
        abort(500, r)

    u = current_user.name
    # NOTE(review): this is an unsalted SHA-512 digest. It is kept because
    # the format must match whatever the login check compares against.
    h = hashlib.sha512(password.encode('utf-8')).hexdigest()
    # Log only the user name: credential material such as the password hash
    # must not end up in the server log.
    print(u, file=sys.stderr)

    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    ret = db_req.request(path='/sp/db/update_password_hash',
                         payload={'username': u, 'password_hash': h},
                         method='POST')
    return jsonify(ret)
def delete_a_connection_request():
    """Delete the connection request for the submitted meta host.

    The request is removed from the local database first; the meta gateway
    on the host named by the ``me_host`` form field is then told, via
    DELETE, to drop this server (identified by the configured host name).
    The browser is finally redirected to ``index``.
    """
    me_host = flask.request.form['me_host']
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    database.request('/sp/db/delete_a_connection_request',
                     payload={'me_host': me_host})

    own_host = database.request(path='/sp/db/get_config_variables')['host_name']
    gateway = KearchRequester(me_host, ME_GATEWAY_PORT)
    gateway.request(path=ME_GATEWAY_BASEURL + 'delete_a_connection_request',
                    payload={'sp_host': own_host},
                    method='DELETE')
    return flask.redirect(flask.url_for("index"))
def evaluate_main(query):
    """Evaluate *query* and return the results for registered sp servers.

    The evaluater parameter file is refreshed first.  *query* is split on
    single spaces and passed to the evaluater; only results whose key is a
    server currently listed by the database are kept.
    """
    print('Start checking parameter files.', file=sys.stderr)
    update_param_file(kearch_evaluater.evaluater.PARAMS_FILE)
    print('End checking parameter files.', file=sys.stderr)

    words = query.split(' ')

    # The evaluater may still know specialist servers that were removed by
    # /me/gateway/delete_a_connection_request, so its output is filtered
    # against the servers that really exist in the database.
    database = KearchRequester(
        DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    known_servers = database.request(path='/me/db/list_up_sp_servers')
    scores = evaluater.evaluate(words)
    return {
        server: scores[server]
        for server in known_servers.keys()
        if server in scores
    }
def approve_a_connection_request():
    """Approve the incoming request of the submitted specialist server.

    The summary of the host named by the ``sp_host`` form field is fetched
    from its gateway (identifying this server by the configured host name),
    stored as a new sp server, and the ``'in'`` connection request is marked
    approved.  The browser is then redirected to ``index``.
    """
    sp_host = flask.request.form['sp_host']
    gateway = KearchRequester(sp_host, SP_GATEWAY_PORT)
    database = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')

    own_host = database.request(path='/me/db/get_config_variables')['host_name']
    summary = gateway.request(path=SP_GATEWAY_BASEURL + 'get_a_summary',
                              params={'me_host': own_host})

    database.request(path='/me/db/add_new_sp_server',
                     payload=summary, method='POST')
    approval = {'in_or_out': 'in', 'sp_host': sp_host}
    database.request(path='/me/db/approve_a_connection_request',
                     payload=approval, method='POST')
    return flask.redirect(flask.url_for("index"))
def search():
    """Run the submitted query and render ``result.html``.

    For GET requests the ``query`` and ``sp`` arguments are read; ``sp`` is
    forwarded to the query processor only when it is not empty.  The list of
    specialist servers is fetched as well and passed to the template.  Any
    other method is redirected to ``index``.
    """
    if flask.request.method != 'GET':
        # url_for() expects an endpoint name, not a template file name; the
        # original 'index.html' could not be resolved to a route.
        return flask.redirect(flask.url_for('index'))

    query = flask.request.args['query']
    sp = flask.request.args['sp']
    query_processor_requester = KearchRequester(QUERY_PROCESSOR_HOST,
                                                QUERY_PROCESSOR_PORT)
    database_requester = KearchRequester(DATABASE_HOST, DATABASE_PORT,
                                         conn_type="sql")

    params = {'query': query, 'max_urls': MAX_URLS}
    if sp != "":
        params['sp'] = sp

    results = query_processor_requester.request(
        path='/me/query-processor/retrieve', method='GET', params=params)
    sp_servers = database_requester.request(
        path='/me/db/list_up_sp_servers', method='GET')
    print('results = ', results, file=sys.stderr)
    return flask.render_template('result.html',
                                 results=results,
                                 selected_sp=sp,
                                 sp_servers=sp_servers,
                                 query=query)
def _read_file_as_base64(path):
    """Return the content of the file at *path* as base64 text."""
    # The context manager closes the handle even if reading fails.
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')


def learn_params_from_url():
    """Train the classifier from the submitted URLs and upload the results.

    The form fields ``topic_urls`` and ``random_urls`` are split on newlines
    with trailing whitespace stripped from each entry; ``language`` is
    passed through.  The classifier parameters are dumped to ``PARAMS_FILE``
    and pushed to the database service.  The average document is then built
    from the random URLs and ``ave.CACHE_FILE`` is pushed the same way.  The
    browser is finally redirected to ``index``.
    """
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    form_input_topic = flask.request.form['topic_urls']
    form_input_random = flask.request.form['random_urls']
    language = flask.request.form['language']
    topic_urls = [u.rstrip() for u in form_input_topic.split('\n')]
    random_urls = [u.rstrip() for u in form_input_random.split('\n')]

    cls = kearch_classifier.classifier.Classifier()
    cls.learn_params_from_url(topic_urls, random_urls, language)
    cls.dump_params(kearch_classifier.classifier.PARAMS_FILE)
    params = {
        'name': kearch_classifier.classifier.PARAMS_FILE,
        'body': _read_file_as_base64(kearch_classifier.classifier.PARAMS_FILE),
    }
    db_req.request(path='/sp/db/push_binary_file', params=params)

    ave.make_average_document_from_urls(random_urls, language)
    params = {
        'name': ave.CACHE_FILE,
        'body': _read_file_as_base64(ave.CACHE_FILE),
    }
    db_req.request(path='/sp/db/push_binary_file', params=params)
    return flask.redirect(flask.url_for("index"))
def login():
    """Show the login form (non-POST) or check submitted credentials (POST).

    On POST the submitted ``username`` and the SHA-512 hex digest of
    ``password`` are compared against every record returned by the database
    service.  A match logs the user in and redirects to ``index``; otherwise
    the request is aborted with status 401.
    """
    import hmac  # local import: only needed for the constant-time compare

    if request.method != 'POST':
        return flask.render_template('login.html')

    username = request.form['username']
    password = request.form['password']
    db_req = KearchRequester(DATABASE_HOST, DATABASE_PORT, conn_type='sql')
    auth_info = db_req.request(path='/sp/db/get_authentication')

    # Hash the submitted password once rather than once per stored record.
    submitted = hashlib.sha512(
        password.encode('utf-8')).hexdigest().encode('utf-8')

    is_valid = False
    for d in auth_info.values():
        u = d['username']
        # str() keeps a missing/None hash from raising; comparing bytes lets
        # compare_digest accept any stored text.
        stored = str(d['password_hash']).encode('utf-8')
        # Constant-time comparison avoids leaking how many leading
        # characters of the digest matched.
        if u == username and hmac.compare_digest(stored, submitted):
            is_valid = True

    if is_valid:
        # NOTE(review): the same User(0) is used whichever record matched --
        # confirm that this is intended.
        user = User(0)
        login_user(user)
        return redirect(flask.url_for("index"))
    return abort(401)