def test_request_2(self):
    """Parse REQUEST_URL and check the multiplied size lands in the URL query.

    With a multiplier of 2 the codex is expected to report a topk of 15
    and leave ``'30'`` under the ``size`` key of the URL query.
    """
    es_codex = ESCodex(multiplier=2)
    req = Request()
    req.url = REQUEST_URL

    # field name and query text extracted from the request
    parsed_field, parsed_query = es_codex.parse_query(req)
    self.assertEqual(parsed_field, 'message')
    self.assertEqual(parsed_query, b'testing')

    # only topk is verified; the second returned value is not inspected
    original_topk, _ = es_codex.multiply_request(req)
    self.assertEqual(original_topk, 15)

    magnified_size = req.url.query['size']
    self.assertEqual(magnified_size, '30')
def test_request_1(self):
    """Parse REQUEST_BODY and check the multiplied size lands in the body.

    With a multiplier of 8 the codex is expected to report a topk of 100
    and rewrite the JSON body so that its ``size`` is 800.
    """
    es_codex = ESCodex(multiplier=8)
    req = Request()
    req.body = REQUEST_BODY
    req.url = URL(b'/test')

    # field name and query text extracted from the request
    parsed_field, parsed_query = es_codex.parse_query(req)
    self.assertEqual(parsed_field, 'passage')
    self.assertEqual(parsed_query, b'this is a test')

    # only topk is verified; the second returned value is not inspected
    original_topk, _ = es_codex.multiply_request(req)
    self.assertEqual(original_topk, 100)

    # the request body is re-read to confirm it was rewritten
    rewritten_body = load_json(req.body)
    self.assertEqual(rewritten_body['size'], 800)
def test_response(self):
    """Parse choices out of RESPONSE_BODY, then reorder the response.

    After reordering with ranks ``[1, 0]`` the body must contain an
    ``_nboost`` key and its first hit must carry the message
    ``'second choice'``.
    """
    es_codex = ESCodex()
    req = Request()
    req.url = REQUEST_URL
    resp = Response()
    resp.body = RESPONSE_BODY

    # choices are read from the 'message' field of the response
    parsed = es_codex.parse_choices(resp, 'message')
    first_choice = parsed[0]
    second_choice = parsed[1]
    self.assertEqual(first_choice.body, b'trying out Elasticsearch')
    self.assertEqual(second_choice.cid, "5")

    # swap the two choices and inspect the rewritten body
    es_codex.reorder_response(req, resp, [1, 0])
    reordered = load_json(resp.body)
    self.assertIn('_nboost', reordered)

    top_hit = reordered['hits']['hits'][0]
    top_message = top_hit['_source']['message']
    self.assertEqual(top_message, 'second choice')
def loop(self, client_socket, address):
    """Answer on ``client_socket`` with a fixed JSON list of five choices.

    The prepared response is written with ``sendall`` so the complete
    payload is transmitted, and the socket is closed even when preparing
    or sending the response raises.

    ``address`` is accepted but not used by this implementation.
    """
    response = Response()
    response.body = dump_json([
        {'id': 7, 'body': 'a choice'},
        {'id': 23, 'body': 'another choice'},
        {'id': 24, 'body': 'a third choice'},
        {'id': 3, 'body': 'notha one'},
        {'id': 4, 'body': 'banana 🍌'},
    ])
    try:
        # send() may return after writing only part of the buffer;
        # sendall() keeps writing until everything is out or an error occurs
        client_socket.sendall(response.prepare(Request()))
    finally:
        # release the connection on both the success and the failure path
        client_socket.close()
def test_request(self):
    """Feed a request to HttpProtocol in two parts and check what is parsed.

    After the first part the method and URL query must be available and
    the protocol must not be done; after the second part the protocol
    must be done and the headers and body must be populated.
    """
    def check_url(url):
        # hook for parsed urls: the path must be '/search'
        return self.assertEqual(url.path, '/search')

    def check_data(data):
        # hook for parsed data: it must arrive as bytes
        return self.assertIsInstance(data, bytes)

    parser = HttpProtocol()
    parser.set_request_parser()
    req = Request()
    parser.set_request(req)
    parser.add_url_hook(check_url)
    parser.add_data_hook(check_data)

    # first part: request is incomplete
    parser.feed(REQUEST_PART_1)
    self.assertFalse(parser._is_done)
    self.assertEqual(req.method, 'GET')
    self.assertEqual(req.url.query['para'], 'message')

    # second part: request is complete
    parser.feed(REQUEST_PART_2)
    self.assertTrue(parser._is_done)
    self.assertEqual(req.headers['content-encoding'], 'gzip')
    self.assertEqual(req.body, b'test body')
def multiply_request(self, request: Request) -> Tuple[int, List[str]]:
    """Multiply size of Elasticsearch query

    Reads the requested ``size`` (topk) and the optional ``nboost``
    value from the URL query and, when the request has a JSON body, from
    the body (body values win over URL values). Both keys are removed
    from where they were found, and the size multiplied by
    ``self.multiplier`` is written back: into the body when the request
    came with one, otherwise into the URL query as a string.

    Returns a tuple ``(topk, correct_cids)`` where ``topk`` is the size
    originally requested (10 when none was given) and ``correct_cids`` is
    the ``nboost`` value split on commas, or ``None`` when it was absent
    or empty.
    """
    body = load_json(request.body)
    # Decide where the magnified size goes BEFORE popping keys. A body that
    # holds only "size"/"nboost" is empty after the pops; testing it then
    # would take the URL branch and forward the original body untouched,
    # still carrying the proxy-only "nboost" key.
    has_body = bool(body)

    topk = request.url.query.pop('size', None)
    correct_cids = request.url.query.pop('nboost', None)

    # search for topk in body
    if has_body:
        correct_cids = body.pop('nboost', correct_cids)
        topk = body.pop('size', topk)

    # fall back to a size of 10 when the request did not specify one
    topk = 10 if topk is None else int(topk)

    if has_body:
        body['size'] = topk * self.multiplier
        request.body = dump_json(body)
    else:
        # URL query values are kept as strings
        request.url.query['size'] = str(topk * self.multiplier)

    correct_cids = correct_cids.split(',') if correct_cids else None
    return topk, correct_cids
def loop(self, client_socket: socket.socket, address: Tuple[str, str]):
    """Main ioloop for reranking server results to the client. Exceptions
    raised in the http parser must be reraised from __context__ because
    they are caught by the MagicStack implementation

    Handles a single client connection. The client request is received
    and parsed, its size is multiplied by the codex, and it is sent to the
    upstream server. When the upstream status is below 300, the choices in
    the response are reranked by the model and the response is reordered
    before it is returned to the client.

    ``client_socket`` is the connected client socket and ``address`` is
    only used here to format log lines. Both the client socket and the
    upstream socket are closed in the ``finally`` clause.
    """
    server_socket = self.set_socket()
    # raw bytes received from the client; replayed upstream on the proxy path
    buffer = bytearray()
    request = Request()
    response = Response()
    # logging args: the tuple holds the request object itself, so each log
    # call formats whatever has been parsed into it by that point
    log = ('%s:%s %s', *address, request)

    try:
        self.server_connect(server_socket)

        with HttpParserContext():
            # receive and buffer the client request
            self.client_recv(client_socket, request, buffer)
            self.logger.debug(*log)
            field, query = self.codex.parse_query(request)

            # magnify the size of the request to the server
            topk, correct_cids = self.codex.multiply_request(request)
            self.server_send(server_socket, request)

            # make sure server response comes back properly
            self.server_recv(server_socket, response)
            response.unpack()

        # responses with status >= 300 skip reranking and are sent as received
        if response.status < 300:
            # parse the choices from the magnified response
            choices = self.codex.parse_choices(response, field)
            self.record_topk_and_choices(topk=topk, choices=choices)

            # use the model to rerank the choices
            # (the ranking is truncated to the originally requested topk)
            ranks = self.model_rank(query, choices)[:topk]
            self.codex.reorder_response(request, response, ranks)

            # if the "nboost" param was sent, calculate MRRs
            if correct_cids is not None:
                self.calculate_mrrs(correct_cids, choices, ranks)

        self.client_send(request, response, client_socket)

    except FrontendRequest:
        # NOTE(review): FrontendRequest is raised outside this block —
        # presumably while handling the client request; confirm the source
        self.logger.info(*log)

        # the status path returns self.status as indented JSON; every other
        # path is answered with the static file for that path
        if request.url.path == '/nboost/status':
            response.body = json.dumps(self.status, indent=2).encode()
        else:
            response.body = self.get_static_file(request.url.path)

        self.client_send(request, response, client_socket)

    except (UnknownRequest, MissingQuery):
        # requests that cannot be reranked are passed through to the server
        self.logger.warning(*log)

        # send the initial buffer that was used to check url path
        self.proxy_send(client_socket, server_socket, buffer)

        # stream the client socket to the server socket
        self.proxy_recv(client_socket, server_socket)

    except Exception as exc:
        # for misc errors, send back json error msg
        self.logger.error(repr(exc), exc_info=True)
        response.body = json.dumps(dict(error=repr(exc))).encode()
        response.status = 500
        self.client_send(request, response, client_socket)

    finally:
        # both sockets are closed whichever branch above ran
        client_socket.close()
        server_socket.close()
def server_send(server_socket: socket.socket, request: Request):
    """Send magnified request to the server

    The prepared request bytes are written with ``sendall`` so the whole
    request reaches the server. ``socket.send`` may return after
    transmitting only part of the buffer, which would truncate the
    forwarded request.
    """
    server_socket.sendall(request.prepare())