def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict, **kwargs) -> 'DocumentArray':
    """
    Fuse the matches of two modalities into one ranked match list per query.

    Documents are paired position-wise across the arrays in ``docs_matrix``.
    Inside a pair, every match score is multiplied by the ``weight`` of the
    document the match belongs to, and matches that share a ``parent_id``
    have their weighted scores summed.

    .. note::
        Scores are rescaled in place on the incoming matches (all matches of
        the first document, and the not-yet-seen matches of the second one)
        before the matches are copied into the result.

    :param docs_matrix: the document arrays to combine; each zipped tuple is
        unpacked into exactly two documents
    :param parameters: must contain ``top_k``, the number of merged matches
        kept per query
    :param kwargs: not used (kept to maintain interface)
    :return: one document per zipped pair, carrying the ``top_k`` merged
        matches with the highest score
    """
    # The original author notes this has length 1, as there is only one query each time.
    ranked_queries = DocumentArray()

    for first_doc, second_doc in zip(*docs_matrix):
        merged = {}  # type: Dict[str, Document]

        # First modality: a later match with the same parent_id replaces an earlier one.
        for match in first_doc.matches:
            match.score.value *= first_doc.weight
            merged[match.parent_id] = Document(match, copy=True)

        # Second modality: accumulate onto a known parent, otherwise register it.
        for match in second_doc.matches:
            if match.parent_id not in merged:
                match.score.value *= second_doc.weight
                merged[match.parent_id] = Document(match, copy=True)
                continue
            merged[match.parent_id].score.value += (match.score.value * second_doc.weight)

        candidates = DocumentArray(list(merged.values()))
        candidates.sort(key=lambda item: item.score.value, reverse=True)

        keep = int(parameters['top_k'])
        ranked_queries.append(Document(matches=candidates[:keep]))

    return ranked_queries
def test_da_sort_by_document_interface_in_proto():
    """Sort an array built from a mix of ``Document`` objects and their protos."""
    # Embedding lengths run from 10 down to 1.
    docs = [Document(embedding=np.array([1] * length)) for length in range(10, 0, -1)]

    # Even positions keep the Document itself, odd positions hand over ``.proto``.
    mixed = [doc if idx % 2 == 0 else doc.proto for idx, doc in enumerate(docs)]
    da = DocumentArray(mixed)

    assert len(da) == 10
    assert da[0].embedding.shape == (10, )

    # The key reads ``embedding.dense.shape`` -- presumably the proto-level view
    # of the embedding, as the test name suggests.
    da.sort(key=lambda item: item.embedding.dense.shape[0])

    # Ascending sort brings the shortest embedding to the front.
    assert da[0].embedding.shape == (1, )
def test_da_sort_by_score():
    """Sorting on the ``euclid`` score value puts the lowest score first."""
    documents = []
    for i in range(10):
        # Scores decrease from 10 to 1 while ids increase from 0 to 9.
        documents.append(Document(id=i, copy=True, scores={'euclid': 10 - i}))
    da = DocumentArray(documents)

    head = da[0]
    assert head.id == '0'
    assert head.scores['euclid'].value == 10

    # sort the documents by their score values
    da.sort(key=lambda item: item.scores['euclid'].value)

    head = da[0]
    assert head.id == '9'
    assert head.scores['euclid'].value == 1
def test_da_sort_topk_tie():
    """With tied keys, ``top_k`` sorting is expected to keep insertion order."""
    # 100 documents whose ``order`` tag cycles through 0..9, so ten of them tie at 0.
    documents = [Document(id=i, tags={'order': i % 10}) for i in range(100)]
    da = DocumentArray(documents)

    da.sort(top_k=10, key=lambda item: item.tags['order'])

    expected_ids = [
        '0', '10', '20', '30', '40', '50', '60', '70', '80', '90'
    ]
    top_k_ids = [item.id for item in da[0:10]]
    assert top_k_ids == expected_ids

    for position in range(10):
        assert da[position].tags['order'] == 0
def rank(self, docs_matrix: List[DocumentArray], parameters: Dict, **kwargs):
    """
    Merge the matches of several modalities by their ``root_doc_id`` tag.

    Arrays that are ``None`` or empty are dropped first. The remaining arrays
    are zipped position-wise; for every zipped group the scores of all matches
    sharing a ``root_doc_id`` tag are summed, and the merged matches are
    sorted by descending score.

    :param docs_matrix: the document arrays to combine, one per modality
    :param parameters: may contain ``top_k``, the number of merged matches
        kept per group (3 when absent)
    :param kwargs: not used
    :return: a :class:`DocumentArray` with one document per zipped group,
        whose matches are the ``top_k`` best merged matches
    """
    ranked = DocumentArray()

    # Keep only the arrays that actually contain documents.
    usable_arrays = [
        doc_arr for doc_arr in docs_matrix
        if not (doc_arr is None or len(doc_arr) == 0)
    ]

    for docs_of_query in zip(*usable_arrays):
        merged = {}
        for doc in docs_of_query:
            for match in doc.matches:
                root_id = match.tags['root_doc_id']
                if root_id not in merged:
                    # First sighting: start a fresh document that carries this match's score.
                    merged[root_id] = Document(id=root_id, score=match.score)
                    continue
                merged[root_id].score.value += match.score.value

        candidates = DocumentArray(list(merged.values()))
        candidates.sort(key=lambda item: item.score.value, reverse=True)

        keep = int(parameters.get('top_k', 3))
        ranked.append(Document(matches=candidates[:keep]))

    return ranked
def test_da_sort_topk():
    """``top_k`` sorting orders the first k entries and keeps every document."""

    def euclid_value(doc):
        # Sort key shared by both sort calls.
        return doc.scores['euclid'].value

    def split_scores(array):
        # Return the scores of the first three entries and of the remaining seven.
        values = [euclid_value(array[i]) for i in range(10)]
        return values[:3], values[3:]

    da = DocumentArray(
        [Document(id=i, scores={'euclid': 10 - i}) for i in range(10)])
    original = deepcopy(da)

    # Ascending: the three smallest scores come first, in order.
    da.sort(top_k=3, key=euclid_value)
    top, rest = split_scores(da)
    assert top == [1, 2, 3]
    # The test expects the tail to be left unsorted.
    assert rest != sorted(rest)
    assert len(da) == len(original)
    still_present = [d.id in original for d in da]
    assert all(still_present)

    # Descending: the three largest scores come first, in order.
    da.sort(top_k=3, key=euclid_value, reverse=True)
    top, rest = split_scores(da)
    assert top == [10, 9, 8]
    assert rest != sorted(rest, reverse=True)
    assert len(da) == len(original)
    still_present = [d.id in original for d in da]
    assert all(still_present)
def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict, **kwargs) -> 'DocumentArray':
    """
    Fuse the matches of two modalities into one ranked match list per query.

    Documents are paired position-wise across the arrays in ``docs_matrix``.
    For each match a ``relevance`` score is derived from its ``cosine`` score
    multiplied by the ``weight`` of the document the match belongs to;
    matches sharing a ``parent_id`` have their relevance contributions summed.

    .. note::
        The ``relevance`` score is written onto the incoming matches (all
        matches of the first document, and the not-yet-seen matches of the
        second one) before they are copied into the result.

    :param docs_matrix: list of :class:`DocumentArray` on multiple requests to
        get bubbled up matches; each zipped tuple is unpacked into exactly two
        documents
    :param parameters: the parameters passed into the ranker; must contain
        ``top_k``, the number of merged matches kept per query
    :param kwargs: not used (kept to maintain interface)
    :return: one document per zipped pair, carrying the ``top_k`` merged
        matches with the highest ``relevance``
    """
    # The original author notes this has length 1, as there is only one query each time.
    ranked_queries = DocumentArray()

    for first_doc, second_doc in zip(*docs_matrix):
        merged = {}  # type: Dict[str, Document]

        # First modality: a later match with the same parent_id replaces an earlier one.
        for match in first_doc.matches:
            match.scores['relevance'] = match.scores['cosine'].value * first_doc.weight
            merged[match.parent_id] = Document(match, copy=True)

        # Second modality: add onto a known parent, otherwise register it.
        for match in second_doc.matches:
            if match.parent_id not in merged:
                match.scores['relevance'] = match.scores['cosine'].value * second_doc.weight
                merged[match.parent_id] = Document(match, copy=True)
                continue
            known = merged[match.parent_id]
            known.scores['relevance'] = known.scores['relevance'].value + (
                match.scores['cosine'].value * second_doc.weight)

        candidates = DocumentArray(list(merged.values()))
        candidates.sort(key=lambda item: item.scores['relevance'].value, reverse=True)

        keep = int(parameters['top_k'])
        ranked_queries.append(Document(matches=candidates[:keep]))

    return ranked_queries