def query(self, docs: DocumentArray, **kwargs):
    """Swap every match of each document for the stored document it refers to.

    Each match is looked up in ``self._docs`` through its ``parent_id``. The
    stored document takes over the match's ``scores`` and is collected into a
    new ``DocumentArray`` that replaces ``doc.matches``.

    :param docs: documents whose ``matches`` are rewritten in place.
    :param kwargs: not used.
    """
    for query_doc in docs:
        resolved = DocumentArray()
        for hit in query_doc.matches:
            parent = self._docs[hit.parent_id]
            # The stored document is mutated: it receives the match's scores.
            parent.scores = hit.scores
            resolved.append(parent)
        query_doc.matches = resolved
def _add_docs(req, batch, data_type, _kwargs):
    """Convert a batch of raw contents into documents and attach them to ``req``.

    An item that is a 2-tuple contributes its first element as the document;
    its second element is still passed through ``_new_doc_from_data`` but the
    result is not stored. Any other item is converted directly.

    :param req: request object; its ``data.docs`` is overwritten.
    :param batch: iterable of contents (or 2-tuples of contents).
    :param data_type: data type passed to ``_new_doc_from_data``; it is updated
        with the value returned by each primary conversion.
    :param _kwargs: extra keyword arguments forwarded to ``_new_doc_from_data``.
    """
    collected = DocumentArray()
    for item in batch:
        is_pair = isinstance(item, tuple) and len(item) == 2
        primary = item[0] if is_pair else item
        doc, data_type = _new_doc_from_data(primary, data_type, **_kwargs)
        if is_pair:
            # The second element is converted with the updated data_type, but
            # neither the resulting document nor its data type is kept.
            _new_doc_from_data(item[1], data_type, **_kwargs)
        collected.append(doc)
    req.data.docs = collected
def print_result(groundtruths, resp):
    """
    Callback function to receive results.

    Evaluates the returned queries against their groundtruths
    (``recall_at_k`` and ``precision_at_k``), appends one HTML table row per
    query to ``result_html`` (defined outside this function) and stores the
    evaluation values in the global ``evaluation_value``. The global ``top_k``
    is set to the number of matches of the last processed query.

    :param groundtruths: container indexed by each query's
        ``tags['query_label']`` that yields the groundtruth for that query
    :param resp: returned response with data
    """
    from docarray import DocumentArray

    global top_k
    global evaluation_value

    queries = DocumentArray()
    queries.extend(resp.docs)

    # Build the groundtruth array in the same order as the queries.
    gts = DocumentArray()
    for query in queries:
        gt = groundtruths[query.tags['query_label']]
        gts.append(gt)

    queries.evaluate(gts, metric='recall_at_k', hash_fn=lambda d: d.tags['id'], top_k=50)
    queries.evaluate(gts, metric='precision_at_k', hash_fn=lambda d: d.tags['id'], top_k=50)

    for query in queries:
        vi = query.uri
        result_html.append(f'<tr><td><img src="{vi}"/></td><td>')

        top_k = len(query.matches)
        for kk in query.matches:
            kmi = kk.uri
            result_html.append(
                f'<img src="{kmi}" style="opacity:{kk.scores["cosine"].value}"/>'
            )
        result_html.append('</td></tr>\n')

        # update evaluation values
        # as evaluator set to return running avg, here we can simply replace the value
        for k, evaluation in query.evaluations.items():
            evaluation_value[k] = evaluation.value
def rank(
    self, docs_matrix: List['DocumentArray'], parameters: Dict, **kwargs
) -> 'DocumentArray':
    """
    Merge the matches of two modalities into one relevance-ranked list.

    For every pair of documents the ``cosine`` score of each match is
    multiplied by the ``weight`` of the document it belongs to. Matches that
    share a ``parent_id`` across both documents have their weighted scores
    summed under the ``relevance`` score.

    :param docs_matrix: list of :class:`DocumentArray` on multiple requests to
        get bubbled up matches.
    :param parameters: the parameters passed into the ranker, in this case
        stores :attr`top_k` to filter k results based on score.
    :param kwargs: not used (kept to maintain interface)
    :return: a :class:`DocumentArray` holding one document per zipped pair,
        whose matches are the ``top_k`` merged matches by descending relevance.
    """
    ranked = DocumentArray()  # length: 1 as every time there is only one query

    for first, second in zip(*docs_matrix):
        merged = {}  # type: Dict[str, Document]

        # First document: every match starts its own relevance entry.
        for match in first.matches:
            weighted = match.scores['cosine'].value * first.weight
            match.scores['relevance'].value = weighted
            merged[match.parent_id] = Document(match, copy=True)

        # Second document: add to an existing entry or open a new one.
        for match in second.matches:
            if m_seen := (match.parent_id in merged):
                previous = merged[match.parent_id].scores['relevance'].value
                merged[match.parent_id].scores['relevance'].value = previous + (
                    match.scores['cosine'].value * second.weight
                )
                continue
            match.scores['relevance'].value = (
                match.scores['cosine'].value * second.weight
            )
            merged[match.parent_id] = Document(match, copy=True)

        candidates = DocumentArray(list(merged.values()))
        by_relevance = sorted(
            candidates,
            key=lambda candidate: candidate.scores['relevance'].value,
            reverse=True,
        )
        limit = int(parameters['top_k'])
        ranked.append(Document(matches=by_relevance[:limit]))

    return ranked
def index(self, docs: DocumentArray, **kwargs):
    """Append a document with the text ``'added'`` to ``docs`` and return it.

    :param docs: the array that is extended in place.
    :param kwargs: not used.
    :return: the same ``docs`` object, now containing the extra document.
    """
    marker = Document(text='added')
    docs.append(marker)
    return docs
def _get_example_data():
    """Build a one-document example array and return it via ``to_list()``.

    :return: the result of ``DocumentArray.to_list()`` for an array holding a
        single document with the text ``'hello, world!'``.
    """
    from docarray import Document, DocumentArray

    greeting = Document(text='hello, world!')
    example = DocumentArray()
    example.append(greeting)
    return example.to_list()