class MyIndexer(Executor):
    """Indexer that keeps its documents in a SQLite-backed DocumentArray."""

    def __init__(self, **kwargs):
        """Initialise the executor and open the document store.

        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        self.table_name = 'qabot_docs'
        # The database file is placed inside this executor's workspace.
        db_path = os.path.join(self.workspace, 'indexer.db')
        storage_config = {
            'connection': db_path,
            'table_name': self.table_name,
        }
        self._docs = DocumentArray(storage='sqlite', config=storage_config)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the store.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', **kwargs):
        """Append the best match to each document in docs.

        Matching is delegated to ``docs.match`` against the stored
        documents with the cosine metric and ``limit=1``.

        :param docs: documents that are searched
        :param kwargs: other keyword arguments (unused)
        """
        match_options = {
            'metric': 'cosine',
            'normalization': (1, 0),
            'limit': 1,
        }
        docs.match(self._docs, **match_options)
class MyIndexer(Executor):
    """Indexer that loads its documents on start-up and saves them on close."""

    def __init__(self, **kwargs):
        """Initialise the executor and restore a previously saved index.

        If the index path inside the workspace exists, the documents are
        loaded from it; otherwise an empty DocumentArray is used.

        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        index_path = self.workspace + '/indexer'
        if not os.path.exists(index_path):
            self._docs = DocumentArray()
        else:
            self._docs = DocumentArray.load(index_path)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', **kwargs):
        """Append the best match to each document in docs.

        Matching is delegated to ``docs.match`` against the stored
        documents with the cosine metric and ``limit=1``.

        :param docs: documents that are searched
        :param kwargs: other keyword arguments (unused)
        """
        match_options = {
            'metric': 'cosine',
            'normalization': (1, 0),
            'limit': 1,
        }
        docs.match(self._docs, **match_options)

    def close(self):
        """Store the DocumentArray to disk at the index path."""
        index_path = self.workspace + '/indexer'
        self._docs.save(index_path)
class KeyValueIndexer(Executor):
    """Indexer that swaps each match for the stored document it came from."""

    def __init__(self, *args, **kwargs):
        """Initialise the executor and restore a previously saved index.

        If the index path inside the workspace exists, the documents are
        loaded from it; otherwise an empty DocumentArray is used.

        :param args: positional arguments forwarded to the parent constructor
        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(*args, **kwargs)
        index_path = self.workspace + '/kv-idx'
        if not os.path.exists(index_path):
            self._docs = DocumentArray()
        else:
            self._docs = DocumentArray.load(index_path)

    @requests(on='/index')
    def index(self, docs: DocumentArray, **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def query(self, docs: DocumentArray, **kwargs):
        """Replace every match with the document stored under its parent id.

        For each match of each query document, the entry
        ``self._docs[match.parent_id]`` is looked up, given the match's
        scores, and collected; the collected documents then become the
        query document's matches.

        :param docs: query documents whose matches are rewritten
        :param kwargs: other keyword arguments (unused)
        """
        for query_doc in docs:
            resolved = DocumentArray()
            for hit in query_doc.matches:
                parent = self._docs[hit.parent_id]
                # NOTE(review): the scores are assigned on whatever object the
                # lookup returns; whether that is the stored document itself
                # or a copy depends on DocumentArray -- confirm.
                parent.scores = hit.scores
                resolved.append(parent)
            query_doc.matches = resolved

    def close(self):
        """Store the DocumentArray to disk at the index path."""
        index_path = self.workspace + '/kv-idx'
        self._docs.save(index_path)
class DocVectorIndexer(Executor):
    """Vector indexer persisted under a configurable file name."""

    def __init__(self, index_file_name: str, **kwargs):
        """Initialise the executor and restore a previously saved index.

        If the index path inside the workspace exists, the documents are
        loaded from it; otherwise an empty DocumentArray is used.

        :param index_file_name: name of the index file inside the workspace
        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        self._index_file_name = index_file_name
        index_path = self.workspace + f'/{index_file_name}'
        if not os.path.exists(index_path):
            self._docs = DocumentArray()
        else:
            self._docs = DocumentArray.load(index_path)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Append the best matches to each document in docs.

        Matching is delegated to ``docs.match`` against the stored
        documents with the cosine metric.

        :param docs: documents that are searched
        :param parameters: request parameters; ``parameters['top_k']`` is
            required and is converted to ``int`` to limit the matches
        :param kwargs: other keyword arguments (unused)
        """
        top_k = int(parameters['top_k'])
        docs.match(
            self._docs,
            metric='cosine',
            normalization=(1, 0),
            limit=top_k,
        )

    def close(self):
        """Store the DocumentArray to disk at the index path."""
        index_path = self.workspace + f'/{self._index_file_name}'
        self._docs.save(index_path)
async def aencode(self, content, **kwargs):
    """Asynchronously post the content and gather the returned documents.

    Every batch yielded by ``self._async_client.post`` is appended to a
    single DocumentArray.

    :param content: the content to encode, passed to
        ``self._get_post_payload``
    :param kwargs: extra options, passed to ``self._get_post_payload``
        as a dict
    :return: the ``embeddings`` of the gathered documents when
        ``self._return_plain`` is truthy, otherwise the gathered
        DocumentArray itself
    """
    from docarray import DocumentArray

    collected = DocumentArray()
    payload = self._get_post_payload(content, kwargs)
    async for batch in self._async_client.post(**payload):
        collected.extend(batch)

    # The flag is read only after all batches have been consumed.
    if self._return_plain:
        return collected.embeddings
    return collected
def print_result(groundtruths, resp):
    """Callback function to receive results.

    Evaluates the returned queries against their ground truths, appends one
    HTML table row per query to the module-level ``result_html`` list, sets
    the module-level ``top_k`` to the number of matches of the most recently
    processed query, and stores the evaluation values in the module-level
    ``evaluation_value`` container.

    :param groundtruths: object indexed by a query's ``tags['query_label']``
        that yields the ground-truth entry for that query
    :param resp: returned response with data; ``resp.docs`` are the queries
    """
    from docarray import DocumentArray

    # Only ``top_k`` is rebound in this function. ``result_html`` and
    # ``evaluation_value`` are mutated in place, which needs no ``global``.
    global top_k

    queries = DocumentArray()
    queries.extend(resp.docs)

    # Collect the ground truths in the same order as the queries.
    gts = DocumentArray()
    for query in queries:
        gts.append(groundtruths[query.tags['query_label']])

    # NOTE: the ``top_k=50`` keyword here is an argument of ``evaluate`` and
    # is unrelated to the module-level ``top_k`` assigned below.
    for metric in ('recall_at_k', 'precision_at_k'):
        queries.evaluate(
            gts, metric=metric, hash_fn=lambda d: d.tags['id'], top_k=50
        )

    for query in queries:
        result_html.append(f'<tr><td><img src="{query.uri}"/></td><td>')

        top_k = len(query.matches)
        for match in query.matches:
            result_html.append(
                f'<img src="{match.uri}" '
                f'style="opacity:{match.scores["cosine"].value}"/>'
            )
        result_html.append('</td></tr>\n')

        # update evaluation values
        # as evaluator set to return running avg, here we can simply replace
        # the value
        for name, evaluation in query.evaluations.items():
            evaluation_value[name] = evaluation.value
class MyIndexer(Executor):
    """Executor with basic exact search using cosine distance."""

    def __init__(self, **kwargs):
        """Initialise the executor and restore a previously saved index.

        If the index path inside the workspace exists, the documents are
        loaded from it; otherwise an empty DocumentArray is used.

        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        index_path = self.workspace + '/indexer'
        if not os.path.exists(index_path):
            self._docs = DocumentArray()
        else:
            self._docs = DocumentArray.load(index_path)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Extend self._docs with the incoming documents.

        :param docs: DocumentArray containing Documents
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on=['/search', '/eval'])
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Append the best matches to each document in docs.

        Matching is delegated to ``docs.match`` against the stored
        documents with the cosine metric.

        :param docs: documents that are searched
        :param parameters: request parameters; ``parameters['top_k']`` is
            required and is converted to ``int`` to limit the matches
        :param kwargs: other keyword arguments (unused)
        """
        top_k = int(parameters['top_k'])
        docs.match(
            self._docs,
            metric='cosine',
            normalization=(1, 0),
            limit=top_k,
        )

    def close(self):
        """Store the DocumentArray to disk at the index path."""
        index_path = self.workspace + '/indexer'
        self._docs.save(index_path)