Beispiel #1
0
class MyIndexer(Executor):
    """Indexer executor whose documents live in a SQLite-backed DocumentArray.

    The database file is ``indexer.db`` inside the executor workspace and the
    documents are kept in the table named by ``self.table_name``.
    """

    def __init__(self, **kwargs):
        """Open the SQLite-backed document store.

        :param kwargs: keyword arguments forwarded unchanged to ``Executor``
        """
        super().__init__(**kwargs)
        self.table_name = 'qabot_docs'
        db_file = os.path.join(self.workspace, 'indexer.db')
        storage_config = {
            'connection': db_file,
            'table_name': self.table_name,
        }
        self._docs = DocumentArray(storage='sqlite', config=storage_config)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the store.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', **kwargs):
        """Match every query document against the stored documents.

        The matching call is made with the cosine metric,
        ``normalization=(1, 0)`` and ``limit=1``.

        :param docs: documents that are searched
        :param kwargs: other keyword arguments (unused)
        """
        match_options = {
            'metric': 'cosine',
            'normalization': (1, 0),
            'limit': 1,
        }
        docs.match(self._docs, **match_options)
Beispiel #2
0
class MyIndexer(Executor):
    """Simple indexer that keeps documents in memory and persists them on close.

    A previously saved index is loaded from the file ``indexer`` inside the
    executor workspace when that file exists; otherwise the executor starts
    with an empty ``DocumentArray``.
    """

    def __init__(self, **kwargs):
        """Load the saved index if present, else start empty.

        :param kwargs: keyword arguments forwarded unchanged to ``Executor``
        """
        super().__init__(**kwargs)
        index_path = self._index_path
        if os.path.exists(index_path):
            self._docs = DocumentArray.load(index_path)
        else:
            self._docs = DocumentArray()

    @property
    def _index_path(self) -> str:
        """Return the location of the persisted index inside the workspace."""
        return os.path.join(self.workspace, 'indexer')

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', **kwargs):
        """Append best matches to each document in docs

        The matching call is made with the cosine metric,
        ``normalization=(1, 0)`` and ``limit=1``.

        :param docs: documents that are searched
        :param kwargs: other keyword arguments (unused)
        """
        docs.match(
            self._docs,
            metric='cosine',
            normalization=(1, 0),
            limit=1,
        )

    def close(self):
        """
        Stores the DocumentArray to disk
        """
        index_path = self._index_path
        index_dir = os.path.dirname(index_path)
        if index_dir:
            # NOTE(review): presumably save() does not create missing parent
            # directories; creating the workspace first is harmless if it does.
            os.makedirs(index_dir, exist_ok=True)
        self._docs.save(index_path)
Beispiel #3
0
class KeyValueIndexer(Executor):
    """Indexer that swaps each match for the stored document it points to.

    Documents are kept in memory and persisted to the file ``kv-idx`` inside
    the executor workspace when the executor is closed.
    """

    def __init__(self, *args, **kwargs):
        """Load the saved index if present, else start empty.

        :param args: positional arguments forwarded unchanged to ``Executor``
        :param kwargs: keyword arguments forwarded unchanged to ``Executor``
        """
        super().__init__(*args, **kwargs)
        index_path = self._index_path
        if os.path.exists(index_path):
            self._docs = DocumentArray.load(index_path)
        else:
            self._docs = DocumentArray()

    @property
    def _index_path(self) -> str:
        """Return the location of the persisted index inside the workspace."""
        return os.path.join(self.workspace, 'kv-idx')

    @requests(on='/index')
    def index(self, docs: DocumentArray, **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def query(self, docs: DocumentArray, **kwargs):
        """Replace each document's matches with the stored parent documents.

        For every match, the stored document is looked up by the match's
        ``parent_id``, given the match's scores, and collected; the collected
        documents then become the query document's new matches.

        :param docs: query documents whose matches are rewritten in place
        :param kwargs: other keyword arguments (unused)
        """
        for doc in docs:
            new_matches = DocumentArray()
            for match in doc.matches:
                extracted_doc = self._docs[match.parent_id]
                # NOTE(review): if the lookup returns the stored object itself
                # rather than a copy, this overwrites scores on the indexed
                # document and on any earlier match sharing it - confirm.
                extracted_doc.scores = match.scores
                new_matches.append(extracted_doc)
            doc.matches = new_matches

    def close(self):
        """
        Stores the DocumentArray to disk
        """
        index_path = self._index_path
        index_dir = os.path.dirname(index_path)
        if index_dir:
            # NOTE(review): presumably save() does not create missing parent
            # directories; creating the workspace first is harmless if it does.
            os.makedirs(index_dir, exist_ok=True)
        self._docs.save(index_path)
Beispiel #4
0
class DocVectorIndexer(Executor):
    """Indexer that matches queries against stored documents by cosine metric.

    Documents are kept in memory and persisted to a caller-named file inside
    the executor workspace when the executor is closed.
    """

    def __init__(self, index_file_name: str, **kwargs):
        """Load the saved index if present, else start empty.

        :param index_file_name: name of the index file, relative to the
            executor workspace
        :param kwargs: keyword arguments forwarded unchanged to ``Executor``
        """
        super().__init__(**kwargs)
        self._index_file_name = index_file_name
        index_path = self._index_path
        if os.path.exists(index_path):
            self._docs = DocumentArray.load(index_path)
        else:
            self._docs = DocumentArray()

    @property
    def _index_path(self) -> str:
        """Return the location of the persisted index inside the workspace."""
        # A leading '/' would make os.path.join discard the workspace; strip
        # it so the file stays inside the workspace, exactly as it did with
        # the former ``workspace + '/' + name`` concatenation.
        relative_name = self._index_file_name.lstrip('/')
        return os.path.join(self.workspace, relative_name)

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Add the incoming documents to the in-memory index.

        :param docs: documents to be indexed
        :param kwargs: other keyword arguments (unused)
        """
        self._docs.extend(docs)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Match every query document against the stored documents.

        The matching call is made with the cosine metric and
        ``normalization=(1, 0)``.

        :param docs: documents that are searched
        :param parameters: request parameters; ``parameters['top_k']`` is
            converted to ``int`` and used as the match limit
        :param kwargs: other keyword arguments (unused)
        :raises KeyError: if ``parameters`` has no ``'top_k'`` entry
        """
        docs.match(
            self._docs,
            metric='cosine',
            normalization=(1, 0),
            limit=int(parameters['top_k']),
        )

    def close(self):
        """
        Stores the DocumentArray to disk
        """
        index_path = self._index_path
        index_dir = os.path.dirname(index_path)
        if index_dir:
            # NOTE(review): presumably save() does not create missing parent
            # directories; creating them first is harmless if it does.
            os.makedirs(index_dir, exist_ok=True)
        self._docs.save(index_path)
Beispiel #5
0
    async def aencode(self, content, **kwargs):
        """Post ``content`` through the async client and gather the responses.

        The payload for the request is built by ``self._get_post_payload``
        from ``content`` and ``kwargs``; every response chunk is appended to
        one ``DocumentArray``.

        :param content: the content to send
        :param kwargs: extra options handed to ``self._get_post_payload``
        :return: the gathered array's ``embeddings`` when
            ``self._return_plain`` is truthy, otherwise the array itself
        """
        from docarray import DocumentArray

        collected = DocumentArray()
        payload = self._get_post_payload(content, kwargs)
        async for chunk in self._async_client.post(**payload):
            collected.extend(chunk)

        if self._return_plain:
            return collected.embeddings
        return collected
Beispiel #6
0
def print_result(groundtruths, resp):
    """
    Callback function to receive results.

    Evaluates the returned queries against their ground truth with the
    ``recall_at_k`` and ``precision_at_k`` metrics, appends one HTML table
    row per query to the module-level ``result_html`` list, and records the
    evaluation values in the module-level ``evaluation_value`` mapping. The
    module-level ``top_k`` is set to the number of matches of the last query
    processed.

    :param groundtruths: container indexed by each query's
        ``tags['query_label']``, yielding that query's ground-truth document
    :param resp: returned response with data
    """
    from docarray import DocumentArray

    global top_k
    global evaluation_value

    queries = DocumentArray()
    queries.extend(resp.docs)

    # Build the ground-truth array in the same order as the queries.
    gts = DocumentArray()
    for query in queries:
        gts.append(groundtruths[query.tags['query_label']])

    # Same evaluation settings for both metrics; only the metric name differs.
    for metric in ('recall_at_k', 'precision_at_k'):
        queries.evaluate(gts,
                         metric=metric,
                         hash_fn=lambda d: d.tags['id'],
                         top_k=50)

    for query in queries:
        vi = query.uri

        result_html.append(f'<tr><td><img src="{vi}"/></td><td>')
        top_k = len(query.matches)
        for kk in query.matches:
            kmi = kk.uri
            # The match's cosine score value is emitted as the image opacity.
            result_html.append(
                f'<img src="{kmi}" style="opacity:{kk.scores["cosine"].value}"/>'
            )
        result_html.append('</td></tr>\n')

        # update evaluation values
        # NOTE(review): per the original author the evaluator reports a
        # running average, so the latest value simply replaces the old one -
        # confirm against the evaluator in use.
        for k, evaluation in query.evaluations.items():
            evaluation_value[k] = evaluation.value
Beispiel #7
0
class MyIndexer(Executor):
    """
    Executor with basic exact search using cosine distance

    Documents are kept in memory; a previously saved index is loaded from the
    file ``indexer`` inside the executor workspace when that file exists, and
    the current documents are written back there on close.
    """

    def __init__(self, **kwargs):
        """Load the saved index if present, else start empty.

        :param kwargs: keyword arguments forwarded unchanged to ``Executor``
        """
        super().__init__(**kwargs)
        index_path = self._index_path
        if os.path.exists(index_path):
            self._docs = DocumentArray.load(index_path)
        else:
            self._docs = DocumentArray()

    @property
    def _index_path(self) -> str:
        """Return the location of the persisted index inside the workspace."""
        return os.path.join(self.workspace, 'indexer')

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Extend self._docs

        :param docs: DocumentArray containing Documents
        :param kwargs: other keyword arguments
        """
        self._docs.extend(docs)

    @requests(on=['/search', '/eval'])
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Append best matches to each document in docs

        :param docs: documents that are searched
        :param parameters: dictionary of pairs (parameter,value);
            ``parameters['top_k']`` is converted to ``int`` and used as the
            match limit
        :param kwargs: other keyword arguments
        :raises KeyError: if ``parameters`` has no ``'top_k'`` entry
        """
        docs.match(
            self._docs,
            metric='cosine',
            normalization=(1, 0),
            limit=int(parameters['top_k']),
        )

    def close(self):
        """
        Stores the DocumentArray to disk
        """
        index_path = self._index_path
        index_dir = os.path.dirname(index_path)
        if index_dir:
            # NOTE(review): presumably save() does not create missing parent
            # directories; creating the workspace first is harmless if it does.
            os.makedirs(index_dir, exist_ok=True)
        self._docs.save(index_path)