Beispiel #1
0
class ViIndexer:
    @property
    def encoder_type(self):
        """The encoder type ensures it uses either the 'encode' or 'encode_question'/'encode_answer'
        Currently supported encoder types: 
            Question-Answer
            Encoder
        """
        if self.definition.model_id.startswith('qa'):
            return 'qa'
        else:
            return 'encoder'

    def request_api_key(self, username: str, email: str, referral=None):
        """
        Requesting an API key.
        """
        print("API key is being requested. Be sure to save it somewhere!")
        return request_api_key(username=username,
                               email=email,
                               description='vectorhub',
                               referral=referral)

    def add_documents(self,
                      username: str,
                      api_key: str,
                      items: List[Any],
                      metadata: Optional[List[Any]] = None,
                      collection_name: str = None):
        """
        Add documents to the Vector AI cloud.
        """
        self.username = username
        self.api_key = api_key
        if collection_name is not None:
            self.collection_name = collection_name
        else:
            self.collection_name = 'vectorhub_collection_with_' + self.__name__
        if metadata is not None:
            docs = [
                self._create_document(i, item, metadata)
                for i, (item,
                        metadata) in enumerate(list(zip(items, metadata)))
            ]
        else:
            docs = [
                self._create_document(i, item) for i, item in enumerate(items)
            ]

        self.client = ViClient(username, api_key)
        if self.encoder_type == 'encoder':
            return self.client.insert_documents(self.collection_name, docs,
                                                {'item': self})
        elif self.encoder_type == 'qa':
            return self.client.insert_documents(self.collection_name, docs,
                                                {'item': self.encode_question})

    def _create_document(self, _id: str, item: List[str], metadata=None):
        return {'_id': str(_id), 'item': item, 'metadata': metadata}

    def delete_collection(self, collection_name=None):
        if collection_name is None:
            collection_name = self.collection_name
        return self.delete_collection(collection_name)

    def search(self, item: Any, num_results: int = 10):
        """
        Simple search with Vector AI
        """
        warnings.warn(
            "If you are looking for more advanced functionality, we recommend using the official Vector AI Github package"
        )
        if self.encoder_type == 'encoder':
            return self.client.search(self.collection_name,
                                      self.encode(item),
                                      field='item_' + self.__name__ +
                                      '_vector_',
                                      page_size=num_results)
        elif self.encoder_type == 'qa':
            return self.client.search(self.collection_name,
                                      self.encode_question(item),
                                      field='item_vector_',
                                      page_size=num_results)

    def retrieve_documents(self, num_of_documents: int):
        """
        Get all the documents in our package.
        """
        return self.client.retrieve_documents(
            self.collection_name, page_size=num_of_documents)['documents']

    def retrieve_all_documents(self):
        """
        Retrieve all documents.
        """
        return self.retrieve_all_documents(self.collection_name)
Beispiel #2
0
        for i, doc in enumerate(docs):
            if 'short_description' not in docs[i].keys():
                short_description = summarise(doc['description'])
                docs[i]['short_description'] = short_description
                # LOGGER.debug(short_description)

    vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    ids = vi_client.get_field_across_documents('_id', docs)
    if args.reset_collection:
        if args.collection_name in vi_client.list_collections():
            vi_client.delete_collection(args.collection_name)
            time.sleep(5)
    text_encoder = ViText2Vec(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])

    response = vi_client.insert_documents(args.collection_name, docs, models={'description': text_encoder})

    LOGGER.debug(response)
    print(response)
    if response['failed'] != 0:
        raise ValueError("Failed IDs")
    
    if args.evaluate_results:
        LOGGER.debug("Checking Documents:")
        LOGGER.debug(vi_client.head(args.collection_name))
        LOGGER.debug(vi_client.head(args.collection_name)['vector_length'])
        LOGGER.debug(vi_client.collection_schema(args.collection_name))
        import pandas as pd
        pd.set_option('display.max_colwidth', None)
        LOGGER.debug(vi_client.show_json(vi_client.random_documents(args.collection_name), selected_fields=['markdown_without_example']))
Beispiel #3
0
"""
    Script to create model cards.
"""

if __name__ == "__main__":
    from vectorai import ViClient
    from vectorai.models.deployed.text import ViText2Vec
    from vectorhub.auto_encoder import *
    import os

    vi_client = ViClient(os.environ['VH_USERNAME'], os.environ['VH_API_KEY'])
    text_encoder = ViText2Vec(os.environ['VH_USERNAME'],
                              os.environ['VH_API_KEY'])
    docs = get_model_definitions(None)
    vi_client.insert_documents(os.environ['VH_COLLECTION_NAME'],
                               docs,
                               models={'description': text_encoder})