def image_compute_signatures(img_path):
    """Build document signatures for the image at ``img_path``.

    The image is reduced to chain codes, the chain codes are converted to
    documents, and the documents are turned into signatures.

    Args:
        img_path: Image location, forwarded unchanged to
            ``chaincode.chaincodes``.

    Returns:
        A ``(signatures, chaincodes)`` tuple: the output of
        ``jaccard.document_signatures`` and the chain codes it was
        derived from.
    """
    # One chain code per fragment found in the image.
    fragment_codes = chaincode.chaincodes(img_path)
    fragment_signatures = jaccard.document_signatures(
        text_embedding.chaincodes_to_documents(fragment_codes)
    )
    return fragment_signatures, fragment_codes
Beispiel #2
0
    def test_minhash_chaincodes(self):
        """Query the LSH buckets with one fragment's chain code.

        Signatures are built for every chain code of ``self.img_path``,
        bucketed, and then queried with the signature derived from the
        chain code at a fixed index. The test passes when the query
        returns at least one result.
        """
        probe_index = 3

        # Chain codes for all fragments of the test image.
        all_codes = chaincode.chaincodes(self.img_path)
        # Query form of the chain code picked as the probe.
        probe_code = text_embedding.query_chaincode(all_codes[probe_index])

        corpus_docs = text_embedding.chaincodes_to_documents(all_codes)
        probe_docs = text_embedding.chaincodes_to_documents([probe_code])

        corpus_signatures = jaccard.document_signatures(corpus_docs)
        # A single document went in, so only the first signature matters.
        probe_signature = jaccard.document_signatures(probe_docs)[0]

        buckets = jaccard.create_buckets(corpus_signatures)
        similar_documents = jaccard.query(buckets, probe_signature, probe_index)
        print(f'similar_documents: {similar_documents}')
        self.assertGreater(len(similar_documents), 0)
def image_query_similar_fragments(chaincodes, signatures, query_index,
                                  threshold):
    """Look up fragments similar to the one at ``query_index``.

    Buckets are created from ``signatures``, a query signature is derived
    from ``chaincodes[query_index]``, and the buckets are queried with it.

    Args:
        chaincodes: Indexable collection of chain codes; only the entry at
            ``query_index`` is read here.
        signatures: Signatures to bucket, forwarded to
            ``jaccard.create_buckets``.
        query_index: Index of the query chain code; also forwarded to
            ``jaccard.query``.
        threshold: Passed to both ``jaccard.create_buckets`` and
            ``jaccard.query``.

    Returns:
        Whatever ``jaccard.query`` returns for the query signature.
    """
    buckets = jaccard.create_buckets(signatures, threshold)

    # Turn the selected chain code into its query form, then into a
    # one-element document list so it can be signed like the corpus.
    probe_code = text_embedding.query_chaincode(chaincodes[query_index])
    probe_signature = jaccard.document_signatures(
        text_embedding.chaincodes_to_documents([probe_code])
    )[0]

    return jaccard.query(buckets, probe_signature, query_index, threshold)
 def test_can_get_similar_documents(self):
     """Run a similarity query using one of the corpus signatures.

     Signatures for ``self.documents`` are bucketed and the buckets are
     queried with the signature at a fixed index.
     """
     probe_index = 1
     corpus_signatures = jaccard.document_signatures(self.documents)
     buckets = jaccard.create_buckets(corpus_signatures)
     # NOTE(review): the result is never asserted on, so this only checks
     # that the query runs without raising.
     matches = jaccard.query(
         buckets,
         corpus_signatures[probe_index],
         probe_index,
     )
 def test_can_get_signatures_for_multiple_documents(self):
     """Check the container and element types of the computed signatures.

     ``jaccard.document_signatures(self.documents)`` must return a list
     whose first element is a ``MinHash``.
     """
     computed = jaccard.document_signatures(self.documents)
     self.assertIsInstance(computed, list)
     first_signature = computed[0]
     self.assertIsInstance(first_signature, MinHash)