Python LSH 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: lsh.lsh

클래스/타입: LSH

hotexamples.com에서의 예제들: 6

Python LSH - 예제 6개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python lsh.lsh.LSH의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

LSH(2)

hash(2)

isSimilar(2)

train(2)

예제 #1

파일 보기

파일: hasher.py 프로젝트: svebk/dig-lsh-clustering

class Hasher:
    """Turn keyed rows into (LSH value, (key, signature)) pairs.

    The class wires a ``MinHashSignature`` signer to an ``LSH`` hasher and
    applies them to every element of an RDD-like object (anything exposing
    ``map`` and ``flatMap``).
    """

    def __init__(self, **options):
        """Build the signer and hasher from keyword options.

        Recognised options (all optional):
            numHashes: forwarded to ``MinHashSignature`` and ``LSH``
                (default 100).
            numItemsInBand: forwarded to ``LSH`` (default 10).
            saveMinhashes: when exactly ``False`` the emitted pairs carry
                ``None`` instead of the min-hash signature (default False).
        """
        num_hashes = options.get("numHashes", 100)
        num_items_in_band = options.get("numItemsInBand", 10)
        self.signer = MinHashSignature(num_hashes)
        self.hasher = LSH(num_hashes, num_items_in_band, None)
        self.save_min_hash = options.get("saveMinhashes", False)
        # Single-argument print calls behave the same on Python 2 and 3.
        print('in hashing')
        # Report the values actually in use; the earlier version looked up
        # the misspelled key "numsItemsInBand" and always printed the default.
        print('%s %s %s' % (num_hashes, num_items_in_band, self.save_min_hash))

    def perform(self, rdd):
        """JSON-encode the value of every (key, value) pair, then hash it.

        Returns whatever ``compute_hashes`` returns for the re-encoded data.
        """
        def _serialise(pair):
            key, value = pair
            return key, json.dumps(value)

        return self.compute_hashes(rdd.map(_serialise))

    def compute_hashes(self, data):
        """Flat-map ``compute_row_lsh`` over the (key, row) pairs in *data*."""
        return data.flatMap(lambda pair: self.compute_row_lsh(*pair))

    def compute_row_lsh(self, key, row):
        """Yield ``(lsh_val, (key, signature))`` for each LSH value of *row*.

        Nothing is yielded when *row* is empty or when the signer returns
        ``None``. ``signature`` is the min-hash signature, or ``None`` when
        ``self.save_min_hash`` is ``False``.
        """
        if len(row) == 0:
            return
        min_hash_sig = self.signer.sign(row)
        if min_hash_sig is None:
            return
        # Materialise the LSH values before the signature may be dropped
        # below, in case the hasher produces them lazily.
        lsh_sig = list(self.hasher.hash(min_hash_sig))
        if self.save_min_hash is False:
            min_hash_sig = None
        for lsh_val in lsh_sig:
            yield lsh_val, (key, min_hash_sig)

예제 #2

파일 보기

파일: hasher.py 프로젝트: sylvanmist/dig-lsh-clustering

class Hasher:
    """Turn keyed rows into (LSH value, (key, signature)) pairs.

    Combines a ``MinHashSignature`` signer with an ``LSH`` hasher and applies
    them to every element of an object exposing ``flatMap``.
    """

    def __init__(self, num_hashes, num_items_in_band, save_min_hash):
        """Create the signer and hasher.

        Args:
            num_hashes: forwarded to ``MinHashSignature`` and ``LSH``.
            num_items_in_band: forwarded to ``LSH``.
            save_min_hash: when exactly ``False`` the emitted pairs carry
                ``None`` instead of the min-hash signature.
        """
        self.signer = MinHashSignature(num_hashes)
        self.hasher = LSH(num_hashes, num_items_in_band, None)
        self.save_min_hash = save_min_hash

    def compute_hashes(self, data):
        """Flat-map ``compute_row_lsh`` over the (key, row) pairs in *data*."""
        # Tuple-parameter lambdas are a syntax error on Python 3, so the pair
        # is unpacked at the call instead.
        return data.flatMap(lambda pair: self.compute_row_lsh(*pair))

    def compute_row_lsh(self, key, row):
        """Yield ``(lsh_val, (key, signature))`` for each LSH value of *row*.

        Nothing is yielded when *row* is empty or when the signer returns
        ``None``. ``signature`` is the min-hash signature, or ``None`` when
        ``self.save_min_hash`` is ``False``.
        """
        if len(row) == 0:
            return
        min_hash_sig = self.signer.sign(row)
        if min_hash_sig is None:
            return
        # Materialise the LSH values before the signature may be dropped
        # below, in case the hasher produces them lazily.
        lsh_sig = list(self.hasher.hash(min_hash_sig))
        if self.save_min_hash is False:
            min_hash_sig = None
        for lsh_val in lsh_sig:
            yield lsh_val, (key, min_hash_sig)

예제 #3

파일 보기

파일: hasher.py 프로젝트: sylvanmist/dig-lsh-clustering

 def __init__(self, num_hashes, num_items_in_band, save_min_hash):
     """Create the signer and hasher and remember the save flag.

     ``num_hashes`` is forwarded to both ``MinHashSignature`` and ``LSH``;
     ``num_items_in_band`` is forwarded to ``LSH`` only. ``save_min_hash``
     is stored unchanged on the instance.
     """
     self.save_min_hash = save_min_hash
     # Construction order of the two helpers is kept as in the original.
     signer = MinHashSignature(num_hashes)
     hasher = LSH(num_hashes, num_items_in_band, None)
     self.signer, self.hasher = signer, hasher

예제 #4

파일 보기

파일: hasher.py 프로젝트: svebk/dig-lsh-clustering

 def __init__(self, **options):
     """Build the signer and hasher from keyword options.

     Recognised options (all optional):
         numHashes: forwarded to ``MinHashSignature`` and ``LSH``
             (default 100).
         numItemsInBand: forwarded to ``LSH`` (default 10).
         saveMinhashes: stored as ``self.save_min_hash`` (default False).
     """
     num_hashes = options.get("numHashes", 100)
     num_items_in_band = options.get("numItemsInBand", 10)
     self.signer = MinHashSignature(num_hashes)
     self.hasher = LSH(num_hashes, num_items_in_band, None)
     self.save_min_hash = options.get("saveMinhashes", False)
     # Single-argument print calls behave the same on Python 2 and 3.
     print('in hashing')
     # Report the values actually in use; the earlier version looked up the
     # misspelled key "numsItemsInBand" and always printed the default.
     print('%s %s %s' % (num_hashes, num_items_in_band, self.save_min_hash))

예제 #5

파일 보기

]

# Load the data for every entry of imageNames.
images = [getImageData(imgName) for imgName in imageNames]

# Keep only the first three planes along the last axis of each image
# (presumably dropping an alpha channel - confirm against getImageData).
alphaIgnoredImages = [image[:, :, 0:3] for image in images]

# Flatten every image into a single row, i.e. shape (1, N).
reshapedImages = [rgb.reshape(1, -1) for rgb in alphaIgnoredImages]

print("reshapedImages", reshapedImages, "dimension", reshapedImages[0].shape[1])

lshModel = LSH(noOfHashers=25, noOfHash=10, dimension=reshapedImages[0].shape[1])

# Train on each flattened image, tagging it with its name.
for idx, vector in enumerate(reshapedImages):
    lshModel.train(vector, { "name": imageNames[idx] })

# Report similarity for the same index pairs, in the same order, as before.
for left, right in ((0, 1), (0, 2), (1, 2), (2, 3)):
    print(lshModel.isSimilar(reshapedImages[left], reshapedImages[right]))

예제 #6

파일 보기

파일: compare_text.py 프로젝트: vpurush/approximate-comparison

from sklearn.feature_extraction.text import CountVectorizer
from lsh.lsh import LSH
import numpy as np

# Sample sentences to compare with each other.
texts = [
    'Jack went to the market to buy some fruits',
    'Jane went to the market to buy some fruits today',
    'Robert and his team played hockey today'
]

# Vectorise the sentences and reshape so that each X[i] is a (1, N) row.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts).toarray().reshape(len(texts), 1, -1)

lshModel = LSH(noOfHashers=25, noOfHash=3, dimension=X.shape[2])

# X has exactly len(texts) rows, so pairing rows with texts visits the same
# (row, text) combinations as indexing both by position.
for row, text in zip(X, texts):
    lshModel.train(row, {"name": text})

# Print the similarity result for each pair of sentences.
for left, right in ((0, 1), (0, 2), (1, 2)):
    print(lshModel.isSimilar(X[left], X[right]))