def getLabels(data, centroids):
    """Label every row of ``data`` with the index of its best-matching centroid.

    For each row, ``similarity.euclidean(centroid, row)`` is evaluated once
    per centroid and the index of the centroid giving the smallest value is
    recorded.  (This presumes ``similarity.euclidean`` returns a distance,
    i.e. smaller means closer -- confirm against the ``similarity`` module.)

    Args:
        data: Object with a two-element ``shape`` ``(m, n)`` that yields its
            ``m`` rows when iterated (e.g. a 2-D NumPy array).
        centroids: Iterable of centroids, each accepted by
            ``similarity.euclidean`` as its first argument.

    Returns:
        A 1-D integer NumPy array of length ``m``; element ``i`` is the index
        into ``centroids`` selected for row ``i``.  Ties go to the
        lowest-indexed centroid because the comparison is strict.  A row keeps
        label ``0`` when no centroid produces a value below
        ``sys.float_info.max`` (this includes an empty ``centroids``).
    """
    m, _ = data.shape
    # Every slot is overwritten below; zeros just provides an int buffer.
    labels = np.zeros(m, dtype=int)

    for row_idx, row in enumerate(data):
        best_idx = 0
        best_dist = sys.float_info.max
        for cent_idx, cent in enumerate(centroids):
            # Evaluate the metric once per (centroid, row) pair and reuse it
            # for both the comparison and the running minimum.
            dist = similarity.euclidean(cent, row)
            if dist < best_dist:
                best_dist = dist
                best_idx = cent_idx
        labels[row_idx] = best_idx

    return labels
#-*- coding: utf-8 -*-
# Demo script: runs the three measures exposed by the local ``similarity``
# module on a small two-entry sample and prints each result on its own line.
import similarity as sim

if __name__ == "__main__":
    # Dictionary: two entries ("A" and "B"), each mapping item names to
    # numeric scores.  Only some item names are shared between the entries.
    dictionary = {
        "A": {"apple": 2.5, "mikan": 2.5, "banana": 5.0, "melon": 2.0},
        "B": {"apple": 2.5, "banana": 1.5, "kiui": 3.0, "melon": 4.0},
    }

    # The parenthesised single-argument print form is used throughout so the
    # script parses on both Python 2 and Python 3 with identical output.

    # Euclidean distance
    value = sim.euclidean(dictionary, "A", "B")
    print(str(value))

    # Pearson correlation
    value = sim.pearson(dictionary, "A", "B")
    print(str(value))

    # Cosine similarity
    value = sim.cosine(dictionary, "A", "B")
    print(str(value))