コード例 #1
0
    def calcular_similitud(self, pair_key, lines):
        """Aggregate the co-ratings of one item pair and emit its similarities.

        ``pair_key`` is unpacked into the two item names; ``lines`` is
        iterated exactly once and must yield ``(rating_x, rating_y)`` pairs,
        one per user who rated both items.

        Yields a single ``((name_x, name_y), (correlation, cosine,
        regularized_correlation, jaccard, n))`` record, where ``n`` is the
        number of co-ratings seen.  The Jaccard slot is a constant ``0.0``
        placeholder in this version.

        NOTE(review): no rescaling of the scores to [0, 1] happens in this
        method; if the numeric sort downstream needs it, it must be done by
        the helper functions -- confirm.
        """
        name_x, name_y = pair_key

        # Running totals required by the correlation / cosine helpers.
        count = 0
        total_x = total_y = 0.0
        squares_x = squares_y = cross = 0.0
        for rating_x, rating_y in lines:
            squares_x += rating_x * rating_x
            squares_y += rating_y * rating_y
            cross += rating_x * rating_y
            total_x += rating_x
            total_y += rating_y
            count += 1

        pearson = correlation(
            count, cross, total_x, total_y, squares_x, squares_y)

        shrunk_pearson = regularized_correlation(
            count, cross, total_x, total_y, squares_x, squares_y,
            PRIOR_COUNT, PRIOR_CORRELATION)

        cosine_score = cosine(cross, sqrt(squares_x), sqrt(squares_y))

        # Jaccard is not computed here; the output slot is kept as 0.0.
        jaccard_score = 0.0

        scores = (pearson, cosine_score, shrunk_pearson, jaccard_score, count)
        yield (name_x, name_y), scores
コード例 #2
0
	def Cal_similarity(self, itemids, ratings):
		'''
		Compute similarity / distance scores for one pair of items.

		itemids -- a 2-element sequence holding the ids of the two items.
		ratings -- an iterable of (x, y) rating pairs, one per user who
		           rated both items.  It is consumed exactly once, so a
		           one-shot iterator is fine.

		Example inputs (key, then the co-ratings for that key):
		(1,2) [(5.0, 4.5), ...]
		(2,3) [(3.1, 3.3), ...]

		Yields ((itemx, itemy), (cos_sim, edis, pdis, n)) when more than 20
		co-ratings were seen, and nothing otherwise.  The helper functions
		are only called when a record is going to be emitted, so empty or
		too-small inputs never reach them.
		'''
		itemx, itemy = itemids

		sum_xx, sum_yy, sum_xy, sumx_y = 0.0, 0.0, 0.0, 0.0
		xs = []
		ys = []
		for x, y in ratings:
			sumx_y += (x - y) * (x - y)
			sum_xx += x * x
			sum_yy += y * y
			# Accumulate the dot product; a plain assignment here would keep
			# only the last pair's product.
			sum_xy += x * y
			xs.append(x)
			ys.append(y)
		n = len(xs)

		# Pairs with too few co-ratings are never emitted, so skip the
		# (possibly zero-dividing) similarity maths for them altogether.
		if n <= 20:
			return

		# Force a float dtype: with integer ratings the in-place mean
		# subtraction would otherwise fail on the int -> float cast.
		dx = np.array(xs, dtype=float)
		dy = np.array(ys, dtype=float)
		dx -= dx.mean()
		dy -= dy.mean()

		# Pearson numerator is the sum of the products of the deviations;
		# the denominators are the root sums of squared deviations.
		cov_xy = np.sum(dx * dy)
		sumx2 = np.sum(dx * dx)
		sumy2 = np.sum(dy * dy)

		cos_sim = cosine(sum_xy, sqrt(sum_xx), sqrt(sum_yy))

		pdis = pearsondistance(cov_xy, sqrt(sumx2), sqrt(sumy2))

		edis = EuclideanDistance(n, sumx_y)

		yield (itemx, itemy), (cos_sim, edis, pdis, n)
コード例 #3
0
    def Cal_similarity(self, itemids, ratings):
        '''
        Compute similarity / distance scores for one pair of items.

        itemids -- a 2-element sequence holding the ids of the two items.
        ratings -- an iterable of (x, y) rating pairs, one per user who
                   rated both items.  It is consumed exactly once, so a
                   one-shot iterator is fine.

        Example inputs (key, then the co-ratings for that key):
        (1,2) [(5.0, 4.5), ...]
        (2,3) [(3.1, 3.3), ...]

        Yields ((itemx, itemy), (cos_sim, edis, pdis, n)) when more than 20
        co-ratings were seen, and nothing otherwise.  The helper functions
        are only called when a record is going to be emitted, so empty or
        too-small inputs never reach them.
        '''
        itemx, itemy = itemids

        sum_xx, sum_yy, sum_xy, sumx_y = 0.0, 0.0, 0.0, 0.0
        xs = []
        ys = []
        for x, y in ratings:
            sumx_y += (x - y) * (x - y)
            sum_xx += x * x
            sum_yy += y * y
            # Accumulate the dot product; a plain assignment here would keep
            # only the last pair's product.
            sum_xy += x * y
            xs.append(x)
            ys.append(y)
        n = len(xs)

        # Pairs with too few co-ratings are never emitted, so skip the
        # (possibly zero-dividing) similarity maths for them altogether.
        if n <= 20:
            return

        # Force a float dtype: with integer ratings the in-place mean
        # subtraction would otherwise fail on the int -> float cast.
        dx = np.array(xs, dtype=float)
        dy = np.array(ys, dtype=float)
        dx -= dx.mean()
        dy -= dy.mean()

        # Pearson numerator is the sum of the products of the deviations;
        # the denominators are the root sums of squared deviations.
        cov_xy = np.sum(dx * dy)
        sumx2 = np.sum(dx * dx)
        sumy2 = np.sum(dy * dy)

        cos_sim = cosine(sum_xy, sqrt(sum_xx), sqrt(sum_yy))

        pdis = pearsondistance(cov_xy, sqrt(sumx2), sqrt(sumy2))

        edis = EuclideanDistance(n, sumx_y)

        yield (itemx, itemy), (cos_sim, edis, pdis, n)
コード例 #4
0
    def calculate_similarity(self, pair_key, lines):
        '''
        Reduce all co-ratings of one item pair into its similarity scores.

        pair_key -- 2-element sequence with the names of item x and item y.
        lines    -- iterable of (rating_x, rating_y) pairs, one per user who
                    rated both items; it is walked exactly once.

        Yields one record of the form

            (item_x, item_y), (corr, cos, reg_corr, jaccard, n)

        where n is the number of co-ratings and jaccard is a fixed 0.0
        placeholder in this version.

        NOTE(review): this method does not rescale the scores to [0, 1];
        if the later numerical sort relies on that, the helper functions
        have to provide it -- confirm.
        '''
        x_name, y_name = pair_key

        n = 0
        acc_x = acc_y = 0.0
        acc_xx = acc_yy = acc_xy = 0.0
        for rx, ry in lines:
            acc_xx += rx * rx
            acc_yy += ry * ry
            acc_xy += rx * ry
            acc_y += ry
            acc_x += rx
            n += 1

        moments = (n, acc_xy, acc_x, acc_y, acc_xx, acc_yy)

        corr = correlation(*moments)
        reg_corr = regularized_correlation(
            *moments, PRIOR_COUNT, PRIOR_CORRELATION)
        cos = cosine(acc_xy, sqrt(acc_xx), sqrt(acc_yy))

        # Jaccard is not computed here; its slot in the output stays 0.0.
        jac = 0.0

        yield (x_name, y_name), (corr, cos, reg_corr, jac, n)
コード例 #5
0
    def calculate_similarity(self, pair_key, lines):
        '''
        Fold the co-rating pairs of a single item pair into summary sums and
        emit the similarity scores derived from them.

        pair_key -- 2-element sequence: (name of item x, name of item y).
        lines    -- iterable of (rating_x, rating_y) tuples, one per user
                    who rated both items; consumed in a single pass.

        Yields exactly one ((item_x, item_y), scores) record, with
        scores = (correlation, cosine, regularized correlation, jaccard, n).
        n counts the co-ratings; jaccard is always 0.0 in this version.

        NOTE(review): no normalisation of the scores to [0, 1] is done in
        this method; if the downstream numerical sort needs it, the helper
        functions must supply it -- confirm.
        '''
        first_item, second_item = pair_key

        pairs_seen = 0
        x_total, y_total = 0.0, 0.0
        x_sq_total, y_sq_total, xy_total = 0.0, 0.0, 0.0
        for x_rating, y_rating in lines:
            x_sq_total += x_rating * x_rating
            y_sq_total += y_rating * y_rating
            xy_total += x_rating * y_rating
            y_total += y_rating
            x_total += x_rating
            pairs_seen += 1

        plain_corr = correlation(
            pairs_seen, xy_total, x_total, y_total, x_sq_total, y_sq_total)

        damped_corr = regularized_correlation(
            pairs_seen, xy_total, x_total, y_total, x_sq_total, y_sq_total,
            PRIOR_COUNT, PRIOR_CORRELATION)

        x_norm = sqrt(x_sq_total)
        y_norm = sqrt(y_sq_total)
        cos_score = cosine(xy_total, x_norm, y_norm)

        # Placeholder: this variant does not compute a Jaccard score.
        jaccard_score = 0.0

        key = (first_item, second_item)
        value = (plain_corr, cos_score, damped_corr, jaccard_score, pairs_seen)
        yield key, value
コード例 #6
0
    def calculate_similarity(self, pair_key, lines):
        """Emit the similarity scores for one item pair.

        ``pair_key`` unpacks into the two item names.  ``lines`` yields
        ``(rating_x, rating_y)`` pairs and is iterated once to build the
        sums passed to ``correlation``, ``regularized_correlation`` and
        ``cosine``.

        Yields one ``((item_x, item_y), (corr, cos, reg_corr, jaccard, n))``
        record; ``jaccard`` is a constant ``0.0`` and ``n`` is the number of
        rating pairs consumed.
        """
        label_x, label_y = pair_key

        # Sufficient statistics of the co-rating pairs.
        stats = {"n": 0, "x": 0.0, "y": 0.0, "xx": 0.0, "yy": 0.0, "xy": 0.0}
        for a, b in lines:
            stats["xx"] += a * a
            stats["yy"] += b * b
            stats["xy"] += a * b
            stats["y"] += b
            stats["x"] += a
            stats["n"] += 1

        base_args = (stats["n"], stats["xy"], stats["x"], stats["y"],
                     stats["xx"], stats["yy"])

        corr_score = correlation(*base_args)
        reg_score = regularized_correlation(
            *base_args, PRIOR_COUNT, PRIOR_CORRELATION)
        cos_score = cosine(stats["xy"], sqrt(stats["xx"]), sqrt(stats["yy"]))

        # Jaccard is not computed in this variant; keep the slot at 0.0.
        yield (label_x, label_y), (
            corr_score, cos_score, reg_score, 0.0, stats["n"])
コード例 #7
0
    def calculate_similarity(self, pair_key, lines):
        """Emit the similarity scores, including Jaccard, for one item pair.

        ``pair_key`` unpacks into the two item names.  ``lines`` yields
        4-tuples ``(rating_x, rating_y, nx_count, ny_count)`` and is
        iterated once.  The two ratings feed the running sums; the two
        counts are converted with ``int`` on every row and only the values
        from the last row are kept for the Jaccard call.

        Yields one ``((item_x, item_y), (corr, cos, reg_corr, jaccard, n))``
        record, where ``n`` is the number of rows consumed.
        """
        label_x, label_y = pair_key

        rows = 0
        total_x = total_y = 0.0
        sq_x = sq_y = cross = 0.0
        second_arg, third_arg = 0, 0
        for rating_x, rating_y, nx_count, ny_count in lines:
            sq_x += rating_x * rating_x
            sq_y += rating_y * rating_y
            cross += rating_x * rating_y
            total_y += rating_y
            total_x += rating_x
            rows += 1
            # NOTE(review): the counts are deliberately kept crossed, exactly
            # as before (ny_count feeds jaccard's 2nd argument, nx_count its
            # 3rd).  Harmless if jaccard is symmetric in those arguments --
            # confirm against its definition.
            second_arg = int(ny_count)
            third_arg = int(nx_count)

        corr_score = correlation(rows, cross, total_x, total_y, sq_x, sq_y)

        reg_score = regularized_correlation(
            rows, cross, total_x, total_y, sq_x, sq_y,
            PRIOR_COUNT, PRIOR_CORRELATION)

        cos_score = cosine(cross, sqrt(sq_x), sqrt(sq_y))

        jaccard_score = jaccard(rows, second_arg, third_arg)

        scores = (corr_score, cos_score, reg_score, jaccard_score, rows)
        yield (label_x, label_y), scores