예제 #1
0
    def _rank_documents(self, doc_list, threshold, tolerance):
        """
        Score documents via a thresholded, row-normalised similarity graph.

        Every ordered pair of documents is compared with
        ``cosine(a.fv, b.fv, distance=False)``.  Pairs whose value is
        strictly greater than ``threshold`` become edges of weight 1.0; all
        other pairs get weight 0.  Each row of the resulting matrix is then
        divided by its number of edges and the matrix is handed to
        ``self.power_method`` together with ``tolerance``.  The value
        returned for position ``i`` is stored on ``doc_list[i].dist``.

        :param doc_list: sequence of objects exposing an ``fv`` attribute;
            each one receives a ``dist`` attribute.
        :param threshold: a pair is linked only when its ``cosine`` value is
            strictly above this number.
        :param tolerance: forwarded unchanged to ``self.power_method``
            (presumably its convergence tolerance -- confirm there).
        :returns: the same ``doc_list`` object, with ``dist`` set on every
            element.
        """
        n = len(doc_list)

        # Raw pairwise values.  Every ordered pair (including i == j) is
        # evaluated, exactly one ``cosine`` call per matrix cell.
        similarity = numpy.zeros([n, n])
        for i, doc_i in enumerate(doc_list):
            for j, doc_j in enumerate(doc_list):
                similarity[i, j] = cosine(doc_i.fv, doc_j.fv, distance=False)

        # Binarise: 1.0 where the value exceeds the threshold, 0.0 elsewhere.
        adjacency_matrix = (similarity > threshold).astype(float)

        # Number of edges leaving each row.
        degree = adjacency_matrix.sum(axis=1)
        # A row without any edge keeps a divisor of 1 so the division below
        # is well defined; such a row simply stays all zeros.
        degree[degree == 0] = 1.0

        # Divide every row by its own degree (broadcast over the columns).
        adjacency_matrix /= degree[:, numpy.newaxis]

        scores = self.power_method(adjacency_matrix, tolerance)

        for i, document in enumerate(doc_list):
            document.dist = scores[i]
        return doc_list
예제 #2
0
    def _rank_documents(self, doc_list, threshold, tolerance):
        """
        Rank documents using a binarised pairwise-similarity matrix.

        Steps performed:

        1. ``cosine(a.fv, b.fv, distance=False)`` is evaluated for every
           ordered pair of documents in ``doc_list``.
        2. Values strictly greater than ``threshold`` are replaced by 1.0,
           everything else by 0.
        3. Each row is divided by the count of 1.0 entries it contains
           (rows with none are divided by 1 and remain all zeros).
        4. The matrix and ``tolerance`` are passed to ``self.power_method``;
           the i-th returned value is written to ``doc_list[i].dist``.

        :param doc_list: sequence of objects with an ``fv`` attribute.
        :param threshold: cut-off applied to the ``cosine`` values.
        :param tolerance: passed straight through to ``self.power_method``
            (its exact meaning is defined there -- not visible here).
        :returns: ``doc_list`` itself, after ``dist`` has been set on each
            element.
        """
        n = len(doc_list)

        # One ``cosine`` call per cell, diagonal included.
        pairwise = numpy.zeros([n, n])
        for row, row_doc in enumerate(doc_list):
            for col, col_doc in enumerate(doc_list):
                pairwise[row, col] = cosine(row_doc.fv, col_doc.fv,
                                            distance=False)

        # Keep only the links above the threshold, as 0.0 / 1.0 weights.
        adjacency_matrix = (pairwise > threshold).astype(float)

        # Per-row link count; guard against division by zero for rows that
        # ended up with no link at all.
        degree = adjacency_matrix.sum(axis=1)
        degree[degree == 0] = 1.0

        # Row-wise normalisation via broadcasting.
        adjacency_matrix /= degree[:, numpy.newaxis]

        scores = self.power_method(adjacency_matrix, tolerance)

        for position, document in enumerate(doc_list):
            document.dist = scores[position]
        return doc_list
예제 #3
0
 def _calculate_document_scores(self):
     """
     Score every stored document against the centroid.

     For each entry of ``self.documents`` the value of
     ``cosine(self.centroid, document.fv)`` is computed (with ``cosine``'s
     default arguments) and written to that entry's ``dist`` attribute.
     Nothing is returned.
     """
     for key, doc in self.documents.iteritems():
         self.documents[key].dist = cosine(self.centroid, doc.fv)
예제 #4
0
 def _calculate_document_scores(self):
     """
     Attach a centroid-relative score to each document.

     Walks ``self.documents`` and, for every (key, document) pair, stores
     the result of ``cosine(self.centroid, document.fv)`` in the ``dist``
     attribute of ``self.documents[key]``.  The method has no return value.
     """
     for doc_key, entry in self.documents.iteritems():
         score = cosine(self.centroid, entry.fv)
         target = self.documents[doc_key]
         target.dist = score