def _rank_documents(self, doc_list, threshold, tolerance):
    """
    Score each document by its position in a thresholded similarity graph.

    Builds an n x n matrix whose entry (i, j) is 1.0 when
    ``cosine(doc_i.fv, doc_j.fv, distance=False)`` is strictly greater
    than ``threshold`` and 0.0 otherwise, divides every row by the number
    of ones it contains, and hands the result to ``self.power_method``.
    The i-th value that call returns is stored on ``doc_list[i].dist``.

    Args:
        doc_list: Sized sequence of documents. Each one must expose an
            ``fv`` attribute accepted by ``cosine`` and allow a ``dist``
            attribute to be assigned.
        threshold: Value a pairwise ``cosine`` result must exceed for the
            pair to count as connected.
        tolerance: Forwarded unchanged to ``self.power_method``
            (presumably its convergence tolerance -- confirm there).

    Returns:
        The same ``doc_list`` object, with ``dist`` set on every element.
    """
    n = len(doc_list)

    # Pairwise values are written into a float array first and compared
    # afterwards, so the threshold test sees exactly the stored value.
    similarity = numpy.zeros([n, n])
    for i, doc_i in enumerate(doc_list):
        for j, doc_j in enumerate(doc_list):
            similarity[i, j] = cosine(doc_i.fv, doc_j.fv, distance=False)

    # 1.0 where the pair clears the threshold, 0.0 everywhere else.
    adjacency_matrix = (similarity > threshold).astype(float)

    # Row degree = number of connections of document i.
    degree = adjacency_matrix.sum(axis=1)
    # A row without any connection would be divided by zero; using 1.0
    # instead leaves that row as all zeros rather than NaN.
    degree[degree == 0] = 1.0
    adjacency_matrix /= degree[:, numpy.newaxis]

    scores = self.power_method(adjacency_matrix, tolerance)
    for i, document in enumerate(doc_list):
        document.dist = scores[i]
    return doc_list
def _rank_documents(self, doc_list, threshold, tolerance):
    """
    Score each document by its position in a thresholded similarity graph.

    Builds an n x n matrix whose entry (i, j) is 1.0 when
    ``cosine(doc_i.fv, doc_j.fv, distance=False)`` is strictly greater
    than ``threshold`` and 0.0 otherwise, divides every row by the number
    of ones it contains, and hands the result to ``self.power_method``.
    The i-th value that call returns is stored on ``doc_list[i].dist``.

    Args:
        doc_list: Sized sequence of documents. Each one must expose an
            ``fv`` attribute accepted by ``cosine`` and allow a ``dist``
            attribute to be assigned.
        threshold: Value a pairwise ``cosine`` result must exceed for the
            pair to count as connected.
        tolerance: Forwarded unchanged to ``self.power_method``
            (presumably its convergence tolerance -- confirm there).

    Returns:
        The same ``doc_list`` object, with ``dist`` set on every element.
    """
    n = len(doc_list)

    # Pairwise values are written into a float array first and compared
    # afterwards, so the threshold test sees exactly the stored value.
    similarity = numpy.zeros([n, n])
    for i, doc_i in enumerate(doc_list):
        for j, doc_j in enumerate(doc_list):
            similarity[i, j] = cosine(doc_i.fv, doc_j.fv, distance=False)

    # 1.0 where the pair clears the threshold, 0.0 everywhere else.
    adjacency_matrix = (similarity > threshold).astype(float)

    # Row degree = number of connections of document i.
    degree = adjacency_matrix.sum(axis=1)
    # A row without any connection would be divided by zero; using 1.0
    # instead leaves that row as all zeros rather than NaN.
    degree[degree == 0] = 1.0
    adjacency_matrix /= degree[:, numpy.newaxis]

    scores = self.power_method(adjacency_matrix, tolerance)
    for i, document in enumerate(doc_list):
        document.dist = scores[i]
    return doc_list
def _calculate_document_scores(self):
    """
    Score every document in ``self.documents`` against the centroid.

    For each document, calls ``cosine(self.centroid, document.fv)`` and
    stores the result on that document's ``dist`` attribute. Nothing is
    returned; the documents are updated in place.

    NOTE(review): ``cosine`` is called with its default arguments here,
    while ``_rank_documents`` passes ``distance=False`` to the same
    helper. The value stored may therefore be a distance rather than a
    similarity -- confirm against the definition of ``cosine``.
    """
    # Only the documents themselves are needed, so iterate the values
    # directly instead of re-looking each one up by key.
    for document in self.documents.values():
        document.dist = cosine(self.centroid, document.fv)
def _calculate_document_scores(self):
    """
    Score every document in ``self.documents`` against the centroid.

    For each document, calls ``cosine(self.centroid, document.fv)`` and
    stores the result on that document's ``dist`` attribute. Nothing is
    returned; the documents are updated in place.

    NOTE(review): ``cosine`` is called with its default arguments here,
    while ``_rank_documents`` passes ``distance=False`` to the same
    helper. The value stored may therefore be a distance rather than a
    similarity -- confirm against the definition of ``cosine``.
    """
    # Only the documents themselves are needed, so iterate the values
    # directly instead of re-looking each one up by key.
    for document in self.documents.values():
        document.dist = cosine(self.centroid, document.fv)