def left_eigenvector(matrix):
    """
    Calculates the left eigenvector for a given matrix.

    The eigenvector returned is the one whose eigenvalue lies strictly
    between 0.99 and 1.01, i.e. the (approximately) unit eigenvalue.

    Arguments:
        matrix ([[float]]): The dampened, "Google" matrix. Because the
            decomposition is requested with overwrite_a=True, the input
            may be modified in place when it is already an ndarray.

    Returns:
        left_eigenvector ([float]): A vector of weights for each site, or
            None when the decomposition fails or when no eigenvalue close
            to 1 exists.
    """
    try:
        evals, evecs = linalg.eig(a=matrix, left=True, right=False,
                                  overwrite_a=True, check_finite=False)
    except LinAlgError:
        return None

    # Position of the first eigenvalue that is (numerically) equal to 1.
    evec_ind = next(
        (pos for pos, val in enumerate(evals) if val > .99 and val < 1.01),
        -1,
    )
    if evec_ind == -1:
        # Without this guard, -1 would index the *last* column and hand back
        # an unrelated eigenvector as if it were the stationary one.
        return None
    return evecs[:, evec_ind]
def create_matrix(self, cleaned_small_orgs):
    """
    Convert the cleaned small_orgs to a matrix.

    Each row describes one organization: column j holds the share of that
    organization's references that point at organization j. Self-references
    and references to organizations missing from the list are ignored. An
    organization that ends up with no usable references gets a uniform row
    (random navigation). Every row is then adjusted so it sums to 1.0.

    Arguments:
        cleaned_small_orgs ([SmallOrganization]): The list of all
            organizations in the form of SmallOrganization objects that has
            passed through the cleanup_data function. Each item is read for
            `org_domain` and `page_rank_info` (`references`, `total`); each
            reference is read for `org_domain` and `count`.
            NOTE(review): `org_domain` is assumed hashable (e.g. a string).

    Returns:
        matrix [[float]]: A 2D matrix of floats on which to perform the
            PageRank algorithm, or None when the input list is empty.

    Raises:
        Exception: If the constructed matrix does not have one row per
            organization.
    """
    org_count = len(cleaned_small_orgs)
    if org_count <= 0:
        return None

    default_val = 1 / float(org_count)

    # Resolve domain -> first index once, rather than scanning the whole
    # organization list again for every single reference.
    index_by_domain = {}
    for position, candidate in enumerate(cleaned_small_orgs):
        index_by_domain.setdefault(candidate.org_domain, position)

    matrix = []
    for org in cleaned_small_orgs:
        # initialize row to zeros
        row = [0] * org_count
        total = 0.0
        info = org.page_rank_info

        for ref in info.references:
            # don't count self for now...
            if ref.org_domain == org.org_domain:
                continue
            index = index_by_domain.get(ref.org_domain, -1)
            if index != -1:
                weight = ref.count / float(info.total)
                # Accumulate so repeated references to the same domain keep
                # the row consistent with the tracked total.
                row[index] += weight
                total += weight

        # if zero, org didn't reference other organizations,
        # set all values evenly to simulate random navigation behavior
        if total == 0.0:
            for i in range(org_count):
                row[i] = default_val
                total += default_val

        # if our total doesn't add to exactly 1.0, compensate.
        if total != 1.0:
            # first, distribute the shortfall equally over the entries that
            # actually receive it (the non-zero ones), so skipped references
            # do not leave a large remainder behind.
            diff = 1.0 - total
            receivers = [i for i, value in enumerate(row) if value != 0.0]
            diff_to_add = diff / len(receivers) if receivers else 0.0
            for i in receivers:
                row[i] += diff_to_add
                total += diff_to_add

            # if still not perfect (floating point residue), fold the rest
            # into the first value that's big enough to not be a huge deal
            if total != 1.0:
                diff = 1.0 - total
                threshold = abs(diff_to_add)
                target = next(
                    (i for i, value in enumerate(row) if value > threshold),
                    -1,
                )
                if target == -1:
                    # Nothing exceeds the threshold: use the largest entry
                    # instead of silently touching the last column.
                    target = max(range(org_count), key=row.__getitem__)
                row[target] += diff

        # add row to matrix
        matrix.append(row)

    # sanity check
    if len(matrix) != org_count:
        raise Exception("Error: The matrix constructed is not square in preprocessors.py - create_matrix().")

    return matrix