Example #1
0
def left_eigenvector(matrix):
    """
    Calculates the left eigenvector for a given matrix.

    The eigenvector returned is the one whose eigenvalue has a real part
    strictly between 0.99 and 1.01.

    Arguments:
        matrix ([[float]]): The dampened, "Google" matrix. Because the
            decomposition is requested with ``overwrite_a=True``, an array
            passed in here may be modified by the call.

    Returns:
        left_eigenvector ([float]): A vector of weights for each site (the
            matching column of the left-eigenvector array), or None if the
            decomposition fails or no eigenvalue lies close to 1.
    """
    try:
        evals, evecs = linalg.eig(a=matrix, left=True, right=False, overwrite_a=True, check_finite=False)
    except LinAlgError:
        return None

    # Eigenvalues come back as complex numbers; compare on the real part so the
    # range test is well defined, and stop at the first match.
    for position, eigenvalue in enumerate(evals):
        if 0.99 < eigenvalue.real < 1.01:
            return evecs[:, position]

    # No eigenvalue near 1: report failure rather than indexing with a
    # "not found" sentinel, which would silently select the last column.
    return None
    def create_matrix(self, cleaned_small_orgs):
        """
        Convert the cleaned small_orgs to a matrix.

        Each organization produces one row. Cell ``j`` of that row is the share
        of the organization's references (``ref.count`` divided by
        ``page_rank_info.total``) that point at organization ``j``. References
        to the organization itself, or to domains not present in the list, are
        skipped. A row with no usable references is filled evenly, and every
        row is then adjusted so that it sums to 1.0.

        Arguments:
            cleaned_small_orgs ([SmallOrganization]): The list of all organizations in the form of SmallOrganization
                objects that has passed through the cleanup_data function.

        Returns:
            matrix [[float]]: A 2D matrix of floats on which to perform the PageRank algorithm,
                or None if the input list is empty.

        Raises:
            Exception: If the number of rows built differs from the number of organizations.
        """

        org_count = len(cleaned_small_orgs)

        if org_count <= 0:
            return None

        default_val = 1 / float(org_count)

        # Resolve domain -> row position once instead of scanning the whole list
        # for every reference. The first occurrence of a domain wins.
        # NOTE(review): assumes org_domain values are hashable (e.g. strings).
        index_by_domain = {}
        for position, candidate in enumerate(cleaned_small_orgs):
            index_by_domain.setdefault(candidate.org_domain, position)

        matrix = []
        for org in cleaned_small_orgs:
            # initialize row to zeros
            row = [0.0] * org_count
            references = org.page_rank_info.references

            total = 0.0

            for ref in references:
                # don't count self for now...
                if ref.org_domain == org.org_domain:
                    continue

                # find index of the referenced organization in the list
                index = index_by_domain.get(ref.org_domain, -1)
                if index != -1:
                    share = ref.count / float(org.page_rank_info.total)
                    # accumulate so repeated references to one domain stay
                    # consistent with the running total below
                    row[index] += share
                    # track total to make sure this all adds to 1.0
                    total += share

            # if zero, org didn't reference other organizations,
            # set all values evenly to simulate random navigation behavior
            if total == 0.0:
                for i in range(org_count):
                    row[i] = default_val
                    total += default_val

            # if our total doesn't add to exactly 1.0, compensate.
            if total != 1.0:
                # first, try to distribute equally
                diff = 1.0 - total
                # fall back to org_count if there are no references
                refs_count = len(references) or org_count
                diff_to_add = diff / refs_count
                for i, value in enumerate(row):
                    if value != 0.0:
                        row[i] = value + diff_to_add
                        total += diff_to_add

                # if still not perfect, just alter the first val that's big enough to not be a huge deal
                if total != 1.0:
                    diff = 1.0 - total
                    threshold = abs(diff_to_add)
                    target = next((i for i, value in enumerate(row) if value > threshold), None)
                    if target is None:
                        # No cell is large enough; use the largest one rather
                        # than touching an unrelated cell at the end of the row.
                        target = max(range(org_count), key=row.__getitem__)
                    row[target] += diff

            # add row to matrix
            matrix.append(row)

        # sanity check
        if len(matrix) != org_count:
            raise Exception("Error: The matrix constructed is not square in preprocessors.py - create_matrix().")

        return matrix