Example #1
import numpy as np
import numpy.ma as ma


def correlation_map(pcs, field):
    """Correlation maps for a set of PCs and a spatial-temporal field.

    Given an array where the columns are PCs (e.g., as output from
    :py:meth:`eof2.EofSolve.pcs`) and an array containing a
    spatial-temporal field where time is the first dimension, one
    correlation map per PC is computed.

    The field must have the same temporal dimension as the PCs. Any
    number of spatial dimensions (including zero) are allowed in the
    field and there can be any number of PCs.

    **Arguments:**

    *pcs*
        PCs as the columns of an array.

    *field*
        Spatial-temporal field with time as the first dimension.

    """
    # Check the PCs and field for validity, flatten the arrays ready for
    # the computation, and remove the mean along the leading dimension.
    # (`_check_flat_center` is a private helper defined in the same module.)
    pcs_cent, field_cent, out_shape = _check_flat_center(pcs, field)
    # Compute the standard deviation of the PCs and the fields along the time
    # dimension (the leading dimension).
    pcs_std = pcs_cent.std(axis=0)
    field_std = field_cent.std(axis=0)
    # Set the divisor.
    div = np.float64(pcs_cent.shape[0])
    # Compute the correlation map.
    cor = ma.dot(field_cent.T, pcs_cent).T / div
    cor /= ma.outer(pcs_std, field_std)
    # Return the correlation with the appropriate shape.
    return cor.reshape(out_shape)
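
A minimal self-contained sketch of the same computation on toy data, with the private _check_flat_center step replaced by explicit centring and flattening (the array shapes here are illustrative assumptions, not from the original module):

import numpy as np

ntime, nlat, nlon, npcs = 100, 4, 5, 2
rng = np.random.default_rng(0)
pcs = rng.standard_normal((ntime, npcs))          # columns are PCs
field = rng.standard_normal((ntime, nlat, nlon))  # time-leading field

# Centre along time and flatten the spatial dimensions.
pcs_c = pcs - pcs.mean(axis=0)
fld_c = (field - field.mean(axis=0)).reshape(ntime, -1)

# Covariance of each grid point with each PC, then normalize to correlation.
cov = fld_c.T @ pcs_c / ntime
cor = cov / np.outer(fld_c.std(axis=0), pcs_c.std(axis=0))
cor = cor.T.reshape(npcs, nlat, nlon)             # one map per PC
print(cor.shape)  # (2, 4, 5)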
Example #2
from numpy import zeros, diag, exp, outer

def hessian(X, theta, l=1.0):
    """Regularized Hessian of the average log-likelihood.

    `log_prob` and `a` are helper functions assumed to be defined
    elsewhere in the module.
    """
    N, P = X.shape

    result = zeros((len(theta), len(theta)))
    # L2 regularization on every weight except the intercept term.
    reg = diag([0] + [l] * (P - 1))

    for i, (p, a_i) in enumerate(zip(log_prob(X, theta), a(X, theta))):
        # Accumulate the outer product x_i x_i^T weighted by exp(2*p - a_i).
        result += exp(2 * p - a_i) * outer(X[i, :], X[i, :])

    result += reg

    return result / float(N)
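
For context, a self-contained sketch in the same spirit for plain logistic regression, where the per-sample weight on each outer product is sigma_i * (1 - sigma_i); the log_prob/a helpers above are replaced by an explicit sigmoid, so this illustrates the pattern rather than reproducing the original model:

import numpy as np

def logistic_hessian(X, theta, l=1.0):
    # Sketch: Hessian of the (average) L2-regularized negative log-likelihood.
    N, P = X.shape
    sigma = 1.0 / (1.0 + np.exp(-X @ theta))   # predicted probabilities
    W = sigma * (1.0 - sigma)                  # per-sample curvature weights
    H = (X * W[:, None]).T @ X                 # sum_i w_i * x_i x_i^T
    H += np.diag([0.0] + [l] * (P - 1))        # don't penalize the intercept
    return H / float(N)

X = np.random.default_rng(1).standard_normal((50, 3))
theta = np.zeros(3)
print(logistic_hessian(X, theta).shape)  # (3, 3)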
Example #3
import numpy as np
import numpy.ma as ma


def correlation_map(pcs, var_list):
    """DIFFERENT METHOD FOR CORRELATION MAPS - NO PVALUE
    Correlation maps for a set of PCs and a spatial-temporal field.
    Given an array where the columns are PCs and an array containing spatial-temporal
    data where the first dimension represents time, one correlation map
    per PC is computed.
    The field must have the same temporal dimension as the PCs. Any
    number of spatial dimensions (including zero) are allowed in the
    field and there can be any number of PCs.
    **Arguments:**
    *pcs*
        PCs as the columns of an array.
    *var_list*
        list of Spatial-temporal fields with time as the first dimension.
    
    **Returns:**
    *correlation_maps*
        An array with the correlation maps reshaped to the data array size.

    """
    ntime, neofs = pcs.shape
    _, nlat, nlon = var_list[0].shape

    # Flatten data to [time x space] (`flatten_array` is a helper assumed
    # to be defined elsewhere in the module).
    flat_var = flatten_array(var_list)

    field = flat_var[0]
    # remove the mean along the leading dimension.
    pcs_cent = pcs - pcs.mean(axis=0)
    field_cent = field - field.mean(axis=0)

    # Compute the standard deviation of the PCs and the fields along the time
    # dimension (the leading dimension).
    pcs_std = pcs_cent.std(axis=0)
    field_std = field_cent.std(axis=0)
    # Set the divisor.
    div = np.float64(pcs_cent.shape[0])
    # Compute the correlation map.
    cor = ma.dot(field_cent.T, pcs_cent).T / div
    cor = ma.masked_invalid(cor)
    # divide by std dev of pc * std dev of field
    cor /= ma.outer(pcs_std, field_std)

    # Reshape correlation results
    # Reshape spatial dim back to 2D map
    cormap = np.reshape(cor, (neofs, nlat, nlon))

    return cormap
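
flatten_array is not shown above; one plausible, purely hypothetical implementation consistent with how it is called would flatten each field's spatial dimensions into a single axis and mask invalid values:

import numpy as np
import numpy.ma as ma

def flatten_array(var_list):
    # Hypothetical helper: reshape each (time, lat, lon) field to
    # (time, lat*lon) and mask NaN/inf entries.
    return [ma.masked_invalid(np.reshape(v, (v.shape[0], -1))) for v in var_list]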
Example #4
    # From numpy.ma's test suite. `arange`, `masked`, `array`, `take`,
    # `transpose`, `inner`, `outer`, and `filled` are the numpy.ma
    # versions; `assert_` and the `eq` comparison helper come from the
    # accompanying test utilities.
    def test_testTakeTransposeInnerOuter(self):
        # Test of take, transpose, inner, outer products
        x = arange(24)
        y = np.arange(24)
        x[5:6] = masked  # mask one element of the masked-array version
        x = x.reshape(2, 3, 4)
        y = y.reshape(2, 3, 4)
        assert_(eq(np.transpose(y, (2, 0, 1)), transpose(x, (2, 0, 1))))
        assert_(eq(np.take(y, (2, 0, 1), 1), take(x, (2, 0, 1), 1)))
        # Filling masked entries with 0 makes the plain-numpy results
        # comparable to the masked inner/outer products.
        assert_(eq(np.inner(filled(x, 0), filled(y, 0)), inner(x, y)))
        assert_(eq(np.outer(filled(x, 0), filled(y, 0)), outer(x, y)))
        y = array(['abc', 1, 'def', 2, 3], object)
        y[2] = masked
        t = take(y, [0, 3, 4])
        assert_(t[0] == 'abc')
        assert_(t[1] == 2)
        assert_(t[2] == 3)
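
As a quick illustration of the behavior these assertions rely on, a minimal standalone snippet (plain NumPy, not part of the test suite):

import numpy as np
import numpy.ma as ma

x = ma.arange(4)
x[1] = ma.masked
# ma.outer propagates the mask; filling with 0 first matches np.outer
# on the unmasked entries, which is what the test's eq() check exploits.
print(ma.outer(x, x))
print(np.outer(x.filled(0), x.filled(0)))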
Example #5
    # Assumes module-level imports such as: random, numpy as np,
    # `from numpy import divide, outer, sqrt`,
    # `from numpy.random import random_sample`,
    # `from collections import defaultdict`, and the project's
    # SparseMatrix class.
    def initialize_cluster_centers(self, pXY, K):
        """Initialize the cluster assignments along each axis by first
        selecting K centers and then mapping each row to its closest
        center under cosine similarity.

        Args:
            pXY: original data matrix
            K: number of clusters desired in each dimension

        Returns:
            new_C: a list (one per axis) giving the cluster id assigned
                to each index along that axis.
        """
        if not isinstance(pXY, SparseMatrix):
            raise TypeError("Matrix argument to initialize_cluster_centers "
                            "is not an instance of SparseMatrix.")
        new_C = [[-1] * Ni for Ni in pXY.N]

        for axis in range(len(K)):  # loop over each dimension
            # choose cluster centers
            axis_length = pXY.N[axis]
            center_indices = random.sample(range(axis_length), K[axis])
            cluster_ids = {}
            for i in range(K[axis]):  # assign identifiers to clusters
                center_index = center_indices[i]
                cluster_ids[center_index] = i
            centers = defaultdict(lambda: defaultdict(float))  # all nonzero indices for each center
            for coords in pXY.nonzero_elements:
                coord_this_axis = coords[axis]
                if coord_this_axis in cluster_ids:  # this coordinate is a center
                    # coords without the current axis
                    reduced_coords = tuple(coords[i] for i in range(len(coords)) if i != axis)
                    # (cluster_id, other coords) -> value
                    centers[cluster_ids[coord_this_axis]][reduced_coords] = pXY.nonzero_elements[coords]

            # assign rows to clusters
            scores = np.zeros(shape=(pXY.N[axis], K[axis])) # scores: axis_size x cluster_number
            denoms_P = np.zeros(shape=(pXY.N[axis]))
            denoms_Q = np.zeros(shape=(K[axis]))
            for coords in pXY.nonzero_elements:
                coord_this_axis = coords[axis]
                if coord_this_axis in center_indices:
                    continue  # don't reassign cluster centers, please
                reduced_coords = tuple(coords[i] for i in range(len(coords)) if i != axis)
                for cluster_index in cluster_ids:
                    xhat = cluster_ids[cluster_index]  # need cluster ID, not the axis index
                    if reduced_coords in centers[xhat]:  # overlapping point
                        P_i = pXY.nonzero_elements[coords]
                        Q_i = centers[xhat][reduced_coords]
                        scores[coords[axis]][xhat] += P_i * Q_i  # now doing based on cosine similarity
                        denoms_P[coords[axis]] += P_i * P_i  # magnitude of this slice of original matrix
                        denoms_Q[xhat] += Q_i * Q_i  # magnitude of cluster centers

            # normalize scores
            scores = divide(scores, outer(sqrt(denoms_P), sqrt(denoms_Q)))
            scores[scores == 0] = -1.0

            # add random jitter to scores to handle tie-breaking
            scores += self.jitter_max * random_sample(scores.shape)
            new_cXYi = list(scores.argmax(1))  # this needs to be argmax because cosine similarity

            # make sure to assign the cluster centers to themselves
            for center_index in cluster_ids:
                new_cXYi[center_index] = cluster_ids[center_index]

            # ensure numbers of clusters are correct
            self.ensure_correct_number_clusters(new_cXYi, K[axis])
            new_C[axis] = new_cXYi
        return new_C
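
The core assignment step is cosine similarity between each row and each center; a dense-matrix sketch of that idea (illustrative only, not the SparseMatrix version above):

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((8, 5))                              # 8 rows, 5 features
centers = X[rng.choice(8, size=3, replace=False)]   # pick 3 rows as centers

# Cosine similarity of every row against every center.
num = X @ centers.T
denom = np.outer(np.linalg.norm(X, axis=1), np.linalg.norm(centers, axis=1))
scores = num / denom
assignments = scores.argmax(axis=1)                 # closest center per row
print(assignments)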
Example #6
# Assumes module-level imports such as: numpy as np, numpy.ma as ma,
# `from warnings import filterwarnings`, and a project helper
# `nan_or_masked()` that flags NaN or masked entries in an array.
def smooth1d_with_holes(y, n, n_near_boundaries=False, gaussian=True):
    """Hand-rolled smoothing routine that deals with holes (NaN or masked values).

    Inputs
    ------
    y: 1D array
        Signal to smooth
    n: int
        Length of smoothing window
    n_near_boundaries: bool or int
        If True, the 1st, 2nd, ..., (n/2)-th values are all set to
        mean(y[:n]), and likewise at the other end.
        If an int m, the 1st, 2nd, ..., m-th values are all mean(y[:m]).
    gaussian: bool
        If True, use a Gaussian weighting function instead of a top-hat.


    Overall, it's a simple n-step moving average, but one that deals with edges
    by doing the following (using n = 5 as example):
    1st point: avg(1st)
    2nd point: avg(1st, 2nd, 3rd)
    3rd point: avg(1st, ..., 5th)
    4th point: avg(2nd, ..., 6th)
    5th point: avg(3rd, ..., 7th)
    and similarly at the other end
    """
    half_n_ceil = np.ceil(n/2).astype(int)
    half_n_floor = np.floor(n/2).astype(int)
    # Create a matrix of values that incrementally shift to the right by one
    # index per row
    # For most rows, this means ith column will contain i-n//2 to i+n//2 values
    y_matrix = ma.outer(np.ones(n), y)
    y_matrix.unshare_mask()
    for i, row in enumerate(y_matrix):
        y_matrix[i, :] = np.roll(row, i - half_n_floor)

    # For earlier and later columns, we need to remove some of the values that
    # are shifted from the other end. Do this by concatenating triangular blocks
    # to make arrowhead boolean blocks at each end
    bottom_left = np.tri(half_n_ceil, half_n_ceil, k=-1).astype(bool)
    top_right = ~bottom_left
    left = np.vstack((np.fliplr(top_right), bottom_left))

    # if n is odd (n % 2), remove first row of 'left' to leave symmetrical block
    left = left[1:, :] if (n % 2) else left

    right = np.fliplr(left)

    # Change unwanted values to NaN
    filterwarnings('ignore', '.*setting an item on a masked array*.')
    y_matrix[:, :half_n_ceil][left] = np.nan
    y_matrix[:, -half_n_ceil:][right] = np.nan

    y_matrix = ma.masked_invalid(y_matrix)

    if gaussian:
        gaussian_kernel = np.exp(-np.linspace(-1.5, 1.5, n)**2)
        gaussian_kernel = gaussian_kernel[:, np.newaxis]*np.ones_like(y_matrix)
        gaussian_kernel = ma.masked_where(y_matrix.mask, gaussian_kernel)
        smoothed = np.sum(
            y_matrix*gaussian_kernel, axis=0)/np.sum(gaussian_kernel, axis=0)
    else:
        # Take mean to give smoothed result
        smoothed = np.nanmean(y_matrix, axis=0)

    # Any values that were masked or nan to start with are converted to nan
    smoothed[nan_or_masked(y)] = np.nan

    # Constant value near boundaries
    n_bound = n if isinstance(n_near_boundaries, bool) else n_near_boundaries
    if n_near_boundaries:
        assert_msg = 'n_near_boundaries is not finite'
        assert np.isfinite(smoothed[n_bound//2]), assert_msg
        assert np.isfinite(smoothed[-n_bound//2]), assert_msg
        smoothed[:n_bound//2] = smoothed[n_bound//2]
        smoothed[-n_bound//2:] = smoothed[-n_bound//2]

    return smoothed
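
A minimal usage sketch on synthetic data with a hole, assuming it runs in the same module as the function with the imports noted above in place; the project's nan_or_masked helper is stubbed here for the demo:

import numpy as np
import numpy.ma as ma
from warnings import filterwarnings

def nan_or_masked(y):
    # Stub of the assumed project helper: True where y is NaN or masked.
    return ma.getmaskarray(ma.masked_invalid(y))

t = np.linspace(0, 2 * np.pi, 100)
y = np.sin(t) + 0.1 * np.random.default_rng(0).standard_normal(100)
y[40:45] = np.nan                       # punch a hole in the signal
smoothed = smooth1d_with_holes(y, 9)    # 9-point Gaussian-weighted window
print(np.isnan(smoothed[40:45]).all())  # the hole stays NaN in the output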