def correlation_map(pcs, field):
    """Compute one correlation map per PC against a spatial-temporal field.

    Each column of *pcs* is a PC time series (e.g., as output from
    :py:meth:`eof2.EofSolve.pcs`). *field* is a spatial-temporal array
    with time as its first dimension; that dimension must match the
    temporal dimension of the PCs. The field may have any number of
    spatial dimensions (including zero) and any number of PCs may be
    supplied.

    **Arguments:**

    *pcs*
        PCs as the columns of an array.

    *field*
        Spatial-temporal field with time as the first dimension.

    """
    # Validate both inputs, flatten them for the computation and subtract
    # the mean along the leading (time) dimension.
    centered_pcs, centered_field, result_shape = _check_flat_center(pcs, field)

    # Number of samples along time, used to normalise the cross products.
    n_samples = np.float64(centered_pcs.shape[0])

    # Spread of every PC and every field point along the time dimension.
    pc_spread = centered_pcs.std(axis=0)
    field_spread = centered_field.std(axis=0)

    # Covariance of each PC with each field point, one row per PC.
    correlation = ma.dot(centered_field.T, centered_pcs).T / n_samples

    # Normalise in place by the product of the standard deviations.
    correlation /= ma.outer(pc_spread, field_spread)

    # Hand the maps back in the shape reported by the validation helper.
    return correlation.reshape(result_shape)
def hessian(X, theta, l=1.0):
    """Sum weighted outer products of the rows of X, add a diagonal penalty, divide by N.

    For every row ``X[i, :]`` the outer product of the row with itself is
    scaled by ``exp(2*p - a_i)``, where ``p`` and ``a_i`` are the i-th items
    produced by ``log_prob(X, theta)`` and ``a(X, theta)``. A diagonal
    matrix holding ``0`` in the first position and ``l`` in the remaining
    ``P - 1`` positions is added, and the total is divided by the number of
    rows ``N``.

    NOTE(review): presumably the Hessian of an l-regularised objective with
    an unpenalised first (intercept) coefficient -- confirm against the
    definitions of ``log_prob`` and ``a``.

    Args:
        X: 2-D array; ``X.shape`` is unpacked as ``(N, P)``.
        theta: coefficient sequence; only ``len(theta)`` is used here
            directly, the rest is forwarded to ``log_prob`` and ``a``.
        l: value placed on the penalised diagonal entries.

    Returns:
        A ``len(theta) x len(theta)`` array.
    """
    N, P = X.shape
    result = zeros((len(theta), len(theta)))
    # Diagonal penalty: first entry left at zero, the other P-1 set to l.
    reg = diag([0] + ([l]*(P-1)))
    for i, (p, a_i) in enumerate(zip(log_prob(X, theta), a(X, theta))):
        # Row i contributes its outer product scaled by exp(2*p - a_i).
        result += exp(2*p - a_i) * outer(X[i, :], X[i, :])
    # NOTE(review): placed after the loop so the penalty is added once and
    # then divided by N; the original layout should be confirmed.
    result += reg
    return result / float(N)
def correlation_map(pcs, var_list):
    """Correlation maps for a set of PCs and a spatial-temporal field.

    Alternative correlation-map method that does not compute p-values.

    Given an array whose columns are PCs and spatial-temporal data whose
    first dimension is time, one correlation map per PC is computed. The
    field must have the same temporal dimension as the PCs. The output
    shape is derived from the spatial dimensions of ``var_list[0]``, so
    any number of spatial dimensions that ``flatten_array`` accepts is
    supported, and there can be any number of PCs.

    Only the first array returned by ``flatten_array(var_list)`` is
    correlated with the PCs.

    **Arguments:**

    *pcs*
        PCs as the columns of a 2-D array (time x number of PCs).

    *var_list*
        List of spatial-temporal fields with time as the first dimension.

    **Returns:**

    *correlation_maps*
        A masked array of shape ``(number of PCs,) + spatial shape`` with
        one correlation map per PC.

    """
    # pcs must be 2-D; only the number of PCs is needed below.
    _, n_pcs = pcs.shape
    # Everything after the leading (time) axis is the spatial layout that
    # the flattened result has to be restored to.
    spatial_shape = tuple(var_list[0].shape[1:])

    # Flatten data to [time x space] and pick the first field.
    field = flatten_array(var_list)[0]

    # Remove the mean along the leading (time) dimension.
    pcs_cent = pcs - pcs.mean(axis=0)
    field_cent = field - field.mean(axis=0)

    # Standard deviation of the PCs and of the field along time.
    pcs_std = pcs_cent.std(axis=0)
    field_std = field_cent.std(axis=0)

    # Number of time samples, used as the covariance divisor.
    div = np.float64(pcs_cent.shape[0])

    # Covariance of each PC with each field point, one row per PC; mask
    # anything non-finite before normalising.
    cor = ma.dot(field_cent.T, pcs_cent).T / div
    cor = ma.masked_invalid(cor)

    # Divide by (std dev of PC) * (std dev of field) to get correlations.
    cor /= ma.outer(pcs_std, field_std)

    # Restore the spatial dimensions of the input field.
    return np.reshape(cor, (n_pcs,) + spatial_shape)
def test_testTakeTransposeInnerOuter(self):
    """Compare masked take, transpose, inner and outer with their NumPy counterparts."""
    axes_order = (2, 0, 1)

    # Masked operand: element 5 is masked before the array is reshaped.
    masked_operand = arange(24)
    plain_operand = np.arange(24)
    masked_operand[5:6] = masked
    masked_operand = masked_operand.reshape(2, 3, 4)
    plain_operand = plain_operand.reshape(2, 3, 4)

    # Axis permutation and take along axis 1 must agree with NumPy.
    expected_transpose = np.transpose(plain_operand, axes_order)
    assert_(eq(expected_transpose, transpose(masked_operand, axes_order)))
    expected_take = np.take(plain_operand, axes_order, 1)
    assert_(eq(expected_take, take(masked_operand, axes_order, 1)))

    # Products are compared against NumPy applied to zero-filled data.
    expected_inner = np.inner(filled(masked_operand, 0),
                              filled(plain_operand, 0))
    assert_(eq(expected_inner, inner(masked_operand, plain_operand)))
    expected_outer = np.outer(filled(masked_operand, 0),
                              filled(plain_operand, 0))
    assert_(eq(expected_outer, outer(masked_operand, plain_operand)))

    # take() on an object array must return the unmasked items untouched.
    mixed = array(['abc', 1, 'def', 2, 3], object)
    mixed[2] = masked
    picked = take(mixed, [0, 3, 4])
    for position, expected in enumerate(('abc', 2, 3)):
        assert_(picked[position] == expected)
def test_testTakeTransposeInnerOuter(self):
    """Masked take/transpose/inner/outer must match NumPy on equivalent data."""
    shape = (2, 3, 4)
    perm = (2, 0, 1)

    # Build the masked array (element 5 masked) and its plain twin.
    m_arr = arange(24)
    n_arr = np.arange(24)
    m_arr[5:6] = masked
    m_arr = m_arr.reshape(*shape)
    n_arr = n_arr.reshape(*shape)

    # Reordering operations.
    transposed_ok = eq(np.transpose(n_arr, perm), transpose(m_arr, perm))
    assert_(transposed_ok)
    taken_ok = eq(np.take(n_arr, perm, 1), take(m_arr, perm, 1))
    assert_(taken_ok)

    # Products: the NumPy reference works on zero-filled operands.
    inner_ok = eq(np.inner(filled(m_arr, 0), filled(n_arr, 0)),
                  inner(m_arr, n_arr))
    assert_(inner_ok)
    outer_ok = eq(np.outer(filled(m_arr, 0), filled(n_arr, 0)),
                  outer(m_arr, n_arr))
    assert_(outer_ok)

    # Object arrays: taking around a masked slot returns the raw items.
    objs = array(["abc", 1, "def", 2, 3], object)
    objs[2] = masked
    first, second, third = take(objs, [0, 3, 4])
    assert_(first == "abc")
    assert_(second == 2)
    assert_(third == 3)
def initialize_cluster_centers(self, pXY, K):
    """Initialize the cluster assignments along each axis.

    For every axis, K[axis] indices are drawn at random as cluster
    centers, and every other index is mapped to its closest center under
    cosine similarity (computed over the coordinates the two share).
    Indices with no similarity information are assigned by random jitter.

    Args:
        pXY: original data matrix (must be a SparseMatrix)
        K: numbers of clusters desired in each dimension

    Return:
        new_C: one list per axis; entry i of the list for an axis is the
            cluster id that index i along that axis is assigned to.

    Raises:
        Exception: if pXY is not an instance of SparseMatrix.
    """
    if not isinstance(pXY, SparseMatrix):
        raise Exception("Matrix argument to initialize_cluster_centers is not an instance of SparseMatrix.")

    new_C = [[-1] * Ni for Ni in pXY.N]

    for axis in xrange(len(K)):  # loop over each dimension
        axis_length = pXY.N[axis]
        n_clusters = K[axis]

        # choose cluster centers and give each one a cluster id:
        # axis index of the center -> cluster id
        center_indices = random.sample(xrange(axis_length), n_clusters)
        cluster_ids = {center_index: cluster_id
                       for cluster_id, center_index in enumerate(center_indices)}

        # all nonzero entries of each center:
        # cluster id -> {coords without the current axis -> value}
        centers = defaultdict(lambda: defaultdict(float))
        for coords in pXY.nonzero_elements:
            coord_this_axis = coords[axis]
            if coord_this_axis in cluster_ids:  # is a center
                reduced_coords = tuple(c for i, c in enumerate(coords) if i != axis)
                centers[cluster_ids[coord_this_axis]][reduced_coords] = pXY.nonzero_elements[coords]

        # assign rows to clusters
        scores = np.zeros(shape=(axis_length, n_clusters))  # axis_size x cluster_number
        denoms_P = np.zeros(shape=axis_length)
        denoms_Q = np.zeros(shape=n_clusters)
        for coords in pXY.nonzero_elements:
            coord_this_axis = coords[axis]
            # don't reassign cluster centers; the dict has the same keys as
            # center_indices but gives an O(1) membership test
            if coord_this_axis in cluster_ids:
                continue
            reduced_coords = tuple(c for i, c in enumerate(coords) if i != axis)
            P_i = pXY.nonzero_elements[coords]
            for xhat in cluster_ids.values():  # need cluster ID, not the axis index
                center = centers[xhat]
                if reduced_coords in center:  # overlapping point
                    Q_i = center[reduced_coords]
                    scores[coord_this_axis, xhat] += P_i * Q_i  # cosine similarity numerator
                    denoms_P[coord_this_axis] += P_i * P_i  # magnitude of this slice of original matrix
                    denoms_Q[xhat] += Q_i * Q_i  # magnitude of cluster centers

        # normalize scores. A zero norm means the row or the center had no
        # overlap at all, so the numerator is 0 as well; dividing by 1 keeps
        # the score at 0 instead of producing NaN from 0/0. NaN would slip
        # past the `== 0` reset below and always win argmax.
        norms = outer(sqrt(denoms_P), sqrt(denoms_Q))
        norms[norms == 0] = 1.0
        scores = divide(scores, norms)
        scores[scores == 0] = -1.0

        # add random jitter to scores to handle tie-breaking
        scores += self.jitter_max * random_sample(scores.shape)
        new_cXYi = list(scores.argmax(1))  # argmax because higher cosine similarity is closer

        # make sure to assign the cluster centers to themselves
        for center_index, cluster_id in cluster_ids.items():
            new_cXYi[center_index] = cluster_id

        # ensure numbers of clusters are correct
        self.ensure_correct_number_clusters(new_cXYi, n_clusters)
        new_C[axis] = new_cXYi
    return new_C
def smooth1d_with_holes(y, n, n_near_boundaries=False, gaussian=True):
    """Smooth a 1D signal with an n-point window that tolerates holes.

    Inputs
    ------
    y: 1D array
        Signal to smooth
    n: int
        Length of smoothing window
    n_near_boundaries: bool or int
        If falsy, the ends are left as computed by the shrinking window.
        Otherwise let m = n when a bool is given, or the int itself:
        the first m//2 values are set to the smoothed value at index m//2
        and the values from index (-m)//2 to the end are set to the
        smoothed value at index (-m)//2.
    gaussian: bool
        If True, use gaussian weighting function, not tophat

    Returns
    -------
    smoothed: 1D array
        Smoothed signal; entries flagged by ``nan_or_masked(y)`` are NaN.

    Overall, it's a simple n-step moving average, but one that deals with
    edges by doing the following (using n = 5 as example):
    1st point: avg(1st)
    2nd point: avg(1st, 2nd, 3rd)
    3rd point: avg(1st, ..., 5th)
    4th point: avg(2nd, ..., 6th)
    5th point: avg(3rd, ..., 7th)
    and similarly at the other end
    """
    half_n_ceil = np.ceil(n/2).astype(int)
    half_n_floor = np.floor(n/2).astype(int)

    # Create a matrix of values that incrementally shift to the right by one
    # index per row
    # For most rows, this means ith column will contain i-n//2 to i+n//2 values
    y_matrix = ma.outer(np.ones(n), y)
    y_matrix.unshare_mask()
    for i, row in enumerate(y_matrix):
        y_matrix[i, :] = np.roll(row, i - half_n_floor)

    # For earlier and later columns, we need to remove some of the values that
    # are shifted from the other end. Do this by concatenating triangular
    # blocks to make arrowhead boolean blocks at each end
    bottom_left = np.tri(half_n_ceil, half_n_ceil, k=-1).astype(bool)
    top_right = ~bottom_left
    # np.vstack is the supported spelling of the deprecated np.row_stack alias
    left = np.vstack((np.fliplr(top_right), bottom_left))
    # if n is odd (n % 2), remove first row of 'left' to leave symmetrical block
    left = left[1:, :] if (n % 2) else left
    right = np.fliplr(left)

    # Change unwanted values to NaN, then mask them so they drop out of the
    # averages below
    filterwarnings('ignore', '.*setting an item on a masked array*.')
    y_matrix[:, :half_n_ceil][left] = np.nan
    y_matrix[:, -half_n_ceil:][right] = np.nan
    y_matrix = ma.masked_invalid(y_matrix)

    if gaussian:
        # One weight per window row, broadcast across all columns and masked
        # wherever the data is masked so the weights renormalise per column
        gaussian_kernel = np.exp(-np.linspace(-1.5, 1.5, n)**2)
        gaussian_kernel = gaussian_kernel[:, np.newaxis]*np.ones_like(y_matrix)
        gaussian_kernel = ma.masked_where(y_matrix.mask, gaussian_kernel)
        smoothed = np.sum(
            y_matrix*gaussian_kernel, axis=0)/np.sum(gaussian_kernel, axis=0)
    else:
        # Take mean to give smoothed result
        smoothed = np.nanmean(y_matrix, axis=0)

    # Any values that were masked or nan to start with are converted to nan
    smoothed[nan_or_masked(y)] = np.nan

    # Constant value near boundaries
    n_bound = n if isinstance(n_near_boundaries, bool) else n_near_boundaries
    if n_near_boundaries:
        assert_msg = 'n_near_boundaries is not finite'
        # Note: -n_bound//2 parses as (-n_bound)//2, i.e. floor division of
        # the negated value
        assert np.isfinite(smoothed[n_bound//2]), assert_msg
        assert np.isfinite(smoothed[-n_bound//2]), assert_msg
        smoothed[:n_bound//2] = smoothed[n_bound//2]
        smoothed[-n_bound//2:] = smoothed[-n_bound//2]

    return smoothed