def test_fast_cov_x_and_y(self):
    # Compares fast_cov(x, y) with the block of numpy.cov (computed on the
    # horizontally stacked [x, y] matrix) whose rows are the first half of
    # the columns and whose columns are the second half. The check is run
    # for x, y as built and again for their transposes.
    logger.debug("*************happy path x and y")

    x, y = TestFastCov.build_standard_x_y()

    combined = numpy.hstack([x, y])
    logger.debug("combined: {}".format(combined))
    logger.debug("combined.shape: {}".format(combined.shape))

    # Floor division keeps the split point an int. True division yields a
    # float on Python 3, and a float is rejected as a slice index.
    off_diag_ind = combined.shape[1] // 2

    raw_ex = numpy.cov(combined, rowvar=False)
    logger.debug(
        "raw expected produced from numpy.cov on full combined - raw_ex: {}"
        .format(raw_ex))
    ex = raw_ex[:off_diag_ind, off_diag_ind:]
    logger.debug("expected ex: {}".format(ex))

    r = fast_cov.fast_cov(x, y)
    logger.debug("r: {}".format(r))

    self.assertTrue(numpy.allclose(ex, r))

    #happy path x and y, other direction
    combined = numpy.hstack([x.T, y.T])
    off_diag_ind = combined.shape[1] // 2

    raw_ex = numpy.cov(combined, rowvar=False)
    logger.debug(
        "happy path x and y, other direction, raw expected produced from numpy.cov on full combined - raw_ex: {}"
        .format(raw_ex))
    ex = raw_ex[:off_diag_ind, off_diag_ind:]
    logger.debug("expected ex: {}".format(ex))

    r = fast_cov.fast_cov(x.T, y.T)
    logger.debug("r: {}".format(r))

    self.assertTrue(numpy.allclose(ex, r))
def test_fast_cov_x_and_y_different_shapes(self):
    # x and y have different column counts here, so the expected x-vs-y
    # block is cut out of numpy.cov using each input's own width. The same
    # expectation is then checked when a preallocated destination is given.
    logger.debug("*************happy path x and y different shapes")

    x, _ = TestFastCov.build_standard_x_y()
    y = numpy.array([[13, 17, 19, 23, 41],
                     [23, 29, 31, 37, 43]])
    logger.debug("y.shape: {}".format(y.shape))
    logger.debug("y:\n{}".format(y))

    n_x_cols = x.shape[1]
    n_y_cols = y.shape[1]

    stacked = numpy.hstack([x, y])
    logger.debug("combined: {}".format(stacked))
    logger.debug("combined.shape: {}".format(stacked.shape))

    full_cov = numpy.cov(stacked, rowvar=False)
    logger.debug(
        "raw expected produced from numpy.cov on full combined - raw_ex: {}"
        .format(full_cov))
    logger.debug("raw_ex.shape: {}".format(full_cov.shape))

    # rows: the leading x columns; columns: the trailing y columns
    expected = full_cov[:n_x_cols, -n_y_cols:]
    logger.debug("expected ex: {}".format(expected))
    logger.debug("ex.shape: {}".format(expected.shape))

    result = fast_cov.fast_cov(x, y)
    logger.debug("r: {}".format(result))
    self.assertTrue(numpy.allclose(expected, result))

    #happy path x and y different shapes, using destination
    out = numpy.zeros((n_x_cols, n_y_cols))
    result = fast_cov.fast_cov(x, y, out)
    logger.debug(
        "happy path x and y different shapes, using destination - r: {}"
        .format(result))
    # the destination object itself must be returned and hold the values
    self.assertIs(out, result)
    self.assertTrue(numpy.allclose(expected, out))
def test_fast_cov_check_validations_run(self):
    # Unhappy path: calling fast_cov with None for both inputs is expected
    # to raise the module's invalid-input exception, which shows that the
    # input validation is executed.
    expected_error = fast_cov.CmapPyMathFastCovInvalidInputXY

    with self.assertRaises(expected_error) as context:
        fast_cov.fast_cov(None, None)

    message = "unhappy path check that input validation checks are run - context.exception: {}"
    logger.debug(message.format(context.exception))
def test_fast_cov_just_x(self):
    # Exercises fast_cov with only x supplied, comparing against numpy.cov:
    #   * plain call
    #   * caller-provided destination array
    #   * destination with a different dtype (float16)
    #   * destination backed by a numpy.memmap on a temporary file
    #   * transposed input
    logger.debug("*************happy path just x")
    x, _ = TestFastCov.build_standard_x_y()

    ex = numpy.cov(x, rowvar=False)
    logger.debug("expected ex: {}".format(ex))

    r = fast_cov.fast_cov(x)
    logger.debug("r: {}".format(r))

    self.assertTrue(numpy.allclose(ex, r))

    #happy path just x, uses destination
    dest = numpy.zeros((x.shape[1], x.shape[1]))
    r = fast_cov.fast_cov(x, destination=dest)
    logger.debug("happy path just x, uses destination - r: {}".format(r))
    self.assertIs(dest, r)
    self.assertTrue(numpy.allclose(ex, dest))

    #happy path just x, uses destination which is a different type
    dest = dest.astype(numpy.float16)
    r = fast_cov.fast_cov(x, destination=dest)
    logger.debug(
        "happy path, just x, uses destination which is a different type - r: {}"
        .format(r))
    self.assertIs(dest, r)
    self.assertTrue(numpy.allclose(ex, dest))

    #happy path just x, uses destination that is a numpy.memmap
    outfile = tempfile.mkstemp()
    logger.debug(
        "happy path, just x, uses destination which is a numpy.memmap - outfile: {}"
        .format(outfile))
    try:
        dest = numpy.memmap(outfile[1], dtype="float16", mode="w+",
                            shape=ex.shape)
        dest_array = numpy.asarray(dest)
        r = fast_cov.fast_cov(x, destination=dest_array)
        dest.flush()
        logger.debug(" - r: {}".format(r))
        # Same checks as the other destination cases; previously this
        # case ran fast_cov but verified nothing.
        self.assertIs(dest_array, r)
        self.assertTrue(numpy.allclose(ex, dest_array))
    finally:
        # Release the descriptor and delete the temporary file even when
        # fast_cov or an assertion raises, so failures do not leak them.
        os.close(outfile[0])
        os.remove(outfile[1])

    #happy path just x, transposed
    ex = numpy.cov(x, rowvar=True)
    logger.debug(
        "happy path just x, transposed, expected ex: {}".format(ex))
    r = fast_cov.fast_cov(x.T)
    logger.debug("r: {}".format(r))
    self.assertTrue(numpy.allclose(ex, r))
def fast_corr(x, y=None, destination=None):
    """Calculate the Pearson correlation matrix for the columns of x (MxN),
    or optionally the Pearson correlation matrix between the columns of x
    and the columns of y (OxP). If destination is provided, the results are
    put there.

    In the language of statistics the columns are the variables and the
    rows are the observations.

    Args:
        x (numpy array-like) MxN in shape
        y (optional, numpy array-like) OxP in shape. M (# rows in x) must
            equal O (# rows in y)
        destination (numpy array-like) optional location where to store the
            results as they are calculated (e.g. a numpy memmap of a file)

    returns (numpy array-like) array of the correlation values
        for defaults (y=None), shape is NxN
        if y is provided, shape is NxP
    """
    other = x if y is None else y

    # Start from the covariance matrix; it is rescaled in place below, so
    # the array returned by fast_cov is also the array returned here.
    corr = fast_cov.fast_cov(x, other, destination=destination)

    # Sample standard deviation (ddof=1) of each column.
    x_col_std = numpy.std(x, axis=0, ddof=1)
    y_col_std = numpy.std(other, axis=0, ddof=1)

    # Row i is divided by std of x column i, then column j by std of
    # y column j.
    numpy.divide(corr, x_col_std[:, numpy.newaxis], out=corr)
    numpy.divide(corr, y_col_std[numpy.newaxis, :], out=corr)

    return corr
def test_fast_cov_just_x(self):
    # With only x supplied, fast_cov is compared against numpy.cov, first
    # with columns as variables and then on the transposed input.
    logger.debug("*************happy path just x")

    x, _ = TestFastCov.build_standard_x_y()

    expected = numpy.cov(x, rowvar=False)
    logger.debug("expected ex: {}".format(expected))
    result = fast_cov.fast_cov(x)
    logger.debug("r: {}".format(result))
    self.assertTrue(numpy.allclose(expected, result))

    #happy path just x, transposed
    expected = numpy.cov(x, rowvar=True)
    logger.debug(
        "happy path just x, transposed, expected ex: {}".format(expected))
    result = fast_cov.fast_cov(x.T)
    logger.debug("r: {}".format(result))
    self.assertTrue(numpy.allclose(expected, result))
def fast_corr(x, y=None):
    """Calculate the Pearson correlation matrix for the columns of x (MxN),
    or optionally the correlation matrix between the columns of x and the
    columns of y (OxP).

    In the language of statistics the columns are the variables and the
    rows are the observations.

    Args:
        x (numpy array-like) MxN in shape
        y (optional, numpy array-like) OxP in shape

    returns (numpy array-like) array of the correlation values
        for defaults (y=None), shape is NxN
        if y is provided, shape is NxP
    """
    other = x if y is None else y

    covariance = fast_cov.fast_cov(x, other)

    # Entry (i, j) of the scale matrix is std(x column i) * std(y column j),
    # using the sample standard deviation (ddof=1).
    x_col_std = numpy.std(x, axis=0, ddof=1)
    y_col_std = numpy.std(other, axis=0, ddof=1)
    scale = numpy.outer(x_col_std, y_col_std)

    return covariance / scale