Esempio n. 1
0
    def test_fast_cov_x_and_y(self):
        """Compare fast_cov(x, y) with the x-versus-y block of numpy.cov.

        The expected value is built by stacking x and y side by side,
        running numpy.cov over the columns of the stacked array and keeping
        the upper-right (off-diagonal) block.  The check is then repeated
        with both inputs transposed.

        NOTE(review): splitting the stacked array at half its column count
        assumes x and y have the same number of columns -- confirm against
        TestFastCov.build_standard_x_y.
        """
        logger.debug("*************happy path x and y")
        x, y = TestFastCov.build_standard_x_y()

        combined = numpy.hstack([x, y])
        logger.debug("combined:  {}".format(combined))
        logger.debug("combined.shape:  {}".format(combined.shape))

        # Floor division keeps the split point an int on both Python 2 and
        # Python 3; true division ("/") produces a float on Python 3, which
        # is rejected as a slice bound.
        off_diag_ind = combined.shape[1] // 2

        raw_ex = numpy.cov(combined, rowvar=False)
        logger.debug(
            "raw expected produced from numpy.cov on full combined - raw_ex:  {}"
            .format(raw_ex))
        # Rows belonging to x, columns belonging to y.
        ex = raw_ex[:off_diag_ind, off_diag_ind:]
        logger.debug("expected ex:  {}".format(ex))

        r = fast_cov.fast_cov(x, y)
        logger.debug("r:  {}".format(r))
        self.assertTrue(numpy.allclose(ex, r))

        #happy path x and y, other direction
        combined = numpy.hstack([x.T, y.T])
        off_diag_ind = combined.shape[1] // 2

        raw_ex = numpy.cov(combined, rowvar=False)
        logger.debug(
            "happy path x and y, other direction, raw expected produced from numpy.cov on full combined - raw_ex:  {}"
            .format(raw_ex))
        ex = raw_ex[:off_diag_ind, off_diag_ind:]
        logger.debug("expected ex:  {}".format(ex))

        r = fast_cov.fast_cov(x.T, y.T)
        logger.debug("r:  {}".format(r))
        self.assertTrue(numpy.allclose(ex, r))
Esempio n. 2
0
    def test_fast_cov_x_and_y_different_shapes(self):
        """Check fast_cov(x, y) when y has a different column count than x.

        The expected matrix is the block of numpy.cov (computed on x and y
        stacked side by side) whose rows correspond to the columns of x and
        whose columns correspond to the columns of y.  The same expectation
        is then checked when a pre-allocated destination array is supplied;
        in that case the test also requires the destination itself to be
        returned.
        """
        logger.debug("*************happy path x and y different shapes")
        x, _ = TestFastCov.build_standard_x_y()
        y = numpy.array([[13, 17, 19, 23, 41], [23, 29, 31, 37, 43]])
        logger.debug("y.shape:  {}".format(y.shape))
        logger.debug("y:\n{}".format(y))

        stacked = numpy.hstack([x, y])
        logger.debug("combined:  {}".format(stacked))
        logger.debug("combined.shape:  {}".format(stacked.shape))

        full_cov = numpy.cov(stacked, rowvar=False)
        logger.debug(
            "raw expected produced from numpy.cov on full combined - raw_ex:  {}"
            .format(full_cov))
        logger.debug("raw_ex.shape:  {}".format(full_cov.shape))

        # Leading rows belong to x, trailing columns belong to y.
        n_x_cols = x.shape[1]
        n_y_cols = y.shape[1]
        expected = full_cov[:n_x_cols, -n_y_cols:]
        logger.debug("expected ex:  {}".format(expected))
        logger.debug("ex.shape:  {}".format(expected.shape))

        result = fast_cov.fast_cov(x, y)
        logger.debug("r:  {}".format(result))
        self.assertTrue(numpy.allclose(expected, result))

        #happy path x and y different shapes, using destination
        out = numpy.zeros((n_x_cols, n_y_cols))
        result = fast_cov.fast_cov(x, y, out)
        logger.debug(
            "happy path x and y different shapes, using destination - r:  {}".
            format(result))
        self.assertIs(out, result)
        self.assertTrue(numpy.allclose(expected, out))
Esempio n. 3
0
 def test_fast_cov_check_validations_run(self):
     """Verify that fast_cov rejects invalid input.

     Calling fast_cov with None for both x and y is expected to raise
     fast_cov.CmapPyMathFastCovInvalidInputXY; the caught exception is
     logged at debug level.
     """
     #unhappy path check that input validation checks are run
     expected_error = fast_cov.CmapPyMathFastCovInvalidInputXY
     with self.assertRaises(expected_error) as context:
         fast_cov.fast_cov(None, None)

     caught = context.exception
     logger.debug(
         "unhappy path check that input validation checks are run - context.exception:  {}"
         .format(caught))
Esempio n. 4
0
    def test_fast_cov_just_x(self):
        """Check fast_cov(x) against numpy.cov for several output targets.

        Cases covered, in order:
          * no destination: result must match numpy.cov(x, rowvar=False)
          * float64 destination: the destination object itself must be
            returned and must hold the expected values
          * float16 destination: same checks with a different dtype
          * destination backed by a numpy.memmap on a temporary file: only
            checks that the call completes; no values are asserted
          * transposed input: fast_cov(x.T) must match
            numpy.cov(x, rowvar=True)
        """
        logger.debug("*************happy path just x")
        x, _ = TestFastCov.build_standard_x_y()

        ex = numpy.cov(x, rowvar=False)
        logger.debug("expected ex:  {}".format(ex))

        r = fast_cov.fast_cov(x)
        logger.debug("r:  {}".format(r))

        self.assertTrue(numpy.allclose(ex, r))

        #happy path just x, uses destination
        dest = numpy.zeros((x.shape[1], x.shape[1]))
        r = fast_cov.fast_cov(x, destination=dest)
        logger.debug("happy path just x, uses destination - r:  {}".format(r))
        self.assertIs(dest, r)
        self.assertTrue(numpy.allclose(ex, dest))

        #happy path just x, uses destination which is a different type
        dest = dest.astype(numpy.float16)
        r = fast_cov.fast_cov(x, destination=dest)
        logger.debug(
            "happy path, just x, uses destination which is a different type - r:  {}"
            .format(r))
        self.assertIs(dest, r)
        self.assertTrue(numpy.allclose(ex, dest))

        #happy path just x, uses destination that is a numpy.memmap
        # mkstemp returns (open file descriptor, path).  The cleanup lives in
        # a finally clause so the descriptor and the file are released even
        # when fast_cov or the memmap setup raises.
        outfile = tempfile.mkstemp()
        try:
            logger.debug(
                "happy path, just x, uses destination which is a numpy.memmap - outfile:  {}"
                .format(outfile))
            dest = numpy.memmap(outfile[1],
                                dtype="float16",
                                mode="w+",
                                shape=ex.shape)
            dest_array = numpy.asarray(dest)
            r = fast_cov.fast_cov(x, destination=dest_array)
            dest.flush()
            logger.debug(" - r:  {}".format(r))
        finally:
            os.close(outfile[0])
            os.remove(outfile[1])

        #happy path just x, transposed
        ex = numpy.cov(x, rowvar=True)
        logger.debug(
            "happy path just x, transposed, expected ex:  {}".format(ex))
        r = fast_cov.fast_cov(x.T)
        logger.debug("r:  {}".format(r))
        self.assertTrue(numpy.allclose(ex, r))
Esempio n. 5
0
def fast_corr(x, y=None, destination=None):
    """Compute the Pearson correlation matrix between columns.

    With only x (MxN) given, the columns of x are correlated with each other.
    With y (OxP) also given, each column of x is correlated with each column
    of y.  Columns are treated as variables and rows as observations.

    The covariance is obtained from fast_cov.fast_cov and then scaled in
    place by the sample standard deviations (ddof=1) of the columns of x
    (along the rows of the result) and of y (along its columns).

    Args:
        x (numpy array-like) MxN in shape
        y (optional, numpy array-like) OxP in shape.  M (# rows in x) must
            equal O (# rows in y)
        destination (optional, numpy array-like) forwarded to
            fast_cov.fast_cov as the location in which to store the results
            (e.g. a numpy memmap of a file)

        returns (numpy array-like) array of the correlation values
            for defaults (y=None), shape is NxN
            if y is provided, shape is NxP
    """
    other = x if y is None else y

    result = fast_cov.fast_cov(x, other, destination=destination)

    sd_x = numpy.std(x, axis=0, ddof=1)
    sd_y = numpy.std(other, axis=0, ddof=1)

    # Scale row i by sd_x[i], then column j by sd_y[j], reusing the
    # covariance buffer for the output.
    numpy.divide(result, sd_x[:, None], out=result)
    numpy.divide(result, sd_y[None, :], out=result)

    return result
Esempio n. 6
0
    def test_fast_cov_just_x(self):
        """Check fast_cov(x) against numpy.cov in both orientations.

        First fast_cov(x) is compared with numpy.cov(x, rowvar=False); then
        fast_cov(x.T) is compared with numpy.cov(x, rowvar=True).
        """
        logger.debug("*************happy path just x")
        x, _ = TestFastCov.build_standard_x_y()

        expected = numpy.cov(x, rowvar=False)
        logger.debug("expected ex:  {}".format(expected))

        result = fast_cov.fast_cov(x)
        logger.debug("r:  {}".format(result))

        matches = numpy.allclose(expected, result)
        self.assertTrue(matches)

        #happy path just x, transposed
        expected = numpy.cov(x, rowvar=True)
        logger.debug(
            "happy path just x, transposed, expected ex:  {}".format(expected))
        result = fast_cov.fast_cov(x.T)
        logger.debug("r:  {}".format(result))
        matches = numpy.allclose(expected, result)
        self.assertTrue(matches)
Esempio n. 7
0
def fast_corr(x, y=None):
    """Compute the Pearson correlation matrix between columns.

    With only x (MxN) given, the columns of x are correlated with each other.
    With y (OxP) also given, each column of x is correlated with each column
    of y.  Columns are treated as variables and rows as observations.

    The covariance from fast_cov.fast_cov is divided element-wise by the
    outer product of the sample standard deviations (ddof=1) of the columns
    of x and of y.

    Args:
        x (numpy array-like) MxN in shape
        y (optional, numpy array-like) OxP in shape

        returns (numpy array-like) array of the correlation values
            for defaults (y=None), shape is NxN
            if y is provided, shape is NxP
    """
    other = x if y is None else y

    covariance = fast_cov.fast_cov(x, other)

    sd_x = numpy.std(x, axis=0, ddof=1)
    sd_y = numpy.std(other, axis=0, ddof=1)

    # Entry (i, j) of the denominator is sd_x[i] * sd_y[j].
    return covariance / numpy.outer(sd_x, sd_y)