Exemple #1
0
    def test_fast_corr_just_x(self):
        """Compare fast_corr on a single matrix against numpy.corrcoef.

        Three scenarios are exercised in sequence:
          1. fast_corr(x) versus numpy.corrcoef(x, rowvar=False);
          2. the same computation written into a caller-supplied
             ``destination`` array, which must be the object returned;
          3. fast_corr(x.T) versus numpy.corrcoef(x, rowvar=True).
        """
        logger.debug("*************happy path just x")
        x, _unused_y = TestFastCorr.build_standard_x_y()

        # Reference result: correlations between the columns of x.
        expected = numpy.corrcoef(x, rowvar=False)
        logger.debug("expected ex:  {}".format(expected))

        result = fast_corr.fast_corr(x)
        logger.debug("r:  {}".format(result))

        is_close = numpy.allclose(expected, result)
        self.assertTrue(is_close)

        # happy path just x, destination provided
        num_columns = x.shape[1]
        out = numpy.zeros((num_columns, num_columns))
        result = fast_corr.fast_corr(x, destination=out)
        # The supplied buffer itself must be returned and hold the result.
        self.assertIs(out, result)
        is_close = numpy.allclose(expected, out)
        self.assertTrue(is_close)

        # happy path just x, other direction
        expected = numpy.corrcoef(x, rowvar=True)
        logger.debug(
            "happy path just x, other direction, expected ex:  {}".format(
                expected))
        result = fast_corr.fast_corr(x.T)
        logger.debug("r:  {}".format(result))
        is_close = numpy.allclose(expected, result)
        self.assertTrue(is_close)
Exemple #2
0
    def test_fast_corr_functional(self):
        """Functional test of fast_corr(x, y) on randomly generated matrices.

        Each iteration draws random shapes and values for x and y (sharing
        the same number of rows), computes the expected cross-correlation
        block from numpy.corrcoef on the horizontally stacked matrix, and
        checks that fast_corr(x, y) matches it.
        """
        logger.debug("*************happy path functional test using randomly generated matrices")

        for _ in range(num_iterations_functional_tests):
            # the dimension containing the observations must have at least size 2
            num_rows = numpy.random.randint(2, max_dimension_functional_tests)
            num_x_cols = numpy.random.randint(1, max_dimension_functional_tests)
            x_shape = [num_rows, num_x_cols]
            logger.debug("x_shape:  {}".format(x_shape))

            # Uniform values scaled by a random integer multiplier
            # (a length-1 array, broadcast over the whole matrix).
            x_base = numpy.random.rand(x_shape[0], x_shape[1])
            x_multiplier = numpy.random.randint(
                1, multiplier_max_functional_tests, size=1)
            x = x_base * x_multiplier
            logger.debug("x:\n{}".format(x))

            # y shares x's first dimension; its second dimension is random.
            y_other_shape = numpy.random.randint(
                1, max_dimension_functional_tests, size=1)[0]
            y_shape = (x_shape[0], y_other_shape)
            logger.debug("y_shape:  {}".format(y_shape))
            y_base = numpy.random.rand(y_shape[0], y_shape[1])
            y_multiplier = numpy.random.randint(
                1, multiplier_max_functional_tests, size=1)
            y = y_base * y_multiplier
            logger.debug("y:\n{}".format(y))

            stacked = numpy.hstack([x, y])

            full_corr = numpy.corrcoef(stacked, rowvar=False)
            logger.debug("raw_ex.shape:  {}".format(full_corr.shape))

            # Upper-right block: columns of x (rows) against columns of y.
            x_cols = x.shape[1]
            y_cols = y.shape[1]
            expected = full_corr[:x_cols, -y_cols:]
            logger.debug("ex:\n{}".format(expected))
            logger.debug("ex.shape:  {}".format(expected.shape))

            result = fast_corr.fast_corr(x, y)
            logger.debug("r:\n{}".format(result))
            logger.debug("r.shape:  {}".format(result.shape))

            is_close = numpy.allclose(expected, result)
            self.assertTrue(is_close)
Exemple #3
0
    def test_fast_corr_x_and_y(self):
        """Compare fast_corr(x, y) against the cross block of numpy.corrcoef.

        Three scenarios are exercised in sequence:
          1. fast_corr(x, y) versus the upper-right block of
             numpy.corrcoef on the horizontally stacked [x, y];
          2. the same computation written into a caller-supplied
             destination array, which must be the object returned;
          3. the same comparison using the transposed inputs x.T and y.T.

        The expected block is located by halving the column count of the
        stacked matrix, so this test assumes x and y have the same shape
        (NOTE(review): confirm against build_standard_x_y).
        """
        logger.debug("*************happy path x and y")
        x, y = TestFastCorr.build_standard_x_y()

        combined = numpy.hstack([x, y])
        logger.debug("combined:  {}".format(combined))
        logger.debug("combined.shape:  {}".format(combined.shape))

        # Floor division keeps the index an int. True division ("/") yields
        # a float under Python 3, which is rejected as a slice bound.
        off_diag_ind = combined.shape[1] // 2

        raw_ex = numpy.corrcoef(combined, rowvar=False)
        logger.debug(
            "raw expected produced from numpy.cov on full combined - raw_ex:  {}"
            .format(raw_ex))
        # Upper-right block: columns of x (rows) against columns of y.
        ex = raw_ex[:off_diag_ind, off_diag_ind:]
        logger.debug("expected ex:  {}".format(ex))

        r = fast_corr.fast_corr(x, y)
        logger.debug("r:  {}".format(r))
        self.assertTrue(numpy.allclose(ex, r))

        #happy path x, y, and destination
        dest = numpy.zeros((x.shape[1], y.shape[1]))
        r = fast_corr.fast_corr(x, y, dest)
        # The supplied buffer itself must be returned and hold the result.
        self.assertIs(dest, r)
        self.assertTrue(numpy.allclose(ex, dest))

        #happy path x and y, other direction
        combined = numpy.hstack([x.T, y.T])
        logger.debug(
            "*************happy path x and y, other direction - combined:  {}".
            format(combined))
        logger.debug("combined.shape:  {}".format(combined.shape))

        # Same integer-index requirement as above.
        off_diag_ind = combined.shape[1] // 2

        raw_ex = numpy.corrcoef(combined, rowvar=False)
        logger.debug(
            "raw expected produced from numpy.cov on full combined - raw_ex:  {}"
            .format(raw_ex))
        ex = raw_ex[:off_diag_ind, off_diag_ind:]
        logger.debug("expected ex:  {}".format(ex))

        r = fast_corr.fast_corr(x.T, y.T)
        logger.debug("r:  {}".format(r))
        self.assertTrue(numpy.allclose(ex, r))
Exemple #4
0
    def test_fast_corr_x_and_y_different_shapes(self):
        """Compare fast_corr(x, y) with numpy.corrcoef when y is a 2x5 matrix.

        x comes from the standard fixture; y is a hard-coded 2x5 array. The
        expected result is the block of numpy.corrcoef (on the horizontally
        stacked [x, y]) relating the columns of x to the columns of y.
        """
        logger.debug("*************happy path x and y different shapes")
        x, _unused_y = TestFastCorr.build_standard_x_y()
        y_rows = [[13, 17, 19, 41, 23], [23, 29, 31, 37, 43]]
        y = numpy.array(y_rows)
        logger.debug("y.shape:  {}".format(y.shape))
        logger.debug("y:\n{}".format(y))

        stacked = numpy.hstack([x, y])
        logger.debug("combined:  {}".format(stacked))
        logger.debug("combined.shape:  {}".format(stacked.shape))

        full_corr = numpy.corrcoef(stacked, rowvar=False)
        logger.debug("raw expected produced from numpy.cov on full combined - raw_ex:  {}".format(full_corr))
        logger.debug("raw_ex.shape:  {}".format(full_corr.shape))

        # Upper-right block: first x.shape[1] rows, last y.shape[1] columns.
        x_cols = x.shape[1]
        y_cols = y.shape[1]
        expected = full_corr[:x_cols, -y_cols:]
        logger.debug("expected ex:  {}".format(expected))
        logger.debug("ex.shape:  {}".format(expected.shape))

        result = fast_corr.fast_corr(x, y)
        logger.debug("r:  {}".format(result))
        is_close = numpy.allclose(expected, result)
        self.assertTrue(is_close)
    # NOTE(review): this is the body of a function whose header is not visible
    # here; `well`, `diffusion`, `profile_file_id`, `diffusion_sets`,
    # `profile_df`, `features`, the `*_col_id` names and `all_results` are all
    # defined outside this fragment.
    print(
        f"Now analyzing well {well} with diffusion {diffusion} for file {profile_file_id}"
    )
    # Look up the entry of diffusion_sets for this well; it is used below as
    # the set of well positions to compare against.
    subset_diffusion = diffusion_sets[well]

    # Process each distinct plate map value found in the profile data.
    for plate_map in profile_df.loc[:, platemap_col_id].unique():
        # Define the two matrices to calculate pairwise correlations between
        # focus_df: rows for this well on this plate map.
        # compare_df: rows whose well position is in subset_diffusion and
        # whose plate map differs from the current one.
        focus_df = profile_df.query(f"{well_position_col_id} == @well").query(
            f"{platemap_col_id} == @plate_map")
        compare_df = profile_df.query(
            f"{well_position_col_id} in @subset_diffusion").query(
                f"{platemap_col_id} != @plate_map")

        # Get all non-replicate pairwise correlations
        # The feature columns are transposed before the call and the
        # resulting matrix is flattened into a 1-D array of values.
        distrib = fast_corr.fast_corr(
            focus_df.loc[:, features].transpose().values,
            compare_df.loc[:, features].transpose().values,
        ).flatten()

        # Nothing to summarize when either selection produced no values.
        if len(distrib) == 0:
            print(f"well {well} on {plate_map} skipped. Missing data.")
            continue

        med = np.median(distrib)

        # Build a one-row summary frame from describe(distrib) and attach the
        # median plus identifying columns.
        # NOTE(review): `describe` is presumably scipy.stats.describe — confirm
        # against the file's imports.
        result = (pd.DataFrame(describe(distrib)).transpose().assign(
            median=med,
            cor_category="nonreplicate",
            well=well,
            plate_map=plate_map))

        all_results.append(result)