def test_replacement(self):
        """Compare replace_missing output with stored reference matrices.

        Three calls to ``replace_missing_values.replace_missing`` are made
        with different inputs and parameters; each result must have the same
        shape as the matching reference file and every row must pass
        ``tools.correlation`` at the given threshold.
        """

        def check_rows(expected, actual, threshold):
            # Shapes must agree before the row-by-row comparison.
            assert expected.shape == actual.shape
            for row in range(expected.shape[0]):
                assert tools.correlation(expected[row, :], actual[row, :],
                                         threshold)

        logging.info("Start test test_replacement")

        # case 1
        replaced = replace_missing_values.replace_missing(
            self.TEST_STR_IND, "NA", 0.42, 0.42, " ")
        reference = loadtxt(self.TEST_042)
        # Looser threshold (1e-2): the reference data was first generated
        # with float64 and the code was later changed to use float32.
        check_rows(reference, replaced, 1e-2)

        # case 2
        replaced = replace_missing_values.replace_missing(
            self.TEST_FLOAT_IND, "9.0", 0.5, 0.5, " ")
        reference = loadtxt(self.TEST_05)
        check_rows(reference, replaced, 1e-12)

        # case 3
        reference = loadtxt(self.TEST_042_05)
        replaced = replace_missing_values.replace_missing(
            self.TEST_FLOAT_IND, 9, 0.42, 0.5, " ")
        check_rows(reference, replaced, 1e-12)

        logging.info("PASS")
Exemple #2
0
    def _test_regress_out(self):
        """
        Check linear regression (y-x) via LinearRegression.regress_out.

        Loads y, x and the expected residuals from the files named by
        ``LIN_REG_Y``, ``LIN_REG_X`` and ``LIN_REG_RESIDUALS`` and verifies
        that regress_out gives matching residuals for a 1-dim y, for the
        same y reshaped to a single column, and for a two-column y.

        Raises:
            NotImplementedError: if the loaded y is not 1-dimensional; the
                test only covers that layout.
        """
        logging.info("Testing linear regression: regress_out")
        y = loadtxt(self.LIN_REG_Y)
        x = loadtxt(self.LIN_REG_X)
        orig_residuals = loadtxt(self.LIN_REG_RESIDUALS)

        if y.ndim != 1:
            # Raising a bare string is a TypeError in Python; raise a real
            # exception so the failure reason is reported as intended.
            raise NotImplementedError("TEST WASNT IMPLEMENTED")

        # test 1 dim
        residuals = LinearRegression.regress_out(y, x)
        y_2dim = y.reshape(-1, 1)
        residuals2 = LinearRegression.regress_out(y_2dim, x)
        assert tools.correlation(orig_residuals, residuals)
        assert tools.correlation(residuals2, residuals)
        assert len(residuals) == len(x)

        # test 2 dim: every column of the stacked y must give the same
        # residuals as the 1-dim case
        y2 = column_stack((y, y))
        residuals = LinearRegression.regress_out(y2, x)
        for i in range(len(y2[0])):
            assert tools.correlation(orig_residuals, residuals[:, i])
            assert len(residuals[:, i]) == len(x)

        logging.info("PASS")
Exemple #3
0
    def test_impute(self):
        """Compare the module's imputation result with a manually imputed file.

        After ``self.module.impute`` runs, ``self.module.site_imputation``
        must match the reference in both dimensions, column by column and
        row by row (per ``tools.correlation``).
        """
        logging.info("Testing test_impute...")
        expected = loadtxt(self.MANUALLY_IMPUTED_FILE)
        self.module.impute(self.MIN_SCORE, self.SNPS_FILE, self.GENO_FILE,
                           self.IND_FILE, self.MIN_MISSING_VALUES)
        actual = self.module.site_imputation

        n_rows, n_cols = expected.shape[0], expected.shape[1]
        assert n_rows == actual.shape[0]
        assert n_cols == actual.shape[1]

        # column-wise comparison
        for col in range(n_cols):
            assert tools.correlation(expected[:, col], actual[:, col])

        # row-wise comparison
        for row in range(n_rows):
            assert tools.correlation(expected[row, :], actual[row, :])

        logging.info("PASS")
Exemple #4
0
    def test_euclidean_distance(self):
        """Check tools.euclidean_distance against the stored EUC_DIST result.

        The inputs are the transposes of rows [0, 500) and rows [500, end)
        of ``self.meth_data.data``.
        """
        logging.info("Testing euclidean distance...")
        expected = loadtxt(self.EUC_DIST)

        head = self.meth_data.data[:500, :].transpose()
        tail = self.meth_data.data[500:, :].transpose()
        computed = tools.euclidean_distance(head, tail)

        assert tests_tools.correlation(computed, expected)
        logging.info("PASS")
    def test_exclude_sites_with_low_mean(self):
        """Check exclude_sites_with_low_mean against a stored reference.

        Works on a copy of ``self.meth_data`` and compares each row of the
        filtered data with the matching row of ``FAKE_DATA_MIN_MEANS``.
        """
        logging.info("Testing excluded mean below 0.5...")
        filtered = self.meth_data.copy()
        filtered.exclude_sites_with_low_mean(self.MIN_MEAN_TH)
        expected = loadtxt(self.FAKE_DATA_MIN_MEANS)

        n_rows = expected.shape[0]
        for row in range(n_rows):
            assert tools.correlation(expected[row, :], filtered.data[row, :],
                                     1e-14)
        logging.info("PASS")
def test_remove_lowest_std_sites():
    """Check remove_lowest_std_sites(0.02) against the STDTH_RES reference.

    Loads methylation data from ``DATA_STDTH``, applies the filter and
    compares each remaining row with the stored result.
    """
    logging.info("Testing stdth again...")
    loader = methylation_data.MethylationDataLoader(datafile=DATA_STDTH)
    loader.remove_lowest_std_sites(0.02)
    expected = loadtxt(STDTH_RES)

    n_rows = expected.shape[0]
    for row in range(n_rows):
        assert tools.correlation(expected[row, :], loader.data[row, :], 1e-12)
    logging.info("PASS")
Exemple #7
0
    def test_low_rank_approx(self):
        """Check tools.low_rank_approximation against a stored reference.

        The approximation is computed on the transposed data of a copy of
        ``self.meth_data`` with rank ``self.K``, transposed back and
        compared row by row with ``LOW_RANK_APPROX``.
        """
        logging.info("Testing low rank approximation")
        expected = loadtxt(self.LOW_RANK_APPROX)
        data_copy = self.meth_data.copy()

        approx = tools.low_rank_approximation(
            data_copy.data.transpose(), self.K).transpose()

        for site in range(data_copy.sites_size):
            assert tests_tools.correlation(approx[site, :], expected[site, :])

        logging.info("PASS")
Exemple #8
0
    def __init__(self):
        """Run the PCA check at construction time.

        Computes ``pca.PCA`` on the transposed data loaded from
        ``DATA_FILE`` and compares the first 10 columns of its ``P``
        attribute with the reference loaded from ``PCA_P_RES``.
        """
        logging.info("Testing Started on PCATester")
        expected_p = loadtxt(self.PCA_P_RES)

        loaded = methylation_data.MethylationDataLoader(
            datafile=self.DATA_FILE)
        result = pca.PCA(loaded.data.transpose())

        # only the first 10 columns are compared
        for col in range(10):
            assert tools.correlation(result.P[:, col], expected_p[:, col])

        logging.info("PASS")
        logging.info("Testing Finished on PCATester")
Exemple #9
0
    def test_components(self):
        """Compare Houseman components with the stored OUT_COMP reference.

        The component matrix must have the same shape as the reference and
        every column must pass ``tools.correlation`` with threshold 1e-2.
        """
        logging.info("Testing houseman components...")

        comp = loadtxt(self.OUT_COMP)

        # Open the reference inside a context manager so the handle is
        # closed even when an assertion fails. The checks stay inside the
        # block in case Houseman reads the file lazily -- TODO confirm.
        with open(houseman_parser.HOUSEMAN_DEFAULT_REFERENCE,
                  'r') as reference:
            module = houseman.Houseman(self.meth_data, reference)

            assert module.components.shape == comp.shape
            for i in range(module.components.shape[1]):
                assert tools.correlation(module.components[:, i], comp[:, i],
                                         1e-2)
        logging.info("PASS")
Exemple #10
0
    def test_remove_covariates(self):
        """Check that regress_out on the data object matches per-site regression.

        A copy of ``self.meth_data`` has all covariates regressed out; the
        result is then compared, site by site, with residuals obtained by
        calling ``LinearRegression.regress_out`` on the original data.
        """
        logging.info("Testing removing covariates...")
        regressed = self.meth_data.copy()
        # regress out all covariates
        regressed.regress_out(self.meth_data.covar)

        covar, _covar_names = regressed._load_and_validate_covar(
            [self.DEMO_COVAR], regressed.samples_size, regressed.samples_ids)

        # remove "manually"
        for index, site in enumerate(self.meth_data.data):
            manual_residuals = LinearRegression.regress_out(site, covar)
            assert len(manual_residuals) == len(site)
            assert tools.correlation(manual_residuals, regressed.data[index])

        logging.info("PASS")
Exemple #11
0
    def test_senario1(self):
        """Run Refactor without phenotypes or covariates and check its output.

        Uses k=5, t=500, stdth=0 on a copy of ``self.meth_data``; the ranked
        list is validated against ``RES1_RANKED`` and each component column
        against ``RES1``.
        """
        logging.info("Testing clean refactor components...")
        data_copy = self.meth_data.copy()

        expected_components = loadtxt(self.RES1)
        expected_ranked = loadtxt(self.RES1_RANKED, dtype=str)

        module = refactor.Refactor(
            methylation_data=data_copy,
            k=5,
            t=500,
            stdth=0,
            bad_probes_list=self.bad_probes,
            use_phenos=None,
            use_covars=None,
        )
        module.run()

        assert self._validate_ranked_lists(module, expected_ranked)
        assert module.components.shape == expected_components.shape
        for col in range(module.components.shape[1]):
            assert tools.correlation(module.components[:, col],
                                     expected_components[:, col])
        logging.info("PASS")
Exemple #12
0
    def test_senario3(self):
        """Run Refactor with 'controls' feature selection and check its output.

        Uses k=5, t=500, stdth=0, phenotype 'p2' and no covariates on a copy
        of ``self.meth_data``; the ranked list is validated against
        ``RES3_RANKED`` and each component column against ``RES3``.
        """
        logging.info("Testing controls feature selection...")
        data_copy = self.meth_data.copy()

        expected_components = loadtxt(self.RES3)
        expected_ranked = loadtxt(self.RES3_RANKED, dtype=str)

        module = refactor.Refactor(
            methylation_data=data_copy,
            k=5,
            t=500,
            stdth=0,
            bad_probes_list=self.bad_probes,
            feature_selection='controls',
            use_phenos=['p2'],
            use_covars=[],
        )
        module.run()

        assert self._validate_ranked_lists(module, expected_ranked)
        assert module.components.shape == expected_components.shape

        for col in range(module.components.shape[1]):
            assert tools.correlation(module.components[:, col],
                                     expected_components[:, col])
        logging.info("PASS")
 def test_get_mean_per_site(self):
     """Check get_mean_per_site against the stored FAKE_DATA_MEANS values."""
     logging.info("Testing mean oer site...")
     computed_means = self.meth_data.get_mean_per_site()
     expected_means = loadtxt(self.FAKE_DATA_MEANS)
     assert tools.correlation(expected_means, computed_means)
     logging.info("PASS")