Example #1
0
    def test_exclude_bad_probes(self):
        """Run ``Refactor._exclude_bad_probes`` and compare with reference data.

        The data held by the module after the call must equal the data loaded
        from ``DEMO_DATA_NO_BAD_PROBES``. The remaining site names must equal
        the original site names with every listed bad probe removed, and the
        site count must drop by the number of listed probes that were actually
        present among the original sites.
        """
        logging.info("Testing removing bad probes...")
        probes_meth_data = self.meth_data.copy()

        data_no_bad_probes = methylation_data.MethylationDataLoader(
            datafile=self.DEMO_DATA_NO_BAD_PROBES)

        # Use a context manager so the probes file is closed deterministically.
        with open(self.BAD_PROBES, 'r') as bad_probes_file:
            bad_probes = load(bad_probes_file)
        module = refactor.Refactor(methylation_data=probes_meth_data,
                                   k=5,
                                   bad_probes_list=bad_probes)

        module._exclude_bad_probes()

        assert array_equal(data_no_bad_probes.data, module.meth_data.data)

        # tests sites list has changed
        remove_count = len(bad_probes)
        orig_sites_before = list(self.meth_data.cpgnames)
        orig_sites_after = list(module.meth_data.cpgnames)
        for probe in bad_probes:
            try:
                orig_sites_before.remove(probe)
            except ValueError:
                # list.remove raises ValueError when the probe is not among the
                # original sites; such a probe does not reduce the site count.
                remove_count -= 1
        assert orig_sites_after == orig_sites_before
        # test sites size
        assert self.meth_data.sites_size - remove_count == module.meth_data.sites_size

        logging.info("PASS")
Example #2
0
    def _test_fit_model(self):
        """Check ``LinearRegression.fit_model`` on row 0 against stored results.

        The model is fitted with the loaded phenotype, the first data row and
        the loaded covariates. Selected outputs are compared, within fixed
        tolerances, to the values read from ``LIN_REG_FIT_MODEL``.
        """
        logging.info("Testing linear regression: fit_model")
        load_dataset = methylation_data.MethylationDataLoader
        dataset = load_dataset(datafile=self.LIN_REG_DATA,
                               covarfiles=[self.LIN_REG_COVAR],
                               phenofile=[self.LIN_REG_PHENO])
        expected = loadtxt(self.LIN_REG_FIT_MODEL)

        # test 1 dim
        coefs, tstats, pvals = LinearRegression.fit_model(
            dataset.phenotype, dataset.data[0, :], covars=dataset.covar)

        # Observed values in the order the expected results are indexed:
        # coefs[0] (intercept), coefs[-1] (site), coefs[1] and coefs[2]
        # (covariates 1 and 2), then the last t-statistic and last p-value.
        observed = (coefs[0], coefs[-1], coefs[1], coefs[2],
                    tstats[-1], pvals[-1])
        tolerances = (1e-3, 1e-3, 1e-3, 1e-3, 1e-2, 1e-3)
        for position, (value, tolerance) in enumerate(zip(observed, tolerances)):
            assert abs(value - expected[position]) < tolerance
        # Note - there is no option to test 2 dim
        logging.info("PASS")
 def test_exclude(self):
     """Call ``exclude(INC_EXC)`` on a copy of the data and compare results.

     The data of the copy must equal the data loaded from ``FAKE_DATA_EXC``.
     """
     logging.info("Testing test_exclude...")
     load_dataset = methylation_data.MethylationDataLoader
     expected = load_dataset(datafile=self.FAKE_DATA_EXC)
     working_copy = self.meth_data.copy()
     working_copy.exclude(self.INC_EXC)
     assert array_equal(expected.data, working_copy.data)
     logging.info("PASS")
Example #4
0
 def __init__(self):
     """Load data from ``DATA``/``COVAR``/``PHENO`` and run ``test_pvalues``."""
     logging.info("Testing Started on LMMTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.DATA,
                                   covarfiles=[self.COVAR],
                                   phenofile=[self.PHENO])
     self.test_pvalues()
     logging.info("Testing Finished on LMMTester")
 def test_remove_outliers(self):
     """Call ``exclude_maxpcstds`` and compare the remaining sample ids.

     After the call the sample ids of the data loaded from ``DATA_FILE`` must
     equal the ids read (as strings) from ``SAMPLES_AFTER_MAXPCSTD``.
     """
     logging.info("Test remove outliers")
     data = methylation_data.MethylationDataLoader(datafile=self.DATA_FILE)
     samples_after_maxpcstd = loadtxt(self.SAMPLES_AFTER_MAXPCSTD,
                                      dtype=str)
     # NOTE(review): the pairs are passed through unchanged; presumably each is
     # [pc index, std threshold] - confirm against exclude_maxpcstds.
     data.exclude_maxpcstds([[1, 3], [2, 3]])
     assert array_equal(data.samples_ids, samples_after_maxpcstd)
     # Report success like the other test methods do.
     logging.info("PASS")
Example #6
0
 def __init__(self):
     """Load the ``LIN_REG_*`` data files and run ``_test_fit_model``."""
     logging.info("Testing Started on LogisticRegressionTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.LIN_REG_DATA,
                                   covarfiles=[self.LIN_REG_COVAR],
                                   phenofile=[self.LIN_REG_PHENO])
     self._test_fit_model()
     logging.info("Testing Finished on LogisticRegressionTester")
 def test_remove_lowest_std_sites(self):
     """Call ``remove_lowest_std_sites(STDTH)`` on a copy and compare results.

     The data of the copy must equal the data loaded from ``FAKE_DATA_STDTH``.
     Afterwards the global ``test_remove_lowest_std_sites`` function is run.
     """
     logging.info("Testing stdth...")
     filtered = self.meth_data.copy()
     filtered.remove_lowest_std_sites(self.STDTH)
     load_dataset = methylation_data.MethylationDataLoader
     reference = load_dataset(datafile=self.FAKE_DATA_STDTH)
     assert array_equal(filtered.data, reference.data)
     # A bare name inside a method is looked up as a global, not on the class,
     # so this does not call the method itself.
     test_remove_lowest_std_sites()
     logging.info("PASS")
def test_remove_lowest_std_sites():
    """Call ``remove_lowest_std_sites(0.02)`` and check rows against stored results.

    Loads ``DATA_STDTH``, applies the call, and asserts ``tools.correlation``
    for every row of the array read from ``STDTH_RES`` paired with the row of
    the same index in the processed data.
    """
    logging.info("Testing stdth again...")
    filtered = methylation_data.MethylationDataLoader(datafile=DATA_STDTH)
    filtered.remove_lowest_std_sites(0.02)
    expected = loadtxt(STDTH_RES)
    row_count = expected.shape[0]
    for row in range(row_count):
        # NOTE(review): the third argument is presumably a tolerance - confirm
        # against tools.correlation.
        assert tools.correlation(expected[row, :], filtered.data[row, :], 1e-12)
    logging.info("PASS")
    def test_add_covariates(self):
        """Check covariates loaded in two steps against the tester's data.

        Data is loaded with ``FAKE_COVAR_PART1`` only, ``FAKE_COVAR_PART2`` is
        then added through ``add_covar_files``, and the resulting ``covar``
        must equal the ``covar`` of a copy of ``self.meth_data``.
        """
        logging.info("Testing add covar...")
        reference = self.meth_data.copy()
        load_dataset = methylation_data.MethylationDataLoader
        incremental = load_dataset(datafile=self.FAKE_DATA,
                                   covarfiles=[self.FAKE_COVAR_PART1])
        incremental.add_covar_files([self.FAKE_COVAR_PART2])
        assert array_equal(incremental.covar, reference.covar)

        logging.info("PASS")
Example #10
0
    def run(self, args):
        """Load the methylation data named by ``args`` plus optional id lists.

        A data file whose name ends with ``methylation_data.GLINT_FILE_SUFFIX``
        is read and JSON-decoded into a ``MethylationData`` object, after which
        any phenotype/covariate files given in ``args`` are added to it. Any
        other data file is handed to ``MethylationDataLoader`` together with
        those files. The result is stored in ``self.module``.

        The include/exclude files are then loaded and validated against the
        CpG names, and the keep/remove files against the sample ids; the
        results are stored in ``self.include_list``, ``self.exclude_list``,
        ``self.keep_list`` and ``self.remove_list`` (empty lists when the
        corresponding argument is ``None``).

        Any exception is logged with its traceback and re-raised.
        """
        try:
            self.args = args
            self.module = None
            if args.datafile.name.endswith(methylation_data.GLINT_FILE_SUFFIX):
                logging.info("Loading glint file %s..." % args.datafile.name)
                started = time()
                raw_text = args.datafile.read()
                decoder = JSONDecoder(
                    object_hook=methylation_data.json_numpy_obj_hook)
                decoded = decoder.decode(raw_text)
                # Keys of the decoded object, in the positional order in which
                # their values are passed to the MethylationData constructor.
                field_order = ('data', 'samples_ids', 'cpgnames', 'phenotype',
                               'covar', 'covarnames', 'phenonames',
                               'title_indexes')
                self.module = methylation_data.MethylationData(
                    *[decoded[field] for field in field_order])

                elapsed = time() - started
                logging.debug("Loading binary data took  %s seconds" % elapsed)
                size_report = (self.module.sites_size,
                               self.module.samples_size)
                logging.debug(
                    "Got methylation data with %s sites and %s samples id" %
                    size_report)
                # phenotype / covariate files supplied alongside a glint file
                # are added to the loaded module
                if args.phenofile is not None:
                    self.module.add_pheno_files(args.phenofile)
                if args.covarfile is not None:
                    self.module.add_covar_files(args.covarfile)
            else:
                load_dataset = methylation_data.MethylationDataLoader
                self.module = load_dataset(datafile=args.datafile,
                                           phenofile=args.phenofile,
                                           covarfiles=args.covarfile)

            # Start from empty lists, then fill in whichever id files were given.
            self.include_list = []
            self.exclude_list = []
            self.remove_list = []
            self.keep_list = []

            # site lists are validated against the CpG names
            if args.include is not None:
                self.include_list = self._load_and_validate_ids_in_file(
                    args.include, self.module.cpgnames)
            if args.exclude is not None:
                self.exclude_list = self._load_and_validate_ids_in_file(
                    args.exclude, self.module.cpgnames)
            # sample lists are validated against the sample ids
            if args.keep is not None:
                self.keep_list = self._load_and_validate_ids_in_file(
                    args.keep, self.module.samples_ids)
            if args.remove is not None:
                self.remove_list = self._load_and_validate_ids_in_file(
                    args.remove, self.module.samples_ids)

        except Exception:
            logging.exception("in methylation data")
            raise
Example #11
0
 def test_remove(self):
     """Call ``remove(KEEP_REMOVE_INDICES)`` on a copy and compare results.

     The data, phenotype and covariates of the copy must equal those loaded
     from the ``FAKE_*_REMOVE`` files.
     """
     logging.info("Testing remove...")
     load_dataset = methylation_data.MethylationDataLoader
     expected = load_dataset(datafile=self.FAKE_DATA_REMOVE,
                             covarfiles=[self.FAKE_COVAR_REMOVE],
                             phenofile=[self.FAKE_PHENO_REMOVE])
     working_copy = self.meth_data.copy()
     working_copy.remove(self.KEEP_REMOVE_INDICES)
     assert array_equal(expected.data, working_copy.data)
     assert array_equal(expected.phenotype, working_copy.phenotype)
     assert array_equal(expected.covar, working_copy.covar)
     logging.info("PASS")
Example #12
0
    def __init__(self):
        """Load the ``DEMO_*`` data files and run the refactor tests in order.

        Runs ``test_remove_covariates``, ``test_low_rank_approx_distances`` and
        ``test_exclude_bad_probes``.
        """
        logging.info("Testing Started on RefactorTester")
        # The former placeholder assignment of the MethylationData class to
        # self.meth_data was dead code: it was overwritten immediately below.
        self.meth_data = methylation_data.MethylationDataLoader(
            datafile=self.DEMO_SMALL_DATA,
            covarfiles=[self.DEMO_COVAR],
            phenofile=[self.DEMO_PHENO])
        self.test_remove_covariates()
        self.test_low_rank_approx_distances()
        self.test_exclude_bad_probes()

        logging.info("Testing Finished on RefactorTester")
Example #13
0
    def _test_fit_model(self):  # TODO: marked by the author as not working
        """Check ``tools.wilcoxon_test`` on row 0 against stored statistics.

        The z statistic and p-value returned for the loaded phenotype and the
        first data row are compared with ``Z_STATS_RES`` and ``P_VAL_RES``.
        """
        logging.info("Testing Wilcoxon")
        load_dataset = methylation_data.MethylationDataLoader
        dataset = load_dataset(datafile=self.DATA,
                               covarfiles=[self.COVAR],
                               phenofile=[self.PHENO])

        # per the original note, the phenotype is a binary vector
        phenotype = dataset.phenotype
        # site under test - row 0, i.e. just the first site
        first_site = dataset.data[0, :]

        zstats, pval = tools.wilcoxon_test(phenotype, first_site)

        z_error = abs(zstats - self.Z_STATS_RES)
        assert z_error < 1e-2
        p_error = abs(pval - self.P_VAL_RES)
        assert p_error < 1e-3
        logging.info("PASS")
Example #14
0
    def __init__(self):
        """Run ``pca.PCA`` on the transposed data and check its ``P`` columns.

        Columns 0-9 of the result's ``P`` are each checked with
        ``tools.correlation`` against the same column of the array read from
        ``PCA_P_RES``.
        """
        logging.info("Testing Started on PCATester")
        expected_p = loadtxt(self.PCA_P_RES)

        load_dataset = methylation_data.MethylationDataLoader
        dataset = load_dataset(datafile=self.DATA_FILE)
        pca_result = pca.PCA(dataset.data.transpose())

        # compare column by column, first ten columns only
        for column in range(10):
            assert tools.correlation(pca_result.P[:, column],
                                     expected_p[:, column])

        logging.info("PASS")
        logging.info("Testing Finished on PCATester")
Example #15
0
    def test_upload_new_files(self):
        """Check that uploading covariate/phenotype files matches ``remove``.

        A copy of the tester's data with ``KEEP_REMOVE_INDICES`` removed must
        have the same data, phenotype and covariates as ``FAKE_DATA_REMOVE``
        loaded on its own and then given ``FAKE_COVAR_REMOVE`` and
        ``FAKE_PHENO_REMOVE`` through the upload methods.
        """
        logging.info("Testing upload new covaritates and phenotype files...")
        reduced = self.meth_data.copy()
        load_dataset = methylation_data.MethylationDataLoader
        uploaded = load_dataset(datafile=self.FAKE_DATA_REMOVE)

        reduced.remove(self.KEEP_REMOVE_INDICES)

        uploaded.upload_new_covaritates_files([self.FAKE_COVAR_REMOVE])
        uploaded.upload_new_phenotype_file([self.FAKE_PHENO_REMOVE])

        assert array_equal(reduced.data, uploaded.data)
        assert array_equal(reduced.phenotype, uploaded.phenotype)
        assert array_equal(reduced.covar, uploaded.covar)
        logging.info("PASS")
Example #16
0
 def __init__(self):
     """Collect the bad probes, load the data and run the four scenario tests.

     ``self.bad_probes`` becomes a list of the distinct entries read (as
     strings) from every file in ``refactor_parser.BAD_PROBES_FILES``;
     ``self.meth_data`` is loaded from ``DATA``/``COVAR``/``PHENO``.
     """
     logging.info("Testing Started on SenariosTester")
     # A plain loop is used because set.update is a side effect, not a value
     # worth collecting into a list.
     bad_probes = set()
     for probes_file in refactor_parser.BAD_PROBES_FILES:
         bad_probes.update(loadtxt(probes_file, dtype=str))
     self.bad_probes = list(bad_probes)
     self.meth_data = methylation_data.MethylationDataLoader(
         datafile=self.DATA,
         covarfiles=[self.COVAR],
         phenofile=[self.PHENO])
     self.test_senario1()
     self.test_senario2()
     self.test_senario3()
     self.test_senario4()
     logging.info("Testing Finished on SenariosTester")
Example #17
0
 def __init__(self):
     """Load the ``FAKE_*`` data files and run every data test in order."""
     logging.info("Testing Started on DataTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.FAKE_DATA,
                                   covarfiles=[self.FAKE_COVAR],
                                   phenofile=[self.FAKE_PHENO])
     # Names of the test methods, looked up and called one at a time in
     # exactly this order.
     test_sequence = (
         "test_remove_lowest_std_sites",
         "test_get_mean_per_site",
         "test_include",
         "test_exclude",
         "test_keep",
         "test_remove",
         "test_exclude_sites_with_low_mean",
         "test_exclude_sites_with_high_mean",
         "test_upload_new_files",
         "test_load_and_validate_covar",
         "test_load_and_validate_phenotype",
         "test_add_covariates",
         "test_fail_exclude",
         "test_fail_remove",
         "test_remove_outliers",
     )
     for test_name in test_sequence:
         getattr(self, test_name)()
     logging.info("Testing Finished on DataTester")
Example #18
0
 def __init__(self):
     """Load the data file ``DATA`` and run ``test_components``."""
     logging.info("Testing Started on HousemanTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.DATA)
     self.test_components()
     logging.info("Testing Finished on HousemanTester")
Example #19
0
 def __init__(self):
     """Load ``DATA_FILE`` and run the low-rank and distance tests."""
     logging.info("Testing Started ToolsTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.DATA_FILE)
     self.test_low_rank_approx()
     self.test_euclidean_distance()
     logging.info("Testing Finished ToolsTester")
Example #20
0
 def __init__(self):
     """Load data from ``DATA``/``COVAR``/``PHENO`` and run ``_test_fit_model``."""
     logging.info("Testing Started on WilcoxonTester")
     load_dataset = methylation_data.MethylationDataLoader
     self.meth_data = load_dataset(datafile=self.DATA,
                                   covarfiles=[self.COVAR],
                                   phenofile=[self.PHENO])
     self._test_fit_model()
     logging.info("Testing Finished on WilcoxonTester")