예제 #1
0
    def test_exclude_bad_probes(self):
        """Check that ``Refactor._exclude_bad_probes`` removes the bad probes.

        The methylation data left in the module after the exclusion is
        compared with the reference data loaded from
        ``self.DEMO_DATA_NO_BAD_PROBES``; the remaining site names and the
        remaining number of sites are verified as well.
        """
        logging.info("Testing removing bad probes...")
        probes_meth_data = self.meth_data.copy()

        data_no_bad_probes = methylation_data.MethylationDataLoader(
            datafile=self.DEMO_DATA_NO_BAD_PROBES)

        # Use a context manager so the probes file is closed as soon as it
        # has been read instead of leaking the handle.
        with open(self.BAD_PROBES, 'r') as bad_probes_file:
            bad_probes = load(bad_probes_file)

        module = refactor.Refactor(methylation_data=probes_meth_data,
                                   k=5,
                                   bad_probes_list=bad_probes)

        module._exclude_bad_probes()

        assert array_equal(data_no_bad_probes.data, module.meth_data.data)

        # The list of site names must have changed: rebuild the expected
        # list by removing every bad probe from the original names.
        remove_count = len(bad_probes)
        orig_sites_before = list(self.meth_data.cpgnames)
        orig_sites_after = list(module.meth_data.cpgnames)
        for probe in bad_probes:
            try:
                orig_sites_before.remove(probe)
            except ValueError:
                # list.remove raises ValueError when the probe is not among
                # the site names; such a probe removes nothing.
                remove_count -= 1
        assert orig_sites_after == orig_sites_before

        # The number of sites must have dropped by the number of probes that
        # were actually present in the data.
        assert self.meth_data.sites_size - remove_count == module.meth_data.sites_size

        logging.info("PASS")
예제 #2
0
    def test_low_rank_approx_distances(self):
        """
        Verify that the low rank approximation produces exactly one distance
        per site of the methylation data.
        """
        logging.info("Testing low rank approx distances...")
        meth_copy = self.meth_data.copy()
        refactor_module = refactor.Refactor(methylation_data=meth_copy, k=5)

        site_distances = refactor_module._calc_low_rank_approx_distances(meth_copy)

        # Bind both sides to names so the comparison reads at a glance.
        distance_count = site_distances.size
        site_count = meth_copy.sites_size
        assert distance_count == site_count, "there must be distances as the number of sites"
        logging.info("PASS")
예제 #3
0
    def test_senario1(self):
        """Run Refactor without phenotypes or covariates and check its output.

        The module is run with k=5, t=500 and stdth=0 on a copy of the test
        data. Its ranked list is validated against ``self.RES1_RANKED`` and
        every component column is checked against the matching column
        loaded from ``self.RES1``.
        """
        logging.info("Testing clean refactor components...")
        meth_copy = self.meth_data.copy()

        expected_components = loadtxt(self.RES1)
        expected_ranked = loadtxt(self.RES1_RANKED, dtype=str)

        refactor_options = dict(
            methylation_data=meth_copy,
            k=5,
            t=500,
            stdth=0,
            bad_probes_list=self.bad_probes,
            use_phenos=None,
            use_covars=None,
        )
        module = refactor.Refactor(**refactor_options)
        module.run()

        assert self._validate_ranked_lists(module, expected_ranked)
        assert module.components.shape == expected_components.shape

        # Compare the computed components with the stored ones column by column.
        column_count = module.components.shape[1]
        for column in range(column_count):
            actual_column = module.components[:, column]
            expected_column = expected_components[:, column]
            assert tools.correlation(actual_column, expected_column)
        logging.info("PASS")
예제 #4
0
    def test_senario3(self):
        """Run Refactor with the 'controls' feature selection and check its output.

        The module is run with k=5, t=500, stdth=0, phenotype 'p2' and an
        empty covariates list on a copy of the test data. Its ranked list is
        validated against ``self.RES3_RANKED`` and every component column is
        checked against the matching column loaded from ``self.RES3``.
        """
        logging.info("Testing controls feature selection...")
        meth_copy = self.meth_data.copy()

        expected_components = loadtxt(self.RES3)
        expected_ranked = loadtxt(self.RES3_RANKED, dtype=str)

        refactor_options = dict(
            methylation_data=meth_copy,
            k=5,
            t=500,
            stdth=0,
            bad_probes_list=self.bad_probes,
            feature_selection='controls',
            use_phenos=['p2'],
            use_covars=[],
        )
        module = refactor.Refactor(**refactor_options)
        module.run()

        assert self._validate_ranked_lists(module, expected_ranked)
        assert module.components.shape == expected_components.shape

        # Compare the computed components with the stored ones column by column.
        column_count = module.components.shape[1]
        for column in range(column_count):
            actual_column = module.components[:, column]
            expected_column = expected_components[:, column]
            assert tools.correlation(actual_column, expected_column)
        logging.info("PASS")
예제 #5
0
 def run(self, args, meth_data, output_perfix = None):
   """Build a ``refactor.Refactor`` module from the parsed arguments and run it.

   args          -- parsed arguments; the attributes read here are pheno, k,
                    t, stdth, fs, numcomp and covar
   meth_data     -- methylation data handed to Refactor; its ``phenotype``
                    attribute is checked when ``args.pheno`` is given
   output_perfix -- prefix of the two output filenames; a missing or empty
                    value falls back to "output"

   The created module is stored in ``self.module``. Any exception is logged
   and then re-raised to the caller.
   """
   try:
     if args.pheno is not None and meth_data.phenotype is None:
       common.terminate("There is no phenotype in the data, use --phenofile to add phenotype.")
     if not output_perfix:
       output_perfix = "output"

     # Union of the probe ids listed in all bad-probes files. A plain loop
     # is used because the update is a side effect, not a value to collect.
     # ndmin=1 keeps a file holding a single probe iterable; without it
     # loadtxt would return a 0-d array that set.update cannot iterate.
     # NOTE(review): assumes loadtxt is numpy.loadtxt -- confirm.
     bad_probes_list = set()
     for probes_file in BAD_PROBES_FILES:
       bad_probes_list.update(loadtxt(probes_file, dtype=str, ndmin=1))
     bad_probes_list = array(list(bad_probes_list))

     self.module = refactor.Refactor(methylation_data=meth_data,
                                     k=args.k,
                                     t=args.t,
                                     stdth=args.stdth,
                                     feature_selection=args.fs.lower().strip(),
                                     num_components=args.numcomp,
                                     use_covars=args.covar,
                                     use_phenos=args.pheno,
                                     bad_probes_list=bad_probes_list,
                                     ranked_output_filename=output_perfix + "." + refactor.RANKED_FILENAME,
                                     components_output_filename=output_perfix + "." + refactor.COMPONENTS_FILENAME)
     self.module.run()
   except Exception:
     # Record the traceback, then let the caller handle the failure.
     logging.exception("in refactor")
     raise