Ejemplo n.º 1
0
    def too_slow_test_peertopeer(self):
        """Exercise ``single_snp_scale`` with a ``PeerToPeer`` cache.

        The SNPs in ``self.bed`` are written under the storage's
        'test_snps' entry with ``piece_per_chrom_count=2``. The scan then
        runs twice -- first after removing the 'cache' entry, then again
        without removing it -- and each result is handed to
        ``self.compare_files`` with the "old" reference.
        """
        logging.info("test_peertopeer")

        result_path = self.file_name("peertopeer")

        def id_and_path_function():
            from pysnptools.util.filecache import ip_address_pid

            peer_id = ip_address_pid()
            # The top-level directory name is spelled out explicitly in the
            # returned path.
            return peer_id, 'peertopeer/{0}'.format(peer_id)

        storage = PeerToPeer(
            common_directory='peertopeer/common',
            id_and_path_function=id_and_path_function,
        )
        snp_store = storage.join('test_snps')
        snp_store.rmtree()
        distributed_snps = DistributedBed.write(
            snp_store, self.bed, piece_per_chrom_count=2)

        # taskcount=5: per the original note, run on 5 additional Python
        # processes.
        runner = LocalMultiProc(taskcount=5)

        for wipe_cache_first in (True, False):
            if wipe_cache_first:
                storage.join('cache').rmtree()
            scan_result = single_snp_scale(
                test_snps=distributed_snps,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                cache=storage.join('cache'),
                output_file_name=result_path,
                runner=runner,
            )
            self.compare_files(scan_result, "old")
Ejemplo n.º 2
0
    def test_one_chrom(self):
        """Scan only the chromosome-3 SNPs, sliced and as a DistributedBed.

        Four runs are made: the sliced reader and the distributed copy,
        each first with ``clear_cache=True`` and then with
        ``clear_cache=False``. Every result is compared against the
        "old_one" reference via ``self.compare_files``.
        """
        logging.info("test_one_chrom")

        result_path = self.file_name("one_chrom")

        storage = LocalCache("local_cache/one_chrom")
        snp_store = storage.join('test_snps')
        snp_store.rmtree('')

        # Keep only the SNPs whose first position column equals 3
        # (chromosome 3).
        chrom3_snps = self.bed[:, self.bed.pos[:, 0] == 3]
        chrom3_distributed = DistributedBed.write(
            snp_store, chrom3_snps, piece_per_chrom_count=2)

        # (SNP reader, clear the cache first?, log label)
        scenarios = (
            (chrom3_snps, True, "Run with just chrom3"),
            (chrom3_distributed, True, "Run with distributed test SNPs"),
            (chrom3_snps, False, "Run with just chrom3 (use cache)"),
            (chrom3_distributed, False,
             "Run with distributed test SNPs (use cache)"),
        )
        for snps, clear_cache, label in scenarios:
            logging.info("=========== {0} ===========".format(label))
            scan_result = single_snp_scale(
                test_snps=snps,
                pheno=self.phen_fn,
                covar=self.cov_fn,
                K0=self.bed,
                cache=self._cache_dict(storage, clear_cache=clear_cache),
                output_file_name=result_path,
            )
            self.compare_files(scan_result, "old_one")
Ejemplo n.º 3
0
    def test_local_distribute(self):
        """Run ``single_snp_scale`` on a DistributedBed copy, then on ``self.bed``.

        The first run reads the test SNPs from a freshly written
        DistributedBed and asks ``self._cache_dict`` for a cleared cache;
        the second run reads ``self.bed`` directly with
        ``clear_cache=False``. Both results are compared against the "old"
        reference.
        """
        logging.info("test_local_distribute")

        result_path = self.file_name("local_distribute")

        storage = LocalCache("local_cache/local_distribute")
        snp_store = storage.join('test_snps')
        snp_store.rmtree('')
        distributed_snps = DistributedBed.write(
            snp_store, self.bed, piece_per_chrom_count=2)

        # First pass: distributed test SNPs, cache cleared beforehand.
        first_result = single_snp_scale(
            test_snps=distributed_snps,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=self.bed,
            cache=self._cache_dict(storage, clear_cache=True),
            output_file_name=result_path,
            force_python_only=False,
        )
        self.compare_files(first_result, "old")

        # Second pass: the plain reader, cache left in place.
        second_result = single_snp_scale(
            test_snps=self.bed,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=self.bed,
            cache=self._cache_dict(storage, clear_cache=False),
            output_file_name=result_path,
        )
        self.compare_files(second_result, "old")
Ejemplo n.º 4
0
def snpsA(seed, iid_count, sid_count, use_distributed):
    """Return generated test SNPs plus a random phenotype and covariates.

    The SNPs are generated with ``SnpGen`` (10 chromosomes) and stored
    under ``cache_top``; when the target path already exists it is opened
    instead of being regenerated.

    :param seed: Seed passed to ``SnpGen`` and to ``np.random.seed``.
    :param iid_count: Number of individuals passed to ``SnpGen``.
    :param sid_count: Number of SNPs passed to ``SnpGen``.
    :param use_distributed: If true, store/read the SNPs as a
        ``DistributedBed`` (path ending in ``_db``); otherwise as a ``Bed``
        file (path ending in ``.bed``).
    :return: Tuple ``(test_snps, pheno, covar)`` where ``pheno`` is a
        one-column ``SnpData`` and ``covar`` a two-column ``SnpData``, both
        indexed by ``test_snps.iid``.

    Note: this calls ``np.random.seed(seed)`` and therefore reseeds NumPy's
    global random state.
    """
    import numpy as np
    from pysnptools.snpreader import Bed
    from pysnptools.snpreader import DistributedBed
    from pysnptools.snpreader import SnpData
    from pysnptools.snpreader import SnpGen

    chrom_count = 10
    count_A1 = False

    # ``cache_top`` is defined outside this function and is only read here
    # (it must support ``/`` and the result must support ``.exists()``), so
    # no ``global`` declaration is required.
    suffix = "_db" if use_distributed else ".bed"
    test_snp_path = (
        cache_top /
        f"snpsA_{seed}_{chrom_count}_{iid_count}_{sid_count}{suffix}")

    if test_snp_path.exists():
        # Reuse the data written by an earlier call.
        if use_distributed:
            test_snps = DistributedBed(str(test_snp_path))
        else:
            test_snps = Bed(str(test_snp_path), count_A1=count_A1)
    else:
        snpgen = SnpGen(
            seed=seed,
            iid_count=iid_count,
            sid_count=sid_count,
            chrom_count=chrom_count,
            block_size=1000,
        )
        if use_distributed:
            test_snps = DistributedBed.write(str(test_snp_path), snpgen)
        else:
            test_snps = Bed.write(str(test_snp_path),
                                  snpgen.read(dtype="float32"),
                                  count_A1=count_A1)

    # Seed once, then draw the phenotype before the covariates so the
    # random values are reproducible for a given seed.
    np.random.seed(seed)
    pheno = SnpData(
        iid=test_snps.iid,
        sid=["pheno"],
        val=np.random.randn(test_snps.iid_count, 1) * 3 + 2,
    )
    covar = SnpData(
        iid=test_snps.iid,
        sid=["covar1", "covar2"],
        val=np.random.randn(test_snps.iid_count, 2) * 2 - 3,
    )

    return test_snps, pheno, covar
Ejemplo n.º 5
0
    def test_one_fast(self):
        """Scan the chromosome-3 SNPs from a DistributedBed without a cache.

        The chromosome-3 slice of ``self.bed`` is written to the
        'one_fast' entry of a ``LocalCache`` and then scanned with
        ``single_snp_scale``; the result is compared against the "old_one"
        reference.
        """
        logging.info("test_one_fast")

        result_path = self.file_name("one_fast")

        snp_store = LocalCache("local_cache").join('one_fast')
        snp_store.rmtree()

        # Keep only the SNPs whose first position column equals 3
        # (chromosome 3).
        chrom3_snps = self.bed[:, self.bed.pos[:, 0] == 3]
        chrom3_distributed = DistributedBed.write(
            snp_store, chrom3_snps, piece_per_chrom_count=2)

        scan_result = single_snp_scale(
            test_snps=chrom3_distributed,
            pheno=self.phen_fn,
            covar=self.cov_fn,
            G0=self.bed,
            output_file_name=result_path,
        )
        self.compare_files(scan_result, "old_one")
Ejemplo n.º 6
0
    def test1(self):
        """Write generated SNPs as a DistributedBed and check two references.

        The data read back from the freshly written DistributedBed must
        match (``allclose`` with ``equal_nan=True``) both the stored
        DistributedBed reference and the stored Bed reference.
        """
        logging.info("in TestDistributedBed test1")
        from pysnptools.snpreader import SnpGen, DistributedBed

        generator = SnpGen(seed=0, iid_count=100, sid_count=100)

        work_dir = 'tempdir/distributed_bed_test1'
        # Start from a clean output directory.
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)

        written = DistributedBed.write(
            work_dir, generator, piece_per_chrom_count=2)
        snpdata = written.read()

        # Reference data sets are located relative to this source file.
        here = os.path.dirname(os.path.realpath(__file__))

        distributed_ref = DistributedBed(
            here + '/../../tests/datasets/distributed_bed_test1').read()
        assert snpdata.allclose(distributed_ref, equal_nan=True)

        bed_ref = Bed(
            here + '/../../tests/datasets/distributed_bed_test1_X',
            count_A1=False,
        ).read()
        assert snpdata.allclose(bed_ref, equal_nan=True)
Ejemplo n.º 7
0
    test_suite = unittest.TestSuite([])
    test_suite.addTests(
        unittest.TestLoader().loadTestsFromTestCase(TestDistributedBed))
    return test_suite


if __name__ == "__main__":
    import doctest
    logging.basicConfig(level=logging.INFO)

    # Disabled scratch code: writes every 10th SNP of the toy data set in
    # DistributedBed format. Flip the condition to run it by hand.
    if False:
        from pysnptools.snpreader import DistributedBed, Bed
        import shutil
        out_dir = 'tempdir/toydataSkip10.distributedbed'
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        # Read every 10 snps from Bed format
        every_tenth = Bed('../examples/toydata.5chrom.bed',
                          count_A1=False)[:, ::10]
        # Write data in DistributedBed format
        DistributedBed.write(out_dir, every_tenth, piece_per_chrom_count=5)

    # Doctests run first; any failure stops the script here.
    doctest_outcome = doctest.testmod(optionflags=doctest.ELLIPSIS)
    assert doctest_outcome.failed == 0, "failed doc test: " + __file__

    # Then the unit tests, stopping at the first failure.
    suite = getTestSuite()
    text_runner = unittest.TextTestRunner(failfast=True)
    run_outcome = text_runner.run(suite)
    assert run_outcome.wasSuccessful()