Esempio n. 1
0
 def test_merge_std(self):
     """Check that ``block_size=1`` and ``block_size=None`` give the same kernel.

     For each of the Beta(2, 10) and Unit standardizers, a 3 x 20 matrix of
     random values in {0, 1, 2} is wrapped in a ``SnpData`` and read through
     ``SnpKernel._read_with_standardizing`` twice, once per block size. The
     kernel values, the trained statistics and the ``factor`` of the third
     returned object must agree between the two reads.
     """
     for std in (stdizer.Beta(2, 10), stdizer.Unit()):
         # Reseed on every pass so both standardizers see the same values.
         np.random.seed(0)
         sid_count = 20
         sample_ids = [[str(i), str(i)] for i in range(3)]
         genotypes = np.random.randint(3, size=[3, sid_count])
         snpreader = SnpData(
             iid=sample_ids,
             sid=[str(i) for i in range(sid_count)],
             val=np.array(genotypes, dtype=np.float64, order='F'))

         # Same reader and standardizer; only block_size differs.
         with_block_1, with_block_none = [
             SnpKernel(snpreader, std, block_size=block_size)
             ._read_with_standardizing(to_kerneldata=True,
                                       return_trained=True)
             for block_size in (1, None)
         ]
         kerneldata0, trained0, diag0 = with_block_1
         kerneldata1, trained1, diag1 = with_block_none

         for left, right in ((kerneldata0.val, kerneldata1.val),
                             (trained0.stats, trained1.stats)):
             np.testing.assert_array_almost_equal(left, right, decimal=10)
         assert abs(diag0.factor - diag1.factor) < 1e-7
Esempio n. 2
0
    def test_subset(self):
        """Check that subsetting a ``SnpKernel`` matches subsetting its data.

        The toy Bed file is wrapped in a Unit-standardized ``SnpKernel``.
        Indexing it with ``[::2, ::2]`` and with ``[::2]`` must both read the
        same values as ``snpreader.read_kernel(Unit())[::2]``.
        """
        logging.info("in test_subset")
        bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
        snpreader = Bed(bed_path, count_A1=False)
        snpkernel = SnpKernel(snpreader, stdizer.Unit())

        # ``obj[::2]`` and ``obj[slice(None, None, 2)]`` are the same index.
        every_other = slice(None, None, 2)

        # Two-index form.
        kerneldata1 = snpkernel[every_other, every_other].read()
        full_kernel = snpreader.read_kernel(stdizer.Unit())
        expected = full_kernel[every_other].read()
        np.testing.assert_array_almost_equal(kerneldata1.val,
                                             expected.val,
                                             decimal=10)

        # Single-index form; the test expects the same result as above.
        kerneldata2 = snpkernel[every_other].read()
        np.testing.assert_array_almost_equal(kerneldata2.val,
                                             expected.val,
                                             decimal=10)
        logging.info("done with test")
Esempio n. 3
0
 def test_npz(self):
     """Round-trip a kernel through ``KernelNpz`` and compare the values.

     A Unit-standardized kernel is read from the toy Bed file, written with
     ``KernelNpz.write``, read back through a new ``KernelNpz`` reader, and
     compared with the in-memory kernel to 10 decimal places.
     """
     logging.info("in test_npz")
     bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
     snpreader = Bed(bed_path, count_A1=False)
     kerneldata1 = snpreader.read_kernel(standardizer=stdizer.Unit())

     # The string is never inspected; the call is kept so that converting
     # the kernel to ``str`` is still exercised (presumably a smoke test).
     str(kerneldata1)

     npz_path = "tempdir/kernelreader/toydata.kernel.npz"
     create_directory_if_necessary(npz_path)
     KernelNpz.write(npz_path, kerneldata1)

     reloaded = KernelNpz(npz_path).read()
     np.testing.assert_array_almost_equal(kerneldata1.val,
                                          reloaded.val,
                                          decimal=10)
     logging.info("done with test")
Esempio n. 4
0
    def test_respect_inputs(self):
        """Check ``read_kernel`` against a hand-computed kernel for many layouts.

        Source data is built for every combination of starting dtype
        (float32/float64), starting memory order ('F', 'C', 'A') and SNP
        count (20, 2). For each standardizer (Beta(1, 25), Identity, Unit)
        and each reader (the full data and the data without its first SNP),
        the reference kernel ``X.dot(X.T)`` is computed from the
        python-standardized values. ``read_kernel`` is then called with
        every goal dtype/order combination and its values are compared with
        the reference, using the looser of the start and goal tolerances.
        """
        np.random.seed(0)
        # (dtype, number of decimals that dtype is compared to)
        precisions = [(np.float32, 5), (np.float64, 10)]
        memory_orders = ['F', 'C', 'A']

        for dtype_start, decimal_start in precisions:
            for order_start in memory_orders:
                for sid_count in [20, 2]:
                    raw_values = np.random.randint(3, size=[3, sid_count])
                    snpdataX = SnpData(
                        iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                        sid=[str(i) for i in range(sid_count)],
                        val=np.array(raw_values,
                                     dtype=dtype_start,
                                     order=order_start))
                    standardizers = [
                        stdizer.Beta(1, 25),
                        stdizer.Identity(),
                        stdizer.Unit(),
                    ]
                    for stdx in standardizers:
                        # Full data, then the data with the first SNP dropped.
                        for snpreader0 in [snpdataX, snpdataX[:, 1:]]:
                            snpreader1 = snpreader0[1:, :]

                            standardized0 = snpreader0.read().standardize(
                                stdx,
                                return_trained=True,
                                force_python_only=True)
                            refdata0, trained_standardizer = standardized0
                            refval0 = refdata0.val.dot(refdata0.val.T)

                            # LATER: refdata1/refval1 are computed but never
                            # asserted on; kept so that applying the trained
                            # standardizer is still exercised.
                            refdata1 = snpreader1.read().standardize(
                                trained_standardizer, force_python_only=True)
                            refval1 = refdata0.val.dot(refdata1.val.T)

                            for dtype_goal, decimal_goal in precisions:
                                tolerance = min(decimal_start, decimal_goal)
                                for order_goal in memory_orders:
                                    k = snpreader0.read_kernel(
                                        standardizer=stdx,
                                        block_size=1,
                                        order=order_goal,
                                        dtype=dtype_goal)
                                    # NOTE(review): the return value is
                                    # discarded. If this helper returns a
                                    # bool instead of raising, this line
                                    # checks nothing -- confirm.
                                    PstReader._array_properties_are_ok(
                                        k.val, order_goal, dtype_goal)
                                    np.testing.assert_array_almost_equal(
                                        refval0, k.val, decimal=tolerance)
Esempio n. 5
0
    def test_cpp_std(self):
        """Compare ``force_python_only=False`` and ``True`` standardization.

        For every memory order ('C', 'F'), dtype (float64, float32) and
        standardizer (Unit, Beta(2, 10)), a 3-row training set and a 2-row
        test set of random values in {0, 1, 2} are drawn. Each combination
        of two flags is then checked on its own data:

        * ``has_SNC_in_train``: column 1 of the training data is set to 0,
          making that SNP constant.
        * ``has_missing_data``: entry [0, 2] of both sets is set to NaN.

        For each combination the test asserts that:

        * both paths give the same standardized training values and the
          same trained ``stats``;
        * ``np.inf`` appears in column 1 of the trained ``stats`` exactly
          when a constant SNP was injected;
        * the constant SNP column and the missing entry come out as 0;
        * applying the trained standardizers to the test set gives the same
          values on both paths and leaves the trained ``stats`` unchanged.

        Fixtures are rebuilt from pristine copies for every flag
        combination. Previously the readers were mutated in place and
        reused, so the NaN written for ``has_missing_data=True`` was still
        present on the later ``has_SNC_in_train=True,
        has_missing_data=False`` pass. That pass therefore never ran on
        data without missing values.
        """
        #Order C vs F
        for order in ['C', 'F']:
            #32 vs 64
            for dtype in [np.float64, np.float32]:
                # float32 results are only compared to 5 decimals.
                decimal = 10 if dtype == np.float64 else 5
                #unit vs beta
                for std in [stdizer.Unit(), stdizer.Beta(2, 10)]:
                    # Draw the base values once per standardizer, in the
                    # original order (train first, then test), so the
                    # random data is unchanged.
                    np.random.seed(0)
                    sid_count = 20
                    train_base = np.random.randint(3, size=[3, sid_count])
                    test_base = np.random.randint(3, size=[2, sid_count])

                    #has SNC
                    for has_SNC_in_train in [False, True]:
                        #missing data
                        for has_missing_data in [False, True]:
                            # np.array copies by default, so edits below
                            # never reach the base arrays or leak into
                            # another flag combination.
                            train_val = np.array(train_base,
                                                 dtype=dtype,
                                                 order=order)
                            test_val = np.array(test_base,
                                                dtype=dtype,
                                                order=order)
                            if has_SNC_in_train:
                                train_val[:, 1] = 0
                            if has_missing_data:
                                train_val[0, 2] = np.nan
                                test_val[0, 2] = np.nan

                            snpreader0 = SnpData(
                                iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                                sid=[str(i) for i in range(sid_count)],
                                val=train_val)
                            snpreader1 = SnpData(
                                iid=[["3", "3"], ["4", "4"]],
                                sid=[str(i) for i in range(sid_count)],
                                val=test_val)

                            #gather stats vs not
                            cppa, stdcppa = snpreader0.read(
                                order=order, dtype=dtype).standardize(
                                    std,
                                    return_trained=True,
                                    force_python_only=False)
                            pya, stdpya = snpreader0.read(
                                order=order, dtype=dtype).standardize(
                                    std,
                                    return_trained=True,
                                    force_python_only=True)
                            np.testing.assert_array_almost_equal(
                                cppa.val, pya.val, decimal=decimal)
                            np.testing.assert_array_almost_equal(
                                stdcppa.stats, stdpya.stats, decimal=decimal)

                            assert (np.inf
                                    in stdcppa.stats[:, 1]) == has_SNC_in_train
                            assert (np.inf
                                    in stdpya.stats[:, 1]) == has_SNC_in_train

                            if has_SNC_in_train:
                                assert np.array_equal(
                                    cppa.val[:, 1],
                                    np.zeros([cppa.val.shape[0]]))
                                assert np.array_equal(
                                    pya.val[:, 1],
                                    np.zeros([pya.val.shape[0]]))

                            if has_missing_data:
                                assert 0 == cppa.val[0, 2]
                                assert 0 == pya.val[0, 2]

                            #uses stats
                            cppb = snpreader1.read(
                                order=order, dtype=dtype).standardize(
                                    stdcppa, force_python_only=False)
                            pyb = snpreader1.read(
                                order=order, dtype=dtype).standardize(
                                    stdpya, force_python_only=True)
                            np.testing.assert_array_almost_equal(
                                cppb.val, pyb.val, decimal=decimal)
                            #Make sure we haven't messed up the train stats
                            np.testing.assert_array_almost_equal(
                                stdcppa.stats, stdpya.stats, decimal=decimal)

                            if has_SNC_in_train:
                                assert np.array_equal(
                                    cppb.val[:, 1],
                                    np.zeros([cppb.val.shape[0]]))
                                assert np.array_equal(
                                    pyb.val[:, 1],
                                    np.zeros([pyb.val.shape[0]]))

                            if has_missing_data:
                                assert cppb.val[0, 2] == 0
                                assert pyb.val[0, 2] == 0
        logging.info("done with 'test_cpp_std'")