def test_snp_kernel2(self):
    """Smoke-test a ``SnpKernel`` wrapping the toy 5-chromosome Bed file.

    Builds the kernel with a ``Beta(1, 25)`` standardizer, converts it to a
    string, and hands it to ``_fortesting_JustCheckExists().input``. Nothing
    is asserted directly; the test fails only if one of those calls raises.
    """
    logging.info("in test_snp_kernel2")

    bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
    kernel = SnpKernel(
        Bed(bed_path, count_A1=False),
        standardizer=stdizer.Beta(1, 25),
    )

    # The resulting text is never inspected; only that str() succeeds matters.
    _ = str(kernel)

    # NOTE(review): presumably verifies that the kernel's underlying input
    # exists -- confirm against the definition of _fortesting_JustCheckExists.
    _fortesting_JustCheckExists().input(kernel)
def test_merge_std(self):
    """Check that block-wise and unblocked kernel reads agree.

    For a ``Beta(2, 10)`` and a ``Unit`` standardizer, the same seeded random
    3 x 20 SNP matrix is read through ``SnpKernel._read_with_standardizing``
    once with ``block_size=1`` and once with ``block_size=None``. The kernel
    values and the trained standardizer stats must match to 10 decimals, and
    the ``factor`` of the third returned object must agree to within 1e-7.
    """
    # unit vs beta
    for standardizer in (stdizer.Beta(2, 10), stdizer.Unit()):
        # Re-seed on every pass so both standardizers see identical genotypes.
        np.random.seed(0)
        n_snps = 20
        snp_data = SnpData(
            iid=[["0", "0"], ["1", "1"], ["2", "2"]],
            sid=list(map(str, range(n_snps))),
            val=np.array(
                np.random.randint(3, size=[3, n_snps]),
                dtype=np.float64,
                order='F',
            ),
        )

        # Same reader and standardizer; only the block size differs.
        blockwise, unblocked = [
            SnpKernel(
                snp_data, standardizer, block_size=block_size
            )._read_with_standardizing(to_kerneldata=True, return_trained=True)
            for block_size in (1, None)
        ]
        kernel_blockwise, trained_blockwise, diag_blockwise = blockwise
        kernel_unblocked, trained_unblocked, diag_unblocked = unblocked

        np.testing.assert_array_almost_equal(
            kernel_blockwise.val, kernel_unblocked.val, decimal=10)
        np.testing.assert_array_almost_equal(
            trained_blockwise.stats, trained_unblocked.stats, decimal=10)
        factor_gap = abs(diag_blockwise.factor - diag_unblocked.factor)
        assert factor_gap < 1e-7
def test_respect_inputs(self):
    """Check ``read_kernel`` results across input/output dtype and order.

    Random 3-row SNP matrices are created for every combination of starting
    dtype (float32/float64), starting memory order ('F', 'C', 'A') and SNP
    count (20, 2). For each of three standardizers and for both the full data
    and a view that drops the first SNP, a reference kernel is computed as
    ``val.dot(val.T)`` on the python-only standardized data. ``read_kernel``
    is then called for every requested dtype/order and its values must match
    the reference to the coarser of the two precisions (5 decimals when
    float32 is involved on either side, otherwise 10).
    """
    # The generator is seeded once; every matrix below draws from one stream.
    np.random.seed(0)

    precisions = ((np.float32, 5), (np.float64, 10))
    orders = ('F', 'C', 'A')

    # Lazily enumerate the starting configurations in the same order the
    # equivalent nested loops (dtype -> order -> sid_count) would visit them.
    start_configs = (
        (dtype, decimal, order, count)
        for dtype, decimal in precisions
        for order in orders
        for count in (20, 2)
    )

    for dtype_start, decimal_start, order_start, sid_count in start_configs:
        full_data = SnpData(
            iid=[["0", "0"], ["1", "1"], ["2", "2"]],
            sid=list(map(str, range(sid_count))),
            val=np.array(
                np.random.randint(3, size=[3, sid_count]),
                dtype=dtype_start,
                order=order_start,
            ),
        )

        # Fresh standardizer instances are built for every data set.
        for standardizer in (stdizer.Beta(1, 25), stdizer.Identity(),
                             stdizer.Unit()):
            # Exercise both the full data and a view without the first SNP.
            for train_reader in (full_data, full_data[:, 1:]):
                test_reader = train_reader[1:, :]

                train_std, trained = train_reader.read().standardize(
                    standardizer, return_trained=True,
                    force_python_only=True)
                expected_kernel = train_std.val.dot(train_std.val.T)

                # LATER why aren't these used?
                # The next two results are computed but never compared with
                # anything; they are kept so the calls are still exercised.
                test_std = test_reader.read().standardize(
                    trained, force_python_only=True)
                unused_cross_kernel = train_std.val.dot(test_std.val.T)

                goal_configs = (
                    (dtype, decimal, order)
                    for dtype, decimal in precisions
                    for order in orders
                )
                for dtype_goal, decimal_goal, order_goal in goal_configs:
                    kernel = train_reader.read_kernel(
                        standardizer=standardizer,
                        block_size=1,
                        order=order_goal,
                        dtype=dtype_goal,
                    )
                    # NOTE(review): the return value is discarded. If this
                    # helper reports through its return value rather than by
                    # raising, this line checks nothing -- confirm.
                    PstReader._array_properties_are_ok(
                        kernel.val, order_goal, dtype_goal)
                    np.testing.assert_array_almost_equal(
                        expected_kernel,
                        kernel.val,
                        decimal=min(decimal_start, decimal_goal),
                    )
def test_cpp_std(self):
    """Compare standardization with ``force_python_only`` False vs. True.

    For each memory order ('C', 'F'), dtype (float64, float32) and
    standardizer (Unit, Beta(2, 10)), a seeded 3 x 20 training matrix and a
    2 x 20 test matrix are created. The training data is standardized through
    both paths with ``return_trained=True`` and the results and trained stats
    must agree (10 decimals for float64, 5 otherwise). The trained
    standardizers are then applied to the test data and compared again.

    Two conditions are switched on progressively: SNP column 1 of the
    training data is zeroed ("SNC"), and entry [0, 2] of both matrices is set
    to NaN. Both are in-place writes to the ``val`` arrays and are never
    undone, so once applied they remain in effect for every later iteration
    that reuses the same ``SnpData`` objects, including iterations where the
    corresponding flag is False.
    """
    # Order C vs F
    for order in ('C', 'F'):
        # 32 vs 64
        for dtype in (np.float64, np.float32):
            decimal = 10 if dtype == np.float64 else 5

            # unit vs beta
            for std in (stdizer.Unit(), stdizer.Beta(2, 10)):
                np.random.seed(0)
                sid_count = 20
                snp_names = [str(i) for i in range(sid_count)]
                train = SnpData(
                    iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                    sid=snp_names,
                    val=np.array(
                        np.random.randint(3, size=[3, sid_count]),
                        dtype=dtype, order=order),
                )
                test = SnpData(
                    iid=[["3", "3"], ["4", "4"]],
                    sid=[str(i) for i in range(sid_count)],
                    val=np.array(
                        np.random.randint(3, size=[2, sid_count]),
                        dtype=dtype, order=order),
                )

                # has SNC
                for has_SNC_in_train in (False, True):
                    if has_SNC_in_train:
                        train.val[:, 1] = 0

                    # missing data
                    for has_missing_data in (False, True):
                        if has_missing_data:
                            train.val[0, 2] = np.nan
                            test.val[0, 2] = np.nan

                        # gather stats vs not
                        # First entry: force_python_only=False (the "cpp"
                        # result); second: force_python_only=True.
                        (cpp_train, cpp_trained), (py_train, py_trained) = [
                            train.read(order=order, dtype=dtype).standardize(
                                std,
                                return_trained=True,
                                force_python_only=python_only)
                            for python_only in (False, True)
                        ]
                        np.testing.assert_array_almost_equal(
                            cpp_train.val, py_train.val, decimal=decimal)
                        np.testing.assert_array_almost_equal(
                            cpp_trained.stats, py_trained.stats,
                            decimal=decimal)

                        # The second stats column holds inf exactly when the
                        # training data has the zeroed SNP column.
                        for trained in (cpp_trained, py_trained):
                            found_inf = np.inf in trained.stats[:, 1]
                            assert found_inf == has_SNC_in_train

                        if has_SNC_in_train:
                            for result in (cpp_train, py_train):
                                zeros = np.zeros([result.val.shape[0]])
                                assert np.array_equal(result.val[:, 1], zeros)
                        if has_missing_data:
                            for result in (cpp_train, py_train):
                                assert 0 == result.val[0, 2]

                        # uses stats
                        cpp_test, py_test = [
                            test.read(order=order, dtype=dtype).standardize(
                                trained, force_python_only=python_only)
                            for trained, python_only in (
                                (cpp_trained, False), (py_trained, True))
                        ]
                        np.testing.assert_array_almost_equal(
                            cpp_test.val, py_test.val, decimal=decimal)
                        # Make sure we haven't messed up the train stats
                        np.testing.assert_array_almost_equal(
                            cpp_trained.stats, py_trained.stats,
                            decimal=decimal)

                        if has_SNC_in_train:
                            for result in (cpp_test, py_test):
                                zeros = np.zeros([result.val.shape[0]])
                                assert np.array_equal(result.val[:, 1], zeros)
                        if has_missing_data:
                            for result in (cpp_test, py_test):
                                assert result.val[0, 2] == 0

    logging.info("done with 'test_cpp_std'")