Ejemplo n.º 1
0
    def _read_kernel(train, standardizer, block_size=None, order='A', dtype=np.float64, force_python_only=False, view_ok=False, return_trained=False):
        '''
        Create a kernel (``val`` multiplied by its own transpose) for in-memory SNP data.

        Two cases are handled:
                * *standardizer* is an ``Identity`` and ``train.val`` already has the requested
                  *dtype*: the kernel is computed directly with a single in-memory ``dot``.
                * anything else: the call is forwarded, arguments unchanged, to
                  ``SnpReader._read_kernel``.

        :param train: the object whose ``val`` array is multiplied (first positional argument).
        :param standardizer: the standardizer to apply; only an ``Identity`` takes the direct path.
        :param block_size: only passed on to ``SnpReader._read_kernel``; not used by the direct path.
        :param order: requested memory order of the result; 'F' is handled explicitly on the direct path.
        :param dtype: requested dtype of the result; the direct path requires ``train.val`` to already have it.
        :param force_python_only: only passed on to ``SnpReader._read_kernel``.
        :param view_ok: only passed on to ``SnpReader._read_kernel``.
        :param return_trained: if true, the direct path returns ``(K, standardizer)`` instead of ``K``.
        :return: the kernel array ``K`` or, when *return_trained* is true, the tuple ``(K, standardizer)``.
                 On the general path, whatever ``SnpReader._read_kernel`` returns.
        '''
        from pysnptools.pstreader import PstReader

        # Standardization or a dtype conversion is needed: do things the more general SnpReader way.
        if not (isinstance(standardizer, Identity) and train.val.dtype == dtype):
            return SnpReader._read_kernel(train, standardizer, block_size=block_size, order=order, dtype=dtype, force_python_only=force_python_only, view_ok=view_ok, return_trained=return_trained)

        # Just do a 'python' dot: no standardization is needed and everything is the right type.
        K = train.val.dot(train.val.T)
        if order == 'F':
            # numpy's 'dot' always returns 'C' order, so take the transpose to get 'F' order.
            K = K.T
        assert PstReader._array_properties_are_ok(K, order, dtype), "internal error: K is not of the expected order or dtype"

        if return_trained:
            return K, standardizer
        return K
Ejemplo n.º 2
0
    def _read_kernel(train, standardizer, block_size=None, order='A', dtype=np.float64, force_python_only=False, view_ok=False, return_trained=False):
        '''
        Create a kernel (``val`` multiplied by its own transpose) for in-memory SNP data.

        Two cases are handled:
                * *standardizer* is an ``Identity`` and ``train.val`` already has the requested
                  *dtype*: the kernel is computed directly with a single in-memory ``dot``.
                * anything else: the call is forwarded, arguments unchanged, to
                  ``SnpReader._read_kernel``.

        :param train: the object whose ``val`` array is multiplied (first positional argument).
        :param standardizer: the standardizer to apply; only an ``Identity`` takes the direct path.
        :param block_size: only passed on to ``SnpReader._read_kernel``; not used by the direct path.
        :param order: requested memory order of the result; 'F' is handled explicitly on the direct path.
        :param dtype: requested dtype of the result; the direct path requires ``train.val`` to already have it.
        :param force_python_only: only passed on to ``SnpReader._read_kernel``.
        :param view_ok: only passed on to ``SnpReader._read_kernel``.
        :param return_trained: if true, the direct path returns ``(K, standardizer)`` instead of ``K``.
        :return: the kernel array ``K`` or, when *return_trained* is true, the tuple ``(K, standardizer)``.
                 On the general path, whatever ``SnpReader._read_kernel`` returns.
        '''
        from pysnptools.pstreader import PstReader

        # Standardization or a dtype conversion is needed: do things the more general SnpReader way.
        if not (isinstance(standardizer, Identity) and train.val.dtype == dtype):
            return SnpReader._read_kernel(train, standardizer, block_size=block_size, order=order, dtype=dtype, force_python_only=force_python_only, view_ok=view_ok, return_trained=return_trained)

        # Just do a 'python' dot: no standardization is needed and everything is the right type.
        K = train.val.dot(train.val.T)
        if order == 'F':
            # numpy's 'dot' always returns 'C' order, so take the transpose to get 'F' order.
            K = K.T
        assert PstReader._array_properties_are_ok(K, order, dtype), "internal error: K is not of the expected order or dtype"

        if return_trained:
            return K, standardizer
        return K
Ejemplo n.º 3
0
    def test_respect_inputs(self):
        '''
        Check that ``read_kernel`` respects the requested ``order`` and ``dtype``.

        Small random ``SnpData`` objects are built for every combination of starting
        dtype, starting memory order and SNP count. For each standardizer, and for both
        the full data and a column-sliced view of it, a reference kernel is computed by
        standardizing and multiplying by hand. ``read_kernel`` is then called for every
        goal dtype/order and its result must

                * have the requested order and dtype, and
                * match the reference kernel to the precision of the less precise dtype involved.
        '''
        np.random.seed(0)
        for dtype_start, decimal_start in [(np.float32, 5), (np.float64, 10)]:
            for order_start in ['F', 'C', 'A']:
                for sid_count in [20, 2]:
                    snpdataX = SnpData(
                        iid=[["0", "0"], ["1", "1"], ["2", "2"]],
                        sid=[str(i) for i in range(sid_count)],
                        val=np.array(np.random.randint(3, size=[3, sid_count]),
                                     dtype=dtype_start,
                                     order=order_start))
                    for stdx in [
                            stdizer.Beta(1, 25),
                            stdizer.Identity(),
                            stdizer.Unit()
                    ]:
                        for snpreader0 in [snpdataX, snpdataX[:, 1:]]:
                            snpreader1 = snpreader0[1:, :]

                            # Reference kernel: standardize by hand, then val x val.T.
                            refdata0, trained_standardizer = snpreader0.read(
                            ).standardize(stdx,
                                          return_trained=True,
                                          force_python_only=True)
                            refval0 = refdata0.val.dot(refdata0.val.T)

                            # NOTE: refdata1/refval1 are not compared against anything yet.
                            # The calls are kept so that standardizing a row subset with the
                            # already-trained standardizer is at least executed.
                            refdata1 = snpreader1.read().standardize(
                                trained_standardizer, force_python_only=True)
                            refval1 = refdata0.val.dot(refdata1.val.T)

                            for dtype_goal, decimal_goal in [(np.float32, 5),
                                                             (np.float64, 10)]:
                                for order_goal in ['F', 'C', 'A']:
                                    k = snpreader0.read_kernel(
                                        standardizer=stdx,
                                        block_size=1,
                                        order=order_goal,
                                        dtype=dtype_goal)
                                    # The check returns a bool; without the assert its
                                    # result was silently thrown away.
                                    assert PstReader._array_properties_are_ok(
                                        k.val, order_goal, dtype_goal
                                    ), "read_kernel did not respect order={0!r}, dtype={1!r}".format(
                                        order_goal, dtype_goal)
                                    np.testing.assert_array_almost_equal(
                                        refval0,
                                        k.val,
                                        decimal=min(decimal_start,
                                                    decimal_goal))
Ejemplo n.º 4
0
    def test_respect_inputs(self):
        '''
        Check that ``read_kernel`` respects the requested ``order`` and ``dtype``.

        For every combination of starting dtype, starting memory order, SNP count,
        standardizer and reader (full data or a column-sliced view), a reference kernel
        is computed by standardizing and multiplying by hand. The kernel returned by
        ``read_kernel`` for each goal dtype/order must have the requested array
        properties and must match the reference to the precision of the less precise
        dtype involved.
        '''
        np.random.seed(0)
        for dtype_start,decimal_start in [(np.float32,5),(np.float64,10)]:
            for order_start in ['F','C','A']:
                for snp_count in [20,2]:
                    snpdataX = SnpData(iid=[["0","0"],["1","1"],["2","2"]],sid=[str(i) for i in range(snp_count)],val=np.array(np.random.randint(3,size=[3,snp_count]),dtype=dtype_start,order=order_start))
                    for stdx in [stdizer.Beta(1,25),stdizer.Identity(),stdizer.Unit()]:
                        for snpreader0 in [snpdataX,snpdataX[:,1:]]:
                            snpreader1 = snpreader0[1:,:]

                            #Reference kernel: standardize by hand, then val x val.T
                            refdata0, trained_standardizer = snpreader0.read().standardize(stdx,return_trained=True,force_python_only=True)
                            refval0 = refdata0.val.dot(refdata0.val.T)
                            #NOTE: refdata1/refval1 are not compared against anything yet; the calls are
                            #kept so that standardizing a row subset with the trained standardizer is at least executed.
                            refdata1 = snpreader1.read().standardize(trained_standardizer,force_python_only=True)
                            refval1 = refdata0.val.dot(refdata1.val.T)
                            for dtype_goal,decimal_goal in [(np.float32,5),(np.float64,10)]:
                                for order_goal in ['F','C','A']:
                                    k = snpreader0.read_kernel(standardizer=stdx,block_size=1,order=order_goal,dtype=dtype_goal)
                                    #The check returns a bool; without the assert its result was silently thrown away.
                                    assert PstReader._array_properties_are_ok(k.val,order_goal,dtype_goal), "read_kernel did not respect order={0!r}, dtype={1!r}".format(order_goal,dtype_goal)
                                    np.testing.assert_array_almost_equal(refval0,k.val, decimal=min(decimal_start,decimal_goal))