Example #1
    def fit(self, X, y):
        self.model.fit(X, y)
        #### Get p-values for the fitted model ####
        denom = 2.0 * (1.0 + cp.cosh(self.model.decision_function(X)))
        denom = cp.tile(denom, (X.shape[1], 1)).T
        F_ij = cp.dot((X / denom).T, X)  ## Fisher Information Matrix
        Cramer_Rao = cp.linalg.inv(F_ij)  ## Inverse Information Matrix
        sigma_estimates = cp.sqrt(cp.diagonal(Cramer_Rao))

        ## Changed below to make it equal to sklearn
        z_scores = self.model.coef_.flatten() / sigma_estimates  # z-score for each model coefficient
        # z_scores = self.model.coef_[0] / sigma_estimates  # z-score for each model coefficient

        # serial on CPU, but only n_features long, so it should not be too slow;
        # could look into GPU acceleration if needed
        p_values = [stats.norm.sf(abs(x.item())) * 2
                    for x in z_scores]  # two-tailed test for p-values

        ### In case we need confidence intervals
        # from: https://gist.github.com/rspeare/77061e6e317896be29c6de9a85db301d#gistcomment-2267786
        # alpha = 0.05
        # q = stats.norm.ppf(1 - alpha / 2)
        # lower = self.model.coef_[0] - q * sigma_estimates
        # upper = self.model.coef_[0] + q * sigma_estimates
        # self.conf_int = np.dstack((lower, upper))[0]

        self.z_scores = z_scores
        self.p_values = p_values
        self.sigma_estimates = sigma_estimates
        self.F_ij = F_ij
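For reference, the same Wald-test computation can be reproduced on the CPU; the sketch below is illustrative and not part of the original class (the dataset, the clf name, and the unpenalized LogisticRegression are assumptions; penalty=None requires scikit-learn >= 1.2, older versions spell it penalty='none').

import numpy as np
from scipy import stats
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=500, n_features=5, random_state=0)
clf = LogisticRegression(penalty=None).fit(X, y)  # unpenalized, as the Wald test assumes

# 1 / (2 * (1 + cosh(t))) equals sigmoid(t) * (1 - sigmoid(t)), the logistic variance term
denom = 2.0 * (1.0 + np.cosh(clf.decision_function(X)))
F_ij = (X / denom[:, None]).T @ X                  # Fisher information matrix
sigma = np.sqrt(np.diagonal(np.linalg.inv(F_ij)))  # Cramer-Rao standard errors
z = clf.coef_.ravel() / sigma
p_values = 2 * stats.norm.sf(np.abs(z))            # two-tailed p-values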
Example #2
def make_diagonal(D, offset=0, axis1=0, axis2=1):
    # Numpy doesn't offer a complement to np.diagonal: a function to create new
    # diagonal arrays with extra dimensions. We need such a function for the
    # gradient of np.diagonal and it's also quite handy to have. So here it is.
    if not (offset == 0 and axis1 == -1 and axis2 == -2):
        raise NotImplementedError(
            "Currently make_diagonal only supports offset=0, axis1=-1, axis2=-2"  # noqa
        )

    # We use a trick: calling np.diagonal returns a view on the original array,
    # so we can modify it in-place. (only valid for numpy version >= 1.10.)
    new_array = _cp.zeros(D.shape + (D.shape[-1], ))
    new_array_diag = _cp.diagonal(new_array, offset=0, axis1=-1, axis2=-2)
    new_array_diag.flags.writeable = True
    new_array_diag[:] = D
    return new_array
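A quick round-trip check, as a sketch: make_diagonal should invert cp.diagonal for the one supported axis combination. This assumes _cp is CuPy and that the writable-view trick above works on the installed CuPy version.

import cupy as cp

d = cp.arange(6.0).reshape(2, 3)                    # a batch of two length-3 diagonals
m = make_diagonal(d, offset=0, axis1=-1, axis2=-2)  # shape (2, 3, 3)
assert cp.allclose(cp.diagonal(m, axis1=-1, axis2=-2), d)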
Example #3
def extract(noisyimg_gpu, imgweights_gpu, A_gpu):
    #- Set up the equation to solve (B&S eq 4)
    W_gpu = cpx.scipy.sparse.spdiags(data=imgweights_gpu.ravel(), diags=[0],
                                     m=npix, n=npix)

    iCov_gpu = A_gpu.T.dot(W_gpu.dot(A_gpu))

    y_gpu = A_gpu.T.dot(W_gpu.dot(noisyimg_gpu.ravel()))

    #- Solve f (B&S eq 4)
    f_gpu = cp.linalg.solve(iCov_gpu.todense(), y_gpu)  # requires array, not sparse object

    #- Eigen-decompose iCov to assist in upcoming steps
    u_gpu, v_gpu = cp.linalg.eigh(iCov_gpu.todense())

    #- Calculate C^-1 = QQ (B&S eq 10)
    d_gpu = cpx.scipy.sparse.spdiags(cp.sqrt(u_gpu), 0, len(u_gpu), len(u_gpu))

    Q_gpu = v_gpu.dot(d_gpu.dot(v_gpu.T))

    #- normalization vector (B&S eq 11)
    norm_vector_gpu = cp.sum(Q_gpu, axis=1)

    #- Resolution matrix (B&S eq 12)
    R_gpu = cp.outer(norm_vector_gpu**(-1),
                     cp.ones(norm_vector_gpu.size)) * Q_gpu

    #- Decorrelated covariance matrix (B&S eq 13-15)
    udiags_gpu = cpx.scipy.sparse.spdiags(1 / u_gpu, 0, len(u_gpu), len(u_gpu))

    Cov_gpu = v_gpu.dot(udiags_gpu.dot(v_gpu.T))

    Cx_gpu = R_gpu.dot(Cov_gpu.dot(R_gpu.T))

    #- Decorrelated flux (B&S eq 16)
    fx_gpu = R_gpu.dot(f_gpu.ravel()).reshape(f_gpu.shape)

    #- Variance on f (B&S eq 13)
    varfx_gpu = cp.diagonal(Cx_gpu)

    return fx_gpu, varfx_gpu, R_gpu
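A hypothetical toy driver for the sketch above. extract() reads npix from the enclosing scope, and A_gpu must be a sparse projection matrix (pixels x flux bins) so that iCov_gpu supports .todense(); all names and shapes below are illustrative.

import cupy as cp
import cupyx as cpx
import cupyx.scipy.sparse

npix, nflux = 400, 25                  # module-level npix is read by extract()
A_gpu = cpx.scipy.sparse.csr_matrix(cp.random.rand(npix, nflux))
truth = cp.random.rand(nflux)
noisyimg_gpu = A_gpu.dot(truth) + 0.01 * cp.random.randn(npix)
imgweights_gpu = cp.full(npix, 1.0e4)  # per-pixel inverse variances
fx_gpu, varfx_gpu, R_gpu = extract(noisyimg_gpu, imgweights_gpu, A_gpu)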
Example #4
    def process(self, inputs):
        df = inputs[self.INPUT_PORT_NAME]

        all_sample_ids = df['sample_id'].unique()
        total_samples = len(all_sample_ids)

        # df = df.drop('datetime', axis=1)
        input_meta = self.get_input_meta()
        if self.INPUT_PORT_NAME not in input_meta:
            raise ValueError('input meta is required to compute the number of assets')
        assets = int(math.sqrt(len(input_meta[self.INPUT_PORT_NAME]) - 3))
        output = {}
        data_ma = df[list(range(assets * assets))].values
        data_ma = data_ma.reshape(total_samples, -1, assets, assets)
        diagonalized = cupy.diagonal(data_ma, 0, 2, 3)
        diagonalized = cupy.sqrt(1.0 / diagonalized)  # inverse volatility (1 / std)
        diagonalized = diagonalized / diagonalized.sum(axis=2, keepdims=True)
        diagonalized = diagonalized.reshape(-1, assets)
        weight_df = cudf.DataFrame(diagonalized)
        weight_df['month'] = df['month']
        weight_df['year'] = df['year']
        weight_df['sample_id'] = df['sample_id']
        output.update({self.OUTPUT_PORT_NAME: weight_df})
        return output
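The weighting step above, isolated with illustrative shapes: read the per-asset variances off the diagonal of each covariance matrix, convert to inverse volatilities, and normalize each row to sum to one.

import cupy

cov = cupy.random.rand(4, 6, 3, 3)  # (samples, months, assets, assets)
var = cupy.diagonal(cov, 0, 2, 3)   # per-asset variances, shape (4, 6, 3)
weights = cupy.sqrt(1.0 / var)      # inverse volatility
weights = weights / weights.sum(axis=2, keepdims=True)  # each row sums to 1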
Example #5
def mi_model_1d_gpu_gd(x, y, biascorrect=False, demeaned=False):
    """Mutual information between a Gaussian and a discrete variable in bits.
    This method is based on ANOVA style model comparison.
    I = mi_model_gd(x,y) returns the MI between the (possibly multidimensional)
    Gaussian variable x and the discrete variable y.
    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y
        must be an array of integers
    biascorrect : bool | False
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)
    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    # Converting to cupy array
    #x, y = cp.array(x), cp.array(y)
    x, y = cp.atleast_2d(x), cp.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not cp.issubdtype(y.dtype, cp.integer):
        raise ValueError("y should be an integer array")

    nvarx, ntrl = x.shape
    ym = cp.unique(y)

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    if not demeaned:
        x = x - x.mean(axis=1)[:, cp.newaxis]

    # class-conditional entropies
    ntrl_y = cp.zeros(len(ym))
    hcond = cp.zeros(len(ym))
    for n_yi, yi in enumerate(ym):
        idx = y == yi
        xm = x[:, idx]
        ntrl_y[n_yi] = xm.shape[1]
        xm = xm - xm.mean(axis=1)[:, cp.newaxis]
        cm = cp.dot(xm, xm.T) / float(ntrl_y[n_yi] - 1)
        chcm = cp.linalg.cholesky(cm)
        hcond[n_yi] = cp.sum(cp.log(cp.diagonal(chcm)))

    # class weights
    w = ntrl_y / float(ntrl)

    # unconditional entropy from unconditional Gaussian fit
    cx = cp.dot(x, x.T) / float(ntrl - 1)
    chc = cp.linalg.cholesky(cx)
    hunc = cp.sum(cp.log(cp.diagonal(chc)))  # + c*nvarx

    ln2 = cp.log(2)
    if biascorrect:
        vars = cp.arange(1, nvarx + 1)

        psiterms = psi((ntrl - vars).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(float(ntrl - 1))) / 2.
        hunc = hunc - nvarx * dterm - psiterms.sum()

        dterm = (ln2 - cp.log((ntrl_y - 1).astype(cp.float64))) / 2.0
        psiterms = cp.zeros(len(ym))
        for vi in vars:
            idx = ntrl_y - vi
            psiterms = psiterms + psi(idx.astype(cp.float64) / 2.)
        hcond = hcond - nvarx * dterm - (psiterms / 2.)

    # MI in bits
    i = (hunc - cp.sum(w * hcond)) / ln2
    return i
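A hypothetical sanity check (the default biascorrect=False keeps it independent of the psi helper, which the excerpt assumes is imported elsewhere): the MI should be near zero for a label carrying no information about x, and clearly positive once the class means differ.

import cupy as cp

x = cp.random.randn(1000)
y = (cp.random.rand(1000) > 0.5).astype(cp.int32)
print(mi_model_1d_gpu_gd(x, y))            # ~0 bits: y is independent of x
print(mi_model_1d_gpu_gd(x + 2.0 * y, y))  # clearly positive: class-dependent mean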
Example #6
def cmi_1d_gpu_ggg(x, y, z, biascorrect=True, demeaned=False):
    """Conditional MI between two Gaussian variables conditioned on a third.

    I = cmi_ggg(x,y,z) returns the CMI between two (possibly multidimensional)
    Gaussian variables, x and y, conditioned on a third, z, with bias
    correction.

    Parameters
    ----------
    x, y, z : array_like
        Gaussians arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)
    """
    x, y, z = cp.atleast_2d(x), cp.atleast_2d(y), cp.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    ntrl = x.shape[1]
    nvarx = x.shape[0]
    nvary = y.shape[0]
    nvarz = z.shape[0]
    nvaryz = nvary + nvarz
    nvarxy = nvarx + nvary
    nvarxz = nvarx + nvarz
    nvarxyz = nvarx + nvaryz

    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xyz = cp.vstack((x, y, z))
    if not demeaned:
        xyz = xyz - xyz.mean(axis=1)[:, cp.newaxis]
    cxyz = cp.dot(xyz, xyz.T) / float(ntrl - 1)
    # submatrices of joint covariance
    cz = cxyz[nvarxy:, nvarxy:]
    cyz = cxyz[nvarx:, nvarx:]
    cxz = cp.zeros((nvarxz, nvarxz))
    cxz[:nvarx, :nvarx] = cxyz[:nvarx, :nvarx]
    cxz[:nvarx, nvarx:] = cxyz[:nvarx, nvarxy:]
    cxz[nvarx:, :nvarx] = cxyz[nvarxy:, :nvarx]
    cxz[nvarx:, nvarx:] = cxyz[nvarxy:, nvarxy:]

    chcz = cp.linalg.cholesky(cz)
    chcxz = cp.linalg.cholesky(cxz)
    chcyz = cp.linalg.cholesky(cyz)
    chcxyz = cp.linalg.cholesky(cxyz)

    # entropies in nats
    # normalizations cancel for cmi
    hz = cp.sum(cp.log(cp.diagonal(chcz)))
    hxz = cp.sum(cp.log(cp.diagonal(chcxz)))
    hyz = cp.sum(cp.log(cp.diagonal(chcyz)))
    hxyz = cp.sum(cp.log(cp.diagonal(chcxyz)))

    ln2 = cp.log(2)
    if biascorrect:
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxyz + 1)).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hz = hz - nvarz * dterm - psiterms[:nvarz].sum()
        hxz = hxz - nvarxz * dterm - psiterms[:nvarxz].sum()
        hyz = hyz - nvaryz * dterm - psiterms[:nvaryz].sum()
        hxyz = hxyz - nvarxyz * dterm - psiterms[:nvarxyz].sum()

    # MI in bits
    i = (hxz + hyz - hxyz - hz) / ln2
    return i
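A hypothetical check of the conditioning, with biascorrect disabled so the sketch does not depend on the psi helper: when x and y are linked only through z, the CMI should be near zero, while a direct dependence survives the conditioning.

import cupy as cp

z = cp.random.randn(1, 2000)
x = z + 0.1 * cp.random.randn(1, 2000)
y_indirect = z + 0.1 * cp.random.randn(1, 2000)  # tied to x only through z
y_direct = x + 0.1 * cp.random.randn(1, 2000)    # tied to x directly
print(cmi_1d_gpu_ggg(x, y_indirect, z, biascorrect=False))  # ~0 bits
print(cmi_1d_gpu_ggg(x, y_direct, z, biascorrect=False))    # clearly positive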
Example #7
def mi_1d_gpu_gg(x, y, biascorrect=True, demeaned=False):
    """Mutual information (MI) between two Gaussian variables in bits.

    This is the GPU variant of the mi_1d_gg function, using CuPy.

    I = mi_gg(x,y) returns the MI between two (possibly multidimensional)
    Gaussian variables, x and y, with bias correction.

    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs)
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)

    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    x, y = cp.atleast_2d(x), cp.atleast_2d(y)
    if (x.ndim > 2) or (y.ndim > 2):
        raise ValueError("x and y must be at most 2d")
    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    nvarxy = nvarx + nvary

    if y.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xy = cp.vstack((x, y))
    if not demeaned:
        xy = xy - xy.mean(axis=1)[:, cp.newaxis]
    cxy = cp.dot(xy, xy.T) / float(ntrl - 1)
    # submatrices of joint covariance
    cx = cxy[:nvarx, :nvarx]
    cy = cxy[nvarx:, nvarx:]

    chcxy = cp.linalg.cholesky(cxy)
    chcx = cp.linalg.cholesky(cx)
    chcy = cp.linalg.cholesky(cy)

    # entropies in nats
    # normalizations cancel for mutual information
    hx = cp.sum(cp.log(cp.diagonal(chcx)))
    hy = cp.sum(cp.log(cp.diagonal(chcy)))
    hxy = cp.sum(cp.log(cp.diagonal(chcxy)))

    ln2 = cp.log(2)
    if biascorrect:
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxy + 1)).astype(cp.float64) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hx = hx - nvarx * dterm - psiterms[:nvarx].sum()
        hy = hy - nvary * dterm - psiterms[:nvary].sum()
        hxy = hxy - nvarxy * dterm - psiterms[:nvarxy].sum()

    # MI in bits
    i = (hx + hy - hxy) / ln2
    return i
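A hypothetical check against the closed form: two unit-variance Gaussians with correlation r share -0.5 * log2(1 - r**2) bits. biascorrect is disabled to keep the sketch independent of the psi helper.

import cupy as cp

r = 0.8
x = cp.random.randn(1, 5000)
y = r * x + cp.sqrt(1.0 - r ** 2) * cp.random.randn(1, 5000)
print(float(mi_1d_gpu_gg(x, y, biascorrect=False)))  # estimate
print(-0.5 * float(cp.log2(1.0 - r ** 2)))           # analytic value, ~0.737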
Example #8
def det(x: Array, /) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.linalg.det <numpy.linalg.det>`.

    See its docstring for more information.
    """
    # Note: the restriction to floating-point dtypes only is different from
    # np.linalg.det.
    if x.dtype not in _floating_dtypes:
        raise TypeError('Only floating-point dtypes are allowed in det')
    return Array._new(np.linalg.det(x._array))


# Note: diagonal is in the numpy top-level namespace, not np.linalg
def diagonal(x: Array, /, *, offset: int = 0) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.diagonal <numpy.diagonal>`.

    See its docstring for more information.
    """
    # Note: diagonal always operates on the last two axes, whereas np.diagonal
    # operates on the first two axes by default
    return Array._new(np.diagonal(x._array, offset=offset, axis1=-2, axis2=-1))


def eigh(x: Array, /) -> EighResult:
    """
    Array API compatible wrapper for :py:func:`np.linalg.eigh <numpy.linalg.eigh>`.

    See its docstring for more information.
    """
    # Note: the restriction to floating-point dtypes only is different from
    # np.linalg.eigh.
    if x.dtype not in _floating_dtypes:
        raise TypeError('Only floating-point dtypes are allowed in eigh')

    # Note: the return type here is a namedtuple, which is different from
    # np.linalg.eigh, which only returns a tuple.
    return EighResult(*map(Array._new, np.linalg.eigh(x._array)))
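The axis convention noted in the diagonal wrapper is easy to see with plain NumPy:

import numpy as np

a = np.zeros((2, 3, 4))
print(np.diagonal(a).shape)                      # (4, 2): first two axes by default
print(np.diagonal(a, axis1=-2, axis2=-1).shape)  # (2, 3): last two axes, as in the wrapper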
Example #9
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute value
            of the determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    if dtype not in (numpy.float32, numpy.float64,
                     numpy.complex64, numpy.complex128):
        msg = ('dtype must be float32, float64, complex64, or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # ipiv is 1-origin
    non_zero = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        non_zero += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign.astype(dtype)).reshape(shape),
        cupy.where(singular, real_dtype.type('-inf'), logdet).reshape(shape),
    )
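This is the implementation behind cupy.linalg.slogdet; a quick hypothetical check against cupy.linalg.det on a well-conditioned batch:

import cupy

a = cupy.random.rand(5, 3, 3) + 3.0 * cupy.eye(3)  # batch of 5 well-conditioned matrices
sign, logdet = cupy.linalg.slogdet(a)
assert cupy.allclose(sign * cupy.exp(logdet), cupy.linalg.det(a))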
Example #10
def extract(noisyimg_gpu, imgweights_gpu, A_gpu):
    #- Set up the equation to solve (B&S eq 4)
    #get the cpu values too
    noisyimg_cpu = noisyimg_gpu.get()
    imgweights_cpu = imgweights_gpu.get()
    A_cpu = A_gpu.get()

    W_cpu = scipy.sparse.spdiags(data=imgweights_cpu.ravel(), diags=[0],
                                 m=npix, n=npix)  # scipy sparse object
    W_gpu = cpx.scipy.sparse.spdiags(data=imgweights_gpu.ravel(), diags=[0],
                                     m=npix, n=npix)
    #yank gpu back to cpu so we can compare
    W_yank = W_gpu.get()
    assert np.allclose(W_cpu.todense(), W_yank.todense()) #todense bc this is a sparse object
    #passes

    iCov_gpu = A_gpu.T.dot(W_gpu.dot(A_gpu))
    iCov_cpu = A_cpu.T.dot(W_cpu.dot(A_cpu))
    #yank gpu back to cpu so we can compare
    iCov_yank = iCov_gpu.get()
    assert np.allclose(iCov_cpu.todense(), iCov_yank.todense()) #todense bc this is sparse
    #passes

    y_gpu = A_gpu.T.dot(W_gpu.dot(noisyimg_gpu.ravel()))
    y_cpu = A_cpu.T.dot(W_cpu.dot(noisyimg_cpu.ravel()))
    #yank gpu back and compare
    y_yank = y_gpu.get()
    assert np.allclose(y_cpu, y_yank)
    #passes

    #- Solve f (B&S eq 4)
    # An earlier attempt used scipy/cupyx sparse lsqr here, but it failed the
    # allclose check against the CPU; cupyx's lsqr also returns a tuple whose
    # extra entries are None, so the result had to be pulled back as a cupy
    # array and reshaped to (nspec, nwave) by hand. np.linalg.solve and
    # cp.linalg.solve agree with each other, so we use those instead.
    f_gpu = cp.linalg.solve(iCov_gpu.todense(), y_gpu)  # requires array, not sparse object
    f_cpu = np.linalg.solve(iCov_cpu.todense(), y_cpu)  # requires array, not sparse object
    #yank back and compare
    f_yank = f_gpu.get()
    assert np.allclose(f_cpu, f_yank)
    #passes

    #- Eigen-decompose iCov to assist in upcoming steps
    u_gpu, v_gpu = cp.linalg.eigh(iCov_gpu.todense())
    u, v = np.linalg.eigh(iCov_cpu.todense())
    u_cpu = np.asarray(u)
    v_cpu = np.asarray(v)
    #yank back and compare
    u_yank = u_gpu.get()
    v_yank = v_gpu.get()
    assert np.allclose(u_cpu, u_yank)
    assert np.allclose(v_cpu, v_yank)
    #passes

    #- Calculate C^-1 = QQ (B&S eq 10)
    d_gpu = cpx.scipy.sparse.spdiags(cp.sqrt(u_gpu), 0, len(u_gpu), len(u_gpu))
    d_cpu = scipy.sparse.spdiags(np.sqrt(u_cpu), 0, len(u_cpu), len(u_cpu))
    #yank back and compare
    d_yank = d_gpu.get()
    assert np.allclose(d_cpu.todense(), d_yank.todense())
    #passes

    Q_gpu = v_gpu.dot(d_gpu.dot(v_gpu.T))
    Q_cpu = v_cpu.dot(d_cpu.dot(v_cpu.T))
    #yank back and compare
    Q_yank = Q_gpu.get()
    assert np.allclose(Q_cpu, Q_yank)
    #passes

    #- normalization vector (B&S eq 11)
    norm_vector_gpu = cp.sum(Q_gpu, axis=1)
    norm_vector_cpu = np.sum(Q_cpu, axis=1)
    #yank back and compare
    norm_vector_yank = norm_vector_gpu.get()
    assert np.allclose(norm_vector_cpu, norm_vector_yank)
    #passes

    #- Resolution matrix (B&S eq 12)
    R_gpu = cp.outer(norm_vector_gpu**(-1), cp.ones(norm_vector_gpu.size)) * Q_gpu
    R_cpu = np.outer(norm_vector_cpu**(-1), np.ones(norm_vector_cpu.size)) * Q_cpu
    #yank back and compare
    R_yank = R_gpu.get()
    assert np.allclose(R_cpu, R_yank)
    #passes

    #- Decorrelated covariance matrix (B&S eq 13-15)
    udiags_gpu = cpx.scipy.sparse.spdiags(1 / u_gpu, 0, len(u_gpu), len(u_gpu))
    udiags_cpu = scipy.sparse.spdiags(1 / u_cpu, 0, len(u_cpu), len(u_cpu))
    #yank back and compare
    udiags_yank = udiags_gpu.get()
    assert np.allclose(udiags_cpu.todense(), udiags_yank.todense()) #sparse objects
    #passes

    Cov_gpu = v_gpu.dot(udiags_gpu.dot(v_gpu.T))
    Cov_cpu = v_cpu.dot(udiags_cpu.dot(v_cpu.T))
    #yank back and compare
    Cov_yank = Cov_gpu.get()
    assert np.allclose(Cov_cpu, Cov_yank)
    #passes

    Cx_gpu = R_gpu.dot(Cov_gpu.dot(R_gpu.T))
    Cx_cpu = R_cpu.dot(Cov_cpu.dot(R_cpu.T))
    #yank back and compare
    Cx_yank = Cx_gpu.get()
    assert np.allclose(Cx_cpu, Cx_yank)
    #passes

    #- Decorrelated flux (B&S eq 16)
    fx_gpu = R_gpu.dot(f_gpu.ravel()).reshape(f_gpu.shape)
    fx_cpu = R_cpu.dot(f_cpu.ravel()).reshape(f_cpu.shape)
    #yank back and compare
    fx_yank = fx_gpu.get()
    assert np.allclose(fx_cpu, fx_yank)
    #passes

    #- Variance on f (B&S eq 13)
    varfx_gpu = cp.diagonal(Cx_gpu)
    varfx_cpu = np.diagonal(Cx_cpu)
    #yank back and compare
    varfx_yank = varfx_gpu.get()
    assert np.allclose(varfx_cpu, varfx_yank)
    #passes

    return fx_gpu, fx_cpu, varfx_gpu, varfx_cpu, R_gpu, R_cpu
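The yank-and-compare pattern repeated throughout could be factored into a small helper; a hypothetical sketch using the names from the function above:

import numpy as np

def check(name, cpu_val, gpu_val, sparse=False):
    yank = gpu_val.get()  # copy the device result back to the host
    if sparse:
        cpu_val, yank = cpu_val.todense(), yank.todense()
    assert np.allclose(cpu_val, yank), name

check('iCov', iCov_cpu, iCov_gpu, sparse=True)
check('f', f_cpu, f_gpu)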
Example #11
    def process(self, inputs):
        df = inputs[self.INPUT_PORT_NAME]
        all_sample_ids = df['sample_id'].unique()
        total_samples = len(all_sample_ids)
        window = self.conf['window']
        means, cov, distance, all_dates = compute_cov_distance(total_samples,
                                                               df,
                                                               window=window)

        total_samples, num_months, assets, _ = cov.shape  # covariance matrices are square

        months_id = all_dates.dt.year * 12 + (all_dates.dt.month - 1)
        months_id = months_id - months_id.min()
        mid = (cupy.arange(months_id.max() + 1) +
               (all_dates.dt.month - 1)[0])[window:]
        minyear = all_dates.dt.year.min()
        if len(mid) == 0:
            mid = cupy.array([0])
        months = mid % 12
        years = mid // 12 + minyear

        output = {}
        # print(num_months, len(mid))
        if self.outport_connected(self.MEAN_DF):
            df_mean = cudf.DataFrame(
                means.reshape(total_samples * num_months, -1))
            df_mean['year'] = cupy.concatenate([years] * total_samples).astype(
                cupy.int16)
            df_mean['month'] = cupy.concatenate(
                [months] * total_samples).astype(cupy.int16)
            df_mean['sample_id'] = cupy.repeat(
                cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
            output.update({self.MEAN_DF: df_mean})
        if self.outport_connected(self.STD_DF):
            data_ma = cov.reshape(total_samples * num_months, assets, assets)
            diagonalized = cupy.diagonal(data_ma, 0, 1, 2)  # per-asset variance
            diagonalized = cupy.sqrt(diagonalized)  # per-asset std
            df_std = cudf.DataFrame(diagonalized)
            df_std['year'] = cupy.concatenate([years] * total_samples).astype(
                cupy.int16)
            df_std['month'] = cupy.concatenate(
                [months] * total_samples).astype(cupy.int16)
            df_std['sample_id'] = cupy.repeat(
                cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
            output.update({self.STD_DF: df_std})
        if self.outport_connected(self.COV_DF):
            df_cov = cudf.DataFrame(cov.reshape(total_samples * num_months,
                                                -1))
            df_cov['year'] = cupy.concatenate([years] * total_samples).astype(
                cupy.int16)
            df_cov['month'] = cupy.concatenate(
                [months] * total_samples).astype(cupy.int16)
            df_cov['sample_id'] = cupy.repeat(
                cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
            output.update({self.COV_DF: df_cov})
        if self.outport_connected(self.CORR_DF):
            dis_ma = distance.reshape(total_samples * num_months, -1)
            dis_ma = 1 - 2.0 * dis_ma
            df_corr = cudf.DataFrame(dis_ma)
            df_corr['year'] = cupy.concatenate([years] * total_samples).astype(
                cupy.int16)
            df_corr['month'] = cupy.concatenate(
                [months] * total_samples).astype(cupy.int16)
            df_corr['sample_id'] = cupy.repeat(
                cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
            output.update({self.CORR_DF: df_corr})
        if self.outport_connected(self.DISTANCE_DF):
            df_dis = cudf.DataFrame(
                distance.reshape(total_samples * num_months, -1))
            df_dis['year'] = cupy.concatenate([years] * total_samples).astype(
                cupy.int16)
            df_dis['month'] = cupy.concatenate(
                [months] * total_samples).astype(cupy.int16)
            df_dis['sample_id'] = cupy.repeat(
                cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
            output.update({self.DISTANCE_DF: df_dis})
        return output
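For reference, the STD_DF branch in isolation, with illustrative shapes: pull the variances off the diagonal of each covariance matrix and take the square root.

import cupy

cov = cupy.random.rand(8, 3, 3)
cov = cov @ cov.transpose(0, 2, 1)            # make each matrix a valid covariance
std = cupy.sqrt(cupy.diagonal(cov, 0, 1, 2))  # per-asset std, shape (8, 3)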