Example #1
0
def linear_operator_from_shape(shape, weights=None, calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator of a full
    1D, 2D or 3D image grid.

    One sparse p-by-p forward-difference matrix is built per image axis,
    where p is the number of grid points. In each matrix, the rows of the
    points lying on the last index of the corresponding axis are zeroed.

    Parameters
    ----------
    shape : Integer, or list or tuple with 1, 2 or 3 integers. The image
            shape, given as X, (X,), (Y, X) or (Z, Y, X), where Z is the
            number of "layers", Y the number of rows and X the number of
            columns. Missing leading dimensions are set to 1. Intercept
            variables must not be included.

    weights : Sequence, e.g. list or numpy (p-by-1) array, or None. The
            weights are flattened and placed on the diagonals of the
            difference matrices. Default is None, i.e. weight 1 everywhere.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to p - 1.
    """
    dims = tuple(shape) if isinstance(shape, (list, tuple)) else (shape,)
    # Left-pad with singleton dimensions so that dims reads (Z, Y, X).
    dims = (1,) * (3 - len(dims)) + dims
    nz, ny, nx = dims[:3]
    p = nx * ny * nz
    grid = np.arange(p).reshape((nz, ny, nx))

    flat_weights = None if weights is None else np.array(weights).ravel()

    def empty_matrix():
        # Used for axes of length 1, along which there is nothing to difference.
        return sparse.csr_matrix((p, p), dtype=float)

    def difference_matrix(stride, last_rows):
        # Forward difference between each point and the point `stride`
        # positions further in the flattened image.
        if flat_weights is None:
            D = sparse.eye(p, p, stride, format='csr') - sparse.eye(p, p)
        else:
            D = (sparse.spdiags(flat_weights, -stride, p, p).T
                 - sparse.spdiags(flat_weights, 0, p, p)).tocsr()
        # Points on the last index of the axis have no forward neighbour:
        # blank their rows directly in the CSR storage, then drop the zeros.
        for row in last_rows:
            D.data[D.indptr[row]:D.indptr[row + 1]] = 0
        D.eliminate_zeros()
        return D

    if nx > 1:
        Ax = difference_matrix(1, grid[:, :, -1].ravel())
    else:
        Ax = empty_matrix()

    if ny > 1:
        Ay = difference_matrix(nx, grid[:, -1, :].ravel())
    else:
        Ay = empty_matrix()

    if nz > 1:
        Az = difference_matrix(ny * nx, grid[-1, :, :].ravel())
    else:
        Az = empty_matrix()

    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = p - 1

    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #2
0
def linear_operator_from_mesh(mesh_coord, mesh_triangles, mask=None, offset=0,
                              weights=None, calc_lambda_max=False):
    """Generates the linear operator for the total variation Nesterov function
    from a mesh.

    Parameters
    ----------
    mesh_coord : Numpy array [n, 3] of float. The coordinates of the n nodes.

    mesh_triangles : Numpy array, n_triangles-by-3. The (integer) indices of
            the three nodes forming the triangle.

    mask : Numpy array (shape (n,)) of integers/boolean. Non-null values
            correspond to columns of X. An edge is kept when both of its
            nodes have a non-null mask value. Default is None, which keeps
            all nodes.

    offset : Non-negative integer. The index of the first column, variable,
            where TV applies. This is different from penalty_start which
            define where the penalty applies. The offset defines where TV
            applies within the penalised variables.

                Example: X := [Intercept, Age, Weight, Image]. Intercept is
                not penalized, TV does not apply on Age and Weight but only on
                Image. Thus: penalty_start = 1, offset = 2 (skip Age and
                Weight).

    weights : Numpy array. The weight put on the gradient of every point.
            Default is weight 1 for each point, or equivalently, no weight. The
            weights is a numpy array of the same shape as mask.

    calc_lambda_max: boolean. Should the largest singular value being
            precomputed ?

    Returns
    -------
    A : LinearOperatorNesterov. Holds one sparse p-by-p matrix per edge slot
            (as many as the largest number of edges attached to one node),
            where p = (number of non-null mask values) + offset. The number
            of compacts is stored in the attribute n_compacts.

    Examples
    --------
    >>> import numpy as np
    >>> import parsimony.functions.nesterov.tv as tv_helper
    >>> mesh_coord = np.array([[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]])
    >>> mesh_triangles = np.array([[0 ,1, 3], [0, 2 ,3], [2, 3, 5], [2, 4, 5]])
    >>> A = tv_helper.linear_operator_from_mesh(mesh_coord, mesh_triangles)
    """
    if mask is None:
        mask = np.ones(mesh_coord.shape[0], dtype=bool)
    assert mask.shape[0] == mesh_coord.shape[0]
    mask_bool = mask != 0
    mask_idx = np.where(mask_bool)[0]
    n_masked = int(np.sum(mask_bool))

    # Mapping from full array to masked array (-1 marks nodes outside the
    # mask); masked indices are shifted by offset.
    map_full2masked = np.full(mask.shape, -1, dtype=int)
    map_full2masked[mask_bool] = np.arange(n_masked) + offset

    # 1) Associate edges to nodes. Every edge is stored once, on the node it
    # is attached to, as [owner, other].
    nodes_with_edges = [[] for _ in range(mesh_coord.shape[0])]

    def connect_edge_to_node(node_idx1, node_idx2, nodes_with_edges):
        # The sign of the summed coordinate difference decides which end
        # point owns the edge, so that an edge shared by two triangles is
        # always attached to the same node.
        if np.sum(mesh_coord[node_idx1] - mesh_coord[node_idx2]) >= 0:
            edge = [node_idx1, node_idx2]
            if edge not in nodes_with_edges[node_idx1]:
                nodes_with_edges[node_idx1].append(edge)
        else:  # attach edge to second node
            edge = [node_idx2, node_idx1]
            if edge not in nodes_with_edges[node_idx2]:
                nodes_with_edges[node_idx2].append(edge)

    for i in range(mesh_triangles.shape[0]):
        t = mesh_triangles[i, :]
        connect_edge_to_node(t[0], t[1], nodes_with_edges)
        connect_edge_to_node(t[0], t[2], nodes_with_edges)
        connect_edge_to_node(t[1], t[2], nodes_with_edges)
    max_connectivity = np.max(np.array([len(n) for n in nodes_with_edges]))

    # 2) Build the sparse matrices: the i-th edge of every node goes into the
    # i-th matrix. Each entry of A holds the (row, column, value) lists.
    A = [[[], [], []] for _ in range(max_connectivity)]
    n_compacts = 0
    for node_idx in mask_idx:
        # The weight belongs to the point (node), not to the edge slot.
        if weights is not None:
            w = weights[node_idx]
        else:
            w = 1.0
        # A node counts as one compact as soon as ANY of its edges is kept,
        # hence the flag must not be reset inside the edge loop.
        found = False
        for i, (node1_idx, node2_idx) in enumerate(nodes_with_edges[node_idx]):
            if mask_bool[node1_idx] and mask_bool[node2_idx]:
                found = True
                A[i][0] += [map_full2masked[node1_idx],
                            map_full2masked[node1_idx]]
                A[i][1] += [map_full2masked[node1_idx],
                            map_full2masked[node2_idx]]
                A[i][2] += [-w, w]
        if found:
            n_compacts += 1

    # Indices run up to n_masked + offset - 1, so the matrices must include
    # the offset; counting mask_bool also handles integer-labelled masks.
    p = n_masked + offset
    A = [sparse.csr_matrix((A[i][2], (A[i][0], A[i][1])),
                           shape=(p, p)) for i in range(len(A))]
    A = LinearOperatorNesterov(*A)
    A.n_compacts = n_compacts

    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #3
0
def linear_operator_from_mask(mask, offset=0, weights=None, calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator from a mask of
    an image with up to three dimensions.

    For every point inside the mask and every image axis, a forward
    difference to the next point along that axis is added when that point
    carries the same mask value.

    Parameters
    ----------
    mask : Numpy array of integers. The mask has the same shape as the
            original data; trailing singleton dimensions are appended until
            it has three. Non-null values correspond to columns of X.
            Different non-null values define groups, and differences are only
            taken between points with equal mask value.

    offset : Non-negative integer. The index of the first column, variable,
            where TV applies. It is added to the flat index of every masked
            point and to the size of the matrices. This is different from
            penalty_start, which defines where the penalty applies.

                Example: X := [Intercept, Age, Weight, Image]. Intercept is
                not penalized, TV does not apply on Age and Weight but only on
                Image. Thus: penalty_start = 1, offset = 2 (skip Age and
                Weight).

    weights : Numpy array of the same shape as mask, or None. The weight put
            on the gradient of every point. Default is None, i.e. weight 1.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to the number of masked points having at least one
            forward neighbour.
    """
    while len(mask.shape) < 3:
        mask = mask[..., np.newaxis]

    if weights is not None:
        while len(weights.shape) < 3:
            weights = weights[..., np.newaxis]

    nx, ny, nz = mask.shape
    inside = mask != 0
    n_inside = np.sum(inside)
    p = n_inside + offset

    # Mapping from image coordinate to flat masked array (-1 outside the mask).
    flat_index = np.full(mask.shape, -1, dtype=int)
    flat_index[inside] = np.arange(n_inside) + offset

    # One (rows, columns, values) triplet of lists per axis, in x, y, z order.
    steps = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    entries = [([], [], []) for _ in steps]
    n_compacts = 0

    for x, y, z in zip(*np.where(inside)):
        centre = flat_index[x, y, z]
        label = mask[x, y, z]
        w = 1.0 if weights is None else weights[x, y, z]

        has_neighbour = False
        for (dx, dy, dz), (rows, cols, vals) in zip(steps, entries):
            xn, yn, zn = x + dx, y + dy, z + dz
            if xn >= nx or yn >= ny or zn >= nz:
                continue  # the forward neighbour falls outside the image
            if mask[xn, yn, zn] == label:
                has_neighbour = True
                rows.extend((centre, centre))
                cols.extend((centre, flat_index[xn, yn, zn]))
                vals.extend((-w, w))

        if has_neighbour:
            n_compacts += 1

    Ax, Ay, Az = [sparse.csr_matrix((vals, (rows, cols)), shape=(p, p))
                  for rows, cols, vals in entries]
    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = n_compacts
    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #4
0
def linear_operator_from_subset_mask(mask, weights=None, calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator for the subset
    of an image selected by a binary mask.

    The matrices are indexed by the flat (row-major) position in the whole
    image, not by the position among the masked points, so they are p-by-p
    with p the total number of image points.

    Parameters
    ----------
    mask : Numpy array. The mask, of the same size as the image; leading
            singleton dimensions are prepended until it has three, read as
            (Z, Y, X). It is cast to boolean. The mask does not involve any
            intercept variables.

    weights : Numpy array of the same shape as mask, or None. The weight put
            on the gradient of every point. Default is None, i.e. weight 1.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to the number of masked points having at least one
            masked forward neighbour.
    """
    while len(mask.shape) < 3:
        mask = mask[np.newaxis, :]

    if weights is not None:
        while len(weights.shape) < 3:
            weights = weights[np.newaxis, :]

    nz, ny, nx = mask.shape
    mask = mask.astype(bool)

    # Per axis (x, y, z order): the step in (z, y, x) coordinates, the
    # matching step in the flat row-major index, and the
    # (rows, columns, values) lists of the sparse matrix.
    axes = (((0, 0, 1), 1, ([], [], [])),
            ((0, 1, 0), nx, ([], [], [])),
            ((1, 0, 0), nx * ny, ([], [], [])))
    num_compacts = 0

    for z, y, x in zip(*np.where(mask)):
        # Flat row-major index of (z, y, x) in the whole image.
        centre = z * nx * ny + y * nx + x
        w = 1.0 if weights is None else weights[z, y, x]

        has_neighbour = False
        for (dz, dy, dx), stride, (rows, cols, vals) in axes:
            zn, yn, xn = z + dz, y + dy, x + dx
            if zn < nz and yn < ny and xn < nx and mask[zn, yn, xn]:
                has_neighbour = True
                rows.extend((centre, centre))
                cols.extend((centre, centre + stride))
                vals.extend((-w, w))

        if has_neighbour:
            num_compacts += 1

    p = np.prod(mask.shape)
    Ax, Ay, Az = [sparse.csr_matrix((vals, (rows, cols)), shape=(p, p))
                  for _, _, (rows, cols, vals) in axes]
    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = num_compacts
    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #5
0
# Use mean imputation, we could have used median for age
#imput = sklearn.preprocessing.Imputer(strategy = 'median',axis=0)
#Z = imput.fit_transform(Z)
# Put the columns of Z in front of the columns of X.
X = np.hstack([Z, X])
assert X.shape == (526, 140364)

# Remove the samples (rows) whose target y is NaN. The unchanged shape
# asserted below means no sample is expected to be dropped.
X = X[np.logical_not(np.isnan(y)).ravel(), :]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (526, 140364)

np.save(os.path.join(OUTPUT, "X.npy"), X)
np.save(os.path.join(OUTPUT, "y.npy"), y)

###############################################################################
###############################################################################
# Precompute the TV linear operator
X = np.load(os.path.join(OUTPUT, "X.npy"))
y = np.load(os.path.join(OUTPUT, "y.npy"))

mask = nibabel.load(os.path.join(OUTPUT, "mask.nii"))

import parsimony.functions.nesterov.tv as nesterov_tv
from parsimony.utils.linalgs import LinearOperatorNesterov

# Image.get_data() is deprecated and was removed in nibabel 5;
# np.asanyarray(img.dataobj) is its documented equivalent.
Atv = nesterov_tv.linear_operator_from_mask(np.asanyarray(mask.dataobj),
                                            calc_lambda_max=True)
# Save the operator, reload it and check that the precomputed singular value
# survives the round trip.
Atv.save(os.path.join(OUTPUT, "Atv.npz"))
Atv_ = LinearOperatorNesterov(filename=os.path.join(OUTPUT, "Atv.npz"))
assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
Example #6
0
def load_globals(config):
    """Fill the namespace of the ``mapreduce`` module from ``config``.

    Sets ``DATA`` (loaded from ``config["data"]``), ``Atv`` (the linear
    operator read from the file ``config["structure_linear_operator_tv"]``)
    and ``FULL_RESAMPLE`` (``config['full_resample']``) on that module.
    """
    import mapreduce as shared  # the module doubles as global storage
    shared.DATA = shared.load_data(config["data"])
    shared.Atv = LinearOperatorNesterov(
        filename=config["structure_linear_operator_tv"])
    shared.FULL_RESAMPLE = config['full_resample']
Example #7
0
def linear_operator_from_shape(shape, weights=None, calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator of a full
    1D, 2D or 3D image grid.

    One sparse p-by-p forward-difference matrix is built per image axis,
    where p is the number of grid points. In each matrix, the rows of the
    points lying on the last index of the corresponding axis are zeroed.

    Parameters
    ----------
    shape : Integer, or list or tuple with 1, 2 or 3 integers. The image
            shape, given as X, (X,), (Y, X) or (Z, Y, X), where Z is the
            number of "layers", Y the number of rows and X the number of
            columns. Missing leading dimensions are set to 1. Intercept
            variables must not be included.

    weights : Sequence, e.g. list or numpy (p-by-1) array, or None. The
            weights are flattened and placed on the diagonals of the
            difference matrices. Default is None, i.e. weight 1 everywhere.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to p - 1.
    """
    dims = tuple(shape) if isinstance(shape, (list, tuple)) else (shape,)
    # Left-pad with singleton dimensions so that dims reads (Z, Y, X).
    dims = (1,) * (3 - len(dims)) + dims
    nz, ny, nx = dims[:3]
    p = nx * ny * nz
    grid = np.arange(p).reshape((nz, ny, nx))

    flat_weights = None if weights is None else np.array(weights).ravel()

    def empty_matrix():
        # Used for axes of length 1, along which there is nothing to difference.
        return sparse.csr_matrix((p, p), dtype=float)

    def difference_matrix(stride, last_rows):
        # Forward difference between each point and the point `stride`
        # positions further in the flattened image.
        if flat_weights is None:
            D = sparse.eye(p, p, stride, format='csr') - sparse.eye(p, p)
        else:
            D = (sparse.spdiags(flat_weights, -stride, p, p).T
                 - sparse.spdiags(flat_weights, 0, p, p)).tocsr()
        # Points on the last index of the axis have no forward neighbour:
        # blank their rows directly in the CSR storage, then drop the zeros.
        for row in last_rows:
            D.data[D.indptr[row]:D.indptr[row + 1]] = 0
        D.eliminate_zeros()
        return D

    if nx > 1:
        Ax = difference_matrix(1, grid[:, :, -1].ravel())
    else:
        Ax = empty_matrix()

    if ny > 1:
        Ay = difference_matrix(nx, grid[:, -1, :].ravel())
    else:
        Ay = empty_matrix()

    if nz > 1:
        Az = difference_matrix(ny * nx, grid[-1, :, :].ravel())
    else:
        Az = empty_matrix()

    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = p - 1

    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #8
0
def linear_operator_from_mesh(mesh_coord,
                              mesh_triangles,
                              mask=None,
                              offset=0,
                              weights=None,
                              calc_lambda_max=False):
    """Generates the linear operator for the total variation Nesterov function
    from a mesh.

    Parameters
    ----------
    mesh_coord : Numpy array [n, 3] of float. The coordinates of the n nodes.

    mesh_triangles : Numpy array, n_triangles-by-3. The (integer) indices of
            the three nodes forming the triangle.

    mask : Numpy array (shape (n,)) of integers/boolean. Non-null values
            correspond to columns of X. An edge is kept when both of its
            nodes have a non-null mask value. Default is None, which keeps
            all nodes.

    offset : Non-negative integer. The index of the first column, variable,
            where TV applies. This is different from penalty_start which
            define where the penalty applies. The offset defines where TV
            applies within the penalised variables.

                Example: X := [Intercept, Age, Weight, Image]. Intercept is
                not penalized, TV does not apply on Age and Weight but only on
                Image. Thus: penalty_start = 1, offset = 2 (skip Age and
                Weight).

    weights : Numpy array. The weight put on the gradient of every point.
            Default is weight 1 for each point, or equivalently, no weight. The
            weights is a numpy array of the same shape as mask.

    calc_lambda_max: boolean. Should the largest singular value being
            precomputed ?

    Returns
    -------
    A : LinearOperatorNesterov. Holds one sparse p-by-p matrix per edge slot
            (as many as the largest number of edges attached to one node),
            where p = (number of non-null mask values) + offset. The number
            of compacts is stored in the attribute n_compacts.

    Examples
    --------
    >>> import numpy as np
    >>> import parsimony.functions.nesterov.tv as tv_helper
    >>> mesh_coord = np.array([[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]])
    >>> mesh_triangles = np.array([[0 ,1, 3], [0, 2 ,3], [2, 3, 5], [2, 4, 5]])
    >>> A = tv_helper.linear_operator_from_mesh(mesh_coord, mesh_triangles)
    """
    if mask is None:
        mask = np.ones(mesh_coord.shape[0], dtype=bool)
    assert mask.shape[0] == mesh_coord.shape[0]
    mask_bool = mask != 0
    mask_idx = np.where(mask_bool)[0]
    n_masked = int(np.sum(mask_bool))

    # Mapping from full array to masked array (-1 marks nodes outside the
    # mask); masked indices are shifted by offset.
    map_full2masked = np.full(mask.shape, -1, dtype=int)
    map_full2masked[mask_bool] = np.arange(n_masked) + offset

    # 1) Associate edges to nodes. Every edge is stored once, on the node it
    # is attached to, as [owner, other].
    nodes_with_edges = [[] for _ in range(mesh_coord.shape[0])]

    def connect_edge_to_node(node_idx1, node_idx2, nodes_with_edges):
        # The sign of the summed coordinate difference decides which end
        # point owns the edge, so that an edge shared by two triangles is
        # always attached to the same node.
        if np.sum(mesh_coord[node_idx1] - mesh_coord[node_idx2]) >= 0:
            edge = [node_idx1, node_idx2]
            if edge not in nodes_with_edges[node_idx1]:
                nodes_with_edges[node_idx1].append(edge)
        else:  # attach edge to second node
            edge = [node_idx2, node_idx1]
            if edge not in nodes_with_edges[node_idx2]:
                nodes_with_edges[node_idx2].append(edge)

    for i in range(mesh_triangles.shape[0]):
        t = mesh_triangles[i, :]
        connect_edge_to_node(t[0], t[1], nodes_with_edges)
        connect_edge_to_node(t[0], t[2], nodes_with_edges)
        connect_edge_to_node(t[1], t[2], nodes_with_edges)
    max_connectivity = np.max(np.array([len(n) for n in nodes_with_edges]))

    # 2) Build the sparse matrices: the i-th edge of every node goes into the
    # i-th matrix. Each entry of A holds the (row, column, value) lists.
    A = [[[], [], []] for _ in range(max_connectivity)]
    n_compacts = 0
    for node_idx in mask_idx:
        # The weight belongs to the point (node), not to the edge slot.
        if weights is not None:
            w = weights[node_idx]
        else:
            w = 1.0
        # A node counts as one compact as soon as ANY of its edges is kept,
        # hence the flag must not be reset inside the edge loop.
        found = False
        for i, (node1_idx, node2_idx) in enumerate(nodes_with_edges[node_idx]):
            if mask_bool[node1_idx] and mask_bool[node2_idx]:
                found = True
                A[i][0] += [
                    map_full2masked[node1_idx], map_full2masked[node1_idx]
                ]
                A[i][1] += [
                    map_full2masked[node1_idx], map_full2masked[node2_idx]
                ]
                A[i][2] += [-w, w]
        if found:
            n_compacts += 1

    # Indices run up to n_masked + offset - 1, so the matrices must include
    # the offset; counting mask_bool also handles integer-labelled masks.
    p = n_masked + offset
    A = [
        sparse.csr_matrix((A[i][2], (A[i][0], A[i][1])), shape=(p, p))
        for i in range(len(A))
    ]
    A = LinearOperatorNesterov(*A)
    A.n_compacts = n_compacts

    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #9
0
def linear_operator_from_mask(mask,
                              offset=0,
                              weights=None,
                              calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator from a mask of
    an image with up to three dimensions.

    For every point inside the mask and every image axis, a forward
    difference to the next point along that axis is added when that point
    carries the same mask value.

    Parameters
    ----------
    mask : Numpy array of integers. The mask has the same shape as the
            original data; trailing singleton dimensions are appended until
            it has three. Non-null values correspond to columns of X.
            Different non-null values define groups, and differences are only
            taken between points with equal mask value.

    offset : Non-negative integer. The index of the first column, variable,
            where TV applies. It is added to the flat index of every masked
            point and to the size of the matrices. This is different from
            penalty_start, which defines where the penalty applies.

                Example: X := [Intercept, Age, Weight, Image]. Intercept is
                not penalized, TV does not apply on Age and Weight but only on
                Image. Thus: penalty_start = 1, offset = 2 (skip Age and
                Weight).

    weights : Numpy array of the same shape as mask, or None. The weight put
            on the gradient of every point. Default is None, i.e. weight 1.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to the number of masked points having at least one
            forward neighbour.
    """
    while len(mask.shape) < 3:
        mask = mask[..., np.newaxis]

    if weights is not None:
        while len(weights.shape) < 3:
            weights = weights[..., np.newaxis]

    nx, ny, nz = mask.shape
    inside = mask != 0
    n_inside = np.sum(inside)
    p = n_inside + offset

    # Mapping from image coordinate to flat masked array (-1 outside the mask).
    flat_index = np.full(mask.shape, -1, dtype=int)
    flat_index[inside] = np.arange(n_inside) + offset

    # One (rows, columns, values) triplet of lists per axis, in x, y, z order.
    steps = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
    entries = [([], [], []) for _ in steps]
    n_compacts = 0

    for x, y, z in zip(*np.where(inside)):
        centre = flat_index[x, y, z]
        label = mask[x, y, z]
        w = 1.0 if weights is None else weights[x, y, z]

        has_neighbour = False
        for (dx, dy, dz), (rows, cols, vals) in zip(steps, entries):
            xn, yn, zn = x + dx, y + dy, z + dz
            if xn >= nx or yn >= ny or zn >= nz:
                continue  # the forward neighbour falls outside the image
            if mask[xn, yn, zn] == label:
                has_neighbour = True
                rows.extend((centre, centre))
                cols.extend((centre, flat_index[xn, yn, zn]))
                vals.extend((-w, w))

        if has_neighbour:
            n_compacts += 1

    Ax, Ay, Az = [sparse.csr_matrix((vals, (rows, cols)), shape=(p, p))
                  for rows, cols, vals in entries]
    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = n_compacts
    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #10
0
def linear_operator_from_subset_mask(mask,
                                     weights=None,
                                     calc_lambda_max=False):
    """Build the total variation (TV) Nesterov linear operator for the subset
    of an image selected by a binary mask.

    The matrices are indexed by the flat (row-major) position in the whole
    image, not by the position among the masked points, so they are p-by-p
    with p the total number of image points.

    Parameters
    ----------
    mask : Numpy array. The mask, of the same size as the image; leading
            singleton dimensions are prepended until it has three, read as
            (Z, Y, X). It is cast to boolean. The mask does not involve any
            intercept variables.

    weights : Numpy array of the same shape as mask, or None. The weight put
            on the gradient of every point. Default is None, i.e. weight 1.

    calc_lambda_max : Boolean. If True, the value returned by
            TotalVariation(l=0., A=A).lambda_max() is stored (in a
            one-element list) in the singular_values attribute of the result.

    Returns
    -------
    A : LinearOperatorNesterov built from (Ax, Ay, Az), with the attribute
            n_compacts set to the number of masked points having at least one
            masked forward neighbour.
    """
    while len(mask.shape) < 3:
        mask = mask[np.newaxis, :]

    if weights is not None:
        while len(weights.shape) < 3:
            weights = weights[np.newaxis, :]

    nz, ny, nx = mask.shape
    mask = mask.astype(bool)

    # Per axis (x, y, z order): the step in (z, y, x) coordinates, the
    # matching step in the flat row-major index, and the
    # (rows, columns, values) lists of the sparse matrix.
    axes = (((0, 0, 1), 1, ([], [], [])),
            ((0, 1, 0), nx, ([], [], [])),
            ((1, 0, 0), nx * ny, ([], [], [])))
    num_compacts = 0

    for z, y, x in zip(*np.where(mask)):
        # Flat row-major index of (z, y, x) in the whole image.
        centre = z * nx * ny + y * nx + x
        w = 1.0 if weights is None else weights[z, y, x]

        has_neighbour = False
        for (dz, dy, dx), stride, (rows, cols, vals) in axes:
            zn, yn, xn = z + dz, y + dy, x + dx
            if zn < nz and yn < ny and xn < nx and mask[zn, yn, xn]:
                has_neighbour = True
                rows.extend((centre, centre))
                cols.extend((centre, centre + stride))
                vals.extend((-w, w))

        if has_neighbour:
            num_compacts += 1

    p = np.prod(mask.shape)
    Ax, Ay, Az = [sparse.csr_matrix((vals, (rows, cols)), shape=(p, p))
                  for _, _, (rows, cols, vals) in axes]
    A = LinearOperatorNesterov(Ax, Ay, Az)
    A.n_compacts = num_compacts
    if calc_lambda_max:
        A.singular_values = [TotalVariation(l=0., A=A).lambda_max()]
    return A
Example #11
0
# Save the mask next to the data sets.
np.save(os.path.join(OUTPUT, "mask.npy"), mask)

# Keep only the columns of Xtot selected by the mask.
X = Xtot[:, mask]
assert X.shape == (80, 299798)

#############################################################################

# Put the columns of Z in front of the masked columns (the asserted shapes
# show that Z contributes two columns).
X = np.hstack([Z, X])
assert X.shape == (80, 299800)
# Remove the samples (rows) whose target y is NaN. The unchanged shape
# asserted below means no sample is expected to be dropped.
X = X[np.logical_not(np.isnan(y)).ravel(), :]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (80, 299800)

np.save(os.path.join(OUTPUT, "X.npy"), X)
np.save(os.path.join(OUTPUT, "y.npy"), y)

#############################################################################
# Precompute the TV linear operator of the mesh, restricted to the mask.
import parsimony.functions.nesterov.tv as nesterov_tv
from parsimony.utils.linalgs import LinearOperatorNesterov

Atv = nesterov_tv.linear_operator_from_mesh(cor,
                                            tri,
                                            mask,
                                            calc_lambda_max=True)
# Save the operator, reload it and check that the precomputed singular value
# survives the round trip and has the expected magnitude.
Atv.save(os.path.join(OUTPUT, "Atv.npz"))
Atv_ = LinearOperatorNesterov(filename=os.path.join(OUTPUT, "Atv.npz"))
assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
assert np.allclose(Atv_.get_singular_values(0), 8.999, rtol=1e-03, atol=1e-03)
# Every matrix of the operator is square, one row/column per masked node.
assert np.all([a.shape == (299798, 299798) for a in Atv])
Example #12
0
# Save data X and y
# Keep only the columns of Xtot selected by the flattened boolean mask.
X = Xtot[:, mask_bool.ravel()]
# Use mean imputation, we could have used median for age
#imput = sklearn.preprocessing.Imputer(strategy = 'median',axis=0)
#Z = imput.fit_transform(Z)
# Put the columns of Z in front of the masked columns.
X = np.hstack([Z, X])
assert X.shape == (606, 125962)

# Remove the samples (rows) whose target y is NaN. The unchanged shape
# asserted below means no sample is expected to be dropped.
X = X[np.logical_not(np.isnan(y)).ravel(), :]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (606, 125962)

np.save(os.path.join(OUTPUT, "X.npy"), X)
np.save(os.path.join(OUTPUT, "y.npy"), y)

###############################################################################
# Precompute the TV linear operator

X = np.load(os.path.join(OUTPUT, "X.npy"))
y = np.load(os.path.join(OUTPUT, "y.npy"))

mask = nibabel.load(os.path.join(OUTPUT, "mask.nii"))

# Image.get_data() is deprecated and was removed in nibabel 5;
# np.asanyarray(img.dataobj) is its documented equivalent.
Atv = nesterov_tv.linear_operator_from_mask(np.asanyarray(mask.dataobj),
                                            calc_lambda_max=True)
# Save the operator, reload it and check that the precomputed singular value
# survives the round trip and matches the expected value.
Atv.save(os.path.join(OUTPUT, "Atv.npz"))
Atv_ = LinearOperatorNesterov(filename=os.path.join(OUTPUT, "Atv.npz"))
assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
assert np.allclose(Atv_.get_singular_values(0), 11.909770107366217)
Example #13
0
def load_globals(config):
    """Store the data and the TV linear operator on the ``mapreduce`` module.

    Parameters
    ----------
    config : mapping. ``config["data"]`` is forwarded to ``GLOBAL.load_data``
            and ``config["structure_linear_operator_tv"]`` is the file name
            given to ``LinearOperatorNesterov``.

    The results are set as ``GLOBAL.DATA`` and ``GLOBAL.A``; nothing is
    returned.
    """
    import mapreduce as GLOBAL  # module used as a shared global namespace

    data_spec = config["data"]
    GLOBAL.DATA = GLOBAL.load_data(data_spec)
    GLOBAL.A = LinearOperatorNesterov(
        filename=config["structure_linear_operator_tv"])
Example #14
0
def init():
    """Prepare the working directory WD and write the mapreduce settings.

    Steps, in order:

    1. Copy X.npy, y.npy and mask.npy from WD_ORIGINAL, plus the surface
       mesh, into WD (created if needed) and chdir into WD.
    2. Unless WD/Atv.npz exists: build the TV linear operator from the mesh
       and the mask, save it and check that it reloads identically.
    3. Unless WD/beta_start.npz exists: fit one ridge logistic regression per
       penalty and save the coefficient vectors.
    4. Setting 1, "cv_largerange": outer CV + refit on a large parameter grid.
    5. Setting 2, "dcv_reducedrange": outer CV with nested inner CV on a
       reduced parameter grid.

    For each setting, WD/config_<setting>.json and the cluster job files are
    written. Nothing is returned.

    The function relies on module-level names (WD, WD_ORIGINAL, WD_CLUSTER,
    NFOLDS_OUTER, NFOLDS_INNER, penalty_start, user_func_filename, ...) and
    uses assertions as sanity checks on the hard-coded expected sizes.
    """
    import collections

    INPUT_DATA_X = os.path.join(WD_ORIGINAL, 'X.npy')
    INPUT_DATA_y = os.path.join(WD_ORIGINAL, 'y.npy')
    INPUT_MASK_PATH = os.path.join(WD_ORIGINAL, 'mask.npy')
    INPUT_MESH_PATH = '/neurospin/brainomics/2013_adni/MCIc-CTL-FS_cs/lrh.pial.gii'

    os.makedirs(WD, exist_ok=True)
    for src in (INPUT_DATA_X, INPUT_DATA_y, INPUT_MASK_PATH, INPUT_MESH_PATH):
        shutil.copy(src, WD)

    os.chdir(WD)
    X = np.load("X.npy")
    y = np.load("y.npy")

    # ------------------------------------------------------------------------
    # TV linear operator (computed only once)
    atv_path = os.path.join(WD, "Atv.npz")
    if not os.path.exists(atv_path):
        import brainomics.mesh_processing as mesh_utils
        cor, tri = mesh_utils.mesh_arrays(os.path.join(WD, "lrh.pial.gii"))
        mask = np.load(os.path.join(WD, 'mask.npy'))

        import parsimony.functions.nesterov.tv as nesterov_tv
        from parsimony.utils.linalgs import LinearOperatorNesterov
        Atv = nesterov_tv.linear_operator_from_mesh(cor, tri, mask, calc_lambda_max=True)
        Atv.save(atv_path)
        # Round trip: the operator reloaded from disk must match
        Atv_ = LinearOperatorNesterov(filename=atv_path)
        assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
        assert np.allclose(Atv_.get_singular_values(0), 8.999, rtol=1e-03, atol=1e-03)
        assert np.all([a.shape == (317089, 317089) for a in Atv])

    # ------------------------------------------------------------------------
    # Ridge coefficients saved as "beta_start.npz" (computed only once)
    beta_start_path = os.path.join(WD, "beta_start.npz")
    if not os.path.exists(beta_start_path):
        import time
        betas = dict()
        for alpha in [.01, 0.1, 1.0, 10]:
            mod = estimators.RidgeLogisticRegression(l=alpha, class_weight="auto", penalty_start=penalty_start)
            t_ = time.time()
            mod.fit(X, y.ravel())
            print(time.time() - t_)  # 11564
            betas["lambda_%.2f" % alpha] = mod.beta

        np.savez(beta_start_path, **betas)
        # Reload to check the archive; the context manager closes the file.
        with np.load(beta_start_path) as beta_start:
            assert np.all([np.all(beta_start[a] == betas[a]) for a in beta_start.keys()])

    # ------------------------------------------------------------------------
    # Helpers shared by the two settings

    def checked_outer_cv():
        """Return the outer [train, test] folds, checked against the old ones."""
        # shuffle keeps its default (False): the folds are deterministic, so
        # no random_state is passed. It would have no effect, and recent
        # scikit-learn versions raise ValueError for that combination.
        splitter = StratifiedKFold(n_splits=NFOLDS_OUTER)
        folds = [[tr, te] for tr, te in
                 splitter.split(np.zeros(y.shape[0]), y.ravel())]

        # Check that we got the same CV as previously
        old_config = os.path.join(WD_ORIGINAL, "config_modselectcv.json")
        with open(old_config) as fd:
            cv_old = json.load(fd)["resample"]
        folds_old = [cv_old['cv%02d/refit' % i] for i in range(NFOLDS_OUTER)]
        assert np.all([np.all(np.array(folds_old[i][0]) == folds[i][0]) for i in range(NFOLDS_OUTER)])
        assert np.all([np.all(np.array(folds_old[i][1]) == folds[i][1]) for i in range(NFOLDS_OUTER)])
        return folds

    def write_setting(tag, params, cv):
        """Write WD/config_<tag>.json and the matching cluster job files."""
        config = dict(data=dict(X="X.npy", y="y.npy"),
                      params=params, resample=cv,
                      structure_linear_operator_tv="Atv.npz",
                      beta_start="beta_start.npz",
                      map_output="5cv",
                      user_func=user_func_filename)
        # The with-block guarantees the JSON is flushed and closed before the
        # job files that point to it are generated.
        with open(os.path.join(WD, "config_%s.json" % tag), "w") as fd:
            json.dump(config, fd)

        # Build utils files: sync (push/pull) and PBS
        import brainomics.cluster_gabriel as clust_utils
        cmd = "mapreduce.py --map  %s/config_%s.json" % (WD_CLUSTER, tag)
        clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="250:00:00",
                                                suffix="_%s" % tag,
                                                freecores=2)

    #  ########################################################################
    #  Setting 1: 5cv + large range of parameters: cv_largerange
    #  optionally with sub-sampled training sets (sizes listed in sub_sizes)
    #  5cv/cv0*[_sub50]/refit/*

    sub_sizes = []  # e.g. [50, 100]; empty disables the sub-sampling

    cv_outer = checked_outer_cv()

    cv = collections.OrderedDict()
    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        # Simple CV
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]

        # Sub-sample the training set, keeping the group proportions
        # => cv*_sub[50|100]/refit
        grps = np.unique(y[tr_val]).astype(int)
        ytr = y.copy()
        ytr[te] = np.nan  # mask the test samples so they are never drawn
        # g_idx is indexed below by the group value itself.
        # NOTE(review): this assumes the groups are 0, 1, ... -- confirm.
        g_idx = [np.where(ytr == g)[0] for g in grps]
        assert np.all([np.all(ytr[g_idx[g]] == g) for g in grps])

        g_size = np.array([len(g) for g in g_idx])
        g_prop = g_size / g_size.sum()

        for sub_size in sub_sizes:
            sub_g_size = np.round(g_prop * sub_size).astype(int)
            g_sub_idx = [np.random.choice(g_idx[g], sub_g_size[g], replace=False) for g in grps]
            assert np.all([np.all(y[g_sub_idx[g]] == g) for g in grps])
            tr_val_sub = np.concatenate(g_sub_idx)
            assert len(tr_val_sub) == sub_size
            assert np.all([idx in tr_val for idx in tr_val_sub])
            assert np.all(np.logical_not([idx in te for idx in tr_val_sub]))
            cv["cv%02d_sub%i/refit" % (cv_outer_i, sub_size)] = [tr_val_sub, te]

    # JSON cannot store numpy arrays: convert the indices to plain lists
    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}

    # Simple CV (+ one extra resample per fold and per sub-sample size)
    assert len(cv) == NFOLDS_OUTER * (1 + len(sub_sizes)) + 1

    print(list(cv.keys()))

    # Large grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1, 1.0] # first ran with this grid
    tv_ratio = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    l1l2_ratio = [0.1, 0.5, 0.9]
    # l1l2_ratio = [0, 0.1, 0.5, 0.9, 1.0] # first ran with this grid
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 240  # old 300

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 12  # old 15

    params = params_enet_tvgn + params_enet
    assert len(params) == 252  # 315
    assert len(params) * len(cv) == 1512  # 5040

    # Writes config_cv_largerange.json and the "_cv_largerange" job files
    write_setting("cv_largerange", params, cv)

    #  ########################################################################
    #  Setting 2: dcv + reduced range of parameters: dcv_reducedrange
    #  5cv/cv0*/cvnested0*/*

    cv_outer = checked_outer_cv()

    cv = collections.OrderedDict()
    cv["refit/refit"] = [np.arange(len(y)), np.arange(len(y))]

    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        cv["cv%02d/refit" % (cv_outer_i)] = [tr_val, te]
        # Inner folds are computed on the outer training part; tr and val are
        # positions within tr_val, mapped back to sample indices below.
        # As for the outer folds, no random_state since shuffle is False.
        cv_inner = StratifiedKFold(n_splits=NFOLDS_INNER).split(
            np.zeros(y[tr_val].shape[0]), y[tr_val].ravel())
        for cv_inner_i, (tr, val) in enumerate(cv_inner):
            cv["cv%02d/cvnested%02d" % ((cv_outer_i), cv_inner_i)] = [tr_val[tr], tr_val[val]]

    cv = {k: [cv[k][0].tolist(), cv[k][1].tolist()] for k in cv}
    assert len(cv) == NFOLDS_OUTER * NFOLDS_INNER + NFOLDS_OUTER + 1
    print(list(cv.keys()))

    # Reduced grid of parameters
    alphas = [0.001, 0.01, 0.1, 1.0]
    # alphas = [.01, 0.1] # original
    tv_ratio = [0.2, 0.8]
    l1l2_ratio = [0.1, 0.9]
    algos = ["enettv", "enetgn"]
    params_enet_tvgn = [list(param) for param in itertools.product(algos, alphas, l1l2_ratio, tv_ratio)]
    assert len(params_enet_tvgn) == 32  # 16

    params_enet = [list(param) for param in itertools.product(["enet"], alphas, l1l2_ratio, [0])]
    assert len(params_enet) == 8  # 4

    params = params_enet_tvgn + params_enet
    assert len(params) == 40  # 20
    assert len(params) * len(cv) == 1240  # 620

    # Writes config_dcv_reducedrange.json and the "_dcv_reducedrange" job files
    write_setting("dcv_reducedrange", params, cv)
Example #15
0
    # [0.19778403 0.04279359 0.03579749]
    assert pca.components_.shape == (N_COMP, 371278)
    PC = pca.transform(X)
    #U = pca.transform(X)
    d = pca.singular_values_
    V = pca.components_.T
    U = pca.transform(X)
    explained_variance = pca.explained_variance_ratio_.cumsum()

if options.algo == 'enettv':
    ########################################################################################################################
    # PCA TV
    from parsimony.utils.linalgs import LinearOperatorNesterov

    #mask_img = nibabel.Nifti1Image(mask_arr.astype(float), affine=ref_img.affine)
    Atv = LinearOperatorNesterov(filename=os.path.join(ANALYSIS_DATA_PATH, "Atv.npz"))
    #assert Atv.get_singular_values(0) == Atv_.get_singular_values(0)
    assert np.allclose(Atv.get_singular_values(0), 11.974760295502465)

    inner_max_iter = int(1e3)
    l1max = pca_tv.PCAL1L2TV.l1_max(X) * .9 # 0.03899665773990707
    assert np.allclose(l1max, 0.03509699196591636)

    if False:  # Not to bad, TV too low
        # ll1 < 0.01 * l1max,  tv = 0.01 * 1/3
        ll1, ll2, ltv = 0.01 * l1max, 1, 0.01
        key_pca_enettv = "pca_enettv_%.4f_%.3f_%.3f" % (ll1, ll2, ltv)
        # Corr with old PC[-0.99966211718252285, -0.99004655401439967, -0.74332811780676245]


    if False:# Too much l1, not enough tv