Example #1
def _create_dense_diagonal_precision(
    X,
    graph,
    n_features,
    n_features_per_vertex,
    dtype=np.float32,
    n_components=None,
    bias=0,
    return_covariances=False,
    verbose=False,
):
    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)
    if return_covariances:
        all_covariances = np.zeros(
            (graph.n_vertices, n_features_per_vertex, n_features_per_vertex),
            dtype=dtype,
        )
    if verbose:
        print_dynamic("Allocated precision matrix of size {}".format(
            bytes_str(precision.nbytes)))

    # Print information if asked
    if verbose:
        vertices = print_progress(
            range(graph.n_vertices),
            n_items=graph.n_vertices,
            prefix="Precision per vertex",
            end_with_newline=False,
        )
    else:
        vertices = range(graph.n_vertices)

    # Compute covariance matrix for each patch
    for v in vertices:
        # find indices in target precision matrix
        i_from = v * n_features_per_vertex
        i_to = (v + 1) * n_features_per_vertex

        # compute covariance
        covmat = np.cov(X[:, i_from:i_to], rowvar=False, bias=bias)
        if return_covariances:
            all_covariances[v] = covmat

        # invert it
        covmat = _covariance_matrix_inverse(covmat, n_components)

        # insert to precision matrix
        precision[i_from:i_to, i_from:i_to] = covmat

    # return covariances
    if return_covariances:
        return precision, all_covariances
    else:
        return precision
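
For orientation, here is a minimal usage sketch of the function above. It assumes the surrounding module (this appears to be menpofit's GMRF code) supplies the private helper _covariance_matrix_inverse; the Graph namedtuple is a hypothetical stand-in exposing only the n_vertices attribute the function actually reads.

# Hedged usage sketch -- Graph is a hypothetical stand-in; the function
# itself still needs _covariance_matrix_inverse from its own module.
from collections import namedtuple
import numpy as np

Graph = namedtuple('Graph', 'n_vertices')
graph = Graph(n_vertices=4)
n_features_per_vertex = 3
n_features = graph.n_vertices * n_features_per_vertex
X = np.random.randn(200, n_features)  # 200 samples, 12 features

precision = _create_dense_diagonal_precision(
    X, graph, n_features, n_features_per_vertex)
# precision is block-diagonal: one 3x3 inverse covariance per vertex;
# all cross-vertex blocks remain zero.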
Example #2
def _increment_dense_diagonal_precision(
    X,
    mean_vector,
    covariances,
    n,
    graph,
    n_features,
    n_features_per_vertex,
    dtype=np.float32,
    n_components=None,
    bias=0,
    verbose=False,
):
    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)

    # Print information if asked
    if verbose:
        print_dynamic("Allocated precision matrix of size {}".format(
            bytes_str(precision.nbytes)))
        vertices = print_progress(
            range(graph.n_vertices),
            n_items=graph.n_vertices,
            prefix="Precision per vertex",
            end_with_newline=False,
        )
    else:
        vertices = range(graph.n_vertices)

    # Compute covariance matrix for each patch
    for v in vertices:
        # find indices in target precision matrix
        i_from = v * n_features_per_vertex
        i_to = (v + 1) * n_features_per_vertex

        # get data
        edge_data = X[:, i_from:i_to]
        m = mean_vector[i_from:i_to]

        # increment
        _, covariances[v] = _increment_multivariate_gaussian_cov(
            edge_data, m, covariances[v], n, bias=bias)

        # invert it
        precision[i_from:i_to, i_from:i_to] = _covariance_matrix_inverse(
            covariances[v], n_components)

    # return covariances
    return precision, covariances
Example #3
def _create_dense_diagonal_precision(X, graph, n_features,
                                     n_features_per_vertex,
                                     dtype=np.float32, n_components=None,
                                     bias=0, return_covariances=False,
                                     verbose=False):
    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)
    if return_covariances:
        all_covariances = np.zeros(
            (graph.n_vertices, n_features_per_vertex, n_features_per_vertex),
            dtype=dtype)
    if verbose:
        print_dynamic('Allocated precision matrix of size {}'.format(
            bytes_str(precision.nbytes)))

    # Print information if asked
    if verbose:
        vertices = print_progress(
            range(graph.n_vertices), n_items=graph.n_vertices,
            prefix='Precision per vertex', end_with_newline=False)
    else:
        vertices = range(graph.n_vertices)

    # Compute covariance matrix for each patch
    for v in vertices:
        # find indices in target precision matrix
        i_from = v * n_features_per_vertex
        i_to = (v + 1) * n_features_per_vertex

        # compute covariance
        covmat = np.cov(X[:, i_from:i_to], rowvar=False, bias=bias)
        if return_covariances:
            all_covariances[v] = covmat

        # invert it
        covmat = _covariance_matrix_inverse(covmat, n_components)

        # insert to precision matrix
        precision[i_from:i_to, i_from:i_to] = covmat

    # return covariances
    if return_covariances:
        return precision, all_covariances
    else:
        return precision
Example #4
def _increment_dense_diagonal_precision(X, mean_vector, covariances, n, graph,
                                        n_features, n_features_per_vertex,
                                        dtype=np.float32, n_components=None,
                                        bias=0, verbose=False):
    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)

    # Print information if asked
    if verbose:
        print_dynamic('Allocated precision matrix of size {}'.format(
            bytes_str(precision.nbytes)))
        vertices = print_progress(
            range(graph.n_vertices), n_items=graph.n_vertices,
            prefix='Precision per vertex', end_with_newline=False)
    else:
        vertices = range(graph.n_vertices)

    # Compute covariance matrix for each patch
    for v in vertices:
        # find indices in target precision matrix
        i_from = v * n_features_per_vertex
        i_to = (v + 1) * n_features_per_vertex

        # get data
        edge_data = X[:, i_from:i_to]
        m = mean_vector[i_from:i_to]

        # increment
        _, covariances[v] = _increment_multivariate_gaussian_cov(
            edge_data, m, covariances[v], n, bias=bias)

        # invert it
        precision[i_from:i_to, i_from:i_to] = _covariance_matrix_inverse(
            covariances[v], n_components)

    # return covariances
    return precision, covariances
Example #5
def as_matrix(vectorizables, length=None, return_template=False, verbose=False):
    r"""
    Create a matrix from a list/generator of :map:`Vectorizable` objects.
    All the objects in the list **must** be the same size when vectorized.

    If the matrix you are creating is large, consider passing a generator
    together with its length given explicitly via ``length``.

    Parameters
    ----------
    vectorizables : `list` or generator of :map:`Vectorizable` objects
        A list or generator of objects that support the vectorizable interface
    length : `int`, optional
        Length of the vectorizable list. Useful if you are passing a generator
        with a known length.
    verbose : `bool`, optional
        If ``True``, will print the progress of building the matrix.
    return_template : `bool`, optional
        If ``True``, will return the first element of the list/generator, which
        was used as the template. Useful if you need to map back from the
        matrix to a list of vectorizable objects.

    Returns
    -------
    M : (length, n_features) `ndarray`
        Every row is an element of the list.
    template : :map:`Vectorizable`, optional
        If ``return_template == True``, will return the template used to
        build the matrix `M`.

    Raises
    ------
    ValueError
        ``vectorizables`` terminates in fewer than ``length`` iterations
    """
    # get the first element as the template and use it to configure the
    # data matrix
    if length is None:
        # samples is a list
        length = len(vectorizables)
        template = vectorizables[0]
        vectorizables = vectorizables[1:]
    else:
        # samples is an iterator
        template = next(vectorizables)
    n_features = template.n_parameters
    template_vector = template.as_vector()

    data = np.zeros((length, n_features), dtype=template_vector.dtype)
    if verbose:
        print(
            "Allocated data matrix of size {} "
            "({} samples)".format(bytes_str(data.nbytes), length)
        )

    # now we can fill in the first element from the template
    data[0] = template_vector
    del template_vector

    # ensure we take at most the remaining length - 1 elements
    vectorizables = islice(vectorizables, length - 1)

    if verbose:
        vectorizables = print_progress(
            vectorizables,
            n_items=length,
            offset=1,
            prefix="Building data matrix",
            end_with_newline=False,
        )

    # 1-based as we have the template vector set already
    i = 0
    for i, sample in enumerate(vectorizables, 1):
        data[i] = sample.as_vector()

    # we have exhausted the iterable, but did we get enough items?
    if i != length - 1:  # -1 because the template already fills row 0
        raise ValueError(
            "Incomplete data matrix due to early iterator "
            "termination (expected {} items, got {})".format(length, i + 1)
        )

    if return_template:
        return data, template
    else:
        return data
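
A small, hedged usage sketch for as_matrix (this looks like menpo.math.as_matrix): the DummyVectorizable class below is a hypothetical stand-in implementing only the duck-typed interface the function touches (n_parameters and as_vector).

import numpy as np

class DummyVectorizable:
    # Hypothetical stand-in: just enough of the Vectorizable interface.
    def __init__(self, vector):
        self._v = np.asarray(vector, dtype=np.float64)

    @property
    def n_parameters(self):
        return self._v.size

    def as_vector(self):
        return self._v.copy()

# A generator with an explicitly supplied length avoids materializing
# the whole list before the matrix is allocated.
samples = (DummyVectorizable(np.full(3, i)) for i in range(5))
data = as_matrix(samples, length=5)
# data.shape == (5, 3); row i is [i., i., i.]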
Example #6
def as_matrix(vectorizables, length=None, return_template=False, verbose=False):
    r"""
    Create a matrix from a list/generator of :map:`Vectorizable` objects.
    All the objects in the list **must** be the same size when vectorized.

    If the matrix you are creating is large, consider passing a generator
    together with its length given explicitly via ``length``.

    Parameters
    ----------
    vectorizables : `list` or generator of :map:`Vectorizable` objects
        A list or generator of objects that support the vectorizable interface
    length : `int`, optional
        Length of the vectorizable list. Useful if you are passing a generator
        with a known length.
    verbose : `bool`, optional
        If ``True``, will print the progress of building the matrix.
    return_template : `bool`, optional
        If ``True``, will return the first element of the list/generator, which
        was used as the template. Useful if you need to map back from the
        matrix to a list of vectorizable objects.

    Returns
    -------
    M : (length, n_features) `ndarray`
        Every row is an element of the list.
    template : :map:`Vectorizable`, optional
        If ``return_template == True``, will return the template used to
        build the matrix `M`.

    Raises
    ------
    ValueError
        ``vectorizables`` terminates in fewer than ``length`` iterations
    """
    # get the first element as the template and use it to configure the
    # data matrix
    if length is None:
        # samples is a list
        length = len(vectorizables)
        template = vectorizables[0]
        vectorizables = vectorizables[1:]
    else:
        # samples is an iterator
        template = next(vectorizables)
    n_features = template.n_parameters
    template_vector = template.as_vector()

    data = np.zeros((length, n_features), dtype=template_vector.dtype)
    if verbose:
        print('Allocated data matrix of size {} '
              '({} samples)'.format(bytes_str(data.nbytes), length))

    # now we can fill in the first element from the template
    data[0] = template_vector
    del template_vector

    # ensure we take at most the remaining length - 1 elements
    vectorizables = islice(vectorizables, length - 1)

    if verbose:
        vectorizables = print_progress(vectorizables, n_items=length, offset=1,
                                       prefix='Building data matrix')

    # 1-based as we have the template vector set already
    i = 0
    for i, sample in enumerate(vectorizables, 1):
        data[i] = sample.as_vector()

    # we have exhausted the iterable, but did we get enough items?
    if i != length - 1:  # -1 because the template already fills row 0
        raise ValueError('Incomplete data matrix due to early iterator '
                         'termination (expected {} items, got {})'.format(
                             length, i + 1))

    if return_template:
        return data, template
    else:
        return data
Example #7
def _increment_dense_precision(X, mean_vector, covariances, n, graph,
                               n_features, n_features_per_vertex,
                               mode='concatenation', dtype=np.float32,
                               n_components=None, bias=0, verbose=False):
    # check mode argument
    if mode not in ['concatenation', 'subtraction']:
        raise ValueError("mode must be either 'concatenation' or "
                         "'subtraction'; {} was given.".format(mode))

    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)

    # Print information if asked
    if verbose:
        print_dynamic('Allocated precision matrix of size {}'.format(
            bytes_str(precision.nbytes)))
        edges = print_progress(range(graph.n_edges), n_items=graph.n_edges,
                               prefix='Precision per edge',
                               end_with_newline=False)
    else:
        edges = range(graph.n_edges)

    # Compute covariance matrix for each edge, invert it and store it
    for e in edges:
        # edge vertices
        v1 = graph.edges[e, 0]
        v2 = graph.edges[e, 1]

        # find indices in data matrix
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex
        v2_from = v2 * n_features_per_vertex
        v2_to = (v2 + 1) * n_features_per_vertex

        # data concatenation
        if mode == 'concatenation':
            edge_data = X[:, list(range(v1_from, v1_to)) +
                             list(range(v2_from, v2_to))]
            m = mean_vector[list(range(v1_from, v1_to)) +
                            list(range(v2_from, v2_to))]
        else:
            edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
            m = mean_vector[v1_from:v1_to] - mean_vector[v2_from:v2_to]

        # increment
        _, covariances[e] = _increment_multivariate_gaussian_cov(
            edge_data, m, covariances[e], n, bias=bias)

        # invert it
        covmat = _covariance_matrix_inverse(covariances[e], n_components)

        # store it
        if mode == 'concatenation':
            # v1, v1
            precision[v1_from:v1_to, v1_from:v1_to] += \
                covmat[:n_features_per_vertex, :n_features_per_vertex]
            # v2, v2
            precision[v2_from:v2_to, v2_from:v2_to] += \
                covmat[n_features_per_vertex::, n_features_per_vertex::]
            # v1, v2
            precision[v1_from:v1_to, v2_from:v2_to] = \
                covmat[:n_features_per_vertex, n_features_per_vertex::]
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = \
                covmat[n_features_per_vertex::, :n_features_per_vertex]
        elif mode == 'subtraction':
            # v1, v2
            precision[v1_from:v1_to, v2_from:v2_to] = -covmat
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = -covmat
            # v1, v1
            precision[v1_from:v1_to, v1_from:v1_to] += covmat
            # v2, v2
            precision[v2_from:v2_to, v2_from:v2_to] += covmat

    # return covariances
    return precision, covariances
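
To make the block bookkeeping above concrete, here is a self-contained sketch of the 'subtraction' branch with an identity matrix standing in for the inverted edge covariance: each edge (v1, v2) adds +Q to both diagonal blocks and writes -Q to both off-diagonal blocks, the classic GMRF precision structure.

import numpy as np

n_vertices, k = 3, 2                    # k features per vertex
edges = np.array([[0, 1], [1, 2]])      # a simple chain graph
precision = np.zeros((n_vertices * k, n_vertices * k))
Q = np.eye(k)                           # stand-in for the inverted covariance
for v1, v2 in edges:
    a, b = v1 * k, v2 * k
    precision[a:a + k, a:a + k] += Q    # v1, v1
    precision[b:b + k, b:b + k] += Q    # v2, v2
    precision[a:a + k, b:b + k] = -Q    # v1, v2
    precision[b:b + k, a:a + k] = -Q    # v2, v1
# vertex 1 sits on two edges, so its diagonal block accumulates 2*Q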
Example #8
def rpca_missing(X, M, lambda_=None, tol=1e-6, max_iter=1000, verbose=False):
    r"""
    Robust PCA with Missing Values using the inexact augmented Lagrange
    multiplier method.
    Parameters
    ----------
    X : ``(n_samples, n_features)`` `ndarray`
        Data matrix.
    M : ``(n_components, n_features)`` `ndarray` of type `np.bool`
        Mask matrix. For each element, if ``True`` indicates that the
        corresponding element in ``X`` is meaningful data. If ``False``
        the corresponding element in ``X`` is not considered in the
        calculation.
    lambda_ : float, optional
        The weight on sparse error term in the cost function. If ``None``,
        the heuristic value of ``1 / np.sqrt(n_samples)`` is used.
    tol : `float`, optional
        The tolerance for the stopping criterion.
    max_iter : `float`, optional
        The maximum allowed number of iterations.
    verbose : `boolean`, optional
        If ``True``, details of the progress of the algorithm will be printed
        every 10 iterations.
    Returns
    -------
    A : ``(n_samples, n_features)`` `ndarray`
        Low rank reconstruction
    E : ``(n_samples, n_features)`` `ndarray`
        Sparse reconstruction
    """
    m, n = X.shape

    if verbose:
        print('X {} of type {}: {}'.format(
            X.shape, X.dtype, bytes_str(X.nbytes)))
        # Have to allocate 7 arrays (X, Y, A, E, T, Z, V) all of this size
        # + 4 in temp computations
        print('Estimated total memory required: {}'.format(
            bytes_str(X.nbytes * 11)))
        t = time()

    if lambda_ is None:
        lambda_ = 1. / np.sqrt(m)

    norm_fro = np.linalg.norm(X, ord='fro')
    norm_two = np.linalg.norm(X, ord=2)
    norm_inf = np.linalg.norm(X.ravel(), ord=np.inf) / lambda_
    dual_norm_inv = 1.0 / max(norm_two, norm_inf)
    Y = X * dual_norm_inv

    A = np.zeros_like(X)
    notM = ~M
    mu = 1.25 / norm_two
    mu_bar = mu * 1e7
    rho = 1.5
    sv = 10
    for i in range(1, max_iter + 1):
        T = X - A + (1 / mu) * Y
        E = (np.maximum(T - (lambda_ / mu), 0) +
             np.minimum(T + (lambda_ / mu), 0))
        E = E * M + T * notM
        U, s, V = np.linalg.svd(X - E + (1 / mu) * Y, full_matrices=False)

        svp = (s > 1 / mu).sum()
        sv = min(svp + 1 if svp < sv else svp + round(0.05 * n), n)

        S_svp = np.diag(s[:svp] - 1 / mu)
        A = np.dot(U[:, :svp], np.dot(S_svp, V[:svp]))

        Z = X - A - E
        Y += mu * Z
        mu = min(mu * rho, mu_bar)

        stopping_criterion = np.linalg.norm(Z, ord='fro') / norm_fro

        if verbose and (time() - t > 1):
            print('{i:02d} ({time:.1f} sec/iter) r(A): {r_A} |E|_0: {E_0} '
                  'criterion/tol: {sc:.0f}'.format(
                      i=i, r_A=np.linalg.matrix_rank(A), time=time() - t,
                      E_0=(np.abs(E) > 0).sum(), sc=stopping_criterion / tol))
            t = time()
        if stopping_criterion < tol:
            if verbose:
                print('Converged after {} iterations'.format(i))
            break
    else:
        if verbose:
            print('Maximum iterations ({}) reached without '
                  'convergence'.format(max_iter))

    return A, E
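
A hedged end-to-end sketch of rpca_missing: build a low-rank matrix, corrupt a few entries, hide some others with the mask, and check that the low-rank term is recovered. The synthetic data below is purely illustrative.

import numpy as np

rng = np.random.RandomState(0)
low_rank = rng.randn(60, 8).dot(rng.randn(8, 40))  # rank-8 ground truth
sparse = np.zeros_like(low_rank)
sparse[rng.rand(*low_rank.shape) < 0.05] = 10.0    # 5% gross corruptions
mask = rng.rand(*low_rank.shape) < 0.9             # ~90% of entries observed

A, E = rpca_missing(low_rank + sparse, mask)
# A should approximate low_rank on the observed entries;
# E absorbs the sparse corruptions.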
Example #9
def _increment_dense_precision(
    X,
    mean_vector,
    covariances,
    n,
    graph,
    n_features,
    n_features_per_vertex,
    mode="concatenation",
    dtype=np.float32,
    n_components=None,
    bias=0,
    verbose=False,
):
    # check mode argument
    if mode not in ["concatenation", "subtraction"]:
        raise ValueError("mode must be either 'concatenation' or "
                         "'subtraction'; {} was given.".format(mode))

    # Initialize precision
    precision = np.zeros((n_features, n_features), dtype=dtype)

    # Print information if asked
    if verbose:
        print_dynamic("Allocated precision matrix of size {}".format(
            bytes_str(precision.nbytes)))
        edges = print_progress(
            range(graph.n_edges),
            n_items=graph.n_edges,
            prefix="Precision per edge",
            end_with_newline=False,
        )
    else:
        edges = range(graph.n_edges)

    # Compute covariance matrix for each edge, invert it and store it
    for e in edges:
        # edge vertices
        v1 = graph.edges[e, 0]
        v2 = graph.edges[e, 1]

        # find indices in data matrix
        v1_from = v1 * n_features_per_vertex
        v1_to = (v1 + 1) * n_features_per_vertex
        v2_from = v2 * n_features_per_vertex
        v2_to = (v2 + 1) * n_features_per_vertex

        # data concatenation
        if mode == "concatenation":
            edge_data = X[:,
                          list(range(v1_from, v1_to)) +
                          list(range(v2_from, v2_to))]
            m = mean_vector[list(range(v1_from, v1_to)) +
                            list(range(v2_from, v2_to))]
        else:
            edge_data = X[:, v1_from:v1_to] - X[:, v2_from:v2_to]
            m = mean_vector[v1_from:v1_to] - mean_vector[v2_from:v2_to]

        # increment
        _, covariances[e] = _increment_multivariate_gaussian_cov(
            edge_data, m, covariances[e], n, bias=bias)

        # invert it
        covmat = _covariance_matrix_inverse(covariances[e], n_components)

        # store it
        if mode == "concatenation":
            # v1, v1
            precision[v1_from:v1_to,
                      v1_from:v1_to] += covmat[:n_features_per_vertex, :
                                               n_features_per_vertex]
            # v2, v2
            precision[v2_from:v2_to,
                      v2_from:v2_to] += covmat[n_features_per_vertex::,
                                               n_features_per_vertex::]
            # v1, v2
            precision[v1_from:v1_to,
                      v2_from:v2_to] = covmat[:n_features_per_vertex,
                                              n_features_per_vertex::]
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = covmat[
                n_features_per_vertex::, :n_features_per_vertex]
        elif mode == "subtraction":
            # v1, v2
            precision[v1_from:v1_to, v2_from:v2_to] = -covmat
            # v2, v1
            precision[v2_from:v2_to, v1_from:v1_to] = -covmat
            # v1, v1
            precision[v1_from:v1_to, v1_from:v1_to] += covmat
            # v2, v2
            precision[v2_from:v2_to, v2_from:v2_to] += covmat

    # return covariances
    return precision, covariances
Example #10
def increment_parameters(images,
                         mm,
                         id_indices,
                         exp_indices,
                         template_camera,
                         p,
                         qs,
                         cs,
                         c_f=1,
                         c_l=1,
                         c_id=1,
                         c_exp=1,
                         c_sm=1,
                         lm_group=None,
                         n_samples=1000,
                         compute_costs=True):

    n_frames = len(images)
    n_points = mm.shape_model.template_instance.n_points
    n_p = len(id_indices)
    n_q = len(exp_indices)
    n_c = cs.shape[1] - 2  # minus one for the quaternion, one for the focal length

    print('Precomputing...')
    # Rescale shape components to have size:
    # n_points x (n_components * n_dims)
    # and to be scaled by the relevant standard deviation.
    shape_pc = (mm.shape_model.components.T *
                np.sqrt(mm.shape_model.eigenvalues)).reshape([n_points, -1])
    # include std.dev in principal components
    shape_pc_lms = shape_pc.reshape([n_points, 3,
                                     -1])[mm.model_landmarks_index]

    print('Initializing Hessian/JTe for frame...')
    H, JTe = initialize_hessian_and_JTe(c_id, c_exp, c_sm, n_p, n_q, n_c, p,
                                        qs, n_frames)
    print('H: {} ({})'.format(H.shape, bytes_str(H.nbytes)))

    if compute_costs:
        costs = defaultdict(list)

    for (f, image), c, q in zip(
            enumerate(print_progress(images, prefix='Incrementing H/JTe')), cs,
            qs):

        # Form the overall shape parameter: [p, q]
        s = np.zeros(mm.shape_model.n_active_components)
        s[id_indices] = p
        s[exp_indices] = q

        # In our error we consider landmarks stored [x, y] - so flip here.
        lms_points_xy = image.landmarks[lm_group].points[:, [1, 0]]

        # Compute input image gradient
        grad_x, grad_y = gradient_xy(image)

        j = jacobians(s,
                      c,
                      image,
                      lms_points_xy,
                      mm,
                      id_indices,
                      exp_indices,
                      template_camera,
                      grad_x,
                      grad_y,
                      shape_pc,
                      shape_pc_lms,
                      n_samples,
                      compute_costs=compute_costs)
        insert_frame_to_H(H, j, f, n_p, n_q, n_c, c_f, c_l, n_frames)
        insert_frame_to_JTe(JTe, j, f, n_p, n_q, n_c, c_f, c_l, n_frames)

        if compute_costs:
            for cost, val in j['costs'].items():
                costs[cost].append(val)

    print('Converting Hessian to sparse format')
    H = sp.csr_matrix(H)
    print("Sparsity (prop. 0's) of H: {:.2%}".format(
        1 - (H.count_nonzero() / np.prod(np.array(H.shape)))))
    print('Solving for parameter update')
    d = sp.linalg.spsolve(H, JTe)
    dp = d[:n_p]
    dqs = d[n_p:(n_p + (n_frames * n_q))].reshape([n_frames, n_q])
    dcs = d[-(n_frames * n_c):].reshape([n_frames, n_c])
    # Add the focal length and degenerate quaternion parameters back on as
    # null delta updates
    dcs = np.hstack([np.tile(np.array([0, 1]), (n_frames, 1)), dcs])

    new_p = p + dp
    new_qs = qs + dqs
    new_cs = np.array(
        [camera_parameters_update(c, dc) for c, dc in zip(cs, dcs)])

    params = {
        'p': new_p,
        'qs': new_qs,
        'cs': new_cs,
        'dp': dp,
        'dqs': dqs,
        'dcs': dcs,
    }

    if compute_costs:
        c = {k: np.array(v) for k, v in costs.items()}

        err_s_id = (p**2).sum()
        err_s_exp = (qs**2).sum()
        err_sm = ((qs[:-2] - 2 * qs[1:-1] + qs[2:])**2).sum()

        err_f_tot = c['err_f'].sum() * c_f / (n_c * n_samples)
        err_l_tot = c['err_l'].sum()

        total_energy = (err_f_tot + c_l * err_l_tot + c_id * err_s_id +
                        c_exp * err_s_exp + c_sm * err_sm)

        c['total_energy'] = total_energy
        c['err_s_id'] = (c_id, err_s_id)
        c['err_s_exp'] = (c_exp, err_s_exp)
        c['err_sm'] = (c_sm, err_sm)
        c['err_f_tot'] = err_f_tot
        c['err_l_tot'] = (c_l, err_l_tot)

        print_cost_dict(c)

        params['costs'] = c
    return params
Example #11
def as_matrix(vectorizables, length=None, return_template=False, verbose=False):
    r"""
    Create a matrix from a list/generator of :map:`Vectorizable` objects.
    All the objects in the list **must** be the same size when vectorized.

    If the matrix you are creating is large, consider passing a generator
    together with its length given explicitly via ``length``.

    Parameters
    ----------
    vectorizables : `list` or generator of :map:`Vectorizable` objects
        A list or generator of objects that support the vectorizable interface
    length : `int`, optional
        Length of the vectorizable list. Useful if you are passing a generator
        with a known length.
    verbose : `bool`, optional
        If ``True``, will print the progress of building the matrix.
    return_template : `bool`, optional
        If ``True``, will return the first element of the list/generator, which
        was used as the template. Useful if you need to map back from the
        matrix to a list of vectorizable objects.

    Returns
    -------
    M : (length, n_features) `ndarray`
        Every row is an element of the list.
    template : :map:`Vectorizable`, optional
        If ``return_template == True``, will return the template used to
        build the matrix `M`.
    """
    # get the first element as the template and use it to configure the
    # data matrix
    if length is None:
        # samples is a list
        length = len(vectorizables)
        template = vectorizables[0]
        vectorizables = vectorizables[1:]
    else:
        # samples is an iterator
        template = next(vectorizables)
    n_features = template.n_parameters
    template_vector = template.as_vector()

    data = np.zeros((length, n_features), dtype=template_vector.dtype)
    if verbose:
        print('Allocated data matrix of size {} '
              '({} samples)'.format(bytes_str(data.nbytes), length))

    # now we can fill in the first element from the template
    data[0] = template_vector
    del template_vector

    if verbose:
        vectorizables = print_progress(vectorizables, n_items=length, offset=1,
                                       prefix='Building data matrix')

    # 1-based as we have the template vector set already
    for i, sample in enumerate(vectorizables, 1):
        if i >= length:
            break
        data[i] = sample.as_vector()

    if return_template:
        return data, template
    else:
        return data