Example #1
0
def _write_numpy_to_dense_tensor(file, array, labels=None):
    """Writes a numpy array to a dense record

    Every row of ``array`` is serialized as one Record and written to
    ``file``. When ``labels`` is given, ``labels[i]`` is attached to the
    record produced for row ``i``.

    Args:
        file (file-like object): file-like object where the
                                 records will be written
        array (numpy array): numpy array containing the features
        labels (numpy array): numpy array containing the labels

    Raises:
        ValueError: if ``array`` is not 2-dimensional, if ``labels`` is not
            1-dimensional, or if the number of labels differs from the
            number of rows in ``array``.
    """

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count has to
        # match the number of rows; matching the column count is not enough.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Write each vector in array into a Record in the file object.
    # A single Record instance is reused and cleared before every row.
    record = Record()
    for index, vector in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, vector)
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[index])
        _write_recordio(file, record.SerializeToString())
Example #2
0
def _write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Writes a scipy sparse matrix to a sparse tensor.

    The matrix is converted to CSR form and every row is serialized as one
    Record written to ``file``. For each row the stored values, their column
    indices (as ``uint64`` keys) and the column count are written; when
    ``labels`` is given, ``labels[i]`` is attached to the record for row ``i``.

    Args:
        file (file-like object): file-like object where the
                                 records will be written
        array (array-like): a sparse matrix containing features
        labels (numpy array): numpy array containing the labels

    Raises:
        TypeError: if ``array`` is not a sparse matrix.
        ValueError: if ``array`` is not 2-dimensional, if ``labels`` is not
            1-dimensional, or if the number of labels differs from the
            number of rows in ``array``.
    """

    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count has to
        # match the number of rows; matching the column count is not enough.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    # A single Record instance is reused and cleared before every row.
    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
Example #3
0
def _encode_selected_predictions_recordio_protobuf(predictions):
    """Encode predictions in recordio-protobuf format.

    For each prediction, a new record is created. The content is populated under the "label" field
    of a record where the keys are derived from the selected content keys. Every value is encoded
    to a float32 tensor.

    Each prediction is serialized on its own and appended to the output as a separate recordio
    entry; scalar values are wrapped in a single-element list before being written.

    :param predictions: output of serve_utils.get_selected_predictions(...) (list of dict)
    :return: predictions in recordio-protobuf response format (bytes)
    """
    recordio_bio = io.BytesIO()
    # One Record instance is reused; it is cleared before each prediction so
    # no field leaks from one prediction into the next.
    record = Record()
    for item in predictions:
        record.Clear()
        for key, raw_value in item.items():
            value = raw_value if isinstance(raw_value, list) else [raw_value]
            _write_record(record, key, value)
        # Serialize only the current record. Accumulating serialized records
        # in a shared buffer would make every entry after the first also
        # contain the bytes of all earlier predictions.
        _write_recordio(recordio_bio, record.SerializeToString())
    return recordio_bio.getvalue()