def test_array_to_recordio_dense():
    """Check that a dense 2-D array is encoded as one float64 record per row."""
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    buf = _encoders.array_to_recordio_protobuf(np.array(array_data))

    # Materialize the records before comparing: zip() stops at the shorter
    # input, so a stream with missing records would otherwise pass without
    # running a single assertion.
    records = list(_read_recordio(buf))
    assert len(records) == len(array_data)

    for record_data, expected in zip(records, array_data):
        record = Record()
        record.ParseFromString(record_data)
        assert record.features["values"].float64_tensor.values == expected
Ejemplo n.º 2
0
def _write_numpy_to_dense_tensor(file, array, labels=None):
    """Writes a numpy array to a dense record

    Each row of ``array`` is serialized as one Record and written to ``file``
    with ``_write_recordio``. When ``labels`` is given, ``labels[i]`` is
    attached to the record built from row ``i``.

    Args:
        file (file-like object): file-like object where the
                                 records will be written
        array (numpy array): 2-D numpy array containing the features,
                             one row per record
        labels (numpy array): optional 1-D numpy array containing the
                              labels, one per row of ``array``

    Raises:
        ValueError: if ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels differs from the number of rows in ``array``.
    """

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count must match
        # the number of rows. Accepting a match against the column count would
        # either mislabel rows silently or fail part-way through the write.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Write each vector in array into a Record in the file object.
    # A single Record instance is reused and cleared before every row.
    record = Record()
    for index, vector in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, vector)
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[index])
        _write_recordio(file, record.SerializeToString())
def test_encode_selected_predictions_protobuf():
    """Check the recordio-protobuf encoding of selected predictions.

    Every record is expected to carry a ``predicted_label`` and a
    ``probabilities`` float32 tensor under its ``label`` field.
    """
    expected_predicted_labels = [[1], [0]]
    expected_probabilities = [[0.4, 0.6], [0.9, 0.1]]

    protobuf_response = serve_utils.encode_selected_predictions(
        TEST_PREDICTIONS, TEST_KEYS, "application/x-recordio-protobuf"
    )
    stream = io.BytesIO(protobuf_response)

    # Materialize the records before comparing: zip() stops at the shorter
    # input, so a response with missing records would otherwise pass without
    # running a single assertion.
    records = list(_read_recordio(stream))
    assert len(records) == len(expected_predicted_labels)

    for recordio, predicted_label, probabilities in zip(
        records, expected_predicted_labels, expected_probabilities
    ):
        record = Record()
        record.ParseFromString(recordio)
        assert record.label["predicted_label"].float32_tensor.values == predicted_label
        # Probabilities are stored as float32, so compare with a tolerance.
        assert np.allclose(record.label["probabilities"].float32_tensor.values, probabilities)
Ejemplo n.º 4
0
def _write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Writes a scipy sparse matrix to a sparse tensor.

    The matrix is converted to CSR and each row is serialized as one Record
    holding the row's stored values, their column indices as keys, and the
    number of columns as the shape. When ``labels`` is given, ``labels[i]``
    is attached to the record built from row ``i``.

    Args:
        file (file-like object): file-like object where the
                                 records will be written
        array (array-like): a 2-D sparse matrix containing features,
                            one row per record
        labels (numpy array): optional 1-D numpy array containing the
                              labels, one per row of ``array``

    Raises:
        TypeError: if ``array`` is not a scipy sparse matrix.
        ValueError: if ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels differs from the number of rows in ``array``.

    """

    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # Labels are looked up by row index below, so their count must match
        # the number of rows. Accepting a match against the column count would
        # either mislabel rows silently or fail part-way through the write.
        if labels.shape[0] != array.shape[0]:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    # A single Record instance is reused and cleared before every row.
    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
Ejemplo n.º 5
0
def _encode_selected_predictions_recordio_protobuf(predictions):
    """Encode predictions in recordio-protobuf format.

    For each prediction, a new record is created. The content is populated under the "label" field
    of a record where the keys are derived from the selected content keys. Every value is encoded
    to a float32 tensor.

    Each prediction is framed on its own: the bytes handed to ``_write_recordio`` contain only
    that prediction's serialized record, never the records written before it.

    :param predictions: output of serve_utils.get_selected_predictions(...) (list of dict)
    :return: predictions in recordio-protobuf response format (bytes)
    """
    recordio_bio = io.BytesIO()
    record = Record()
    for item in predictions:
        # Start every prediction from an empty record so nothing leaks over
        # from the previous iteration.
        record.Clear()
        for key, raw_value in item.items():
            # Scalars are wrapped so that every value is passed on as a list.
            value = raw_value if isinstance(raw_value, list) else [raw_value]
            _write_record(record, key, value)
        # Serialize straight into the frame. Accumulating the serialized
        # records in a shared buffer would make each frame repeat all of the
        # preceding records as well.
        _write_recordio(recordio_bio, record.SerializeToString())
    return recordio_bio.getvalue()
def test_sparse_float32_write_spmatrix_to_sparse_tensor():
    """Check that a float32 sparse matrix is encoded as one sparse record per row.

    Each record must hold the row's stored values, their column indices as
    keys, and the column count as the shape.
    """
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0],
                  [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the per-row values and column indices into COO triplets; the
    # row index is repeated once for every stored entry of that row.
    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    x_indices = [row_idx for row_idx, keys in enumerate(keys_data) for _ in keys]

    array = sparse.coo_matrix((flatten_data, (x_indices, y_indices)),
                              dtype="float32")
    buf = _encoders.array_to_recordio_protobuf(array)

    # Materialize the records before comparing: zip() stops at the shorter
    # input, so a stream with missing records would otherwise pass without
    # running a single assertion.
    records = list(_read_recordio(buf))
    assert len(records) == len(array_data)

    for record_data, expected_data, expected_keys in zip(
            records, array_data, keys_data):
        record = Record()
        record.ParseFromString(record_data)
        assert record.features["values"].float32_tensor.values == expected_data
        assert record.features["values"].float32_tensor.keys == expected_keys
        assert record.features["values"].float32_tensor.shape == [n]