def deserialize(self, stream, content_type):
    """Read a RecordIO-protobuf stream from a SageMaker Semantic Segmentation algorithm endpoint.

    Args:
        stream (botocore.response.StreamingBody): A stream of bytes.
        content_type (str): The MIME type of the data.

    Returns:
        array: numpy array of class probabilities per pixel.
    """
    try:
        # Unpack the RecordIO wrapper first:
        reccontent = HackyProtobufDeserializer.next_recordio_record(stream)
        # Then parse the protocol buffer. ParseFromString fills `rec` in place
        # (its return value is not a message, so there is nothing to assign):
        rec = Record()
        print("Parsing protobuf...")
        rec.ParseFromString(reccontent)
        # Then read the two provided tensors `target` (predictions) and `shape`, squeeze out any batch
        # dimensions (since we'll always be predicting on a single image) and shape `target` appropriately:
        print("Fetching tensors...")
        values = list(rec.features["target"].float32_tensor.values)
        shape = list(rec.features["shape"].int32_tensor.values)
        print("Reshaping arrays...")
        shape = squeeze(shape)
        mask = reshape(array(values), shape)
        return squeeze(mask, axis=0)
    finally:
        stream.close()
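A hedged usage sketch for the deserializer above: it could be attached to an existing sagemaker.predictor.Predictor; the endpoint name below is a hypothetical placeholder.

# Sketch only: "my-segmentation-endpoint" is a made-up endpoint name.
from sagemaker.predictor import Predictor

predictor = Predictor("my-segmentation-endpoint")
predictor.deserializer = HackyProtobufDeserializer()

with open("photo.jpg", "rb") as f:
    # deserialize() returns the squeezed (num_classes, height, width) probability mask
    mask = predictor.predict(f.read(), initial_args={"ContentType": "image/jpeg"})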
Example #2
def write_numpy_to_dense_tensor(file, array, labels=None):
    """Writes a numpy array to a dense tensor

    Args:
        file:
        array:
        labels:
    """

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Write each vector in array into a Record in the file object
    record = Record()
    for index, vector in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, vector)
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[index])
        _write_recordio(file, record.SerializeToString())
Example #3
def test_serializer_accepts_one_dimensional_array():
    s = RecordSerializer()
    array_data = [1.0, 2.0, 3.0]
    buf = s.serialize(np.array(array_data))
    record_data = next(read_recordio(buf))
    record = Record()
    record.ParseFromString(record_data)
    assert record.features["values"].float64_tensor.values == array_data
Example #4
def test_serializer():
    s = RecordSerializer()
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    buf = s.serialize(np.array(array_data))
    for record_data, expected in zip(read_recordio(buf), array_data):
        record = Record()
        record.ParseFromString(record_data)
        assert record.features["values"].float64_tensor.values == expected
Example #5
def read_records(file):
    """Eagerly read a collection of amazon Record protobuf objects from file."""
    records = []
    for record_data in read_recordio(file):
        record = Record()
        record.ParseFromString(record_data)
        records.append(record)
    return records
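A minimal round-trip sketch combining write_numpy_to_dense_tensor (Example #2) with read_records above; it assumes only numpy and the standard library.

import tempfile
import numpy as np

features = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with tempfile.TemporaryFile() as f:
    write_numpy_to_dense_tensor(f, features)
    f.seek(0)
    for rec in read_records(f):
        # Each row of `features` comes back as one Record's float32 tensor
        print(list(rec.features["values"].float32_tensor.values))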
Example #6
def read_pipe(pipe):
    with open(pipe, 'rb') as f:
        for rec in read_recordio(f):
            print("read record")
            print(rec)
            record = Record()
            record.ParseFromString(rec)
            print("record parsed")
            print(record)
Example #7
def test_int_write_numpy_to_dense_tensor():
    array_data = [[1, 2, 3], [10, 20, 3]]
    array = np.array(array_data)
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array)
        f.seek(0)
        for record_data, expected in zip(read_recordio(f), array_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected
Example #8
def test_float32_write_numpy_to_dense_tensor():
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    array = np.array(array_data).astype(np.dtype("float32"))
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array)
        f.seek(0)
        for record_data, expected in zip(read_recordio(f), array_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].float32_tensor.values == expected
Example #9
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Writes a scipy sparse matrix to a sparse tensor

    Args:
        file:
        array:
        labels:
    """
    try:
        import scipy
    except ImportError as e:
        logging.warning(
            "scipy failed to import. Sparse matrix functions will be impaired or broken."
        )
        # Any subsequent attempt to use scipy will raise the ImportError
        scipy = DeferredError(e)

    if not scipy.sparse.issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record,
                           row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
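A small usage sketch for this sparse writer, paired with read_records from Example #5; scipy and numpy are assumed to be installed.

import io
import numpy as np
from scipy.sparse import coo_matrix

# A 2x3 float64 matrix with three non-zero entries and one label per row
matrix = coo_matrix(([1.0, 2.0, 3.0], ([0, 0, 1], [0, 2, 1])), shape=(2, 3))
buf = io.BytesIO()
write_spmatrix_to_sparse_tensor(buf, matrix, labels=np.array([10.0, 20.0]))
buf.seek(0)
for rec in read_records(buf):
    tensor = rec.features["values"].float64_tensor
    print(list(tensor.keys), list(tensor.values), list(tensor.shape))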
Example #10
def test_float32_label():
    array_data = [[1, 2, 3], [10, 20, 3]]
    array = np.array(array_data)
    label_data = np.array([99, 98, 97]).astype(np.dtype('float32'))
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array, label_data)
        f.seek(0)
        for record_data, expected, label in zip(_read_recordio(f), array_data, label_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected
            assert record.label["values"].float32_tensor.values == [label]
Example #11
def test_dense_int_write_spmatrix_to_sparse_tensor():
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype(np.dtype('int')))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        for record_data, expected_data, expected_keys in zip(read_recordio(f), array_data, keys_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected_data
            assert record.features["values"].int32_tensor.keys == expected_keys
            assert record.features["values"].int32_tensor.shape == [len(expected_data)]
Example #12
def test_dense_float64_spmatrix_to_sparse_label():
    array_data = [[1, 2, 3], [10, 20, 3]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype("float64"))
    label_data = np.array([99, 98, 97])
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array, label_data)
        f.seek(0)
        for record_data, expected_data, expected_keys, label in zip(
            read_recordio(f), array_data, keys_data, label_data
        ):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].float64_tensor.values == expected_data
            assert record.features["values"].float64_tensor.keys == expected_keys
            assert record.label["values"].int32_tensor.values == [label]
            assert record.features["values"].float64_tensor.shape == [len(expected_data)]
Example #13
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Writes a scipy sparse matrix to a sparse tensor

    Args:
        file:
        array:
        labels:
    """

    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record,
                           row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
Example #14
    def infer(self, filename, output_folder, endpoint, show_image):
        """
        :param filename:
        :param output_folder:
        :param endpoint:
        :param show_image:
        :return:
        """
        output_filename = os.path.splitext(filename)[0]
        output_filename = os.path.basename(output_filename)
        output_filename = os.path.join(output_folder, output_filename + '_inference.png')

        runtime = boto3.Session().client('sagemaker-runtime')

        image = pillow.Image.open(filename)
        image.thumbnail([800, 600], pillow.Image.ANTIALIAS)
        image.save(filename, "JPEG")

        with open(filename, 'rb') as f:
            payload = f.read()
            payload = bytearray(payload)

        response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='application/x-image', Body=payload)
        results_file = 'results.rec'
        with open(results_file, 'wb') as f:
            f.write(response['Body'].read())

        rec = Record()
        recordio = mx.recordio.MXRecordIO(results_file, 'r')
        # ParseFromString fills `rec` in place; its return value is not a message
        rec.ParseFromString(recordio.read())
        values = list(rec.features["target"].float32_tensor.values)
        shape = list(rec.features["shape"].int32_tensor.values)
        shape = np.squeeze(shape)
        mask = np.reshape(np.array(values), shape)
        mask = np.squeeze(mask, axis=0)
        pred_map = np.argmax(mask, axis=0)

        if show_image is True:
            utils_obj = utils.Utils()
            utils_obj.show_image(pred_map)

        plt.imshow(pred_map, vmin=0, vmax=settings.HYPER['num_classes'] - 1, cmap='jet')
        plt.savefig(output_filename)
Example #15
def test_sparse_int_write_spmatrix_to_sparse_tensor():
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))]
    x_indices = list(itertools.chain.from_iterable(x_indices))

    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype='int')
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)
        for record_data, expected_data, expected_keys in zip(read_recordio(f), array_data, keys_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected_data
            assert record.features["values"].int32_tensor.keys == expected_keys
            assert record.features["values"].int32_tensor.shape == [n]
Example #16
def write_record(file, features=None, metadata=None):
    record = Record()
    record.Clear()
    if features:
        for k, v in features.items():
            write_feature_tensor(record, v, k)
    if metadata:
        if not isinstance(metadata, str):
            metadata = json.dumps(metadata)
        record.metadata = metadata
    _write_recordio(file, record.SerializeToString())
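A hypothetical call to write_record; note that write_feature_tensor (no leading underscore) is this snippet's own helper and is assumed to dispatch on the value's dtype.

import io
import numpy as np

buf = io.BytesIO()
write_record(
    buf,
    features={"values": np.array([1.0, 2.0, 3.0], dtype=np.float32)},
    metadata={"source": "example"},  # non-string metadata is JSON-encoded first
)
# buf now holds one RecordIO-framed, serialized Record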
Example #17
def transformation():
    """Do an inference on a single batch of data. In this server, we accept data as JSON
    or RecordIO-protobuf, convert it to a sparse array for internal use, and then return
    the predictions in the same format as the request.

    The JSON input format is:
    '{"instances": [{"keys": ["User","1","2"], "values": ["a","b","c"]}, {"keys": ["User","5","6"], "values": ["d","e","f"]}]}'
    """

    # Convert from json to numpy
    te_row_ind = []
    te_col_ind = []
    te_data = []
    te_idx = 0
    headers = ScoringService.get_headers()
    if flask.request.content_type == 'application/json':
        print("Working with JSON input")
        s = flask.request.data.decode('utf-8')
        inputs = json.loads(s)
        for instance in inputs['instances']:

            # The column index has to be found from the headers
            for col_idx in range(0, len(instance['keys'])):
                key = instance['keys'][col_idx]
                val = instance['values'][col_idx]
                item_to_find = "{0}_{1}".format(key, val)
                try:
                    te_col_ind.append(headers.index(item_to_find))
                    te_data.append(1.0)
                    te_row_ind.append(te_idx) 
                except ValueError:
                    te_col_ind.append(1)
                    te_data.append(0.0)
                    te_row_ind.append(te_idx) 
                    print("Couldn't find header for {0}".format(item_to_find))
            te_idx = te_idx + 1
    elif flask.request.content_type == 'application/x-recordio-protobuf':
        print("Working with Protobuf input")
        # flask.request.data is bytes, so wrap it in BytesIO for read_records
        test_records = smac.read_records(io.BytesIO(flask.request.data))
        for test_record in test_records:
            te_row_ind.extend([te_idx] * len(test_record.features['values'].float32_tensor.values))
            te_col_ind.extend(test_record.features['values'].float32_tensor.keys)
            te_data.extend(test_record.features['values'].float32_tensor.values)
            te_idx = te_idx + 1

    else:
        return flask.Response(response='This predictor only supports JSON or Protobuf data', status=415, mimetype='text/plain')

    X_te_sparse = sp.csr_matrix(
        (np.array(te_data), (np.array(te_row_ind), np.array(te_col_ind))),
        shape=(te_idx, ScoringService.get_num_features()))
    print('Invoked with {} records'.format(X_te_sparse.shape[0]))

    # Do the prediction
    predictions = ScoringService.predict(X_te_sparse)

    # Convert from array back to json
    result = None
    if flask.request.content_type == 'application/json':
        js = {'predictions': []}
        for pred_value in predictions:
            js['predictions'].append({'score': str(pred_value)})
        result = json.dumps(js)
    else:
        # convert to protobuf
        buf = io.BytesIO()
        record = Record()
        for pred_value in predictions:
            record.Clear()
            #smac._write_label_tensor('Float32', record, pred_value)
            record.label["score"].float32_tensor.values.extend([pred_value])
            smac._write_recordio(buf, record.SerializeToString())
        buf.seek(0)
        result = buf.getvalue()

    return flask.Response(response=result, status=200, mimetype=flask.request.content_type)
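For manual testing, the JSON path of transformation() can be exercised against a locally running container. The /invocations route and port 8080 follow the standard SageMaker serving convention; the payload below is purely illustrative.

import json
import requests

payload = {"instances": [{"keys": ["User", "1", "2"], "values": ["a", "b", "c"]}]}
resp = requests.post(
    "http://localhost:8080/invocations",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
print(resp.status_code, resp.text)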