def deserialize(self, stream, content_type):
    """Decode a recordio-protobuf response from a SageMaker Semantic Segmentation endpoint.

    Args:
        stream (botocore.response.StreamingBody): A stream of bytes. It is
            closed before this method returns, whether or not decoding
            succeeded.
        content_type (str): The MIME type of the data (not consulted here).

    Returns:
        array: numpy array of class probabilities per pixel
    """
    try:
        # Peel off the RecordIO framing to get the serialized protobuf bytes.
        payload = HackyProtobufDeserializer.next_recordio_record(stream)

        message = Record()
        print("Parsing protobuf...")
        message.ParseFromString(payload)

        # The message carries two tensors: `target` (flattened predictions)
        # and `shape` (the dimensions to restore them to).
        print("Fetching Tensors...")
        flat_values = list(message.features["target"].float32_tensor.values)
        dims = list(message.features["shape"].int32_tensor.values)

        print("reshaping arrays...")
        dims = squeeze(dims)
        restored = reshape(array(flat_values), dims)
        # Drop the leading (batch) axis: predictions are made one image at a time.
        return squeeze(restored, axis=0)
    finally:
        stream.close()
def write_numpy_to_dense_tensor(file, array, labels=None):
    """Write every row of a 2-D numpy array to ``file`` as a dense-tensor Record.

    Args:
        file: File-like object passed to ``_write_recordio`` for each record.
        array: Two-dimensional numpy array; each row becomes one record.
        labels: Optional one-dimensional numpy array; ``labels[i]`` is written
            as the label of row ``i``.

    Raises:
        ValueError: If ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels matches neither dimension of ``array``.
    """
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")

    has_labels = labels is not None
    if has_labels:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # NOTE(review): the label count is accepted when it equals either
        # dimension of `array`, although labels are indexed by row below.
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        label_type = _resolve_type(labels.dtype)
    feature_type = _resolve_type(array.dtype)

    # A single Record instance is cleared and reused for every row.
    message = Record()
    for row_number, row in enumerate(array):
        message.Clear()
        _write_feature_tensor(feature_type, message, row)
        if has_labels:
            _write_label_tensor(label_type, message, labels[row_number])
        _write_recordio(file, message.SerializeToString())
def test_serializer_accepts_one_dimensional_array():
    """A 1-D array serializes to a first record whose float64 values match the input."""
    expected_values = [1.0, 2.0, 3.0]
    serialized = RecordSerializer().serialize(np.array(expected_values))

    first_payload = next(read_recordio(serialized))
    parsed = Record()
    parsed.ParseFromString(first_payload)

    assert parsed.features["values"].float64_tensor.values == expected_values
def test_serializer():
    """Each row of a 2-D array serializes to its own record of float64 values."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    serialized = RecordSerializer().serialize(np.array(rows))

    for payload, expected_row in zip(read_recordio(serialized), rows):
        parsed = Record()
        parsed.ParseFromString(payload)
        assert parsed.features["values"].float64_tensor.values == expected_row
def read_records(file):
    """Eagerly read a collection of amazon Record protobuf objects from file.

    Every payload yielded by ``read_recordio(file)`` is parsed into a fresh
    ``Record``; the parsed records are returned as a list in read order.
    """

    def _parse(payload):
        # One new Record per payload so the returned objects are independent.
        message = Record()
        message.ParseFromString(payload)
        return message

    return [_parse(payload) for payload in read_recordio(file)]
def read_pipe(pipe):
    """Print every recordio record found at path ``pipe``, raw and parsed.

    The path is opened in binary mode; for each payload the raw bytes are
    printed first, followed by the parsed ``Record``.
    """
    with open(pipe, 'rb') as stream:
        for raw_payload in read_recordio(stream):
            print("read record")
            print(raw_payload)

            parsed = Record()
            parsed.ParseFromString(raw_payload)

            print("record parsed")
            print(parsed)
def test_int_write_numpy_to_dense_tensor():
    """Rows of an integer array are read back from each record's int32 tensor."""
    rows = [[1, 2, 3], [10, 20, 3]]
    matrix = np.array(rows)

    with tempfile.TemporaryFile() as handle:
        write_numpy_to_dense_tensor(handle, matrix)
        handle.seek(0)  # rewind so the records just written can be read back

        for payload, expected_row in zip(read_recordio(handle), rows):
            parsed = Record()
            parsed.ParseFromString(payload)
            assert parsed.features["values"].int32_tensor.values == expected_row
def test_float32_write_numpy_to_dense_tensor():
    """Rows of a float32 array are read back from each record's float32 tensor."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    float32 = np.dtype("float32")
    matrix = np.array(rows).astype(float32)

    with tempfile.TemporaryFile() as handle:
        write_numpy_to_dense_tensor(handle, matrix)
        handle.seek(0)  # rewind so the records just written can be read back

        for payload, expected_row in zip(read_recordio(handle), rows):
            parsed = Record()
            parsed.ParseFromString(payload)
            assert parsed.features["values"].float32_tensor.values == expected_row
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write every row of a scipy sparse matrix to ``file`` as a sparse-tensor Record.

    Args:
        file: File-like object passed to ``_write_recordio`` for each record.
        array: scipy sparse matrix; it is converted to CSR and each row
            becomes one record holding the row's stored values, their column
            indices (as keys) and the column count (as shape).
        labels: Optional one-dimensional numpy array; ``labels[i]`` is written
            as the label of row ``i``.

    Raises:
        TypeError: If ``array`` is not a scipy sparse matrix.
        ValueError: If ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels matches neither dimension of ``array``.
    """
    try:
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee that ``scipy.sparse`` is loaded and reachable as an
        # attribute. This statement still binds the local name ``scipy``.
        import scipy.sparse
    except ImportError as e:
        logging.warning(
            "scipy failed to import. Sparse matrix functions will be impaired or broken."
        )
        # Any subsequent attempt to use scipy will raise the ImportError
        scipy = DeferredError(e)

    if not scipy.sparse.issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # NOTE(review): the label count is accepted when it equals either
        # dimension of `array`, although labels are indexed by row below.
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    # A single Record instance is cleared and reused for every row.
    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
def test_float32_label():
    """float32 labels land in the label float32 tensor; integer features stay int32."""
    rows = [[1, 2, 3], [10, 20, 3]]
    matrix = np.array(rows)
    row_labels = np.array([99, 98, 97]).astype(np.dtype('float32'))

    with tempfile.TemporaryFile() as handle:
        write_numpy_to_dense_tensor(handle, matrix, row_labels)
        handle.seek(0)  # rewind so the records just written can be read back

        triples = zip(_read_recordio(handle), rows, row_labels)
        for payload, expected_row, expected_label in triples:
            parsed = Record()
            parsed.ParseFromString(payload)
            assert parsed.features["values"].int32_tensor.values == expected_row
            assert parsed.label["values"].float32_tensor.values == [expected_label]
def test_dense_int_write_spmatrix_to_sparse_tensor():
    """A fully populated integer sparse matrix yields int32 values, keys and shape per row."""
    rows = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    int_matrix = np.array(rows).astype(np.dtype('int'))
    sparse = coo_matrix(int_matrix)

    with tempfile.TemporaryFile() as handle:
        write_spmatrix_to_sparse_tensor(handle, sparse)
        handle.seek(0)  # rewind so the records just written can be read back

        for payload, expected_row, expected_keys in zip(read_recordio(handle), rows, row_keys):
            parsed = Record()
            parsed.ParseFromString(payload)
            tensor = parsed.features["values"].int32_tensor
            assert tensor.values == expected_row
            assert tensor.keys == expected_keys
            assert tensor.shape == [len(expected_row)]
def test_dense_float64_spmatrix_to_sparse_label():
    """float64 sparse features and integer labels are both read back per row."""
    rows = [[1, 2, 3], [10, 20, 3]]
    row_keys = [[0, 1, 2], [0, 1, 2]]
    sparse = coo_matrix(np.array(rows).astype("float64"))
    row_labels = np.array([99, 98, 97])

    with tempfile.TemporaryFile() as handle:
        write_spmatrix_to_sparse_tensor(handle, sparse, row_labels)
        handle.seek(0)  # rewind so the records just written can be read back

        quads = zip(read_recordio(handle), rows, row_keys, row_labels)
        for payload, expected_row, expected_keys, expected_label in quads:
            parsed = Record()
            parsed.ParseFromString(payload)
            tensor = parsed.features["values"].float64_tensor
            assert tensor.values == expected_row
            assert tensor.keys == expected_keys
            assert parsed.label["values"].int32_tensor.values == [expected_label]
            assert tensor.shape == [len(expected_row)]
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write every row of a scipy sparse matrix to ``file`` as a sparse-tensor Record.

    Args:
        file: File-like object passed to ``_write_recordio`` for each record.
        array: scipy sparse matrix; it is converted to CSR and each row
            becomes one record holding the row's stored values, their column
            indices (as keys) and the column count (as shape).
        labels: Optional one-dimensional numpy array; ``labels[i]`` is written
            as the label of row ``i``.

    Raises:
        TypeError: If ``array`` is not a scipy sparse matrix.
        ValueError: If ``array`` is not 2-D, if ``labels`` is not 1-D, or if
            the number of labels matches neither dimension of ``array``.
    """
    if not issparse(array):
        raise TypeError("Array must be sparse")
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")

    has_labels = labels is not None
    if has_labels:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        # NOTE(review): the label count is accepted when it equals either
        # dimension of `array`, although labels are indexed by row below.
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        label_type = _resolve_type(labels.dtype)
    value_type = _resolve_type(array.dtype)

    compressed = array.tocsr()
    row_count, column_count = compressed.shape

    # A single Record instance is cleared and reused for every row.
    message = Record()
    for row_number in range(row_count):
        message.Clear()
        sparse_row = compressed.getrow(row_number)

        # Stored values first, then the column indices they belong to.
        _write_feature_tensor(value_type, message, sparse_row.data)
        _write_keys_tensor(value_type, message, sparse_row.indices.astype(np.uint64))

        if has_labels:
            _write_label_tensor(label_type, message, labels[row_number])

        # The shape written is the number of columns of the matrix.
        _write_shape(value_type, message, column_count)

        _write_recordio(file, message.SerializeToString())
def infer(self, filename, output_folder, endpoint, show_image):
    """Send one image to a SageMaker endpoint and save the predicted class map as a PNG.

    The image is shrunk to fit within 800x600, posted to the endpoint, and the
    recordio-protobuf response is decoded into a per-pixel argmax map that is
    rendered with matplotlib.

    :param filename: path of the image to segment. NOTE: the resized image is
        saved back over this path as JPEG before it is uploaded.
    :param output_folder: folder receiving ``<image stem>_inference.png``
    :param endpoint: name of the SageMaker endpoint to invoke
    :param show_image: when exactly ``True``, the prediction map is also
        passed to ``utils.Utils().show_image``
    :return: None
    """
    image_stem = os.path.basename(os.path.splitext(filename)[0])
    output_filename = os.path.join(output_folder, image_stem + '_inference.png')

    runtime = boto3.Session().client('sagemaker-runtime')

    # NOTE(review): `ANTIALIAS` was removed in Pillow 10 (`LANCZOS` is the
    # replacement name) - confirm the Pillow version this project pins.
    image = pillow.Image.open(filename)
    image.thumbnail([800, 600], pillow.Image.ANTIALIAS)
    image.save(filename, "JPEG")

    with open(filename, 'rb') as f:
        payload = bytearray(f.read())

    response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='application/x-image', Body=payload)

    # The response is staged in a file in the current working directory so it
    # can be read back through MXRecordIO.
    results_file = 'results.rec'
    with open(results_file, 'wb') as f:
        f.write(response['Body'].read())

    rec = Record()
    recordio = mx.recordio.MXRecordIO(results_file, 'r')
    try:
        rec.ParseFromString(recordio.read())
    finally:
        # Release the reader's file handle even if reading or parsing fails.
        recordio.close()

    # `target` holds the flattened predictions and `shape` the dimensions to
    # restore them to; the leading axis is squeezed away afterwards.
    values = list(rec.features["target"].float32_tensor.values)
    shape = list(rec.features["shape"].int32_tensor.values)
    shape = np.squeeze(shape)
    mask = np.reshape(np.array(values), shape)
    mask = np.squeeze(mask, axis=0)

    # Highest-scoring index along the first remaining axis for each position.
    pred_map = np.argmax(mask, axis=0)

    if show_image is True:
        utils_obj = utils.Utils()
        utils_obj.show_image(pred_map)

    plt.imshow(pred_map, vmin=0, vmax=settings.HYPER['num_classes'] - 1, cmap='jet')
    plt.savefig(output_filename)
def test_sparse_int_write_spmatrix_to_sparse_tensor():
    """Rows with differing numbers of stored entries keep their own values and keys."""
    n = 4
    rows = [
        [1.0, 2.0],
        [10.0, 30.0],
        [100.0, 200.0, 300.0, 400.0],
        [1000.0, 2000.0, 3000.0],
    ]
    row_keys = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten into the (data, (row, col)) triplets that coo_matrix expects.
    flat_values = [value for row in rows for value in row]
    col_indices = [key for keys in row_keys for key in keys]
    row_indices = [row_number for row_number, keys in enumerate(row_keys) for _ in keys]
    sparse = coo_matrix((flat_values, (row_indices, col_indices)), dtype='int')

    with tempfile.TemporaryFile() as handle:
        write_spmatrix_to_sparse_tensor(handle, sparse)
        handle.seek(0)  # rewind so the records just written can be read back

        for payload, expected_row, expected_keys in zip(read_recordio(handle), rows, row_keys):
            parsed = Record()
            parsed.ParseFromString(payload)
            tensor = parsed.features["values"].int32_tensor
            assert tensor.values == expected_row
            assert tensor.keys == expected_keys
            assert tensor.shape == [n]
def write_record(file, features=None, metadata=None):
    """Build one Record from named features and optional metadata and write it to ``file``.

    Args:
        file: File-like object passed to ``_write_recordio``.
        features: Optional mapping of feature name to value; each pair is
            handed to ``write_feature_tensor``. Skipped when empty or None.
        metadata: Optional metadata; anything that is not already a ``str``
            is JSON-encoded first. Skipped when falsy.
    """
    message = Record()
    message.Clear()

    for name, value in (features or {}).items():
        write_feature_tensor(message, value, name)

    if metadata:
        message.metadata = metadata if isinstance(metadata, str) else json.dumps(metadata)

    _write_recordio(file, message.SerializeToString())
def transformation():
    """Do an inference on a single batch of data.

    JSON or recordio-protobuf input is converted to a sparse matrix, scored
    with ``ScoringService.predict`` and returned in the request's content type.

    JSON input format is:
    '{"instances": [{"keys": ["User","1","2"], "values": ["a","b","c"]},
                    {"keys": ["User","5","6"], "values": ["d","e","f"]}]}'
    Each key/value pair is looked up in the headers as "<key>_<value>".

    Protobuf input is a recordio stream of Records whose ``values`` feature
    is a float32 tensor with ``keys`` (column indices) and ``values``.

    Returns:
        flask.Response: status 200 with the predictions (JSON
        ``{"predictions": [{"score": ...}]}`` or recordio-protobuf records
        carrying a ``score`` label), or status 415 for any other content type.
    """
    # Coordinate-format triplets for the sparse input matrix.
    te_row_ind = []
    te_col_ind = []
    te_data = []
    te_idx = 0
    headers = ScoringService.get_headers()
    content_type = flask.request.content_type

    if content_type == 'application/json':
        print("Working with JSON input")
        inputs = json.loads(flask.request.data.decode('utf-8'))
        for instance in inputs['instances']:
            # The column index has to be found from the headers
            for col_idx, key in enumerate(instance['keys']):
                val = instance['values'][col_idx]
                item_to_find = "{0}_{1}".format(key, val)
                try:
                    te_col_ind.append(headers.index(item_to_find))
                    te_data.append(1.0)
                    te_row_ind.append(te_idx)
                except Exception:
                    # Best effort: an unknown feature is recorded as an
                    # explicit 0.0 in column 1 so it adds nothing to the row.
                    te_col_ind.append(1)
                    te_data.append(0.0)
                    te_row_ind.append(te_idx)
                    print("Couldn't find header for {0}".format(item_to_find))
            te_idx = te_idx + 1
    elif content_type == 'application/x-recordio-protobuf':
        print("Working with Protobuf input")
        # The request body is bytes; io.BytesIO wraps it on both Python 2 and
        # Python 3, unlike the Python-2-only StringIO module.
        test_records = smac.read_records(io.BytesIO(flask.request.data))
        for test_record in test_records:
            tensor = test_record.features['values'].float32_tensor
            te_row_ind.extend([te_idx] * len(tensor.values))
            te_col_ind.extend(tensor.keys)
            te_data.extend(tensor.values)
            te_idx = te_idx + 1
    else:
        return flask.Response(response='This predictor only supports JSON or Protobuf data', status=415, mimetype='text/plain')

    X_te_sparse = sp.csr_matrix(
        (np.array(te_data), (np.array(te_row_ind), np.array(te_col_ind))),
        shape=(te_idx, ScoringService.get_num_features())
    )

    print('Invoked with {} records'.format(X_te_sparse.shape))

    # Do the prediction
    predictions = ScoringService.predict(X_te_sparse)

    # Convert the predictions back to the caller's format
    if content_type == 'application/json':
        js = {'predictions': [{'score': str(pred_value)} for pred_value in predictions]}
        result = json.dumps(js)
    else:
        # One recordio-framed Record per prediction, reusing a single message.
        buf = io.BytesIO()
        record = Record()
        for pred_value in predictions:
            record.Clear()
            record.label["score"].float32_tensor.values.extend([pred_value])
            smac._write_recordio(buf, record.SerializeToString())
        result = buf.getvalue()

    return flask.Response(response=result, status=200, mimetype=content_type)