def write_numpy_to_dense_tensor(file, array, labels=None):
    """Serialize a 2-D numpy array into recordio-protobuf dense tensors.

    One ``Record`` is written to ``file`` per row of ``array``; when
    ``labels`` is given it supplies one label value per row.

    Args:
        file: Writable binary file-like object receiving the records.
        array: 2-D numpy array; each row becomes one Record.
        labels: Optional 1-D numpy array whose length must match one of
            ``array``'s dimensions.

    Raises:
        ValueError: If ``array`` is not 2-D, ``labels`` is not 1-D, or the
            label count is incompatible with the array shape.
    """
    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    label_type = None
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        label_type = _resolve_type(labels.dtype)
    value_type = _resolve_type(array.dtype)

    # One protobuf message is reused across rows; Clear() resets it each time.
    record = Record()
    for row_idx, row in enumerate(array):
        record.Clear()
        _write_feature_tensor(value_type, record, row)
        if labels is not None:
            _write_label_tensor(label_type, record, labels[row_idx])
        _write_recordio(file, record.SerializeToString())
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write a scipy sparse matrix to ``file`` as recordio-protobuf sparse tensors.

    Each row of ``array`` becomes one ``Record`` carrying the row's non-zero
    values, their column keys, and the row width.

    Args:
        file: Writable binary file-like object receiving the records.
        array: 2-D scipy sparse matrix; each row becomes one Record.
        labels: Optional 1-D numpy array whose length must match one of
            ``array``'s dimensions.

    Raises:
        TypeError: If ``array`` is not a scipy sparse matrix.
        ValueError: If ``array`` is not 2-D, ``labels`` is not 1-D, or the
            label count is incompatible with the array shape.
    """
    try:
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee ``scipy.sparse`` is loaded as an attribute.
        import scipy.sparse
    except ImportError as e:
        logging.warning(
            "scipy failed to import. Sparse matrix functions will be impaired or broken."
        )
        # Any subsequent attempt to use scipy will raise the ImportError
        scipy = DeferredError(e)

    if not scipy.sparse.issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # CSR gives cheap row slicing: data/indices per row are contiguous.
    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys (column indices as uint64, per the sparse tensor format)
        _write_keys_tensor(resolved_type, record, row.indices.astype(np.uint64))
        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])
        # Write shape (the row width, so readers can reconstruct the matrix)
        _write_shape(resolved_type, record, n_cols)
        _write_recordio(file, record.SerializeToString())
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Serialize a scipy sparse matrix into recordio-protobuf sparse tensors.

    One ``Record`` per matrix row is written to ``file``; each carries the
    row's non-zero values, their column keys, and the row width. ``labels``
    (if given) supplies one label per row.

    Args:
        file: Writable binary file-like object receiving the records.
        array: 2-D scipy sparse matrix; each row becomes one Record.
        labels: Optional 1-D numpy array whose length must match one of
            ``array``'s dimensions.

    Raises:
        TypeError: If ``array`` is not a scipy sparse matrix.
        ValueError: If ``array`` is not 2-D, ``labels`` is not 1-D, or the
            label count is incompatible with the array shape.
    """
    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if len(array.shape) != 2:
        raise ValueError("Array must be a Matrix")
    label_type = None
    if labels is not None:
        if len(labels.shape) != 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape
                )
            )
        label_type = _resolve_type(labels.dtype)
    value_type = _resolve_type(array.dtype)

    # CSR form makes per-row data/indices directly available.
    matrix = array.tocsr()
    num_rows, num_cols = matrix.shape

    record = Record()
    for i in range(num_rows):
        record.Clear()
        current = matrix.getrow(i)
        # Values, then keys (column indices as uint64), then optional label,
        # then the row width so readers can reconstruct the matrix shape.
        _write_feature_tensor(value_type, record, current.data)
        _write_keys_tensor(value_type, record, current.indices.astype(np.uint64))
        if labels is not None:
            _write_label_tensor(label_type, record, labels[i])
        _write_shape(value_type, record, num_cols)
        _write_recordio(file, record.SerializeToString())
def write_record(file, features=None, metadata=None):
    """Write a single protobuf ``Record`` to ``file`` in recordio framing.

    Args:
        file: Writable binary file-like object.
        features: Optional mapping of feature name -> tensor value; each
            entry is written via ``write_feature_tensor``.
        metadata: Optional record metadata. A non-str value is serialized
            with ``json.dumps`` before being assigned. Falsy values
            (``None``, ``""``, ``{}``) are skipped, matching the features
            handling.
    """
    record = Record()
    record.Clear()
    if features:
        for name, value in features.items():
            write_feature_tensor(record, value, name)
    if metadata:
        if not isinstance(metadata, str):
            metadata = json.dumps(metadata)
        record.metadata = metadata
    _write_recordio(file, record.SerializeToString())
def transformation():
    """Do an inference on a single batch of data.

    In this server, we take data as JSON or recordio-protobuf, convert it to
    a sparse matrix for internal use, and then convert the predictions back
    to the request's content type.

    Input JSON format is:
    '{"instances": [{"keys": ["User","1","2"], "values": ["a","b","c"]},
    {"keys": ["User","5","6"], "values": ["d","e","f"]}]}'

    Returns:
        A ``flask.Response`` with the predictions, or a 415 response for an
        unsupported content type.
    """
    # COO-style triplets for the sparse feature matrix
    te_row_ind = []
    te_col_ind = []
    te_data = []
    te_idx = 0

    headers = ScoringService.get_headers()
    if flask.request.content_type == 'application/json':
        print("Working with JSON input")
        s = flask.request.data.decode('utf-8')
        inputs = json.loads(s)
        for instance in inputs['instances']:
            # The column index has to be found from the headers
            for col_idx in range(0, len(instance['keys'])):
                key = instance['keys'][col_idx]
                val = instance['values'][col_idx]
                item_to_find = "{0}_{1}".format(key, val)
                try:
                    # list.index raises ValueError when the header is absent;
                    # catch only that so real bugs are not silently swallowed.
                    te_col_ind.append(headers.index(item_to_find))
                    te_data.append(1.0)
                    te_row_ind.append(te_idx)
                except ValueError:
                    # Unknown feature: emit a zero entry to keep row alignment.
                    te_col_ind.append(1)
                    te_data.append(0.0)
                    te_row_ind.append(te_idx)
                    print("Couldn't find header for {0}".format(item_to_find))
            te_idx = te_idx + 1
    elif flask.request.content_type == 'application/x-recordio-protobuf':
        print("Working with Protobuf input")
        # request.data is bytes, so wrap it in a binary buffer (io.BytesIO),
        # not a text StringIO.
        test_records = smac.read_records(io.BytesIO(flask.request.data))
        for test_record in test_records:
            tensor = test_record.features['values'].float32_tensor
            te_row_ind.extend([te_idx] * len(tensor.values))
            te_col_ind.extend(tensor.keys)
            te_data.extend(tensor.values)
            te_idx = te_idx + 1
    else:
        return flask.Response(
            response='This predictor only supports JSON or Protobuf data',
            status=415, mimetype='text/plain')

    X_te_sparse = sp.csr_matrix(
        (np.array(te_data), (np.array(te_row_ind), np.array(te_col_ind))),
        shape=(te_idx, ScoringService.get_num_features())
    )
    print('Invoked with {} records'.format(X_te_sparse.shape))

    # Do the prediction
    predictions = ScoringService.predict(X_te_sparse)

    # Convert from array back to the request's content type
    result = None
    if flask.request.content_type == 'application/json':
        js = {'predictions': []}
        for pred_value in predictions:
            js['predictions'].append({'score': str(pred_value)})
        result = json.dumps(js)
    else:
        # convert to protobuf
        buf = io.BytesIO()
        record = Record()
        for pred_value in predictions:
            record.Clear()
            record.label["score"].float32_tensor.values.extend([pred_value])
            smac._write_recordio(buf, record.SerializeToString())
        buf.seek(0)
        result = buf.getvalue()

    return flask.Response(response=result, status=200,
                          mimetype=flask.request.content_type)