# Example #1 (score: 0)
def write_numpy_to_dense_tensor(file, array, labels=None):
    """Write a 2-D numpy array to *file* as dense-tensor protobuf records.

    Each row of *array* is serialized into one ``Record`` and written in
    recordio framing; when *labels* is given, the row's label is attached
    to the same record.

    Args:
        file: File-like object opened for binary writing.
        array: 2-D numpy array; one record is written per row.
        labels: Optional 1-D numpy array of per-row labels. Its length
            must equal one of *array*'s dimensions.

    Raises:
        ValueError: If *array* is not 2-D, *labels* is not 1-D, or the
            label count is incompatible with *array*'s shape.
    """

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # Write each vector in array into a Record in the file object
    record = Record()
    for index, vector in enumerate(array):
        record.Clear()
        _write_feature_tensor(resolved_type, record, vector)
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[index])
        _write_recordio(file, record.SerializeToString())
# Example #2 (score: 0)
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write a scipy sparse matrix to *file* as sparse-tensor protobuf records.

    Each row of *array* becomes one ``Record`` carrying the row's non-zero
    values, their column indices (as keys), the row width (as shape), and
    optionally a label; records are written in recordio framing.

    Args:
        file: File-like object opened for binary writing.
        array: 2-D scipy sparse matrix; one record is written per row.
        labels: Optional 1-D numpy array of per-row labels. Its length
            must equal one of *array*'s dimensions.

    Raises:
        TypeError: If *array* is not a scipy sparse matrix.
        ValueError: If *array* is not 2-D, *labels* is not 1-D, or the
            label count is incompatible with *array*'s shape.
    """
    try:
        import scipy
    except ImportError as e:
        logging.warning(
            "scipy failed to import. Sparse matrix functions will be impaired or broken."
        )
        # Any subsequent attempt to use scipy will raise the ImportError
        scipy = DeferredError(e)

    if not scipy.sparse.issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # CSR gives cheap per-row access to data / column indices
    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys (column indices must be uint64 in the protobuf)
        _write_keys_tensor(resolved_type, record,
                           row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
# Example #3 (score: 0)
def write_spmatrix_to_sparse_tensor(file, array, labels=None):
    """Write a scipy sparse matrix to *file* as sparse-tensor protobuf records.

    Each row of *array* becomes one ``Record`` carrying the row's non-zero
    values, their column indices (as keys), the row width (as shape), and
    optionally a label; records are written in recordio framing.

    Args:
        file: File-like object opened for binary writing.
        array: 2-D scipy sparse matrix; one record is written per row.
        labels: Optional 1-D numpy array of per-row labels. Its length
            must equal one of *array*'s dimensions.

    Raises:
        TypeError: If *array* is not a scipy sparse matrix.
        ValueError: If *array* is not 2-D, *labels* is not 1-D, or the
            label count is incompatible with *array*'s shape.
    """

    if not issparse(array):
        raise TypeError("Array must be sparse")

    # Validate shape of array and labels, resolve array and label types
    if not len(array.shape) == 2:
        raise ValueError("Array must be a Matrix")
    if labels is not None:
        if not len(labels.shape) == 1:
            raise ValueError("Labels must be a Vector")
        if labels.shape[0] not in array.shape:
            raise ValueError(
                "Label shape {} not compatible with array shape {}".format(
                    labels.shape, array.shape))
        resolved_label_type = _resolve_type(labels.dtype)
    resolved_type = _resolve_type(array.dtype)

    # CSR gives cheap per-row access to data / column indices
    csr_array = array.tocsr()
    n_rows, n_cols = csr_array.shape

    record = Record()
    for row_idx in range(n_rows):
        record.Clear()
        row = csr_array.getrow(row_idx)
        # Write values
        _write_feature_tensor(resolved_type, record, row.data)
        # Write keys (column indices must be uint64 in the protobuf)
        _write_keys_tensor(resolved_type, record,
                           row.indices.astype(np.uint64))

        # Write labels
        if labels is not None:
            _write_label_tensor(resolved_label_type, record, labels[row_idx])

        # Write shape
        _write_shape(resolved_type, record, n_cols)

        _write_recordio(file, record.SerializeToString())
# Example #4 (score: 0)
def write_record(file, features=None, metadata=None):
    """Write a single protobuf ``Record`` to *file* in recordio framing.

    Args:
        file: File-like object opened for binary writing.
        features: Optional mapping of feature name -> tensor value; each
            entry is written onto the record via ``write_feature_tensor``.
        metadata: Optional metadata for the record; any non-str value is
            JSON-serialized before being assigned.
    """
    record = Record()
    record.Clear()
    if features:
        for name, value in features.items():
            write_feature_tensor(record, value, name)
    if metadata:
        if not isinstance(metadata, str):
            metadata = json.dumps(metadata)
        record.metadata = metadata
    _write_recordio(file, record.SerializeToString())
def transformation():
    """Do an inference on a single batch of data. In this server, we take data as JSON, convert
    it to a sparse array for internal use and then convert the predictions back to json.

    Input format is:
    '{"instances": [{"keys": ["User","1","2"], "values": ["a","b","c"]}, {"keys": ["User","5","6"], "values": ["d","e","f"]}]}' 
    """

    # Accumulators for building a COO-style triplet representation
    # (row indices, column indices, data) of the request batch.
    te_row_ind = []
    te_col_ind = []
    te_data = []
    te_idx = 0
    headers = ScoringService.get_headers()
    if flask.request.content_type == 'application/json':
        print("Working with JSON input")
        s = flask.request.data.decode('utf-8')
        inputs = json.loads(s)
        for instance in inputs['instances']:

            # The column index has to be found from the headers
            for col_idx in range(0, len(instance['keys'])):
                key = instance['keys'][col_idx]
                val = instance['values'][col_idx]
                item_to_find = "{0}_{1}".format(key, val)
                try:
                    te_col_ind.append(headers.index(item_to_find))
                    te_data.append(1.0)
                    te_row_ind.append(te_idx)
                except ValueError:
                    # Unknown feature: best-effort fallback to a zero entry
                    # in column 1 so the row count stays consistent.
                    te_col_ind.append(1)
                    te_data.append(0.0)
                    te_row_ind.append(te_idx)
                    print("Couldn't find header for {0}".format(item_to_find))
            te_idx = te_idx + 1
    elif flask.request.content_type == 'application/x-recordio-protobuf':
        print("Working with Protobuf input")
        # flask.request.data is bytes, so wrap it in a binary buffer for
        # the recordio reader (StringIO would fail on Python 3).
        test_records = smac.read_records(io.BytesIO(flask.request.data))
        for test_record in test_records:
            te_row_ind.extend([te_idx] * len(test_record.features['values'].float32_tensor.values))
            te_col_ind.extend(test_record.features['values'].float32_tensor.keys)
            te_data.extend(test_record.features['values'].float32_tensor.values)
            te_idx = te_idx + 1

    else:
        return flask.Response(response='This predictor only supports JSON or Protobuf data', status=415, mimetype='text/plain')

    # Assemble the sparse design matrix: one row per instance, fixed width.
    X_te_sparse = sp.csr_matrix( (np.array(te_data),(np.array(te_row_ind),np.array(te_col_ind))), shape=(te_idx,ScoringService.get_num_features()) )
    print('Invoked with {} records'.format(X_te_sparse.shape))

    # Do the prediction
    predictions = ScoringService.predict(X_te_sparse)

    # Convert from array back to json
    result = None
    if flask.request.content_type == 'application/json':
        js = {'predictions': []}
        for pred_value in predictions:
            js['predictions'].append({'score': str(pred_value)})
        result = json.dumps(js)
    else:
        # convert to protobuf: one record per prediction with a single
        # float32 "score" label, in recordio framing.
        buf = io.BytesIO()
        record = Record()
        for pred_value in predictions:
            record.Clear()
            record.label["score"].float32_tensor.values.extend([pred_value])
            smac._write_recordio(buf, record.SerializeToString())
        buf.seek(0)
        result = buf.getvalue()

    # Echo the caller's content type so the response format matches the request.
    return flask.Response(response=result, status=200, mimetype=flask.request.content_type)