Ejemplo n.º 1
0
    def _transform(self, row):
        """Add a dense copy of the row's sparse vector as a new column.

        # Arguments:
            row: Spark Row. The value stored under `self.input_column` must
                 expose a `toArray()` method.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the dense
            vector stored under `self.output_column`.
        """
        values = row[self.input_column].toArray()
        return new_dataframe_row(row, self.output_column, DenseVector(values))
Ejemplo n.º 2
0
    def _transform(self, row):
        """Add a "predicted index" column to the given row.

        # Arguments:
            row: Spark Row. The prediction is read from `self.input_column`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the index
            (as a float) stored under `self.output_column`.
        """
        raw_prediction = row[self.input_column]
        # The index is always stored as a float, whatever `get_index` returns.
        predicted_index = float(self.get_index(raw_prediction))
        return new_dataframe_row(row, self.output_column, predicted_index)
Ejemplo n.º 3
0
    def _transform(self, row):
        """Reshape the row's vector to `self.shape` and add it as a new column.

        # Arguments:
            row: Spark Row. The value stored under `self.input_column` must
                 expose a `toArray()` method.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the reshaped
            data, as nested Python lists, stored under `self.output_column`.
        """
        flat = row[self.input_column].toArray()
        matrix = flat.reshape(self.shape)
        return new_dataframe_row(row, self.output_column, matrix.tolist())
Ejemplo n.º 4
0
    def _transform(self, row):
        """Reshape the row's vector to `self.shape` and add it as a new column.

        # Arguments:
            row: Spark Row. The value stored under `self.input_column` must
                 be convertible with `np.asarray`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the reshaped
            data, as nested Python lists, stored under `self.output_column`.
        """
        flat = np.asarray(row[self.input_column])
        nested = flat.reshape(self.shape).tolist()
        return new_dataframe_row(row, self.output_column, nested)
Ejemplo n.º 5
0
    def _transform(self, row):
        """One-hot encode the label of a single row into a new column.

        Only for internal use.

        # Arguments:
            row: Spark Row. The label is read from `self.input_column`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the encoded
            label, as a Python list, stored under `self.output_column`.
        """
        encoded = to_one_hot_encoded_dense(row[self.input_column],
                                           self.output_dimensionality)
        return new_dataframe_row(row, self.output_column, encoded.tolist())
Ejemplo n.º 6
0
    def _transform(self, row):
        """Standardize the current column of a row as (x - mean) / stddev.

        The mean and standard deviation are looked up in `self.means` and
        `self.stddevs` under `self.current_column`. The result is stored in a
        column named `self.current_column + self.column_suffix`.

        # Arguments:
            row: Spark Row. The value is read from `self.current_column`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the
            standardized value stored under the suffixed column name.
        """
        column = self.current_column
        mean = self.means[column]
        stddev = self.stddevs[column]
        # A zero standard deviation would make the division raise
        # ZeroDivisionError (or yield NaN/inf). Fall back to unit scale; for a
        # constant column x equals the mean, so the result is 0.
        # NOTE(review): assumes the stored stddev is a scalar -- confirm.
        if stddev == 0:
            stddev = 1.0
        x_normalized = (row[column] - mean) / stddev
        return new_dataframe_row(row, column + self.column_suffix, x_normalized)
Ejemplo n.º 7
0
    def _transform(self, row):
        """Append a two-element binary label vector to the row.

        The vector is [1, 0] when the value in `self.input_column` equals
        `self.label`, and [0, 1] otherwise.

        # Arguments:
            row: Spark Row. The value is read from `self.input_column`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the vector,
            as a DenseVector, stored under `self.output_column`.
        """
        # Slot 0 marks a match with the configured label, slot 1 a mismatch.
        hot_index = 0 if row[self.input_column] == self.label else 1
        one_hot = np.zeros(2)
        one_hot[hot_index] = 1.0
        return new_dataframe_row(row, self.output_column, DenseVector(one_hot))
Ejemplo n.º 8
0
    def _transform(self, row):
        """Rescale the row's vector and add it as a new column.

        Every element x of the vector is mapped to

            x' = scale * (x - o_max) + n_max

        using `self.scale`, `self.o_max` and `self.n_max`.
        NOTE(review): presumably `o_max` / `n_max` are the maxima of the
        original and the new range -- confirm against the constructor.

        # Arguments:
            row: Spark Row. The value stored under `self.input_column` must
                 expose a `toArray()` method.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the rescaled
            vector, as a DenseVector, stored under `self.output_column`.
        """
        elements = row[self.input_column].toArray()
        rescaled = self.scale * (elements - self.o_max) + self.n_max
        return new_dataframe_row(row, self.output_column, DenseVector(rescaled))
Ejemplo n.º 9
0
    def _predict(self, iterator):
        """Generator which appends a prediction column to the provided rows.

        The Keras model is deserialized once, then `predict` is called for
        every row separately.

        # Arguments:
            iterator: iterator. Spark Row iterator.

        # Yields:
            For every input row, the result of `new_dataframe_row` with the
            first prediction, as a DenseVector, under `self.output_column`.
        """
        keras_model = deserialize_keras_model(self.model)
        for row in iterator:
            # One single-sample batch per configured feature column.
            inputs = []
            for column in self.features_column:
                inputs.append(np.asarray([row[column]]))
            outputs = keras_model.predict(inputs)
            yield new_dataframe_row(row, self.output_column,
                                    DenseVector(outputs[0]))
Ejemplo n.º 10
0
    def _predict(self, iterator):
        """Generator which appends a prediction column to the provided rows.

        The Keras model is deserialized once, then `predict` is called for
        every row separately.

        # Arguments:
            iterator: iterator. Spark Row iterator.

        # Yields:
            For every input row, the result of `new_dataframe_row` with the
            first prediction, as a DenseVector, under `self.output_column`.
        """
        keras_model = deserialize_keras_model(self.model)
        for row in iterator:
            # Wrap the row's features in a list to form a single-sample batch.
            batch = np.asarray([row[self.features_column]])
            outputs = keras_model.predict(batch)
            yield new_dataframe_row(row, self.output_column,
                                    DenseVector(outputs[0]))
Ejemplo n.º 11
0
    def _transform(self, row):
        """Rescale the row's value and add it as a new column.

        Every value x (or every element x of the vector) is mapped to

            x' = scale * (x - o_max) + n_max

        using `self.scale`, `self.o_max` and `self.n_max`.
        NOTE(review): presumably `o_max` / `n_max` are the maxima of the
        original and the new range -- confirm against the constructor.

        # Arguments:
            row: Spark Row. When `self.is_vector` is set, the value stored
                 under `self.input_column` must expose `toArray()`; otherwise
                 it is used in the arithmetic directly.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the rescaled
            value stored under `self.output_column` (a DenseVector in the
            vector case).
        """
        if not self.is_vector:
            scalar = row[self.input_column]
            new_value = self.scale * (scalar - self.o_max) + self.n_max
        else:
            elements = row[self.input_column].toArray()
            rescaled = self.scale * (elements - self.o_max) + self.n_max
            new_value = DenseVector(rescaled)
        return new_dataframe_row(row, self.output_column, new_value)
Ejemplo n.º 12
0
    def _transform(self, row):
        """One-hot encode the label(s) of a single row into a new column.

        Only for internal use.

        A list of labels is encoded element by element into a matrix with one
        row per label and `self.output_dimensionality` columns. Any other
        value is passed to `to_one_hot_encoded_dense` as a single label.

        # Arguments:
            row: Spark Row. The label (or list of labels) is read from
                 `self.input_column`.

        # Returns:
            Whatever `new_dataframe_row` produces for `row` with the encoding,
            as (nested) Python lists, stored under `self.output_column`.
        """
        label = row[self.input_column]

        # `types.ListType` only exists on Python 2, where it is the builtin
        # `list`; using `list` directly also works on Python 3.
        if isinstance(label, list):
            vector = np.zeros((len(label), self.output_dimensionality))
            for i, single_label in enumerate(label):
                vector[i] = to_one_hot_encoded_dense(
                    single_label, self.output_dimensionality)
        else:
            vector = to_one_hot_encoded_dense(label,
                                              self.output_dimensionality)

        return new_dataframe_row(row, self.output_column, vector.tolist())