Esempio n. 1
0
    def _transform(self, row):
        """Build a row that carries the index of the row's prediction.

        The value found under ``self.input_column`` is handed to
        ``self.get_index``; the result is cast to ``float`` and passed to
        ``new_dataframe_row`` together with ``self.output_column``.

        # Arguments:
            row: row to process; must be indexable by ``self.input_column``.

        # Returns:
            Whatever ``new_dataframe_row`` produces for the given row,
            output column name and index value.
        """
        predicted = row[self.input_column]
        # The index is stored as a float, regardless of what get_index returns.
        label_index = float(self.get_index(predicted))

        return new_dataframe_row(row, self.output_column, label_index)
Esempio n. 2
0
    def _transform(self, row):
        """Store a ``DenseVector`` built from the row's input vector.

        The value under ``self.input_column`` is converted with its
        ``toArray()`` method, wrapped in a ``DenseVector`` and passed to
        ``new_dataframe_row`` together with ``self.output_column``.

        # Arguments:
            row: row to process; the input column value must expose
                 ``toArray()``.

        # Returns:
            The result of ``new_dataframe_row``.
        """
        source = row[self.input_column]
        values = source.toArray()
        densified = DenseVector(values)

        return new_dataframe_row(row, self.output_column, densified)
Esempio n. 3
0
    def _transform(self, row):
        """Reshape the row's input vector to ``self.shape``.

        The value under ``self.input_column`` is converted to a NumPy array,
        reshaped to ``self.shape`` and turned into (nested) Python lists
        before being passed to ``new_dataframe_row`` with
        ``self.output_column``.

        # Arguments:
            row: row to process; must be indexable by ``self.input_column``.

        # Returns:
            The result of ``new_dataframe_row``.
        """
        flat = np.asarray(row[self.input_column])
        # Plain lists are stored, not the ndarray itself.
        nested = np.reshape(flat, self.shape).tolist()

        return new_dataframe_row(row, self.output_column, nested)
Esempio n. 4
0
    def _transform(self, row):
        """Encode the label of a single row with ``to_one_hot_encoded_dense``.

        Only for internal use.

        The label is read from ``self.input_column`` and encoded using
        ``self.output_dimensionality``; the encoded value is converted with
        ``tolist()`` and passed to ``new_dataframe_row`` together with
        ``self.output_column``.

        # Arguments:
            row: row to process; must be indexable by ``self.input_column``.

        # Returns:
            The result of ``new_dataframe_row``.
        """
        raw_label = row[self.input_column]
        encoded = to_one_hot_encoded_dense(raw_label, self.output_dimensionality)
        as_list = encoded.tolist()

        return new_dataframe_row(row, self.output_column, as_list)
Esempio n. 5
0
    def _transform(self, row):
        """Standardize the value of the current column in a row.

        The entry of ``self.means`` for ``self.current_column`` is subtracted
        from the row's value and the difference is divided by the matching
        entry of ``self.stddevs``. The result is passed to
        ``new_dataframe_row`` under the name of the current column followed
        by ``self.column_suffix``.

        # Arguments:
            row: row to process; must be indexable by ``self.current_column``.

        # Returns:
            The result of ``new_dataframe_row``.
        """
        column = self.current_column
        mu = self.means[column]
        sigma = self.stddevs[column]
        standardized = (row[column] - mu) / sigma
        target_column = column + self.column_suffix

        return new_dataframe_row(row, target_column, standardized)
Esempio n. 6
0
    def _transform(self, row):
        """Encode whether the row's input value equals ``self.label``.

        A two-element vector is produced: ``[1.0, 0.0]`` when the value under
        ``self.input_column`` equals ``self.label`` and ``[0.0, 1.0]``
        otherwise. It is wrapped in a ``DenseVector`` and passed to
        ``new_dataframe_row`` together with ``self.output_column``.

        # Arguments:
            row: row to process; must be indexable by ``self.input_column``.

        # Returns:
            The result of ``new_dataframe_row``.
        """
        observed = row[self.input_column]
        encoded = np.zeros(2)
        # Slot 0 marks a match with the configured label, slot 1 a mismatch.
        hot_slot = 0 if observed == self.label else 1
        encoded[hot_slot] = 1.0
        binary_label = DenseVector(encoded)

        return new_dataframe_row(row, self.output_column, binary_label)
Esempio n. 7
0
    def _predict(self, iterator):
        """Yield one row with a prediction for every row of the iterator.

        The model is obtained once from ``self.model`` through
        ``deserialize_keras_model``. For each row, every column named in
        ``self.features_column`` is wrapped in a one-element NumPy array, the
        list of arrays is passed to ``model.predict`` and the first entry of
        the output is stored as a ``DenseVector``.

        # Arguments:
            iterator: iterator. Spark Row iterator.

        # Yields:
            The result of ``new_dataframe_row`` for each row, using
            ``self.output_column`` and the row's prediction.
        """
        keras_model = deserialize_keras_model(self.model)
        for record in iterator:
            inputs = []
            for column in self.features_column:
                # Each feature becomes a batch holding a single sample.
                inputs.append(np.asarray([record[column]]))
            output = keras_model.predict(inputs)
            predicted = DenseVector(output[0])
            yield new_dataframe_row(record, self.output_column, predicted)
Esempio n. 8
0
    def _transform(self, row):
        """Rescale every instance like this:

        x' = \frac{x - min}{max - min}
        """
        if self.is_vector:
            vector = row[self.input_column].toArray()
            vector = self.scale * (vector - self.o_max) + self.n_max
            new_value = DenseVector(vector)
        else:
            value = row[self.input_column]
            new_value = self.scale * (value - self.o_max) + self.n_max
        # Construct a new row with the normalized vector.
        new_row = new_dataframe_row(row, self.output_column, new_value)

        return new_row