def _transform(self, row):
    """Add the "predicted index" column to a single row.

    The value found in `self.input_column` is handed to `self.get_index`
    and the result is cast to a float before being stored under
    `self.output_column`.

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        Whatever `new_dataframe_row` produces for the row, the output
        column name and the float index.
    """
    predicted_index = float(self.get_index(row[self.input_column]))
    return new_dataframe_row(row, self.output_column, predicted_index)
def _transform(self, row):
    """Store a dense copy of the sparse input vector in a new column.

    # Arguments:
        row: the row to transform. The value in `self.input_column` must
             expose a `toArray()` method.

    # Returns:
        The result of `new_dataframe_row` for the row, the output column
        name and the `DenseVector` built from `toArray()`.
    """
    values = row[self.input_column].toArray()
    return new_dataframe_row(row, self.output_column, DenseVector(values))
def _transform(self, row):
    """Reshape the input vector to `self.shape` and store it in a new column.

    The value is converted to a NumPy array, reshaped, and turned back into
    (nested) Python lists before it is attached to the row.

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        The result of `new_dataframe_row` for the row, the output column
        name and the reshaped nested list.
    """
    nested = np.asarray(row[self.input_column]).reshape(self.shape).tolist()
    return new_dataframe_row(row, self.output_column, nested)
def _transform(self, row):
    """Encode the label of a single row. Only for internal use.

    The label in `self.input_column` is passed, together with
    `self.output_dimensionality`, to `to_one_hot_encoded_dense`; the
    encoding is stored as a plain list under `self.output_column`.

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        The result of `new_dataframe_row` for the row, the output column
        name and the encoded list.
    """
    encoded = to_one_hot_encoded_dense(
        row[self.input_column], self.output_dimensionality
    )
    return new_dataframe_row(row, self.output_column, encoded.tolist())
def _transform(self, row):
    """Standardize the current column using the stored mean and std dev.

    Computes `(x - mean) / stddev` for the value in `self.current_column`
    and stores it in a column named `self.current_column` followed by
    `self.column_suffix`.

    A stored standard deviation of exactly zero (a constant column) would
    make the division fail, so in that case the value is only centred
    (divided by 1.0 instead).

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        The result of `new_dataframe_row` for the row, the suffixed column
        name and the normalized value.
    """
    column = self.current_column
    mean = self.means[column]
    stddev = self.stddevs[column]
    # Dividing by a zero deviation raises ZeroDivisionError for Python
    # floats (or yields inf/nan for NumPy scalars); fall back to a unit
    # divisor so constant columns are merely centred.
    divisor = stddev if stddev != 0 else 1.0
    x_normalized = (row[column] - mean) / divisor
    output_column = column + self.column_suffix
    return new_dataframe_row(row, output_column, x_normalized)
def _transform(self, row):
    """Append the binary label column to a single row.

    The output is a two-element vector: `[1.0, 0.0]` when the value in
    `self.input_column` equals `self.label`, `[0.0, 1.0]` otherwise.

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        The result of `new_dataframe_row` for the row, the output column
        name and the two-element `DenseVector`.
    """
    # Slot 0 marks a match with the configured label, slot 1 a mismatch.
    hot_slot = 0 if row[self.input_column] == self.label else 1
    encoding = np.zeros(2)
    encoding[hot_slot] = 1.0
    # Convert to a Spark DenseVector.
    return new_dataframe_row(row, self.output_column, DenseVector(encoding))
def _predict(self, iterator):
    """Generator which appends a prediction column to the provided rows.

    The model is deserialized once from `self.model` when iteration
    starts, and is then queried one row at a time.

    # Arguments:
        iterator: iterator. Spark Row iterator.

    # Yields:
        For every incoming row, the result of `new_dataframe_row` for the
        row, the output column name and a `DenseVector` holding the first
        entry of the model's prediction.
    """
    keras_model = deserialize_keras_model(self.model)
    for record in iterator:
        # Wrap each feature value in a list so every model input is a
        # batch containing exactly one sample.
        inputs = [np.asarray([record[name]]) for name in self.features_column]
        outputs = keras_model.predict(inputs)
        yield new_dataframe_row(record, self.output_column, DenseVector(outputs[0]))
def _transform(self, row):
    """Rescale the input column of a single row.

    Every value is mapped as

        x' = self.scale * (x - self.o_max) + self.n_max

    When `self.is_vector` is set, the input is converted with `toArray()`,
    rescaled element-wise and wrapped in a `DenseVector`; otherwise the
    input is rescaled directly as a scalar.

    # Arguments:
        row: the row to transform; indexed by column name.

    # Returns:
        The result of `new_dataframe_row` for the row, the output column
        name and the rescaled value.
    """
    if not self.is_vector:
        scalar = row[self.input_column]
        rescaled = self.scale * (scalar - self.o_max) + self.n_max
        return new_dataframe_row(row, self.output_column, rescaled)

    values = row[self.input_column].toArray()
    values = self.scale * (values - self.o_max) + self.n_max
    # Construct a new row with the normalized vector.
    return new_dataframe_row(row, self.output_column, DenseVector(values))