def _model(self):
    """Create the regression estimator that matches ``self.family``.

    Returns:
        A ``LinearRegression`` when the family is ``GAUSSIAN_``, or a
        ``GeneralizedLinearRegression`` (binomial family, logit link) when
        the family is ``BINOMIAL_``. In both cases the estimator's label
        column is set to ``self.response`` and its maximum iteration count
        to ``self.__max_iter`` before it is returned.

    Raises:
        NotImplementedError: If ``self.family`` is neither ``GAUSSIAN_``
            nor ``BINOMIAL_``.
    """
    if self.family == GAUSSIAN_:
        estimator = LinearRegression()
    elif self.family == BINOMIAL_:
        estimator = GeneralizedLinearRegression(family="binomial", link="logit")
    else:
        message = "Family '{}' not implemented".format(self.family)
        raise NotImplementedError(message)

    # Settings shared by every supported family.
    estimator.setLabelCol(self.response)
    estimator.setMaxIter(self.__max_iter)
    return estimator
# In[18]:

# Name the column that will hold predictions and the column used as the label.
(
    lr.setPredictionCol("Predicted_PE")
    .setLabelCol("PE")
)

# We will also configure two parameters, which are customary to the linear
# regression:
# - the maximum number of iterations to 100
# - the regularization parameter to 0.1

# In[19]:

(
    lr.setMaxIter(100)
    .setRegParam(0.1)
)

# ## Part 8 Create a pipeline
#
# Next, to create a workflow that puts together the vectorization and the Linear
# Regression learner, we can create an ML Pipeline that stitches together the two
# transformations we created before.

# In[25]:

lrPipeline = Pipeline()
lrPipeline.setStages([vectorizer, lr])