Beispiel #1
0
 def _model(self):
     """Build the unfitted regression estimator matching ``self.family``.

     A Gaussian family yields a plain ``LinearRegression``; a binomial
     family yields a ``GeneralizedLinearRegression`` with a logit link.
     The estimator's label column is set to ``self.response`` and its
     iteration cap to the instance's private max-iter setting.

     Returns:
         The configured (not yet fitted) estimator.

     Raises:
         NotImplementedError: if ``self.family`` is neither supported family.
     """
     if self.family == GAUSSIAN_:
         estimator = LinearRegression()
     elif self.family == BINOMIAL_:
         estimator = GeneralizedLinearRegression(
             family="binomial",
             link="logit",
         )
     else:
         message = "Family '{}' not implemented".format(self.family)
         raise NotImplementedError(message)

     # Settings shared by every supported family.
     estimator.setLabelCol(self.response)
     estimator.setMaxIter(self.__max_iter)
     return estimator
# Spark entry points: the context, the SQL context built on top of it, and
# the session that SQL context exposes.
sc = SparkContext()
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession

# Flask application object the route handlers are registered on.
app = Flask(__name__)

# Module-level slot for a model; nothing has been fitted at import time.
model = None

# Load the GPA CSV, using its first row as the header and letting Spark infer
# the column types.
gpa_df = (
    sqlContext.read
    .format('com.databricks.spark.csv')
    .options(header='true', inferSchema='true')
    .load("./gpa_data.csv")
)

# Linear regression of "c_gpa" on the "hs_gpa_vector" column, capped at
# 20 iterations.
lr = LinearRegression(
    maxIter=20,
    featuresCol="hs_gpa_vector",
    labelCol="c_gpa",
)

# Pack the single "hs_gpa" column into the vector column the regressor reads.
assembler = VectorAssembler(
    inputCols=["hs_gpa"],
    outputCol="hs_gpa_vector",
)
output = assembler.transform(gpa_df)

# Random split with weights 0.7 / 0.3 (presumably train / test; confirm
# against the code that consumes `split`).
split = output.randomSplit([0.7, 0.3])


@app.route('/home')
def doHome():
    """Handle ``/home`` by returning a fixed greeting string."""
    greeting = 'Hello, World!'
    return greeting


@app.route('/train')
def doTrain():
    global model
Beispiel #3
0
# In[9]:

# Print the parameter documentation of `vectorizer`, which is created in an
# earlier cell not shown here.
print(vectorizer.explainParams())

# In[10]:

from pyspark.ml.regression import LinearRegression

# In[11]:

# Create an estimator with default settings and print its parameters before
# any of them are overridden in the next cell.
lr = LinearRegression()
print(lr.explainParams())

# In[12]:

# Use column "EP" as the label and column "features" as the input, then fit
# on `df_vect` (defined in an earlier cell; presumably the vectorizer's
# output -- confirm).
lr.setLabelCol("EP")
lr.setFeaturesCol("features")
model = lr.fit(df_vect)

# In[13]:

# Bare expression: displays the fitted model's class when run as a notebook
# cell; it has no visible effect when this file runs as a plain script.
type(model)

# In[14]:

# Report the fit: R2 from the model's summary, then the intercept and the
# coefficients.
print("R2:", model.summary.r2)
print("Intercept: ", model.intercept, "Coefficients", model.coefficients)

# In[15]:

# Apply the fitted model to the same DataFrame it was fitted on.
df_pred = model.transform(df_vect)