Ejemplo n.º 1
0
def model_process(dataset=dataset):
    """Cross-validate the model selected in the submitted form and render the scores.

    Form fields read from ``request.form``:

    * ``model``     -- ``"<algorithm>.<task>"``; ``<task>`` must be
      ``Classification`` or ``Regression`` and ``<algorithm>`` is used as a key
      into ``algorithms.classificationModels()`` / ``algorithms.regressionModels()``.
    * ``response``  -- name of the target column.
    * ``kfold``     -- number of cross-validation folds (parsed with ``int``).
    * ``scaling``   -- ``"Yes"`` wraps the estimator in a ``StandardScaler`` pipeline.
    * ``variables`` -- optional list of predictor columns; the whole dataset is
      used when the list is empty or contains a blank entry.

    Returns:
        The rendered ``scores.html`` template.

    Raises:
        ValueError: if ``<task>`` is neither ``Classification`` nor ``Regression``
            (or if the ``model`` field does not contain exactly one ``.``).
    """
    from sklearn.model_selection import cross_validate
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    algscore = request.form.get('model')
    res = request.form.get('response')
    kfold = request.form.get('kfold')
    alg, score = algscore.split('.')
    scaling = request.form.get('scaling')
    variables = request.form.getlist('variables')

    df = loadDataset(dataset)
    y = df[str(res)]

    if variables and '' not in variables:
        # De-duplicate while keeping the submitted order so that the feature
        # order (and the predictor summary) is deterministic between requests.
        wanted = []
        for name in variables + [res]:
            if name not in wanted:
                wanted.append(name)
        df = df[wanted]
    X = df.drop(str(res), axis=1)
    try:
        X = pd.get_dummies(X)
    except Exception:
        # Best effort: fall back to the raw predictors when they cannot be
        # one-hot encoded.
        pass

    predictors = X.columns
    if len(predictors) > 10:
        # Too many names to list; show only how many there are.
        pred = str(len(predictors))
    else:
        pred = ', '.join(map(str, predictors))

    if score == 'Classification':
        estimator = algorithms.classificationModels()[alg]
        scoring = ['precision', 'recall', 'f1', 'accuracy', 'roc_auc']
    elif score == 'Regression':
        estimator = algorithms.regressionModels()[alg]
        # scikit-learn dropped the 'mean_squared_error' scorer name in favour of
        # 'neg_mean_squared_error'. The dict form keeps the historical result
        # key ('test_mean_squared_error'); the reported value is the negated MSE.
        scoring = {
            'explained_variance': 'explained_variance',
            'r2': 'r2',
            'mean_squared_error': 'neg_mean_squared_error',
        }
    else:
        raise ValueError('Unsupported model type: %r' % (score,))

    if scaling == 'Yes':
        mod = Pipeline([('scaler', StandardScaler()), ('clf', estimator)])
    else:
        mod = estimator

    n_folds = int(kfold)
    if score == 'Classification':
        fig = plotfun.plot_ROC(X.values, y, mod, n_folds)
    else:
        fig = plotfun.plot_predVSreal(X, y, mod, n_folds)

    scores = cross_validate(mod, X, y, cv=n_folds, scoring=scoring)
    # Collapse each per-fold array to its mean, formatted with 3 decimals.
    summary = {
        name: str(round(np.mean(values), 3)) for name, values in scores.items()
    }
    return render_template('scores.html',
                           scores=summary,
                           dataset=dataset,
                           alg=alg,
                           res=res,
                           kfold=kfold,
                           score=score,
                           predictors=pred,
                           # fig must be bytes-like here; presumably an encoded
                           # plot image produced by plotfun -- confirm.
                           response=str(fig, 'utf-8'))
Ejemplo n.º 2
0
def predict(dataset=dataset):
    """Render ``predict.html`` for ``dataset``.

    The template receives the dataset identifier, the result of
    ``loadColumns(dataset)`` and the model collections returned by
    ``algorithms.classificationModels()`` and ``algorithms.regressionModels()``.
    """
    context = {
        'dataset': dataset,
        'columns': loadColumns(dataset),
        'clfmodels': algorithms.classificationModels(),
        'predmodels': algorithms.regressionModels(),
    }
    return render_template('predict.html', **context)
Ejemplo n.º 3
0
def predict_process(dataset=dataset):
    """Fit the selected model on ``dataset`` and predict for the submitted values.

    Form fields read from ``request.form``:

    * ``model``    -- ``"<algorithm>.<task>"``; ``<task>`` must be
      ``Classification`` or ``Regression``.
    * ``response`` -- name of the target column.
    * ``scaling``  -- ``"Yes"`` standardises the predictors before fitting.
    * one field per dataset column holding the value to predict from; blank or
      absent fields are left out of the model.

    Returns:
        A redirect back to the prediction form when no predictor value was
        supplied, otherwise the rendered ``prediction.html`` template.

    Raises:
        ValueError: if ``<task>`` is neither ``Classification`` nor ``Regression``
            (or if the ``model`` field does not contain exactly one ``.``).
    """
    from sklearn.preprocessing import StandardScaler

    algscore = request.form.get('model')
    res = request.form.get('response')
    alg, score = algscore.split('.')
    scaling = request.form.get('scaling')
    df = loadDataset(dataset)

    # Collect the supplied predictor values as one-row columns. The response
    # column is never used as a feature, even if a value was submitted for it:
    # otherwise a categorical response would be one-hot encoded under new
    # column names and leak into the training matrix.
    predictors = {}
    for col in df.columns:
        if col == res:
            continue
        raw = request.form.get(col)
        if raw is None or raw == '':
            continue
        try:
            predictors[col] = [float(raw)]
        except ValueError:
            # Not numeric: keep the text so it is treated as a category.
            predictors[col] = [raw]

    if not predictors:
        return redirect('/datasets/' + dataset + '/predict')

    # Append the query row to the training rows before encoding so that both
    # end up with exactly the same dummy columns, then split them again.
    combined = pd.concat([df[list(predictors)], pd.DataFrame(data=predictors)])
    combined = pd.get_dummies(combined)
    Xpred = combined.iloc[[-1]]
    X = combined.iloc[:-1]

    if scaling == 'Yes':
        scaler = StandardScaler()
        X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
        Xpred = pd.DataFrame(scaler.transform(Xpred), columns=X.columns)

    y = df[str(res)]
    if score == 'Classification':
        mod = algorithms.classificationModels()[alg]
    elif score == 'Regression':
        mod = algorithms.regressionModels()[alg]
    else:
        raise ValueError('Unsupported model type: %r' % (score,))
    model = mod.fit(X, y)

    predictions = {'Prediction': _display_value(model.predict(Xpred)[0])}

    shown = {name: _display_value(vals[0]) for name, vals in predictors.items()}
    if len(shown) > 15:
        # Too many inputs to list individually; report only the count.
        shown = {'Number of predictors': len(shown)}

    # Some classifiers do not expose predict_proba; skip the per-class
    # probabilities for those instead of failing the request.
    if score == 'Classification' and hasattr(model, 'predict_proba'):
        probabilities = model.predict_proba(Xpred)[0]
        for label, proba in zip(model.classes_, probabilities):
            predictions['Prob. ' + str(label)] = round(proba, 3)

    return render_template('prediction.html',
                           predictions=predictions,
                           response=res,
                           predictors=shown,
                           algorithm=alg,
                           score=score,
                           dataset=dataset)


def _display_value(value):
    """Return ``value`` formatted for display.

    Values whose text form is all digits become ``int``; other values are
    rounded to two decimals when they support ``round``; anything else is
    returned unchanged.
    """
    if str(value).isdigit():
        try:
            return int(value)
        except (TypeError, ValueError):
            # e.g. non-ASCII digit characters that int() rejects.
            pass
    try:
        return round(value, 2)
    except TypeError:
        return value