Esempio n. 1
0
def column_info(request):
    """Respond with the values of a single column of a dataset.

    Reads ``column`` (the column label) and ``data_id`` (converted with
    ``int``) from ``request.data``, loads the dataset through
    ``get_dataframe`` and returns ``Response({'values': <column data>})``.

    Raises:
        KeyError: if ``column`` or ``data_id`` is missing from
            ``request.data`` (and presumably if the loaded frame has no
            such column -- depends on what ``get_dataframe`` returns).
        ValueError: if ``data_id`` is a string that ``int`` cannot parse.
    """
    column = request.data['column']
    dataset_id = int(request.data['data_id'])

    df = get_dataframe(dataset_id=dataset_id)

    # Select the column directly by label; there is no need to build an
    # intermediate one-column frame and index into it a second time.
    return Response({'values': df[column]})
Esempio n. 2
0
def remove_row(request, data_id):
    """Drop one row from the dataset ``data_id`` and store the result.

    ``request.data['id']`` is converted with ``int`` and used as a position
    into the frame's index; the label found at that position is the one
    passed to ``drop``. The reduced frame is handed to ``update_dataframe``
    and an empty ``Response`` is returned.
    """
    frame = get_dataframe(data_id)
    position = int(request.data['id'])

    # ``drop`` works on labels, so translate the position into its label.
    label = frame.index[position]
    remaining = frame.drop(label)

    update_dataframe(remaining, data_id)
    return Response()
Esempio n. 3
0
def remove_column(request, data_id):
    """Drop the column named in ``request.data['column']`` from a dataset.

    The frame is loaded with ``get_dataframe(data_id)``, the column is
    dropped, and the result is passed to ``update_dataframe``.
    """
    frame = get_dataframe(data_id)
    target = request.data['column']

    without_target = frame.drop(columns=[target])
    update_dataframe(without_target, data_id)

    # NOTE: ``{'ok'}`` is a set literal (not a dict); it is kept unchanged
    # so the response payload stays exactly what it was.
    return Response({'ok'})
Esempio n. 4
0
def rename_column(request, data_id):
    """Rename one column of the dataset ``data_id``.

    The current and new labels are read from ``request.data['old_name']``
    and ``request.data['new_name']``. The renamed frame is passed to
    ``update_dataframe``.
    """
    frame = get_dataframe(data_id)
    current_label = request.data['old_name']
    replacement_label = request.data['new_name']

    renamed = frame.rename(columns={current_label: replacement_label})
    update_dataframe(renamed, data_id)

    # NOTE: ``{'ok'}`` is a set literal (not a dict); kept unchanged so the
    # response payload stays exactly what it was.
    return Response({'ok'})
Esempio n. 5
0
def forest_model_info(x_names, y_names, data_id, estimators):
    """Fit a random forest on a dataset and report its score.

    Args:
        x_names: column selection used as model input.
        y_names: column selection used as model target.
        data_id: identifier passed to ``dataframe.get_dataframe``.
        estimators: value forwarded as ``n_estimators``.

    Returns:
        dict with ``'r_squared'`` (``score`` evaluated on the same data the
        model was fitted on) and ``'estimators'`` (the model's
        ``n_estimators``).
    """
    frame = dataframe.get_dataframe(data_id)
    features, target = frame[x_names], frame[y_names]

    # Fixed seed and a shallow depth are hard-coded for this model.
    regressor = RandomForestRegressor(
        n_estimators=estimators,
        random_state=0,
        max_depth=2,
    )
    regressor.fit(features, target)

    return {
        'r_squared': regressor.score(features, target),
        'estimators': regressor.n_estimators,
    }
Esempio n. 6
0
def get_nan_columns_for_ds(ds_id):
    """Return the labels of all columns that contain at least one null.

    The dataset is loaded with ``dataframe.get_dataframe(ds_id)``; columns
    are reported in the order they appear in the frame.
    """
    frame = dataframe.get_dataframe(ds_id)
    return [
        label
        for label in frame.columns.values
        if frame[label].isnull().values.any()
    ]
Esempio n. 7
0
def linear_model_scatter(x_name, y_name, data_id):
    """Fit a one-variable linear regression and build its scatter data.

    Both ``x_name`` and ``y_name`` are selected as single-column frames
    (double brackets) so the estimator receives 2-D inputs. The fitted model
    and the data are forwarded to ``sct.get_scatter_data``, whose result is
    returned unchanged.
    """
    frame = dataframe.get_dataframe(data_id)

    features = frame[[x_name]]
    target = frame[[y_name]]

    regression = LinearRegression()
    regression.fit(features, target)

    return sct.get_scatter_data(regression, features, target, x_name, y_name)
Esempio n. 8
0
def linear_model_info(x_names, y_names, data_id):
    """Fit an OLS model with a constant term and return its description.

    Args:
        x_names: list of predictor column labels (it is concatenated to a
            list below, so it must be a list).
        y_names: column selection used as the dependent variable.
        data_id: identifier passed to ``dataframe.get_dataframe``.

    Returns:
        dict with ``"info"`` (the result of ``get_model_info`` extended with
        a ``'predictors'`` list) and ``'validation_result'`` (the result of
        ``validator.validate_linear`` for that info).
    """
    frame = dataframe.get_dataframe(data_id)

    design = sm.add_constant(frame[x_names])
    fitted = sm.OLS(frame[y_names], design).fit()

    summary = get_model_info(fitted)
    # The first predictor label stands for the added constant term; the
    # literal is user-facing text and must stay as it is.
    summary['predictors'] = ['Смещение'] + x_names

    return {
        "info": summary,
        'validation_result': validator.validate_linear(summary),
    }
Esempio n. 9
0
def edit_row(request, data_id):
    """Overwrite every cell of one row with values from the request.

    ``request.data['__row_id__']`` (converted with ``int``) identifies the
    row by index label. For each column of the frame, the request value
    stored under the column's name is converted with ``__get_numeric_val``
    and written into that row. The modified frame is passed to
    ``update_dataframe`` and an empty-dict ``Response`` is returned.

    Raises:
        KeyError: if ``__row_id__`` or any column name is missing from
            ``request.data``.
    """
    row_num = int(request.data['__row_id__'])
    df = get_dataframe(data_id)

    # Snapshot the column labels before writing into the frame.
    for column in list(df):
        value = __get_numeric_val(request.data[column])
        # Assign through a single ``.loc`` call on the frame itself.
        # Chained ``df[column][row_num] = value`` writes through an
        # intermediate Series that pandas may treat as a copy, in which
        # case the edit never reaches ``df``.
        df.loc[row_num, column] = value

    update_dataframe(df, data_id)
    return Response({})
Esempio n. 10
0
def poly_model_scatter(x_name, y_name, data_id, degree):
    """Fit a one-variable polynomial regression and build its scatter data.

    The model is a pipeline of ``PolynomialFeatures(degree=degree)`` followed
    by ``LinearRegression``. Inputs are selected as single-column frames.
    The result of ``sct.get_scatter_data`` is returned unchanged.
    """
    frame = dataframe.get_dataframe(data_id)
    features, target = frame[[x_name]], frame[[y_name]]

    expansion = PolynomialFeatures(degree=degree)
    pipeline = make_pipeline(expansion, LinearRegression())
    pipeline.fit(features, target)

    return sct.get_scatter_data(pipeline, features, target, x_name, y_name)
Esempio n. 11
0
def add_row(request, data_id):
    """Append one row built from the request to the dataset ``data_id``.

    For every column of the frame, the request value stored under the
    column's name is converted with ``__get_numeric_val``. The resulting row
    is appended with a renumbered index, the frame is passed to
    ``update_dataframe``, and the position of the new (last) row is returned
    in the ``Response``.

    Raises:
        KeyError: if a column name is missing from ``request.data``.
    """
    # Imported locally so the function does not depend on a module-level
    # pandas alias being present.
    import pandas as pd

    df = get_dataframe(data_id)

    values = {
        column: __get_numeric_val(request.data[column])
        for column in list(df)
    }

    # ``DataFrame.append`` no longer exists in pandas >= 2.0; concatenating a
    # one-row frame is the supported way to add a row.
    new_row = pd.DataFrame([values])
    df = pd.concat([df, new_row], ignore_index=True)
    update_dataframe(df, data_id)

    return Response(df.shape[0] - 1)
Esempio n. 12
0
def poly_model_info(x_names, y_names, data_id, degree):
    """Fit a polynomial regression and report its score and coefficients.

    Args:
        x_names: column selection used as model input.
        y_names: column selection used as model target.
        data_id: identifier passed to ``dataframe.get_dataframe``.
        degree: degree forwarded to ``PolynomialFeatures``.

    Returns:
        dict with ``'r_squared'`` (``score`` on the fitting data),
        ``'degree'``, ``'coefs'`` and ``'intercept'``.
    """
    frame = dataframe.get_dataframe(data_id)
    features = frame[x_names]
    target = frame[y_names]

    pipeline = make_pipeline(
        PolynomialFeatures(degree=degree),
        LinearRegression(),
    )
    pipeline.fit(features, target)

    # ``steps`` holds (name, estimator) pairs; the second one is the
    # regression stage of the pipeline.
    _, regression = pipeline.steps[1]

    return {
        'r_squared': pipeline.score(features, target),
        'degree': degree,
        # First row of ``coef_`` without its leading entry (presumably the
        # coefficient of the bias column added by PolynomialFeatures).
        'coefs': regression.coef_[0][1:],
        'intercept': regression.intercept_,
    }
Esempio n. 13
0
def neural_model_info(x_names, y_names, data_id, activation, hidden):
    """Fit a single-hidden-layer MLP regressor and report its score.

    Args:
        x_names: column selection used as model input.
        y_names: column selection used as model target; its values are
            flattened with ``ravel`` before fitting and scoring.
        data_id: identifier passed to ``dataframe.get_dataframe``.
        activation: value forwarded as the regressor's ``activation``.
        hidden: size of the only hidden layer.

    Returns:
        dict with ``'r_squared'`` (``score`` on the fitting data),
        ``'activation'`` (looked up in ``func_mapping``, ``''`` when absent)
        and ``'hidden_layer_sizes'``.
    """
    frame = dataframe.get_dataframe(data_id)
    features = frame[x_names]
    flat_target = frame[y_names].values.ravel()

    network = MLPRegressor(
        hidden_layer_sizes=(hidden, ),
        max_iter=10000,
        activation=activation,
        random_state=9,
    )
    network = network.fit(features, flat_target)

    return {
        'r_squared': network.score(features, flat_target),
        'activation': func_mapping.get(network.activation, ''),
        'hidden_layer_sizes': network.hidden_layer_sizes
    }
Esempio n. 14
0
def forest_model_scatter(x_name, y_name, data_id, estimators):
    """Fit a one-variable random forest and build its scatter data.

    Inputs are selected as single-column frames. The forest uses
    ``n_estimators=estimators``, ``random_state=0`` and ``max_depth=2``.
    ``sct.get_scatter_data`` is called with ``scalar=True`` and its result
    is returned unchanged.
    """
    frame = dataframe.get_dataframe(data_id)

    features = frame[[x_name]]
    target = frame[[y_name]]

    regressor = RandomForestRegressor(
        n_estimators=estimators,
        random_state=0,
        max_depth=2,
    )
    regressor = regressor.fit(features, target)

    return sct.get_scatter_data(
        regressor, features, target, x_name, y_name, scalar=True)
Esempio n. 15
0
def auto_analysis(request):
    """Build candidate models for a dataset and return them ranked by score.

    Reads ``x``, ``y`` and ``data_id`` from ``request.data``, loads the
    dataset, asks ``a_analysis.get_models`` for the models of the requesting
    user, sorts them in place by their ``'score'`` entry (highest first) and
    formats them with ``a_analysis.format_models_data``.

    Returns:
        ``Response`` with ``'models'`` (the formatted models) and
        ``'highlight_limit'`` (the result of ``__calc_highlight_limit`` for
        the sorted models).

    Raises:
        KeyError: if ``x``, ``y`` or ``data_id`` is missing from
            ``request.data``.
    """
    request_x = request.data['x']
    request_y = request.data['y']
    data_id = request.data['data_id']
    df = get_dataframe(data_id)

    x = df[request_x]
    y = df[request_y]

    models = a_analysis.get_models(x, y, request.user)
    models.sort(key=lambda m: m['score'], reverse=True)
    formatted = a_analysis.format_models_data(models, df)

    # No debug printing here: a request handler should not write to stdout.
    limit = __calc_highlight_limit(models)

    return Response({'models': formatted, 'highlight_limit': limit})
Esempio n. 16
0
def neural_model_scatter(x_name, y_name, data_id, activation, hidden):
    """Fit a one-variable MLP regressor and build its scatter data.

    Inputs are selected as single-column frames; the target values are
    flattened with ``ravel`` for fitting only. The unflattened target frame
    is what gets forwarded to ``sct.get_scatter_data`` (called with
    ``scalar=True``).
    """
    frame = dataframe.get_dataframe(data_id)
    features, target = frame[[x_name]], frame[[y_name]]

    network = MLPRegressor(
        hidden_layer_sizes=(hidden, ),
        max_iter=10000,
        activation=activation,
        random_state=9,
    )
    fitted = network.fit(features, target.values.ravel())

    return sct.get_scatter_data(
        fitted, features, target, x_name, y_name, scalar=True)
Esempio n. 17
0
def predict(model_id, inputs):
    """Run a prediction with the stored model ``model_id``.

    The ``MlModel`` record supplies the input/output column selections and
    the dataset id. The record's ``model`` attribute selects which
    ``__predict_*`` helper is called.

    Returns:
        The helper's result, or ``{'error': 'Incorrect model type'}`` when
        the model type is none of ``'OLS'``, ``'Polynomial'``, ``'MLP'``,
        ``'Forest'``.
    """
    record = MlModel.objects.get(pk=model_id)

    input_cols = record.ds_in_cols
    output_cols = record.ds_out_cols

    frame = dataframe.get_dataframe(record.dataset.id)
    x = frame[input_cols]
    y = frame[output_cols]

    # Lazy handlers: only the selected helper is executed. Note that the
    # linear helper does not receive the model record.
    handlers = {
        'OLS': lambda: __predict_linear(x, y, inputs),
        'Polynomial': lambda: __predict_poly(record, x, y, inputs),
        'MLP': lambda: __predict_neural(record, x, y, inputs),
        'Forest': lambda: __predict_forest(record, x, y, inputs),
    }

    handler = handlers.get(record.model)
    if handler is None:
        return {'error': 'Incorrect model type'}
    return handler()
Esempio n. 18
0
def analysis(request):
    """Describe the selected input and output columns of a dataset.

    Reads ``dataset_id``, ``in_columns`` and ``out_columns`` from
    ``request.data``. For both column groups it reports the number of unique
    values per column and a coarse type (``'str'`` when
    ``is_string_dtype`` says so, otherwise ``'num'``). It also reports the
    results of ``__get_incorrect_columns`` and
    ``miss_values.get_nan_columns`` for all selected columns, together with
    the ``unique_values_threshold`` of the requesting user's
    ``Configuration``.
    """
    payload = request.data
    ds_id = payload['dataset_id']
    in_columns = payload['in_columns']
    out_columns = payload['out_columns']

    df = get_dataframe(ds_id)

    def unique_counts(columns):
        # Map each selected column to its number of distinct values.
        return dict(zip(columns, df[columns].nunique().tolist()))

    def coarse_type(column):
        # Only two categories are distinguished: string-like and the rest.
        return 'str' if is_string_dtype(df[column]) else 'num'

    in_uniques = unique_counts(in_columns)
    out_uniques = unique_counts(out_columns)

    in_types = {column: coarse_type(column) for column in in_columns}
    out_types = {column: coarse_type(column) for column in out_columns}

    selected = in_columns + out_columns
    incorrect_columns = __get_incorrect_columns(df, selected)
    nan_columns = miss_values.get_nan_columns(df, selected)

    user_config = Configuration.objects.get(owner=request.user)

    return Response({
        'unique_threshold': user_config.unique_values_threshold,
        'in_columns': in_columns,
        'out_columns': out_columns,
        'in_unique': in_uniques,
        'out_unique': out_uniques,
        'in_types': in_types,
        'out_types': out_types,
        'incorrect_columns': incorrect_columns,
        'nan_columns': nan_columns
    })
Esempio n. 19
0
def remove_nan(request, data_id):
    """Drop every row containing a missing value from dataset ``data_id``.

    The frame is loaded with ``get_dataframe``, passed through ``dropna()``
    with default arguments, and handed to ``update_dataframe``. ``request``
    is not read.
    """
    cleaned = get_dataframe(data_id).dropna()
    update_dataframe(cleaned, data_id)

    # NOTE: ``{'ok'}`` is a set literal (not a dict); kept unchanged so the
    # response payload stays exactly what it was.
    return Response({'ok'})