def column_info(request):
    """Return every value of a single dataset column.

    Expects ``column`` (column name) and ``data_id`` (dataset primary key)
    in the request payload.
    """
    column = request.data['column']
    dataset_id = int(request.data['data_id'])
    df = get_dataframe(dataset_id=dataset_id)
    # The original selected a one-column frame and then that column again
    # (df[[column]][column]); a single label lookup yields the same Series.
    return Response({'values': df[column]})
def remove_row(request, data_id):
    """Delete one row (addressed by position) from the dataset and persist it."""
    position = int(request.data['id'])
    frame = get_dataframe(data_id)
    # Translate the positional index into the actual index label before dropping.
    frame = frame.drop(frame.index[position])
    update_dataframe(frame, data_id)
    return Response()
def remove_column(request, data_id):
    """Drop the named column from the dataset and persist the result."""
    frame = get_dataframe(data_id)
    target = request.data['column']
    frame = frame.drop(columns=[target])
    update_dataframe(frame, data_id)
    # NOTE(review): {'ok'} is a *set* literal, not a dict; DRF renders it as a
    # JSON array. Probably {'status': 'ok'} was intended — confirm with the client.
    return Response({'ok'})
def rename_column(request, data_id):
    """Rename one dataset column and persist the change."""
    frame = get_dataframe(data_id)
    current = request.data['old_name']
    desired = request.data['new_name']
    frame = frame.rename(columns={current: desired})
    update_dataframe(frame, data_id)
    # NOTE(review): {'ok'} is a *set* literal, not a dict; DRF renders it as a
    # JSON array. Probably {'status': 'ok'} was intended — confirm with the client.
    return Response({'ok'})
def forest_model_info(x_names, y_names, data_id, estimators):
    """Fit a random-forest regressor and report its R² and estimator count."""
    frame = dataframe.get_dataframe(data_id)
    features = frame[x_names]
    target = frame[y_names]
    forest = RandomForestRegressor(
        n_estimators=estimators, random_state=0, max_depth=2,
    )
    forest.fit(features, target)
    return {
        'r_squared': forest.score(features, target),
        'estimators': forest.n_estimators,
    }
def get_nan_columns_for_ds(ds_id):
    """Return the names of all dataset columns that contain at least one NaN."""
    df = dataframe.get_dataframe(ds_id)
    # isnull().any() evaluates all columns in one vectorized pass, replacing
    # the original per-column Python loop; the boolean mask selects the same
    # column names in the same order.
    return df.columns[df.isnull().any()].tolist()
def linear_model_scatter(x_name, y_name, data_id):
    """Fit simple linear regression on one x/y pair and build its scatter data."""
    frame = dataframe.get_dataframe(data_id)
    predictor = frame[[x_name]]
    response = frame[[y_name]]
    fitted = LinearRegression().fit(predictor, response)
    return sct.get_scatter_data(fitted, predictor, response, x_name, y_name)
def linear_model_info(x_names, y_names, data_id):
    """Fit an OLS model, summarize it, and run linear-model validation."""
    frame = dataframe.get_dataframe(data_id)
    design = sm.add_constant(frame[x_names])
    fitted = sm.OLS(frame[y_names], design).fit()
    info = get_model_info(fitted)
    # Prepend the intercept's display label (kept in its original language).
    info['predictors'] = ['Смещение'] + x_names
    validation_result = validator.validate_linear(info)
    return {'info': info, 'validation_result': validation_result}
def edit_row(request, data_id):
    """Overwrite every cell of one row with values from the request payload.

    The row is addressed by the ``__row_id__`` index label; each column's new
    value is read from the payload entry of the same name.
    """
    row_num = int(request.data['__row_id__'])
    df = get_dataframe(data_id)
    for column in list(df):
        value = __get_numeric_val(request.data[column])
        # .loc replaces the original chained assignment (df[col][row] = v),
        # which can silently write to a temporary copy of the column.
        df.loc[row_num, column] = value
    update_dataframe(df, data_id)
    return Response({})
def poly_model_scatter(x_name, y_name, data_id, degree):
    """Fit a polynomial regression on one x/y pair and build its scatter data."""
    frame = dataframe.get_dataframe(data_id)
    predictor = frame[[x_name]]
    response = frame[[y_name]]
    pipeline = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    pipeline.fit(predictor, response)
    return sct.get_scatter_data(pipeline, predictor, response, x_name, y_name)
def add_row(request, data_id):
    """Append a new row built from the request payload; return its row index."""
    import pandas as pd  # local import: only needed for the concat below

    df = get_dataframe(data_id)
    values = {column: __get_numeric_val(request.data[column]) for column in list(df)}
    # DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with ignore_index reproduces its behavior.
    df = pd.concat([df, pd.DataFrame([values])], ignore_index=True)
    update_dataframe(df, data_id)
    return Response(df.shape[0] - 1)
def poly_model_info(x_names, y_names, data_id, degree):
    """Fit a polynomial regression and report its fit statistics."""
    frame = dataframe.get_dataframe(data_id)
    features = frame[x_names]
    target = frame[y_names]
    pipeline = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    pipeline.fit(features, target)
    # Second pipeline step is the fitted LinearRegression estimator.
    linear_step = pipeline.steps[1][1]
    return {
        'r_squared': pipeline.score(features, target),
        'degree': degree,
        # Skip index 0: PolynomialFeatures' constant column, covered by the intercept.
        'coefs': linear_step.coef_[0][1:],
        'intercept': linear_step.intercept_,
    }
def neural_model_info(x_names, y_names, data_id, activation, hidden):
    """Fit an MLP regressor and report its score and configuration."""
    frame = dataframe.get_dataframe(data_id)
    features = frame[x_names]
    target = frame[y_names].values.ravel()
    regressor = MLPRegressor(
        hidden_layer_sizes=(hidden,),
        max_iter=10000,
        activation=activation,
        random_state=9,
    )
    regressor.fit(features, target)
    return {
        'r_squared': regressor.score(features, target),
        # func_mapping presumably maps internal activation ids to display names.
        'activation': func_mapping.get(regressor.activation, ''),
        'hidden_layer_sizes': regressor.hidden_layer_sizes,
    }
def forest_model_scatter(x_name, y_name, data_id, estimators):
    """Fit a random forest on one x/y pair and build its scatter data."""
    frame = dataframe.get_dataframe(data_id)
    predictor = frame[[x_name]]
    response = frame[[y_name]]
    forest = RandomForestRegressor(
        n_estimators=estimators, random_state=0, max_depth=2,
    )
    forest.fit(predictor, response)
    return sct.get_scatter_data(
        forest, predictor, response, x_name, y_name, scalar=True,
    )
def auto_analysis(request):
    """Fit all candidate models for the requested x/y columns and return them
    sorted by score, along with the score threshold used for highlighting."""
    request_x = request.data['x']
    request_y = request.data['y']
    data_id = request.data['data_id']
    df = get_dataframe(data_id)
    x = df[request_x]
    y = df[request_y]
    models = a_analysis.get_models(x, y, request.user)
    # Best-scoring models first.
    models.sort(key=lambda m: m['score'], reverse=True)
    formatted = a_analysis.format_models_data(models, df)
    limit = __calc_highlight_limit(models)
    # Removed a leftover debug print of the highlight limit.
    return Response({'models': formatted, 'highlight_limit': limit})
def neural_model_scatter(x_name, y_name, data_id, activation, hidden):
    """Fit an MLP on one x/y pair and build its scatter data."""
    frame = dataframe.get_dataframe(data_id)
    predictor = frame[[x_name]]
    response = frame[[y_name]]
    net = MLPRegressor(
        hidden_layer_sizes=(hidden,),
        max_iter=10000,
        activation=activation,
        random_state=9,
    )
    fitted = net.fit(predictor, response.values.ravel())
    return sct.get_scatter_data(
        fitted, predictor, response, x_name, y_name, scalar=True,
    )
def predict(model_id, inputs):
    """Run a prediction with a stored model, dispatching on its model type."""
    model = MlModel.objects.get(pk=model_id)
    df = dataframe.get_dataframe(model.dataset.id)
    x = df[model.ds_in_cols]
    y = df[model.ds_out_cols]
    # OLS is handled separately: its helper does not take the model record.
    if model.model == 'OLS':
        return __predict_linear(x, y, inputs)
    handlers = {
        'Polynomial': __predict_poly,
        'MLP': __predict_neural,
        'Forest': __predict_forest,
    }
    handler = handlers.get(model.model)
    if handler is not None:
        return handler(model, x, y, inputs)
    return {'error': 'Incorrect model type'}
def __analysis_unique_counts(df, columns):
    """Map each column name to its number of distinct values."""
    return dict(zip(columns, df[columns].nunique().tolist()))


def __analysis_column_types(df, columns):
    """Map each column name to 'str' or 'num' based on its pandas dtype."""
    return {
        col: 'str' if is_string_dtype(df[col]) else 'num' for col in columns
    }


def analysis(request):
    """Profile the selected input/output columns of a dataset.

    Reports per-column unique counts and coarse types ('str'/'num'), plus
    columns flagged as incorrect or containing NaNs, together with the
    requesting user's configured unique-value threshold.
    """
    ds_id = request.data['dataset_id']
    in_columns = request.data['in_columns']
    out_columns = request.data['out_columns']
    df = get_dataframe(ds_id)

    # The same profiling is applied to both column groups; shared helpers
    # replace the originally duplicated loops.
    all_columns = in_columns + out_columns
    incorrect_columns = __get_incorrect_columns(df, all_columns)
    nan_columns = miss_values.get_nan_columns(df, all_columns)
    config = Configuration.objects.get(owner=request.user)

    return Response({
        'unique_threshold': config.unique_values_threshold,
        'in_columns': in_columns,
        'out_columns': out_columns,
        'in_unique': __analysis_unique_counts(df, in_columns),
        'out_unique': __analysis_unique_counts(df, out_columns),
        'in_types': __analysis_column_types(df, in_columns),
        'out_types': __analysis_column_types(df, out_columns),
        'incorrect_columns': incorrect_columns,
        'nan_columns': nan_columns,
    })
def remove_nan(request, data_id):
    """Drop every row containing a NaN value and persist the dataset."""
    frame = get_dataframe(data_id)
    cleaned = frame.dropna()
    update_dataframe(cleaned, data_id)
    # NOTE(review): {'ok'} is a *set* literal, not a dict; DRF renders it as a
    # JSON array. Probably {'status': 'ok'} was intended — confirm with the client.
    return Response({'ok'})