def split_page():
    """Render the random train/test split form and, on POST, create the split.

    The data set is looked up from the ``data_id`` query argument. On POST the
    SFrame is split with ``random_split`` (fixed seed 0) using the ``percent``
    form field, both halves are stored through ``save_data`` under the names
    given in the ``train`` and ``test`` form fields, and the user is redirected
    to the project page. Any exception is recorded in ``ErrorLog`` and the user
    is sent back to the referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/split.html',
                my_data=source,
                form=form,
                data_frame=sframe,
            )

        fraction = float(request.form['percent'])
        train_part, test_part = sframe.random_split(fraction, seed=0)
        save_data(source, request.form['train'], train_part)
        save_data(source, request.form['test'], test_part)
        flash('Successfully created train/test split for ' + source.name + '!', 'success')
        return redirect(url_for('main.my_project_page', project_id=source.project_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def unique_page():
    """Render the "add unique id" form and, on POST, add a row-number column.

    On POST the SFrame gets a row-number column named by the ``new_id`` form
    field (via ``add_row_number``), the result is stored through ``save_data``
    under the ``name`` form field, and the user is redirected to the details
    page of the id returned by ``save_data``. Any exception is recorded in
    ``ErrorLog`` and the user is sent back to the referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/unique.html',
                my_data=source,
                data_frame=sframe,
                form=form,
            )

        id_column = str(request.form['new_id'])
        output_name = str(request.form['name'])
        numbered = sframe.add_row_number(id_column)
        saved_id = save_data(source, output_name, numbered)
        flash('Successfully transformed the data!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def remove_columns_page():
    """Render the "remove columns" form and, on POST, drop the chosen columns.

    On POST every value of the ``features`` form field is converted to ``str``
    and passed to ``remove_columns``; the result is stored through
    ``save_data`` under the ``name`` form field and the user is redirected to
    the details page of the saved data. Any exception is recorded in
    ``ErrorLog`` and the user is sent back to the referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/remove_columns.html',
                my_data=source,
                form=form,
                data_frame=sframe,
                names=sframe.column_names(),
                types=sframe.column_types(),
            )

        doomed_columns = [str(column) for column in request.form.getlist('features')]
        trimmed = sframe.remove_columns(doomed_columns)
        saved_id = save_data(source, request.form['name'], trimmed)
        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def recode_step2_page():
    """Second step of re-coding a column: map each distinct value to an integer.

    Query arguments: ``data_id`` (data set), ``target`` (column to re-code) and
    ``name`` (name for the saved result).

    GET renders the mapping form with the distinct values of ``target``.
    POST keeps a copy of the original column as ``<target>_uncoded`` (through
    ``safely_add_col``), replaces every value of ``target`` with the integer
    from the ``new_value<i>`` form field (``i`` is the position of the value in
    the distinct-value array), saves the result through ``save_data`` and
    redirects to its details page.

    Any exception is recorded in ``ErrorLog`` and the user is sent back to the
    referring page.
    """
    try:
        data_id = request.args.get('data_id')
        target = request.args.get('target')
        name = request.args.get('name')
        my_data = UserData.query.filter_by(id=data_id).first()
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)
        names = data_frame.column_names()
        types = data_frame.column_types()
        target_col = str(target)
        ntarget_data = data_frame[target_col].unique().to_numpy()

        if request.method == 'POST':
            # The full column is only needed when actually re-coding.
            norig_data = data_frame[target_col].to_numpy()
            data_frame = safely_add_col(target_col + '_uncoded', data_frame[target_col], data_frame)

            # Build the value -> code table once so that each row is a single
            # dict lookup instead of a scan over every distinct value. Values
            # are compared by their string form, as the form fields are text.
            code_by_value = {}
            for position, distinct in enumerate(ntarget_data):
                raw_code = str(request.form['new_value' + str(position)])
                code_by_value[str(distinct)] = int(raw_code)

            new_data = []
            for field in norig_data:
                key = str(field)
                # A value with no mapping is skipped rather than guessed.
                if key in code_by_value:
                    new_data.append(code_by_value[key])

            data_frame[target_col] = SArray(new_data)
            fwd_id = save_data(my_data, name, data_frame)
            flash('Successfully re-coded ' + target + '!', 'success')
            return redirect(url_for('data.data_details_page', data_id=fwd_id))

        return render_template(
            'pages/data/transforms/code_field_step2.html',
            my_data=my_data,
            form=form,
            data_frame=data_frame,
            names=names,
            name=name,
            types=types,
            ntarget_data=ntarget_data,
            target=target,
        )
    except Exception as e:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
def fill_na_page():
    """Render the "fill N/A" form and, on POST, fill missing values.

    On POST the ``value`` form field is cast to the dtype of each selected
    column (``int``, ``float`` or ``str``) and used with ``fillna``. If the
    cast or fill fails, a type-specific message is flashed and the user is
    sent back to the referring page without saving. Otherwise the result is
    stored through ``save_data`` under the ``name`` form field and the user is
    redirected to its details page.

    Any other exception is recorded in ``ErrorLog`` and the user is sent back
    to the referring page.
    """
    # (dtype name, converter, message flashed when the conversion/fill fails)
    fillers = (
        ("int", int, 'Opps! Looks like you passed something I could not parse as an integer.'),
        ("float", float, 'Opps! Looks like you passed something I could not parse as an float.'),
        ("str", str, 'Opps! Looks like you passed something I could not parse as an string.'),
    )
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/fill_na.html',
                my_data=source,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        replacement = str(request.form['value'])
        output_name = str(request.form['name'])
        for feature in request.form.getlist('features'):
            column_name = str(feature)
            column = sframe[column_name]
            print(column.dtype.__name__)
            for dtype_name, convert, complaint in fillers:
                if column.dtype.__name__ != dtype_name:
                    continue
                try:
                    sframe[column_name] = column.fillna(convert(replacement))
                except Exception:
                    flash(complaint, 'error')
                    return redirect(request.referrer)

        saved_id = save_data(source, output_name, sframe)
        flash('Successfully replaced N/A values!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def classify_page():
    """Render the bracket classifier form and, on POST, label rows by range.

    On POST:
      * rows whose ``target`` column is missing are dropped;
      * ``num_brackets`` brackets are read from the form fields
        ``lrange_<i>``, ``urange_<i>`` and ``class_<i>`` (``i`` starting at 1);
      * each row gets the class of the first bracket whose inclusive range
        contains its ``target`` value, or ``"unknown"`` when none matches;
      * the labels are added as the column named by the ``field`` form field
        (through ``safely_add_col``), the result is saved through
        ``save_data`` and the user is redirected to its details page.

    Unlike the sibling views, this view has no ``try``/``except``: exceptions
    propagate to the caller.
    """
    data_id = request.args.get('data_id')
    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)
    target = None
    names = data_frame.column_names()
    types = data_frame.column_types()
    cols = [str(column) for column in names]

    if request.method == 'POST':
        target = request.form['target']
        data_frame = data_frame.dropna(str(target), how="all")
        norig_data = data_frame[str(target)].to_numpy()

        # Parse the bracket definitions once instead of once per row.
        num_brackets = int(request.form['num_brackets'])
        brackets = []
        for idx in range(1, num_brackets + 1):
            suffix = str(idx)
            brackets.append((
                float(request.form['lrange_' + suffix]),
                float(request.form['urange_' + suffix]),
                request.form['class_' + suffix],
            ))

        classes = []
        for data in norig_data:
            value = float(data)
            label = "unknown"
            for lower, upper, bracket_label in brackets:
                if lower <= value <= upper:
                    label = bracket_label
                    # Stop at the first match so each row gets exactly one
                    # label, even when bracket ranges overlap or touch.
                    break
            classes.append(label)

        data_frame = safely_add_col(str(request.form['field']), classes, data_frame)
        fwd_id = save_data(my_data, request.form['name'], data_frame)
        flash('Successfully transformed the data set!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template(
        'pages/data/transforms/classifier.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        names=names,
        types=types,
        target=target,
        cols=cols,
    )
def outlliers_page():
    """Render the outlier form and, on POST, drop rows far from the mean.

    On POST a row is kept when the absolute difference between its ``target``
    value and the column mean, divided by the magnitude of the mean, is below
    the ``cent`` form field. Rows whose ``target`` value is missing are kept
    unchanged. The surviving rows are rebuilt into an SFrame, saved through
    ``save_data`` under the ``name`` form field, and the user is redirected to
    the details page of the saved data.

    If the column mean is zero or unavailable, the relative deviation is
    undefined; a message is flashed and the user is sent back to the referring
    page. Any other exception is recorded in ``ErrorLog`` and the user is sent
    back to the referring page.
    """
    try:
        data_id = request.args.get('data_id')
        my_data = UserData.query.filter_by(id=data_id).first()
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)
        names = data_frame.column_names()
        types = data_frame.column_types()

        if request.method == 'POST':
            cent = float(request.form['cent'])
            name = str(request.form['name'])
            target = str(request.form['target'])
            mean = data_frame[target].mean()

            if not mean:
                flash('Opps! Outliers can not be measured relative to a mean of zero.', 'error')
                return redirect(request.referrer)

            # Use the magnitude of the mean: dividing by a negative mean would
            # make every relative deviation negative and keep every row.
            scale = abs(mean)
            rows = []
            for row in data_frame:
                value = row[target]
                if value is None:
                    # Missing values cannot be judged; keep the row.
                    rows.append(row)
                elif abs(float(value) - mean) / scale < cent:
                    rows.append(row)

            # Building an SFrame from a list of dicts yields a single dict
            # column 'X1'; unpack it back into one column per key.
            sf = tc.SFrame(rows)
            sf = sf.unpack('X1', column_name_prefix='')
            fwd_id = save_data(my_data, name, sf)
            flash('Successfully removed outliers!', 'success')
            return redirect(url_for('data.data_details_page', data_id=fwd_id))

        return render_template(
            'pages/data/transforms/outlier.html',
            my_data=my_data,
            data_frame=data_frame,
            names=names,
            types=types,
            form=form,
        )
    except Exception as e:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
def convert_magic_page():
    """Render the "magic value" form and, on POST, blank out a sentinel value.

    On POST, for every column listed in the ``features`` form field, each
    entry whose string form equals the ``magic`` form field is replaced with
    ``None``; all other entries are kept. The result is stored through
    ``save_data`` under the ``name`` form field and the user is redirected to
    its details page. Any exception is recorded in ``ErrorLog`` and the user
    is sent back to the referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/convert_magic.html',
                my_data=source,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        sentinel = str(request.form['magic'])
        output_name = str(request.form['name'])
        for feature in request.form.getlist('features'):
            column_name = str(feature)
            raw_values = sframe[column_name].to_numpy()
            cleaned = [None if str(entry) == sentinel else entry for entry in raw_values]
            sframe[column_name] = SArray(cleaned)

        saved_id = save_data(source, output_name, sframe)
        flash('Successfully cleared magic values!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def smote_step1():
    """First step of the SMOTE wizard: collect the resampling settings.

    GET renders the form, offering the string-typed columns as ``cols``.
    POST forwards the ``target``, ``name``, ``seed``, ``neighbors`` and
    ``algorithm`` form fields as query arguments to ``transforms.smote_page``.
    Any exception is recorded in ``ErrorLog`` and the user is sent back to the
    referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)
        target = None
        names = sframe.column_names()
        types = sframe.column_types()
        cols = [
            str(column)
            for position, column in enumerate(names)
            if str(types[position].__name__) == "str"
        ]

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/smote_step1.html',
                my_data=source,
                form=form,
                data_frame=sframe,
                names=names,
                types=types,
                target=target,
                cols=cols,
            )

        target = request.form['target']
        next_url = url_for(
            'transforms.smote_page',
            data_id=source.id,
            target=target,
            name=request.form['name'],
            seed=request.form['seed'],
            neighbors=request.form['neighbors'],
            algorithm=request.form['algorithm'],
        )
        return redirect(next_url)
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def split_session_page():
    """Render the session-aware split form and, on POST, create the split.

    GET renders the form, offering the integer-typed columns as ``names``.
    POST splits the SFrame with ``random_split_by_session`` using the
    ``idField`` form field as the session id and ``percent`` as the fraction,
    stores both halves through ``save_data`` under the ``train`` and ``test``
    form fields, and redirects to the project page.

    Any exception is recorded in ``ErrorLog`` and the user is sent back to the
    referring page.
    """
    try:
        data_id = request.args.get('data_id')
        my_data = UserData.query.filter_by(id=data_id).first()
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)
        names = data_frame.column_names()
        types = data_frame.column_types()
        # Only integer columns are offered as the session id.
        cols = [
            str(column)
            for column, column_type in zip(names, types)
            if str(column_type.__name__) == "int"
        ]

        if request.method == 'POST':
            training_set, test_set = tc.activity_classifier.util.random_split_by_session(
                data_frame,
                session_id=str(request.form['idField']),
                fraction=float(request.form['percent']),
            )
            save_data(my_data, request.form['train'], training_set)
            save_data(my_data, request.form['test'], test_set)
            flash('Successfully created train/test split for ' + my_data.name + '!', 'success')
            return redirect(url_for('main.my_project_page', project_id=my_data.project_id))

        return render_template(
            'pages/data/transforms/split_session.html',
            my_data=my_data,
            form=form,
            data_frame=data_frame,
            types=types,
            names=cols,
        )
    except Exception as e:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
def rename_feature_page():
    """Render the "rename feature" form and, on POST, rename one column.

    On POST the column named by the ``target`` form field is renamed to the
    ``feature_name`` form field. If ``has_column`` reports that the new name
    already exists, a message is flashed and the user is sent back to the
    referring page. Otherwise the renamed SFrame is stored through
    ``save_data`` under the ``name`` form field and the user is redirected to
    its details page.

    Any exception is recorded in ``ErrorLog`` and the user is sent back to the
    referring page.
    """
    try:
        source = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(source.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # Anything other than a POST just shows the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/rename_feature.html',
                my_data=source,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        new_column = str(request.form['feature_name'])
        output_name = str(request.form['name'])
        old_column = str(request.form['target'])

        if has_column(new_column, sframe):
            flash('Opps! You appear to already have a feature with this name.', 'error')
            return redirect(request.referrer)

        renamed = sframe.rename({old_column: new_column})
        saved_id = save_data(source, output_name, renamed)
        flash('Successfully transformed the data!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        log_entry = ErrorLog()
        log_entry.user_id = current_user.id
        log_entry.error = str(exc.__class__)
        log_entry.parameters = request.args
        db.session.add(log_entry)
        db.session.commit()
        return redirect(request.referrer)
def train_model_page():
    """Render the training form and, on POST, train and store a model.

    The data set is looked up from the ``data_id`` query argument and must
    belong to the current user.

    On POST the view:
      1. reads the model settings from the form (``model``, ``model_class``,
         ``max_depth``, ``max_iterations``, ``session_id``, ``time_field``,
         ``target``, ``features``); a blank or missing ``max_depth`` /
         ``max_iterations`` falls back to the per-model default;
      2. shuffles the rows (or, for ``deep``, sorts them by session and time);
      3. drops rows with missing values in the target or any feature;
      4. trains with Turi Create while capturing stdout as the console log —
         for ``model_class == "predictor"`` it trains repeatedly on fresh
         random splits and keeps the run with the smallest ``max_error``;
         otherwise it trains a single classifier;
      5. saves the model to disk and its metadata to the database, then
         redirects to the model details page.

    Any exception is recorded in ``ErrorLog`` and the user is sent back to the
    referring page.
    """
    try:
        tc.config.set_num_gpus(0)
        data_id = request.args.get('data_id')
        my_data = UserData.query.filter_by(id=data_id).first()
        # Compare ids by value: `is not` tests object identity, which is
        # unreliable for integers and can reject the legitimate owner.
        if my_data is None or my_data.user_id != current_user.id:
            flash('Opps! No data found', 'error')
            return redirect(request.referrer)
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)

        if request.method == 'POST':
            form.populate_obj(my_model)
            model_type = request.form['model']
            model_class = request.form['model_class']
            # Treat a blank or missing field as "not provided" so that the
            # per-model defaults below can apply.
            max_depth = request.form.get('max_depth') or None
            max_iterations = request.form.get('max_iterations') or None
            session_id = str(request.form['session_id'])
            time_field = str(request.form['time_field'])
            target = str(request.form['target'])
            my_model.mtype = model_type

            if model_type == 'svm':
                label_count = data_frame[target].unique()
                if len(label_count) > 2:
                    flash('SVM only supports binary classification - try another method.', 'error')
                    return redirect(request.referrer)

            if model_type != 'deep':
                # A single shuffle already yields a uniformly random order.
                data_frame = tc.SFrame(data=shuffle(data_frame.to_dataframe()))
            else:
                tfrm = data_frame.to_dataframe()
                tfrm = tfrm.sort_values(by=[session_id, time_field])
                data_frame = tc.SFrame(data=tfrm)
                data_frame[session_id] = data_frame[session_id].astype(int)

            options_dict = {}
            if max_depth is not None:
                options_dict['max_depth'] = int(max_depth)
            if max_iterations is not None:
                options_dict['max_iterations'] = int(max_iterations)

            data_frame_cleaned = data_frame.dropna(target, how="any")
            cols = []
            for feature in request.form.getlist('features'):
                if str(feature) == target:
                    flash('You can not select the target field in your training features.', 'error')
                    return redirect(url_for('model.train_model_page', data_id=data_id))
                data_frame_cleaned = data_frame_cleaned.dropna(str(feature), how="any")
                cols.append(str(feature))
            if data_frame_cleaned.num_rows() < 2:
                flash('After cleaning, there is no data left. You have a data quality issue.', 'error')
                return redirect(url_for('model.train_model_page', data_id=data_id))

            my_model.user_id = current_user.id
            # Percentage of rows lost to the missing-value clean-up.
            training_loss = ((float(data_frame.num_rows()) - float(data_frame_cleaned.num_rows())) / float(data_frame.num_rows())) * 100

            # Capture everything printed during training as the console log.
            # The try/finally guarantees stdout is restored even when training
            # raises, so a failure cannot leave the process writing into the
            # capture buffer.
            old_stdout = sys.stdout
            sys.stdout = mystdout = StringIO()
            try:
                if model_type != 'deep':
                    train_data, test_data = data_frame_cleaned.random_split(.80, seed=0)
                else:
                    train_data, test_data = tc.activity_classifier.util.random_split_by_session(
                        data_frame_cleaned, session_id=session_id, fraction=0.8)
                tc_model = None

                # Fill in defaults for options the user did not provide.
                if model_type == 'gradient':
                    options_dict.setdefault('max_depth', 6)
                    options_dict.setdefault('max_iterations', 10)
                elif model_type == 'linear':
                    # Linear models take no tuning options.
                    options_dict = {}
                elif model_type == 'decision':
                    options_dict.setdefault('max_depth', 6)
                elif model_type == 'random':
                    options_dict.setdefault('max_depth', 6)
                    options_dict.setdefault('max_iterations', 10)
                elif model_type == 'svm':
                    options_dict.setdefault('max_iterations', 10)
                elif model_type == 'deep':
                    options_dict.setdefault('max_iterations', 10)

                results = {}
                console = None
                if model_class == "predictor":
                    best_run = None
                    test_run = None
                    working_train_data, working_test_data = data_frame_cleaned.random_split(.80, seed=0)
                    # Smaller training sets get more attempts.
                    num_rolls = 50
                    if working_train_data.num_rows() < 1000:
                        num_rolls = 70
                    if working_train_data.num_rows() < 500:
                        num_rolls = 100
                    if working_train_data.num_rows() < 200:
                        num_rolls = 200
                    for _ in range(num_rolls):
                        working_train_data, working_test_data = data_frame_cleaned.random_split(.80)
                        if model_type == 'gradient':
                            test_run = tc.boosted_trees_regression.create(
                                working_train_data, target=target, validation_set=None, features=cols,
                                max_depth=options_dict['max_depth'],
                                max_iterations=options_dict['max_iterations'])
                        elif model_type == 'linear':
                            test_run = tc.linear_regression.create(
                                working_train_data, target=target, validation_set=None, features=cols)
                        elif model_type == 'decision':
                            test_run = tc.decision_tree_regression.create(
                                working_train_data, target=target, validation_set=None, features=cols,
                                max_depth=options_dict['max_depth'])
                        elif model_type == 'random':
                            test_run = tc.random_forest_regression.create(
                                working_train_data, target=target, validation_set=None, features=cols,
                                max_depth=options_dict['max_depth'],
                                max_iterations=options_dict['max_iterations'])
                        working_results = test_run.evaluate(working_test_data)
                        if best_run is None or working_results['max_error'] < best_run:
                            tc_model = test_run
                            train_data = working_train_data
                            test_data = working_test_data
                            results = working_results
                            best_run = results['max_error']
                            console = mystdout.getvalue()
                        # Reset the capture buffer for the next attempt. Rewind
                        # first: truncating without seeking leaves the write
                        # position past the end and pads later output with NULs.
                        mystdout.seek(0)
                        mystdout.truncate(0)
                else:
                    if model_type == 'gradient':
                        tc_model = tc.boosted_trees_classifier.create(
                            train_data, target=target, validation_set=None, features=cols,
                            max_depth=options_dict['max_depth'],
                            max_iterations=options_dict['max_iterations'])
                    elif model_type == 'linear':
                        tc_model = tc.logistic_classifier.create(
                            train_data, target=target, validation_set=None, features=cols)
                    elif model_type == 'decision':
                        tc_model = tc.decision_tree_classifier.create(
                            train_data, target=target, validation_set=None, features=cols,
                            max_depth=options_dict['max_depth'])
                    elif model_type == 'random':
                        tc_model = tc.random_forest_classifier.create(
                            train_data, target=target, validation_set=None, features=cols,
                            max_depth=options_dict['max_depth'],
                            max_iterations=options_dict['max_iterations'])
                    elif model_type == 'svm':
                        tc_model = tc.svm_classifier.create(
                            train_data, target=target, validation_set=None, features=cols,
                            max_iterations=options_dict['max_iterations'])
                    elif model_type == 'deep':
                        tc_model = tc.activity_classifier.create(
                            train_data, session_id=session_id, target=target, validation_set=None,
                            features=cols, max_iterations=options_dict['max_iterations'])
                    results = tc_model.evaluate(test_data)

                my_model.user_id = current_user.id
                my_model.data_id = my_data.id
                my_model.project_id = my_data.project_id
                my_model.path = my_data.path
                my_model.options = options_dict
                my_model.api_key = str(uuid.uuid4())

                # Feature importance is only requested for the tree models.
                imp = []
                if model_type != 'linear' and model_type != 'svm' and model_type != 'deep':
                    imp = tc_model.get_feature_importance()
                importance = [
                    {"name": str(col["name"]), "index": str(col["index"]), "count": str(col["count"])}
                    for col in imp
                ]
                my_model.features = {
                    "time_field": time_field,
                    "session_id": session_id,
                    "training_loss": training_loss,
                    "training_rows": train_data.num_rows(),
                    "test_rows": test_data.num_rows(),
                    "features": cols,
                    "target": target,
                    "importance": importance,
                    "model_type": model_type,
                    "model_class": model_class,
                }
            finally:
                sys.stdout = old_stdout

            if model_class == "predictor":
                my_model.results = results
                my_model.console = console
            else:
                if model_type != 'svm' and model_type != 'deep':
                    my_model.results = {
                        'f1_score': nan_to_null(results['f1_score']),
                        'auc': nan_to_null(results['auc']),
                        'recall': nan_to_null(results['recall']),
                        'precision': nan_to_null(results['precision']),
                        'log_loss': nan_to_null(results['log_loss']),
                        'accuracy': nan_to_null(results['accuracy']),
                    }
                else:
                    # No AUC / log-loss is stored for SVM and deep models.
                    my_model.results = {
                        'f1_score': nan_to_null(results['f1_score']),
                        'auc': "N/A",
                        'recall': nan_to_null(results['recall']),
                        'precision': nan_to_null(results['precision']),
                        'log_loss': "N/A",
                        'accuracy': nan_to_null(results['accuracy']),
                    }
                my_model.console = mystdout.getvalue()

            my_model.mname = os.path.join(my_model.path, str(my_model.api_key) + "_model")
            tc_model.save(my_model.mname)
            my_model.console = my_model.console.strip("\x00")
            db.session.add(my_model)
            db.session.commit()
            flash('Model has been saved!', 'success')
            return redirect(url_for('model.model_details_page', model_id=my_model.id))

        return render_template(
            'pages/models/train_model_page.html',
            my_data=my_data,
            form=form,
            data_frame=data_frame,
            names=data_frame.column_names(),
            types=data_frame.column_types(),
        )
    except Exception as e:
        flash('Opps! Something unexpected happened. On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP. Error: ' + str(e), 'error')
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
def rolling_slope_page():
    """Render the rolling-slope form and, on POST, compute per-row regressions.

    Form fields: ``idField`` (grouping column), ``target``, ``xVal``
    (x-axis column), ``features`` (columns to regress against ``xVal``) and
    ``name`` (name for the saved result).

    On POST rows with missing values in any feature, ``idField`` or ``xVal``
    are dropped. Rows are then walked in order; for each run of consecutive
    rows sharing the same ``idField`` value the feature and ``xVal`` values
    are accumulated, and for every row a linear regression of each feature
    against ``xVal`` over the values accumulated so far is computed. The
    output row holds the non-feature columns of the current row, a
    ``<feature>_slope`` and ``<feature>_intercept`` per feature, and
    ``<target>_initial`` — the ``target`` value of the first row of the run.
    The result is saved through ``save_data`` and the user is redirected to
    its details page.

    This view has no ``try``/``except``: exceptions propagate to the caller.
    """
    data_id = request.args.get('data_id')
    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)

    if request.method == 'POST':
        idField = request.form['idField']
        target = request.form['target']
        xVal = request.form['xVal']
        features_str = [str(feat) for feat in request.form.getlist('features')]

        # Per-run accumulators: one list per feature plus one for the x axis.
        featArrays = {}
        for feature in features_str:
            featArrays[feature] = []
        featArrays[xVal] = []

        for feature in features_str:
            data_frame = data_frame.dropna(feature, how="all")
        data_frame = data_frame.dropna(str(idField), how="all")
        data_frame = data_frame.dropna(str(xVal), how="all")
        size = len(data_frame)

        # Columns copied through unchanged (everything that is not a feature).
        static_cols = [str(col) for col in data_frame.column_names() if col not in features_str]

        # Setup the final hash - make sure we have keys for everything
        final_frame_vals = {}
        for col in static_cols:
            final_frame_vals[col] = []
        for key in featArrays:
            if key in features_str:
                final_frame_vals[str(key) + "_slope"] = []
                final_frame_vals[str(key) + "_intercept"] = []
        final_frame_vals[str(target) + "_initial"] = []

        targetVal = None
        for x in range(0, size):
            row = data_frame[x]
            # First row of a new run: remember its target value.
            if x == 0 or data_frame[x - 1][idField] != row[idField]:
                targetVal = row[target]
            for feature in features_str:
                featArrays[feature].append(row[feature])
            featArrays[xVal].append(row[str(xVal)])

            # NOTE(review): this tests the number of accumulator keys, not the
            # number of accumulated points — possibly meant
            # len(featArrays[xVal]) > 1. Left unchanged; confirm intent.
            if len(featArrays) > 1:
                finalRow = {}
                for col in static_cols:
                    finalRow[col] = row[col]
                # Builtin float replaces the np.float alias, which newer NumPy
                # releases no longer provide.
                xValArr = np.array(featArrays[xVal]).astype(float)
                for key, value in featArrays.items():
                    if key != xVal:
                        yValArr = np.array(value).astype(float)
                        slope, intercept, r_value, p_value, std_err = linregress(xValArr, yValArr)
                        finalRow[key + "_slope"] = float(slope)
                        finalRow[key + "_intercept"] = float(intercept)
                for key, value in finalRow.items():
                    final_frame_vals[str(key)].append(value)
                final_frame_vals[str(target) + "_initial"].append(targetVal)

            if x == size - 1 or row[idField] != data_frame[x + 1][idField]:
                # Clear slope and static Xval accumulators
                for feature in features_str:
                    featArrays[feature] = []
                featArrays[str(xVal)] = []

        sframe = tc.SFrame(data=final_frame_vals)
        fwd_id = save_data(my_data, request.form['name'], sframe)
        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template(
        'pages/data/transforms/rolling_slope.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        names=data_frame.column_names(),
        types=data_frame.column_types(),
    )
def smote_page():
    """Second step of the SMOTE wizard: resample the data set.

    Query arguments: ``data_id``, ``name``, ``target``, ``seed``,
    ``algorithm`` and ``neighbors``.

    GET renders the form with the distinct values of ``target``.
    POST drops rows with missing values in any selected feature, shuffles the
    rows, and resamples with ``SMOTE`` (when ``algorithm == "SMOTE"``) or
    ``ADASYN`` otherwise. The per-class sample counts come from the
    ``new_value<i>`` form fields, where ``i`` is the position of the class in
    the distinct-value array. The resampled target, the resampled features
    and a string copy of the target (``<target>_uncoded``) are saved through
    ``save_data`` and the user is redirected to the details page.

    If resampling raises, the error text is flashed and the user is sent back
    to the referring page. There is no outer ``try``/``except``: other
    exceptions propagate to the caller.
    """
    data_id = request.args.get('data_id')
    name = request.args.get('name')
    target = request.args.get('target')
    seed = request.args.get('seed')
    algorithm = request.args.get('algorithm')
    neighbors = request.args.get('neighbors')
    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)
    data_frame_cleaned = data_frame
    ntarget_data = data_frame[str(target)].unique().to_numpy()

    if request.method == 'POST':
        form_target = str(request.form['target'])
        features_str = []
        for feat in request.form.getlist('features'):
            features_str.append(str(feat))
            data_frame_cleaned = data_frame_cleaned.dropna(str(feat), how="any")
        # NOTE(review): the target column is also passed to the sampler as an
        # input variable (it ends up as the last column and is ignored below).
        # Kept as in the original; confirm this is intended.
        variables = features_str + [form_target]
        df = shuffle(data_frame_cleaned.to_dataframe())

        # Desired number of samples per class, keyed by the integer class.
        strategy = {}
        for position in range(len(ntarget_data)):
            strategy[int(ntarget_data[position])] = int(request.form['new_value' + str(position)])

        try:
            if algorithm == "SMOTE":
                sm = SMOTE(random_state=int(seed), sampling_strategy=strategy, k_neighbors=int(neighbors) - 1)
            else:
                sm = ADASYN(random_state=int(seed), sampling_strategy=strategy, n_neighbors=int(neighbors) - 1)
            # fit_resample is the current name of the removed fit_sample.
            x_res, y_res = sm.fit_resample(df[variables], df[form_target])
        except Exception as e:
            flash(str(e), 'error')
            return redirect(request.referrer)

        sf = tc.SFrame({form_target: y_res})
        resampled = pd.DataFrame(x_res)
        # Columns of the resampled matrix are in `variables` order, so the
        # i-th feature is the i-th column; .iloc replaces the removed .ix.
        for position, feat in enumerate(features_str):
            column_values = list(resampled.iloc[:, position])
            sf = sf.add_column(SArray(data=column_values), feat)
        sa = SArray(data=y_res, dtype=str)
        sf = sf.add_column(sa, form_target + "_uncoded")
        fwd_id = save_data(my_data, request.form['name'], sf)
        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template(
        'pages/data/transforms/smote.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        num_rows=data_frame.num_rows(),
        name=name,
        target=target,
        ntarget_data=ntarget_data,
        names=data_frame.column_names(),
        types=data_frame.column_types(),
    )