Exemple #1
0
def split_page():
    """Show the random train/test split form and perform the split on POST.

    The data set is looked up from the ``data_id`` query argument and its
    SFrame is loaded from ``sname``.  On POST the frame is split with the
    submitted ``percent`` fraction (seed 0), both parts are stored through
    ``save_data`` under the submitted ``train`` and ``test`` names, and the
    user is redirected to ``main.my_project_page``.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)

        # GET (or any non-POST): just show the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/split.html',
                my_data=dataset,
                form=form,
                data_frame=sframe,
            )

        fraction = float(request.form['percent'])
        train_part, test_part = sframe.random_split(fraction, seed=0)
        save_data(dataset, request.form['train'], train_part)
        save_data(dataset, request.form['test'], test_part)

        flash('Successfully created train/test split for ' + dataset.name + '!', 'success')
        return redirect(url_for('main.my_project_page', project_id=dataset.project_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #2
0
def unique_page():
    """Show the "add row number" form and, on POST, add the id column.

    The data set is looked up from the ``data_id`` query argument.  On POST a
    row-number column named by the submitted ``new_id`` is added to the
    SFrame, the result is stored through ``save_data`` under the submitted
    ``name``, and the user is redirected to the details page of the id that
    ``save_data`` returned.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)

        # GET (or any non-POST): just show the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/unique.html',
                my_data=dataset,
                data_frame=sframe,
                form=form,
            )

        id_column = str(request.form['new_id'])
        output_name = str(request.form['name'])
        numbered = sframe.add_row_number(id_column)
        saved_id = save_data(dataset, output_name, numbered)
        flash('Successfully transformed the data!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #3
0
def remove_columns_page():
    """Show the column-removal form and, on POST, drop the chosen columns.

    The data set is looked up from the ``data_id`` query argument.  On POST
    every submitted ``features`` entry is removed from the SFrame, the result
    is stored through ``save_data`` under the submitted ``name``, and the
    user is redirected to the details page of the id ``save_data`` returned.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)

        # GET (or any non-POST): show the form with the column names and types.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/remove_columns.html',
                my_data=dataset,
                form=form,
                data_frame=sframe,
                names=sframe.column_names(),
                types=sframe.column_types(),
            )

        doomed_columns = [str(feat) for feat in request.form.getlist('features')]
        trimmed = sframe.remove_columns(doomed_columns)
        saved_id = save_data(dataset, request.form['name'], trimmed)

        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #4
0
def recode_step2_page():
    """Second step of re-coding a column: map each unique value to an integer.

    Query arguments: ``data_id`` (data set), ``target`` (column to re-code)
    and ``name`` (name under which the result is saved).

    GET renders a form listing the unique values of the target column.  On
    POST the form is expected to carry one ``new_value<i>`` field per unique
    value (same order as rendered); the original column is first copied to
    ``<target>_uncoded`` through ``safely_add_col``, then the target column is
    replaced by the integer codes and the frame is stored via ``save_data``.

    Any exception (including a non-integer code) is flashed to the user,
    recorded as an ``ErrorLog`` row and answered with a redirect to the
    referring page.
    """
    try:
        data_id = request.args.get('data_id')
        target = request.args.get('target')
        name = request.args.get('name')
        my_data = UserData.query.filter_by(id=data_id).first()
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)
        names = data_frame.column_names()
        types = data_frame.column_types()

        target_col = str(target)
        ntarget_data = data_frame[target_col].unique().to_numpy()

        if request.method == 'POST':
            # Materialise the full column only when it is actually needed.
            norig_data = data_frame[target_col].to_numpy()
            data_frame = safely_add_col(target_col + '_uncoded', data_frame[target_col], data_frame)

            # One lookup table keyed by the string form of each unique value,
            # instead of rescanning every unique value for every row.
            recode = {
                str(original): int(request.form['new_value' + str(position)])
                for position, original in enumerate(ntarget_data)
            }
            # Every row value comes from the same column as the unique values,
            # so a missing key means the data changed underneath us; the
            # resulting KeyError is handled by the catch-all below.
            new_data = [recode[str(field)] for field in norig_data]

            data_frame[target_col] = SArray(new_data)
            fwd_id = save_data(my_data, name, data_frame)

            flash('Successfully re-coded ' + target + '!', 'success')
            return redirect(url_for('data.data_details_page', data_id=fwd_id))
        return render_template('pages/data/transforms/code_field_step2.html',
            my_data=my_data,
            form=form,
            data_frame=data_frame,
            names=names,
            name=name,
            types=types,
            ntarget_data=ntarget_data,
            target=target)
    except Exception as e:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
Exemple #5
0
def fill_na_page():
    """Show the fill-N/A form and, on POST, fill missing values.

    The data set is looked up from the ``data_id`` query argument.  On POST
    the submitted ``value`` is converted to the dtype of each selected
    ``features`` column (``int``, ``float`` or ``str``; columns of any other
    dtype are left untouched) and used as the fill value.  If converting or
    filling fails for a column, a type-specific message is flashed and the
    user is sent back to the referring page without saving.  Otherwise the
    frame is stored through ``save_data`` under the submitted ``name``.

    Any other exception is flashed to the user, recorded as an ``ErrorLog``
    row and answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # GET (or any non-POST): just show the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/fill_na.html',
                my_data=dataset,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        # dtype name -> (converter for the fill value, message on failure)
        fillers = {
            "int": (int, 'Opps!  Looks like you passed something I could not parse as an integer.'),
            "float": (float, 'Opps!  Looks like you passed something I could not parse as an float.'),
            "str": (str, 'Opps!  Looks like you passed something I could not parse as an string.'),
        }

        fill_value = str(request.form['value'])
        output_name = str(request.form['name'])
        for feature in request.form.getlist('features'):
            column_name = str(feature)
            column = sframe[column_name]
            dtype_name = column.dtype.__name__
            print(dtype_name)

            filler = fillers.get(dtype_name)
            if filler is None:
                continue
            convert, failure_message = filler
            try:
                sframe[column_name] = column.fillna(convert(fill_value))
            except Exception:
                flash(failure_message, 'error')
                return redirect(request.referrer)

        saved_id = save_data(dataset, output_name, sframe)
        flash('Successfully replaced N/A values!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #6
0
def classify_page():
    """Show the bracket-classifier form and, on POST, label a numeric column.

    The data set is looked up from the ``data_id`` query argument.  The POST
    form supplies ``target`` (column to classify), ``num_brackets`` and, for
    each bracket ``i`` in ``1..num_brackets``, ``lrange_<i>``, ``urange_<i>``
    and ``class_<i>``.  Rows with a missing target are dropped; each
    remaining value gets the label of the first bracket whose inclusive range
    contains it, or ``"unknown"`` when none does.  The labels are added as
    the column named by ``field`` through ``safely_add_col`` and the frame is
    stored via ``save_data`` under the submitted ``name``.

    NOTE: this view has no catch-all error handler; exceptions propagate to
    the caller.
    """
    data_id = request.args.get('data_id')
    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)
    target = None
    names = data_frame.column_names()
    types = data_frame.column_types()
    cols = [str(column) for column in names]

    if request.method == 'POST':
        target = request.form['target']
        target_col = str(target)
        data_frame = data_frame.dropna(target_col, how="all")

        # Parse the bracket definitions once instead of once per row.
        brackets = [
            (
                float(request.form['lrange_' + str(index)]),
                float(request.form['urange_' + str(index)]),
                request.form['class_' + str(index)],
            )
            for index in range(1, int(request.form['num_brackets']) + 1)
        ]

        classes = []
        for raw_value in data_frame[target_col].to_numpy():
            value = float(raw_value)
            label = "unknown"
            for lower, upper, bracket_label in brackets:
                if lower <= value <= upper:
                    # First match wins.  Exactly one label per row keeps the
                    # new column aligned with the frame even when bracket
                    # ranges overlap or share an inclusive boundary.
                    label = bracket_label
                    break
            classes.append(label)

        data_frame = safely_add_col(str(request.form['field']), classes, data_frame)
        fwd_id = save_data(my_data, request.form['name'], data_frame)

        flash('Successfully transformed the data set!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template('pages/data/transforms/classifier.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        names=names,
        types=types,
        target=target,
        cols=cols)
Exemple #7
0
def outlliers_page():
    """Show the outlier-removal form and, on POST, drop far-from-mean rows.

    The data set is looked up from the ``data_id`` query argument.  The POST
    form supplies ``target`` (column to inspect), ``cent`` (maximum allowed
    deviation from the column mean, as a fraction of the mean's magnitude)
    and ``name`` (name under which the result is saved).  Rows whose target
    is missing are always kept; other rows are kept only when
    ``|value - mean| / |mean| < cent``.

    A column mean of zero makes the relative deviation undefined; the
    resulting ``ZeroDivisionError`` is handled like any other exception: it
    is flashed to the user, recorded as an ``ErrorLog`` row and answered with
    a redirect to the referring page.
    """
    try:
        data_id = request.args.get('data_id')
        my_data = UserData.query.filter_by(id=data_id).first()
        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)
        names = data_frame.column_names()
        types = data_frame.column_types()

        if request.method == 'POST':
            cent = float(request.form['cent'])
            name = str(request.form['name'])
            target = str(request.form['target'])
            mean = data_frame[target].mean()
            rows = []
            for row in data_frame:
                value = row[target]
                if value is None:
                    # Missing values are not outliers; keep the row as-is.
                    rows.append(row)
                    continue
                # Divide by the magnitude of the mean: dividing by a negative
                # mean yields a negative ratio that is always below ``cent``,
                # which would silently keep every row.
                relative_deviation = abs(float(value) - mean) / abs(mean)
                if relative_deviation < cent:
                    rows.append(row)
            # A list of row dicts becomes a single dict column named 'X1';
            # unpack it back into one column per key.
            sf = tc.SFrame(rows)
            sf = sf.unpack('X1', column_name_prefix='')
            fwd_id = save_data(my_data, name, sf)
            flash('Successfully removed outliers!', 'success')
            return redirect(url_for('data.data_details_page', data_id=fwd_id))
        return render_template('pages/data/transforms/outlier.html',
            my_data=my_data,
            data_frame=data_frame,
            names=names,
            types=types,
            form=form)
    except Exception as e:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)
Exemple #8
0
def convert_magic_page():
    """Show the magic-value form and, on POST, blank out the magic value.

    The data set is looked up from the ``data_id`` query argument.  On POST,
    in every selected ``features`` column, each entry whose string form
    equals the submitted ``magic`` value is replaced by ``None``.  The frame
    is then stored through ``save_data`` under the submitted ``name``.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # GET (or any non-POST): just show the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/convert_magic.html',
                my_data=dataset,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        magic = str(request.form['magic'])
        output_name = str(request.form['name'])
        for feature in request.form.getlist('features'):
            column_name = str(feature)
            values = sframe[column_name].to_numpy()
            cleaned = [None if str(item) == magic else item for item in values]
            sframe[column_name] = SArray(cleaned)

        saved_id = save_data(dataset, output_name, sframe)
        flash('Successfully cleared magic values!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #9
0
def smote_step1():
    """First SMOTE step: collect the parameters and forward to the next view.

    The data set is looked up from the ``data_id`` query argument.  GET
    renders a form offering the string-typed columns as ``cols``.  POST
    redirects to ``transforms.smote_page``, passing along the data set id and
    the submitted ``target``, ``name``, ``seed``, ``neighbors`` and
    ``algorithm`` values.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # Only string-typed columns are offered in the form.
        string_columns = [
            str(column)
            for column, column_type in zip(names, types)
            if str(column_type.__name__) == "str"
        ]

        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/smote_step1.html',
                my_data=dataset,
                form=form,
                data_frame=sframe,
                names=names,
                types=types,
                target=None,
                cols=string_columns,
            )

        chosen_target = request.form['target']
        next_url = url_for(
            'transforms.smote_page',
            data_id=dataset.id,
            target=chosen_target,
            name=request.form['name'],
            seed=request.form['seed'],
            neighbors=request.form['neighbors'],
            algorithm=request.form['algorithm'],
        )
        return redirect(next_url)
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #10
0
def split_session_page():
    """Show the split-by-session form and perform the split on POST.

    The data set is looked up from the ``data_id`` query argument.  GET
    renders a form whose ``names`` are the integer-typed columns.  On POST
    the frame is split with
    ``tc.activity_classifier.util.random_split_by_session`` using the
    submitted ``idField`` and ``percent``; both parts are stored through
    ``save_data`` under the submitted ``train`` and ``test`` names and the
    user is redirected to ``main.my_project_page``.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)
        names = sframe.column_names()
        types = sframe.column_types()
        print(names)
        # Only integer-typed columns are offered in the form.
        int_columns = [
            str(column)
            for column, column_type in zip(names, types)
            if str(column_type.__name__) == "int"
        ]
        print(int_columns)

        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/split_session.html',
                my_data=dataset,
                form=form,
                data_frame=sframe,
                types=types,
                names=int_columns,
            )

        session_column = str(request.form['idField'])
        fraction = float(request.form['percent'])
        train_part, test_part = tc.activity_classifier.util.random_split_by_session(
            sframe, session_id=session_column, fraction=fraction)
        save_data(dataset, request.form['train'], train_part)
        save_data(dataset, request.form['test'], test_part)

        flash('Successfully created train/test split for ' + dataset.name + '!', 'success')
        return redirect(url_for('main.my_project_page', project_id=dataset.project_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #11
0
def rename_feature_page():
    """Show the rename-feature form and, on POST, rename one column.

    The data set is looked up from the ``data_id`` query argument.  On POST
    the ``target`` column is renamed to ``feature_name`` unless ``has_column``
    reports that a column with that name already exists, in which case an
    error is flashed and the user is sent back to the referring page.  The
    renamed frame is stored through ``save_data`` under the submitted
    ``name``.

    Any exception is flashed to the user, recorded as an ``ErrorLog`` row and
    answered with a redirect to the referring page.
    """
    try:
        dataset = UserData.query.filter_by(id=request.args.get('data_id')).first()
        blank_model = TrainedModel()
        form = TrainModelForm(request.form, obj=blank_model)
        sframe = tc.load_sframe(dataset.sname)
        names = sframe.column_names()
        types = sframe.column_types()

        # GET (or any non-POST): just show the form.
        if request.method != 'POST':
            return render_template(
                'pages/data/transforms/rename_feature.html',
                my_data=dataset,
                data_frame=sframe,
                names=names,
                types=types,
                form=form,
            )

        new_column = str(request.form['feature_name'])
        output_name = str(request.form['name'])
        old_column = str(request.form['target'])

        if has_column(new_column, sframe):
            flash('Opps!  You appear to already have a feature with this name.', 'error')
            return redirect(request.referrer)

        renamed = sframe.rename({old_column: new_column})
        saved_id = save_data(dataset, output_name, renamed)
        flash('Successfully transformed the data!', 'success')
        return redirect(url_for('data.data_details_page', data_id=saved_id))
    except Exception as exc:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP.', 'error')
        # Persist the exception class and the query arguments for later review.
        entry = ErrorLog()
        entry.user_id = current_user.id
        entry.error = str(exc.__class__)
        entry.parameters = request.args
        db.session.add(entry)
        db.session.commit()
        return redirect(request.referrer)
Exemple #12
0
def train_model_page():
    """Show the model-training form and, on POST, train and store a model.

    The data set is looked up from the ``data_id`` query argument and must
    belong to the current user.  The POST form supplies ``model`` (one of
    ``gradient``, ``linear``, ``decision``, ``random``, ``svm``, ``deep``),
    ``model_class`` (``"predictor"`` trains a regressor, anything else a
    classifier), ``target``, ``features``, ``session_id``, ``time_field`` and
    the optional ``max_depth`` / ``max_iterations`` (blank or absent means
    "use the default": depth 6, iterations 10, where the model type uses
    them).

    Rows with a missing target or feature are dropped before training.  For
    predictors several random 80/20 splits are tried and the model with the
    smallest ``max_error`` is kept; classifiers are trained once on an 80/20
    split.  Text written to ``sys.stdout`` during training is captured and
    stored as the model's console.  The trained model is saved under the data
    set's path and a ``TrainedModel`` row is committed.

    Any exception is flashed to the user (including its message), recorded
    as an ``ErrorLog`` row and answered with a redirect to the referring
    page.
    """
    try:
        tc.config.set_num_gpus(0)
        data_id = request.args.get('data_id')
        my_data = UserData.query.filter_by(id=data_id).first()
        # Compare ids by value.  ``is not`` tests object identity, which is
        # not guaranteed to hold for two equal integers and would lock owners
        # out of their own data.
        if my_data is None or my_data.user_id != current_user.id:
            flash('Opps!  Do data found', 'error')
            return redirect(request.referrer)

        my_model = TrainedModel()
        form = TrainModelForm(request.form, obj=my_model)
        data_frame = tc.load_sframe(my_data.sname)

        if request.method == 'POST':
            form.populate_obj(my_model)
            model_type = request.form['model']
            model_class = request.form['model_class']
            max_depth = _optional_int(request.form.get('max_depth'))
            max_iterations = _optional_int(request.form.get('max_iterations'))
            session_id = str(request.form['session_id'])
            time_field = str(request.form['time_field'])
            target = str(request.form['target'])
            my_model.mtype = model_type

            if model_type == 'svm':
                label_count = data_frame[target].unique()
                if len(label_count) > 2:
                    flash('SVM only supports binary classification - try another method.', 'error')
                    return redirect(request.referrer)

            if model_type != 'deep':
                # One shuffle already yields a uniformly random row order;
                # repeating it hundreds of times only costs time.
                data_frame = tc.SFrame(data=shuffle(data_frame.to_dataframe()))
            else:
                # The activity classifier is given rows ordered by session
                # and time, with an integer session id column.
                ordered = data_frame.to_dataframe().sort_values(by=[session_id, time_field])
                data_frame = tc.SFrame(data=ordered)
                data_frame[session_id] = data_frame[session_id].astype(int)

            options_dict = _training_options(model_type, max_depth, max_iterations)

            # Validate the feature list before doing any cleaning work.
            cols = [str(feature) for feature in request.form.getlist('features')]
            if target in cols:
                flash('You can not select the target field in your training features.', 'error')
                return redirect(url_for('model.train_model_page', data_id=data_id))

            data_frame_cleaned = data_frame.dropna(target, how="any")
            for column in cols:
                data_frame_cleaned = data_frame_cleaned.dropna(column, how="any")
            if data_frame_cleaned.num_rows() < 2:
                flash('After cleaning, there is no data left. You have a data quality issue.', 'error')
                return redirect(url_for('model.train_model_page', data_id=data_id))

            # Percentage of rows lost to cleaning.
            total_rows = float(data_frame.num_rows())
            training_loss = ((total_rows - float(data_frame_cleaned.num_rows())) / total_rows) * 100

            # Capture everything printed during training.  The ``finally``
            # guarantees the real stdout comes back even if training raises;
            # otherwise the whole process would keep writing into the buffer.
            old_stdout = sys.stdout
            sys.stdout = mystdout = StringIO()
            try:
                if model_class == "predictor":
                    tc_model, train_data, test_data, results, console = _search_best_regressor(
                        data_frame_cleaned, model_type, target, cols, options_dict, mystdout)
                else:
                    if model_type != 'deep':
                        train_data, test_data = data_frame_cleaned.random_split(.80, seed=0)
                    else:
                        train_data, test_data = tc.activity_classifier.util.random_split_by_session(
                            data_frame_cleaned, session_id=session_id, fraction=0.8)
                    tc_model = _fit_classifier(
                        model_type, train_data, target, cols, options_dict, session_id)
                    results = tc_model.evaluate(test_data)
                    console = mystdout.getvalue()
            finally:
                sys.stdout = old_stdout

            my_model.user_id = current_user.id
            my_model.data_id = my_data.id
            my_model.project_id = my_data.project_id
            my_model.path = my_data.path
            my_model.options = options_dict
            my_model.api_key = str(uuid.uuid4())

            # Feature importance is only requested for the tree-based models.
            imp = []
            if model_type not in ('linear', 'svm', 'deep'):
                imp = tc_model.get_feature_importance()
            importance = [
                {"name": str(col["name"]), "index": str(col["index"]), "count": str(col["count"])}
                for col in imp
            ]
            my_model.features = {
                "time_field": time_field,
                "session_id": session_id,
                "training_loss": training_loss,
                "training_rows": train_data.num_rows(),
                "test_rows": test_data.num_rows(),
                "features": cols,
                "target": target,
                "importance": importance,
                "model_type": model_type,
                "model_class": model_class,
            }

            if model_class == "predictor":
                my_model.results = results
            else:
                # Store only the summary metrics, with NaN mapped by
                # ``nan_to_null``; AUC and log loss are recorded as "N/A" for
                # the svm and deep model types.
                with_probabilities = model_type not in ('svm', 'deep')
                my_model.results = {
                    'f1_score': nan_to_null(results['f1_score']),
                    'auc': nan_to_null(results['auc']) if with_probabilities else "N/A",
                    'recall': nan_to_null(results['recall']),
                    'precision': nan_to_null(results['precision']),
                    'log_loss': nan_to_null(results['log_loss']) if with_probabilities else "N/A",
                    'accuracy': nan_to_null(results['accuracy']),
                }
            # Defensive: drop any NUL padding left in the captured text.
            my_model.console = console.strip("\x00")

            my_model.mname = os.path.join(my_model.path, str(my_model.api_key) + "_model")
            tc_model.save(my_model.mname)

            db.session.add(my_model)
            db.session.commit()
            flash('Model has been saved!', 'success')
            return redirect(url_for('model.model_details_page', model_id=my_model.id))

        return render_template('pages/models/train_model_page.html',
            my_data=my_data,
            form=form,
            data_frame=data_frame,
            names=data_frame.column_names(),
            types=data_frame.column_types())
    except Exception as e:
        flash('Opps!  Something unexpected happened.  On the brightside, we logged the error and will absolutely look at it and work to correct it, ASAP. Error: ' + str(e), 'error')
        # If the failure came from the database, the session must be rolled
        # back before it can be used again to store the error entry.
        db.session.rollback()
        error = ErrorLog()
        error.user_id = current_user.id
        error.error = str(e.__class__)
        error.parameters = request.args
        db.session.add(error)
        db.session.commit()
        return redirect(request.referrer)


def _optional_int(raw):
    """Return ``int(raw)``, or ``None`` when ``raw`` is ``None`` or blank."""
    if raw is None or not str(raw).strip():
        return None
    return int(raw)


def _training_options(model_type, max_depth, max_iterations):
    """Build the options dict for ``model_type``, filling in the defaults."""
    if model_type == 'linear':
        # The linear models take no tuning options.
        return {}
    options = {}
    if max_depth is not None:
        options['max_depth'] = max_depth
    if max_iterations is not None:
        options['max_iterations'] = max_iterations
    if model_type in ('gradient', 'decision', 'random'):
        options.setdefault('max_depth', 6)
    if model_type in ('gradient', 'random', 'svm', 'deep'):
        options.setdefault('max_iterations', 10)
    return options


def _fit_regressor(model_type, train_data, target, cols, options):
    """Create the Turi Create regression model matching ``model_type``."""
    if model_type == 'gradient':
        return tc.boosted_trees_regression.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'], max_iterations=options['max_iterations'])
    if model_type == 'linear':
        return tc.linear_regression.create(
            train_data, target=target, validation_set=None, features=cols)
    if model_type == 'decision':
        return tc.decision_tree_regression.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'])
    if model_type == 'random':
        return tc.random_forest_regression.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'], max_iterations=options['max_iterations'])
    raise ValueError('Model type "' + str(model_type) + '" can not be trained as a predictor.')


def _fit_classifier(model_type, train_data, target, cols, options, session_id):
    """Create the Turi Create classifier matching ``model_type``."""
    if model_type == 'gradient':
        return tc.boosted_trees_classifier.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'], max_iterations=options['max_iterations'])
    if model_type == 'linear':
        return tc.logistic_classifier.create(
            train_data, target=target, validation_set=None, features=cols)
    if model_type == 'decision':
        return tc.decision_tree_classifier.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'])
    if model_type == 'random':
        return tc.random_forest_classifier.create(
            train_data, target=target, validation_set=None, features=cols,
            max_depth=options['max_depth'], max_iterations=options['max_iterations'])
    if model_type == 'svm':
        return tc.svm_classifier.create(
            train_data, target=target, validation_set=None, features=cols,
            max_iterations=options['max_iterations'])
    if model_type == 'deep':
        return tc.activity_classifier.create(
            train_data, session_id=session_id, target=target, validation_set=None,
            features=cols, max_iterations=options['max_iterations'])
    raise ValueError('Model type "' + str(model_type) + '" can not be trained as a classifier.')


def _search_best_regressor(frame, model_type, target, cols, options, captured):
    """Train on repeated random splits and keep the lowest ``max_error`` run.

    ``captured`` is the buffer currently receiving stdout; its content is
    snapshotted for the winning run and cleared after every attempt.
    Returns ``(model, train_data, test_data, results, console)``.
    """
    # Smaller training sets get more attempts.
    reference_train, _ = frame.random_split(.80, seed=0)
    train_rows = reference_train.num_rows()
    if train_rows < 200:
        num_rolls = 200
    elif train_rows < 500:
        num_rolls = 100
    elif train_rows < 1000:
        num_rolls = 70
    else:
        num_rolls = 50

    best = None
    for _ in range(num_rolls):
        candidate_train, candidate_test = frame.random_split(.80)
        candidate = _fit_regressor(model_type, candidate_train, target, cols, options)
        metrics = candidate.evaluate(candidate_test)
        if best is None or metrics['max_error'] < best[3]['max_error']:
            best = (candidate, candidate_train, candidate_test, metrics, captured.getvalue())
        # ``truncate`` alone leaves the write position where it was, so the
        # next write would pad the buffer with NULs; rewind first.
        captured.seek(0)
        captured.truncate(0)
    return best
Exemple #13
0
def rolling_slope_page():
    """Show the rolling-slope transform form and, on POST, build the new data set.

    The data set is looked up from the ``data_id`` query parameter.  On POST
    the form supplies ``idField``, ``target``, ``xVal``, ``name`` and a list of
    ``features``.  Rows are processed in frame order; a "group" is a run of
    consecutive rows sharing the same ``idField`` value.  For every row the
    values of each feature and of ``xVal`` seen so far in the current group
    are regressed (``linregress`` of feature against ``xVal``) and the
    resulting slope and intercept are stored in ``<feature>_slope`` /
    ``<feature>_intercept`` columns.  Columns that are not selected features
    are copied through unchanged, and ``<target>_initial`` holds the target
    value of the first row of the group.

    Returns:
        A redirect to the details page of the newly saved data set on POST,
        otherwise the rendered transform form.
    """
    data_id = request.args.get('data_id')
    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)

    if request.method == 'POST':
        id_field = request.form['idField']
        target = request.form['target']
        x_val = request.form['xVal']
        features = [str(feat) for feat in request.form.getlist('features')]
        feature_set = set(features)

        # Per-group accumulators: one list per feature plus one for the x axis.
        feat_arrays = {feature: [] for feature in features}
        feat_arrays[x_val] = []

        # Drop rows where a selected feature, the id or the x value is missing.
        for feature in features:
            data_frame = data_frame.dropna(feature, how="all")
        data_frame = data_frame.dropna(str(id_field), how="all")
        data_frame = data_frame.dropna(str(x_val), how="all")
        size = len(data_frame)

        # Columns that are not selected features are carried over as-is.
        passthrough = [str(name) for name in data_frame.column_names()
                       if str(name) not in feature_set]
        initial_key = str(target) + "_initial"

        # Setup the final hash - make sure we have keys for everything
        final_frame_vals = {name: [] for name in passthrough}
        for key in feat_arrays:
            if key in feature_set:
                final_frame_vals[str(key) + "_slope"] = []
                final_frame_vals[str(key) + "_intercept"] = []
        final_frame_vals[initial_key] = []

        target_val = None
        previous_id = None
        for x in range(size):
            row = data_frame[x]
            current_id = row[id_field]
            if x == 0 or previous_id != current_id:
                # A new id group starts here: remember its first target value
                # and restart the slope / x-value accumulators.
                target_val = row[target]
                for key in feat_arrays:
                    feat_arrays[key] = []
            previous_id = current_id

            for feature in features:
                feat_arrays[feature].append(row[feature])
            feat_arrays[x_val].append(row[str(x_val)])

            # NOTE(review): this counts accumulator *keys* (features + xVal),
            # not accumulated points, so the first row of each group is
            # regressed on a single point -- confirm whether
            # len(feat_arrays[x_val]) > 1 was intended.
            if len(feat_arrays) > 1:
                final_row = {name: row[name] for name in passthrough}
                # Builtin float replaces the np.float alias, which NumPy
                # removed in 1.24.
                x_arr = np.array(feat_arrays[x_val]).astype(float)
                for key, values in feat_arrays.items():
                    if key != x_val:
                        y_arr = np.array(values).astype(float)
                        slope, intercept, _r, _p, _err = linregress(x_arr, y_arr)
                        final_row[key + "_slope"] = float(slope)
                        final_row[key + "_intercept"] = float(intercept)

                for key, value in final_row.items():
                    final_frame_vals[str(key)].append(value)
                final_frame_vals[initial_key].append(target_val)

        sframe = tc.SFrame(data=final_frame_vals)
        fwd_id = save_data(my_data, request.form['name'], sframe)

        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template('pages/data/transforms/rolling_slope.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        names=data_frame.column_names(),
        types=data_frame.column_types())
Exemple #14
0
def smote_page():
    """Show the SMOTE/ADASYN over-sampling form and, on POST, save the resampled data.

    Query-string parameters: ``data_id`` (data set to load), ``name``,
    ``target`` (column whose distinct values are listed on the form),
    ``seed``, ``algorithm`` (``"SMOTE"`` selects SMOTE, anything else ADASYN)
    and ``neighbors``.

    POST form fields: ``features`` (list of columns to resample), ``target``,
    ``name`` (name of the new data set) and one ``new_value<i>`` per distinct
    target value giving the requested sample count for that class.

    The saved frame contains the resampled target, one column per selected
    feature and ``<target>_uncoded`` (the resampled target stored as strings).

    Returns:
        A redirect to the new data set's details page on success, a redirect
        back to the referrer (with the error flashed) if resampling fails,
        otherwise the rendered form.
    """
    data_id = request.args.get('data_id')
    name = request.args.get('name')
    target = request.args.get('target')
    seed = request.args.get('seed')
    algorithm = request.args.get('algorithm')
    neighbors = request.args.get('neighbors')

    my_data = UserData.query.filter_by(id=data_id).first()
    my_model = TrainedModel()
    form = TrainModelForm(request.form, obj=my_model)
    data_frame = tc.load_sframe(my_data.sname)

    # Distinct target values; their positions pair up with the form's
    # new_value<i> fields.
    ntarget_data = data_frame[str(target)].unique().to_numpy()

    if request.method == 'POST':
        features_str = [str(feat) for feat in request.form.getlist('features')]

        # Only rows with every selected feature present can be resampled.
        data_frame_cleaned = data_frame
        for feat in features_str:
            data_frame_cleaned = data_frame_cleaned.dropna(feat, how="any")

        target_column = str(request.form['target'])
        # NOTE(review): the target column is also passed as part of the
        # feature matrix below; only the first len(features_str) resampled
        # columns are kept afterwards -- confirm this is intended.
        variables = features_str + [target_column]
        df = shuffle(data_frame_cleaned.to_dataframe())

        # Requested number of samples per class label.
        strategy = {}
        for position, label in enumerate(ntarget_data):
            strategy[int(label)] = int(request.form['new_value' + str(position)])

        try:
            if algorithm == "SMOTE":
                sm = SMOTE(random_state=int(seed), sampling_strategy=strategy, k_neighbors=int(neighbors) - 1)
            else:
                sm = ADASYN(random_state=int(seed), sampling_strategy=strategy, n_neighbors=int(neighbors) - 1)

            # fit_resample replaces fit_sample, which imbalanced-learn
            # removed in 0.8.
            x_res, y_res = sm.fit_resample(df[variables], df[target_column])
        except Exception as e:
            flash(str(e), 'error')
            return redirect(request.referrer)

        sf = tc.SFrame({target_column: y_res})
        resampled = pd.DataFrame(x_res)
        for position, feat in enumerate(features_str):
            # Positional access: resampled columns follow the order of
            # `variables`.  (.iloc replaces DataFrame.ix, removed in pandas 1.0.)
            column = SArray(data=list(resampled.iloc[:, position]))
            sf = sf.add_column(column, feat)
        uncoded = SArray(data=y_res, dtype=str)
        sf = sf.add_column(uncoded, target_column + "_uncoded")

        fwd_id = save_data(my_data, request.form['name'], sf)
        flash('Data transform is sucessful!', 'success')
        return redirect(url_for('data.data_details_page', data_id=fwd_id))

    return render_template('pages/data/transforms/smote.html',
        my_data=my_data,
        form=form,
        data_frame=data_frame,
        num_rows=data_frame.num_rows(),
        name=name,
        target=target,
        ntarget_data=ntarget_data,
        names=data_frame.column_names(),
        types=data_frame.column_types())