def predict_result_view(request):
    '''Handles the processing for the inputs that
    are received through predict.html.'''

    # clean the inputs received through predict.html
    inputs = clean_inputs(request)

    # read the input data and drop the label column
    data = pd.read_csv(inputs['data'])
    x = data.drop(inputs_check({})['label_column'], axis=1).values

    # load the trained model and get probabilities and class predictions
    K.clear_session()
    model = activate_model('model.studio.temp')
    probabilities = model.predict(x)
    predictions = model.predict_classes(x)

    # default to a running index when no id column is given
    if inputs['id_column'] is None:
        ids = list(range(len(predictions)))
        inputs['id_column'] = 'ID'
    else:
        ids = data[inputs['id_column']]

    # combine probabilities, predicted classes, and ids into one table
    out = pd.DataFrame(probabilities)
    out.columns = inputs_check({})['pred_cols']
    preds = pd.DataFrame(predictions)
    preds.columns = ['class']
    out = out.merge(preds, left_index=True, right_index=True)
    out[inputs['id_column']] = ids

    # plot the distribution of the predicted probabilities
    plot_pred = plot_histogram(pd.DataFrame(probabilities))

    # the class string needed for dataTables to work correctly
    css_class = 'table table-striped table-bordered" id="data_frame'

    return render_template('predict_result.html',
                           plot_pred=plot_pred,
                           html_source=out.to_html(classes=css_class))
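

# A hedged wiring sketch, not part of the original module: these views take
# the Flask request object explicitly, so a thin route function can pass it
# through. The app object and route path below are assumptions for
# illustration only.
from flask import Flask, request

app = Flask(__name__)


@app.route('/predict_result', methods=['POST'])
def predict_result():
    return predict_result_view(request)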


def optimize_process_view(request):
    '''Shows the results once optimize.html input is received.'''

    # clean the inputs
    inputs = clean_inputs(request)

    # get the current dataset
    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = data.iloc[:, -inputs_check({})['last_neuron']:].columns

    # combine the columns to be dropped temporarily
    if not isinstance(label_columns, list):
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    # create x and y data for optimize
    x = data.drop(cols, axis=1).values
    y = data[label_columns].values

    # run Talos hyperparameter optimization
    results = run_optimize(x,
                           y,
                           grid_downsample=inputs['grid_downsample'],
                           epochs=inputs['epochs'])

    # remove columns that have a single unique value
    for col in list(results.columns):
        if len(results[col].unique()) == 1:
            results.drop(col, axis=1, inplace=True)

    # prepare data for the contour/heatmap plot
    contour_data = corr_pearson(results)
    z = contour_data.values
    labels = contour_data.columns

    # create the plots
    plot_optimize_contour = plot_heatmap(z, labels, labels)
    plot_optimize = plot_scatter(results['val_acc'].values,
                                 results['val_loss'].values)

    return render_template(
        'optimize_result.html',
        plot_optimize=plot_optimize,
        plot_optimize_contour=plot_optimize_contour,
        html_source=results.to_html(
            classes='table table-striped table-bordered" id="data_frame'))


def wrangle_process_view(request):
    '''Shows the results once wrangle.html input is received.'''

    # get the inputs from wrangle.html
    inputs = clean_inputs(request)

    # get the current version of input data
    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = data.iloc[:, -inputs_check({})['last_neuron']:].columns

    # combine the columns to be dropped temporarily
    if not isinstance(label_columns, list):
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    # remove prediction and label columns temporarily
    data_temp = data[cols]
    data.drop(cols, axis=1, inplace=True)

    # handle the user inputs for data transformation
    for key in inputs:
        if inputs[key] not in ['False', []]:
            if key == 'drop_cols':
                data = getattr(wrangle_tools, key)(data, inputs[key])
            elif key == 'transformation':
                data = getattr(wrangle_tools, inputs[key])(data)
            else:
                data = getattr(wrangle_tools, key)(data)

    # join back the prediction and label columns
    data = data.merge(data_temp, left_index=True, right_index=True)

    # store a temporary file pending user confirmation to save
    store_hdf5(data, 'dataframe_temp')

    # the class string needed for dataTables to work correctly
    css_class = 'table table-striped table-bordered" id="data_frame'

    return render_template('wrangle_process.html',
                           html_source=data.to_html(classes=css_class))
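

# A self-contained illustration of the getattr dispatch used above, with a
# stand-in namespace in place of the real wrangle_tools module; all names
# below are hypothetical:
import types

demo_tools = types.SimpleNamespace(
    drop_cols=lambda df, cols: df.drop(cols, axis=1),
    impute_mean=lambda df: df.fillna(df.mean()))

demo_df = pd.DataFrame({'a': [1.0, None], 'b': [3, 4], 'c': [5, 6]})
demo_df = getattr(demo_tools, 'drop_cols')(demo_df, ['c'])   # drops column c
demo_df = getattr(demo_tools, 'impute_mean')(demo_df)        # fills the NaN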


def restore_data():
    '''Restores x, y, and the full dataframe from the stored hdf5 data.'''

    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = inputs_check({})['pred_cols']

    # combine the columns to be dropped temporarily
    if not isinstance(label_columns, list):
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    x = data.drop(cols, axis=1).values
    y = data[label_columns].values

    return x, y, data


def last_activation_check():
    '''Picks the last-layer activation based on the prediction type.'''

    pred_type = inputs_check({})['prediction_type']

    if pred_type == 'binary':
        return 'sigmoid'
    elif pred_type in ['multiclass', 'multilabel']:
        return 'softmax'
    elif pred_type == 'continuous':
        return None


def loss_check():
    '''Picks the loss function based on the prediction type.'''

    pred_type = inputs_check({})['prediction_type']

    if pred_type == 'binary':
        return 'binary_crossentropy'
    elif pred_type == 'multiclass':
        return 'sparse_categorical_crossentropy'
    elif pred_type == 'multilabel':
        return 'categorical_crossentropy'
    elif pred_type == 'continuous':
        return 'mean_absolute_error'
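

# Taken together, the two checks above pair each prediction type with a
# last-layer activation and a loss (values copied from the functions):
#
#   binary     -> sigmoid + binary_crossentropy
#   multiclass -> softmax + sparse_categorical_crossentropy
#   multilabel -> softmax + categorical_crossentropy
#   continuous -> None    + mean_absolute_error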


def default_params():
    '''A general exploratory set of parameters'''

    p = {
        'neurons': [8, 16, 32, 64, 128, 256],
        'dropout': [0.1, 0.2, 0.3, 0.4, 0.5],
        'batch_size': [5, 10, 20, 40, 80],
        'layers': [1, 2, 3, 4, 5],
        'activation': ['relu', 'elu'],
        'last_activation': [last_activation_check()],
        'last_neuron': [inputs_check({})['last_neuron']],
        'optimizer': ['Nadam', 'Adam'],
        'losses': [loss_check()],
        'metric': ['acc'],
        'validation_split': [0.3]
    }

    return p
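

# A hedged sketch of how these defaults might feed the Talos scan behind
# run_optimize; the wrapper itself is not part of this module, and the Scan
# call below assumes an older talos release where grid_downsample was still
# a Scan argument:
import talos


def run_optimize_sketch(x, y, grid_downsample, epochs):
    p = default_params()
    p['epochs'] = [int(epochs)]  # assumption: epochs joins the param grid
    scan = talos.Scan(x=x,
                      y=y,
                      params=p,
                      model=simple_neural_net,
                      grid_downsample=float(grid_downsample))
    return scan.data  # one row of results per tested permutation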


def get_last_neuron(y, inputs):
    '''Infers the last_neuron count based on the type of prediction
    task in question.'''

    # get uniques for multiclass case
    if inputs['prediction_type'] == 'multiclass':
        inputs['last_neuron'] = len(np.unique(y))
        inputs['pred_cols'] = np.unique(y).tolist()

    # get dimensions for multilabel
    elif inputs['prediction_type'] == 'multilabel':
        inputs['last_neuron'] = y.shape[1]
        inputs['pred_cols'] = list(range(y.shape[1]))

    # assume as one for both binary and continuous cases
    else:
        inputs['last_neuron'] = 1
        inputs['pred_cols'] = [0]

    inputs['pred_cols'] = ['pred_' + str(i) for i in inputs['pred_cols']]
    inputs = inputs_check(inputs)

    return inputs
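

# A minimal, self-contained illustration of the same inference rule for a toy
# multiclass target (numpy only; no project helpers involved):
demo_y = np.array([0, 2, 1, 2, 0])
demo_last_neuron = len(np.unique(demo_y))  # 3 unique classes -> 3 neurons
demo_pred_cols = ['pred_' + str(i) for i in np.unique(demo_y).tolist()]
# -> ['pred_0', 'pred_1', 'pred_2']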


def training_view(request):
    '''Handles everything related to training, both the first
    training run after new inputs and subsequent ones.

    Takes its input from '/' i.e. studio.html and loads from cache
    as required for subsequent uses.

    request : flask post request

    '''

    # C O N N E C T I O N
    inputs = clean_inputs(request)

    # handle the case where inputs are already in
    inputs = inputs_check(inputs)

    # load the data for the new input case
    referrer = request.headers.get("Referer")
    refs = ['http://127.0.0.1:5000/training', 'http://127.0.0.1:5000/']

    if referrer in refs:
        x, y, data = load_data(inputs)

    # load the data for all other cases
    else:
        x, y, data = restore_data()

    # calculate last neuron count
    inputs = get_last_neuron(y, inputs)

    K.clear_session()
    # create and train the model (same model goes to optimize)
    history, model = simple_neural_net(x, y, None, None, params=inputs)

    # add the predictions to the dataframe
    preds = pd.DataFrame(model.predict(x))
    preds.columns = ['pred_' + str(i) for i in range(len(preds.columns))]
    try:
        data.drop(preds.columns, axis=1, inplace=True)
    except KeyError:
        pass
    data = data.merge(preds, left_index=True, right_index=True)

    # store the dataframe and model
    save_model_as(model, 'model.studio.temp')
    store_hdf5(data, 'dataframe')

    # perform cross-validation
    score_mean, score_std, validated = cross_validation(x, y, inputs, model)

    # separate the prediction column/s
    preds = data.iloc[:, -inputs['last_neuron']:]

    # create the plots
    cross_val, train, train_val, predictions = create_plots(
        validated, history, preds)

    # R E N D E R  P A G E
    return render_template('training.html',
                           plot_cross_val=cross_val,
                           plot_train=train,
                           plot_train_val=train_val,
                           plot_predictions=predictions)