def predict_result_view(request):

    '''Takes care of the processing for the inputs
    that are received through predict.html'''

    inputs = clean_inputs(request)
    data = pd.read_csv(inputs['data'])
    x = data.drop(inputs_check({})['label_column'], axis=1).values

    K.clear_session()
    model = activate_model('model.studio.temp')

    probabilities = model.predict(x)
    predictions = model.predict_classes(x)

    # fall back to a running index when no id column is given
    if inputs['id_column'] is None:
        ids = list(range(len(predictions)))
        inputs['id_column'] = 'ID'
    else:
        ids = data[inputs['id_column']]

    out = pd.DataFrame(probabilities)
    out.columns = inputs_check({})['pred_cols']

    preds = pd.DataFrame(predictions)
    preds.columns = ['class']

    out = out.merge(preds, left_index=True, right_index=True)
    out[inputs['id_column']] = ids

    plot_pred = plot_histogram(pd.DataFrame(probabilities))

    # the class string needed for dataTables to work correctly
    css_class = 'table table-striped table-bordered" id="data_frame'

    return render_template('predict_result.html',
                           plot_pred=plot_pred,
                           html_source=out.to_html(classes=css_class))

def optimize_process_view(request):

    '''Shows the results once optimize.html input is received.'''

    # clean the inputs
    inputs = clean_inputs(request)

    # get the current dataset
    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = data.iloc[:, -inputs_check({})['last_neuron']:].columns

    # combine the columns to be dropped temporarily
    if isinstance(label_columns, list) is False:
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    # create x and y data for optimize
    x = data.drop(cols, axis=1).values
    y = data[label_columns].values

    # run the Talos hyperparameter optimization
    results = run_optimize(x, y,
                           grid_downsample=inputs['grid_downsample'],
                           epochs=inputs['epochs'])

    # remove columns that have a single unique value
    # (iterate over a copy of the column list, as the loop mutates results)
    for col in results.columns.tolist():
        if len(results[col].unique()) == 1:
            results.drop(col, axis=1, inplace=True)

    # prepare data for the contour/heatmap plot
    contour_data = corr_pearson(results)
    z = contour_data.values
    labels = contour_data.columns

    # create the plots
    plot_optimize_contour = plot_heatmap(z, labels, labels)
    plot_optimize = plot_scatter(results['val_acc'].values,
                                 results['val_loss'].values)

    return render_template(
        'optimize_result.html',
        plot_optimize=plot_optimize,
        plot_optimize_contour=plot_optimize_contour,
        html_source=results.to_html(
            classes='table table-striped table-bordered" id="data_frame'))

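# A minimal sketch of what corr_pearson is assumed to do above (the actual
# helper lives elsewhere in this repo and may differ): a pairwise Pearson
# correlation matrix over the numeric result columns, which supplies the
# z values and axis labels for the heatmap.
def corr_pearson_sketch(results):

    '''Illustrative only; not the repo's corr_pearson implementation.'''

    # keep numeric columns only; pandas computes Pearson correlations
    # by default
    return results.select_dtypes(include='number').corr(method='pearson')
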
def wrangle_process_view(request):

    '''Handles the data transformations requested through wrangle.html.'''

    # get the inputs from wrangle.html
    inputs = clean_inputs(request)

    # get the current version of input data
    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = data.iloc[:, -inputs_check({})['last_neuron']:].columns

    # combine the columns to be dropped temporarily
    if isinstance(label_columns, list) is False:
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    # remove prediction and label columns temporarily
    data_temp = data[cols]
    data.drop(cols, axis=1, inplace=True)

    # handle the user inputs for data transformation
    for key in inputs:
        if inputs[key] not in ['False', []]:
            if key == 'drop_cols':
                data = getattr(wrangle_tools, key)(data, inputs[key])
            elif key == 'transformation':
                data = getattr(wrangle_tools, inputs[key])(data)
            else:
                data = getattr(wrangle_tools, key)(data)

    # join back the prediction and label columns
    data = data.merge(data_temp, left_index=True, right_index=True)

    # store a temporary file pending user confirmation to save
    store_hdf5(data, 'dataframe_temp')

    # the class string needed for dataTables to work correctly
    css_class = 'table table-striped table-bordered" id="data_frame'

    return render_template('wrangle_process.html',
                           html_source=data.to_html(classes=css_class))

def restore_data():

    '''Restores x, y, and the full dataframe from the stored dataset.'''

    data = retrieve_hdf5('dataframe')

    # get the name of the label column
    label_columns = inputs_check({})['label_column']

    # get the name/s of the prediction columns
    pred_cols = inputs_check({})['pred_cols']

    # combine the columns to be dropped temporarily
    if isinstance(label_columns, list) is False:
        label_columns = [label_columns]
    cols = list(pred_cols) + label_columns

    x = data.drop(cols, axis=1).values
    y = data[label_columns].values

    return x, y, data

def last_activation_check():

    '''Returns the last layer activation based on the prediction type.'''

    pred_type = inputs_check({})['prediction_type']

    if pred_type == 'binary':
        return 'sigmoid'
    elif pred_type == 'multiclass':
        return 'softmax'
    elif pred_type == 'multilabel':
        return 'softmax'
    elif pred_type == 'continuous':
        return None

def loss_check():

    '''Returns the loss function based on the prediction type.'''

    pred_type = inputs_check({})['prediction_type']

    if pred_type == 'binary':
        return 'binary_crossentropy'
    elif pred_type == 'multiclass':
        return 'sparse_categorical_crossentropy'
    elif pred_type == 'multilabel':
        return 'categorical_crossentropy'
    elif pred_type == 'continuous':
        return 'mean_absolute_error'

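# A minimal usage sketch (not from the original source) showing how the two
# checks above would typically feed the output layer and compilation of a
# Keras model. The hidden layer size here is illustrative; 'last_neuron'
# comes from inputs_check as elsewhere in this module. Note that a None
# activation (the continuous case) gives Keras's default linear output.
def example_output_layer(x):

    '''Illustrative only; a sketch of how the checks plug into Keras.'''

    from keras.models import Sequential
    from keras.layers import Dense

    model = Sequential()
    model.add(Dense(32, input_dim=x.shape[1], activation='relu'))
    model.add(Dense(inputs_check({})['last_neuron'],
                    activation=last_activation_check()))
    model.compile(optimizer='adam',
                  loss=loss_check(),
                  metrics=['acc'])

    return model
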
def default_params():

    '''A general exploratory set of parameters'''

    p = {'neurons': [8, 16, 32, 64, 128, 256],
         'dropout': [0.1, 0.2, 0.3, 0.4, 0.5],
         'batch_size': [5, 10, 20, 40, 80],
         'layers': [1, 2, 3, 4, 5],
         'activation': ['relu', 'elu'],
         'last_activation': [last_activation_check()],
         'last_neuron': [inputs_check({})['last_neuron']],
         'optimizer': ['Nadam', 'Adam'],
         'losses': [loss_check()],
         'metric': ['acc'],
         'validation_split': [0.3]}

    return p

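# A hedged sketch (not the source's simple_neural_net) of how an optimizer
# such as Talos consumes the dictionary above: each scan round it picks one
# value per key and passes the resulting dict to a model-building function.
# Names below mirror the keys in default_params(); 'epochs' is assumed to
# be injected by run_optimize.
def example_params_model(x_train, y_train, x_val, y_val, params):

    '''Illustrative only; a model function driven by a params dict.'''

    from keras.models import Sequential
    from keras.layers import Dense, Dropout

    model = Sequential()
    model.add(Dense(params['neurons'],
                    input_dim=x_train.shape[1],
                    activation=params['activation']))
    model.add(Dropout(params['dropout']))

    # add the requested number of hidden layers
    for _ in range(params['layers']):
        model.add(Dense(params['neurons'],
                        activation=params['activation']))

    model.add(Dense(params['last_neuron'],
                    activation=params['last_activation']))
    model.compile(optimizer=params['optimizer'],
                  loss=params['losses'],
                  metrics=[params['metric']])

    history = model.fit(x_train, y_train,
                        epochs=params.get('epochs', 10),
                        batch_size=params['batch_size'],
                        validation_split=params['validation_split'],
                        verbose=0)

    return history, model
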
def get_last_neuron(y, inputs):

    '''Infers the last_neuron count based on the type
    of prediction task in question.'''

    # get uniques for the multiclass case
    if inputs['prediction_type'] == 'multiclass':
        inputs['last_neuron'] = len(np.unique(y))
        inputs['pred_cols'] = np.unique(y).tolist()

    # get dimensions for multilabel
    elif inputs['prediction_type'] == 'multilabel':
        inputs['last_neuron'] = y.shape[1]
        inputs['pred_cols'] = list(range(y.shape[1]))

    # assume one for both the binary and continuous cases
    else:
        inputs['last_neuron'] = 1
        inputs['pred_cols'] = [0]

    inputs['pred_cols'] = ['pred_' + str(i) for i in inputs['pred_cols']]
    inputs = inputs_check(inputs)

    return inputs

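# A small usage example with hypothetical values: for a three-class target
# the last layer gets three neurons, and the prediction columns are named
# after the class labels (inputs_check is assumed to fill in and validate
# the remaining keys of the inputs dict).
#
#   y = np.array([0, 1, 2, 1, 0])
#   inputs = get_last_neuron(y, {'prediction_type': 'multiclass'})
#   inputs['last_neuron']   # -> 3
#   inputs['pred_cols']     # -> ['pred_0', 'pred_1', 'pred_2']
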
def training_view(request):

    '''This view handles everything related to training, both
    first-time training after inputs and consequent runs. Takes
    its input from '/' i.e. studio.html and loads from cache as
    required for consequent uses.

    request : flask post request
    '''

    # C O N N E C T I O N
    inputs = clean_inputs(request)

    # handle the case where inputs are already in
    inputs = inputs_check(inputs)

    # load the data for the new input case
    referrer = request.headers.get("Referer")
    refs = ['http://127.0.0.1:5000/training', 'http://127.0.0.1:5000/']
    if referrer in refs:
        x, y, data = load_data(inputs)
    # load the data for all other cases
    else:
        x, y, data = restore_data()

    # calculate last neuron count
    inputs = get_last_neuron(y, inputs)

    K.clear_session()

    # create and train the model (same model goes to optimize)
    history, model = simple_neural_net(x, y, None, None, params=inputs)

    # add the predictions to the dataframe
    preds = pd.DataFrame(model.predict(x))
    preds.columns = ['pred_' + str(i) for i in range(len(preds.columns))]
    try:
        data.drop(preds.columns, axis=1, inplace=True)
    except KeyError:
        pass
    data = data.merge(preds, left_index=True, right_index=True)

    # store the dataframe and model
    save_model_as(model, 'model.studio.temp')
    store_hdf5(data, 'dataframe')

    # perform cross-validation
    score_mean, score_std, validated = cross_validation(x, y, inputs, model)

    # separate the prediction column/s
    preds = data.iloc[:, -inputs['last_neuron']:]

    # create the plots
    cross_val, train, train_val, predictions = create_plots(validated,
                                                            history,
                                                            preds)

    # R E N D E R  P A G E
    return render_template('training.html',
                           plot_cross_val=cross_val,
                           plot_train=train,
                           plot_train_val=train_val,
                           plot_predictions=predictions)