def post(self):
    """Fit a ClassificationModel on a dataset and report how well it fits.

    Keys read from ``api.payload``:
      * ``dataset`` -- passed to ``get_dataset``.
      * ``configuration`` -- optional; when present it is loaded with
        ``load_configuration`` and handed to ``modify`` with the frame.
      * ``labelColumn`` -- target column; every other column is an input.
      * ``layers`` -- passed to the ``ClassificationModel`` constructor.
      * ``epochs`` -- passed to ``fit``.
      * ``validationSplit`` -- optional, 0 when absent.

    Returns ``("Invalid layer", 400)`` if any entry of ``layers`` is not
    positive. Otherwise returns a dict holding ``score`` (share of rows
    whose predicted class equals the label, measured on the same rows the
    model was fitted on), ``plots`` (results of ``plot_to_base64``) and
    ``confusionMatrix`` as nested lists.
    """
    payload = api.payload

    dataset_name = payload['dataset']
    frame = get_dataset(dataset_name)
    if 'configuration' in payload:
        modify(frame,
               load_configuration(dataset_name, payload['configuration']))
    frame = shuffle(frame)

    label_column = payload['labelColumn']
    features = frame.drop(label_column, axis=1)
    targets = frame[label_column]
    n_inputs = features.columns.size
    n_classes = targets.unique().size

    epochs = payload['epochs']
    layers = payload['layers']
    if any(width <= 0 for width in layers):
        return "Invalid layer", 400

    validation_fraction = 0
    if 'validationSplit' in payload:
        validation_fraction = payload['validationSplit']

    # Drop any state left behind by a previous request before building.
    keras.backend.clear_session()
    network = ClassificationModel(n_inputs, n_classes, layers)
    history = network.fit(x=features,
                          y=targets,
                          epochs=epochs,
                          verbose=2,
                          validation_split=validation_fraction)

    predicted = network.predict_classes(features)
    hits = (targets == predicted).sum()
    score = float(hits / predicted.size)
    matrix = confusion_matrix(targets, predicted)

    # Each plot is captured right after it is drawn, so the draw/capture
    # pairs must stay interleaved in this order.
    plots = OrderedDict()
    plot_history_accuracy(history)
    plots['accuracy'] = plot_to_base64()
    plot_history_loss(history)
    plots['loss'] = plot_to_base64()
    plot_classification_predictions(targets,
                                    predicted,
                                    orientation='vertical',
                                    stacked=False)
    plots['predictions'] = plot_to_base64()

    return {
        'score': score,
        'plots': plots,
        'confusionMatrix': matrix.tolist()
    }
def post(self):
    """Cross-validate a ClassificationModel with stratified k-fold splits.

    Keys read from ``api.payload``:
      * ``dataset`` -- passed to ``get_dataset``.
      * ``configuration`` -- optional; when present it is loaded with
        ``load_configuration`` and handed to ``modify`` with the frame.
      * ``labelColumn`` -- target column; every other column is an input.
      * ``layers`` -- passed to the ``ClassificationModel`` constructor.
      * ``epochs`` -- passed to ``fit``.
      * ``kfolds`` -- optional number of splits, 10 when absent.

    Returns ``("Invalid layer", 400)`` if any entry of ``layers`` is not
    positive. Otherwise returns a dict with one accuracy per fold in
    ``trainScores`` and ``testScores`` plus two ``plots`` of those scores.
    """

    def _accuracy(actual, predicted):
        # Share of positions where the predicted class equals the label.
        return float((actual == predicted).sum() / predicted.size)

    payload = api.payload

    dataset_name = payload['dataset']
    frame = get_dataset(dataset_name)
    if 'configuration' in payload:
        modify(frame,
               load_configuration(dataset_name, payload['configuration']))

    label_column = payload['labelColumn']
    features = frame.drop(label_column, axis=1)
    targets = frame[label_column]
    n_inputs = features.columns.size
    n_classes = targets.unique().size

    epochs = payload['epochs']
    layers = payload['layers']
    if any(width <= 0 for width in layers):
        return "Invalid layer", 400

    train_scores = []
    test_scores = []

    n_splits = 10
    if 'kfolds' in payload:
        n_splits = payload['kfolds']

    # Folds are shuffled and no random_state is passed to the splitter.
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for train_idx, test_idx in splitter.split(features, targets):
        train_x = features.iloc[train_idx]
        train_y = targets.iloc[train_idx]
        test_x = features.iloc[test_idx]
        test_y = targets.iloc[test_idx]

        # A fresh model is built for every fold.
        keras.backend.clear_session()
        network = ClassificationModel(n_inputs, n_classes, layers)
        network.fit(x=train_x, y=train_y.values, epochs=epochs, verbose=2)

        train_scores.append(
            _accuracy(train_y, network.predict_classes(train_x)))
        test_scores.append(
            _accuracy(test_y, network.predict_classes(test_x)))

    # Each plot is captured right after it is drawn.
    plots = OrderedDict()
    plot_cross_validation(train_scores, test_scores, plot_type='bar')
    plots['crossValidation'] = plot_to_base64()
    plot_cross_validation(train_scores, test_scores, plot_type='plot')
    plots['crossValidation2'] = plot_to_base64()

    return {
        'trainScores': train_scores,
        'testScores': test_scores,
        'plots': plots,
    }
def get(self, dataset, configuration=None):
    """Return descriptive statistics and plots for every column of a dataset.

    A truthy result from ``try_load(dataset, configuration)`` is returned
    as-is. Otherwise the dataset is loaded, the named configuration (if
    any) is loaded and handed to ``modify`` with the frame, and one entry
    per column is built. The finished result is passed to ``store`` before
    being returned.

    Each column entry holds ``name``, ``type`` (dtype name), ``numeric``,
    ``descriptiveStatistics`` and ``plots``. Numeric columns report count,
    mean, std, min, p25, p50, p75 and max and get a histogram plus a
    boxplot; other columns report count and unique and get a histogram.
    """
    cached = try_load(dataset, configuration)
    if cached:
        return cached

    frame = get_dataset(dataset)
    if configuration:
        modify(frame, load_configuration(dataset, configuration))

    # Output key -> key in the Series.describe() result, in output order.
    numeric_fields = (
        ("mean", "mean"),
        ("std", "std"),
        ("min", "min"),
        ("p25", "25%"),
        ("p50", "50%"),
        ("p75", "75%"),
        ("max", "max"),
    )

    columns = []
    for column_name in frame:
        series = frame[column_name]
        summary = series.describe()
        is_numeric = np.issubdtype(series.dtype.type, np.number)

        statistics = {"count": int(summary["count"])}
        if is_numeric:
            for out_key, summary_key in numeric_fields:
                statistics[out_key] = summary[summary_key]
        else:
            statistics["unique"] = int(summary["unique"])

        # Each plot is captured right after it is drawn.
        plots = {}
        plot_histogram(series, column_name, is_numeric)
        plots['histogram'] = plot_to_base64()
        if is_numeric:
            plot_box_and_violin(series)
            plots['boxplot'] = plot_to_base64()

        columns.append({
            "name": column_name,
            "type": series.dtype.name,
            "numeric": is_numeric,
            "descriptiveStatistics": statistics,
            "plots": plots
        })

    result = {"columns": columns}
    store(dataset, configuration, result)
    return result
def get(self, name, dataset):
    """Evaluate a stored model against a dataset.

    The model and its metadata are loaded by ``name``. If the metadata
    names a configuration, it is loaded for the metadata's own dataset
    and handed to ``modify`` with the requested frame. The metadata's
    ``label`` column is the target; every other column is an input.

    Returns a dict with ``score`` (share of rows whose predicted class
    equals the label), ``plots`` (a single ``predictions`` entry) and
    ``confusionMatrix`` as nested lists.
    """
    model = load_model(name)
    metadata = load_model_metadata(name)
    frame = get_dataset(dataset)
    # TODO: make it optional
    configuration_name = metadata['configuration']
    if configuration_name:
        modify(frame,
               load_configuration(metadata['dataset'], configuration_name))

    label_column = metadata['label']
    features = frame.drop(label_column, axis=1)
    targets = frame[label_column]

    predicted = model.predict_classes(features)
    hits = (targets == predicted).sum()
    score = float(hits / predicted.size)
    matrix = confusion_matrix(targets, predicted)

    plots = OrderedDict()
    plot_classification_predictions(targets,
                                    predicted,
                                    orientation='vertical',
                                    stacked=False)
    plots['predictions'] = plot_to_base64()

    return {
        'score': score,
        'plots': plots,
        'confusionMatrix': matrix.tolist()
    }
def post(self):
    """Generate a dataset, plot it, and return the plot with its seed.

    The random state comes from ``self._get_random_state()`` and is passed
    to ``self._generate``. The two values it returns are drawn with
    ``plot_generated_dataset``.

    Returns a dict with ``randomState`` and ``plot`` (the result of
    ``plot_to_base64``).
    """
    seed = self._get_random_state()
    xs, ys = self._generate(seed)
    plot_generated_dataset(xs, ys)
    encoded_plot = plot_to_base64()
    return {
        'randomState': seed,
        'plot': encoded_plot,
    }