def __init__(
    self,
    path2dataset,
    result_filename,
    neighbor_suffix=None,
    expression_suffix=None,
    showHyperparameters=False,
):
    self.path2dataset = Path(path2dataset)
    self.result_filename = self.path2dataset / 'results' / result_filename
    print(f'Result file = {self.result_filename}')

    with openH5File(self.result_filename, 'r') as f:
        for k in f['hyperparameters'].keys():
            v = f[f'hyperparameters/{k}']
            if isinstance(v, h5py.Group):
                continue
            v = v[()]
            if k in ['repli_list']:
                v = np.array([_.decode('utf-8') for _ in v])
            setattr(self, k, v)
            if showHyperparameters:
                print(f'{k} \t= {v}')
    self.num_repli = len(self.repli_list)
    self.use_spatial = [True] * self.num_repli

    loadDataset(self, neighbor_suffix=neighbor_suffix, expression_suffix=expression_suffix)

    self.columns_latent_states = np.array([f'latent state {i}' for i in range(self.K)])
    self.columns_exprs = np.array([f'expr {_}' for _ in self.genes[0]])
    self.data = pd.DataFrame(index=range(sum(self.Ns)))
    self.data[['coor X', 'coor Y']] = np.concatenate([
        loadExpression(self.path2dataset / 'files' / f'coordinates_{repli}.txt')
        for repli in self.repli_list
    ], axis=0)
    self.data['cell type'] = np.concatenate([
        np.loadtxt(self.path2dataset / 'files' / f'celltypes_{repli}.txt', dtype=str)
        for repli in self.repli_list
    ], axis=0)
    self.data['repli'] = sum([[repli] * N for repli, N in zip(self.repli_list, self.Ns)], [])
    self.data[self.columns_exprs] = np.concatenate(self.YTs, axis=0)
    self.scaling = [
        G / self.GG * self.K / YT.sum(1).mean()
        for YT, G in zip(self.YTs, self.Gs)
    ]
    self.colors = {}
    self.orders = {}
    self.metagene_order = np.arange(self.K)
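# Usage sketch (hypothetical): the enclosing class is not shown in this
# excerpt, so `Result` below is an assumed placeholder name; paths and
# filenames are illustrative.
#
#   result = Result('path/to/dataset', 'result.h5', showHyperparameters=True)
#   result.data[['coor X', 'coor Y', 'cell type', 'repli']].head()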
def __init__(
    self,
    path2dataset,
    repli_list,
    use_spatial,
    neighbor_suffix,
    expression_suffix,
    K,
    lambda_SigmaXInv,
    betas,
    prior_x_modes,
    result_filename=None,
    PyTorch_device='cpu',
    num_processes=1,
):
    self.PyTorch_device = PyTorch_device
    self.num_processes = num_processes
    self.path2dataset = Path(path2dataset)
    self.repli_list = repli_list
    self.use_spatial = use_spatial
    self.num_repli = len(self.repli_list)
    assert len(self.repli_list) == len(self.use_spatial)

    loadDataset(self, neighbor_suffix=neighbor_suffix, expression_suffix=expression_suffix)

    self.K = K
    self.YTs = [
        G / self.GG * self.K * YT / YT.sum(1).mean()
        for YT, G in zip(self.YTs, self.Gs)
    ]
    self.lambda_SigmaXInv = lambda_SigmaXInv
    self.betas = betas
    self.prior_x_modes = prior_x_modes
    self.M_constraint = 'sum2one'
    self.X_constraint = 'none'
    self.dropout_mode = 'raw'
    self.sigma_yx_inv_mode = 'average'
    self.pairwise_potential_mode = 'normalized'

    if result_filename is not None:
        os.makedirs(self.path2dataset / 'results', exist_ok=True)
        self.result_filename = self.path2dataset / 'results' / result_filename
        logging.info(f'{print_datetime()}result file = {self.result_filename}')
    else:
        self.result_filename = None
    self.saveHyperparameters()
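# Usage sketch (hypothetical): the enclosing class is not shown in this
# excerpt, so `SpatialModel` below is an assumed placeholder name and all
# argument values are illustrative only.
#
#   model = SpatialModel(
#       'path/to/dataset',
#       repli_list=['0', '1'], use_spatial=[True, True],
#       neighbor_suffix=None, expression_suffix=None,
#       K=20, lambda_SigmaXInv=1e-4, betas=[1., 1.],
#       prior_x_modes=[...],  # dataset-specific prior modes, not shown here
#       result_filename='result.h5', PyTorch_device='cuda', num_processes=4,
#   )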
def loadModelForDataset(model_class, dataset_string, experiment_folder_name=None):

    log_file = sys.stdout if experiment_folder_name is None else open(
        f'{experiment_folder_name}/log_training.txt', 'w')

    if model_class not in {'lr', 'mlp', 'tree', 'forest'}:
        raise Exception(f'{model_class} not supported.')

    if dataset_string not in {
            'random', 'mortgage', 'twomoon', 'german', 'credit', 'compass', 'adult'
    }:
        raise Exception(f'{dataset_string} not supported.')

    dataset_obj = loadData.loadDataset(dataset_string,
                                       return_one_hot=True,
                                       load_from_cache=True,
                                       debug_flag=False)
    X_train, X_test, y_train, y_test = dataset_obj.getTrainTestSplit()
    feature_names = dataset_obj.getInputAttributeNames(
        'kurz')  # easier to read (nothing to do with one-hot vs non-one-hot!)

    if model_class == 'tree':
        model_pretrain = DecisionTreeClassifier()
    elif model_class == 'forest':
        model_pretrain = RandomForestClassifier()
    elif model_class == 'lr':
        # IMPORTANT: the default solver changed from 'liblinear' to 'lbfgs' in
        # scikit-learn 0.22; therefore, results may differ slightly from the paper.
        model_pretrain = LogisticRegression()  # default penalty='l2', i.e., ridge
    elif model_class == 'mlp':
        model_pretrain = MLPClassifier(hidden_layer_sizes=(10, 10))

    print(
        f'[INFO] Training `{model_class}` on {X_train.shape[0]:,} samples '
        f'(%{100 * X_train.shape[0] / (X_train.shape[0] + X_test.shape[0]):.2f} '
        f'of {X_train.shape[0] + X_test.shape[0]:,} samples)...',
        file=log_file,
    )
    model_trained = model_pretrain.fit(X_train, y_train)
    print(
        f'\tTraining accuracy: %{accuracy_score(y_train, model_trained.predict(X_train)) * 100:.2f}',
        file=log_file)
    print(
        f'\tTesting accuracy: %{accuracy_score(y_test, model_trained.predict(X_test)) * 100:.2f}',
        file=log_file)
    print('[INFO] done.\n', file=log_file)

    if model_class == 'tree':
        if SIMPLIFY_TREES:
            print('[INFO] Simplifying decision tree...', end='', file=log_file)
            model_trained.tree_ = treeUtils.simplifyDecisionTree(model_trained, False)
            print('\tdone.', file=log_file)
        treeUtils.saveTreeVisualization(model_trained, model_class, '', X_test,
                                        feature_names, experiment_folder_name)
    elif model_class == 'forest':
        for tree_idx in range(len(model_trained.estimators_)):
            if SIMPLIFY_TREES:
                print(
                    f'[INFO] Simplifying decision tree (#{tree_idx + 1}/{len(model_trained.estimators_)})...',
                    end='',
                    file=log_file)
                model_trained.estimators_[tree_idx].tree_ = treeUtils.simplifyDecisionTree(
                    model_trained.estimators_[tree_idx], False)
                print('\tdone.', file=log_file)
            treeUtils.saveTreeVisualization(model_trained.estimators_[tree_idx],
                                            model_class, f'tree{tree_idx}', X_test,
                                            feature_names, experiment_folder_name)

    if experiment_folder_name:
        pickle.dump(model_trained,
                    open(f'{experiment_folder_name}/_model_trained', 'wb'))

    return model_trained
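# Usage sketch: train a logistic-regression model on the `adult` dataset and
# pickle it under an experiment folder. The folder path is illustrative and
# must already exist.
#
#   model = loadModelForDataset('lr', 'adult',
#                               experiment_folder_name='_experiments/demo')
#   y_hat = model.predict(X_new)  # X_new: samples encoded like the training data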
def main():
    model = myConvNet()
    if cuda:
        model = model.cuda()
    model.apply(weights_init)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    loss_fn = nn.CrossEntropyLoss()
    mean_train_losses = []
    epochs = 10000

    trainref = 'bigtrain'
    testref = 'bigtest'
    print(f"Trainref: {trainref}, testref: {testref}")
    trainset = loadData.loadDataset(trainref,
                                    flipHorizontal=True,
                                    flipVertical=True,
                                    meanNorm=True,
                                    stdNorm=False)
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=500,
                                               shuffle=True,
                                               num_workers=2)
    testset = loadData.loadDataset(testref,
                                   flipHorizontal=False,
                                   flipVertical=False,
                                   meanNorm=True,
                                   stdNorm=False)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=12,
                                              shuffle=True,
                                              num_workers=2)

    train = True
    for epoch in range(epochs):
        if train:
            train_losses = []
            for i, (images, labels) in enumerate(train_loader):
                optimizer.zero_grad()
                inputs = images.float()
                if cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                loss.backward()
                optimizer.step()
                train_losses.append(loss.item())
            mean_train_losses.append(np.mean(train_losses))
            print("Train losses: ", np.mean(train_losses))

        # Evaluate on the test set without tracking gradients.
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                inputs = images.float()
                if cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        if epoch % 10 == 0:
            # Save a checkpoint every 10 epochs.
            torch.save(model.state_dict(), f"models/model_{epoch}.pth")

        accuracy = 100 * correct / total
        print('epoch : {}, train loss : {:.4f}, accuracy: {:.4f}'.format(
            epoch + 1, np.mean(train_losses), accuracy))
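# Sketch: resuming from one of the checkpoints saved above. The epoch number
# is illustrative; restore the state dict into a fresh network and switch to
# eval mode before evaluating.
#
#   model = myConvNet()
#   model.load_state_dict(torch.load("models/model_100.pth"))
#   model.eval()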
def runExperiments(dataset_values, model_class_values, norm_values,
                   approaches_values, batch_number, sample_count, gen_cf_for,
                   process_id):

    for dataset_string in dataset_values:
        print(f'\n\nExperimenting with dataset_string = `{dataset_string}`')
        for model_class_string in model_class_values:
            print(f'\tExperimenting with model_class_string = `{model_class_string}`')
            for norm_type_string in norm_values:
                print(f'\t\tExperimenting with norm_type_string = `{norm_type_string}`')
                for approach_string in approaches_values:
                    print(f'\t\t\tExperimenting with approach_string = `{approach_string}`')

                    # if norm_type_string == 'two_norm':
                    #     raise Exception(f'{norm_type_string} not supported.')

                    if model_class_string in {'tree', 'forest'}:
                        one_hot = False
                    elif model_class_string in {'lr', 'mlp'}:
                        one_hot = True
                    else:
                        raise Exception(
                            f'{model_class_string} not recognized as a valid `model_class_string`.')

                    # Prepare the experiment folder.
                    experiment_name = (
                        f'{dataset_string}__{model_class_string}__{norm_type_string}'
                        f'__{approach_string}__batch{batch_number}__samples{sample_count}'
                        f'__pid{process_id}')
                    experiment_folder_name = (
                        f"_experiments/{datetime.now().strftime('%Y.%m.%d_%H.%M.%S')}"
                        f"__{experiment_name}")
                    explanation_folder_name = f'{experiment_folder_name}/__explanation_log'
                    minimum_distance_folder_name = f'{experiment_folder_name}/__minimum_distances'
                    os.mkdir(experiment_folder_name)
                    os.mkdir(explanation_folder_name)
                    os.mkdir(minimum_distance_folder_name)
                    log_file = open(f'{experiment_folder_name}/log_experiment.txt', 'w')

                    # Save the dataset object for reproducibility.
                    dataset_obj = loadData.loadDataset(dataset_string,
                                                       return_one_hot=one_hot,
                                                       load_from_cache=False,
                                                       debug_flag=False)
                    pickle.dump(dataset_obj,
                                open(f'{experiment_folder_name}/_dataset_obj', 'wb'))

                    # Training portion is used to train models;
                    # testing portion is used to compute counterfactuals.
                    X_train, X_test, y_train, y_test = dataset_obj.getTrainTestSplit()
                    standard_deviations = list(X_train.std())

                    # Train the model.
                    # model_trained = modelTraining.trainAndSaveModels(
                    #     model_class_string,
                    #     dataset_string,
                    #     experiment_folder_name,
                    # )
                    model_trained = loadModel.loadModelForDataset(
                        model_class_string,
                        dataset_string,
                        experiment_folder_name=experiment_folder_name)

                    # Get the predicted labels (test set only).
                    # X_test = pd.concat([X_train, X_test])  # ONLY ACTIVATE THIS WHEN TEST SET IS NOT LARGE ENOUGH TO GEN' MODEL RECON DATASET
                    X_test_pred_labels = model_trained.predict(X_test)

                    all_pred_data_df = X_test.copy()  # copy to avoid mutating X_test
                    # IMPORTANT: note that 'y' is actually 'pred_y', not 'true_y'.
                    all_pred_data_df['y'] = X_test_pred_labels
                    neg_pred_data_df = all_pred_data_df.where(all_pred_data_df['y'] == 0).dropna()
                    pos_pred_data_df = all_pred_data_df.where(all_pred_data_df['y'] == 1).dropna()

                    batch_start_index = batch_number * sample_count
                    batch_end_index = (batch_number + 1) * sample_count

                    # Generate counterfactuals for {only negative, only positive,
                    # negative & positive} samples, choosing only a subset to compare.
                    if gen_cf_for == 'neg_only':
                        iterate_over_data_df = neg_pred_data_df[batch_start_index:batch_end_index]
                        observable_data_df = pos_pred_data_df
                    elif gen_cf_for == 'pos_only':
                        iterate_over_data_df = pos_pred_data_df[batch_start_index:batch_end_index]
                        observable_data_df = neg_pred_data_df
                    elif gen_cf_for == 'neg_and_pos':
                        iterate_over_data_df = all_pred_data_df[batch_start_index:batch_end_index]
                        observable_data_df = all_pred_data_df
                    else:
                        raise Exception(f'{gen_cf_for} not recognized as a valid `gen_cf_for`.')

                    # Convert to dictionaries for easier iteration.
                    iterate_over_data_dict = iterate_over_data_df.T.to_dict()
                    observable_data_dict = observable_data_df.T.to_dict()

                    # Loop through samples for which we desire a counterfactual
                    # (to be saved as part of the same file of minimum distances).
                    explanation_counter = 1
                    all_minimum_distances = {}
                    for factual_sample_index, factual_sample in iterate_over_data_dict.items():
                        factual_sample['y'] = bool(factual_sample['y'])
                        print(
                            '\t\t\t\t'
                            f'Generating explanation for\t'
                            f'batch #{batch_number}\t'
                            f'sample #{explanation_counter}/{len(iterate_over_data_dict.keys())}\t'
                            f'(sample index {factual_sample_index}): ',
                            end='')  # , file=log_file)
                        explanation_counter += 1
                        explanation_file_name = f'{explanation_folder_name}/sample_{factual_sample_index}.txt'

                        explanation_object = generateExplanations(
                            approach_string,
                            explanation_file_name,
                            model_trained,
                            dataset_obj,
                            factual_sample,
                            norm_type_string,
                            observable_data_dict,  # used solely for the minimum_observable method
                            standard_deviations,  # used solely for the feature_tweaking method
                        )

                        if 'MINT' in approach_string:
                            print(
                                f'\t- scf_found: {explanation_object["scf_found"]} -'
                                f'\t- scf_plaus: {explanation_object["scf_plausible"]} -'
                                f'\t- scf_time: {explanation_object["scf_time"]:.4f} -'
                                f'\t- int_cost: {explanation_object["int_cost"]:.4f} -'
                                f'\t- scf_dist: {explanation_object["scf_distance"]:.4f} -'
                            )  # , file=log_file)
                        else:  # 'MACE' or other
                            print(
                                f'\t- cfe_found: {explanation_object["cfe_found"]} -'
                                f'\t- cfe_plaus: {explanation_object["cfe_plausible"]} -'
                                f'\t- cfe_time: {explanation_object["cfe_time"]:.4f} -'
                                f'\t- int_cost: N/A -'
                                f'\t- cfe_dist: {explanation_object["cfe_distance"]:.4f} -'
                            )  # , file=log_file)

                        all_minimum_distances[f'sample_{factual_sample_index}'] = explanation_object

                    pickle.dump(all_minimum_distances,
                                open(f'{experiment_folder_name}/_minimum_distances', 'wb'))
                    pprint(all_minimum_distances,
                           open(f'{experiment_folder_name}/minimum_distances.txt', 'w'))
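# Usage sketch: run a single-process batch of experiments. Argument values
# below are illustrative (the exact norm and approach strings depend on what
# generateExplanations supports), and `_experiments/` must exist beforehand
# since os.mkdir() does not create parent directories.
#
#   runExperiments(
#       dataset_values=['adult'],
#       model_class_values=['lr'],
#       norm_values=['one_norm'],
#       approaches_values=['MACE_eps_1e-3'],
#       batch_number=0, sample_count=10,
#       gen_cf_for='neg_only', process_id=0,
#   )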
def loadModelForDataset(model_class,
                        dataset_class,
                        scm_class=None,
                        num_train_samples=int(1e5),
                        fair_nodes=None,
                        fair_kernel_type=None,
                        experiment_folder_name=None):

    log_file = sys.stdout if experiment_folder_name is None else open(
        f'{experiment_folder_name}/log_training.txt', 'w')

    if (model_class not in {'lr', 'mlp', 'tree', 'forest'}
            and model_class not in fairRecourse.FAIR_MODELS):
        raise Exception(f'{model_class} not supported.')

    if dataset_class not in {
            'synthetic', 'mortgage', 'twomoon', 'german', 'credit', 'compass',
            'adult', 'test'
    }:
        raise Exception(f'{dataset_class} not supported.')

    if dataset_class == 'adult':
        dataset_obj = loadData.loadDataset(dataset_class,
                                           return_one_hot=False,
                                           load_from_cache=False,
                                           index_offset=1)
    else:
        dataset_obj = loadData.loadDataset(dataset_class,
                                           return_one_hot=True,
                                           load_from_cache=False,
                                           meta_param=scm_class)

    if model_class not in fairRecourse.FAIR_MODELS:
        X_train, X_test, y_train, y_test = dataset_obj.getTrainTestSplit()
        y_all = pd.concat([y_train, y_test], axis=0)
        assert sum(y_all) / len(y_all) == 0.5, 'Expected a 50/50 class balance.'
    else:
        if dataset_class == 'adult':
            X_train, X_test, y_train, y_test = dataset_obj.getTrainTestSplit(
                with_meta=False, balanced=False)
            X_train = X_train[fair_nodes]
            X_test = X_test[fair_nodes]
        else:
            X_train, X_test, U_train, U_test, y_train, y_test = dataset_obj.getTrainTestSplit(
                with_meta=True, balanced=False)
            X_train = pd.concat([X_train, U_train], axis=1)[fair_nodes]
            X_test = pd.concat([X_test, U_test], axis=1)[fair_nodes]

    if model_class == 'tree':
        model_pretrain = DecisionTreeClassifier()
    elif model_class == 'forest':
        model_pretrain = RandomForestClassifier()
    elif model_class == 'lr':
        # IMPORTANT: the default solver changed from 'liblinear' to 'lbfgs' in
        # scikit-learn 0.22; therefore, results may differ slightly from the paper.
        model_pretrain = LogisticRegression()  # default penalty='l2', i.e., ridge
    elif model_class == 'mlp':
        model_pretrain = MLPClassifier(hidden_layer_sizes=(10, 10))
    else:
        model_pretrain = trainFairClassifier(model_class, fair_kernel_type)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    # Cast to int so the slice below works even if a float (e.g., 1e5) is passed in.
    num_train_samples = int(num_train_samples)
    X_train = X_train[:num_train_samples]
    y_train = y_train[:num_train_samples]

    training_setup_string = (
        f'[INFO] Training `{model_class}` on {X_train.shape[0]:,} samples '
        f'(%{100 * X_train.shape[0] / (X_train.shape[0] + X_test.shape[0]):.2f} '
        f'of {X_train.shape[0] + X_test.shape[0]:,} samples)...')
    print(training_setup_string, file=log_file)
    print(training_setup_string)

    model_trained = model_pretrain.fit(X_train, y_train)

    train_accuracy_string = f'\t[INFO] Training accuracy: %{accuracy_score(y_train, model_trained.predict(X_train)) * 100:.2f}.'
    test_accuracy_string = f'\t[INFO] Testing accuracy: %{accuracy_score(y_test, model_trained.predict(X_test)) * 100:.2f}.'
    print(train_accuracy_string, file=log_file)
    print(test_accuracy_string, file=log_file)
    print(train_accuracy_string)
    print(test_accuracy_string)

    if hasattr(model_trained, 'best_estimator_'):
        hyperparams_string = f'\t[INFO] Hyper-parameters of best classifier selected by CV:\n\t{model_trained.best_estimator_}'
        print(hyperparams_string, file=log_file)
        print(hyperparams_string)

    # We shouldn't deal with a bad model; the 70% accuracy threshold is arbitrary.
    tmp = accuracy_score(y_train, model_trained.predict(X_train))
    # TODO (fair): try/except added for use of nonlinear classifiers in fairness experiments.
    try:
        assert tmp > 0.70, f'Model accuracy only {tmp}'
    except AssertionError:
        print('[INFO] model accuracy may be low (<70%)')

    classifier_obj = model_trained
    visualizeDatasetAndFixedModel(dataset_obj, classifier_obj, experiment_folder_name)

    feature_names = dataset_obj.getInputAttributeNames(
        'kurz')  # easier to read (nothing to do with one-hot vs non-one-hot!)

    if model_class == 'tree':
        if SIMPLIFY_TREES:
            print('[INFO] Simplifying decision tree...', end='', file=log_file)
            model_trained.tree_ = treeUtils.simplifyDecisionTree(model_trained, False)
            print('\tdone.', file=log_file)
        # treeUtils.saveTreeVisualization(model_trained, model_class, '', X_test, feature_names, experiment_folder_name)
    elif model_class == 'forest':
        for tree_idx in range(len(model_trained.estimators_)):
            if SIMPLIFY_TREES:
                print(
                    f'[INFO] Simplifying decision tree (#{tree_idx + 1}/{len(model_trained.estimators_)})...',
                    end='',
                    file=log_file)
                model_trained.estimators_[tree_idx].tree_ = treeUtils.simplifyDecisionTree(
                    model_trained.estimators_[tree_idx], False)
                print('\tdone.', file=log_file)
            # treeUtils.saveTreeVisualization(model_trained.estimators_[tree_idx], model_class, f'tree{tree_idx}', X_test, feature_names, experiment_folder_name)

    if experiment_folder_name:
        pickle.dump(model_trained,
                    open(f'{experiment_folder_name}/_model_trained', 'wb'))

    return model_trained
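# Usage sketch for the fairness variant (hypothetical values): train a fair
# classifier restricted to a subset of node names. `fair_svm` is an assumed
# member of fairRecourse.FAIR_MODELS and the node names are placeholders,
# neither is verified by this excerpt.
#
#   model = loadModelForDataset(
#       'fair_svm', 'adult',
#       fair_nodes=['x1', 'x2'], fair_kernel_type='linear',
#       experiment_folder_name='_experiments/fair_demo')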
# Majority vote over the K nearest neighbours. NOTE: this excerpt assumes an
# enclosing loop over test samples in which `distances` (a list of
# (label, distance) pairs for the current sample), `testY` (its true label),
# and the running `correct` counter are defined; `K` is defined further below.
distances = sorted(distances, key=lambda x: x[1])
labels = list()
for i in range(K):
    labels.append(distances[i][0])
counter = Counter(labels)
prediction = counter.most_common(1)[0][0]
if prediction == testY:
    correct += 1

total = len(y_test)
print("Accuracy = %s" % (correct / total))

K = 3
X, Y, imgPaths = loadDataset("HiraganaGit", loadAgain=False)

# Shuffle the dataset (uncomment the seed for reproducibility).
indices = np.arange(len(X))
# np.random.seed(3)
np.random.shuffle(indices)
X = X[indices]
Y = Y[indices]

# 80/20 train/test split.
N = X.shape[0]
Ntrain = int(N * 80 / 100)
Ntest = int(N * 20 / 100)
x_train = X[:Ntrain].reshape((Ntrain, 1, 84, 83))
y_train = Y[:Ntrain]
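# Sketch of how `distances` might be computed per test sample (an assumption;
# the actual distance function is not shown in this excerpt). Flattened L2
# distance between raw images:
#
#   for testX, testY in zip(x_test, y_test):
#       distances = [
#           (y_train[j], np.linalg.norm(x_train[j].ravel() - testX.ravel()))
#           for j in range(len(x_train))
#       ]
#       # ... majority vote as above ...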
def loadModelForDataset(model_class,
                        dataset_string,
                        scm_class=None,
                        experiment_folder_name=None):

    log_file = sys.stdout if experiment_folder_name is None else open(
        f'{experiment_folder_name}/log_training.txt', 'w')

    if model_class not in {'lr', 'mlp', 'tree', 'forest'}:
        raise Exception(f'{model_class} not supported.')

    if dataset_string not in {
            'synthetic', 'mortgage', 'twomoon', 'german', 'credit', 'compass',
            'adult', 'test', 'iris', 'housing', 'wine', 'poker'
    }:
        raise Exception(f'{dataset_string} not supported.')

    if model_class in {'tree', 'forest'}:
        one_hot = False
    elif model_class in {'lr', 'mlp'}:
        one_hot = True

    dataset_obj = loadData.loadDataset(dataset_string,
                                       return_one_hot=one_hot,
                                       load_from_cache=False,
                                       meta_param=scm_class)
    X_train, X_test, y_train, y_test = dataset_obj.getTrainTestSplit()
    X_all = pd.concat([X_train, X_test], axis=0)
    y_all = pd.concat([y_train, y_test], axis=0)
    if dataset_obj.problem_type == 'classification':
        # Expected class balance: every class should appear equally often.
        assert y_all.value_counts().nunique() == 1
    feature_names = dataset_obj.getInputAttributeNames(
        'kurz')  # easier to read (nothing to do with one-hot vs non-one-hot!)

    # Define the model type (classifier or regressor, depending on the problem).
    if model_class == 'tree':
        if dataset_obj.problem_type == 'classification':
            model_pretrain = DecisionTreeClassifier()
        elif dataset_obj.problem_type == 'regression':
            model_pretrain = DecisionTreeRegressor()
    elif model_class == 'forest':
        if dataset_obj.problem_type == 'classification':
            model_pretrain = RandomForestClassifier(n_estimators=100)
        elif dataset_obj.problem_type == 'regression':
            model_pretrain = RandomForestRegressor(n_estimators=100)
    elif model_class == 'lr':
        # IMPORTANT: the default solver changed from 'liblinear' to 'lbfgs' in
        # scikit-learn 0.22; therefore, results may differ slightly from the paper.
        model_pretrain = LogisticRegression()  # default penalty='l2', i.e., ridge
    elif model_class == 'mlp':
        if dataset_obj.problem_type == 'classification':
            model_pretrain = MLPClassifier(hidden_layer_sizes=(10, 10))
        elif dataset_obj.problem_type == 'regression':
            model_pretrain = MLPRegressor(hidden_layer_sizes=(10, 10))

    tmp_text = (
        f'[INFO] Training `{model_class}` on {X_train.shape[0]:,} samples '
        f'(%{100 * X_train.shape[0] / (X_train.shape[0] + X_test.shape[0]):.2f} '
        f'of {X_train.shape[0] + X_test.shape[0]:,} samples)...')
    print(tmp_text)
    print(tmp_text, file=log_file)

    model_trained = model_pretrain.fit(X_train, y_train)

    if dataset_obj.problem_type == 'classification':
        train_text = f'\tTraining accuracy: %{accuracy_score(y_train, model_trained.predict(X_train)) * 100:.2f}'
        test_text = f'\tTesting accuracy: %{accuracy_score(y_test, model_trained.predict(X_test)) * 100:.2f}'
    else:
        train_text = f'\tTraining MAE: {mean_absolute_error(y_train, model_trained.predict(X_train)):.2f}'
        test_text = f'\tTesting MAE: {mean_absolute_error(y_test, model_trained.predict(X_test)):.2f}'
    print(train_text, file=log_file)
    print(test_text, file=log_file)
    print(train_text)
    print(test_text)
    print('[INFO] done.\n', file=log_file)
    print('[INFO] done.\n')

    # assert accuracy_score(y_train, model_trained.predict(X_train)) > 0.70  # TODO: uncomment

    classifier_obj = model_trained
    visualizeDatasetAndFixedModel(dataset_obj, classifier_obj, experiment_folder_name)

    if model_class == 'tree':
        if SIMPLIFY_TREES:
            print('[INFO] Simplifying decision tree...', end='', file=log_file)
            model_trained.tree_ = treeUtils.simplifyDecisionTree(model_trained, False)
            print('\tdone.', file=log_file)
        # treeUtils.saveTreeVisualization(model_trained, model_class, '', X_test, feature_names, experiment_folder_name)
    elif model_class == 'forest':
        for tree_idx in range(len(model_trained.estimators_)):
            if SIMPLIFY_TREES:
                print(
                    f'[INFO] Simplifying decision tree (#{tree_idx + 1}/{len(model_trained.estimators_)})...',
                    end='',
                    file=log_file)
                model_trained.estimators_[tree_idx].tree_ = treeUtils.simplifyDecisionTree(
                    model_trained.estimators_[tree_idx], False)
                print('\tdone.', file=log_file)
            # treeUtils.saveTreeVisualization(model_trained.estimators_[tree_idx], model_class, f'tree{tree_idx}', X_test, feature_names, experiment_folder_name)

    if experiment_folder_name:
        pickle.dump(model_trained,
                    open(f'{experiment_folder_name}/_model_trained', 'wb'))

    return model_trained
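# Usage sketch (values illustrative): train a random-forest regressor on the
# `housing` dataset. With no experiment folder, logs go to stdout and the
# model is not pickled.
#
#   model = loadModelForDataset('forest', 'housing')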
# NOTE: the enclosing `def` header was missing from this excerpt; the function
# name below is assumed, but the body requires a `num_classes` parameter.
def createModel(num_classes):
    model = Sequential()
    model.add(
        Conv2D(50, (5, 5),
               input_shape=(1, 84, 83),
               data_format='channels_first',
               activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model.
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model


# DATASET = "Hiragana73"
DATASET = "HiraganaGit"

# Switch the lines below if you need to load all the data from the dataset again.
X, Y, imgPaths = loadDataset(DATASET, loadAgain=True)
# X, Y, imgPaths = loadDataset(DATASET, loadAgain=False)

X /= 255  # X has format (height, width, N)

indices = np.arange(X.shape[0])
np.random.seed(3)
np.random.shuffle(indices)
X = X[indices]
Y = Y[indices]
imgPaths = imgPaths[indices]
N = X.shape[0]
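# Usage sketch (hedged): one-hot encode the labels and train the model built
# above. `createModel` is the assumed name from the excerpt, the (N, 1, 84, 83)
# reshape assumes X is stored sample-first, and the epoch/batch values are
# illustrative only.
#
#   from keras.utils import to_categorical
#   num_classes = len(np.unique(Y))
#   model = createModel(num_classes)
#   x = X.reshape((N, 1, 84, 83))
#   y = to_categorical(Y, num_classes)
#   model.fit(x, y, epochs=10, batch_size=32, validation_split=0.2)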