def __init__(self, smoothing, smooth_param=0):
    """Set up an untrained classifier with caller-chosen smoothing.

    smoothing    -- whether smoothing is applied when estimating parameters
    smooth_param -- smoothing strength (default 0)
    """
    LinearClassifier.__init__(self)
    # Smoothing configuration comes from the caller.
    self.smooth = smoothing
    self.smooth_param = smooth_param
    # Nothing has been estimated yet.
    self.trained = False
    self.likelihood = 0
    self.prior = 0
def __init__(self):
    """Set up an untrained classifier with smoothing enabled (strength 1)."""
    LinearClassifier.__init__(self)
    # Fixed smoothing policy for this variant.
    self.smooth = True
    self.smooth_param = 1
    # No parameters estimated yet.
    self.trained = False
    self.likelihood = 0
    self.prior = 0
def __init__(self):
    """Set up an untrained classifier with smoothing disabled."""
    LinearClassifier.__init__(self)
    # This variant never smooths (smooth_param kept for interface parity).
    self.smooth = False
    self.smooth_param = 1
    # No parameters estimated yet.
    self.trained = False
    self.likelihood = 0
    self.prior = 0
def __init__(self, nr_epochs=10, initial_step=1.0, alpha=1.0, regularizer=1.0):
    """Configure an untrained iterative linear classifier.

    nr_epochs    -- number of passes over the training data
    initial_step -- starting step size for updates
    alpha        -- step-size decay exponent
    regularizer  -- regularization strength
    """
    LinearClassifier.__init__(self)
    self.trained = False
    # Optimization hyperparameters.
    self.nr_epochs = nr_epochs
    self.initial_step = initial_step
    self.alpha = alpha
    self.regularizer = regularizer
    # Snapshot of parameters after each round.
    self.params_per_round = []
def __init__(self, labels, feature_generator, epochs=10, eta=1.):
    """Configure a perceptron-style trainer on top of LinearClassifier.

    labels            -- label set forwarded to the base class
    feature_generator -- object exposing n_features(); forwarded to the base
    epochs            -- number of training passes
    eta               -- learning rate
    """
    LinearClassifier.__init__(self, labels, feature_generator)
    # Training schedule.
    self.n_epochs = epochs
    self.eta = eta
    # Feature-space size is fixed by the generator.
    self.n_features = feature_generator.n_features()
    # One parameter snapshot per completed epoch.
    self.parameters_for_epoch = []
def setUpClass(cls):
    """Build and train the shared fixtures: one classifier without bias, one with."""
    n_features = mnist.train.images.shape[1]
    n_outputs = mnist.train.labels.shape[1]
    cls.classifier1 = LinearClassifier(n_features, n_outputs, add_bias=False)
    cls.classifier2 = LinearClassifier(n_features, n_outputs, add_bias=True)
    # Train both once; individual tests only read the fixtures.
    for clf in (cls.classifier1, cls.classifier2):
        LinearClassifierTest.train_classifier(clf)
def __init__(self, nr_epochs=10, initial_step=1.0, alpha=1.0, regularizer=1.0):
    """Initialize the untrained model and store optimization hyperparameters.

    nr_epochs    -- number of training passes
    initial_step -- initial learning-rate / step size
    alpha        -- step-size decay exponent
    regularizer  -- regularization strength
    """
    LinearClassifier.__init__(self)
    # Model state: not fitted, no per-round parameter history yet.
    self.trained = False
    self.params_per_round = []
    # Hyperparameters as given by the caller.
    self.nr_epochs = nr_epochs
    self.initial_step = initial_step
    self.alpha = alpha
    self.regularizer = regularizer
def main():
    """Grid-search a linear classifier on the master data and export the best model.

    Splits the data, trains one model per (learning rate, regularization)
    pair, keeps the model with the best validation accuracy, reports test
    accuracy, and writes the winner to a CSV file.
    """
    num_classes = 3
    X, y = get_master_data()
    train_X, train_y, val_X, val_y, test_X, test_y = split_data(X, y)

    # hyperparams
    learning_rates = [1]
    reg_strengths = [0]
    num_iters = 5000

    best_model = None
    best_accuracy = -1
    results = {}
    for lr in learning_rates:
        for rs in reg_strengths:
            candidate = LinearClassifier(X.shape[1], num_classes)
            candidate.train(train_X, train_y, lr, rs, num_iters)
            # Validation accuracy decides which model we keep.
            val_accuracy = np.mean(candidate.predict(val_X) == val_y)
            train_accuracy = np.mean(candidate.predict(train_X) == train_y)
            print('This val accuracy: ' + str(val_accuracy))
            if val_accuracy > best_accuracy:
                best_model = candidate
                best_accuracy = val_accuracy
            results[(lr, rs)] = train_accuracy, val_accuracy
            candidate.print_model()

    # Summary of every configuration tried.
    for lr, rs in sorted(results):
        train_accuracy, val_accuracy = results[(lr, rs)]
        print('lr %e reg %e train_accuracy %f val_accuracy %f' %
              (lr, rs, train_accuracy, val_accuracy))

    print(test_X[0])
    y_pred_test = best_model.predict(test_X)
    test_accuracy = np.mean(y_pred_test == test_y)
    print('This test accuracy: ' + str(test_accuracy))
    # File name embeds the test accuracy as an integer percentage.
    best_model.toFile('rowan_rest_grip_flex_linear_classifier_3_' +
                      str(int(test_accuracy * 100)) + 'p.csv')
def main(test_input_dir, model_dir, test_upper_bound, result_save_dir):
    """Evaluate a sequence of saved checkpoints on the test set.

    For each checkpoint 1..test_upper_bound, loads the weights into the
    classifier, runs evaluation, and writes an accuracy summary plus the
    predicted/correct label pairs into result_save_dir.
    """
    # Create a dataloader.
    logger.info("Create test dataloader from {}.".format(test_input_dir))
    test_dataset = create_dataset(test_input_dir, num_examples=-1, num_options=20)
    test_dataloader = DataLoader(
        test_dataset, batch_size=4, shuffle=False, drop_last=True)

    # Create a classifier model.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    # Create a directory to save the results in.
    logger.info("Results will be saved in {}.".format(result_save_dir))
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model evaluation.")
    for checkpoint_idx in range(1, test_upper_bound + 1):
        model_filepath = os.path.join(
            model_dir, "checkpoint_{}.pt".format(checkpoint_idx))
        logger.info("Load model parameters from {}.".format(model_filepath))
        parameters = torch.load(model_filepath, map_location=device)
        classifier_model.load_state_dict(parameters)

        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, test_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        # Save results as text files.
        res_filepath = os.path.join(
            result_save_dir, "result_test_{}.txt".format(checkpoint_idx))
        labels_filepath = os.path.join(
            result_save_dir, "labels_test_{}.txt".format(checkpoint_idx))
        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))
        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))
    logger.info("Finished model evaluation.")
def train_model_agg( self, bags, y, cv_split_bags=None, sample_weight=None, param_search=True ): """Train instance aggregation function using quantile function.""" # figure out number of quantiles and where to set them ninst = int( np.round( sum( [ len(bag) for bag in bags ] ) / float(len(bags)) ) ) if self.quantiles is not None: nq = self.quantiles else: nq = 16 if ninst <= nq: quantiles = np.linspace(0,100,ninst) else: quantiles = np.linspace(100.0/nq/2,100-100.0/nq/2,nq) p = [] test_y = [] if cv_split_bags is None: # train/test split skf = sklearn.model_selection.StratifiedKFold( n_splits=5, shuffle=True ) cv_split_bags = list(skf.split(bags,y)) # compute quantile function for f in range(5): train_idx,test_idx = cv_split_bags[f] for i in test_idx: pi = super(SIL,self).predict( bags[i], cv=f ) if pi.shape[1] == 2: q = np.percentile( pi[:,1], quantiles ) else: q = np.hstack( [ np.percentile( pi[:,c], quantiles ) for c in range(pi.shape[1]) ] ) p.append( q ) test_y.append( y[i] ) p = np.vstack(p) test_y = np.array(test_y) # train model model_agg = LinearClassifier( classifier='svm' ) self.C_agg,self.gamma_agg = model_agg.param_search( p, test_y, sample_weight=sample_weight, quick=False ) model_agg.C = self.C_agg model_agg.fit( p, test_y, sample_weight=sample_weight, param_search=param_search, calibrate=self._calibrate ) self._model_agg = (model_agg,quantiles)
#N_train = 20000 X_train = data["train_imgs"].squeeze()[:N_train, :] L_train = data["train_labels"][:N_train] T_train = np.zeros((N_train, L_train.max() + 1)) T_train[np.arange(N_train), L_train] = 1 N_test = data["test_no"] X_test = data["test_imgs"].squeeze() L_test = data["test_labels"] T_test = np.zeros((N_test, L_test.max() + 1)) T_test[np.arange(N_test), L_test] = 1 # ------------------------------------------------------------------------------ # ------ Closed form solution cf_model = LinearClassifier() cf_model.closed_form(X_train, T_train) acc, conf = evaluate(cf_model, X_test, L_test) print("[Closed Form] Accuracy on test set: %f" % acc) print(conf) plot_confusion_matrix(conf, 1, "Closed form") acc1 = np.ones(EPOCHS_NO) * acc print("-------------------") # ------------------------------------------------------------------------------ # ------ Gradient optimization of linear model
def _measure_disentanglement(self, autoencoder):
    """Train a linear probe on the autoencoder's codes and record its accuracy.

    Encodes the train/test/validation JSON datasets through the autoencoder,
    trains a LinearClassifier on the codes with early stopping on validation
    cost, then logs and stores the test accuracy tagged with the
    autoencoder's beta value.
    """
    beta = autoencoder.get_beta()
    # Encode all three splits while the autoencoder session is still open.
    train_classifier_inputs, train_classifier_labels = \
        Evaluator.inputs_and_labels(autoencoder, self.classifier_train_json)
    logger.info('Beta = {0} | Classifier training data processed.'.format(beta))
    test_classifier_inputs, test_classifier_labels = \
        Evaluator.inputs_and_labels(autoencoder, self.classifier_test_json)
    logger.info('Beta = {0} | Classifier test data processed.'.format(beta))
    valid_classifier_inputs, valid_classifier_labels = \
        Evaluator.inputs_and_labels(autoencoder, self.classifier_validation_json)
    logger.info('Beta = {0} | Classifier validation data processed.'.format(beta))
    # The autoencoder is no longer needed once the codes are extracted.
    autoencoder.close_session()
    # Constants.
    CLASSIFIER_INPUT_DIMENSION = autoencoder.get_code_dimension()
    logger.info('Beta = {0} | Create classifier.'.format(beta))
    classifier = LinearClassifier(CLASSIFIER_INPUT_DIMENSION,
                                  CLASSIFIER_POSSIBLE_CLASSES_COUNT)
    epoch = 0
    # Lower validation cost is better; start from +inf.
    best_validation_score = np.inf
    consecutive_decreases = 0
    LEN_TRAIN = len(train_classifier_inputs)
    while True:
        # Train with mini-batches.
        # NOTE(review): the whole (shuffled) training set is passed in one
        # partial_fit call per epoch; the mini-batch loop below is disabled.
        random_permutation = np.random.permutation(np.arange(LEN_TRAIN))
        shuffled_train_inputs = train_classifier_inputs[random_permutation]
        shuffled_train_labels = train_classifier_labels[random_permutation]
        classifier.partial_fit(shuffled_train_inputs, shuffled_train_labels)
        # for start in range(0, LEN_TRAIN, BATCH_SIZE):
        #     end = LEN_TRAIN if LEN_TRAIN - start < 2 * BATCH_SIZE else start + BATCH_SIZE
        #     classifier.partial_fit(shuffled_train_inputs[start:end],
        #                            shuffled_train_labels[start:end])
        #     if end == LEN_TRAIN:
        #         break
        # Early stopping.
        validation_score = classifier.get_cost(valid_classifier_inputs,
                                               valid_classifier_labels)
        logger.info('Beta = {0} | Classifier epoch {1} validation cost: {2}'.format(
            beta, epoch, validation_score))
        if best_validation_score > validation_score:
            logger.info('Beta = {0} | *** OPTIMAL SO FAR ***'.format(beta))
            best_validation_score = validation_score
            consecutive_decreases = 0
        else:
            # Stop after PATIENCE+1 consecutive non-improving epochs.
            consecutive_decreases += 1
            if consecutive_decreases > PATIENCE:
                break
        epoch += 1
    logger.info('Beta = {0} | Classifier training completed.'.format(beta))
    accuracy = classifier.accuracy(test_classifier_inputs, test_classifier_labels)
    logger.info('Beta = {0} | Classifier accuracy: {1}'.format(beta, accuracy))
    # NOTE(review): mode 'w' overwrites the file on every call, so only the
    # most recent beta's line survives on disk — confirm this is intended.
    with open(self.accuracy_output_file, 'w') as f:
        f.write('Beta = {0} | Classifier accuracy: {1}\n'.format(beta, accuracy))
    self.classifier_accuracy_all.append((beta, accuracy))
    classifier.close_session()
# discard samples missing a label for sample_weight category idx_train = idx_train[np.where(labels_sw[idx_train] != -1)[0]] X_train = [feats[samples[i]] for i in idx_train] y_train = labels[idx_train, c] y_sw = y_train + len(label_names[c]) * labels_sw[idx_train] uniq = np.unique(y_sw).tolist() counts = np.array([(y_sw == l).sum() for l in uniq]) counts = counts.sum().astype(float) / (counts * len(counts)) sw = np.array([counts[uniq.index(y)] for y in y_sw]) else: sw = None if mi_type is None: model = LinearClassifier(n_jobs=n_jobs, **options) model.fit(X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw) elif mi_type in ['median', 'max']: model = SIL(n_jobs=n_jobs, **options) model.fit(X_train, y_train, calibrate=calibrate, param_search=True, sample_weight=sw) elif mi_type == 'quantile': if quantiles is not None: options['quantiles'] = int(quantiles)
def main():
    """Run the classifier assignment end to end (notebook exported as a script).

    The `# CELL n` markers delimit the original notebook cells. Part 1
    (cells 2-11) trains and tunes a LinearClassifier on synthetic 3-class
    data; part 2 (cells 12-25) trains and tunes a TwoLayerClassifier on a
    selectable synthetic dataset. Relies on module-level imports: np, plt,
    itertools, sklearn, utils, LinearClassifier, TwoLayerClassifier.
    """
    # # CELL 1
    # '''
    # Imporation des bibliothèques python générales
    # '''
    # import numpy as np
    # import matplotlib.pyplot as plt
    # import itertools
    # from sklearn.datasets import make_classification
    # '''
    # Imporation des bibliothèques spécifiques au devoir
    # '''
    # import utils
    # from linear_classifier import LinearClassifier
    # from two_layer_classifier import TwoLayerClassifier
    # %matplotlib inline
    # plt.rcParams['figure.figsize'] = (14.0, 8.0) # set default size of plots
    # %load_ext autoreload
    # %autoreload 2

    # CELL 2
    # Generate the data: 1000 points, 2 informative features, 3 classes.
    X_, y_ = make_classification(1000, n_features=2, n_redundant=0,
                                 n_informative=2, n_clusters_per_class=1,
                                 n_classes=3, random_state=6)
    # Standardize the data (mean = 0, std = 1).
    mean = np.mean(X_, axis=0)
    std = np.std(X_, axis=0)
    X_ = (X_ - mean) / std
    # Display.
    plt.figure(figsize=(8, 6))
    plt.scatter(X_[:, 0], X_[:, 1], c=y_, edgecolors='k', cmap=plt.cm.Paired)
    plt.show(block=False)

    # CELL 3
    # Shuffle once, then split 600/200/200 into train/val/test.
    num_val = 200
    num_test = 200
    num_train = 600
    np.random.seed(1)
    idx = np.random.permutation(len(X_))
    train_idx = idx[:num_train]
    val_idx = idx[num_train:num_train + num_val]
    test_idx = idx[-num_test:]
    X_train = X_[train_idx]
    y_train = y_[train_idx]
    X_val = X_[val_idx]
    y_val = y_[val_idx]
    X_test = X_[test_idx]
    y_test = y_[test_idx]
    # Display each split.
    plt.figure(figsize=(8, 6))
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data train')
    plt.show(block=False)
    plt.figure(figsize=(8, 6))
    plt.scatter(X_val[:, 0], X_val[:, 1], c=y_val, edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data Validation')
    plt.show(block=False)
    plt.figure(figsize=(8, 6))
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data test')
    plt.show(block=False)

    # CELL 4
    # Sanity check with sklearn's SVM: the data should be learnable.
    accu = utils.test_sklearn_svm(X_train, y_train, X_test, y_test)
    print('Test accuracy: {:.3f}'.format(accu))
    if accu < 0.7:
        print('ERREUR: L\'accuracy est trop faible. Il y a un problème avec les données. Vous pouvez essayer de refaire le mélange (case ci-haut).')

    # CELL 5
    # First, check the model's prediction — the "forward pass".
    # 1. Build the model with random weights W.
    model = LinearClassifier(X_train, y_train, X_val, y_val, num_classes=3,
                             bias=True)
    # 2. Compute mean accuracy and loss over the whole training set.
    _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train)
    # 3. Compare with the expected value for random weights: -log(1/nb_classes).
    loss_attendu = -np.log(1.0 / 3.0)
    print('Sortie: {} Attendu: {}'.format(loss, loss_attendu))
    if abs(loss - loss_attendu) > 0.05:
        print('ERREUR: la sortie de la fonction est incorrecte.')
    else:
        print('SUCCÈS')

    # CELL 6
    # Check: the model must be able to overfit a handful of samples.
    # If accuracy stays low, the implementation has a bug.
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = LinearClassifier(X_check, y_check, X_val, y_val, num_classes=3,
                             bias=True)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=10, lr=1.0, l2_reg=0.0)
    accu_train_finale = accu_train_curve[-1]
    print('Accuracy d\'entraînement, devrait être 1.0: {:.3f}'.format(
        accu_train_finale))
    if accu_train_finale < 0.9999:
        print('ATTENTION: L\'accuracy n\'est pas 100%.')
        utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                          accu_val_curve)
    else:
        print('SUCCÈS')

    # CELL 7
    # Same small sample, sweeping l2_reg values.
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = LinearClassifier(X_check, y_check, X_val, y_val, num_classes=3,
                             bias=True)
    for l2_r in np.arange(0, 1, 0.05):
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
            num_epochs=10, lr=1.0, l2_reg=l2_r)
        print('l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f} {:.3f}'.
              format(l2_r, loss_train_curve[-1], accu_train_curve[-1]))

    # CELL 8
    # Instantiate and train the model, this time on the full data.
    model = LinearClassifier(X_train, y_train, X_val, y_val, num_classes=3,
                             bias=True)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        lr=0.001, num_epochs=25, l2_reg=0.01)
    # Plot loss and accuracy (fraction correctly classified) per iteration.
    utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                      accu_val_curve)
    print('[Training] Loss: {:.3f} Accuracy: {:.3f}'.format(
        loss_train_curve[-1], accu_train_curve[-1]))
    print('[Validation] Loss: {:.3f} Accuracy: {:.3f}'.format(
        loss_val_curve[-1], accu_val_curve[-1]))

    # CELL 9
    # Hyperparameter grid search on (lr, l2_reg).
    lr_choices = [1e-2, 1e-1, 1.0, 10.0]
    reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
    lr_decay = 0.995  # learning rate is multiplied by this factor after each step
    best_accu = -1
    best_params = None
    best_model = None
    best_curves = None
    # NOTE(review): `model` is re-trained in place across grid points (no
    # re-initialization between configurations) — confirm this is intended.
    for lr, reg in itertools.product(lr_choices, reg_choices):
        params = (lr, reg)
        curves = model.train(num_epochs=25, lr=lr, l2_reg=reg,
                             lr_decay=lr_decay)
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves
        val_accu = accu_val_curve[-1]
        if val_accu > best_accu:
            print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.
                  format(val_accu, lr, reg))
            best_accu = val_accu
            best_params = params
            best_model = model
            best_curves = curves
    model = best_model
    utils.plot_curves(*best_curves)

    # CELL 10
    # Re-train the model with the best hyperparameters.
    lr, reg = best_params
    model.train(num_epochs=25, lr=lr, l2_reg=reg, lr_decay=lr_decay)
    pred = model.predict(X_test)
    accu = (pred == y_test).mean()
    print('Test accuracy: {:.3f}'.format(accu))

    # CELL 11
    h = 0.01  # grid resolution
    x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5  # grid limits
    y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))  # build the grid
    X_predict = np.c_[xx.ravel(), yy.ravel()]  # grid as a list of points
    Z = model.predict(X_predict)  # classify every grid point
    Z = Z.reshape(xx.shape)  # back to 2D
    plt.figure(figsize=(14, 8))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)  # color cells by prediction
    X_plot, y_plot = X_train, y_train
    # NOTE(review): duplicated assignment below (harmless).
    X_plot, y_plot = X_train, y_train
    plt.scatter(X_plot[:, 0], X_plot[:, 1], c=y_plot, edgecolors='k',
                cmap=plt.cm.Paired)  # draw the data
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title('Frontières de décision')
    plt.show(block=False)

    # CELL 12
    # Choose the dataset type. IMPORTANT: you are encouraged to try several
    # datasets, but the solution is graded with Ncircles (N=4), so test it.
    dataset_type = 'Ncircles'
    if dataset_type == 'moons':
        X_, y_ = sklearn.datasets.make_moons(n_samples=200, noise=0.5)
        num_classes = 2
    elif dataset_type == 'gaussian_quantiles':
        X_, y_ = sklearn.datasets.make_gaussian_quantiles(n_samples=200,
                                                          n_classes=2)
        num_classes = 2
    elif dataset_type == '4blobs':
        # Two classes, each made of two diagonal Gaussian blobs (XOR layout).
        d = 4
        c1a = np.random.randn(50, 2)
        c1b = np.random.randn(50, 2) + (d, d)
        c2a = np.random.randn(50, 2) + (0, d)
        c2b = np.random.randn(50, 2) + (d, 0)
        X_ = np.concatenate([c1a, c1b, c2a, c2b], axis=0)
        y_ = np.array([0] * 100 + [1] * 100)
        num_classes = 2
    elif dataset_type == '2circles':
        X_, y_ = sklearn.datasets.make_circles(n_samples=200)
        num_classes = 2
    elif dataset_type == 'Ncircles':
        # num_classes concentric noisy rings with increasing radius.
        samples_per_class = 100
        num_classes = 4
        angles = np.linspace(0, 2 * np.pi, samples_per_class)
        radius = 1.0 + np.arange(num_classes) * 0.3
        px = np.cos(angles[:, None]) * radius[None, :]  # (100, 3)
        py = np.sin(angles[:, None]) * radius[None, :]  # (100, 3)
        X_ = np.stack([px, py], axis=-1).reshape(
            (samples_per_class * num_classes, 2))
        X_ += np.random.randn(len(X_[:, 0]), 2) / 8
        y_ = np.array(list(range(num_classes)) * samples_per_class)
    else:
        print('Invalid dataset type')

    # CELL 13
    plt.figure()
    plt.scatter(X_[:, 0], X_[:, 1], c=y_, cmap=plt.cm.Paired)
    plt.title('Données complètes')
    plt.show(block=False)

    # CELL 14
    # 50/20/30 train/val/test split after a seeded shuffle.
    train_proportion = 0.5
    val_proportion = 0.2
    num_train = int(len(X_) * train_proportion)
    num_val = int(len(X_) * val_proportion)
    np.random.seed(0)
    idx = np.random.permutation(len(X_))
    train_idx = idx[:num_train]
    val_idx = idx[num_train:num_train + num_val]
    test_idx = idx[num_train + num_val:]
    X_train = X_[train_idx]
    y_train = y_[train_idx]
    X_val = X_[val_idx]
    y_val = y_[val_idx]
    X_test = X_[test_idx]
    y_test = y_[test_idx]

    # CELL 15
    # Now display the training, validation, and test data.
    plt.figure()
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired)
    plt.title('Train')
    plt.show(block=False)
    plt.figure()
    plt.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=plt.cm.Paired)
    plt.title('Validation')
    plt.show(block=False)
    plt.figure()
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Paired)
    plt.title('Test')
    plt.show(block=False)

    # CELL 16
    num_hidden_neurons = 10
    model = TwoLayerClassifier(X_train, y_train, X_val, y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)

    # CELL 17
    # Check that the randomly initialized network predicts each class equally.
    num_hidden_neurons = 10
    model = TwoLayerClassifier(X_train, y_train, X_val, y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)
    # 2. Compute mean accuracy and loss over the whole training set.
    _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train, 0)
    # 3. Compare with the expected random value: -log(1/nb_classes).
    loss_attendu = -np.log(1.0 / num_classes)
    print('Sortie: {} Attendu: {}'.format(loss, loss_attendu))
    if abs(loss - loss_attendu) > 0.05:
        print('ERREUR: la sortie de la fonction est incorrecte.')
    else:
        print('SUCCÈS')

    # CELL 18
    # Check that increasing L2 regularization also increases the loss.
    for l2_r in np.arange(0, 2, 0.1):
        _, loss = model.global_accuracy_and_cross_entropy_loss(
            X_train, y_train, l2_r)
        print('l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f}'.format(
            l2_r, loss))

    # CELL 19
    # Check: the model must be able to overfit a handful of samples.
    # If accuracy stays low, the implementation has a bug.
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = TwoLayerClassifier(X_check, y_check, X_val, y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=200, lr=0.01, l2_reg=0.0)
    print('Accuracy d\'entraînement, devrait être 1.0: {:.3f}'.format(
        accu_train_curve[-1]))
    if accu_train_curve[-1] < 0.98:
        print('ATTENTION: L\'accuracy n\'est pas 100%.')
        utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                          accu_val_curve)
    else:
        print('SUCCÈS')

    # CELL 20
    # Check that training with growing L2 regularization increases the loss
    # and, eventually, decreases the accuracy.
    for l2_r in np.arange(0, 1, 0.1):
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
            num_epochs=200, lr=0.01, l2_reg=l2_r)
        print('l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f} {:.3f}'.
              format(l2_r, loss_train_curve[-1], accu_train_curve[-1]))

    # CELL 21
    # Instantiate the model, this time on the full data.
    num_hidden_neurons = 20
    model = TwoLayerClassifier(X_train, y_train, X_val, y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes, activation='relu')

    # CELL 22
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=200, lr=1e-2, l2_reg=0.0, momentum=0.5)
    # Plot loss and accuracy (fraction correctly classified) per iteration.
    utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                      accu_val_curve)
    print('[Training] Loss: {:.3f} Accuracy: {:.3f}'.format(
        loss_train_curve[-1], accu_train_curve[-1]))
    print('[Validation] Loss: {:.3f} Accuracy: {:.3f}'.format(
        loss_val_curve[-1], accu_val_curve[-1]))

    # CELL 23
    # Find the best hyperparameters lr and l2_reg
    lr_choices = [1e-4, 1e-3, 1e-2]
    reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 0]
    lr_decay = 1.0  # 0.995 # learning rate is multiplied by this factor after each step
    best_accu = -1
    best_params = None
    best_model = None
    best_curves = None
    for lr, reg in itertools.product(lr_choices, reg_choices):
        params = (lr, reg)
        curves = model.train(num_epochs=50, lr=lr, l2_reg=reg,
                             lr_decay=lr_decay, momentum=0.5)
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves
        val_accu = accu_val_curve[-1]
        if val_accu > best_accu:
            print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.
                  format(val_accu, lr, reg))
            best_accu = val_accu
            best_params = params
            best_model = model
            best_curves = curves
        else:
            print('accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.format(
                val_accu, lr, reg))
    model = best_model
    utils.plot_curves(*best_curves)

    # CELL 24
    # Re-train the model with the best hyperparameters.
    lr, reg = best_params
    print(best_params)
    curves = model.train(num_epochs=200, lr=lr, l2_reg=reg, momentum=0.5)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves
    pred = model.predict(X_test)
    accu = (pred == y_test).mean()
    print('Test accuracy: {:.3f}'.format(accu))
    utils.plot_curves(*curves)

    # CELL 25
    # Visualize the decision boundaries.
    h = 0.05  # grid resolution
    x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5  # grid limits
    y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))  # build the grid
    X_predict = np.c_[xx.ravel(), yy.ravel()]  # grid as a list of points
    Z = model.predict(X_predict)  # classify every grid point
    Z = Z.reshape(xx.shape)  # back to 2D
    plt.figure(figsize=(14, 8))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)  # color cells by prediction
    X_plot, y_plot = X_, y_
    plt.scatter(X_plot[:, 0], X_plot[:, 1], c=y_plot, edgecolors='k',
                cmap=plt.cm.Paired)  # draw the data
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title('Frontières de décision')
    plt.show()
def param_search(self, bags, y, instances, classes, quick=True, C=1.0, gamma=1.0, bag_inst_idx=None, sample_weight=None, inst_search=False):
    """Search for best hyperparameters.

    Hill-climbing grid search over (C, gamma): evaluates a 5x5 (or 5x1 for
    non-RBF kernels) neighborhood of the current values and re-centers the
    grid whenever the best point lies on its edge.

    Parameters:
        bags, y        -- bags of instances and their bag labels
        instances      -- flattened instances (used when inst_search=True)
        classes        -- instance-level labels aligned with `instances`
        quick          -- forwarded to the instance-level search
        C, gamma       -- starting point; if C is None a starting point is
                          estimated from per-bag mean instances
        bag_inst_idx   -- optional per-bag instance index lists
        sample_weight  -- optional weights forwarded to the CV fit
        inst_search    -- True: tune an instance-level classifier instead of
                          the bag-level SIL estimator

    Returns (bestC, bestg). Side effects: clears self._model and
    self._model_agg so the model is refit with the new hyperparameters.
    """
    if bag_inst_idx is None:
        bag_inst_idx = [ [i]*len(b) for i,b in enumerate(bags) ]
    if C is None:
        # figure out an inital set of hyperparameters using the mean of all instances from each bag
        td = np.array([ t.mean(axis=0) for t in bags ])
        tl = np.array(y)
        # compute mean and std dev
        mu = td.mean(axis=0)
        sigma = td.std(axis=0) + 1e-3
        td = ( td - mu ) / sigma
        model = LinearClassifier( classifier=self.classifier, kernel=self.kernel, n_jobs=self.n_jobs )
        C,gamma = model.param_search( td, tl )
    # Cache of scores per (C, gamma), also used to avoid re-evaluating
    # already-scored C values for linear kernels.
    acc = {}
    bestacc = 0
    bestg = None
    bestC = None
    while True:
        # start with values given and search in neighborhood; search will continue if best value falls on edge of neighborhood
        Cvals = [ float(2**e)*C for e in range(-2,3) ]
        if self.kernel == 'rbf':
            gvals = [ float(2**e)*gamma for e in range(-2,3) ]
        else:
            # gamma is irrelevant for non-RBF kernels.
            gvals = [1.0]
        # get instance indices for each bag
        idx = []
        i = 0
        for yi,inst in zip(y,bags):
            idx.append( np.arange(i,i+len(inst)) )
            i += len(inst)
        if self.kernel == 'rbf':
            Cvals2 = [ C for C in Cvals ]
        else:
            # Skip C values whose score is already cached.
            Cvals2 = [ C for C in Cvals if (C,1.0) not in acc.keys() ]
        folds = 5
        # grid search
        if inst_search:
            # find best instance-level classifier
            model = LinearClassifier( classifier=self.classifier, kernel=self.kernel, p=self.p, n_jobs=self.n_jobs )
            bestC,bestg = model.param_search( instances, classes, quick, C=C, gamma=gamma, sample_weight=sample_weight )
            bestacc = 0
        else:
            # find best result at bag level
            skf = sklearn.model_selection.StratifiedKFold( n_splits=folds, shuffle=True )
            labels = [ (y[i],np.array([classes[j] for j in idx[i]])) for i in range(len(y)) ]
            est = SIL( classifier=self.classifier, kernel=self.kernel, predict_type=self.predict_type, class_weight=self.class_weight, p=self.p, subset=self.subset, quantiles=self.quantiles, metric=self.metric )
            gridcv = sklearn.model_selection.GridSearchCV( est, [{'C':Cvals2,'gamma':gvals}], cv=skf, n_jobs=self.n_jobs, refit=False )
            gridcv.fit( bags, y, sample_weight=sample_weight, calibrate=self._calibrate )
            for mean_score,params in zip(gridcv.cv_results_['mean_test_score'],gridcv.cv_results_['params']):
                acc[params['C'],params['gamma']] = mean_score
            # NOTE(review): best_score_/best_params_ with refit=False depend on
            # the sklearn version's GridSearchCV behavior — confirm.
            if gridcv.best_score_ > bestacc:
                bestC = gridcv.best_params_['C']
                bestg = gridcv.best_params_['gamma']
                bestacc = gridcv.best_score_
        # Re-center and continue if the best point sits on the grid edge.
        if bestC == Cvals[0] or bestC == Cvals[-1] or ( self.kernel == 'rbf' and ( bestg == gvals[0] or bestg == gvals[-1] ) ):
            C = bestC
            gamma = bestg
        else:
            break
    # Invalidate any previously fitted models.
    self._model = None
    self._model_agg = None
    return bestC,bestg
def main(batch_size, num_epochs, lr, train_input_dir, dev1_input_dir, result_save_dir):
    """Fine-tune the classifier and evaluate it on dev1 after every epoch.

    Each epoch saves a checkpoint plus an accuracy summary and the
    predicted/correct label pairs under result_save_dir.
    """
    logger.info("batch_size: {} num_epochs: {} lr: {}".format(
        batch_size, num_epochs, lr))

    # Build the training and dev1 dataloaders.
    logger.info("Create train dataloader from {}.".format(train_input_dir))
    train_dataset = create_dataset(train_input_dir, num_examples=-1, num_options=4)
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    logger.info("Create dev1 dataloader from {}.".format(dev1_input_dir))
    dev1_dataset = create_dataset(dev1_input_dir, num_examples=-1, num_options=20)
    dev1_dataloader = DataLoader(
        dev1_dataset, batch_size=4, shuffle=False, drop_last=True)

    # Instantiate the classifier from the pretrained Japanese BERT weights.
    logger.info("Create a classifier model.")
    classifier_model = LinearClassifier.from_pretrained(
        "cl-tohoku/bert-base-japanese-whole-word-masking")
    classifier_model.to(device)

    # Optimizer with a linear warmup/decay schedule over the whole run.
    optimizer = AdamW(classifier_model.parameters(), lr=lr, eps=1e-8)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # Make sure the output directory exists.
    os.makedirs(result_save_dir, exist_ok=True)

    logger.info("Start model training.")
    for epoch_idx in range(1, num_epochs + 1):
        logger.info("===== Epoch {}/{} =====".format(epoch_idx, num_epochs))
        mean_loss = train(classifier_model, optimizer, scheduler, train_dataloader)
        logger.info("Mean loss: {}".format(mean_loss))

        # Persist this epoch's weights.
        checkpoint_filepath = os.path.join(
            result_save_dir, "checkpoint_{}.pt".format(epoch_idx))
        torch.save(classifier_model.state_dict(), checkpoint_filepath)

        # Evaluate on dev1 and record the outcome as text files.
        pred_labels, correct_labels, accuracy = evaluate(
            classifier_model, dev1_dataloader)
        logger.info("Accuracy: {}".format(accuracy))

        res_filepath = os.path.join(
            result_save_dir, "result_eval_{}.txt".format(epoch_idx))
        labels_filepath = os.path.join(
            result_save_dir, "labels_eval_{}.txt".format(epoch_idx))
        with open(res_filepath, "w") as w:
            w.write("Accuracy: {}\n".format(accuracy))
        with open(labels_filepath, "w") as w:
            for pred_label, correct_label in zip(pred_labels, correct_labels):
                w.write("{} {}\n".format(pred_label, correct_label))
    logger.info("Finished model training.")