def loadLabel(self, filename, verbose=True):
    ''' Get the solution/truth values.

    When ``self.use_pickle`` is set, a pickled copy of the labels is looked
    up in (and, on a cache miss, written to) ``self.tmp_dir`` under the name
    ``<basename of filename>.pickle``.

    Args:
        filename: path of the label file to read.
        verbose: when true, print progress and timing messages.

    Returns:
        The labels. Depending on ``self.info['task']``:
        - 'multilabel.classification': the output of ``data_io.data`` as is;
        - 'multiclass.classification': that output passed through
          ``data_converter.convert_to_num``;
        - anything else: that output flattened to 1-D with ``np.ravel``.
        On a cache hit, whatever object was stored in the pickle file.
    '''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()

    # Build the cache path once; it is used for the lookup, the load and the save.
    pickle_path = os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")

    if self.use_pickle and os.path.exists(pickle_path):
        # Pickle data is binary: the file must be opened in "rb" mode (it is
        # written with "wb" below). Text mode breaks on Python 3 and on Windows.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # tmp_dir must only ever contain caches written by this program.
        with open(pickle_path, "rb") as pickle_file:
            vprint(verbose, "Loading pickle file : " + pickle_path)
            return pickle.load(pickle_file)

    # NOTE(review): getTypeProblem is presumably what fills in
    # self.info['task'] -- confirm against its definition.
    if 'task' not in self.info:
        self.getTypeProblem(filename)

    # IG: Here change to accommodate the new multiclass label format
    task = self.info['task']
    if task == 'multilabel.classification':
        label = data_io.data(filename)
    elif task == 'multiclass.classification':
        label = data_converter.convert_to_num(data_io.data(filename))
    else:
        # Flatten to a 1-D vector.
        label = np.ravel(data_io.data(filename))

    if self.use_pickle:
        with open(pickle_path, "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + pickle_path)
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(label)

    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return label
basename = 'Iris' D = DataManager(basename, input_dir) # Load data print D mymodel = model() # Train Yonehot_tr = D.data['Y_train'] # Attention pour les utilisateurs de problemes multiclasse, # mettre convert_to_num DANS la methode fit car l'ingestion program # fournit Yonehot_tr a la methode "fit" # Ceux qui resolvent des problemes a 2 classes ou des problemes de # regression n'en ont pas besoin Ytrue_tr = convert_to_num( Yonehot_tr, verbose=False) # For multi-class only, to be compatible with scikit-learn mymodel = mymodel.fit(D.data['X_train'], Ytrue_tr) # Making predictions Ypred_tr = mymodel.predict(D.data['X_train']) Ypred_va = mymodel.predict(D.data['X_valid']) Ypred_te = mymodel.predict(D.data['X_test']) # We can compute the training success rate acc_tr = accuracy_score(Ytrue_tr, Ypred_tr) # But it might be optimistic compared to the validation and test accuracy # that we cannot compute (except by making submissions to Codalab) # So, we can use cross-validation: acc_cv = cross_val_score(mymodel, D.data['X_train'],