def loadLabel (self, filename, verbose=True): ''' Get the solution/truth values''' if verbose: print("========= Reading " + filename) start = time.time() if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")): with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "r") as pickle_file: vprint (verbose, "Loading pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")) return pickle.load(pickle_file) if 'task' not in self.info.keys(): self.getTypeProblem(filename) # IG: Here change to accommodate the new multiclass label format if self.info['task'] == 'multilabel.classification': label = data_io.data(filename) elif self.info['task'] == 'multiclass.classification': label = data_io.data(filename) # IG: I changed that because it was too confusing. #label = data_converter.convert_to_num(data_io.data(filename)) else: label = np.ravel(data_io.data(filename)) # get a column vector #label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector if self.use_pickle: with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file: vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")) p = pickle.Pickler(pickle_file) p.fast = True p.dump(label) end = time.time() if verbose: print( "[+] Success in %5.2f sec" % (end - start)) return label
def loadLabel (self, filename, verbose=True): ''' Get the solution/truth values''' if verbose: print("========= Reading " + filename) start = time.time() if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")): with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "r") as pickle_file: vprint (verbose, "Loading pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")) return pickle.load(pickle_file) if 'task' not in self.info.keys(): self.getTypeProblem(filename) # IG: Here change to accommodate the new multiclass label format if self.info['task'] == 'multilabel.classification': label = data_io.data(filename) elif self.info['task'] == 'multiclass.classification': label = data_converter.convert_to_num(data_io.data(filename)) else: label = np.ravel(data_io.data(filename)) # get a column vector #label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector if self.use_pickle: with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file: vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")) p = pickle.Pickler(pickle_file) p.fast = True p.dump(label) end = time.time() if verbose: print( "[+] Success in %5.2f sec" % (end - start)) return label
# Inventory the datasets (new style, datasets in subdirectories) input_dir = argv[1] datanames = data_io.inventory_data(input_dir) # The output directory will contain the scores, create it if it does not exist output_dir = argv[2] data_io.mkdir(output_dir) if len(datanames) == 0: print("****** No data found ******") # Loop over datasets for basename in datanames: print("****** Processing " + basename.capitalize() + " ******") # Fake predictions on validation and test data X = data_io.data( path.join(input_dir, basename, basename + '_valid.data')) Yvalid = random.rand(X.shape[0]) X = data_io.data( path.join(input_dir, basename, basename + '_test.data')) Ytest = random.rand(X.shape[0]) # Write results to files data_io.write(path.join(output_dir, basename + '_valid.predict'), Yvalid) data_io.write(path.join(output_dir, basename + '_test.predict'), Ytest) # Lots of debug code... data_io.show_io(input_dir, output_dir) data_io.show_version() exit(0)