def set_up(self):
    """Load the glass data, whiten it, and build the GP posterior target.

    The features are centred column-wise and then transformed with the
    Cholesky factor of their covariance, mirroring the preprocessing of the
    kameleon-mcmc code. The resulting target is stored in
    ``self.gp_posterior``.
    """
    # load data using kameleon-mcmc code
    logger.info("Loading data")
    features, labels = GlassPosterior._load_glass_data()

    # normalise and whiten dataset, as done in kameleon-mcmc code
    logger.info("Whitening data")
    features -= np.mean(features, 0)
    chol_factor = np.linalg.cholesky(np.cov(features.T))
    # solve L * Z = X^T for Z, then transpose back to samples-by-features
    whitened = sp.linalg.solve_triangular(chol_factor, features.T, lower=True)
    features = whitened.T

    # build target, as in kameleon-mcmc code
    self.gp_posterior = PseudoMarginalHyperparameters(
        features,
        labels,
        self.n_importance,
        self.prior_log_pdf,
        self.ridge,
        num_shogun_threads=1,
    )
def set_up(self):
    """Prepare the pseudo-marginal GP target on the whitened glass dataset.

    Steps (following the kameleon-mcmc code):
      1. load features and binary labels via ``_load_glass_data``,
      2. subtract the per-column mean,
      3. whiten using the lower Cholesky factor of the sample covariance,
      4. construct ``PseudoMarginalHyperparameters`` and keep it on
         ``self.gp_posterior``.
    """
    logger.info("Loading data")
    inputs, targets = GlassPosterior._load_glass_data()

    logger.info("Whitening data")
    column_means = np.mean(inputs, 0)
    inputs -= column_means

    covariance = np.cov(inputs.T)
    lower = np.linalg.cholesky(covariance)
    inputs = sp.linalg.solve_triangular(lower, inputs.T, lower=True).T

    # single shogun thread, as in the kameleon-mcmc code
    target = PseudoMarginalHyperparameters(inputs, targets,
                                           self.n_importance,
                                           self.prior_log_pdf,
                                           self.ridge,
                                           num_shogun_threads=1)
    self.gp_posterior = target
def _load_glass_data(data_dir=os.sep.join(
        [os.path.expanduser('~'), "data"])):
    """Load the UCI glass dataset as a binary classification problem.

    ``glass.data`` is read from ``data_dir``. If it cannot be read, the
    directory is created (best effort), the file is downloaded from the UCI
    repository and the read is retried. The file's SHA-1 is then compared
    against a fixed reference value.

    Parameters
    ----------
    data_dir : str
        Directory that holds (or will receive) ``glass.data``.
        Defaults to ``~/data``.

    Returns
    -------
    data : numpy.ndarray
        All columns except the first (ids) and the last (class label).
    lab : numpy.ndarray
        ``1.0`` where the class label is ``<= 4`` ("window glass"),
        ``-1.0`` otherwise.

    Raises
    ------
    RuntimeError
        If the file still cannot be read after the download, or if its
        SHA-1 does not match the reference.
    """
    filename = os.sep.join([data_dir, "glass.data"])

    try:
        data = np.loadtxt(filename, delimiter=",")
    except IOError:
        # make sure dir exists; an already-existing directory is fine
        try:
            os.makedirs(data_dir)
        except OSError:
            pass

        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
        logger.warning("%s not found. Trying to download from %s" % (filename, url))

        # urlretrieve lives in urllib.request on Python 3 and directly in
        # urllib on Python 2; resolve it locally so both interpreters work
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        urlretrieve(url, filename)

        # try again
        try:
            data = np.loadtxt(filename, delimiter=",")
        except IOError:
            raise RuntimeError(
                "Download failed. Please download manually.")

    # make sure file is as expected
    s_reference = "eb292f3709b6fbbeb18a34f95e2293470cbe58ed"
    logger.info("Asserting sha1sum(%s)==%s" % (filename, s_reference))
    s = sha1sum(filename)
    if s != s_reference:
        raise RuntimeError("sha1sum(%s) is %s while reference is %s" %
                           (filename, s, s_reference))

    # create a binary "window glass" vs "non-window glass" labelling
    lab = np.where(data[:, -1] <= 4, 1.0, -1.0)

    # cut off ids and labeling
    data = data[:, 1:-1]

    return data, lab
def _load_glass_data(data_dir=os.sep.join([os.path.expanduser('~'), "data"])):
    """Return the UCI glass features and a +1/-1 "window glass" labelling.

    The data is read from ``<data_dir>/glass.data``. When that read fails,
    ``data_dir`` is created if possible, the file is fetched from the UCI
    archive, and the read is attempted once more. Afterwards the file's
    SHA-1 is checked against a hard-coded reference.

    Parameters
    ----------
    data_dir : str
        Folder containing ``glass.data`` (downloaded into it if missing).
        Defaults to ``~/data``.

    Returns
    -------
    data : numpy.ndarray
        The loaded table without its first (id) and last (class) column.
    lab : numpy.ndarray
        ``1.0`` for rows whose class is ``<= 4``, ``-1.0`` for the rest.

    Raises
    ------
    RuntimeError
        When the file cannot be read even after downloading, or when the
        SHA-1 of the file differs from the reference.
    """
    filename = os.sep.join([data_dir, "glass.data"])

    try:
        data = np.loadtxt(filename, delimiter=",")
    except IOError:
        # make sure dir exists (errors such as "already exists" are ignored;
        # a truly unusable directory surfaces in the download below)
        try:
            os.makedirs(data_dir)
        except OSError:
            pass

        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
        logger.warning("%s not found. Trying to download from %s" % (filename, url))

        # Python 3 moved urlretrieve into urllib.request; fall back to the
        # Python 2 location so the download works on either interpreter.
        try:
            import urllib.request as _urllib_compat
        except ImportError:
            import urllib as _urllib_compat
        _urllib_compat.urlretrieve(url, filename)

        # try again
        try:
            data = np.loadtxt(filename, delimiter=",")
        except IOError:
            raise RuntimeError("Download failed. Please download manually.")

    # make sure file is as expected
    s_reference = "eb292f3709b6fbbeb18a34f95e2293470cbe58ed"
    logger.info("Asserting sha1sum(%s)==%s" % (filename, s_reference))
    s = sha1sum(filename)
    if s != s_reference:
        raise RuntimeError("sha1sum(%s) is %s while reference is %s" % (filename, s, s_reference))

    # create a binary "window glass" vs "non-window glass" labelling
    class_ids = data[:, -1]
    lab = np.where(class_ids <= 4, 1.0, -1.0)

    # cut off ids and labeling
    data = data[:, 1:-1]

    return data, lab