def set_up(self):
     """
     Prepare the glass-data target and store it in ``self.gp_posterior``.

     Loads features and labels via ``GlassPosterior._load_glass_data``,
     centres every feature column, whitens the features with the Cholesky
     factor of their sample covariance, and finally constructs a
     ``PseudoMarginalHyperparameters`` object from the whitened features,
     the labels, ``self.n_importance``, ``self.prior_log_pdf`` and
     ``self.ridge``.
     """
     # fetch the raw dataset (same procedure as the kameleon-mcmc code)
     logger.info("Loading data")
     features, labels = GlassPosterior._load_glass_data()

     # centre in place, then whiten as done in kameleon-mcmc code: solving
     # chol * Z = features^T yields Z^T with identity sample covariance
     logger.info("Whitening data")
     features -= np.mean(features, 0)
     chol = np.linalg.cholesky(np.cov(features.T))
     whitened = sp.linalg.solve_triangular(chol, features.T, lower=True).T

     # assemble the target, as in kameleon-mcmc code
     self.gp_posterior = PseudoMarginalHyperparameters(
         whitened, labels,
         self.n_importance, self.prior_log_pdf, self.ridge,
         num_shogun_threads=1)
    def set_up(self):
        """
        Prepare the glass-data target and store it in ``self.gp_posterior``.

        Loads features and labels via ``GlassPosterior._load_glass_data``,
        centres every feature column, whitens the features with the Cholesky
        factor of their sample covariance, and finally constructs a
        ``PseudoMarginalHyperparameters`` object from the whitened features,
        the labels, ``self.n_importance``, ``self.prior_log_pdf`` and
        ``self.ridge``.
        """
        # fetch the raw dataset (same procedure as the kameleon-mcmc code)
        logger.info("Loading data")
        features, labels = GlassPosterior._load_glass_data()

        # centre in place, then whiten as done in kameleon-mcmc code: solving
        # chol * Z = features^T yields Z^T with identity sample covariance
        logger.info("Whitening data")
        features -= np.mean(features, 0)
        chol = np.linalg.cholesky(np.cov(features.T))
        whitened = sp.linalg.solve_triangular(chol, features.T, lower=True).T

        # assemble the target, as in kameleon-mcmc code
        self.gp_posterior = PseudoMarginalHyperparameters(
            whitened, labels,
            self.n_importance, self.prior_log_pdf, self.ridge,
            num_shogun_threads=1)
    def _load_glass_data(data_dir=os.sep.join(
        [os.path.expanduser('~'), "data"])):
        """
        Load the glass dataset from ``data_dir``, downloading it if missing.

        The file ``glass.data`` is read from ``data_dir`` as comma-separated
        numbers. If it cannot be opened, the directory is created (best
        effort), the file is downloaded from the UCI repository and read
        again. The SHA-1 checksum of the file is then compared against a
        fixed reference value.

        NOTE(review): takes no ``self`` and is invoked as
        ``GlassPosterior._load_glass_data()``; presumably meant to be a
        static method -- confirm the decorator exists in the full file.

        Parameters
        ----------
        data_dir : str
            Directory holding (or receiving) ``glass.data``. Defaults to the
            ``data`` folder inside the user's home directory.

        Returns
        -------
        data : numpy.ndarray
            All columns of the file except the first (ids) and the last
            (class labelling).
        lab : numpy.ndarray
            One entry per row: ``1.0`` where the last column is ``<= 4``
            ("window glass"), ``-1.0`` otherwise ("non-window glass").

        Raises
        ------
        RuntimeError
            If the file is missing and downloading or re-reading it fails,
            or if the checksum of the file differs from the reference.
        """
        filename = os.sep.join([data_dir, "glass.data"])

        try:
            data = np.loadtxt(filename, delimiter=",")
        except IOError:
            # urlretrieve lives in urllib.request on Python 3 and directly
            # in urllib on Python 2; support both
            try:
                from urllib.request import urlretrieve
            except ImportError:
                from urllib import urlretrieve

            # make sure dir exists; errors are deliberately ignored since
            # the directory usually exists already, and a directory that is
            # really unusable makes the download below fail anyway
            try:
                os.makedirs(data_dir)
            except OSError:
                pass

            url = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
            logger.warning("%s not found. Trying to download from %s" %
                           (filename, url))

            # download and try again; a failure of either step is reported
            # as the same "download failed" error
            try:
                urlretrieve(url, filename)
                data = np.loadtxt(filename, delimiter=",")
            except IOError:
                raise RuntimeError(
                    "Download failed. Please download manually.")

        # make sure file is as expected
        s_reference = "eb292f3709b6fbbeb18a34f95e2293470cbe58ed"
        logger.info("Asserting sha1sum(%s)==%s" % (filename, s_reference))
        s = sha1sum(filename)
        if s != s_reference:
            raise RuntimeError("sha1sum(%s) is %s while reference is %s" %
                               (filename, s, s_reference))

        # create a binary "window glass" vs "non-window glass" labelling
        lab = np.where(data[:, -1] <= 4, 1.0, -1.0)

        # cut off ids and labeling
        data = data[:, 1:-1]

        return data, lab
 def _load_glass_data(data_dir=os.sep.join([os.path.expanduser('~'), "data"])):
     """
     Load the glass dataset from ``data_dir``, downloading it if missing.

     The file ``glass.data`` is read from ``data_dir`` as comma-separated
     numbers. If it cannot be opened, the directory is created (best
     effort), the file is downloaded from the UCI repository and read
     again. The SHA-1 checksum of the file is then compared against a
     fixed reference value.

     NOTE(review): takes no ``self`` and is invoked as
     ``GlassPosterior._load_glass_data()``; presumably meant to be a
     static method -- confirm the decorator exists in the full file.

     Parameters
     ----------
     data_dir : str
         Directory holding (or receiving) ``glass.data``. Defaults to the
         ``data`` folder inside the user's home directory.

     Returns
     -------
     data : numpy.ndarray
         All columns of the file except the first (ids) and the last
         (class labelling).
     lab : numpy.ndarray
         One entry per row: ``1.0`` where the last column is ``<= 4``
         ("window glass"), ``-1.0`` otherwise ("non-window glass").

     Raises
     ------
     RuntimeError
         If the file is missing and downloading or re-reading it fails,
         or if the checksum of the file differs from the reference.
     """
     filename = os.sep.join([data_dir, "glass.data"])

     try:
         data = np.loadtxt(filename, delimiter=",")
     except IOError:
         # urlretrieve lives in urllib.request on Python 3 and directly
         # in urllib on Python 2; support both
         try:
             from urllib.request import urlretrieve
         except ImportError:
             from urllib import urlretrieve

         # make sure dir exists; errors are deliberately ignored since
         # the directory usually exists already, and a directory that is
         # really unusable makes the download below fail anyway
         try:
             os.makedirs(data_dir)
         except OSError:
             pass

         url = "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data"
         logger.warning("%s not found. Trying to download from %s" % (filename, url))

         # download and try again; a failure of either step is reported
         # as the same "download failed" error
         try:
             urlretrieve(url, filename)
             data = np.loadtxt(filename, delimiter=",")
         except IOError:
             raise RuntimeError("Download failed. Please download manually.")

     # make sure file is as expected
     s_reference = "eb292f3709b6fbbeb18a34f95e2293470cbe58ed"
     logger.info("Asserting sha1sum(%s)==%s" % (filename, s_reference))
     s = sha1sum(filename)
     if s != s_reference:
         raise RuntimeError("sha1sum(%s) is %s while reference is %s" % (filename,s, s_reference))

     # create a binary "window glass" vs "non-window glass" labelling
     lab = np.where(data[:, -1] <= 4, 1.0, -1.0)

     # cut off ids and labeling
     data = data[:, 1:-1]

     return data, lab