def __init__(self, path='~/devel/data/GEMLeR_GeneExpression/Breast_Colon.pkl', fold_no=0):
    """Load a pickled gene-expression data set and set up a CV split.

    Parameters
    ----------
    path : str
        Location of the pickle file; a leading ``~`` is expanded. The
        loaded object is unpacked into a ``(data, target)`` pair.
    fold_no : int
        Forwarded to the parent's ``createCVSplit`` as ``use_fold``.
    """
    samples, labels = ut.pickleLoad(os.path.expanduser(path))

    # The split is created before the parent initialiser runs
    # (presumably the initialiser relies on it -- confirm in the parent).
    parent = super(GeneData, self)
    parent.createCVSplit(samples, labels, use_fold=fold_no)
    parent.__init__()

    self.example_shape = samples.shape[-1]  # size of the last axis
    self.n_lab = 1
def __init__(self, path='~/devel/data/PianoRoll/Nottingham_enc.pkl', n_tap=20, n_lab=58):
    """Load the pickled piano-roll splits and record the basic dimensions.

    Parameters
    ----------
    path : str
        Location of the pickle file; a leading ``~`` is expanded. The
        loaded object is unpacked into ``(train_d, valid_d, test_d)``.
    n_tap : int
        Stored on the instance as ``n_taps``.
    n_lab : int
        Passed to the parent initialiser and stored as ``n_lab``.
    """
    splits = ut.pickleLoad(os.path.expanduser(path))
    self.train_d, self.valid_d, self.test_d = splits

    super(PianoData, self).__init__(n_lab=n_lab)

    # Set after the parent initialiser so these values are the final ones.
    self.n_lab = n_lab
    self.n_taps = n_tap
    # Size of the last axis of the first training item.
    self.example_shape = self.train_d[0].shape[-1]
def download(self):
    """Return the MNIST pickle, fetching it into a per-user cache if needed.

    The cache directory is ``ELEKTRONN`` under ``%APPDATA%`` when
    ``os.name == 'nt'`` and ``~/.ELEKTRONN`` otherwise; it is created if
    missing. If ``mnist.pkl.gz`` already exists there it is loaded
    directly, otherwise it is downloaded first.

    Returns
    -------
    Whatever ``ut.pickleLoad`` yields for the cached ``mnist.pkl.gz``.

    Raises
    ------
    KeyError
        On Windows when the ``APPDATA`` environment variable is unset.
    urllib2.URLError
        When the download fails.
    """
    url = "http://www.elektronn.org/downloads/mnist.pkl.gz"

    if os.name == 'nt':
        cache_dir = os.path.join(os.environ['APPDATA'], 'ELEKTRONN')
    else:
        cache_dir = os.path.join(os.path.expanduser('~'), '.ELEKTRONN')
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    dest = os.path.join(cache_dir, 'mnist.pkl.gz')
    if os.path.exists(dest):
        print("Found existing mnist data")
        return ut.pickleLoad(dest)

    print("Downloading mnist data from")
    print(url)
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        # Always release the HTTP connection, even if the read fails.
        response.close()

    print("Saving data to %s" % (dest,))
    # Write to a side file and rename afterwards, so an interrupted write
    # never leaves a truncated file that a later call would accept as a
    # valid cache entry.
    partial = dest + ".part"
    with open(partial, "wb") as out:
        out.write(payload)
    os.rename(partial, dest)

    return ut.pickleLoad(dest)
def __init__(self, path='~/devel/data/Buzz/Twitter/twitter.pkl', norm_targets=True, target_scale=9999, fold_no=0):
    """Load the pickled Buzz data, rescale the targets and set up a CV split.

    Parameters
    ----------
    path : str
        Location of the pickle file; a leading ``~`` is expanded. The
        loaded object is unpacked into a ``(data, target)`` pair.
    norm_targets : bool
        If true, the targets are divided in place by their maximum.
    target_scale : number or None
        If not ``None``, the targets are replaced by
        ``log10(target * target_scale + 1)``.
    fold_no : int
        Forwarded to the parent's ``createCVSplit`` as ``use_fold``.
    """
    samples, labels = ut.pickleLoad(os.path.expanduser(path))

    if norm_targets:
        # In-place division: the array keeps its dtype.
        labels /= labels.max()
    if target_scale is not None:
        labels = np.log10(labels * target_scale + 1)

    # The split is created before the parent initialiser runs.
    parent = super(BuzzData, self)
    parent.createCVSplit(samples, labels, use_fold=fold_no)
    parent.__init__()

    self.example_shape = samples.shape[-1]  # size of the last axis
    self.n_taps = samples.shape[-2]  # size of the second-to-last axis
    self.n_lab = 1
def __init__(self, path='~/devel/data/adult.pkl', create=False):
    """Load the Adult data set from a pickle, or build that pickle first.

    Parameters
    ----------
    path : str
        Location of the pickle file; a leading ``~`` is expanded. It is
        read when ``create`` is false and written when ``create`` is true.
    create : bool
        If true, download ``adult.data`` (used as training split) and
        ``adult.test`` (used as validation split) from the UCI archive,
        pass each through ``self._normalise_adult``, split off the last
        column as ``int16`` labels and save the four arrays to ``path``.

    Notes
    -----
    ``self._fields`` and ``self._kinds`` are only set when ``create`` is
    true.
    """
    path = os.path.expanduser(path)

    if create:
        self._fields = 'age,workclass,fnlwgt,education,educationnum,maritalstatus,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,nativecountry,target'.split(',')
        self._kinds = 'cont,cat,cat,cat,cont,cat,cat,cat,cat,cat,cont,cont,cont,cat,cat'.split(',')

        def fetch_split(url):
            """Download one CSV file and return ``(features, int16 labels)``."""
            handle = urllib2.urlopen(url)
            try:
                # NOTE(review): skip_header=1 is applied to both files;
                # confirm adult.data really starts with a non-data line,
                # otherwise its first record is dropped.
                table = np.genfromtxt(handle, skip_header=1, delimiter=',',
                                      names=self._fields, dtype=None)
            finally:
                # The connection used to be left open; release it even if
                # parsing fails.
                handle.close()
            table = self._normalise_adult(table)
            # The last column holds the label, everything before it the
            # features.
            return table[:, :-1], table[:, -1].astype('int16')

        self.train_d, self.train_l = fetch_split(
            'http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data')
        self.valid_d, self.valid_l = fetch_split(
            'http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test')

        ut.pickleSave((self.train_d, self.train_l, self.valid_d, self.valid_l), path)
    else:
        self.train_d, self.train_l, self.valid_d, self.valid_l = ut.pickleLoad(path)

    super(AdultData, self).__init__()
def __init__(self, path=None, convert2image=True, warp_on=False, shift_augment=True, center=True):
    """Load MNIST, optionally centre and augment it, then init the parent.

    Parameters
    ----------
    path : str or None
        Pickle file to load (a leading ``~`` is expanded). If ``None``,
        the data is obtained from ``self.download()`` instead. Either
        source is unpacked into three ``(data, labels)`` pairs: train,
        valid and test.
    convert2image : bool
        Its negation is stored as ``return_flat``. When false,
        ``example_shape`` is overwritten with the element count of the
        first training example. ``convert_to_image()`` is called in
        either case.
    warp_on : bool
        Stored as ``warp_on``.
    shift_augment : bool
        Stored as ``shif_augment``. When true, ``_stripborder(1)`` is
        applied and the training split is replaced by the result of
        ``_augmentMNIST(..., crop=2, factor=4)``.
    center : bool
        If true, each split has its own mean subtracted in place.
    """
    if path is None:
        splits = self.download()
    else:
        splits = ut.pickleLoad(os.path.expanduser(path))
    (self.train_d, self.train_l), (self.valid_d, self.valid_l), (self.test_d, self.test_l) = splits

    self.return_flat = not convert2image
    self.shif_augment = shift_augment  # attribute spelling kept as-is
    self.warp_on = warp_on

    # Labels are stored as int16.
    for label_attr in ("test_l", "train_l", "valid_l"):
        setattr(self, label_attr, getattr(self, label_attr).astype(np.int16))

    if center:
        # Each split is centred with its own mean, not a shared one.
        for data_attr in ("test_d", "train_d", "valid_d"):
            values = getattr(self, data_attr)
            values -= values.mean()
            setattr(self, data_attr, values)

    self.convert_to_image()

    if self.shif_augment:
        self._stripborder(1)
        augmented = self._augmentMNIST(self.train_d, self.train_l, crop=2, factor=4)
        self.train_d, self.train_l = augmented

    super(MNISTData, self).__init__()

    if not convert2image:
        self.example_shape = self.train_d[0].size

    # Single-argument form prints the same text on Python 2 and 3.
    print("%s %s" % ("MNIST data is converted/augmented to shape", self.example_shape))