Beispiel #1
0
 def __init__(self, path='~/devel/data/GEMLeR_GeneExpression/Breast_Colon.pkl', fold_no=0):
     """Load the pickled gene-expression data and initialise the data object.

     :param path: pickle file to load; a leading ``~`` is expanded. The
         pickle must unpack into a ``(data, target)`` pair.
     :param fold_no: passed to the parent's ``createCVSplit`` as ``use_fold``.
     """
     features, labels = ut.pickleLoad(os.path.expanduser(path))

     # The split is created before the parent constructor runs; that order
     # is kept exactly as it was.
     parent = super(GeneData, self)
     parent.createCVSplit(features, labels, use_fold=fold_no)
     parent.__init__()

     # Length of the last axis of the loaded data.
     self.example_shape = features.shape[-1]
     self.n_lab = 1
Beispiel #2
0
 def __init__(self, path='~/devel/data/PianoRoll/Nottingham_enc.pkl', n_tap=20, n_lab=58):
     """Load the pickled piano-roll splits and initialise the data object.

     :param path: pickle file to load; a leading ``~`` is expanded. The
         pickle must unpack into three items, stored as ``train_d``,
         ``valid_d`` and ``test_d``.
     :param n_tap: stored as ``self.n_taps``.
     :param n_lab: forwarded to the parent constructor and stored as
         ``self.n_lab``.
     """
     splits = ut.pickleLoad(os.path.expanduser(path))
     self.train_d, self.valid_d, self.test_d = splits

     super(PianoData, self).__init__(n_lab=n_lab)

     # Length of the last axis of the first training item.
     first_train_item = self.train_d[0]
     self.example_shape = first_train_item.shape[-1]
     self.n_taps = n_tap
     self.n_lab = n_lab
Beispiel #3
0
    def download(self):
        """Return the MNIST data, fetching it into a per-user cache if needed.

        The cache directory is the ``ELEKTRONN`` folder below ``%APPDATA%``
        when ``os.name == 'nt'`` and ``~/.ELEKTRONN`` otherwise; it is created
        when missing. If ``mnist.pkl.gz`` already exists there it is loaded
        directly, otherwise it is downloaded first.

        :return: whatever ``ut.pickleLoad`` yields for the cached file.
        :raises KeyError: on Windows when ``APPDATA`` is not set.
        """
        url = "http://www.elektronn.org/downloads/mnist.pkl.gz"

        if os.name == 'nt':
            cache_dir = os.path.join(os.environ['APPDATA'], 'ELEKTRONN')
        else:
            cache_dir = os.path.join(os.path.expanduser('~'), '.ELEKTRONN')

        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        dest = os.path.join(cache_dir, 'mnist.pkl.gz')

        if os.path.exists(dest):
            print("Found existing mnist data")
            return ut.pickleLoad(dest)

        print("Downloading mnist data from")
        print(url)
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            # Always release the HTTP connection, even if the read fails.
            response.close()

        print("Saving data to %s" % (dest,))
        # Write to a side file and rename afterwards, so that an interrupted
        # write never leaves a truncated file at the cache location (which
        # later runs would pick up as "existing mnist data").
        partial = dest + '.part'
        with open(partial, "wb") as out:
            out.write(data)
        os.rename(partial, dest)

        return ut.pickleLoad(dest)
Beispiel #4
0
    def __init__(self, path='~/devel/data/Buzz/Twitter/twitter.pkl', norm_targets=True, target_scale=9999, fold_no=0):
        """Load the pickled Buzz data, rescale the targets and set up the splits.

        :param path: pickle file to load; a leading ``~`` is expanded. The
            pickle must unpack into a ``(data, target)`` pair.
        :param norm_targets: if true, divide the targets in place by their
            maximum.
        :param target_scale: if not ``None``, replace the targets by
            ``log10(target * target_scale + 1)``.
        :param fold_no: passed to the parent's ``createCVSplit`` as ``use_fold``.
        """
        data, target = ut.pickleLoad(os.path.expanduser(path))

        if norm_targets:
            # In-place division, exactly as before (keeps the array's dtype).
            target /= target.max()
        if target_scale is not None:
            scaled = target * target_scale + 1
            target = np.log10(scaled)

        # The split is created before the parent constructor runs.
        parent = super(BuzzData, self)
        parent.createCVSplit(data, target, use_fold=fold_no)
        parent.__init__()

        shape = data.shape
        self.example_shape = shape[-1]  # last axis of the data
        self.n_taps = shape[-2]  # second-to-last axis of the data
        self.n_lab = 1
Beispiel #5
0
    def __init__(self, path='~/devel/data/adult.pkl', create=False):
        """Set up the "adult" data from a pickle, optionally rebuilding it first.

        :param path: pickle file holding the tuple
            ``(train_d, train_l, valid_d, valid_l)``; a leading ``~`` is
            expanded.
        :param create: if true, download ``adult.data`` and ``adult.test``
            from the UCI archive, parse them with ``np.genfromtxt``, pass them
            through ``self._normalise_adult``, split off the last column as
            ``int16`` labels and save the four arrays to ``path``. If false,
            the four arrays are loaded from ``path``.
        """
        path = os.path.expanduser(path)
        if create:
            self._fields = 'age,workclass,fnlwgt,education,educationnum,maritalstatus,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,nativecountry,target'.split(',')
            self._kinds = 'cont,cat,cat,cat,cont,cat,cat,cat,cat,cat,cont,cont,cont,cat,cat'.split(',')

            # NOTE(review): skip_header=1 drops the first line of both files.
            # Confirm that adult.data really starts with a non-data line;
            # otherwise one training sample is silently lost here.
            data_socket = urllib2.urlopen('http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data')
            try:
                train_d = np.genfromtxt(data_socket, skip_header=1, delimiter=',', names=self._fields, dtype=None)
            finally:
                # Release the connection even if parsing fails.
                data_socket.close()
            train_d = self._normalise_adult(train_d)
            self.train_l = train_d[:, -1].astype('int16')  # last column is the label
            self.train_d = train_d[:, :-1]

            test_socket = urllib2.urlopen('http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test')
            try:
                valid_d = np.genfromtxt(test_socket, skip_header=1, delimiter=',', names=self._fields, dtype=None)
            finally:
                test_socket.close()
            valid_d = self._normalise_adult(valid_d)
            self.valid_l = valid_d[:, -1].astype('int16')  # last column is the label
            self.valid_d = valid_d[:, :-1]

            ut.pickleSave((self.train_d, self.train_l, self.valid_d, self.valid_l), path)

        else:
            self.train_d, self.train_l, self.valid_d, self.valid_l = ut.pickleLoad(path)

        super(AdultData, self).__init__()
Beispiel #6
0
    def __init__(self, path=None, convert2image=True, warp_on=False, shift_augment=True, center=True):
        if path is None:
            (self.train_d, self.train_l), (self.valid_d, self.valid_l), (self.test_d, self.test_l) = self.download()
        else:
            path = os.path.expanduser(path)
            (self.train_d, self.train_l), (self.valid_d, self.valid_l), (self.test_d, self.test_l) = ut.pickleLoad(path)

        self.warp_on = warp_on
        self.shif_augment = shift_augment
        self.return_flat = not convert2image
        self.test_l = self.test_l.astype(np.int16)
        self.train_l = self.train_l.astype(np.int16)
        self.valid_l = self.valid_l.astype(np.int16)

        if center:
            self.test_d -= self.test_d.mean()
            self.train_d -= self.train_d.mean()
            self.valid_d -= self.valid_d.mean()

        self.convert_to_image()
        if self.shif_augment:
            self._stripborder(1)
            self.train_d, self.train_l = self._augmentMNIST(self.train_d, self.train_l, crop=2, factor=4)

        super(MNISTData, self).__init__()
        if not convert2image:
            self.example_shape = self.train_d[0].size

        print "MNIST data is converted/augmented to shape", self.example_shape