Example #1
0
    def reset(self, params, rep):
        """Load the dataset named in ``params`` and select fold ``rep``.

        ``params['dataset']`` chooses the data source by prefix:

        * ``toy-<data_dims>-<cone_dims>``: data built by ``make_data``;
          ``self.dimensions`` becomes ``[cone_dims]``.
        * ``wn...``: the svmlight file ``<name>.mat`` in the hard-coded
          WordNet data directory, wrapped in ``SvmlightDataset``.
        * anything else: fetched by name through ``fetch_mldata``.

        For the last two sources ``self.dimensions`` is read from
        ``params['dimensions']``.

        The samples are split into ``params['repetitions']`` shuffled folds
        (stored on ``self.cv``) and the train/test split at index ``rep``
        is stored on ``self.X_train``, ``self.X_test``, ``self.y_train``
        and ``self.y_test``.  ``params`` and a few sizes are printed along
        the way as progress output.
        """
        dataset_name = params['dataset']
        print(params)
        # Seed the stdlib RNG from the textual form of the parameters.
        random.seed(abs(hash(str(params))))

        if dataset_name.startswith('toy'):
            # Exactly two integer fields are expected after the 'toy' prefix.
            size_fields = dataset_name.split('-')[1:]
            data_dims, cone_dims = [int(field) for field in size_fields]
            self.dimensions = [cone_dims]
            self.dataset = make_data(data_dims, cone_dims)
        elif dataset_name.startswith('wn'):
            self.dimensions = params['dimensions']
            svmlight_path = ('../../../Documents/conewordnetdata/data-nouns-deps-mi/'
                             + dataset_name + '.mat')
            self.dataset = SvmlightDataset(load_svmlight_file(svmlight_path))
            print(self.dataset.target.shape)
            print(self.dataset.data.shape)
        else:
            self.dimensions = params['dimensions']
            self.dataset = fetch_mldata(dataset_name)

        # Derive the shuffle seed from the data itself so that the same data
        # is always shuffled the same way: first 7 hex digits of its SHA-1.
        data_digest = hashlib.sha1(self.dataset.data).hexdigest()
        seed = int(data_digest[:7], 16)

        # One fold per repetition, shuffled with the data-derived seed.
        fold_count = params['repetitions']
        sample_count = self.dataset.target.shape[0]
        self.cv = KFold(k=fold_count, n=sample_count,
                        shuffle=True, random_state=seed)

        all_folds = list(self.cv)
        train_idx, test_idx = all_folds[rep]
        print('%s %s' % (len(train_idx), len(test_idx)))

        features = self.dataset.data
        self.X_train = features[train_idx]
        self.X_test = features[test_idx]

        labels = self.dataset.target
        self.y_train = labels[train_idx]
        self.y_test = labels[test_idx]
 def generator(data_dims, cone_dims, num_instances=1000):
     """Return ``make_data`` output for ``num_instances`` samples.

     ``epsilon`` is not a parameter: it is looked up in the surrounding
     (enclosing or module) scope when this function is called.
     """
     samples = make_data(data_dims, cone_dims,
                         size=num_instances, epsilon=epsilon)
     return samples
 def generateNoisyTestData(self, data_dims, cone_dims, num_instances=1000,
                           noise=0.1):
     """Return ``make_data`` output for ``num_instances`` noisy samples.

     ``data_dims`` and ``cone_dims`` are passed through positionally,
     ``num_instances`` as ``size`` and ``noise`` as ``noise``.

     ``noise`` defaults to 0.1, the value that used to be hard-coded here,
     so existing callers get the same result; pass another value to vary
     the noise level.
     """
     return make_data(data_dims, cone_dims, size=num_instances, noise=noise)