def test_display_dimensions(self):
    """Plot the glass data set three features at a time, then its reduction.

    The data set is read with ``Retriever``, reduced with
    ``algorithms.Isomap`` (``e=20``) and handed to a ``Displayer``: one
    ``load`` call per group of up to three consecutive feature columns,
    followed by a final ``load`` of the reduced data and ``show``.
    """
    data_dir = 'datasets/'
    data_set = 'glass/glass.data'
    path = os.path.join(data_dir, data_set)

    print('Displaying data set {%s} in the Rn' % path)

    retriever = Retriever(path, delimiter=',')
    # The first column of the glass file holds the sample ids; split it off.
    retriever.split_column(0)
    # The last column is the target feature; split it off as well.
    retriever.split_target()

    samples, labels = retriever.retrieve()
    reduced = algorithms.Isomap(samples, e=20).run()

    displayer = Displayer(title=data_set)

    # Walk the feature columns in windows of three; the last window is
    # clipped to the number of features, so it may be narrower.
    for start in range(0, retriever.features_count, 3):
        stop = min(retriever.features_count, start + 3)
        window = samples[:, start:stop]
        # Titles use 1-based, inclusive column numbers.
        window_title = 'Dimensions: d e [%i, %i]' % (start + 1, stop)
        displayer.load(window, color=labels, title=window_title)

    # NOTE(review): this call passes the title first positionally, whereas
    # the calls in the loop pass the data first -- confirm against the
    # signature of Displayer.load.
    (displayer
        .load('Reduced glass data-set', reduced, labels)
        .show())
def _load_data(self):
    """Load the breast-cancer (wdbc) data set into this instance.

    Sets ``self.data`` (converted with ``astype(float)``), ``self.target``
    and ``self.feature_names``.
    """
    retriever = Retriever('../../datasets/breast-cancer/wdbc.data',
                          delimiter=',')
    # Drop the sample ids stored in the first column.
    retriever.split_column(0)

    # In the file the target feature sits in the 2nd column; with the ids
    # already removed it has moved to the 1st one, hence index 0.
    without_target = retriever.split_target(0)
    samples, labels = without_target.retrieve()

    self.data = samples
    self.target = labels
    self.data = self.data.astype(float)

    self.feature_names = ['radius', 'texture', 'perimeter']
def _load_data(self):
    """Load the data set at ``self.file`` into this instance.

    Sets ``self.data`` and ``self.target`` from the retriever, and binds
    ``self.original_data`` to the same object as ``self.data`` (no copy is
    made here).
    """
    retriever = Retriever(self.file, delimiter=',')
    # Drop the sample ids stored in the first column.
    retriever.split_column(0)

    # Separate the target feature from the data, then fetch both.
    without_target = retriever.split_target()
    samples, labels = without_target.retrieve()

    self.data = samples
    self.target = labels
    self.original_data = self.data