def load_dermatology():
    """Load the dermatology dataset and run it through ``prepare_data``.

    Reads ``datasets/dermatology.data`` (no header row), converts every
    entry of the second-to-last column (the ages) with ``int`` and hands
    the resulting array to ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` where ``dataset`` is whatever
        ``prepare_data`` returns and ``n_features`` is its column count
        minus two.
        NOTE(review): the "- 2" presumably discounts a column added by
        ``prepare_data`` plus the label column -- confirm.
    """
    raw_frame = pd.read_csv('datasets/dermatology.data', header=None)
    data = np.array(raw_frame)

    # Cast the age column element by element, then write it back in place.
    age_column = data[:, -2]
    data[:, -2] = np.array([int(value) for value in age_column])

    prepared = dataset_utils.prepare_data(data)
    feature_count = prepared.shape[1] - 2
    return prepared, feature_count
def load_verbetral():
    """Load the vertebral-column dataset and run it through ``prepare_data``.

    Reads ``datasets/column_3C.dat`` as a space-separated file without a
    header row and passes the raw array to ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` where ``n_features`` is the
        column count of the prepared dataset minus two.
    """
    raw_frame = pd.read_csv('datasets/column_3C.dat', header=None, sep=' ')
    prepared = dataset_utils.prepare_data(np.array(raw_frame))
    feature_count = prepared.shape[1] - 2
    return prepared, feature_count
def load_breast_cancer():
    """Load the Wisconsin breast-cancer dataset with binary labels.

    Reads ``datasets/breast-cancer-wisconsin.csv`` (no header row),
    rewrites the last column so that the value 4 becomes 1 and every
    other value becomes 0, then passes the array to
    ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` where ``n_features`` is the
        column count of the prepared dataset minus two.
    """
    raw_frame = pd.read_csv(
        'datasets/breast-cancer-wisconsin.csv', header=None)
    data = np.array(raw_frame)

    # Binarise the label column in place: 4 -> 1, anything else -> 0.
    data[:, -1] = np.where(data[:, -1] == 4, 1, 0)

    prepared = dataset_utils.prepare_data(data)
    feature_count = prepared.shape[1] - 2
    return prepared, feature_count
def load_artificial_regression():
    """Generate a synthetic 1-D regression dataset ``y = 3 * sin(x) + 1``.

    Draws 500 inputs from a standard normal distribution, evaluates the
    target for each one, passes the two-column array through
    ``dataset_utils.prepare_data`` and shows a scatter plot of column 1
    against the last column of the prepared data.

    Returns:
        tuple: ``(dataset, n_features)`` with ``n_features`` fixed at 1.
    """
    sample_count = 500

    def target(value):
        return 3 * sin(value) + 1

    inputs = np.random.normal(size=sample_count).reshape((sample_count, 1))
    # Evaluate the target row by row (each row holds a single input).
    outputs = np.array([target(row) for row in inputs])
    outputs = outputs.reshape((sample_count, 1))

    samples = np.append(inputs, outputs, axis=1)
    prepared = dataset_utils.prepare_data(np.array(samples))

    plt.scatter(prepared[:, 1], prepared[:, -1])
    plt.show()
    return prepared, 1
def load_iris_dataset():
    """Load Iris as a binary dataset for the class chosen in the config.

    The configuration is read from
    ``configurations/irisConfigurations.yml`` and must provide:

    * ``features`` -- names of the Iris features to keep (looked up in
      the dataset's ``feature_names``);
    * ``flower_to_classify`` -- the class name to be represented by 1;
      every other class becomes 0. Its index in ``target_names`` is the
      label handed to ``dataset_utils.transform_dataset``
      (0 - setosa, 1 - versicolor, 2 - virginica).

    Returns:
        tuple: ``(dataset, n_features)`` where ``dataset`` is the output
        of ``dataset_utils.prepare_data`` and ``n_features`` is the
        number of entries in the configured ``features`` list.

    Raises:
        SystemExit: with status 1, after printing an error message, when
            the configuration names an unknown flower or feature, lacks
            a required key, or an index lookup fails.
    """
    try:
        iris = datasets.load_iris()

        # Context manager so the config file handle is always closed.
        with open('configurations/irisConfigurations.yml', 'r',
                  encoding='utf-8') as config_file:
            stream = config_file.read()
        # NOTE(review): FullLoader is acceptable for a trusted local
        # config; consider yaml.safe_load if the file can be user-supplied.
        iris_cfg = yaml.load(stream=stream, Loader=yaml.FullLoader)

        n_features = len(iris_cfg['features'])
        iris_classes = list(iris['target_names'])
        # list.index raises ValueError for an unknown class name.
        label = iris_classes.index(iris_cfg['flower_to_classify'])

        attributes = np.array(iris['data'])
        if n_features < 4:
            # Keep only the requested columns, in the dataset's own order.
            chosen_features = set(iris_cfg['features'])
            features_indexes = sorted(
                iris['feature_names'].index(name)
                for name in chosen_features)
            attributes = np.array(
                [attributes[:, i] for i in features_indexes]).T

        labels = np.array(iris['target']).reshape(-1, 1)
        iris_dataset = np.append(attributes, labels, axis=1)

        new_dataset = dataset_utils.transform_dataset(iris_dataset, label)
        new_dataset = dataset_utils.prepare_data(new_dataset)
        return new_dataset, n_features
    except (IndexError, KeyError, ValueError):
        # Unknown names surface as ValueError and missing keys as KeyError;
        # IndexError alone never caught the common configuration mistakes.
        print('Erro ao carregar Iris.')
        print('Verifique se o arquivo irisConfigurations.yml está correto.')
        # Exit with a failure status instead of the site-provided exit().
        raise SystemExit(1)
def load_multiclass_artificial():
    """Generate a three-class 2-D artificial dataset and plot it.

    Each class is a cluster of 50 points drawn uniformly from a
    rectangle; rows are ``[x, y, class_label]``:

    * class 0: x in [1, 3), y in [4, 6) -- plotted with ``'ro'``
    * class 1: x in [4, 6), y in [1, 3) -- plotted with ``'y*'``
    * class 2: x in [7, 9), y in [4, 6) -- plotted with ``'b^'``

    The figure is saved to ``plots/artificial_data_plot.png`` and shown,
    then the data is passed through ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` with ``n_features`` fixed at 2.
    """
    n_features = 2
    points_per_class = 50
    # (x range, y range, class label, matplotlib format string)
    clusters = (
        ((1, 3), (4, 6), 0, 'ro'),
        ((4, 6), (1, 3), 1, 'y*'),
        ((7, 9), (4, 6), 2, 'b^'),
    )

    class_blocks = []
    for (x_low, x_high), (y_low, y_high), label, marker in clusters:
        # Draw all y values first, then one x per point, so the sequence
        # of random draws is stable for a given seed.
        y_values = np.random.uniform(y_low, y_high, points_per_class)
        block = np.array([[np.random.uniform(x_low, x_high), y, label]
                          for y in y_values])
        # Plot the whole cluster; slicing with an end of -1 would drop
        # the last point of the class from the figure.
        plt.plot(block[:, 0], block[:, 1], marker)
        class_blocks.append(block)

    dataset = np.concatenate(class_blocks, axis=0)

    plt.axis([0, 10, 0, 8])
    plt.savefig('plots/artificial_data_plot.png')
    plt.show()

    new_dataset = dataset_utils.prepare_data(dataset)
    return new_dataset, n_features
def artificial_data_p():
    """Generate a two-class 2-D artificial dataset and plot it.

    Four clusters of 10 points each are drawn uniformly around the
    corners of a rectangle; rows are ``[x, y, class_label]``. The three
    clusters near (0, 0), (0, 7) and (3, 0) are labelled 0 and plotted
    in red; the cluster near (3, 7) is labelled 1 and plotted in blue.
    The data is then passed through ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` with ``n_features`` fixed at 2.
    """
    n_features = 2
    points_per_cluster = 10

    def cluster(x_bounds, y_bounds, label):
        # y values are drawn as one batch, then one x per point.
        y_values = np.random.uniform(
            y_bounds[0], y_bounds[1], points_per_cluster)
        return [[np.random.uniform(x_bounds[0], x_bounds[1]), y, label]
                for y in y_values]

    negative_rows = cluster((0, 0.5), (0, 0.5), 0)
    negative_rows += cluster((0, 0.5), (7, 7.5), 0)
    negative_rows += cluster((3, 3.5), (0, 0.5), 0)
    negatives = np.array(negative_rows)
    plt.plot(negatives[:, 0], negatives[:, 1], 'ro')

    positive_rows = cluster((3, 3.5), (7, 7.5), 1)
    dataset = np.append(negatives, positive_rows, axis=0)
    # Rows from index 30 onwards are the class-1 cluster.
    plt.plot(dataset[30:, 0], dataset[30:, 1], 'bo')

    plt.axis([-1, 4, -1, 8])
    plt.show()

    prepared = dataset_utils.prepare_data(dataset)
    return prepared, n_features
def load_multiclass_iris():
    """Load Iris with its original three class labels.

    The features to keep are read from the ``features`` list in
    ``configurations/irisConfigurations.yml``; each name is looked up in
    the dataset's ``feature_names``. When fewer than four features are
    listed, only those columns are kept (in the dataset's own column
    order). The class targets are appended as the last column and the
    result is passed through ``dataset_utils.prepare_data``.

    Returns:
        tuple: ``(dataset, n_features)`` where ``n_features`` is the
        number of entries in the configured ``features`` list.
    """
    iris = datasets.load_iris()

    # Context manager so the config file handle is always closed.
    with open('configurations/irisConfigurations.yml', 'r',
              encoding='utf-8') as config_file:
        stream = config_file.read()
    iris_cfg = yaml.load(stream=stream, Loader=yaml.FullLoader)

    n_features = len(iris_cfg['features'])
    attributes = np.array(iris['data'])
    if n_features < 4:
        chosen_features = set(iris_cfg['features'])
        features_indexes = sorted(
            iris['feature_names'].index(name) for name in chosen_features)
        attributes = np.array(
            [attributes[:, i] for i in features_indexes]).T

    labels = np.array(iris['target'])
    # labels[:, None] turns the 1-D targets into a single column.
    iris_dataset = np.append(attributes, labels[:, None], axis=1)

    new_dataset = dataset_utils.prepare_data(iris_dataset)
    return new_dataset, n_features