コード例 #1
0
def load_dermatology():
    """Load the dermatology dataset and run it through ``prepare_data``.

    Reads ``datasets/dermatology.data`` (no header row), converts every
    value of the second-to-last column with ``int`` and hands the array to
    ``dataset_utils.prepare_data``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and its
        column count minus two.
    """
    raw = np.array(pd.read_csv('datasets/dermatology.data', header=None))

    # Presumably the patient-age column (the original code called these
    # values "ages"); any entry that int() rejects raises here.
    age_column = raw[:, -2]
    raw[:, -2] = np.array(list(map(int, age_column)))

    prepared = dataset_utils.prepare_data(raw)
    feature_count = prepared.shape[1] - 2

    return prepared, feature_count
コード例 #2
0
def load_verbetral():
    """Load the vertebral-column dataset and run it through ``prepare_data``.

    Reads the space-separated, header-less file ``datasets/column_3C.dat``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and its
        column count minus two.
    """
    frame = pd.read_csv('datasets/column_3C.dat', sep=' ', header=None)
    prepared = dataset_utils.prepare_data(np.array(frame))
    feature_count = prepared.shape[1] - 2

    return prepared, feature_count
コード例 #3
0
def load_breast_cancer():
    """Load the Wisconsin breast-cancer dataset with binary labels.

    Reads ``datasets/breast-cancer-wisconsin.csv`` (no header row) and
    rewrites the last column so that entries equal to 4 become 1 and every
    other entry becomes 0 (presumably 4 marks the malignant class - verify
    against the data file). The result goes through
    ``dataset_utils.prepare_data``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and its
        column count minus two.
    """
    data = np.array(
        pd.read_csv('datasets/breast-cancer-wisconsin.csv', header=None))

    # Binarise the label column in place.
    is_positive = data[:, -1] == 4
    data[:, -1] = np.where(is_positive, 1, 0)

    prepared = dataset_utils.prepare_data(data)
    feature_count = prepared.shape[1] - 2

    return prepared, feature_count
コード例 #4
0
def load_artificial_regression():
    """Generate an artificial regression dataset for ``y = 3 * sin(x) + 1``.

    Draws 500 inputs from a standard normal distribution, computes the
    target for each one, runs the two-column array through
    ``dataset_utils.prepare_data`` and shows a scatter plot of the result.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and the
        constant ``1`` (a single input feature).
    """
    def regression_function(x):
        # np.sin works element-wise on the whole column. Calling a scalar
        # sine on one-element arrays relies on implicit array-to-scalar
        # conversion, which NumPy has deprecated.
        return 3 * np.sin(x) + 1

    N = 500
    x_values = np.random.normal(size=(N, 1))
    y_values = regression_function(x_values)
    regression_dataset = np.append(x_values, y_values, axis=1)

    new_dataset = dataset_utils.prepare_data(regression_dataset)
    n_features = 1

    # Column 1 is plotted against the last column; presumably prepare_data
    # puts an extra column in front of the input feature - TODO confirm.
    plt.scatter(new_dataset[:, 1], new_dataset[:, -1])
    plt.show()
    return new_dataset, n_features
コード例 #5
0
def load_iris_dataset():
    """Load Iris as a one-vs-rest dataset driven by a YAML configuration.

    The file ``configurations/irisConfigurations.yml`` supplies:

    * ``features``: names of the Iris features to keep (a subset is only
      applied when fewer than four names are listed);
    * ``flower_to_classify``: name of the class whose index is passed as
      the label to ``dataset_utils.transform_dataset``. That class is the
      one meant to be represented by 1 while the others become 0. The
      index order follows the dataset's ``target_names`` (expected to be
      0 - setosa, 1 - versicolor, 2 - virginica).

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and the
        number of entries in the configured ``features`` list.

    Raises:
        SystemExit: with status 1, after printing a message, when the
            configuration is missing a key or names an unknown class or
            feature.
    """
    iris = datasets.load_iris()

    # Close the configuration file deterministically instead of leaking
    # the handle.
    # NOTE(review): yaml.safe_load would be enough if the file only holds
    # plain scalars and lists - confirm before switching.
    with open('configurations/irisConfigurations.yml',
              'r',
              encoding='utf-8') as config_file:
        iris_cfg = yaml.load(stream=config_file.read(),
                             Loader=yaml.FullLoader)

    try:
        n_features = len(iris_cfg['features'])

        iris_classes = list(iris['target_names'])
        label = iris_classes.index(iris_cfg['flower_to_classify'])

        attributes = np.array(iris['data'])

        if n_features < 4:
            # Duplicate names collapse to one column; columns keep the
            # dataset's original order.
            features_indexes = sorted(
                {iris['feature_names'].index(name)
                 for name in iris_cfg['features']})
            attributes = attributes[:, features_indexes]

        labels = np.array(iris['target']).reshape(-1, 1)

        iris_dataset = np.append(attributes, labels, axis=1)
        new_dataset = dataset_utils.transform_dataset(iris_dataset, label)
        new_dataset = dataset_utils.prepare_data(new_dataset)

        return new_dataset, n_features

    except (IndexError, KeyError, ValueError):
        # list.index raises ValueError and a missing key raises KeyError,
        # so catching IndexError alone never reported a bad configuration.
        print('Erro ao carregar Iris.')
        print('Verifique se o arquivo irisConfigurations.yml está correto.')
        # Exit with a non-zero status so callers and shells see the failure.
        raise SystemExit(1)
コード例 #6
0
def load_multiclass_artificial():
    """Generate a three-class artificial 2-D dataset and plot it.

    Each class has 50 points drawn uniformly from its own rectangle:

    * class 0: x in [1, 3), y in [4, 6), plotted as red circles;
    * class 1: x in [4, 6), y in [1, 3), plotted as yellow stars;
    * class 2: x in [7, 9), y in [4, 6), plotted as blue triangles.

    The figure is saved to ``plots/artificial_data_plot.png`` and shown,
    then the data goes through ``dataset_utils.prepare_data``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and the
        constant ``2``.
    """
    n_features = 2
    points_per_class = 50
    # (x range, y range, class label, matplotlib format string)
    clusters = [
        ((1, 3), (4, 6), 0, 'ro'),
        ((4, 6), (1, 3), 1, 'y*'),
        ((7, 9), (4, 6), 2, 'b^'),
    ]

    per_class = []
    for (x_low, x_high), (y_low, y_high), label, marker in clusters:
        # The y values are drawn first as one batch, then one x per point,
        # which keeps the random draw order of the original code.
        points = np.array(
            [[np.random.uniform(x_low, x_high), y, label]
             for y in np.random.uniform(y_low, y_high, points_per_class)])
        # Plot the whole class; slicing the combined array with ``:-1``
        # used to drop the last point of classes 1 and 2 from the figure.
        plt.plot(points[:, 0], points[:, 1], marker)
        per_class.append(points)

    dataset = np.concatenate(per_class, axis=0)

    plt.axis([0, 10, 0, 8])
    plt.savefig('plots/artificial_data_plot.png')
    plt.show()

    new_dataset = dataset_utils.prepare_data(dataset)

    return new_dataset, n_features
コード例 #7
0
def artificial_data_p():
    """Generate a small two-class artificial 2-D dataset and plot it.

    Class 0 consists of three clusters of 10 points each, near (0, 0),
    (0, 7) and (3, 0); class 1 is a single cluster of 10 points near
    (3, 7). Every coordinate is drawn uniformly from a range 0.5 wide.
    Class 0 is plotted in red, class 1 in blue, and the figure is shown
    before the data goes through ``dataset_utils.prepare_data``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and the
        constant ``2``.
    """
    n_features = 2

    def sample_cluster(x_range, y_range, label):
        # Draw the 10 y values as one batch, then one x per point.
        return [[np.random.uniform(*x_range), y, label]
                for y in np.random.uniform(*y_range, 10)]

    # The three class-0 clusters are sampled left to right.
    class_zero = np.array(
        sample_cluster((0, 0.5), (0, 0.5), 0)
        + sample_cluster((0, 0.5), (7, 7.5), 0)
        + sample_cluster((3, 3.5), (0, 0.5), 0))
    plt.plot(class_zero[:, 0], class_zero[:, 1], 'ro')

    class_one = np.array(sample_cluster((3, 3.5), (7, 7.5), 1))
    plt.plot(class_one[:, 0], class_one[:, 1], 'bo')

    plt.axis([-1, 4, -1, 8])
    plt.show()

    dataset = np.concatenate((class_zero, class_one), axis=0)
    new_dataset = dataset_utils.prepare_data(dataset)

    return new_dataset, n_features
コード例 #8
0
def load_multiclass_iris():
    """Load Iris with its original class labels, driven by a YAML config.

    The ``features`` entry of ``configurations/irisConfigurations.yml``
    lists the feature names to keep; a subset is only applied when fewer
    than four names are listed. The target column is appended unchanged
    and the array goes through ``dataset_utils.prepare_data``.

    Returns:
        A ``(dataset, n_features)`` tuple: the prepared array and the
        number of entries in the configured ``features`` list.
    """
    iris = datasets.load_iris()

    # Close the configuration file deterministically instead of leaking
    # the handle.
    with open('configurations/irisConfigurations.yml',
              'r',
              encoding='utf-8') as config_file:
        iris_cfg = yaml.load(stream=config_file.read(),
                             Loader=yaml.FullLoader)
    n_features = len(iris_cfg['features'])

    attributes = np.array(iris['data'])

    if n_features < 4:
        # Duplicate names collapse to one column; columns keep the
        # dataset's original order.
        features_indexes = sorted(
            {iris['feature_names'].index(name)
             for name in iris_cfg['features']})
        attributes = attributes[:, features_indexes]

    labels = np.array(iris['target'])

    iris_dataset = np.append(attributes, labels[:, None], axis=1)
    new_dataset = dataset_utils.prepare_data(iris_dataset)

    return new_dataset, n_features