Exemple #1
0
def test_label_encoder():
    """Check LabelEncoder.transform and inverse_transform on integer labels."""
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])

    # The fitted classes are the distinct labels in sorted order.
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    # Round trip: labels -> codes -> labels.
    raw_labels = [0, 1, 4, 4, 5, -1, -1]
    codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(raw_labels), codes)
    assert_array_equal(encoder.inverse_transform(codes), raw_labels)

    # 6 was not part of the fitted labels, so transform must reject it.
    assert_raises(ValueError, encoder.transform, [0, 6])
Exemple #2
0
def test_label_encoder_negative_ints():
    # LabelEncoder must cope with a label set that mixes negative and
    # non-negative integers.
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    expected_classes = [-1, 0, 1, 4, 5]
    assert_array_equal(encoder.classes_, expected_classes)

    originals = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(originals), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), originals)

    # The label 6 is not among the fitted classes.
    assert_raises(ValueError, encoder.transform, [0, 6])
Exemple #3
0
def test_label_encoder_empty_array():
    # An encoder fitted on string labels must map an empty input to an
    # empty output in both directions.
    encoder = LabelEncoder()
    encoder.fit(np.array(["1", "2", "1", "2", "2"]))

    # Forward direction first, then the inverse, each on an empty list.
    for convert in (encoder.transform, encoder.inverse_transform):
        result = convert([])
        assert_array_equal(np.array([]), result)
def test_label_encoder_negative_ints():
    # Fit on integers that include a negative value and verify the
    # class ordering, the round trip, and the rejection of unknown labels.
    fitted = LabelEncoder()
    fitted.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(fitted.classes_, [-1, 0, 1, 4, 5])

    sample = [0, 1, 4, 4, 5, -1, -1]
    sample_codes = [1, 2, 3, 3, 4, 0, 0]
    forward = fitted.transform(sample)
    assert_array_equal(forward, sample_codes)
    backward = fitted.inverse_transform(sample_codes)
    assert_array_equal(backward, sample)

    # 6 never appeared in the training labels.
    assert_raises(ValueError, fitted.transform, [0, 6])
def test_label_encoder_empty_array(values):
    # ``values`` is whatever label collection the caller supplies for fitting
    # (presumably injected by a parametrize decorator outside this view).
    encoder = LabelEncoder()
    encoder.fit(values)
    empty = np.array([])

    # test empty transform
    assert_array_equal(empty, encoder.transform([]))

    # test empty inverse transform
    assert_array_equal(empty, encoder.inverse_transform([]))
Exemple #6
0
def test_label_encoder_string_labels():
    """Check LabelEncoder.transform and inverse_transform on string labels."""
    encoder = LabelEncoder()
    encoder.fit(["paris", "paris", "tokyo", "amsterdam"])

    # Classes come back de-duplicated and in sorted order.
    assert_array_equal(encoder.classes_, ["amsterdam", "paris", "tokyo"])

    cities = ["tokyo", "tokyo", "paris"]
    city_codes = [2, 2, 1]
    assert_array_equal(encoder.transform(cities), city_codes)
    assert_array_equal(encoder.inverse_transform(city_codes), cities)

    # "london" was never fitted.
    assert_raises(ValueError, encoder.transform, ["london"])
Exemple #7
0
def test_label_encoder_errors():
    # A freshly constructed encoder has not been fitted; both directions
    # are expected to raise ValueError.
    unfitted = LabelEncoder()
    for method in (unfitted.transform, unfitted.inverse_transform):
        assert_raises(ValueError, method, [])

    # Fail on unseen labels: -1 is not accepted by inverse_transform here.
    fitted = LabelEncoder()
    fitted.fit([1, 2, 3, 1, -1])
    assert_raises(ValueError, fitted.inverse_transform, [-1])
def test_label_encoder_errors():
    # Invalid usage must surface as ValueError.
    blank_encoder = LabelEncoder()
    no_labels = []
    assert_raises(ValueError, blank_encoder.transform, no_labels)
    assert_raises(ValueError, blank_encoder.inverse_transform, no_labels)

    # Fail on unseen labels
    trained_encoder = LabelEncoder()
    training_labels = [1, 2, 3, 1, -1]
    trained_encoder.fit(training_labels)
    assert_raises(ValueError, trained_encoder.inverse_transform, [-1])
Exemple #9
0
def test_label_encoder():
    """Exercise LabelEncoder's transform / inverse_transform round trip."""
    training_labels = [1, 1, 4, 5, -1, 0]
    encoder = LabelEncoder()
    encoder.fit(training_labels)
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    # Each label is encoded as its position within ``classes_``.
    labels = [0, 1, 4, 4, 5, -1, -1]
    positions = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(labels), positions)
    assert_array_equal(encoder.inverse_transform(positions), labels)

    # Unknown label 6 must be rejected.
    assert_raises(ValueError, encoder.transform, [0, 6])
def test_label_encoder_string_labels():
    """Exercise LabelEncoder's transform / inverse_transform with
    non-numeric labels."""
    training_labels = ["paris", "paris", "tokyo", "amsterdam"]
    encoder = LabelEncoder()
    encoder.fit(training_labels)
    assert_array_equal(encoder.classes_, ["amsterdam", "paris", "tokyo"])

    queries = ["tokyo", "tokyo", "paris"]
    expected_codes = [2, 2, 1]
    got_codes = encoder.transform(queries)
    assert_array_equal(got_codes, expected_codes)
    got_labels = encoder.inverse_transform(expected_codes)
    assert_array_equal(got_labels, queries)

    # A city that was not fitted must raise.
    assert_raises(ValueError, encoder.transform, ["london"])
Exemple #11
0
def test_label_encoder_errors():
    # An encoder that was never fitted must raise ValueError in both
    # directions.
    unfitted = LabelEncoder()
    for method in (unfitted.transform, unfitted.inverse_transform):
        assert_raises(ValueError, method, [])

    # Fail on unseen labels, for a single bad value and for several.
    fitted = LabelEncoder()
    fitted.fit([1, 2, 3, -1, 1])
    expected_fragment = "contains previously unseen labels"
    for bad_codes in ([-2], [-2, -3, -4]):
        assert_raise_message(ValueError, expected_fragment,
                             fitted.inverse_transform, bad_codes)
def preprocess(data):
    """Fill missing values and label-encode the text columns of ``data``.

    Every column is handled according to its dtype:

    * ``object`` columns: missing entries are replaced by the placeholder
      ``"Não mensurado"`` and the column is then replaced by the integer
      codes produced by a ``LabelEncoder`` fitted on that column.
    * ``float`` and ``int`` columns: missing entries are replaced by ``0``.
    * any other dtype is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, after modification.
    """
    for column in data:
        dtype = data.dtypes[column]
        if dtype == object:
            # Assign the filled column back instead of calling
            # ``fillna(inplace=True)`` on ``data[column]``: with pandas
            # Copy-on-Write the chained in-place call only modifies a
            # temporary object and the frame would keep its NaNs.
            filled = data[column].fillna("Não mensurado")
            encoder = LabelEncoder()
            encoder.fit(filled.tolist())
            data[column] = encoder.transform(filled)
        elif dtype == float or dtype == int:
            data[column] = data[column].fillna(0)
    return data
Exemple #13
0
def test_label_encoder():
    # Round-trip check of LabelEncoder on integer labels, followed by a
    # bad-shape check on an encoder refitted with strings.
    encoder = LabelEncoder()
    encoder.fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    labels = [0, 1, 4, 4, 5, -1, -1]
    codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(labels), codes)
    assert_array_equal(encoder.inverse_transform(codes), labels)
    assert_raises(ValueError, encoder.transform, [0, 6])

    # Refit on strings: a bare string (rather than a sequence of labels)
    # must be rejected with a "bad input shape" message.
    encoder.fit(["apple", "orange"])
    expected_fragment = "bad input shape"
    assert_raise_message(ValueError, expected_fragment,
                         encoder.transform, "apple")
Exemple #14
0
def test_label_encoder_errors():
    # Check that invalid arguments yield ValueError
    fresh = LabelEncoder()
    empty_input = []
    assert_raises(ValueError, fresh.transform, empty_input)
    assert_raises(ValueError, fresh.inverse_transform, empty_input)

    # Codes outside the fitted range must be reported as unseen labels.
    trained = LabelEncoder()
    trained.fit([1, 2, 3, -1, 1])
    unseen_msg = "contains previously unseen labels"
    single_bad = [-2]
    several_bad = [-2, -3, -4]
    assert_raise_message(ValueError, unseen_msg,
                         trained.inverse_transform, single_bad)
    assert_raise_message(ValueError, unseen_msg,
                         trained.inverse_transform, several_bad)
def test_label_encoder(values, classes, unknown):
    # Covers fit + transform, inverse_transform and fit_transform.
    # ``values`` must encode to [1, 0, 2, 0, 2]; ``classes`` is the expected
    # ``classes_`` attribute and ``unknown`` holds labels absent from
    # ``values``.
    expected_codes = [1, 0, 2, 0, 2]

    encoder = LabelEncoder()
    encoder.fit(values)
    assert_array_equal(encoder.classes_, classes)
    assert_array_equal(encoder.transform(values), expected_codes)
    assert_array_equal(encoder.inverse_transform([1, 0, 2, 0, 2]), values)

    # fit_transform on a new encoder must produce the same codes.
    encoder = LabelEncoder()
    one_shot_codes = encoder.fit_transform(values)
    assert_array_equal(one_shot_codes, expected_codes)

    with pytest.raises(ValueError, match="unseen labels"):
        encoder.transform(unknown)
Exemple #16
0
def test_label_encoder():
    # Integer round trip first, then the scalar-string rejection.
    int_labels = [1, 1, 4, 5, -1, 0]
    encoder = LabelEncoder()
    encoder.fit(int_labels)
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])

    decoded = [0, 1, 4, 4, 5, -1, -1]
    encoded = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(decoded), encoded)
    assert_array_equal(encoder.inverse_transform(encoded), decoded)

    # 6 is unknown to the encoder.
    assert_raises(ValueError, encoder.transform, [0, 6])

    # After refitting on strings, passing a bare string must fail with a
    # "bad input shape" error.
    encoder.fit(["apple", "orange"])
    shape_msg = "bad input shape"
    assert_raise_message(ValueError, shape_msg, encoder.transform, "apple")
Exemple #17
0
def test_label_encoder(values, classes, unknown):
    # Verify LabelEncoder's transform, fit_transform and inverse_transform
    # against the fixed code sequence [1, 0, 2, 0, 2].
    codes = [1, 0, 2, 0, 2]

    two_step = LabelEncoder()
    two_step.fit(values)
    assert_array_equal(two_step.classes_, classes)
    assert_array_equal(two_step.transform(values), codes)
    assert_array_equal(two_step.inverse_transform([1, 0, 2, 0, 2]), values)

    one_step = LabelEncoder()
    assert_array_equal(one_step.fit_transform(values), codes)

    # Labels that were not fitted must be reported as unseen.
    with pytest.raises(ValueError, match="unseen labels"):
        one_step.transform(unknown)
Exemple #18
0
class LabelEncoderImpl:
    """Thin wrapper that forwards ``fit`` and ``transform`` to an ``SKLModel``."""

    def __init__(self):
        # No hyperparameters are exposed, so the wrapped model is built
        # without any keyword arguments.
        self._hyperparams = {}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model on ``X`` (and ``y`` when given); return self."""
        # ``y`` is only forwarded when the caller actually supplied it.
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transformation of ``X``."""
        return self._wrapped_model.transform(X)
Exemple #19
0
def _conform_targets(targets):
    """
    Encode targets as consecutive integer codes starting at 0.

    Parameters
    ----------
    targets : array (n_targets, )
        Labels to encode.

    Returns
    -------
    targets_conformed : array (n_targets, )
        ``targets`` re-expressed as the integer codes assigned by the
        encoder (0 up to the number of distinct labels minus one).
    label_encoder : LabelEncoder
        Encoder fitted on ``targets``; call
        ``label_encoder.inverse_transform`` to map codes back to the
        original labels.
    """
    encoder = LabelEncoder()
    encoder.fit(targets)
    conformed = encoder.transform(targets)
    return conformed, encoder
Exemple #20
0
def test_label_encoder_errors():
    # Check that invalid arguments yield ValueError: this encoder has not
    # been fitted, so both directions must fail.
    le = LabelEncoder()
    with pytest.raises(ValueError):
        le.transform([])
    with pytest.raises(ValueError):
        le.inverse_transform([])

    # Fail on unseen labels
    le = LabelEncoder()
    le.fit([1, 2, 3, -1, 1])
    msg = "contains previously unseen labels"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2])
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2, -3, -4])

    # Fail on inverse_transform("")
    # ``match`` is applied as a regular expression, so the parentheses must
    # be escaped; unescaped, "()" is an empty group and the literal "()" in
    # the message would never be checked.
    msg = r"bad input shape \(\)"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform("")
Exemple #21
0
def main():
    """Compare a hand-written PCA with scikit-learn's PCA implementations.

    The script loads the ARFF datasets found under ``./datasets/``, selects
    one of them by name, preprocesses it, and then:

    1. computes the covariance matrix of the data and its eigenvalues and
       eigenvectors, sorted by decreasing eigenvalue;
    2. keeps the ``k`` leading eigenvectors, with ``k`` chosen by
       ``proportion_of_variance`` for a target proportion of 0.9;
    3. projects the data with the manual PCA, ``PCA`` and ``IncrementalPCA``;
    4. reconstructs the data from each projection and prints the relative
       reconstruction error;
    5. runs ``tester_kmeans`` on the original data and on every projection;
    6. optionally (flags at the end of the function) draws the eigenvalue,
       scatter, 3D and correlation-matrix plots.

    Results are printed or plotted; nothing is returned.
    """
    print('\033[1m' + 'Loading all the datasets...' + '\033[0m')
    arffs_dic = obtain_arffs('./datasets/')

    # Extract a specific database.
    # Possible datasets: 'hypothyroid', 'breast-w', 'waveform'.
    dataset_name = 'breast-w'
    dat1 = arffs_dic[dataset_name]
    df1 = pd.DataFrame(dat1[0])  # original data in a pandas DataFrame

    # The last column holds the ground-truth labels: keep them as a numpy
    # array and remove the column from the feature frame.
    label_column = df1.columns[-1]
    groundtruth_labels = df1[label_column].values
    # ``axis`` is passed by keyword: pandas >= 2.0 no longer accepts it
    # positionally and raises TypeError for ``drop(label, 1)``.
    df1 = df1.drop(label_column, axis=1)
    if dataset_name == 'hypothyroid':
        # This column only contains NaNs so does not add any value to the
        # clustering.
        df1 = df1.drop('TBG', axis=1)
    data1 = df1.values  # original data in a numpy array without labels
    load = Preprocess()
    data_x = load.preprocess_method(data1)
    data_x = data_x.astype(np.float64)

    # Replace the original labels by integer codes.
    le = LabelEncoder()
    le.fit(np.unique(groundtruth_labels))
    groundtruth_labels = le.transform(groundtruth_labels)

    num_clusters = len(np.unique(groundtruth_labels))  # number of labels

    # --- Compute covariance and eigenvectors
    original_mean = np.mean(data_x, axis=0)

    cov_m = compute_covariance(data_x, original_mean)
    eig_vals, eig_vect = np.linalg.eig(cov_m)

    # Sort eigenpairs by decreasing eigenvalue and keep the real part only.
    idxsort = eig_vals.argsort()[::-1]
    eig_vals = eig_vals[idxsort].real
    eig_vect = eig_vect[:, idxsort].real

    # --- Decide the number of features we want to keep
    prop_variance = 0.9
    k = proportion_of_variance(eig_vals, prop_variance)
    print('\nThe value of K selected to obtain a proportion of variance = ' +
          str(prop_variance) + ' is: ' + str(k) + '\n')

    eig_vect_red = eig_vect[:, :k]  # eigenvectors are in columns

    # --- Reduce dimensionality of the data
    # A1) Using our implementation of PCA
    transf_data_x = np.dot((eig_vect_red.T), (data_x - original_mean).T).T

    # B1) Using the PCA implementation of sklearn
    pca = PCA(n_components=k)
    transf_data_x_sklearn = pca.fit_transform(data_x)

    # C1) Using the incremental PCA implementation of sklearn
    incrementalpca = IncrementalPCA(n_components=k)
    transf_data_x_sklearn2 = incrementalpca.fit_transform(data_x)

    # --- Reconstruct data
    # A2) Reconstruct data with our method
    reconstruct_data_x = np.dot(eig_vect_red, transf_data_x.T)
    reconstruct_data_x = reconstruct_data_x.T + original_mean

    # B2) Reconstruct data with PCA sklearn
    reconstruct_data_x1 = np.dot(pca.components_.T, transf_data_x_sklearn.T)
    reconstruct_data_x1 = reconstruct_data_x1.T + original_mean

    # C2) Reconstruct data with incremental PCA sklearn
    reconstruct_data_x2 = np.dot(incrementalpca.components_.T,
                                 transf_data_x_sklearn2.T)
    reconstruct_data_x2 = reconstruct_data_x2.T + original_mean

    # --- Error between original data and reconstructed data
    # The error is the summed absolute difference relative to the summed
    # absolute value of the original data, expressed as a percentage.
    # A3) Error between original data and reconstruct data
    error = reconstruct_data_x - data_x
    total_error = (np.sum(abs(error)) / np.sum(abs(data_x))) * 100
    print(
        'The relative error after reconstructing the original matrix with K = '
        + str(k) + ' is ' + '\033[1m' + '\033[94m' +
        str(round(total_error, 2)) + '%' + '\033[0m' +
        ' [using our implementation of PCA]')

    # B3) Error between original data and reconstruct data 1
    error1 = reconstruct_data_x1 - data_x
    total_error1 = (np.sum(abs(error1)) / np.sum(abs(data_x))) * 100
    print(
        'The relative error after reconstructing the original matrix with K = '
        + str(k) + ' is ' + '\033[1m' + '\033[94m' +
        str(round(total_error1, 2)) + '%' + '\033[0m' +
        ' [using pca.fit_transform of Sklearn]')

    # C3) Error between original data and reconstruct data 2
    error2 = reconstruct_data_x2 - data_x
    total_error2 = (np.sum(abs(error2)) / np.sum(abs(data_x))) * 100
    print(
        'The relative error after reconstructing the original matrix with K = '
        + str(k) + ' is ' + '\033[1m' + '\033[94m' +
        str(round(total_error2, 2)) + '%' + '\033[0m' +
        ' [using incrementalpca.fit_transform of Sklearn]')

    # --- Kmeans with dimensionality reduction
    print(
        '\n---------------------------------------------------------------------------------------------------------'
    )
    print('K-MEANS APPLIED TO THE ORIGINAL DATA')
    tester_kmeans(data_x, groundtruth_labels)
    print(
        '\n---------------------------------------------------------------------------------------------------------'
    )
    print(
        'K-MEANS APPLIED TO THE TRANSFORMED DATA USING OUR IMPLEMENTATION OF PCA'
    )
    # These labels are reused by the last 3D plot below.
    labels = tester_kmeans(transf_data_x, groundtruth_labels)
    print(
        '\n---------------------------------------------------------------------------------------------------------'
    )
    print(
        'K-MEANS APPLIED TO THE TRANSFORMED DATA USING pca.fit_transform OF SKLEARN'
    )
    tester_kmeans(transf_data_x_sklearn, groundtruth_labels)
    print(
        '\n---------------------------------------------------------------------------------------------------------'
    )
    print(
        'K-MEANS APPLIED TO THE TRANSFORMED DATA USING incrementalpca.fit_transform OF SKLEARN'
    )
    tester_kmeans(transf_data_x_sklearn2, groundtruth_labels)
    print(
        '\n---------------------------------------------------------------------------------------------------------'
    )

    # --- Scatter plots
    ploting_boolean = False
    # Only change to True for a database with not too many features
    # (like breast-w).
    plot_scatters = False

    if ploting_boolean:
        # Plot the eigenvalue magnitudes
        plt.plot(eig_vals, 'ro-', linewidth=2, markersize=6)
        plt.title('Magnitude of the eigenvalues')
        plt.show()

        if plot_scatters:
            # Plottings: scatter plots
            # Original data with groundtruth labels
            ploting_v(data_x, num_clusters, groundtruth_labels,
                      'original data with groundtruth labels')
            # Transformed data with our implementation of PCA and with
            # groundtruth labels
            ploting_v(transf_data_x, num_clusters, groundtruth_labels,
                      'transformed data (our PCA) with groundtruth '
                      'labels')
            # Transformed data with pca.fit_transform and with groundtruth
            # labels
            ploting_v(
                transf_data_x_sklearn, num_clusters, groundtruth_labels,
                'transformed data (Sklearn PCA v1) '
                'with groundtruth labels')
            # Transformed data with incrementalpca.fit_transform and with
            # groundtruth labels
            ploting_v(
                transf_data_x_sklearn2, num_clusters, groundtruth_labels,
                'transformed data (Sklearn PCA v2) '
                'with groundtruth labels')

        # --- 3D plots
        # Original data without labels
        ploting_v3d(data_x, 1, np.zeros(len(groundtruth_labels)),
                    'original data without labels')
        # Original data with groundtruth labels
        ploting_v3d(data_x, num_clusters, groundtruth_labels,
                    'original data with groundtruth labels')
        # Reconstructed data without labels
        ploting_v3d(reconstruct_data_x, 1, np.zeros(len(groundtruth_labels)),
                    'reconstructed data without labels')
        # Transformed data with our implementation of PCA and without labels
        ploting_v3d(transf_data_x, 1, np.zeros(len(groundtruth_labels)),
                    'transformed data without labels')
        # Transformed data with our implementation of PCA and with
        # groundtruth labels
        ploting_v3d(transf_data_x, num_clusters, groundtruth_labels,
                    'transformed data with groundtruth labels')
        # Transformed data with our implementation of PCA and with the
        # labels obtained with our K-means
        ploting_v3d(transf_data_x, num_clusters, labels,
                    'transformed data with labels from our K-means')
        # Plot of the correlation matrix of the dataset
        plot_corr_matrix(data_x, legend=False)
def test_label_encoder_str_bad_shape(dtype):
    # Fit on a string array of the requested dtype, then check that a bare
    # string (not a sequence of labels) is rejected.
    fruit = np.array(["apple", "orange"], dtype=dtype)
    encoder = LabelEncoder()
    encoder.fit(fruit)
    assert_raise_message(ValueError, "bad input shape",
                         encoder.transform, "apple")
Exemple #23
0
def test_label_encoder_str_bad_shape(dtype):
    # transform() must reject a bare string with a "bad input shape" error,
    # whatever dtype the training strings were stored with.
    training = np.array(["apple", "orange"], dtype=dtype)
    encoder = LabelEncoder()
    encoder.fit(training)
    expected = "bad input shape"
    with pytest.raises(ValueError, match=expected):
        encoder.transform("apple")
def r_precision(S:np.ndarray, y:np.ndarray, metric:str='distance',
                average:str='weighted', return_y_pred:int=0,
                verbose:int=0, n_jobs:int=1) -> dict:
    """ Calculate R-Precision (recall at R-th position).

    Parameters
    ----------
    S : ndarray or CSR matrix
        Distance (similarity) matrix

    y : ndarray
        Target (ground truth) labels

    metric : 'distance' or 'similarity', optional, default: 'distance'
        Define, whether `S` is a distance or similarity matrix.
        Only ``'similarity'`` together with a sparse `S` is implemented;
        any other combination (including the default) raises
        ``NotImplementedError``.

    average : 'weighted', 'macro' or None, optional, default: 'weighted'
        Ignored. Weighted and macro precisions are returned.

    return_y_pred : int, optional, default: 0
        If > 0, return the labels of the `return_y_pred` nearest neighbors

    verbose : int, optional, default: 0
        Increasing level of output.

    n_jobs : int, optional, default: 1
        Number of parallel processes to use.

    Returns
    -------
    r_precision : dictionary with following keys:
        macro : float
            Macro R-Precision.

        weighted : float
            Weighted R-Precision.

        per_item : ndarray
            R-Precision at the object.

        relevant_items : ndarray
            Relevant items per class.

        y_true : ndarray
            Target labels (req. for weighting).

        y_pred : ndarray
            Labels of some k-nearest neighbors

    Raises
    ------
    NotImplementedError
        If `metric` is not ``'similarity'`` or `S` is not sparse.
    """
    io.check_distance_matrix_shape(S)
    io.check_distance_matrix_shape_fits_labels(S, y)
    io.check_valid_metric_parameter(metric)
    log = ConsoleLogging()
    n, _ = S.shape
    S_is_sparse = issparse(S)
    if metric != 'similarity' or not S_is_sparse:
        raise NotImplementedError("Only sparse similarity matrices so far.")

    # Map labels to 0..n(labels)-1
    le = LabelEncoder()
    # Add int.min as an extra label standing for misclassifications.
    # It is built explicitly: casting NaN to int is platform-dependent and
    # does not reliably produce int.min.
    incorr_orig = np.array([np.iinfo(int).min])
    le.fit(np.append(y, incorr_orig))
    y = le.transform(y)
    incorrect = le.transform(incorr_orig)
    # Number of relevant items, i.e. number of each label
    relevant_items = np.bincount(y) - 1 # one less for self class
    # R-Precision for each item
    # (builtin float: the ``np.float`` alias was removed in NumPy 1.24)
    r_prec = np.zeros(n, dtype=float)

    # Classify each point in test set
    if verbose:
        log.message("Creating shared memory data.")
    # Counter shared with the worker processes through the pool initializer.
    n_random_pred = mp.Value(ctypes.c_int)
    n_random_pred.value = 0
    if verbose and log:
        log.message("Spawning processes for prediction.")
    y_pred = np.zeros((n, return_y_pred), dtype=float)
    kwargs = {'y_pred' : return_y_pred,
              'incorrect' : incorrect}
    with mp.Pool(processes=n_jobs,
                 initializer=_load_shared_csr,
                 initargs=(S, y, n_random_pred, relevant_items)) as pool:
        for i, r in enumerate(
            pool.imap(
                func=partial(_r_prec_worker, **kwargs),
                iterable=range(n),
                chunksize=int(1e2))):
            if verbose and ((i+1)%int(1e7 / 10**verbose) == 0 or i == n-1):
                log.message("Classification: {} of {} on {}.".format(
                            i+1, n, mp.current_process().name), flush=True)
            try:
                # Worker returned (precision, neighbor labels)
                r_prec[i] = r[0]
                y_pred[i, :] = r[1]
            except (TypeError, IndexError):
                # Worker returned a bare scalar precision, which cannot be
                # indexed.
                r_prec[i] = r
    pool.join()

    if verbose and log:
        log.message("Retrieving nearest neighbors.")
    # Work-around for new scikit-learn requirement of 1D arrays for LabelEncoder
    y_pred = np.asarray([le.inverse_transform(col) for col in y_pred.T.astype(int)]).T
    if verbose and log:
        log.message("Finishing.")
    if n_random_pred.value:
        log.warning(("{} queries were classified randomly, because all "
                     "distances were non-finite numbers or there were no other "
                     "objects in the same class.").format(n_random_pred.value))
    return_dict = {'macro' : r_prec.mean(),
                   'weighted' : np.average(r_prec, weights=relevant_items[y]),
                   'per_item' : r_prec,
                   'relevant_items' : relevant_items,
                   'y_true' : y,
                   'y_pred' : y_pred}
    return return_dict
Exemple #25
0
def test_label_encoder_str_bad_shape(dtype):
    # A fitted encoder must refuse a bare string passed to transform().
    encoder = LabelEncoder()
    labels = np.array(["apple", "orange"], dtype=dtype)
    encoder.fit(labels)
    bad_shape_msg = "bad input shape"
    assert_raise_message(ValueError, bad_shape_msg,
                         encoder.transform, "apple")
Exemple #26
0
def main():
    """Evaluate an SVM classifier over the pre-defined folds of a dataset.

    The script loads the ARFF datasets found under ``./datasetsSelected/``,
    selects one by name, and then:

    1. rebuilds the full data set from fold 0 and maps every row (as a
       string) to its index, so ``trn_tst_idxs`` can compute the train/test
       indices of each fold;
    2. reads ``C``, the kernel and the decision function from the keyboard;
    3. separates and label-encodes the ground-truth column and preprocesses
       the features;
    4. trains and evaluates ``SVM_Algorithm`` on each fold and prints the
       mean and standard deviation of the accuracies and the running time.

    Results are printed; nothing is returned.
    """
    print('\033[1m' + 'Loading all the datasets...' + '\033[0m')
    arffs_dic = obtain_arffs('./datasetsSelected/')

    # Extract a specific database ('sick' or 'nursery').
    dataset_name = 'sick'
    dataset = arffs_dic[dataset_name]

    # --- Compute indices for each fold
    # Use fold 0 of that particular dataset to find the indices of train
    # and test for each fold.
    ref_data = np.concatenate((dataset[0][0], dataset[0][1]), axis=0)
    df_aux = pd.DataFrame(ref_data)
    df_aux = df_aux.fillna('nonna').values
    # Key each row by its string representation; for duplicated rows the
    # last index wins.
    ref_data_dic = {str(row): i for i, row in enumerate(df_aux)}

    trn_tst_dic = trn_tst_idxs(ref_data_dic, dataset)

    # --- Reading parameters from keyboard
    C, kernel, decision_function = read_keyboard()

    # --- Preprocess
    df1 = pd.DataFrame(ref_data)
    # The last column holds the ground-truth labels: keep them as a numpy
    # array and remove the column from the feature frame.
    label_column = df1.columns[-1]
    groundtruth_labels = df1[label_column].values
    # ``axis`` is passed by keyword: pandas >= 2.0 no longer accepts it
    # positionally and raises TypeError for ``drop(label, 1)``.
    df1 = df1.drop(label_column, axis=1)
    if dataset_name == 'sick':
        # This column only contains NaNs so does not add any value.
        df1 = df1.drop('TBG', axis=1)

    data1 = df1.values  # original data in a numpy array without labels
    load = Preprocess()

    # --- Encode groundtruth labels
    le = LabelEncoder()
    le.fit(np.unique(groundtruth_labels))
    groundtruth_labels = le.transform(groundtruth_labels)

    data_x = load.preprocess_method(data1)

    # --- Supervised classifier
    # Compute accuracy for each fold
    accuracies = []
    start_time = time.time()
    for fold_number, (trn_idxs, tst_idxs) in enumerate(trn_tst_dic.values(),
                                                       start=1):
        print('Computing accuracy for fold number ' + str(fold_number))
        trn_data = data_x[trn_idxs]
        trn_labels = groundtruth_labels[trn_idxs]
        tst_data = data_x[tst_idxs]
        tst_labels = groundtruth_labels[tst_idxs]

        svecm = SVM_Algorithm(C, kernel, decision_function)
        acc = svecm.algorithm(trn_data, trn_labels, tst_data, tst_labels)
        accuracies.append(acc)

    mean_accuracies = str(round(np.mean(accuracies), 4))
    std_accuracies = str(round(np.std(accuracies), 3))
    print('\n\033[1m' +
          'The mean accuracy of classification in the test set is: ' +
          mean_accuracies + ' ± ' + std_accuracies + '\033[0m')
    print('\033[1mRunning time for the 10 folds: %s seconds\033[0m' %
          round(time.time() - start_time, 4))
def preprocess_classes(classes):
    """Return ``classes`` encoded by a LabelEncoder fitted on the same labels."""
    label_encoder = LabelEncoder()
    label_encoder.fit(classes)
    encoded_classes = label_encoder.transform(classes)
    return encoded_classes