# Experiment script: cross-validates a one-vs-rest KNNAlgorithmF classifier on
# the data in 'column_3C.dat' for every combination of missing-value setting,
# aggregation, r and k listed below.
from statistics import stdev

import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
import setup_path as pth

# Space-separated file: the last column is the label, all others are features.
vertebral = pd.read_csv(
    pth.concatenate_path_os_independent('column_3C.dat'),
    sep=' ',
)
X = vertebral.iloc[:, :-1].to_numpy()
y = vertebral.iloc[:, -1].to_numpy()

# Labels become integer codes; every feature column is min-max scaled.
y = LabelEncoder().fit_transform(y)
X = MinMaxScaler().fit_transform(X)

# Values handed to the inserter as `percentage`
# (presumably the fraction of entries to blank out -- confirm in the inserter).
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
ks = [(2, 4), (3, 5)]

for miss in missing:
    # The aggregation instances are created afresh for each `miss` value.
    for agg in (
        A1Aggregation(),
        A2Aggregation(),
        A3Aggregation(),
        A4Aggregation(p=3),
        A5Aggregation(),
        A6Aggregation(),
        A7Aggregation(),
        A8Aggregation(),
        A9Aggregation(),
        A10Aggregation(),
    ):
        for r in (2, 5, 10):
            for k in ks:
                # A new inserter is fitted for every parameter combination;
                # -1 is the marker used for a missing value.
                inserter = MissingValuesInserterColumnsIndependent(
                    columns=range(X.shape[1]),
                    nan_representation=-1,
                    percentage=miss,
                )
                X_missing = inserter.fit_transform(X)

                knn_f = KNNAlgorithmF(
                    missing_representation=-1,
                    r=r,
                    aggregation=agg,
                    k_neighbours=k,
                )
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(knn_f)

                # 10-fold cross-validation scored with one-vs-one ROC AUC;
                # the fitted estimators are kept in the result.
                f_result = cross_validate(
                    multiclassF,
                    X_missing,
                    y,
                    scoring='roc_auc_ovo',
                    return_estimator=True,
                    cv=10,
                )
# Experiment script setup for the data in 'sensor_readings_4.data': loads the
# file, keeps at most the first 200 rows of every class, scales the features
# and defines the missing-value settings used further down.
from statistics import mean, stdev

# Imported explicitly: `np` is used below and previously resolved only if the
# star import from dataset.aggregations happened to expose it.
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from dataset.aggregations import *
import setup_path as pth

# Header-less file: the last column is the label, all others are features.
wifi = pd.read_csv(
    pth.concatenate_path_os_independent('sensor_readings_4.data'),
    header=None,
)
X = wifi.iloc[:, :-1].to_numpy()
y = wifi.iloc[:, -1].to_numpy()

y = LabelEncoder().fit_transform(y)

# For every class keep the positions of (at most) its first 200 samples.
s = [np.argwhere(y == c)[:200] for c in np.unique(y)]
# axis=None flattens the per-class (n, 1) index arrays into one 1-D index.
indexes = np.concatenate(s, axis=None)

X = X[indexes]
y = y[indexes]

# Scaling is fitted on the sub-sampled rows only.
X = MinMaxScaler().fit_transform(X)

missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
# Experiment script: builds one-vs-rest KNNAlgorithmF classifiers for the
# 'Data' sheet of 'BreastTissue.xls' over a grid of missing-value settings,
# aggregations, r values and k pairs.
from statistics import mean, stdev

import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from dataset.aggregations import *
import setup_path as pth

tissue = pd.read_excel(
    pth.concatenate_path_os_independent('BreastTissue.xls'),
    sheet_name='Data',
)
# Column 1 holds the label; the features start at column 2 (column 0 is unused).
X = tissue.iloc[:, 2:].to_numpy()
y = tissue.iloc[:, 1].to_numpy()

y = LabelEncoder().fit_transform(y)
X = MinMaxScaler().fit_transform(X)

missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
ks = [(2, 4), (3, 5)]

for miss in missing:
    for agg in (
        A1Aggregation(),
        A2Aggregation(),
        A3Aggregation(),
        A4Aggregation(p=3),
        A5Aggregation(),
        A6Aggregation(),
        A7Aggregation(),
        A8Aggregation(),
        A9Aggregation(),
        A10Aggregation(),
    ):
        for r in (2, 5, 10):
            for k in ks:
                # Missing values (marked as -1) are re-inserted for every
                # parameter combination, across all feature columns.
                X_missing = (
                    MissingValuesInserterColumnsIndependent(
                        columns=range(X.shape[1]),
                        nan_representation=-1,
                        percentage=miss,
                    )
                    .fit_transform(X)
                )

                base_classifier = KNNAlgorithmF(
                    missing_representation=-1,
                    r=r,
                    aggregation=agg,
                    k_neighbours=k,
                )
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(
                    base_classifier
                )
# Experiment script: cross-validates a one-vs-rest KNNAlgorithmF classifier on
# 'leaf.csv' over a grid of missing-value settings, aggregations, r values and
# k pairs.
from statistics import mean, stdev

import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MinMaxScaler

from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
import setup_path as pth

leaf = pd.read_csv(pth.concatenate_path_os_independent('leaf.csv'))
# Column 0 is the label; features start at column 2 (column 1 is skipped).
# The labels are used as read -- no LabelEncoder step in this script.
X = leaf.iloc[:, 2:].to_numpy()
y = leaf.iloc[:, 0].to_numpy()

X = MinMaxScaler().fit_transform(X)

missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
ks = [(2, 4), (3, 5)]

for miss in missing:
    for agg in (
        A1Aggregation(),
        A2Aggregation(),
        A3Aggregation(),
        A4Aggregation(p=3),
        A5Aggregation(),
        A6Aggregation(),
        A7Aggregation(),
        A8Aggregation(),
        A9Aggregation(),
        A10Aggregation(),
    ):
        for r in (2, 5, 10):
            for k in ks:
                # -1 marks a missing value; all feature columns are eligible.
                value_remover = MissingValuesInserterColumnsIndependent(
                    columns=range(X.shape[1]),
                    nan_representation=-1,
                    percentage=miss,
                )
                X_missing = value_remover.fit_transform(X)

                binary_knn = KNNAlgorithmF(
                    missing_representation=-1,
                    r=r,
                    aggregation=agg,
                    k_neighbours=k,
                )
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(binary_knn)

                # Only 2 folds here.
                # NOTE(review): presumably because some classes have few
                # samples -- confirm against the dataset.
                f_result = cross_validate(
                    multiclassF,
                    X_missing,
                    y,
                    scoring='roc_auc_ovo',
                    return_estimator=True,
                    cv=2,
                )
from statistics import stdev import pandas as pd from sklearn.model_selection import cross_validate from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import MinMaxScaler from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM from preprocessing.missing_values import MissingValuesInserterColumnsIndependent from dataset.aggregations import * import setup_path as pth wifi = pd.read_csv( pth.concatenate_path_os_independent('wifi_localization.txt'), header=None, sep='\t') X = wifi.iloc[:, :-1].to_numpy() y = wifi.iloc[:, -1].to_numpy() X = MinMaxScaler().fit_transform(X) missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5) dfs = [] ind = 0 ks = [(2, 4), (3, 5)] for miss in missing: for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(), A4Aggregation(p=3), A5Aggregation(), A6Aggregation(), A7Aggregation(), A8Aggregation(), A9Aggregation(), A10Aggregation()): for r in (2, 5, 10):
# Experiment script for 'seeds_dataset.txt': loads and scales the data, then
# inserts missing values for every combination of missing-value setting,
# aggregation, r and k.
from statistics import stdev

from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
# NOTE(review): A1Aggregation is imported again here, after the star import
# above, so this binding is the one in effect -- confirm k_neighbours exposes
# the same class as dataset.aggregations.
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM, A1Aggregation
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
import setup_path as pth

# Tab-separated file: the last column is the label, all others are features.
seeds = pd.read_csv(
    pth.concatenate_path_os_independent('seeds_dataset.txt'),
    sep='\t',
)
X = seeds.iloc[:, :-1].to_numpy()
y = seeds.iloc[:, -1].to_numpy()

X = MinMaxScaler().fit_transform(X)

missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
ks = [(2, 4), (3, 5)]

for miss in missing:
    for agg in (
        A1Aggregation(),
        A2Aggregation(),
        A3Aggregation(),
        A4Aggregation(p=3),
        A5Aggregation(),
        A6Aggregation(),
        A7Aggregation(),
        A8Aggregation(),
        A9Aggregation(),
        A10Aggregation(),
    ):
        for r in (2, 5, 10):
            for k in ks:
                # A new inserter is fitted for every parameter combination;
                # -1 is the marker used for a missing value.
                missing_inserter = MissingValuesInserterColumnsIndependent(
                    columns=range(X.shape[1]),
                    nan_representation=-1,
                    percentage=miss,
                )
                X_missing = missing_inserter.fit_transform(X)