コード例 #1
0
from statistics import stdev
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
import setup_path as pth

# Read the space-separated vertebral-column file.
# NOTE(review): `header=None` is not passed, so pandas uses the first row as
# column names -- confirm the file actually starts with a header line.
vertebral = pd.read_csv(
    pth.concatenate_path_os_independent('column_3C.dat'),
    sep=' ',
)

# Last column is the class label (integer-encoded with LabelEncoder); every
# other column is a feature and is min-max scaled.
y = LabelEncoder().fit_transform(vertebral.iloc[:, -1].to_numpy())
X = MinMaxScaler().fit_transform(vertebral.iloc[:, :-1].to_numpy())

# Experiment grid: `missing` values are passed to the inserter as `percentage`
# and each `ks` entry is passed to the classifier as `k_neighbours`.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
ks = [(2, 4), (3, 5)]

# Bookkeeping initialised here; not used in the visible part of the script.
dfs = []
ind = 0
# Sweep every combination of missing-value level, aggregation object, `r`
# and `k_neighbours` pair.
for miss in missing:
    # The aggregation objects are constructed anew on each pass of the outer loop.
    for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(), A4Aggregation(p=3), A5Aggregation(), A6Aggregation(),
                A7Aggregation(), A8Aggregation(), A9Aggregation(), A10Aggregation()):
        for r in (2, 5, 10):
            for k in ks:
                # Run the inserter over all columns of X with `percentage=miss`;
                # -1 is the marker it is told to use for missing entries.
                # NOTE(review): this is re-run for every (agg, r, k) combination; if the
                # inserter is randomised, each configuration is scored on a different
                # missing-value pattern -- confirm that is intended.
                X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss)\
                    .fit_transform(X)
                # One-vs-rest wrapper around KNNAlgorithmF, configured with the same -1 marker.
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(KNNAlgorithmF(missing_representation=-1, r=r, aggregation=agg, k_neighbours=k))
                # 10-fold cross-validation scored with 'roc_auc_ovo'; fitted estimators are kept.
                f_result = cross_validate(multiclassF, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=10)
コード例 #2
0
from statistics import mean, stdev
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from dataset.aggregations import *
import setup_path as pth

# Import NumPy explicitly instead of depending on a wildcard import to
# provide the name `np`.
import numpy as np

# Read the sensor-readings file without a header row (`header=None`).
wifi = pd.read_csv(
    pth.concatenate_path_os_independent('sensor_readings_4.data'),
    header=None,
)

# Last column is the class label (integer-encoded); the rest are features.
X = wifi.iloc[:, :-1].to_numpy()
y = LabelEncoder().fit_transform(wifi.iloc[:, -1].to_numpy())

# Keep at most the first 200 rows of each class, visiting classes in the
# sorted order returned by np.unique.
s = [np.flatnonzero(y == c)[:200] for c in np.unique(y)]
indexes = np.concatenate(s, axis=None)

X = X[indexes]
y = y[indexes]

# Scale the features only after sub-sampling, so the min/max come from the
# retained rows.
X = MinMaxScaler().fit_transform(X)

# Missing-value levels for the experiment, plus bookkeeping that is not used
# in the visible part of the script.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
dfs = []
ind = 0
コード例 #3
0
from statistics import mean, stdev
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from dataset.aggregations import *
import setup_path as pth

# Read the 'Data' sheet of the breast-tissue workbook.
tissue = pd.read_excel(
    pth.concatenate_path_os_independent('BreastTissue.xls'),
    sheet_name='Data',
)

# Column index 1 is the class label (integer-encoded with LabelEncoder);
# columns from index 2 onward are the features and are min-max scaled.
y = LabelEncoder().fit_transform(tissue.iloc[:, 1].to_numpy())
X = MinMaxScaler().fit_transform(tissue.iloc[:, 2:].to_numpy())

# Experiment grid: `missing` values are passed to the inserter as `percentage`
# and each `ks` entry is passed to the classifier as `k_neighbours`.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
ks = [(2, 4), (3, 5)]

# Bookkeeping initialised here; not used in the visible part of the script.
dfs = []
ind = 0
# Sweep every combination of missing-value level, aggregation object, `r`
# and `k_neighbours` pair.
for miss in missing:
    # The aggregation objects are constructed anew on each pass of the outer loop.
    for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(), A4Aggregation(p=3), A5Aggregation(), A6Aggregation(),
                A7Aggregation(), A8Aggregation(), A9Aggregation(), A10Aggregation()):
        for r in (2, 5, 10):
            for k in ks:
                # Run the inserter over all columns of X with `percentage=miss`;
                # -1 is the marker it is told to use for missing entries.
                # NOTE(review): this is re-run for every (agg, r, k) combination; if the
                # inserter is randomised, each configuration sees a different
                # missing-value pattern -- confirm that is intended.
                X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss)\
                    .fit_transform(X)
                # One-vs-rest wrapper around KNNAlgorithmF, configured with the same -1 marker.
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(KNNAlgorithmF(missing_representation=-1, r=r, aggregation=agg, k_neighbours=k))
コード例 #4
0
from statistics import mean, stdev
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MinMaxScaler
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
import setup_path as pth

# Read the leaf data set.
# NOTE(review): `header=None` is not passed, so pandas uses the first row as
# column names -- confirm the file actually starts with a header line.
leaf = pd.read_csv(
    pth.concatenate_path_os_independent('leaf.csv'),
)

# Column index 0 is the class label (used as-is, no label encoding);
# columns from index 2 onward are the features and are min-max scaled.
y = leaf.iloc[:, 0].to_numpy()
X = MinMaxScaler().fit_transform(leaf.iloc[:, 2:].to_numpy())

# Experiment grid: `missing` values are passed to the inserter as `percentage`
# and each `ks` entry is passed to the classifier as `k_neighbours`.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
ks = [(2, 4), (3, 5)]

# Bookkeeping initialised here; not used in the visible part of the script.
dfs = []
ind = 0

# Sweep every combination of missing-value level, aggregation object, `r`
# and `k_neighbours` pair.
for miss in missing:
    # The aggregation objects are constructed anew on each pass of the outer loop.
    for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(), A4Aggregation(p=3), A5Aggregation(), A6Aggregation(),
                A7Aggregation(), A8Aggregation(), A9Aggregation(), A10Aggregation()):
        for r in (2, 5, 10):
            for k in ks:
                # Run the inserter over all columns of X with `percentage=miss`;
                # -1 is the marker it is told to use for missing entries.
                # NOTE(review): this is re-run for every (agg, r, k) combination; if the
                # inserter is randomised, each configuration is scored on a different
                # missing-value pattern -- confirm that is intended.
                X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss)\
                    .fit_transform(X)
                # One-vs-rest wrapper around KNNAlgorithmF, configured with the same -1 marker.
                multiclassF = OneVsRestClassifierForRandomBinaryClassifier(KNNAlgorithmF(missing_representation=-1, r=r, aggregation=agg, k_neighbours=k))
                # 2-fold cross-validation scored with 'roc_auc_ovo'; fitted estimators are kept.
                f_result = cross_validate(multiclassF, X_missing, y, scoring='roc_auc_ovo', return_estimator=True, cv=2)
コード例 #5
0
from statistics import stdev
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MinMaxScaler
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
from dataset.aggregations import *
import setup_path as pth

# Read the tab-separated Wi-Fi localisation file without a header row.
wifi_path = pth.concatenate_path_os_independent('wifi_localization.txt')
wifi = pd.read_csv(wifi_path, header=None, sep='\t')

# Last column is the class label (used as-is, no label encoding); every
# other column is a feature and is min-max scaled.
y = wifi.iloc[:, -1].to_numpy()
X = MinMaxScaler().fit_transform(wifi.iloc[:, :-1].to_numpy())

# Experiment grid: missing-value levels iterated by the loop that follows,
# and the `ks` pairs defined for it.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
ks = [(2, 4), (3, 5)]

# Bookkeeping initialised here; not used in the visible part of the script.
dfs = []
ind = 0
for miss in missing:
    for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(),
                A4Aggregation(p=3), A5Aggregation(), A6Aggregation(),
                A7Aggregation(), A8Aggregation(), A9Aggregation(),
                A10Aggregation()):
        for r in (2, 5, 10):
コード例 #6
0
from statistics import stdev
from sklearn.model_selection import cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from classification.decomposition import OneVsRestClassifierForRandomBinaryClassifier
from dataset.aggregations import *
from classification.k_neighbours import KNNAlgorithmF, KNNAlgorithmM, A1Aggregation
from preprocessing.missing_values import MissingValuesInserterColumnsIndependent
import setup_path as pth

# Read the tab-separated seeds file.
# NOTE(review): `header=None` is not passed, so pandas uses the first row as
# column names -- confirm the file actually starts with a header line.
seeds = pd.read_csv(
    pth.concatenate_path_os_independent('seeds_dataset.txt'),
    sep='\t',
)

# Last column is the class label (used as-is, no label encoding); every
# other column is a feature and is min-max scaled.
y = seeds.iloc[:, -1].to_numpy()
X = MinMaxScaler().fit_transform(seeds.iloc[:, :-1].to_numpy())

# Experiment grid: `missing` values are passed to the inserter as `percentage`;
# `ks` holds the pairs iterated by the innermost loop.
missing = (0.0, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5)
ks = [(2, 4), (3, 5)]

# Bookkeeping initialised here; not used in the visible part of the script.
dfs = []
ind = 0
# Sweep every combination of missing-value level, aggregation object, `r`
# and `ks` pair.
for miss in missing:
    # The aggregation objects are constructed anew on each pass of the outer loop.
    for agg in (A1Aggregation(), A2Aggregation(), A3Aggregation(),
                A4Aggregation(p=3), A5Aggregation(), A6Aggregation(),
                A7Aggregation(), A8Aggregation(), A9Aggregation(),
                A10Aggregation()):
        for r in (2, 5, 10):
            for k in ks:
                # Run the inserter over all columns of X with `percentage=miss`;
                # -1 is the marker it is told to use for missing entries.
                # NOTE(review): this is re-run for every (agg, r, k) combination; if the
                # inserter is randomised, each configuration sees a different
                # missing-value pattern -- confirm that is intended.
                X_missing = MissingValuesInserterColumnsIndependent(columns=range(X.shape[1]), nan_representation=-1, percentage=miss)\
                    .fit_transform(X)