def fit(self, X, y):
    """Sequential Backward Selection: greedily drop features one at a time
    until only ``self.k_features`` remain, keeping the best-scoring subset
    at each round.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vectors.
    y : array-like of shape (n_samples,)
        Target values.

    Returns
    -------
    self
        The fitted selector; sets ``indices_`` (surviving feature indices),
        ``subsets_`` (chosen subset per round), ``scores_`` (score per
        round) and ``k_score_`` (score of the final subset).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=self.test_size, random_state=self.random_state)

    # X is (m rows, n columns), so shape[1] is the feature dimensionality.
    dim = X_train.shape[1]
    self.indices_ = tuple(range(dim))
    self.subsets_ = [self.indices_]
    score = self._calc_score(X_train, y_train, X_test, y_test, self.indices_)
    self.scores_ = [score]

    while dim > self.k_features:
        scores = []
        subsets = []
        # Iterator over all r = (dim - 1)-sized combinations of the
        # currently surviving feature indices.
        for p in combinations(self.indices_, r=dim - 1):
            score = self._calc_score(X_train, y_train, X_test, y_test, p)
            scores.append(score)
            subsets.append(p)

        best = np.argmax(scores)
        self.indices_ = subsets[best]
        self.subsets_.append(self.indices_)
        dim -= 1
        self.scores_.append(scores[best])

    self.k_score_ = self.scores_[-1]
    # Return self so calls can be chained (scikit-learn estimator API).
    return self
def fit(self, X, y):
    """Fit by sequential backward selection.

    Repeatedly removes the single feature whose removal yields the highest
    validation score, until only ``self.k_features`` indices are left.
    Records the chosen subset and its score at every step, then returns
    ``self`` so the call can be chained.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=self.test_size, random_state=self.random_state)

    n_features = X_tr.shape[1]
    self.indices_ = tuple(range(n_features))
    self.subsets_ = [self.indices_]
    self.scores_ = [self._calc_score(X_tr, y_tr, X_te, y_te, self.indices_)]

    while n_features > self.k_features:
        # Score every subset obtained by dropping exactly one feature.
        candidates = list(combinations(self.indices_, r=n_features - 1))
        candidate_scores = [self._calc_score(X_tr, y_tr, X_te, y_te, c)
                            for c in candidates]

        winner = np.argmax(candidate_scores)
        self.indices_ = candidates[winner]
        self.subsets_.append(self.indices_)
        self.scores_.append(candidate_scores[winner])
        n_features -= 1

    self.k_score_ = self.scores_[-1]
    return self
def divide_data(self, test_size=0.3, random_state=1):
    """Split the stored dataset into train/test partitions, then standardize.

    Populates ``self._X_train`` / ``self._X_test`` / ``self._y_train`` /
    ``self._y_test`` from ``self._X`` and ``self._y``, and finishes by
    calling ``self.standarize()``.

    Parameters
    ----------
    test_size : float
        Proportion of samples held out for testing.
    random_state : int
        Seed for a reproducible shuffle/split.
    """
    from distutils.version import LooseVersion as Version
    from sklearn import __version__ as sklearn_version

    # train_test_split moved to sklearn.model_selection in 0.18; before
    # that it lived in sklearn.cross_validation — NOT sklearn.grid_search,
    # which the original import pointed at and which would raise
    # ImportError on pre-0.18 releases.
    if Version(sklearn_version) < '0.18':
        from sklearn.cross_validation import train_test_split
    else:
        from sklearn.model_selection import train_test_split

    self._X_train, self._X_test, self._y_train, self._y_test = \
        train_test_split(self._X, self._y,
                         test_size=test_size, random_state=random_state)
    self.standarize()
def load_iris(test_size=0.3, random_state=0):
    """Load the iris data (petal length/width only), split and standardize.

    Returns a list:
    ``[X_combined_std, X_train_std, X_train, X_test_std, X_test,
       y_combined, y_train, y_test]``
    """
    iris = datasets.load_iris()
    X, y = iris.data[:, [2, 3]], iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Standardize to zero mean / unit variance, using statistics
    # estimated on the training split only.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))  # stack rows
    y_combined = np.hstack((y_train, y_test))              # concatenate labels

    return [
        X_combined_std, X_train_std, X_train,
        X_test_std, X_test,
        y_combined, y_train, y_test
    ]
# --- Wine dataset: attach column names and preview ---
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']

print('Class labels', np.unique(df_wine['Class label']))
print('\nWine data excerpt:\n\n', df_wine.head())

# Column 0 is the class label; the remaining columns are the features.
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.3, random_state=0)

#############################################################################
print(50 * '=')
print('Section: Bringing features onto the same scale')
print(50 * '-')

# Min-max normalization: the scaler is fit on the training data only and
# the same transform is applied to the test data.
mms = MinMaxScaler()
X_train_norm = mms.fit_transform(X_train)
X_test_norm = mms.transform(X_test)

# Standardization (zero mean, unit variance), again fit on training data only.
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)
from sklearn.grid_search import train_test_split else: from sklearn.model_selection import train_test_split ############################################################################# print(50 * '=') print('Section: First steps with scikit-learn') print(50 * '-') iris = datasets.load_iris() X = iris.data[:, [2, 3]] y = iris.target print('Class labels:', np.unique(y)) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0) sc = StandardScaler() sc.fit(X_train) X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) ############################################################################# print(50 * '=') print('Section: Training a perceptron via scikit-learn') print(50 * '-') ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0) ppn.fit(X_train_std, y_train) print('Y array shape', y_test.shape)
activity = 7#ラベルの個数 features = 35 featureReductionComponent=15 #※事前にデータセットのラベルが右端に入力されていること bunkatu=5#k分割の分割数 AccuracyPCA, AccuracySTD =0, 0 a = np.loadtxt('Feature/Features.csv', delimiter=',', dtype='float') X=a[:,0:features] y=a[:,features:features+1] #ラベルをintに変換し、配列を1次元に変換(必須) y=np.array(y, dtype=int) y=y.reshape(-1,) X_train,X_test,y_train,y_test= train_test_split(X, y, test_size=0.2, stratify=y) #※X_train,X_testはスケーリングしていないため、必ずStandardScaler()にかける事が必要 print(X_train.shape[0],X_train.shape[1]) print(X_test.shape[0],X_test.shape[1]) ############################################################################# sc = StandardScaler() X_train_std = sc.fit_transform(X_train) X_test_std = sc.transform(X_test) #print (X_train_std) ############################################################################# print(50 * '=') print('Section: Total and explained variance')
    # (continuation of the pd.read_csv call that loads the UCI wine data)
    'machine-learning-databases/wine/wine.data', header=None)

# Attach human-readable column names; column 0 is the class label.
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']

print('Wine data excerpt:\n\n:', df_wine.head())

X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.3, random_state=0)

# Standardize features (scaler fit on the training data only).
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Eigendecomposition of the covariance matrix of the standardized
# training features — the groundwork for PCA.
cov_mat = np.cov(X_train_std.T)
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)
print('\nEigenvalues \n%s' % eigen_vals)

#############################################################################
print(50 * '=')
print('Section: Total and explained variance')
print(50 * '-')
# train_test_split moved to sklearn.model_selection in scikit-learn 0.18;
# older releases shipped it in sklearn.cross_validation — not
# sklearn.grid_search, which the original import targeted and which would
# raise ImportError on pre-0.18 installs.
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split

#############################################################################
print(50 * '=')
print('Section: First steps with scikit-learn')
print(50 * '-')

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length and petal width only
y = iris.target
print('Class labels:', np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Standardize features using statistics estimated on the training split.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#############################################################################
print(50 * '=')
print('Section: Training a perceptron via scikit-learn')
print(50 * '-')

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
print('Y array shape', y_test.shape)
print('Breast Cancer dataset excerpt:\n\n')
print(df.head())
print("Breast Cancer dataset dimensions: \n\n")
print(df.shape)

# Columns 2.. are the features; column 1 is the diagnosis label string.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the string class labels ('M'/'B') as integers.
le = LabelEncoder()
y = le.fit_transform(y)
y_enc = le.transform(["M", "B"])
print("Label encoding example, le.transform(['M', 'B'])")
print(le.transform(["M", "B"]))

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.20, random_state=1)

print(50 * "=")
print("Section: Combining transformers and estimators in a pipeline")
print(50 * "-")

# Pipeline: standardize -> project onto 2 principal components ->
# logistic regression. Fitting the pipeline fits each stage in turn on
# the training data.
pipe_lr = Pipeline([("scl", StandardScaler()),
                    ("pca", PCA(n_components=2)),
                    ("clf", LogisticRegression(random_state=1))])
pipe_lr.fit(X_train, y_train)
print("Test Accuracy: %.3f" % pipe_lr.score(X_test, y_test))
y_pred = pipe_lr.predict(X_test)

print(50 * "=")
print("Section: K-fold cross-validation")
print(50 * "-")
print('Breast Cancer dataset excerpt:\n\n')
print(df.head())
print('Breast Cancer dataset dimensions:\n\n')
print(df.shape)

# Columns 2.. are the features; column 1 is the diagnosis label string.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the string class labels ('M'/'B') as integers.
le = LabelEncoder()
y = le.fit_transform(y)
y_enc = le.transform(['M', 'B'])
print("Label encoding example, le.transform(['M', 'B'])")
print(le.transform(['M', 'B']))

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.20, random_state=1)

#############################################################################
print(50 * '=')
print('Section: Combining transformers and estimators in a pipeline')
print(50 * '-')

# Pipeline: standardize -> project onto 2 principal components ->
# logistic regression. Fitting the pipeline fits each stage in turn on
# the training data.
pipe_lr = Pipeline([('scl', StandardScaler()),
                    ('pca', PCA(n_components=2)),
                    ('clf', LogisticRegression(random_state=1))])
pipe_lr.fit(X_train, y_train)
print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
y_pred = pipe_lr.predict(X_test)
def do(self):
    """Train a perceptron, a logistic regression and a linear SVM on the
    iris petal features, then plot the SVM's decision regions.

    Behavior preserved from the original: 70/30 split, standardization
    with training-set statistics, and all user-facing messages in Polish.
    """
    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]  # petal length and petal width only
    y = iris.target
    print('Etykiety klas:', np.unique(y))

    # train_test_split moved to sklearn.model_selection in 0.18; earlier
    # releases had it in sklearn.cross_validation (the original import
    # from sklearn.grid_search would raise ImportError on those versions).
    if Version(sklearn_version) < '0.18':
        from sklearn.cross_validation import train_test_split
    else:
        from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)

    # Standardize features using statistics estimated on the training split.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    ppn = Perceptron(n_iter_no_change=40, eta0=0.1, random_state=0)
    ppn.fit(X_train_std, y_train)

    y_pred = ppn.predict(X_test_std)
    print('Nieprawidłowo sklasyfikowane próbki: %d' % (y_test != y_pred).sum())
    print('Dokładność: %.2f' % accuracy_score(y_test, y_pred))

    # Combined (train + test) arrays used for the decision-region plot.
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    from sklearn.linear_model import LogisticRegression
    lr = LogisticRegression(C=100.0, random_state=0)
    lr.fit(X_train_std, y_train)

    from sklearn.svm import SVC
    svm = SVC(kernel='linear', C=1.0, random_state=0)
    svm.fit(X_train_std, y_train)

    # Plot decision regions for the SVM; axis labels are in Polish
    # ("petal length/width [standardized]").
    plot_decision_regions(X_combined_std, y_combined,
                          classifier=svm, test_idx=range(105, 150))
    plt.xlabel('Długość płatka [standaryzowana]')
    plt.ylabel('Szerokość płatka [standaryzowana]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    # plt.savefig('./rysunki/03_10.png', dpi=300)
    plt.show()