Example #1
    def fit(self, X, y):

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=self.test_size,
                             random_state=self.random_state)

        # X_train is m rows by n columns, so shape[1] returns n,
        # i.e. the dimensionality of the feature space
        dim = X_train.shape[1]
        self.indices_ = tuple(range(dim))
        self.subsets_ = [self.indices_]

        score = self._calc_score(X_train, y_train, X_test, y_test, self.indices_)
        self.scores_ = [score]

        while dim > self.k_features:
            scores = []
            subsets = []

            # iterate over every combination of dim - 1 features from indices_
            for p in combinations(self.indices_, r=dim - 1):
                score = self._calc_score(X_train, y_train, X_test, y_test, p)
                scores.append(score)
                subsets.append(p)

            best = np.argmax(scores)
            self.indices_ = subsets[best]
            self.subsets_.append(self.indices_)
            dim -= 1

            self.scores_.append(scores[best])

        self.k_score_ = self.scores_[-1]
        return self
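This fit method (repeated in Examples #2 and #3 below) belongs to a sequential backward selection (SBS) class; the constructor and the _calc_score helper it relies on are not part of the excerpt. A minimal sketch of those missing pieces, assuming the conventional SBS design (a wrapped estimator, a scoring callable, and k_features as the stopping size); the class name SBS and these signatures are assumptions, not the original file's code:

from itertools import combinations

import numpy as np
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


class SBS:
    """Sequential backward selection: drop one feature at a time,
    keeping the subset that scores best on a held-out split."""

    def __init__(self, estimator, k_features, scoring=accuracy_score,
                 test_size=0.25, random_state=1):
        self.scoring = scoring
        self.estimator = clone(estimator)  # score on a fresh copy of the estimator
        self.k_features = k_features       # target number of features to keep
        self.test_size = test_size
        self.random_state = random_state

    def _calc_score(self, X_train, y_train, X_test, y_test, indices):
        # fit and score the estimator using only the selected columns
        self.estimator.fit(X_train[:, indices], y_train)
        y_pred = self.estimator.predict(X_test[:, indices])
        return self.scoring(y_test, y_pred)

A typical call, with hypothetical standardized training data X_train_std, y_train:

from sklearn.neighbors import KNeighborsClassifier

sbs = SBS(KNeighborsClassifier(n_neighbors=2), k_features=1)
sbs.fit(X_train_std, y_train)
print(sbs.k_score_, sbs.indices_)  # best held-out score and surviving columns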
Example #2
    def fit(self, X, y):

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=self.test_size,
                             random_state=self.random_state)

        dim = X_train.shape[1]
        self.indices_ = tuple(range(dim))
        self.subsets_ = [self.indices_]
        score = self._calc_score(X_train, y_train,
                                 X_test, y_test, self.indices_)
        self.scores_ = [score]

        while dim > self.k_features:
            scores = []
            subsets = []

            for p in combinations(self.indices_, r=dim - 1):
                score = self._calc_score(X_train, y_train,
                                         X_test, y_test, p)
                scores.append(score)
                subsets.append(p)

            best = np.argmax(scores)
            self.indices_ = subsets[best]
            self.subsets_.append(self.indices_)
            dim -= 1

            self.scores_.append(scores[best])
        self.k_score_ = self.scores_[-1]

        return self
Example #3
File: ch04.py Project: 1iyiwei/pyml
    def fit(self, X, y):

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=self.test_size,
                             random_state=self.random_state)

        dim = X_train.shape[1]
        self.indices_ = tuple(range(dim))
        self.subsets_ = [self.indices_]
        score = self._calc_score(X_train, y_train,
                                 X_test, y_test, self.indices_)
        self.scores_ = [score]

        while dim > self.k_features:
            scores = []
            subsets = []

            for p in combinations(self.indices_, r=dim - 1):
                score = self._calc_score(X_train, y_train,
                                         X_test, y_test, p)
                scores.append(score)
                subsets.append(p)

            best = np.argmax(scores)
            self.indices_ = subsets[best]
            self.subsets_.append(self.indices_)
            dim -= 1

            self.scores_.append(scores[best])
        self.k_score_ = self.scores_[-1]

        return self
Example #4
    def divide_data(self, test_size=0.3, random_state=1):
        from distutils.version import LooseVersion as Version
        from sklearn import __version__ as sklearn_version
        if Version(sklearn_version) < '0.18':
            from sklearn.cross_validation import train_test_split
        else:
            from sklearn.model_selection import train_test_split

        self._X_train, self._X_test, self._y_train, self._y_test = train_test_split(self._X, self._y, test_size=test_size, random_state=random_state)
        self.standarize()
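The method ends by calling self.standarize(), which is outside the excerpt. A minimal sketch of what such a helper presumably looks like (hypothetical, not the original project's code):

    def standarize(self):
        # hypothetical helper: fit the scaler on the training split only,
        # then apply the same transform to the test split
        from sklearn.preprocessing import StandardScaler
        sc = StandardScaler()
        self._X_train_std = sc.fit_transform(self._X_train)
        self._X_test_std = sc.transform(self._X_test)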
Example #5
def load_iris(test_size=0.3, random_state=0):
    iris = datasets.load_iris()

    X = iris.data[:, [2, 3]]
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    sc = StandardScaler()
    sc.fit(X_train)
    # standardize to zero mean and unit variance
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))  # stack train/test vertically
    y_combined = np.hstack((y_train, y_test))  # concatenate labels horizontally

    return [
        X_combined_std, X_train_std, X_train, X_test_std, X_test, y_combined,
        y_train, y_test
    ]
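Since the function returns eight arrays in a fixed order, callers unpack them positionally; a short usage sketch (the unpacked names mirror the return list and are illustrative):

(X_combined_std, X_train_std, X_train, X_test_std, X_test,
 y_combined, y_train, y_test) = load_iris(test_size=0.3, random_state=0)
print(X_train_std.shape, X_test_std.shape)  # (105, 2) and (45, 2) for a 0.3 split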
Example #6
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']

print('Class labels', np.unique(df_wine['Class label']))

print('\nWine data excerpt:\n\n', df_wine.head())


X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.3, random_state=0)


#############################################################################
print(50 * '=')
print('Section: Bringing features onto the same scale')
print(50 * '-')

mms = MinMaxScaler()
X_train_norm = mms.fit_transform(X_train)
X_test_norm = mms.transform(X_test)

stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)
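The two scalers differ: MinMaxScaler rescales each feature into [0, 1] using the training minima and maxima, while StandardScaler centers each feature to zero mean and unit variance. A tiny self-contained comparison (the example column is illustrative):

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

ex = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]).reshape(-1, 1)
print(MinMaxScaler().fit_transform(ex).ravel())    # [0.  0.2 0.4 0.6 0.8 1. ]
print(StandardScaler().fit_transform(ex).ravel())  # zero mean, unit variance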
Example #7
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split

#############################################################################
print(50 * '=')
print('Section: First steps with scikit-learn')
print(50 * '-')

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#############################################################################
print(50 * '=')
print('Section: Training a perceptron via scikit-learn')
print(50 * '-')

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
print('Y array shape', y_test.shape)
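The excerpt stops right after fitting; scoring the perceptron on the held-out split is the natural next step. A short sketch (assuming accuracy_score is imported from sklearn.metrics):

from sklearn.metrics import accuracy_score

y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))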
Example #8
activity = 7  # number of class labels
features = 35
featureReductionComponent = 15
# NOTE: the dataset's labels must already be stored in its rightmost column

bunkatu = 5  # number of folds for the k-fold split
AccuracyPCA, AccuracySTD = 0, 0

a = np.loadtxt('Feature/Features.csv', delimiter=',', dtype='float')
X = a[:, 0:features]
y = a[:, features:features + 1]
# convert the labels to int and flatten the array to 1-D (required)
y = np.array(y, dtype=int)
y = y.reshape(-1,)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
# NOTE: X_train and X_test are not scaled yet, so StandardScaler() must be applied


print(X_train.shape[0], X_train.shape[1])
print(X_test.shape[0], X_test.shape[1])

#############################################################################
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)
#print (X_train_std)

#############################################################################
print(50 * '=')
print('Section: Total and explained variance')
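The excerpt ends at the section header. Given featureReductionComponent = 15 above, the dimensionality-reduction step that presumably follows would look like this (a sketch, not the original file's code):

from sklearn.decomposition import PCA

pca = PCA(n_components=featureReductionComponent)
X_train_pca = pca.fit_transform(X_train_std)  # fit the projection on training data only
X_test_pca = pca.transform(X_test_std)
print(pca.explained_variance_ratio_.sum())    # variance retained by the 15 components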
Example #9
File: ch05.py Project: 1iyiwei/pyml
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/wine/wine.data',
                      header=None)

df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue',
                   'OD280/OD315 of diluted wines', 'Proline']

print('Wine data excerpt:\n\n', df_wine.head())


X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

cov_mat = np.cov(X_train_std.T)
eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)

print('\nEigenvalues \n%s' % eigen_vals)


#############################################################################
print(50 * '=')
print('Section: Total and explained variance')
print(50 * '-')
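From the eigenvalues above, the explained-variance ratios follow directly: each eigenvalue divided by the eigenvalue sum, sorted in decreasing order. A short sketch of the computation this section typically performs:

tot = sum(eigen_vals)
var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]  # per-component ratio
cum_var_exp = np.cumsum(var_exp)                                 # cumulative ratio
print(cum_var_exp)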
Example #10
File: ch03.py Project: 1iyiwei/pyml
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split

#############################################################################
print(50 * '=')
print('Section: First steps with scikit-learn')
print(50 * '-')

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#############################################################################
print(50 * '=')
print('Section: Training a perceptron via scikit-learn')
print(50 * '-')

ppn = Perceptron(n_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
print('Y array shape', y_test.shape)
Example #11
print('Breast Cancer dataset excerpt:\n\n')
print(df.head())

print("Breast Cancer dataset dimensions: \n\n")
print(df.shape)

X = df.loc[:, 2:].values
y = df.loc[:, 1].values
le = LabelEncoder()
y = le.fit_transform(y)
y_enc = le.transform(["M", "B"])
print("Label encoding example, le.transform(['M', 'B'])")
print(le.transform(["M", "B"]))

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.20, random_state=1)

print(50 * "=")
print("Section: Combining transformers and estimators in a pipeline")
print(50 * "-")

pipe_lr = Pipeline([("scl", StandardScaler()), ("pca", PCA(n_components=2)),
                    ("clf", LogisticRegression(random_state=1))])

pipe_lr.fit(X_train, y_train)
print("Test Accuracy: %.3f" % pipe_lr.score(X_test, y_test))
y_pred = pipe_lr.predict(X_test)

print(50 * "=")
print("Section: K-fold cross-validation")
print(50 * "-")
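The excerpt cuts off at the k-fold header; the standard continuation scores pipe_lr across stratified folds. A sketch using cross_val_score (an assumption about what follows, not the original file's code):

from sklearn.model_selection import cross_val_score

scores = cross_val_score(estimator=pipe_lr, X=X_train, y=y_train, cv=10)
print('CV accuracy scores: %s' % scores)
print('CV accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))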
Example #12
File: ch06.py Project: 1iyiwei/pyml
print('Breast Cancer dataset excerpt:\n\n')
print(df.head())

print('Breast Cancer dataset dimensions:\n\n')
print(df.shape)

X = df.loc[:, 2:].values
y = df.loc[:, 1].values
le = LabelEncoder()
y = le.fit_transform(y)
y_enc = le.transform(['M', 'B'])
print("Label encoding example, le.transform(['M', 'B'])")
print(le.transform(['M', 'B']))

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.20, random_state=1)


#############################################################################
print(50 * '=')
print('Section: Combining transformers and estimators in a pipeline')
print(50 * '-')


pipe_lr = Pipeline([('scl', StandardScaler()),
                    ('pca', PCA(n_components=2)),
                    ('clf', LogisticRegression(random_state=1))])

pipe_lr.fit(X_train, y_train)
print('Test Accuracy: %.3f' % pipe_lr.score(X_test, y_test))
y_pred = pipe_lr.predict(X_test)
Example #13
    def do(self):
        iris = datasets.load_iris()
        X = iris.data[:, [2, 3]]
        y = iris.target
        print('Class labels:', np.unique(y))

        if Version(sklearn_version) < '0.18':
            from sklearn.cross_validation import train_test_split
        else:
            from sklearn.model_selection import train_test_split

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.3,
                                                            random_state=0)

        sc = StandardScaler()
        sc.fit(X_train)
        X_train_std = sc.transform(X_train)
        X_test_std = sc.transform(X_test)

        ppn = Perceptron(n_iter_no_change=40, eta0=0.1, random_state=0)
        ppn.fit(X_train_std, y_train)
        y_test.shape

        y_pred = ppn.predict(X_test_std)
        print('Misclassified samples: %d' %
              (y_test != y_pred).sum())
        print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

        X_combined_std = np.vstack((X_train_std, X_test_std))
        y_combined = np.hstack((y_train, y_test))

        # plot_decision_regions(X=X_combined_std, y=y_combined,
        #                     classifier=ppn, test_idx=range(105, 150))
        # plt.xlabel('petal length [standardized]')
        # plt.ylabel('petal width [standardized]')
        # plt.legend(loc='upper left')

        # plt.tight_layout()
        #plt.savefig('./rysunki/03_01.png', dpi=300)
        #plt.show()

        from sklearn.linear_model import LogisticRegression

        lr = LogisticRegression(C=100.0, random_state=0)
        lr.fit(X_train_std, y_train)

        # plot_decision_regions(X_combined_std, y_combined,
        #                     classifier=lr, test_idx=range(105, 150))
        # plt.xlabel('petal length [standardized]')
        # plt.ylabel('petal width [standardized]')
        # plt.legend(loc='upper left')
        # plt.tight_layout()
        # #plt.savefig('./rysunki/03_05.png', dpi=300)
        # plt.show()

        from sklearn.svm import SVC

        svm = SVC(kernel='linear', C=1.0, random_state=0)
        svm.fit(X_train_std, y_train)

        plot_decision_regions(X_combined_std,
                              y_combined,
                              classifier=svm,
                              test_idx=range(105, 150))
        plt.xlabel('petal length [standardized]')
        plt.ylabel('petal width [standardized]')
        plt.legend(loc='upper left')
        plt.tight_layout()
        #plt.savefig('./rysunki/03_10.png', dpi=300)
        plt.show()
        pass