FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @author: richard lyman ''' import numpy as np import ocr_utils import matplotlib.pyplot as plt from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn.cross_validation import train_test_split y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=(9,17),nChars=500) def sigmoid(z): return 1.0 / (1.0 + np.exp(-z)) z = np.arange(-7, 7, 0.1) phi_z = sigmoid(z) title='sigmoid' plt.plot(z, phi_z) plt.axvline(0.0, color='k') plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted') plt.axhline(y=0.5, ls='dotted', color='k') plt.yticks([0.0, 0.5, 1.0]) plt.ylim(-0.1, 1.1) plt.xlabel('z') plt.ylabel('$\phi (z)$')
def get_params(self, deep=True):
    """ Get classifier parameter names for GridSearch"""
    # Shallow request: behave like a plain estimator and report only this
    # ensemble's own constructor parameters.
    if not deep:
        return super(MajorityVoteClassifier, self).get_params(deep=False)
    else:
        # Deep request: expose every wrapped classifier's parameters under
        # the 'name__param' naming convention so GridSearchCV can tune them.
        out = self.named_classifiers.copy()
        for name, step in six.iteritems(self.named_classifiers):
            for key, value in six.iteritems(step.get_params(deep=True)):
                out['%s__%s' % (name, key)] = value
        return out


# NOTE(review): the code below appears to be a separate script fragment that
# was concatenated after the method above; MajorityVoteClassifier, six,
# LabelEncoder and DecisionTreeClassifier are not imported anywhere in this
# chunk — confirm against the full file.
charsToTrain = (48, 51)
nChars = 500
# E13B samples for ASCII '0' (48) and '3' (51), columns 9 and 17 as features.
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=charsToTrain,
                                                   columns=(9, 17),
                                                   nChars=nChars)
# Encode the two ASCII codes as integer class labels 0/1.
le = LabelEncoder()
y = le.fit_transform(y)
# 50/50 train/test split.
X_train, X_test, y_train, y_test =\
    train_test_split(X, y, test_size=0.5, random_state=1)
# Two weak base classifiers for the voting ensemble.
clf1 = LogisticRegression(penalty='l2', C=0.001, random_state=0)
clf2 = DecisionTreeClassifier(max_depth=1, criterion='entropy', random_state=0)
SOFTWARE.

@author: richard lyman
'''
from sklearn.ensemble import AdaBoostClassifier
import ocr_utils
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern location is sklearn.model_selection.
from sklearn.cross_validation import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# E13B samples for ASCII '0' (48) and '3' (51); the two features are the
# vertical-pixel sums of columns 9 and 17.
charsToTrain = (48, 51)
nChars = 1000
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=charsToTrain,
                                                   columns=(9, 17),
                                                   nChars=nChars)

# Encode the two ASCII codes as integer class labels 0/1.
le = LabelEncoder()
y = le.fit_transform(y)
# 60/40 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40,
                                                    random_state=1)

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Weak learner: a single-split decision stump, boosted 500 times by AdaBoost.
tree = DecisionTreeClassifier(criterion='entropy', max_depth=1)
# NOTE(review): this call is truncated in this chunk — the remaining keyword
# arguments and the closing parenthesis continue beyond the visible text.
ada = AdaBoostClassifier(base_estimator=tree, n_estimators=500, learning_rate=0.1,
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @author: richard lyman ''' import numpy as np import ocr_utils from sklearn.preprocessing import StandardScaler y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B( chars_to_train=(48, 49, 50), columns=(9, 17), test_size=0.3, nChars=300, random_state=0) sc = StandardScaler() sc.fit(X_train) X_train_std = sc.transform(X_train) X_test_std = sc.transform(X_test) X_combined_std = np.vstack((X_train_std, X_test_std)) y_combined = np.hstack((y_train, y_test)) X_combined = np.vstack((X_train, X_test)) y_combined = np.hstack((y_train, y_test)) from sklearn.neighbors import KNeighborsClassifier knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
import matplotlib.pyplot as plt

# E13B samples for ASCII '0', '1', '2' with all 20 column sums as features.
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=(48, 49, 50),
                                                   columns=range(0, 20),
                                                   nChars=1000,
                                                   random_state=0)

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern location is sklearn.model_selection.
from sklearn.cross_validation import train_test_split

# 70/30 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

from sklearn.preprocessing import StandardScaler

# Standardize using statistics estimated on the training set only.
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)
# NOTE(review): a second StandardScaler is created here; its use continues
# beyond this chunk and appears to duplicate the stdsc work above — confirm
# against the full file before removing either.
sc = StandardScaler()
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# NOTE(review): sklearn.cross_validation and sklearn.grid_search were removed
# in scikit-learn 0.20; both now live in sklearn.model_selection.
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC

if __name__ == '__main__':
    # E13B samples for ASCII '0' (48) and '3' (51), columns 9 and 17 as
    # features; load_E13B performs the 70/30 split here.
    y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , test_size=0.3, columns=(9,17), random_state=0)

    # Scale, then classify with an SVM; grid-search C (and gamma for rbf).
    pipe_svc = Pipeline([('scl', StandardScaler()), ('clf', SVC(random_state=1))])
    c_gamma_range = [0.01, 0.1, 1.0, 10.0]
    # Two grids: the linear kernel varies C only; rbf varies C and gamma.
    param_grid = [{'clf__C': c_gamma_range, 'clf__kernel': ['linear']},
                  {'clf__C': c_gamma_range, 'clf__gamma': c_gamma_range, 'clf__kernel': ['rbf'],}]
    # NOTE(review): this call is truncated in this chunk — the remaining
    # GridSearchCV keyword arguments continue beyond the visible text.
    gs = GridSearchCV(estimator=pipe_svc, param_grid=param_grid,
# NOTE(review): scipy.interp is a deprecated alias of numpy.interp.
from scipy import interp
import matplotlib.pyplot as plt
import numpy as np
import ocr_utils
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern location is sklearn.model_selection.
from sklearn.cross_validation import StratifiedKFold
from sklearn.decomposition import PCA
from sklearn.cross_validation import train_test_split
from sklearn.metrics import make_scorer, precision_score, roc_curve, auc

if __name__ == '__main__':
    # E13B samples for ASCII '0' (48) and '3' (51), columns 9 and 17.
    y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=(48, 51), columns=(9, 17), random_state=0)

    from sklearn.preprocessing import LabelEncoder

    # the ROC is for data with a binary outcome. Change the ASCII characters to 0,1
    le = LabelEncoder()
    y = le.fit_transform(y)
    le.transform((48, 51))
    # Hold out 20% of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)
    # NOTE(review): this Pipeline literal is truncated in this chunk — the
    # final estimator and closing brackets continue beyond the visible text.
    pipe_lr = Pipeline([('scl', StandardScaler()), ('pca', PCA(n_components=2)),
import ocr_utils
import numpy as np
from numpy.random import seed
import matplotlib.pyplot as plt

#############################################################################
# read images and scatter plot

# retrieve 100 sets of target numbers and column sums
#    y: the ascii characters 48 and 49 ('0', '1')
#    X: the sum of the vertical pixels in the rows in horizontal columns 9 and 17

# NOTE(review): the comment above says characters 48 and 49, but the tuple
# below selects 48 and 51 ('0' and '3'); likewise nchars is set to 500 yet
# nChars=120 is what is actually passed to load_E13B — confirm which values
# are intended.
ascii_characters_to_train = (48, 51)
columnsXY = (9, 17)
nchars = 500
y, X, y_test, X_test, labels = ocr_utils.load_E13B(
    chars_to_train=ascii_characters_to_train, columns=columnsXY, nChars=120)

# Relabel: the second selected character becomes -1, everything else +1 —
# the two-class encoding the Adaline update rule expects.
y = np.where(y == ascii_characters_to_train[1], -1, 1)

#############################################################################
# AdalineSGD from Python Machine Learning
# NOTE(review): this class definition is truncated in this chunk — the
# docstring (and the rest of the class body) continues beyond the visible
# text.
class AdalineSGD(object):
    """ADAptive LInear NEuron classifier.

    Parameters
    ------------
    eta : float
        Learning rate (between 0.0 and 1.0)
    n_iter : int
        Passes over the training dataset.
# from sklearn.metrics import make_scorer,roc_curve, auc from scipy import interp import matplotlib.pyplot as plt import numpy as np import ocr_utils from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline from sklearn.cross_validation import StratifiedKFold from sklearn.decomposition import PCA from sklearn.cross_validation import train_test_split from sklearn.metrics import make_scorer,precision_score,roc_curve, auc if __name__ == '__main__': y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,51) , columns=(9,17), random_state=0) from sklearn.preprocessing import LabelEncoder # the ROC is for data with a binary outcome. Change the ASCII characters to 0,1 le = LabelEncoder() y = le.fit_transform(y) le.transform((48,51)) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1) pipe_lr = Pipeline([('scl', StandardScaler()), ('pca', PCA(n_components=2)), ('clf', LogisticRegression(penalty='l2',random_state=0,C=100.0))]) # X_train2 = X_train[:, [4, 14]] X_train2 = X_train
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern location is sklearn.model_selection.
from sklearn.cross_validation import train_test_split

# E13B samples for ASCII '0', '1', '2'; the two features are the
# vertical-pixel sums of columns 9 and 17.
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=(48, 49, 50),
                                                   columns=(9, 17), nChars=500)


def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e**-z), elementwise on arrays."""
    return 1.0 / (1.0 + np.exp(-z))


# Plot the sigmoid over [-7, 7) to illustrate the logistic activation.
# NOTE(review): the plotting sequence is truncated in this chunk and
# continues beyond the visible text.
z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)
title = 'sigmoid'
plt.plot(z, phi_z)
plt.axvline(0.0, color='k')
plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
plt.axhline(y=0.5, ls='dotted', color='k')
plt.yticks([0.0, 0.5, 1.0])
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils
import matplotlib.pyplot as plt

# E13B samples for ASCII '0', '1', '2' with all 20 column sums as features.
y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=range(0,20), nChars=1000, random_state=0)

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern location is sklearn.model_selection.
from sklearn.cross_validation import train_test_split

# 70/30 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

from sklearn.preprocessing import StandardScaler

# Standardize using statistics estimated on the training set only.
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)
# NOTE(review): the scaler below repeats the stdsc work above and overwrites
# X_train_std with an identical value — one of the two scalers appears
# redundant; confirm against the full file before removing either.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
@author: richard lyman ''' import ocr_utils import numpy as np ############################################################################# # read images and scatter plot # retrieve 400 sets of target numbers and column sums # y: the ascii characters 48 and 49 ('0', '1') # X: the sum of the vertical pixels in the rows in horizontal columns 9 and 17 ascii_characters_to_train = (48,49) columnsXY = (9,17) y, X, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train=ascii_characters_to_train , columns=columnsXY,nChars=256) # put the ASCII equivalent of the unique characters in y into the legend of the plot legend=[] for ys in np.unique(y): legend.append('{} \'{}\''.format(ys, chr(ys))) ocr_utils.scatter_plot(X=X, y=y, legend_entries=legend, axis_labels = ['column {} sum'.format(columnsXY[i]) for i in range(len(columnsXY))], title='E13B sum of columns') ############################################################################# # read and show character images for '0', and '1' # select the digits in columnsXY in the E13B font
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@author: richard lyman
'''
import numpy as np
import ocr_utils

# E13B samples for ASCII '0', '1', '2' with all 20 column sums as features;
# load_E13B performs the 70/30 split here.
columnsXY = range(0,20)
y_train, X_train, y_test, X_test, labels = ocr_utils.load_E13B(chars_to_train = (48,49,50) , columns=columnsXY , test_size=0.3, nChars=1000, random_state=0)

from sklearn.preprocessing import StandardScaler

# Standardize using statistics estimated on the training set only.
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)
# NOTE(review): the scaler below repeats the stdsc work above and overwrites
# X_train_std / X_test_std with identical values — one of the two scalers is
# redundant, but either may be referenced later in the file, so both are
# retained here.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Combined (train + test) standardized features and labels, for plotting
# decision regions over the full sample later in the script.
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from sklearn.linear_model import LogisticRegression