def cmd_hack(args): info('hack called with args %s' % str(args)) warn('this is a warning') err('this is an error') info('this is merely info') print getTags() a, b = u.split([i for i in range(20)], 17) print a print b fail('now you\'ve done it')
def cmd_hack(args):
    # Scratch/debug command. NOTE(review): this redefines cmd_hack — an
    # earlier definition exists above in the file; in Python the later def
    # wins, silently discarding the first.
    # Load a dataset (presumably 28x28 images — verify u.loadInputs2) and
    # dump a few facts about it, then exit immediately.
    tset = u.loadInputs2(True, '28x28')
    print len(tset.images), 'image(s)'
    print tset.images[0]
    print tset.images[0].np_data.dtype
    sys.exit(1)
    # NOTE(review): everything below is unreachable because of the
    # sys.exit(1) above — kept as-is since this is scratch code.
    info('hack called with args %s' % str(args))
    warn('this is a warning')
    err('this is an error')
    info('this is merely info')
    a, b = u.split([i for i in range(20)], 17)
    print a
    print b
    nda = np.array([0, 0, 1])
    print(nda)
    print(n2c(nda))
    fail('now you\'ve done it')
def cmd_train(args):
    """Train a GaussianNB classifier for a single tag and write an HTML
    report of its predictions on the held-out portion of the data."""
    if not args:
        err('error: not enough arguments.')
        err('usage: run <classify-tag>')
        sys.exit(1)
    tag = args[0]
    knownTags = u.getTags()
    if tag not in knownTags:
        fail('error: tag "%s" not found.\nKnown tags are: %s' % (tag, str(knownTags)))
    info('classifying for tag "%s"' % tag)
    files, data, labels, metaJson = u.loadInputs()
    # see multi-label learning comment below. for now, slice() to one label
    labels = u.slice(labels, knownTags.index(tag))
    displayNames = ['NO ' + tag, 'YES ' + tag]
    verbose('data shape: %s labels shape: %s' % (data.shape, labels.shape))
    # presumably splits each sequence at index 326: head for training,
    # tail held out for testing — verify u.split
    trainFiles, holdoutFiles = u.split(files, 326)
    trainData, holdoutData = u.split(data, 326)
    trainLabels, holdoutLabels = u.split(labels, 326)
    # some classifiers can only handle a single target
    # http://stackoverflow.com/questions/31881138/predicting-outcome-of-multiple-targets-in-scikit-learn
    # try this? http://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html
    verbose('trained data shape: %s trained labels shape: %s' % (trainData.shape, trainLabels.shape))
    classifier = GaussianNB()  # 92.6% correct on training data set, wrong between white <-> gray
    #classifier = LogisticRegression() # 100% correct on training, empirically worse on test data tho
    #classifier = KNeighborsClassifier() # not as good as GaussianNB, particularly false positives/negatives on blue
    #classifier = ElasticNet()
    classifier.fit(trainData, trainLabels)
    predicts = classifier.predict(holdoutData)
    verbose('predicts.shape: %s' % str(predicts.shape))
    #print predicts
    probs = classifier.predict_proba(holdoutData)
    verbose('probs shape: %s' % str(probs.shape))
    verbose('testLabels: %s' % str(holdoutLabels))
    htmlFile = tag + '-gaussian-unclassified.html'
    u.outputHtml(htmlFile, holdoutFiles,
                 [displayNames[int(p)] for p in predicts],
                 [displayNames[int(i)] for i in holdoutLabels],
                 None)
    info('saved test results: %s' % htmlFile)
def cmd_run(args):
    # Classify images for one tag. Builds and fits a Keras model, but the
    # actual predictions below come from a GaussianNB classifier; two HTML
    # reports are written (held-out test set, then the training set itself).
    if len(args) == 0:
        err('error: not enough arguments.')
        err('usage: run <classify-tag>')
        sys.exit(1)
    tag = args[0]
    tags = u.getTags()
    if tag not in tags:
        fail('error: tag "%s" not found.\nKnown tags are: %s' % (tag, str(tags)))
    info('classifying for tag "%s"' % tag)
    imageFiles, imageData, labels, imgJson = u.loadInputs()
    # see multi-label learning comment below. for now, slice() to one label
    labels = u.slice(labels, tags.index(tag))
    NO_YES = ['NO ' + tag, 'YES ' + tag]
    verbose('data shape: %s labels shape: %s' % (imageData.shape, labels.shape))
    # presumably splits at index 326: head trains, tail is held out — verify u.split
    trainedFiles, testFiles = u.split(imageFiles, 326)
    trainedImages, testImages = u.split(imageData, 326)
    trainedLabels, testLabels = u.split(labels, 326)
    # some classifiers can only handle a single target
    # http://stackoverflow.com/questions/31881138/predicting-outcome-of-multiple-targets-in-scikit-learn
    # try this? http://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html
    verbose('trained data shape: %s trained labels shape: %s' % (trainedImages.shape, trainedLabels.shape))
    model = Sequential()
    # NOTE(review): Dense(None, ...) passes None as the unit count; Keras
    # expects a positive integer here, so this looks like unfinished
    # work-in-progress — confirm the intended layer size.
    model.add(Dense(None, input_dim=14400))
    model.add(Activation('relu'))
    verbose('model summary:')
    model.summary()
    info('model built, compiling')
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # Convert labels to categorical one-hot encoding
    # think I've done the equivalent of this in the slice() above (?)
    #binaryTrainedLabels = keras.utils.to_categorical(labels, num_classes=10)
    model.fit(trainedImages, trainedLabels, epochs=10, batch_size=326)
    # NOTE(review): the fitted Keras model is never used past this point;
    # all predictions below come from the GaussianNB classifier.
    classifier = GaussianNB( ) # 92.6% correct on training data set, wrong between white <-> gray
    #classifier = LogisticRegression() # 100% correct on training, empirically worse on test data tho
    #classifier = ElasticNet()
    classifier.fit(trainedImages, trainedLabels)
    predicts = classifier.predict(testImages)
    verbose('predicts.shape: %s' % str(predicts.shape))
    #print predicts
    probs = classifier.predict_proba(testImages)
    verbose('probs shape: %s' % str(probs.shape))
    verbose('testLabels: %s' % str(testLabels))
    htmlFile = tag + '-unclassified.html'
    u.outputHtml(htmlFile, testFiles, [NO_YES[int(p)] for p in predicts], [NO_YES[int(i)] for i in testLabels], None)
    info('saved test results: %s' % htmlFile)
    # re-run predictions on the training set itself for comparison
    predicts = classifier.predict(trainedImages)
    htmlFile = tag + '-classified.html'
    u.outputHtml(htmlFile, trainedFiles, [NO_YES[int(p)] for p in predicts], [NO_YES[int(i)] for i in trainedLabels], None)
    info('saved test results: %s' % htmlFile)