def recognise(image, addr, extras):
    """Recognise up to nSegs characters in image, scanning right to left.

    For every character position, each candidate width w with
    minW <= w <= maxW and w < x (x being the current right edge) is tried
    in each colour channel flagged by findColors.  Every candidate is
    resized to segSize, flattened and scored by the network ``ann``; the
    candidate with the highest score wins.  The winning region is then
    filled with the value 96 in its channel (using the channel itself as
    the mask) and the scan moves left by the winner's width.

    image  -- image to read; must expose a ``width`` attribute.
    addr, extras -- handed through unchanged to cap.processExtras.

    Returns the recognised characters in left-to-right order.  The string
    is shorter than nSegs when the scan runs out of room or no candidate
    can be scored for a position.
    """
    chars = []  # collected right to left; reversed on return
    x = image.width - 1
    channels = getChannels(image)
    bestBounds = []
    while len(chars) < nSegs and x >= minW:
        x = cap.getBound(image, cap.CAP_BOUND_RIGHT, start=x)
        ratings = []
        for w in xrange(minW, min(maxW + 1, x)):
            bounds = findBounds(image, x, w)
            flags = findColors(cap.getSubImage(image, bounds))
            for index, flag in enumerate(flags):
                if not flag:
                    continue
                seg = getSegment(channels[index], image, bounds)
                seg = cap.flattenImage(adjustSize(seg, segSize))
                guesses = ann.run(seg)
                charIndex = cap.argmax(guesses)
                ratings.append((guesses[charIndex], charIndex, index, bounds))
        if not ratings:
            # Nothing could be scored here (the width range was empty or no
            # channel was flagged).  max() would raise ValueError on the
            # empty list, so stop and keep what has been recognised so far.
            break
        _, charIndex, channelIndex, bounds = max(ratings, key=itemgetter(0))
        chars.append(charset[charIndex])
        # Blank the winning region so it is not picked up again.
        bestChannel = channels[channelIndex]
        cv.SetImageROI(bestChannel, bounds)
        cv.Set(bestChannel, 96, bestChannel)
        cv.ResetImageROI(bestChannel)
        bestBounds.append(bounds)
        # bounds is used as a cv ROI rectangle, so index 2 is its width.
        x -= bounds[2]
    cap.processExtras([cap.drawComponents(image, bestBounds)],
                      addr, extras, cap.CAP_STAGE_RECOGNISE)
    return "".join(reversed(chars))
def recogniseOne(seg):
    """Score a single segment image with the network ``ann``.

    The segment is resized to segSize, flattened with cap.flattenImage and
    fed to ann.run.  Returns a ``(score, index)`` tuple where ``index`` is
    the output position selected by cap.argmax (presumably the
    highest-scoring one) and ``score`` is the network output at that
    position.
    """
    guesses = ann.run(cap.flattenImage(adjustSize(seg, segSize)))
    best = cap.argmax(guesses)
    return (guesses[best], best)
def recognise(image, addr, extras):
    """Recognise up to nSegs characters in image, scanning right to left.

    For every character position, each candidate width w with
    minW <= w <= maxW and w < x (x being the current right edge) is tried
    in each colour channel flagged by findColors.  Every candidate is
    resized to segSize, flattened and scored by the network ``ann``; the
    candidate with the highest score wins.  The winning region is then
    filled with the value 96 in its channel (using the channel itself as
    the mask) and the scan moves left by the winner's width.

    image  -- image to read; must expose a ``width`` attribute.
    addr, extras -- handed through unchanged to cap.processExtras.

    Returns the recognised characters in left-to-right order.  The string
    is shorter than nSegs when the scan runs out of room or no candidate
    can be scored for a position.
    """
    chars = []  # collected right to left; reversed on return
    x = image.width - 1
    channels = getChannels(image)
    bestBounds = []
    while len(chars) < nSegs and x >= minW:
        x = cap.getBound(image, cap.CAP_BOUND_RIGHT, start=x)
        ratings = []
        for w in xrange(minW, min(maxW + 1, x)):
            bounds = findBounds(image, x, w)
            flags = findColors(cap.getSubImage(image, bounds))
            for index, flag in enumerate(flags):
                if not flag:
                    continue
                seg = getSegment(channels[index], image, bounds)
                seg = cap.flattenImage(adjustSize(seg, segSize))
                guesses = ann.run(seg)
                charIndex = cap.argmax(guesses)
                ratings.append((guesses[charIndex], charIndex, index, bounds))
        if not ratings:
            # Nothing could be scored here (the width range was empty or no
            # channel was flagged).  max() would raise ValueError on the
            # empty list, so stop and keep what has been recognised so far.
            break
        _, charIndex, channelIndex, bounds = max(ratings, key=itemgetter(0))
        chars.append(charset[charIndex])
        # Blank the winning region so it is not picked up again.
        bestChannel = channels[channelIndex]
        cv.SetImageROI(bestChannel, bounds)
        cv.Set(bestChannel, 96, bestChannel)
        cv.ResetImageROI(bestChannel)
        bestBounds.append(bounds)
        # bounds is used as a cv ROI rectangle, so index 2 is its width.
        x -= bounds[2]
    cap.processExtras([cap.drawComponents(image, bestBounds)],
                      addr, extras, cap.CAP_STAGE_RECOGNISE)
    return "".join(reversed(chars))
def mainLoop(): n_iter = 0 last_save = 0 min_test_MSE = 1.0 max_iters_after_save = 50 try: while True: n_iter += 1 print "Iteration: %5d " % (n_iter), seg_copy = map(lambda (c, seg): (c, cv.CloneImage(seg)), segments) seg_copy = map(lambda (c, seg): (c, spoil(seg)), seg_copy) shuffle(seg_copy) f = open(train_file, "w") f.write("%d %d %d\n" % (len(segments), num_input, num_output)) for c, image in seg_copy: image = adjustSize(image, (segW, segH)) for y in range(image.height): for x in range(image.width): n = image[y, x] / 159.375 - 0.8 f.write("%f " % n) f.write("\n") n = charset.index(c) f.write("-1 " * n + "1" + " -1" * (num_output - n - 1) + "\n") f.close() train = libfann.training_data() train.read_train_from_file(train_file) ann.train_epoch(train) train.destroy_train() print "Train MSE: %f " % (ann.get_MSE()), print "Train bit fail: %5d " % (ann.get_bit_fail()), ann.test_data(test) mse = ann.get_MSE() print "Test MSE: %f " % (mse), print "Test bit fail: %5d " % (ann.get_bit_fail()), if mse < min_test_MSE: min_test_MSE = mse ann.save(ann_file) last_save = n_iter print "saved", if n_iter - last_save > max_iters_after_save: break print except KeyboardInterrupt: print "Interrupted by user."
from pyfann import libfann
from lconsts import segW, segH, num_input, num_output, charset, train_file, ann_file

# Must run before the import below (presumably cap lives in the parent
# directory -- confirm).
sys.path.append("..")
from cap import resizeFit as adjustSize, segment_dir

# Dump every image in segment_dir to train_file: one header line, then for
# each sample a line of inputs followed by a line of target outputs
# (looks like the FANN text training format).
segments = os.listdir(segment_dir)
# `with` closes the file even if loading an image or a charset lookup fails.
with open(train_file, "w") as f:
    # Header: sample count, inputs per sample, outputs per sample.
    f.write("%d %d %d\n" % (len(segments), num_input, num_output))
    for name in segments:
        image = cv.LoadImage(os.path.join(segment_dir, name),
                             cv.CV_LOAD_IMAGE_GRAYSCALE)
        image = adjustSize(image, (segW, segH))
        for y in range(image.height):
            for x in range(image.width):
                # Maps a grayscale value of 0..255 onto -0.8..0.8.
                n = image[y, x] / 159.375 - 0.8
                f.write("%f " % n)
        f.write("\n")
        # The sample's label is the first character of its file name.
        c = os.path.splitext(name)[0][0]
        # Target line: 1 at the character's index, -1 elsewhere.
        n = charset.index(c)
        f.write("-1 " * n + "1" + " -1" * (num_output - n - 1) + "\n")

print("Samples: %d" % len(segments))
print("Input: %d" % num_input)
print("Output: %d" % num_output)