else: for c in line: transcript += c.encode('utf-8', 'backslashreplace') + ' ' return transcript.rstrip() parser = argparse.ArgumentParser( "Generate NC File for Urdu-1D-NoPos experiments") parser.add_argument("-l", "--height", default=48, type=int) parser.add_argument("-t", "--notargets", action="store_true") parser.add_argument("-o", "--ncFileName", default=None) parser.add_argument("files", nargs="*") args = parser.parse_args() #Read all file names images = ocrolib.glob_all(args.files) if len(images) == 0: parser.print_help() sys.exit(0) print "# Input images:", len(images) # load the OCRopus line normalizer (center) print '# Loading OCRopus line normalizer' lnorm = lineest.CenterNormalizer() lnorm.setHeight(args.height) counter = 0 print '# Reading %d Text-line images and corresponding transcription' % len( images)
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument(
    "-d", "--display", type=int, default=0,
    help="display output for every nth iteration, where n=DISPLAY, default: %(default)s")
parser.add_argument("-m", "--movie", default=None)
parser.add_argument("-M", "--moviesample", default=None)
parser.add_argument("-q", "--quiet", action="store_true")
parser.add_argument("-Q", "--nocheck", action="store_true")
parser.add_argument("-p", "--pad", type=int, default=16)
# Optional listing file: input paths may also be supplied one per line.
parser.add_argument(
    "-f", "--file", default=None,
    help="path to file listing input files, one per line")
parser.add_argument("files", nargs="*")
args = parser.parse_args()

# Inputs come from the positional arguments first ...
inputs = ocrolib.glob_all(args.files)

# ... and are then extended with the entries of the listing file, if given.
if args.file is not None:
    print("getting training data from file")
    with open(args.file) as listing:
        for raw_line in listing:
            entry = raw_line.rstrip()
            # Skip blank lines: an empty string is not a usable input path
            # and would otherwise be counted as an input.
            if entry:
                inputs.append(entry)

if len(inputs) == 0:
    # Nothing to train on: show the usage text and stop.
    parser.print_help()
    sys.exit(0)

print("# inputs", len(inputs))

# pre-execute any python commands
if ncomps < lo: return "too few connected components (got %d, wanted >=%d)" % (ncomps, lo) if ncomps > hi * ratio: return "too many connected components (got %d, wanted <=%d)" % (ncomps, hi) return None # compute the list of files to be classified if len(args.files) < 1: parser.print_help() sys.exit(0) print_info("") print_info("#" * 10, (" ".join(sys.argv))[:60]) print_info("") inputs = ocrolib.glob_all(args.files) if not args.quiet: print_info("#inputs" + str(len(inputs))) # disable parallelism when anything is being displayed if args.show >= 0 or args.save is not None: args.parallel = 1 # load the network used for classification try: network = ocrolib.load_object(args.model, verbose=1) for x in network.walk(): x.postLoad() for x in network.walk(): if isinstance(x, lstm.LSTM): x.allocate(5000)