def add_request(lang, filenames):
    """Queue a 'build_djvu' job for the given language and files.

    Builds a request dict (jobname 'build_djvu', run_cmd 'python') whose
    argument list is the script name, ``-lang:<lang>``, ``-only_text`` and
    then every entry of *filenames* in order.  The dict is printed to
    stdout and passed as keyword arguments to
    ``sge_jobs.DbJob().add_request``.

    lang      -- string appended to the ``-lang:`` switch.
    filenames -- iterable of file names appended after the fixed switches.
    """
    # NOTE(review): the script name contains no '~', so expanduser()
    # returns it unchanged -- confirm that a bare relative name is intended.
    script = os.path.expanduser('build_djvu_from_ocr_rate.py')

    job_req = {
        'jobname': 'build_djvu',
        'run_cmd': 'python',
        'force': True,
        'args': [script, '-lang:' + lang, '-only_text'],
        'max_vmem': 768,
    }
    job_req['args'].extend(filenames)

    db_obj = sge_jobs.DbJob()
    # Parenthesised form prints the same thing under the Python 2 print
    # statement and is also valid as a Python 3 call.
    print(job_req)
    db_obj.add_request(**job_req)
def handle_status(params, start_response):
    """Build the job status HTML page and send it through *start_response*.

    Reads the 'filter', 'cmd_filter', 'limit' and 'offset' entries of
    *params*, assembles a page made of the HTML head, the job table and the
    accounting table, calls ``start_response`` with a '200 OK' status and
    returns the page wrapped in a one-element list.

    params         -- mapping of request parameters; accessed with .get()
                      and handed to get_int_param().
    start_response -- callable invoked as start_response(status, headers).
    """
    default_limit = 50
    max_limit = 1000

    state_filter = params.get('filter', '')
    cmd_filter = params.get('cmd_filter', None)
    # presumably get_int_param(params, name, default, maximum) -- verify
    # against its definition.
    limit = get_int_param(params, 'limit', default_limit, max_limit)
    offset = get_int_param(params, 'offset', 0, None)

    db_obj = sge_jobs.DbJob()

    # NOTE(review): the encoded head is concatenated with plain string
    # literals, which only works where encode() returns str (Python 2).
    head = common_html.get_head('hocr', css='shared.css')
    page = head.encode('utf-8') + '\n <body>\n'

    # job_table() also returns the jobs it rendered; accounting_table()
    # receives them.
    jobs_html, jobs = job_table(db_obj, state_filter, limit, offset,
                                default_limit, max_limit, cmd_filter)
    page += jobs_html
    page += accounting_table(db_obj, jobs, state_filter, limit, offset,
                             default_limit, max_limit)
    page += ' </body>\n</html>'

    headers = [
        ('Content-Type', 'text/html; charset=UTF-8'),
        ('Content-Length', str(len(page))),
        ('Access-Control-Allow-Origin', '*'),
    ]
    start_response('200 OK', headers)
    return [page]
def gen_hocr_request(params):
    """Queue an 'hocr' job described by a parameter mapping.

    Looks up ``params['lang']`` and ``params['book']`` (a missing key
    raises KeyError), builds the request and passes it as keyword
    arguments to ``sge_jobs.DbJob().add_request``.

    params -- mapping providing the 'lang' and 'book' strings.
    """
    cmd_args = [
        os.path.expanduser('~/phe/hocr/hocr.py'),
        '-lang:' + params['lang'],
        '-book:' + params['book'],
    ]
    job_req = {
        'jobname': 'hocr',
        'run_cmd': 'python',
        'args': cmd_args,
        'max_vmem': 1024,
    }
    sge_jobs.DbJob().add_request(**job_req)
def add_hocr_request(lang, book, force = False):
    """Queue an 'hocr' job for one book.

    Builds the request and passes it as keyword arguments to
    ``sge_jobs.DbJob().add_request``.

    lang  -- string appended to the ``-lang:`` switch.
    book  -- string appended to the ``-book:`` switch.
    force -- when truthy, the request carries ``'force': True``; otherwise
             the 'force' key is left out entirely.
    """
    script = os.path.expanduser('~/phe/hocr/hocr.py')
    job_req = {
        'jobname': 'hocr',
        'run_cmd': 'python',
        'args': [script, '-lang:' + lang, '-book:' + book],
        'max_vmem': 2048,
    }
    if force:
        # Always the literal True, whatever truthy value was passed in.
        job_req['force'] = True

    sge_jobs.DbJob().add_request(**job_req)
def add_tidy_ocr_request(lang, filename):
    """Queue a 'tidy_ocr' job for one file.

    Builds the request, prints it to stdout and passes it as keyword
    arguments to ``sge_jobs.DbJob().add_request``.

    lang     -- string appended to the ``-lang:`` switch.
    filename -- string passed as the last argument.
    """
    script = os.path.expanduser('~/botpywi/tidy_ocr.py')
    job_req = {
        'jobname': 'tidy_ocr',
        'run_cmd': 'python',
        'force': False,
        'args': [
            script,
            '-lang:' + lang,
            '-monochrome',
            # Concatenating with '' leaves a string unchanged but raises
            # TypeError for a non-string filename.
            '' + filename,
        ],
        'max_vmem': 2048,
    }

    db_obj = sge_jobs.DbJob()
    # Parenthesised form prints the same thing under the Python 2 print
    # statement and is also valid as a Python 3 call.
    print(job_req)
    db_obj.add_request(**job_req)
def queue_pdf_to_djvu(ia_id):
    """Queue a 'pdf_to_djvu' job for one identifier.

    Builds the request, prints it to stdout and passes it as keyword
    arguments to ``sge_jobs.DbJob().add_request``.

    ia_id -- value passed, unmodified, as the script's only argument.
    """
    script = os.path.expanduser('~/phe/ocr/pdf_to_djvu.py')
    # FIXME: later use command line switch to provide a more general
    # service
    cmd_args = [script, ia_id]

    job_req = {
        'jobname': 'pdf_to_djvu',
        'run_cmd': 'python',
        'force': True,
        'args': cmd_args,
        'max_vmem': 2048,
    }

    db_obj = sge_jobs.DbJob()
    # Parenthesised form prints the same thing under the Python 2 print
    # statement and is also valid as a Python 3 call.
    print(job_req)
    db_obj.add_request(**job_req)
def add_ocr_request(lang, filename):
    """Queue an 'ocr' job for one file.

    Builds the request, prints it to stdout and passes it as keyword
    arguments to ``sge_jobs.DbJob().add_request``.

    lang     -- string appended to the ``-lang:`` switch.
    filename -- string passed as the last argument.
    """
    script = os.path.expanduser('~/phe/ocr/ocr_djvu.py')
    job_req = {
        'jobname': 'ocr',
        'run_cmd': 'python',
        'force': True,
        'args': [
            script,
            '-lang:' + lang,
            # Concatenating with '' leaves a string unchanged but raises
            # TypeError for a non-string filename.
            '' + filename,
        ],
        'max_vmem': 2048,
    }

    db_obj = sge_jobs.DbJob()
    # Parenthesised form prints the same thing under the Python 2 print
    # statement and is also valid as a Python 3 call.
    print(job_req)
    db_obj.add_request(**job_req)
def add_hocr_request(lang, sublang, filename):
    """Queue a 'correct_ocr' job for one file.

    Builds the request, prints it to stdout and passes it as keyword
    arguments to ``sge_jobs.DbJob().add_request``.

    NOTE(review): despite its name this function queues a 'correct_ocr'
    job, and an ``add_hocr_request(lang, book, force)`` with a different
    signature is also visible in this source.  If both live in the same
    module the later definition replaces the earlier one -- confirm.

    lang     -- string appended to the ``-lang:`` switch.
    sublang  -- when truthy, ``-sublang:<sublang>`` is appended after the
                file name; otherwise no sublang switch is passed.
    filename -- string passed after the ``-auto`` switch.
    """
    job_req = {
        'jobname': 'correct_ocr',
        'run_cmd': 'python',
        'force': True,
        'args': [
            os.path.expanduser('~/botpywi/correct_ocr.py'),
            '-lang:' + lang,
            '-auto',
            # Concatenating with '' leaves a string unchanged but raises
            # TypeError for a non-string filename.
            '' + filename,
        ],
        'max_vmem': 2048,
    }

    if sublang:
        # The original statement ended with a stray comma, which wrapped
        # the append() result in a throwaway one-element tuple.
        job_req['args'].append('-sublang:' + sublang)

    db_obj = sge_jobs.DbJob()
    # Parenthesised form prints the same thing under the Python 2 print
    # statement and is also valid as a Python 3 call.
    print(job_req)
    db_obj.add_request(**job_req)