def script_for_document(scriptjson, project, pid):
    """
    Rewire a JSON batch script so that it runs against one stored document.

    The script's input-stage node is replaced by a
    ``storage.DocImageFileIn`` node, and two ``storage.DocWriter`` nodes
    are added: one fed by the recognise-stage node (attribute
    ``transcript``) and one fed by that node's first input (attribute
    ``binary``).

    Returns the modified script serialised as indented JSON.
    """
    document = project.get_storage().get(pid)
    tree = script.Script(json.loads(scriptjson))
    validate_batch_script(tree)

    # Output labels are derived from the image label: "page.png" gives
    # "page.bin.png" for the binary and "page.html" for the transcript.
    image_label = document.image_label
    stem, extension = os.path.splitext(image_label)
    binary_label = stem + ".bin" + extension
    transcript_label = stem + ".html"

    old_input = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
    recogniser = tree.get_nodes_by_attr("stage", stages.RECOGNIZE)[0]
    # assume the binary is the first input to the recogniser
    binary_source = recogniser.input(0)

    new_input = tree.new_node(
        "storage.DocImageFileIn",
        image_label,
        params=[("project", project.pk), ("pid", pid)])
    transcript_writer = tree.add_node(
        "storage.DocWriter",
        transcript_label,
        params=[
            ("project", project.pk),
            ("pid", pid),
            ("attribute", "transcript"),
        ])
    binary_writer = tree.add_node(
        "storage.DocWriter",
        binary_label,
        params=[
            ("project", project.pk),
            ("pid", pid),
            ("attribute", "binary"),
        ])

    tree.replace_node(old_input, new_input)
    transcript_writer.set_input(0, recogniser)
    binary_writer.set_input(0, binary_source)

    return json.dumps(tree.serialize(), indent=2)
def run_preset(request):
    """
    Execute a script (sent as JSON).

    Reads two POST fields: ``script`` (the JSON-encoded node list) and
    ``node`` (the name of the node to evaluate, defaulting to ``""``).
    The script is validated and, if acceptable, queued as a
    ``run.script`` task on the ``interactive`` queue.

    Returns a JSON ``HttpResponse`` containing one of:
      * ``status="VALIDATION"`` and ``errors`` when validation reports
        errors;
      * ``status="NOSCRIPT"`` when the named node is not found and the
        script has no terminal nodes;
      * ``node``, ``task_id``, ``status`` and ``results`` taken from the
        queued task otherwise.
    """
    evalnode = request.POST.get("node", "")
    nodes = json.loads(request.POST.get("script"))
    tree = script.Script(nodes)

    errors = tree.validate()
    if errors:
        return HttpResponse(
            json.dumps(dict(status="VALIDATION", errors=errors)),
            mimetype="application/json")

    # This lookup only rejects scripts with nothing to evaluate; the raw
    # ``evalnode`` name (not a resolved node) is what is handed to the task.
    if tree.get_node(evalnode) is None and not tree.get_terminals():
        return HttpResponse(
            json.dumps(dict(status="NOSCRIPT")),
            mimetype="application/json")

    # Named ``task`` because ``async`` is a reserved word from Python 3.7.
    task = OcrTask.run_celery_task(
        "run.script",
        (evalnode, nodes, request.output_path, _cache_name(request)),
        untracked=True,
        asyncronous=True,
        queue="interactive")

    out = dict(
        node=evalnode,
        task_id=task.task_id,
        status=task.status,
        results=task.result,
    )
    response = HttpResponse(mimetype="application/json")
    json.dump(out, response, ensure_ascii=False)
    return response
def run(nodelist, outpath):
    """
    Evaluate a script and send its first terminal node to a file.

    A ``util.FileOut`` node pointing at the absolute form of ``outpath``
    is attached to the first terminal node of the script and evaluated.
    ``NODETREE_WRITE_FILEOUT`` is set to ``"1"`` in the process
    environment before evaluation (presumably what enables FileOut nodes
    to write -- confirm against the node implementation).

    Raises ``IndexError`` if the script has no terminal nodes.
    """
    tree = script.Script(nodelist)
    terminal = tree.get_terminals()[0]

    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Rendering to %s" % outpath)

    os.environ["NODETREE_WRITE_FILEOUT"] = "1"
    writer = tree.add_node(
        "util.FileOut",
        "Output",
        params=[("path", os.path.abspath(outpath))])
    writer.set_input(0, terminal)
    writer.eval()
def validate_preset(self, data):
    """
    Check a script against the predicates stored on this object.

    ``self.data`` is parsed as JSON and iterated as a mapping of name to
    a sequence of predicates. Each predicate is passed, with its name and
    the script built from ``data``, to ``self.validate_predicate``.

    Returns a flat list of every error reported (empty when there are
    none).
    """
    predicates_by_name = json.loads(self.data)
    tree = script.Script(data)

    collected = []
    for name, predicates in predicates_by_name.iteritems():
        for predicate in predicates:
            found = self.validate_predicate(name, predicate, tree)
            # a falsy result (None or empty) means nothing to report
            if not found:
                continue
            collected.extend(found)
    return collected
def test_invalid_scripts(self):
    """
    Check that supposedly invalid scripts DO raise errors.

    Only entries of ``self.invalidscripts`` whose name starts with
    ``invalid`` are exercised; evaluating any of their terminal nodes
    must raise ``exceptions.ValidationError``.
    """
    for name, nodes in self.invalidscripts.iteritems():
        if name.startswith("invalid"):
            tree = script.Script(nodes)
            terminals = tree.get_terminals()
            self.assertTrue(len(terminals) > 0,
                            msg="No terminal nodes found.")
            # every terminal must fail validation when evaluated
            for terminal in terminals:
                self.assertRaises(exceptions.ValidationError, terminal.eval)
def clear_node_cache(request):
    """
    Clear the preset cache for a single node.

    The node is looked up by the POSTed ``node`` name in the script given
    by the POSTed ``script`` JSON, then handed to the configured cacher's
    ``clear_cache``. Always responds with the JSON body ``{"ok": true}``.
    """
    node_name = request.POST.get("node")
    tree = script.Script(json.loads(request.POST.get("script")))
    target = tree.get_node(node_name)

    cacher_class = pluginutils.get_dzi_cacher(settings)
    cache_root = os.path.join(settings.MEDIA_ROOT, settings.TEMP_PATH)
    cacher = cacher_class(path=cache_root, key=_cache_name(request))
    cacher.clear_cache(target)

    payload = json.dumps({"ok": True})
    return HttpResponse(payload, mimetype="application/json")
def run(self, project_pk, pid, scriptjson):
    """
    Runs the convert action.

    Loads the document ``pid`` from the storage of project
    ``project_pk``, removes any existing ``binary`` dzi file and its
    ``_files`` directory, then builds the script from ``scriptjson`` and
    evaluates every terminal node.

    Before evaluation the serialised script is stored on the document
    (``script_content``, ``script_label``, ``script_mimetype``) and the
    document is saved with ``ocr_status`` set to ``status.RUNNING``. Any
    exception raised during that work is logged and ``ocr_status`` is set
    to ``status.ERROR``.
    """
    project = Project.objects.get(pk=project_pk)
    storage = project.get_storage()
    doc = storage.get(pid)
    logger = self.get_logger()
    logger.debug("Running Document Batch Item: %s", pid)

    # Try to delete the existing binary dzi file and its companion
    # "_files" directory. Each removal is attempted on its own so that a
    # missing .dzi file does not cause the directory to be left behind.
    dzipath = storage.document_attr_dzi_path(doc, "binary")
    dzifiles = os.path.splitext(dzipath)[0] + "_files"
    for remove, target in ((os.unlink, dzipath), (shutil.rmtree, dzifiles)):
        try:
            remove(target)
        except OSError:
            # best effort: nothing to delete, or it could not be deleted
            pass

    progress_handler = get_progress_callback(self.request.id)
    abort_handler = get_abort_callback(self.request.id)
    progress_handler(0)

    tree = script.Script(
        json.loads(scriptjson),
        nodekwargs=dict(
            logger=logger,
            abort_func=abort_handler,
            progress_func=progress_handler))
    serialized = json.dumps(tree.serialize(), indent=2)
    logger.debug("Running tree: %s", serialized)

    try:
        # write out the binary... this should cache its input
        os.environ["NODETREE_WRITE_FILEOUT"] = "1"
        doc.script_content = serialized
        doc.script_label = "%s.json" % os.path.splitext(doc.label)[0]
        doc.script_mimetype = "application/json"
        # mark the document as being processed
        doc.ocr_status = status.RUNNING
        doc.save()
        # process the nodes
        for terminal in tree.get_terminals():
            terminal.eval()
    except Exception as err:
        logger.exception("Unhandled exception: %s", err)
        # Record the failure on the document.
        # NOTE(review): no doc.save() follows in the visible code --
        # confirm that this status actually gets persisted.
        doc.ocr_status = status.ERROR
def test_valid_scripts(self):
    """
    Check that supposedly valid scripts don't raise errors.

    Entries of ``self.validscripts`` whose name starts with ``invalid``
    are skipped. For the rest, every terminal node is evaluated and the
    exact type of its output must be one of ``unicode``, ``dict``,
    ``list`` or ``numpy.ndarray``.
    """
    for name, nodes in self.validscripts.iteritems():
        if not name.startswith("invalid"):
            tree = script.Script(nodes)
            terminals = tree.get_terminals()
            self.assertTrue(len(terminals) > 0,
                            msg="No terminal nodes found.")
            # check we get an expected type from evaluating each terminal
            for terminal in terminals:
                output = terminal.eval()
                output_type = type(output)
                self.assertIn(
                    output_type,
                    (unicode, dict, list, numpy.ndarray),
                    msg="Unexpected output type for node %s: %s" % (
                        terminal.name, type(output)))
def script_for_page_file(scriptjson, filepath, writepath):
    """
    Rewire a JSON batch script so that it runs against one image file.

    The input-stage node's ``path`` parameter is set to ``filepath``, and
    a ``util.FileOut`` node named ``OutputBinary`` is attached to the
    first input of the recognise-stage node. Its path comes from
    ``ocrutils.get_binary_path(filepath, writepath)``.

    Returns the modified script serialised as indented JSON.
    """
    tree = script.Script(json.loads(scriptjson))
    validate_batch_script(tree)

    # point the script's input node at the requested file
    source_node = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
    source_node.set_param("path", filepath)

    # attach a fileout node to the binary input of the recognizer and
    # save it as a binary file
    recogniser = tree.get_nodes_by_attr("stage", stages.RECOGNIZE)[0]
    binary_path = ocrutils.get_binary_path(filepath, writepath)
    writer_params = [
        ("path", os.path.abspath(binary_path).encode()),
        ("create_dir", True),
    ]
    binary_writer = tree.add_node(
        "util.FileOut", "OutputBinary", params=writer_params)
    binary_writer.set_input(0, recogniser.input(0))

    return json.dumps(tree.serialize(), indent=2)
def run(self, evalnode, nodelist, writepath, cachedir):
    """
    Runs the convert action.

    Builds the script from ``nodelist`` with a dzi cacher keyed on
    ``cachedir`` and evaluates the node named ``evalnode``, falling back
    to the first terminal node when no node of that name exists.

    On ``exceptions.NodeError`` the error is logged and a dict with
    ``type="error"``, the failing node's label and the error message is
    returned. ``writepath`` is not used in the visible code.
    """
    logger = self.get_logger()
    cacheclass = pluginutils.get_dzi_cacher(settings)
    cacher = cacheclass(
        path=os.path.join(settings.MEDIA_ROOT, settings.TEMP_PATH),
        key=cachedir,
        logger=logger)
    logger.debug("Using cacher: %s, Bases %s", cacher, cacheclass.__bases__)

    try:
        tree = script.Script(
            nodelist, nodekwargs=dict(logger=logger, cacher=cacher))
        term = tree.get_node(evalnode)
        if term is None:
            term = tree.get_terminals()[0]
        # NOTE(review): ``result`` is not returned or otherwise used in
        # the visible code, so the success path yields None -- confirm
        # whether the output is meant to be handed back to the caller.
        result = term.eval()
    except exceptions.NodeError as err:
        logger.error("Node Error (%s): %s", err.node, err.message)
        return dict(type="error", node=err.node.label, error=err.message)
def handle(self, *args, **options):
    """
    Run a script file against an input file and write the result.

    Expects exactly three positional arguments: the JSON script file,
    the input file and the output file. The script's input-stage node is
    pointed at the input file, and a ``util.FileOut`` node for the output
    file is attached to the first terminal node and evaluated.

    Raises ``CommandError`` for a wrong argument count, a script file
    that cannot be read or parsed, or a script file containing JSON
    ``null``.
    """
    if len(args) != 3:
        raise CommandError("Usage: %s" % self.help)
    scriptfile, infile, outfile = args

    try:
        with open(scriptfile, "r") as script_handle:
            nodes = json.load(script_handle)
    except Exception:
        raise CommandError("Invalid script file: %s" % scriptfile)
    if nodes is None:
        raise CommandError("No nodes found in script: %s" % scriptfile)

    tree = script.Script(nodes)
    source_node = tree.get_nodes_by_attr("stage", stages.INPUT)[0]
    source_node.set_param("path", infile)
    terminal = tree.get_terminals()[0]

    sys.stderr.write("Rendering to %s\n" % outfile)
    os.environ["NODETREE_WRITE_FILEOUT"] = "1"
    output_path = os.path.abspath(outfile)
    writer = tree.add_node(
        "util.FileOut", "Output", params=[("path", output_path)])
    writer.set_input(0, terminal)
    writer.eval()
def run_preset(cls, preset, handle):
    """
    Run a preset on the given handle.

    The script is built from the JSON in ``preset.data`` and passed with
    ``handle`` through ``cls._set_script_input``. Returns the result of
    evaluating the first terminal node of the script that call returns.
    """
    nodes = json.loads(preset.data)
    tree = cls._set_script_input(script.Script(nodes), handle)
    first_terminal = tree.get_terminals()[0]
    return first_terminal.eval()