def cleanup(timeout=604800, remove_errors=False):
    """
    The /cleanup handler.

    Remove builds that are finished and haven't been accessed within the
    timeout, which is by default 7 days. With remove_errors, also removes
    builds with status Error. Requires the secret_key parameter in the query.
    """
    secret_key = request.values.get('secret_key', '')
    if check_secret_key(secret_key):
        builds = app.config["BUILDS"]
        to_remove = []
        for h, b in builds.items():
            # log.info("accessed_time %s, hash %s" % (time.time() - b.accessed_time, h))
            if (finished(b.status) and time.time() - b.accessed_time > timeout
                    or b.status in [Status.Error, Status.ParseError] and remove_errors):
                to_remove.append((h, b))
        res = []
        for h, b in to_remove:
            log.info("Removing %s" % h)
            b.remove_files()
            del builds[h]
            res.append("<removed hash='%s'/>" % h)
        if len(res) == 0:
            res = "<message>No hashes to be removed.</message>\n"
        else:
            res = "\n".join(res)
            res = "<message>\n%s</message>\n" % res
    else:
        res = "<error>Failed to run cleanup: secret key could not be confirmed.</error>\n"
    return Response(res, mimetype='application/xml')
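# A minimal sketch of how the /cleanup handler above might be called from a
# client, assuming it is registered at the route /cleanup; the backend base
# URL and secret key below are placeholders, not values from the source.
import requests

BACKEND_URL = "https://example.org/backend"  # hypothetical deployment URL

# Ask the backend to remove finished builds that have passed the timeout.
response = requests.get(BACKEND_URL + "/cleanup",
                        params={"secret_key": "<secret>"})
print(response.text)  # XML: <message>...</message> or <error>...</error>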
def zip_result(self):
    """Create a zip archive of all the result files in the export.original folder."""
    log.info("Creating zip file...")
    assert finished(self.status)
    if self.status == Status.Done:
        if os.listdir(self.export_dir):
            # Write the archive both to disk (zipf) and to an in-memory file object (zipflike)
            zipf = zipfile.ZipFile(self.zipfpath, 'w', compression=zipfile.ZIP_DEFLATED)
            filelike = io.BytesIO()
            with zipfile.ZipFile(filelike, 'w', compression=zipfile.ZIP_DEFLATED) as zipflike:
                for root, _dirs, files in os.walk(self.export_dir):
                    for xmlfile in files:
                        newfilename = xmlfile[:-4] + "_annotated.xml"
                        zipflike.write(os.path.join(root, xmlfile), arcname="korpus/" + newfilename)
                        zipf.write(os.path.join(root, xmlfile), arcname="korpus/" + newfilename)
            zipf.close()  # close the on-disk archive so its central directory is written
            return filelike
        else:
            # Not used at the moment
            out = ['<trace>' + escape(self.trace) + '</trace>',
                   '<stderr>' + escape(self.stderr) + '</stderr>',
                   '<stdout>' + escape(self.stdout) + '</stdout>']
            return "\n".join(out) + "\n"
    else:
        # Not used at the moment
        out = ['<trace>' + escape(self.trace) + '</trace>',
               '<stderr>' + escape(self.stderr) + '</stderr>',
               '<stdout>' + escape(self.stdout) + '</stdout>']
        return "\n".join(out) + "\n"
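# A hedged sketch of how the in-memory zip returned by zip_result() could be
# served by a download view. The route, the build lookup and the filename are
# assumptions for illustration; note that the BytesIO must be rewound first.
from flask import request, send_file

@app.route('/download')  # hypothetical route, assumes the module-level app
def download():
    build = app.config["BUILDS"][request.values.get('hash', '')]  # assumed lookup
    filelike = build.zip_result()
    filelike.seek(0)  # rewind the BytesIO before streaming it
    # attachment_filename is the pre-2.0 Flask parameter; newer Flask uses download_name
    return send_file(filelike, mimetype='application/zip',
                     as_attachment=True, attachment_filename='korpus.zip')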
def get_result():
    assert finished(build.status)
    build.access()
    try:
        return build.result() + '</result>\n'
    except Exception as error:
        log.error("Error while getting result: %s" % str(error))
        return "<error>%s\n</error>\n</result>\n" % ERROR_MSG["no_result"]
def join_build(build, incremental):
    """
    Join an existing build and send increment messages until it is completed,
    then send the build's result.
    """
    # Make a new queue which receives messages from the builder process
    queue = Queue()
    build.queues.append(queue)

    def get_result():
        assert finished(build.status)
        build.access()
        return build.result() + '</result>\n'

    # Send this build's hash
    yield "<build hash='%s'/>\n" % build.build_hash

    # Result already exists
    if finished(build.status):
        log.info("Result already exists since %s" % pretty_epoch_time(build.status_change_time))
        yield get_result()
    # Listen for completion
    else:
        if incremental and build.status == Status.Running:
            log.info("Already running, sending increment message")
            yield build.increment_msg()
        while True:
            msg_type, msg = queue.get()
            if msg_type == Message.StatusChange:
                log.info("Message %s" % Status.lookup[msg])
                # Has status changed to finished?
                if finished(msg):
                    break
            # Increment message
            elif incremental and msg_type == Message.Increment:
                yield msg
        log.info("Getting result...")
        yield get_result()
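# To show how a generator like join_build is typically consumed, here is a
# minimal sketch that streams its fragments as a chunked XML response from a
# Flask view. The route, the build lookup and the surrounding <result> wrapper
# are assumptions (get_result() above appends the closing </result> tag).
from flask import Response, request, stream_with_context

@app.route('/join')  # hypothetical route, assumes the module-level app
def join():
    build = app.config["BUILDS"][request.values.get('hash', '')]  # assumed lookup

    def generate():
        yield "<result>\n"  # opened here, closed by the build's result
        for message in join_build(build, incremental=True):
            yield message   # hash, increment messages, then the final result

    return Response(stream_with_context(generate()), mimetype='application/xml')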
def cleanup(builds, timeout=86400, remove_errors=False):
    """
    Remove builds that are finished and haven't been accessed within the
    timeout, which is by default 24 hours. With remove_errors, also removes
    all builds with status Error.
    """
    to_remove = []
    for h, b in builds.items():
        if (finished(b.status) and time.time() - b.accessed_time > timeout
                or b.status == Status.Error and remove_errors):
            log.info("Removing %s" % h)
            b.remove_files()
            to_remove.append(h)
    res = ""
    for h in to_remove:
        del builds[h]
        res += "<removed hash='%s'/>\n" % h
    return [res]
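# A small sketch of how this cleanup variant might be run periodically,
# assuming a builds dict that is shared with the rest of the backend; the
# timer-based scheduling and the one-hour interval are illustrative only.
import threading

def schedule_cleanup(builds, interval=3600):
    """Run cleanup() over the shared builds dict once per interval (illustrative)."""
    def run():
        messages = cleanup(builds, remove_errors=True)
        log.info("Cleanup done: %s" % "".join(messages))
        threading.Timer(interval, run).start()  # re-arm the timer
    run()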
def result(self):
    """
    Return the result: either a built corpus with possible warning messages,
    or the error messages for an unsuccessful build.
    """
    assert finished(self.status)
    if self.status == Status.Done:
        out = []
        if self.warnings:
            out.append('<warning>' + escape(self.warnings) + '</warning>')
        try:
            with open(self.out_file, "r") as f:
                out.append(f.read())
            return "\n".join(out)
        except Exception:
            self.change_status(Status.Error)
            log.exception("Result file is missing")
            return "<error>Result file is missing</error>"
    else:
        out = ['<trace>' + escape(self.trace) + '</trace>',
               '<stderr>' + escape(self.stderr) + '</stderr>',
               '<stdout>' + escape(self.stdout) + '</stdout>']
        return "\n".join(out) + "\n"
def join_build(build, incremental, fileupload=False):
    """
    Join an existing build and send increment messages until it is completed.
    Then send the build's result or the link to the downloadable zip file.
    """
    # Make a new queue which receives messages from the builder process
    queue = Queue()
    build.queues.append(queue)

    def get_result():
        assert finished(build.status)
        build.access()
        try:
            return build.result() + '</result>\n'
        except Exception as error:
            log.error("Error while getting result: %s" % str(error))
            return "<error>%s\n</error>\n</result>\n" % ERROR_MSG["no_result"]

    # Send this build's hash
    if fileupload:
        yield "<build hash='%s' type='files'/>\n" % build.build_hash, build
    else:
        yield "<build hash='%s'/>\n" % build.build_hash

    # Result already exists
    if finished(build.status):
        log.info("Result already exists since %s" % pretty_epoch_time(build.status_change_time))
        if fileupload:
            yield get_result(), build
        else:
            yield get_result()
    # Listen for completion
    else:
        if incremental and build.status == Status.Running:
            log.info("Already running, sending increment message")
            if fileupload:
                yield build.increment_msg(), build
            else:
                yield build.increment_msg()
        while True:
            msg_type, msg = queue.get()
            if msg_type == Message.StatusChange:
                log.info("Message %s" % Status.lookup[msg])
                # Has status changed to finished?
                if finished(msg):
                    break
            # Increment message
            elif incremental and msg_type == Message.Increment:
                if fileupload:
                    yield msg, build
                else:
                    yield msg
        log.info("Getting result...")
        if fileupload:
            yield get_result(), build
        else:
            yield get_result()
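# Unlike the earlier join_build, the fileupload variant yields (message, build)
# tuples so the consumer can keep track of which build each fragment belongs
# to. A small sketch of such a consumer under that assumption; the helper name
# is hypothetical.
def consume_fileupload(build):
    """Forward XML fragments from a fileupload build, logging their origin."""
    for message, source_build in join_build(build, incremental=True, fileupload=True):
        log.info("Fragment for build %s" % source_build.build_hash)
        yield message  # pass on only the XML fragment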
def get_result():
    assert finished(build.status)
    build.access()
    return build.result() + '</result>\n'
def result(self):
    """
    Return the result: either a built corpus with possible warning messages,
    or the error messages for an unsuccessful build.
    """
    assert finished(self.status)
    out = []

    # Result when Parse Error
    if self.status == Status.ParseError:
        if self.warnings:
            out.append('<warning>' + escape(self.warnings) + '</warning>')
        out.append("<error>%s</error>" % ERROR_MSG["parsing_error"])
        log.error(ERROR_MSG["parsing_error"])
        return "\n".join(out)

    # Result when Done
    elif self.status == Status.Done:
        download_link = "%s/download?hash=%s" % (Config.backend, self.build_hash)
        if self.warnings:
            out.append('<warning>' + escape(self.warnings) + '</warning>')

        if hasattr(self, 'out_files'):
            for out_file in self.out_files:
                if not os.path.exists(out_file):
                    self.change_status(Status.Error)
                    out.append("<error>%s</error>" % ERROR_MSG["missing_file"])
                    log.error(ERROR_MSG["missing_file"])
                    return "\n".join(out)
            result = "<corpus link='%s'/>\n" % download_link
            return result
        else:
            # Check for empty input (e.g. "<text></text>")
            try:
                wordfile = os.path.join(self.annotations_dir, self.filename + '.token.word')
                with open(wordfile, "r") as f:
                    word_contents = f.read()
                if not word_contents:
                    raise ValueError('empty token.word file')
            except Exception:
                self.change_status(Status.Error)
                log.exception(ERROR_MSG["empty_input"])
                return "<error>%s</error>" % ERROR_MSG["empty_input"]

            # Check that the result file is not empty
            try:
                with open(self.out_file, "r") as f:
                    result_contents = f.read()
                if not result_contents.strip("<corpus>\n").rstrip("</corpus>\n\n"):
                    raise ValueError('empty result file')
                else:
                    result_contents = result_contents.replace("<corpus", "<corpus link='%s'" % download_link)
                    out.append(result_contents)
                    return "\n".join(out)
            except Exception:
                self.change_status(Status.Error)
                log.exception(ERROR_MSG["no_result"])
                return "<error>%s</error>" % ERROR_MSG["no_result"]

    else:
        out = ['<trace>' + escape(self.trace) + '</trace>',
               '<stderr>' + escape(self.stderr) + '</stderr>',
               '<stdout>' + escape(self.stdout) + '</stdout>',
               '<error>' + ERROR_MSG["no_result"] + '</error>']
        return "\n".join(out) + "\n"
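# A hedged sketch of how a client might handle the "<corpus link='...'/>"
# element that result() produces when the build has separate out_files. The
# element and attribute names follow the strings built above; the wrapping
# root element, the helper name and the use of requests are illustrative
# assumptions.
import xml.etree.ElementTree as ET
import requests

def fetch_corpus(result_xml, dest="korpus.zip"):
    """Download the zip that a <corpus link='...'/> result points to."""
    root = ET.fromstring("<result>%s</result>" % result_xml)  # wrap the fragments in one root
    corpus = root.find("corpus")
    if corpus is None:
        raise RuntimeError("no corpus element; the build probably failed")
    response = requests.get(corpus.get("link"))
    with open(dest, "wb") as f:
        f.write(response.content)
    return dest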