def cleanup(timeout=604800, remove_errors=False):
    """
    Handle the /cleanup route.

    Delete every build that is finished and has not been accessed for more
    than ``timeout`` seconds (604800 s, i.e. 7 days, by default). When
    ``remove_errors`` is true, builds whose status is Error or ParseError
    are deleted as well, regardless of when they were last accessed.

    The request must carry a ``secret_key`` value accepted by
    ``check_secret_key``; otherwise nothing is removed and an <error>
    element is returned.

    Returns an XML Response listing the removed hashes, or a message saying
    that there was nothing to remove.
    """
    if not check_secret_key(request.values.get('secret_key', '')):
        return Response(
            "<error>Failed to run cleanup: secret key could not be confirmed.</error>\n",
            mimetype='application/xml')

    builds = app.config["BUILDS"]

    def should_remove(build):
        # Finished and not accessed within the timeout ...
        if finished(build.status) and time.time() - build.accessed_time > timeout:
            return True
        # ... or failed, when the caller asked for failed builds to be purged.
        return bool(build.status in [Status.Error, Status.ParseError] and remove_errors)

    # Select first, delete afterwards: the dict must not change while iterated.
    doomed = [(build_hash, build) for build_hash, build in builds.items()
              if should_remove(build)]

    removed = []
    for build_hash, build in doomed:
        log.info("Removing %s" % build_hash)
        build.remove_files()
        del builds[build_hash]
        removed.append("<removed hash='%s'/>" % build_hash)

    if removed:
        body = "<message>\n%s</message>\n" % "\n".join(removed)
    else:
        body = "<message>No hashes to be removed.</message>\n"

    return Response(body, mimetype='application/xml')
Exemple #2
0
 def zip_result(self):
     """
     Create a zip archive of all the result files found under self.export_dir.

     When the build status is Done and the export directory is not empty,
     every file below it is added (as "korpus/<name>_annotated.xml", where
     <name> is the file name minus its last four characters) to two
     archives: one written to self.zipfpath on disk and one kept in memory.

     Returns the in-memory io.BytesIO object in that case. Otherwise returns
     a string with the build's trace, stderr and stdout wrapped in XML
     elements.
     """
     log.info("Creating zip file...")
     assert finished(self.status)

     if self.status == Status.Done and os.listdir(self.export_dir):
         filelike = io.BytesIO()
         # Both archives are context-managed: a ZipFile only writes its
         # central directory on close, so the on-disk archive must be closed
         # explicitly (it was previously left open).
         with zipfile.ZipFile(self.zipfpath, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
             with zipfile.ZipFile(filelike, 'w', compression=zipfile.ZIP_DEFLATED) as zipflike:
                 for root, _dirs, files in os.walk(self.export_dir):
                     for xmlfile in files:
                         source = os.path.join(root, xmlfile)
                         # Drops the last four characters of the name
                         # (presumably ".xml" -- confirm against the exporter).
                         arcname = "korpus/" + xmlfile[:-4] + "_annotated.xml"
                         zipflike.write(source, arcname=arcname)
                         zipf.write(source, arcname=arcname)
         return filelike

     # Not used at the moment: textual report for builds without result files.
     out = ['<trace>' + escape(self.trace) + '</trace>',
            '<stderr>' + escape(self.stderr) + '</stderr>',
            '<stdout>' + escape(self.stdout) + '</stdout>']
     return "\n".join(out) + "\n"
Exemple #3
0
 def get_result():
     """
     Return the finished build's result followed by the closing </result> tag.

     Marks the build as accessed first. If producing the result raises, the
     failure is logged and an <error> element holding ERROR_MSG["no_result"]
     is returned instead (still followed by </result>).
     """
     assert finished(build.status)
     build.access()
     try:
         payload = build.result()
         return payload + '</result>\n'
     except Exception as exc:
         log.error("Error while getting result: %s" % str(exc))
         fallback = ERROR_MSG["no_result"]
         return "<error>%s\n</error>\n</result>\n" % fallback
Exemple #4
0
def join_build(build, incremental):
    """
    Generator that attaches to an existing build.

    Yields the build's hash first, then (when ``incremental`` is true) the
    increment messages received while the build is in progress, and finally
    the build's result once its status is finished.
    """
    # Register a fresh queue on the build; status changes and increments are
    # read from it below.
    inbox = Queue()
    build.queues.append(inbox)

    def get_result():
        assert finished(build.status)
        build.access()
        return build.result() + '</result>\n'

    # Send this build's hash
    yield "<build hash='%s'/>\n" % build.build_hash

    # Result already exists: send it and stop.
    if finished(build.status):
        log.info("Result already exists since %s" %
                 pretty_epoch_time(build.status_change_time))
        yield get_result()
        return

    # Otherwise listen for completion.
    if incremental and build.status == Status.Running:
        log.info("Already running, sending increment message")
        yield build.increment_msg()

    while True:
        kind, payload = inbox.get()
        if kind == Message.StatusChange:
            log.info("Message %s" % Status.lookup[payload])
            # Stop listening once the status has changed to a finished one.
            if finished(payload):
                break
        elif incremental and kind == Message.Increment:
            yield payload

    log.info("Getting result...")
    yield get_result()
Exemple #5
0
def cleanup(builds, timeout=86400, remove_errors=False):
    """
    Remove builds that are finished and haven't been accessed within the
    timeout, which is by default 24 hours (86400 seconds).

    With remove_errors, also removes all builds with status Error.

    ``builds`` is a dict mapping build hashes to build objects; removed
    entries are deleted from it in place after their files have been removed.

    Returns a one-element list holding a string with one
    "<removed hash='...'>" line per removed build (an empty string when
    nothing was removed).
    """
    to_remove = []
    # items() works on both Python 2 and Python 3 dicts; iteritems() exists
    # only on Python 2 and raises AttributeError on Python 3.
    for h, b in builds.items():
        if (finished(b.status) and time.time() - b.accessed_time > timeout
                or b.status == Status.Error and remove_errors):
            log.info("Removing %s" % h)
            b.remove_files()
            to_remove.append(h)

    # Entries are deleted only after the loop so the dict is not modified
    # while it is being iterated.
    for h in to_remove:
        del builds[h]

    # NOTE(review): the emitted <removed> element is not self-closed; left
    # unchanged here -- confirm what consumers of this output expect.
    res = "".join("<removed hash='%s'>\n" % h for h in to_remove)
    return [res]
Exemple #6
0
 def result(self):
     """
     Return the result of a finished build.

     For status Done: the contents of self.out_file, preceded by a <warning>
     element when self.warnings is set. If reading the file fails, the
     status is changed to Error and an <error> element is returned instead.

     For any other finished status: the build's trace, stderr and stdout,
     each escaped and wrapped in its own XML element.
     """
     assert finished(self.status)
     if self.status == Status.Done:
         out = []
         if self.warnings:
             out.append('<warning>' + escape(self.warnings) + '</warning>')
         try:
             with open(self.out_file, "r") as f:
                 out.append(f.read())
             return "\n".join(out)
         except Exception:
             # "except Exception" rather than a bare "except" so that
             # KeyboardInterrupt and SystemExit are not swallowed here.
             self.change_status(Status.Error)
             log.exception("Result file is missing")
             return "<error>Result file is missing </error>"
     else:
         out = ['<trace>' + escape(self.trace) + '</trace>',
                '<stderr>' + escape(self.stderr) + '</stderr>',
                '<stdout>' + escape(self.stdout) + '</stdout>']
         return "\n".join(out) + "\n"
Exemple #7
0
def join_build(build, incremental, fileupload=False):
    """
    Generator that attaches to an existing build.

    Yields the build's hash, then (when ``incremental`` is true) increment
    messages received while the build is in progress, and finally the
    build's result once its status is finished.

    With ``fileupload`` every yielded item is a ``(message, build)`` tuple
    and the hash message carries ``type='files'``; otherwise plain message
    strings are yielded.
    """
    # Register a fresh queue on the build; status changes and increments are
    # read from it below.
    inbox = Queue()
    build.queues.append(inbox)

    def get_result():
        assert finished(build.status)
        build.access()
        try:
            return build.result() + '</result>\n'
        except Exception as error:
            log.error("Error while getting result: %s" % str(error))
            return "<error>%s\n</error>\n</result>\n" % ERROR_MSG["no_result"]

    def outgoing(message):
        # File-upload callers get the build object alongside each message.
        return (message, build) if fileupload else message

    # Send this build's hash
    if fileupload:
        hash_template = "<build hash='%s' type='files'/>\n"
    else:
        hash_template = "<build hash='%s'/>\n"
    yield outgoing(hash_template % build.build_hash)

    # Result already exists: send it and stop.
    if finished(build.status):
        log.info("Result already exists since %s" %
                 pretty_epoch_time(build.status_change_time))
        yield outgoing(get_result())
        return

    # Otherwise listen for completion.
    if incremental and build.status == Status.Running:
        log.info("Already running, sending increment message")
        yield outgoing(build.increment_msg())

    while True:
        kind, payload = inbox.get()
        if kind == Message.StatusChange:
            log.info("Message %s" % Status.lookup[payload])
            # Stop listening once the status has changed to a finished one.
            if finished(payload):
                break
        elif incremental and kind == Message.Increment:
            yield outgoing(payload)

    log.info("Getting result...")
    yield outgoing(get_result())
Exemple #8
0
 def get_result():
     """
     Return the finished build's result followed by the closing </result> tag.

     The build is marked as accessed before its result is fetched.
     """
     assert finished(build.status)
     build.access()
     payload = build.result()
     return payload + '</result>\n'
Exemple #9
0
    def result(self):
        """
        Return the result of a finished build as an XML fragment.

        * Status ParseError: an <error> element with the parsing error
          message, preceded by a <warning> element when warnings exist.
        * Status Done: a <corpus> element carrying a download link, either
          on its own (when the build has ``out_files``) or injected into the
          contents of ``self.out_file``. If an output file is missing, the
          token.word file is empty or unreadable, or the result file is
          empty or unreadable, the status is changed to Error and an <error>
          element is returned instead.
        * Any other finished status: trace, stderr and stdout plus a generic
          <error> element.
        """
        assert finished(self.status)
        out = []

        # Result when Parse Error
        if self.status == Status.ParseError:
            if self.warnings:
                out.append('<warning>' + escape(self.warnings) + '</warning>')
            # The error is reported whether or not there are warnings. These
            # lines used to be nested under the warnings check, which made the
            # method return None for a parse error without warnings.
            out.append("<error>%s</error>" % ERROR_MSG["parsing_error"])
            log.error(ERROR_MSG["parsing_error"])
            return "\n".join(out)

        # Result when Done
        elif self.status == Status.Done:
            download_link = "%s/download?hash=%s" % (Config.backend, self.build_hash)

            if self.warnings:
                out.append('<warning>' + escape(self.warnings) + '</warning>')

            if hasattr(self, 'out_files'):
                for out_file in self.out_files:
                    if not os.path.exists(out_file):
                        self.change_status(Status.Error)
                        out.append("<error>%s</error>" % ERROR_MSG["missing_file"])
                        log.error(ERROR_MSG["missing_file"])
                        return "\n".join(out)

                # NOTE(review): warnings collected in `out` are not included
                # in this return value -- confirm that this is intended.
                result = "<corpus link='%s'/>\n" % download_link
                return result

            else:
                # Check for empty input (e.g. "<text></text>")
                try:
                    wordfile = os.path.join(self.annotations_dir, self.filename + '.token.word')
                    with open(wordfile, "r") as f:
                        word_contents = f.read()
                        if not word_contents:
                            raise ValueError('empty token.word file')
                except Exception:
                    # "except Exception" rather than a bare "except" so that
                    # KeyboardInterrupt and SystemExit are not swallowed.
                    self.change_status(Status.Error)
                    log.exception(ERROR_MSG["empty_input"])
                    return "<error>%s</error>" % ERROR_MSG["empty_input"]

                # Check if result file is not empty
                try:
                    with open(self.out_file, "r") as f:
                        result_contents = f.read()
                        # NOTE(review): strip()/rstrip() remove any of the
                        # given *characters* from the ends, not the literal
                        # tags; kept as-is since it only serves as an
                        # emptiness check -- confirm it cannot misfire on
                        # real output.
                        if not result_contents.strip("<corpus>\n").rstrip("</corpus>\n\n"):
                            raise ValueError('empty result file')
                        else:
                            result_contents = result_contents.replace("<corpus", "<corpus link='%s'" % download_link)
                            out.append(result_contents)
                            return "\n".join(out)
                except Exception:
                    self.change_status(Status.Error)
                    log.exception(ERROR_MSG["no_result"])
                    return "<error>%s</error>" % ERROR_MSG["no_result"]

        else:
            out = ['<trace>' + escape(self.trace) + '</trace>',
                   '<stderr>' + escape(self.stderr) + '</stderr>',
                   '<stdout>' + escape(self.stdout) + '</stdout>',
                   '<error>' + ERROR_MSG["no_result"] + '</error>']
            return "\n".join(out) + "\n"