def __init__(self, connector, datastore, serviceName, serviceQueue, objectStoreQueue, **extra):
     '''
     Runs Process init first and then creates required connections.
     '''
     HSN2TaskProcessor.__init__(self, connector, datastore, serviceName, serviceQueue, objectStoreQueue, **extra)
     self.thug = extra.get("thug")
     self.thugDir = os.path.dirname(self.thug)
     self.parser = ThugAnalysisParser()
class ThugTaskProcessor(HSN2TaskProcessor):
    '''
    Task processor for Thug.
    What should be done in processing:
    1) launch appropriate Thug methods with required arguments
    2) read output - determine whether successful or failed
    3a) If failed throw TaskFailedException
    3b) If successful return tuple (task, warnings)
    '''
    thug = None
    thugDir = None
    parser = None

    def __init__(self, connector, datastore, serviceName, serviceQueue, objectStoreQueue, **extra):
        '''
        Runs Process init first and then creates required connections.
        '''
        HSN2TaskProcessor.__init__(self, connector, datastore, serviceName, serviceQueue, objectStoreQueue, **extra)
        self.thug = extra.get("thug")
        self.thugDir = os.path.dirname(self.thug)
        self.parser = ThugAnalysisParser()

    def taskProcess(self):
        '''	This method should be overridden with what is to be performed.
                Returns a list of warnings (warnings). The current task is available at self.currentTask'''
        if len(self.objects) == 0:
            raise ObjectStoreException("Task processing didn't find task object.")

        referer = ""

        if self.objects[0].isSet("url_original"):
            url = self.objects[0].url_original
        elif self.objects[0].isSet("url_normalized"):
            url = self.objects[0].url_normalized
        else:
            raise ParamException("Both url_original and url_normalized are missing.")
        if self.objects[0].isSet("referer"):
            referer = "--referer=%s" % self.objects[0].referer

        useragent = ""
        proxy = ""
        verbose = "--verbose"
        debug = ""
        save_zip = False
        save_js_context = True
        delay = 3000
        timeout = 60 * 3
        threshold = 1024

        try:
            for param in self.currentTask.parameters:
                if param.name == "useragent":
                    value = str(param.value)
                    if value:
                        useragent = "--useragent=%s" % value
                elif param.name == "proxy":
                    value = str(param.value)
                    if value:
                        proxy = "--proxy=%s" % value
                elif param.name == "verbose":
                    verbose = self.paramToBool(param)
                    if verbose:
                        verbose = "--verbose"
                    else:
                        verbose = ""
                elif param.name == "debug":
                    debug = self.paramToBool(param)
                    if debug:
                        debug = "--debug"
                    else:
                        debug = ""
                elif param.name == "save_zip":
                    save_zip = self.paramToBool(param)
                elif param.name == "save_js_context":
                    save_js_context = self.paramToBool(param)
                elif param.name == "delay":
                    delay = int(param.value)
                    if delay < 0:
                        raise ParamException("%s" % "delay cannot be smaller than 0")
                elif param.name == "timeout":
                    timeout = int(param.value)
                    if timeout < 0:
                        raise ParamException("%s" % "timeout cannot be smaller than 0")
                elif param.name == "threshold":
                    threshold = int(param.value)
                    if threshold < 0:
                        raise ParamException("%s" % "threshold cannot be smaller than 0")
        except ParamException:
            raise
        except Exception as e:
            raise ParamException("%s" % str(e))

        delay = "--delay={}".format(delay)
        timeout_str = "--timeout={}".format(timeout)
        threshold = "--threshold={}".format(threshold) if threshold > 0 else ""
        args = ["/usr/bin/hsn2-limit-memory", "python", self.thug, "-F", "-M", timeout_str, delay, threshold, useragent, proxy, verbose, debug, referer, url]
        args = [unicode(x).encode("utf-8") for x in args if len(x) > 0]

        self.objects[0].addTime("thug_time_start", int(time.time() * 1000))
        output, timedout, return_code = self.runExternal(args, timeout * 1.5)

        if return_code != 0:
            if timedout:
                message = "Thug analysis timeout"
            else:
                message = "Thug returncode was {}".format(return_code)
            # logging.warning(output[0])
            logging.warning(message)
            self.objects[0].addString("thug_error", message)

            tmp = tempfile.mkstemp()
            os.write(tmp[0], output[0])
            os.close(tmp[0])
            self.objects[0].addBytes("thug_error_details", self.dsAdapter.putFile(tmp[1], self.currentTask.job))
            self.remove_tmp(tmp[1])
        else:
            self.objects[0].addTime("thug_time_stop", int(time.time() * 1000))
            if output[0] is not None:
                match = ANALYSIS_DIR_REGEXP.search(output[0])
                if match:
                    relativeLogDir = match.group(1)
                    logDir = os.path.abspath(os.path.join(self.thugDir, relativeLogDir))
                    xmlFile = "%s/analysis/maec11/analysis.xml" % logDir
                    ret = self.parseXML(xmlFile, save_js_context)
                    if ret is False:
                        self.objects[0].addString("thug_error", str(output[1]))
                    else:
                        logging.debug("Analysis parsed %s", xmlFile)

                    if save_zip and os.path.isdir(logDir):
                        self.storeZip(logDir)
                    self.remove_tmp(logDir)
                    parent_dir = os.path.dirname(logDir)
                    try:
                        os.rmdir(parent_dir)
                        logging.info("Removed log directory parent %s", parent_dir)
                    except:
                        logging.info("Couldn't remove log directory parent - non empty %s", parent_dir)
                else:
                    self.objects[0].addBool("thug_active", False)
                    self.objects[0].addString("thug_error", "Couldn't find log dir in output: " + repr(output[0]))
        return []

    def runExternal(self, args, timeout=60):
        """
        Execute args, limit execution time to 'timeout' seconds.
        Uses the subprocess module and subprocess.PIPE.
        """
        logging.debug(args)

        proc = subprocess.Popen(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=self.thugDir  # this will cause the logs to be written to '/opt/thug/logs' assuming that self.thugDir is '/opt/thug/src'
        )

        start = time.time()

        stdout_chunks = []
        stderr = ""

        timedout = False
        while proc.poll() is None:
            timedout = time.time() - start >= timeout
            if timedout:
                self.terminateProc(proc)
                break
            if not self.keepRunning:
                self.terminateProc(proc)
                raise ShutdownException("Shutdown while waiting for thug to finish processing")
            try:
                stdout_chunk = proc.stdout.read(100)
            except ValueError:
                stdout_chunk = ""
            if stdout_chunk:
                stdout_chunks.append(stdout_chunk)
            else:
                time.sleep(0.1)

        try:
            stdout_chunk = proc.stdout.read()
        except ValueError:
            stdout_chunk = ""
        if stdout_chunk:
            stdout_chunks.append(stdout_chunk)
        stdout = "".join(stdout_chunks)

        return (stdout, stderr), timedout, proc.returncode

    def terminateProc(self, proc):
        try:
            proc.stdout.close()
            proc.stderr.close()
            proc.terminate()
        except Exception as exc:
            logging.exception(exc)

    def parseXML(self, xmlFile, saveJsContext):
        (parsed, found_exploits, found_behaviours, found_js_contexts) = self.parser.parseFile(xmlFile, saveJsContext) if os.path.isfile(xmlFile) else False
        self.objects[0].addBool("thug_active", parsed)
        self.objects[0].addBool("thug_detected", found_exploits)

        if not parsed:
            return False

        self.objects[0].addBytes("thug_analysis_file", self.dsAdapter.putFile(xmlFile, self.currentTask.job))

        bList = ow.toBehaviorList(found_behaviours)
        tmp = tempfile.mkstemp()
        os.write(tmp[0], bList.SerializeToString())
        os.close(tmp[0])
        self.objects[0].addBytes("thug_behaviors", self.dsAdapter.putFile(tmp[1], self.currentTask.job))
        self.remove_tmp(tmp[1])

        cList = ow.toJSContextList(found_js_contexts)
        tmp = tempfile.mkstemp()
        os.write(tmp[0], cList.SerializeToString())
        os.close(tmp[0])
        self.objects[0].addBytes("js_context_list", self.dsAdapter.putFile(tmp[1], self.currentTask.job))
        self.remove_tmp(tmp[1])
        return True

    def storeZip(self, dirPath):
        zip_ = shutil.make_archive(dirPath, "zip_", dirPath, dirPath, verbose=False)
        self.objects[0].addBytes("thug_analysis_zip", self.dsAdapter.putFile(zip_, self.currentTask.job))
        logging.debug("'%s' zip_ stored" % zip_)
        os.remove(zip_)

    def remove_tmp(self, path):
        try:
            if os.path.isdir(path):
                shutil.rmtree(path)
            else:
                os.unlink(path)
        except Exception as exc:
            logging.warning(u"Exception when trying to remove temporary files: %s %s", path, exc)