def _cleanup_submission(self, task: SubmissionTask, file_list: List[str]):
        """Clean up code that is the same for canceled and finished submissions"""
        submission = task.submission
        sid = submission.sid

        # Erase the temporary data which may have accumulated during processing
        for file_hash in file_list:
            hash_name = get_temporary_submission_data_name(sid, file_hash=file_hash)
            ExpiringHash(hash_name, host=self.redis).delete()

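        # Stop counting this submission against the submitter's quota, if it was being tracked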
        if submission.params.quota_item and submission.params.submitter:
            self.log.info(f"[{sid}] Submission no longer counts toward {submission.params.submitter.upper()} quota")
            Hash('submissions-' + submission.params.submitter, self.redis_persist).pop(sid)

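        # Notify any caller that asked to be told when this submission completes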
        if task.completed_queue:
            self.volatile_named_queue(task.completed_queue).push(submission.as_primitives())

        # Send complete message to any watchers.
        watcher_list = ExpiringSet(make_watcher_list_name(sid), host=self.redis)
        for w in watcher_list.members():
            NamedQueue(w).push(WatchQueueMessage({'status': 'STOP'}).as_primitives())

        # Clear the timeout watcher
        watcher_list.delete()
        self.timeout_watcher.clear(sid)
        self.active_submissions.pop(sid)

        # Count the submission as 'complete' either way
        self.counter.increment('submissions_completed')


class DispatchHash:
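    """Tracks which services have run, or are still running, on each file in a submission.

    A minimal usage sketch (`sid`, `file_hash`, `result_key` and `redis_client` are
    placeholders, assuming an already-configured redis connection):

        table = DispatchHash(sid, redis_client)
        if table.add_file(file_hash, file_limit=500, parent_hash=None):
            table.dispatch(file_hash, 'extract')
            # ... the service runs, then reports back ...
            table.finish(file_hash, 'extract', result_key, score=0, classification='U')
        if table.all_finished():
            table.delete()
    """
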
    def __init__(self,
                 sid: str,
                 client: Union[Redis, StrictRedis],
                 fetch_results=False):
        """

        :param sid:
        :param client:
        :param fetch_results: Preload all the results on the redis server.
        """
        self.client = client
        self.sid = sid
        self._dispatch_key = f'{sid}{dispatch_tail}'
        self._finish_key = f'{sid}{finished_tail}'
        self._finish = self.client.register_script(finish_script)

        # cache the schedules calculated for the dispatcher, used to prevent rebuilding the
        # schedule repeatedly, and for telling the UI what services are pending
        self.schedules = ExpiringHash(f'dispatch-hash-schedules-{sid}',
                                      host=self.client)

        # How many services are outstanding for each file in the submission
        self._outstanding_service_count = ExpiringHash(
            f'dispatch-hash-files-{sid}', host=self.client)
        # Track which files have been extracted by what, in order to rebuild the file tree later
        self._file_tree = ExpiringSet(f'dispatch-hash-parents-{sid}',
                                      host=self.client)
        self._attempts = ExpiringHash(f'dispatch-hash-attempts-{sid}',
                                      host=self.client)

        # Local caches of the file set and the finished table
        self._cached_files = set(self._outstanding_service_count.keys())
        self._cached_results = dict()
        if fetch_results:
            self._cached_results = self.all_results()

        # Errors that are related to a submission, but not the terminal errors of a service
        self._other_errors = ExpiringSet(f'dispatch-hash-errors-{sid}',
                                         host=self.client)

        # TODO set these expire times from the global time limit for submissions
        retry_call(self.client.expire, self._dispatch_key, 60 * 60)
        retry_call(self.client.expire, self._finish_key, 60 * 60)

    def add_file(self, file_hash: str, file_limit, parent_hash) -> bool:
        """Add a file to a submission.

        Returns: True if the file was accepted into the submission, False if it was rejected (file limit reached).
        """
        if parent_hash:
            self._file_tree.add(f'{file_hash}-{parent_hash}')
        else:
            self._file_tree.add(file_hash)

        # If it was already in the set, we don't need to check remotely
        if file_hash in self._cached_files:
            return True

        # If the set is already full, and it's not in the set, then we don't need to check remotely
        if len(self._cached_files) >= file_limit:
            return False

        # Our local checks are inconclusive, check remotely:
        # 0 => already exists, still want to return true
        # 1 => didn't exist before
        # None => over size limit, return false
        if self._outstanding_service_count.limited_add(file_hash, 0,
                                                       file_limit) is not None:
            # If it was added, add it to the local cache so we don't need to check again
            self._cached_files.add(file_hash)
            return True
        return False

    def add_error(self, error_key: str) -> bool:
        """Add an error to a submission.

        NOTE: This method is for errors occurring outside of those handled via the 'fail_*recoverable' methods

        Returns true if the error is new, false if the error is a duplicate.
        """
        return self._other_errors.add(error_key) > 0

    def dispatch(self, file_hash: str, service: str):
        """Mark that a service has been dispatched for the given sha."""
        if retry_call(self.client.hset, self._dispatch_key,
                      f"{file_hash}-{service}", time.time()):
            self._outstanding_service_count.increment(file_hash, 1)

    def drop_dispatch(self, file_hash: str, service: str):
        """If a dispatch has been found to be un-needed remove the counters."""
        if retry_call(self.client.hdel, self._dispatch_key,
                      f"{file_hash}-{service}"):
            self._outstanding_service_count.increment(file_hash, -1)

    def dispatch_count(self):
        """How many tasks have been dispatched for this submission."""
        return retry_call(self.client.hlen, self._dispatch_key)

    def dispatch_time(self, file_hash: str, service: str) -> float:
        """When was dispatch called for this sha/service pair."""
        result = retry_call(self.client.hget, self._dispatch_key,
                            f"{file_hash}-{service}")
        if result is None:
            return 0
        return float(result)

    def all_dispatches(self) -> Dict[str, Dict[str, float]]:
        """Load the entire table of things that should currently be running."""
        rows = retry_call(self.client.hgetall, self._dispatch_key)
        output = {}
        for key, timestamp in rows.items():
            file_hash, service = key.split(b'-', maxsplit=1)
            file_hash = file_hash.decode()
            service = service.decode()
            if file_hash not in output:
                output[file_hash] = {}
            output[file_hash][service] = float(timestamp)
        return output

    def fail_recoverable(self,
                         file_hash: str,
                         service: str,
                         error_key: str = None):
        """A service task has failed, but should be retried, clear that it has been dispatched.

        After this call, the service is in a non-dispatched state, and the status can't be updated
        until it is dispatched again.
        """
        if error_key:
            self._other_errors.add(error_key)
        retry_call(self.client.hdel, self._dispatch_key,
                   f"{file_hash}-{service}")
        self._outstanding_service_count.increment(file_hash, -1)

    def fail_nonrecoverable(self, file_hash: str, service: str,
                            error_key: str) -> Tuple[int, bool]:
        """A service task has failed and should not be retried, enter the error as the result.

        Has exactly the same semantics as `finish` but for errors.
        """
        return retry_call(self._finish,
                          args=[
                              self.sid, file_hash, service,
                              json.dumps(['error', error_key, 0, False, ''])
                          ])

    def finish(self,
               file_hash,
               service,
               result_key,
               score,
               classification,
               drop=False) -> Tuple[int, bool]:
        """
        As a single transaction:
         - Remove the service from the dispatched list
         - Add the file to the finished list, with the given result key
         - Return the number of items in the dispatched list and whether this was a duplicate call to finish
        """
        return retry_call(self._finish,
                          args=[
                              self.sid, file_hash, service,
                              json.dumps([
                                  'result', result_key, score, drop,
                                  str(classification)
                              ])
                          ])

    def finished_count(self) -> int:
        """How many tasks have been finished for this submission."""
        return retry_call(self.client.hlen, self._finish_key)

    def finished(self, file_hash, service) -> Union[DispatchRow, None]:
        """If a service has been finished, return the key of the result document."""
        # Try the local cache
        result = self._cached_results.get(file_hash, {}).get(service, None)
        if result:
            return result
        # Try the server
        result = retry_call(self.client.hget, self._finish_key,
                            f"{file_hash}-{service}")
        if result:
            return DispatchRow(*json.loads(result))
        return None

    def all_finished(self) -> bool:
        """Are there no outstanding tasks, and at least one finished task."""
        return self.finished_count() > 0 and self.dispatch_count() == 0

    def all_results(self) -> Dict[str, Dict[str, DispatchRow]]:
        """Get all the records stored in the dispatch table.

        :return: output[file_hash][service_name] -> DispatchRow
        """
        rows = retry_call(self.client.hgetall, self._finish_key)
        output = {}
        for key, status in rows.items():
            file_hash, service = key.split(b'-', maxsplit=1)
            file_hash = file_hash.decode()
            service = service.decode()
            if file_hash not in output:
                output[file_hash] = {}
            output[file_hash][service] = DispatchRow(*json.loads(status))
        return output

    def all_extra_errors(self):
        """Return the set of errors not part of the dispatch table itself."""
        return self._other_errors.members()

    def all_files(self):
        """Return the hashes of all files tracked by this submission."""
        return self._outstanding_service_count.keys()

    def file_tree(self):
        """Returns a mapping from file, to a list of files that are that file's parents.

        A none value being in the list indicates that the file is one of the root files of the submission.
        """
        edges = self._file_tree.members()
        output = {}
        for string in edges:
            if '-' in string:
                child, parent = string.split('-')
            else:
                child, parent = string, None

            if child not in output:
                output[child] = []
            output[child].append(parent)
        return output

    def delete(self):
        """Clear the tables from the redis server."""
        retry_call(self.client.delete, self._dispatch_key)
        retry_call(self.client.delete, self._finish_key)
        self.schedules.delete()
        self._outstanding_service_count.delete()
        self._file_tree.delete()
        self._other_errors.delete()
        self._attempts.delete()

    def dispatch_file(self, task: FileTask):
        """Handle a message describing a file to be processed.

        This file may be:
            - A new submission or extracted file.
            - A file that has just completed a stage of processing.
            - A file that has not completed a stage of processing, but this
              call has been triggered by a timeout or similar.

        If the file is totally new, we will set up a dispatch table and fill it in.

        Once we make/load a dispatch table, we will dispatch whichever group the table
        shows us hasn't been completed yet.

        When we dispatch to a service, we check if the task is already in the dispatch
        queue. If it isn't, proceed normally. If it is, check that the service is still online.
        """
        # Read the message content
        file_hash = task.file_info.sha256
        active_task = self.active_submissions.get(task.sid)

        if active_task is None:
            self.log.warning(f"[{task.sid}] Untracked submission is being processed")
            return

        submission_task = SubmissionTask(active_task)
        submission = submission_task.submission

        # Refresh the watch on the submission, we are still working on it
        self.timeout_watcher.touch(key=task.sid, timeout=int(self.config.core.dispatcher.timeout),
                                   queue=SUBMISSION_QUEUE, message={'sid': task.sid})

        # Open up the file/service table for this submission
        dispatch_table = DispatchHash(task.sid, self.redis, fetch_results=True)

        # Load the tags and temporary data needed to fill out the service tasks
        file_tags = ExpiringSet(task.get_tag_set_name(), host=self.redis)
        file_tags_data = file_tags.members()
        temporary_submission_data = ExpiringHash(task.get_temporary_submission_data_name(), host=self.redis)
        # Note: ExpiringHash.items() returns a plain dict, hence the second .items()
        temporary_data = [dict(name=name, value=value) for name, value in temporary_submission_data.items().items()]

        # Calculate the schedule for the file
        schedule = self.build_schedule(dispatch_table, submission, file_hash, task.file_info.type)
        started_stages = []

        # Go through each round of the schedule removing complete/failed services
        # Break when we find a stage that still needs processing
        outstanding = {}
        score = 0
        errors = 0
        while schedule and not outstanding:
            stage = schedule.pop(0)
            started_stages.append(stage)

            for service_name in stage:
                service = self.scheduler.services.get(service_name)
                if not service:
                    continue

                # Load the results, if there are no results, then the service must be dispatched later
                # Don't look at if it has been dispatched, as multiple dispatches are fine,
                # but missing a dispatch isn't.
                finished = dispatch_table.finished(file_hash, service_name)
                if not finished:
                    outstanding[service_name] = service
                    continue

                # If the service terminated in an error, count the error and continue
                if finished.is_error:
                    errors += 1
                    continue

                # If the service finished, count the score, and check if the file has been dropped
                score += finished.score
                if not submission.params.ignore_filtering and finished.drop:
                    if schedule:  # If there are still stages left, overwrite them for next time
                        dispatch_table.schedules.set(file_hash, started_stages)
                    schedule.clear()

        # Try to retry/dispatch any outstanding services
        if outstanding:
            self.log.info(f"[{task.sid}] File {file_hash} sent to services : {', '.join(list(outstanding.keys()))}")

            for service_name, service in outstanding.items():

                # Find the actual file name from the list of files in submission
                filename = None
                for file in submission.files:
                    if task.file_info.sha256 == file.sha256:
                        filename = file.name
                        break

                # Build the actual service dispatch message
                config = self.build_service_config(service, submission)
                service_task = ServiceTask(dict(
                    sid=task.sid,
                    metadata=submission.metadata,
                    min_classification=task.min_classification,
                    service_name=service_name,
                    service_config=config,
                    fileinfo=task.file_info,
                    filename=filename or task.file_info.sha256,
                    depth=task.depth,
                    max_files=task.max_files,
                    ttl=submission.params.ttl,
                    ignore_cache=submission.params.ignore_cache,
                    ignore_dynamic_recursion_prevention=submission.params.ignore_dynamic_recursion_prevention,
                    tags=file_tags_data,
                    temporary_submission_data=temporary_data,
                    deep_scan=submission.params.deep_scan,
                    priority=submission.params.priority,
                ))
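                # Record the dispatch, then queue the task for the service at the submission's priority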
                dispatch_table.dispatch(file_hash, service_name)
                queue = get_service_queue(service_name, self.redis)
                queue.push(service_task.priority, service_task.as_primitives())

        else:
            # There are no outstanding services, this file is done
            # clean up the tags
            file_tags.delete()

            # If nothing at all is outstanding for this submission,
            # send a message to the submission dispatcher to finalize it
            self.counter.increment('files_completed')
            if dispatch_table.all_finished():
                self.log.info(f"[{task.sid}] Finished processing file '{file_hash}' starting submission finalization.")
                self.submission_queue.push({'sid': submission.sid})
            else:
                self.log.info(f"[{task.sid}] Finished processing file '{file_hash}'. Other files are not finished.")