Example #1
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to S3

        :param bytes payload: Content to be stored in Amazon S3 container
        :param **kwargs s3bucket: Bucket name to be used
        :param **kwargs sha1: SHA1 hash to be used as a filename

        :returns: Filename used to save the payload
        :rtype: bytes

        """

        if 's3bucket' in kwargs:
            self.connect(bucket_name=kwargs['s3bucket'])
        else:
            self.connect()

        if 'sha1' in kwargs:
            filename = kwargs['sha1']
        else:
            filename = get_sha1(payload)

        key = boto.s3.key.Key(self.bucket, filename)

        try:
            key.set_contents_from_string(payload)
        except Exception:
            self.stoq.log.error("Unable to save file to S3")
            return None

        return filename
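Example #2 below replaces the explicit membership checks used here (if 'sha1' in kwargs, with an else branch) with dict.get() and a default. A minimal standalone illustration of that idiom, with made-up values:

    kwargs = {'sha1': 'da39a3ee5e6b4b0d3255bfef95601890afd80709'}

    # Equivalent to: if 'sha1' in kwargs: filename = kwargs['sha1'] else: filename = default
    filename = kwargs.get('sha1', 'computed-default')
    print(filename)  # -> da39a3ee5e6b4b0d3255bfef95601890afd80709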
Example #2
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to S3

        :param bytes payload: Content to be stored in Amazon S3 container
        :param **kwargs s3bucket: Bucket name to be used
        :param **kwargs sha1: SHA1 hash to be used as a filename

        :returns: Filename used to save the payload
        :rtype: bytes

        """

        s3bucket = kwargs.get('index', self.bucket_name)
        self.connect(bucket_name=s3bucket)

        filename = kwargs.get('sha1', get_sha1(payload))

        key = boto.s3.key.Key(self.bucket, filename)

        try:
            key.set_contents_from_string(payload)
        except Exception as err:
            self.log.error("Unable to save file to S3: {}".format(str(err)))
            return None

        return filename
Example #3
    def attachment_metadata(self, payload=None, filename=None, uuid=None):
        # Make sure we have a payload, otherwise return None
        if not payload or len(payload) <= 0:
            return None

        attachment_json = {}

        # Generate hashes
        attachment_json['md5'] = get_md5(payload)
        attachment_json['sha1'] = get_sha1(payload)
        attachment_json['sha256'] = get_sha256(payload)
        attachment_json['sha512'] = get_sha512(payload)
        attachment_json['ssdeep'] = get_ssdeep(payload)

        # Get magic type
        attachment_json['magic'] = get_magic(payload)

        # Get size
        attachment_json['size'] = len(payload)

        # Define the filename as provided
        attachment_json['filename'] = filename

        # Make sure we have the parent uuid generated with the original email
        attachment_json['uuid'] = uuid.copy()

        # Generate a unique ID
        attachment_json['uuid'].append(self.stoq.get_uuid)

        return attachment_json
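The get_md5/get_sha1/get_sha256/get_sha512 helpers used above are stoQ utility functions. A minimal hashlib-based sketch of what such a helper is assumed to do (the real implementation may differ):

    import hashlib

    def get_sha1(payload):
        # Return the hex SHA1 digest of a bytes payload
        return hashlib.sha1(payload).hexdigest()

    digest = get_sha1(b'example payload')
    print(len(digest))  # -> 40 hex characters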
Example #5
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to disk

        :param str payload: Content to be saved
        :param bool archive: Is this a file that is being archived?
        :param **kwargs sha1: SHA1 hash to use as a filename

        """

        if archive:
            if 'sha1' in kwargs:
                sha1 = kwargs['sha1']
            else:
                sha1 = get_sha1(payload)

            path = self.stoq.hashpath(sha1)
            self.stoq.write(payload, path=path, filename=sha1, binary=True)

            self.stoq.log.info("Saving file to disk: {}/{}".format(path, sha1))

        else:
            path = os.path.join(self.stoq.results_dir, self.parentname)
            self.stoq.write(path=path, payload=self.stoq.dumps(payload))

            self.stoq.log.info("Saving file to disk: {}".format(path))

        return True
Example #6
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to S3

        :param bytes payload: Content to be stored in Amazon S3 container
        :param **kwargs s3bucket: Bucket name to be used
        :param **kwargs sha1: SHA1 hash to be used as a filename

        :returns: Filename used to save the payload
        :rtype: bytes

        """

        s3bucket = kwargs.get('index', self.bucket_name)
        self.connect(bucket_name=s3bucket)

        filename = kwargs.get('sha1', get_sha1(payload))

        key = boto.s3.key.Key(self.bucket, filename)

        try:
            key.set_contents_from_string(payload)
        except Exception as err:
            self.stoq.log.error("Unable to save file to S3: {}".format(str(err)))
            return None

        return filename
Example #7
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to disk

        :param str payload: Content to be saved
        :param bool archive: Is this a file that is being archived?
        :param str index: Directory name to save content to
        :param str sha1: SHA1 hash to use as a filename
        :param str filename: Filename to save the file as
        :param str path: Path where the file will be saved to
        :param bool use_date: Append current date to the path
        :param bool append: Allow append to output file?

        """

        if archive:
            filename = kwargs.get('sha1', get_sha1(payload))
            path = self.stoq.hashpath(filename)
            binary = kwargs.get('binary', True)
            append = kwargs.get('append', False)
        else:
            path = kwargs.get('path', None)

            if not path:
                index = kwargs.get('index', self.parentname)
                path = "{}/{}".format(self.stoq.results_dir, index)

            append = kwargs.get('append', False)
            filename = kwargs.get('filename', None)
            binary = kwargs.get('binary', False)

        if not binary:
            payload = self.stoq.dumps(payload, compactly=self.compactly)

            # Append a newline to the result, if we are appending to a file
            if append:
                payload += '\n'

        use_date = kwargs.get('use_date', False)
        if use_date:
            now = datetime.now().strftime(self.date_format)
            path = "{}/{}".format(path, now)

        fullpath = self.stoq.write(path=path, filename=filename, payload=payload,
                                   binary=binary, append=append)

        return fullpath
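To illustrate the use_date option above, assuming a hypothetical date_format of "%Y/%m/%d" and a hypothetical results directory (both normally come from the plugin configuration), the dated subdirectory is appended like this:

    from datetime import datetime

    results_dir = '/opt/stoq/results'   # hypothetical
    index = 'example_worker'            # hypothetical
    path = '{}/{}'.format(results_dir, index)

    # use_date appends the formatted current date to the path
    now = datetime.now().strftime('%Y/%m/%d')
    path = '{}/{}'.format(path, now)
    print(path)  # e.g. /opt/stoq/results/example_worker/2016/05/17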
Example #8
    def scan(self, payload, **kwargs):
        """
        Publish messages to single or multiple RabbitMQ queues for processing

        :param bytes payload: Payload to be published
        :param **kwargs path: Path to file being ingested
        :param **kwargs user_comments: Comments associated with payload
        :param **kwargs submission_list: List of queues to publish to

        :returns: Results from scan
        :rtype: True

        """

        super().scan()

        self.stoq.log.info("Ingesting: %s" % kwargs['path'])

        # For every file we ingest we are going to assign a unique
        # id so we can link everything across the scope of the ingest.
        # This will be assigned to submissions within archive files as well
        # in order to simplify correlating files post-ingest.
        kwargs['uuid'] = self.stoq.get_uuid

        if payload:
            kwargs['sha1'] = get_sha1(payload)

        kwargs['path'] = os.path.abspath(kwargs['path'])

        if self.user_comments:
            kwargs['user_comments'] = self.user_comments

        if 'submission_list' in kwargs:
            self.submission_list = kwargs['submission_list']
            kwargs.pop('submission_list')

        if self.stoq.worker.archive_connector:
            kwargs['archive'] = self.archive_connector
        else:
            kwargs['archive'] = "file"

        for routing_key in self.submission_list:
            self.publish_connector.publish(kwargs, routing_key)

        return True
Example #9
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to disk

        :param str payload: Content to be saved
        :param bool archive: Is this a file that is being archived?
        :param str index: Directory name to save content to
        :param str sha1: SHA1 hash to use as a filename
        :param str filename: Filename to save the file as
        :param str path: Path where the file will be saved to
        :param bool append: Allow append to output file?

        """

        if archive:
            filename = kwargs.get('sha1', get_sha1(payload))
            path = self.stoq.hashpath(filename)
            binary = kwargs.get('binary', True)
            append = kwargs.get('append', False)
        else:
            path = kwargs.get('path', None)
            if not path:
                path = self.stoq.results_dir
                name = kwargs.get('index', self.parentname)
                path = os.path.join(path, name)

            append = kwargs.get('append', False)
            filename = kwargs.get('filename', None)
            binary = kwargs.get('binary', False)

        if not binary:
            payload = self.stoq.dumps(payload, compactly=self.compactly)

            # Append a newline to the result, if we are appending to a file
            if append:
                payload += '\n'

        fullpath = self.stoq.write(path=path, filename=filename, payload=payload,
                                   binary=binary, append=append)

        return fullpath
Example #10
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to GCS

        :param bytes payload: Content to be stored in GCS
        :param **kwargs bucket: Bucket name to be used
        :param **kwargs sha1: SHA1 hash to be used as a filename

        :returns: Filename used to save the payload
        :rtype: bytes

        """

        if not self.conn:
            self.connect()

        bucket = kwargs.get('index', self.bucket_name)
        sha1 = kwargs.get('sha1', get_sha1(payload))
        magic = get_magic(payload)

        hashpath = '/'.join(list(sha1[:5]))
        filename = "{}/{}".format(hashpath, sha1)

        body = {
            'name': filename
        }

        content = BytesIO(payload)
        media_body = http.MediaIoBaseUpload(content, magic)

        try:
            req = self.conn.objects().insert(bucket=bucket, body=body,
                                             media_body=media_body)
            resp = req.execute()
            self.stoq.log.debug(resp)
        except Exception as err:
            self.stoq.log.error("Unable to save file to GCS: {}".format(str(err)))
            return None

        return filename
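The hashpath built above spreads objects across nested directories keyed on the first five characters of the SHA1. A standalone illustration:

    sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

    # The first five hex characters each become one directory level
    hashpath = '/'.join(list(sha1[:5]))
    filename = '{}/{}'.format(hashpath, sha1)
    print(filename)  # -> d/a/3/9/a/da39a3ee5e6b4b0d3255bfef95601890afd80709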
Example #11
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to GCS

        :param bytes payload: Content to be stored in GCS
        :param **kwargs bucket: Bucket name to be used
        :param **kwargs sha1: SHA1 hash to be used as a filename

        :returns: Filename used to save the payload
        :rtype: bytes

        """

        if not self.conn:
            self.connect()

        bucket = kwargs.get('index', self.bucket_name)
        sha1 = kwargs.get('sha1', get_sha1(payload))
        magic = get_magic(payload)

        hashpath = '/'.join(list(sha1[:5]))
        filename = "{}/{}".format(hashpath, sha1)

        body = {'name': filename}

        content = BytesIO(payload)
        media_body = http.MediaIoBaseUpload(content, magic)

        try:
            req = self.conn.objects().insert(bucket=bucket,
                                             body=body,
                                             media_body=media_body)
            resp = req.execute()
            self.log.debug(resp)
        except Exception as err:
            self.log.error("Unable to save file to GCS: {}".format(str(err)))
            return None

        return filename
Example #12
    def start(self, payload=None, **kwargs):
        """
        Process the payload with the worker plugin

        :param bytes payload: (optional) Payload to be processed
        :param \*\*kwargs: additional arguments that may be needed by the
                           worker plugin (i.e., username and password via HTTP)
        :type kwargs: dict or None

        :returns: Tuple of JSON results and template rendered results
        :rtype: dict and str

        """

        archive_type = False
        template_results = None
        payload_hashes = None
        results = {}
        results['results'] = []
        worker_result = {}

        results['date'] = self.stoq.get_time

        # If we don't have a uuid, let's generate one
        if 'uuid' not in kwargs:
            kwargs['uuid'] = self.stoq.get_uuid

        # If we have no payload, let's try to find one to process
        if not payload and 'archive' in kwargs:
            # We are going to use the 'archive' field in kwargs to define where
            # we are going to get the file from. Once we know that, we will
            # load the appropriate plugin if required. Then, we will call
            # get_file() to grab the payload.
            archive_type = kwargs['archive']
            self.load_connector(archive_type)
            if hasattr(self.connectors[archive_type], 'get_file'):
                payload = self.connectors[archive_type].get_file(**kwargs)
            else:
                self.stoq.log.warn("Connector unable to get file..skipping")
                return False

        if payload:
            # Make sure we define this before possibly modifying the full file
            # path when/if we archive.
            if 'filename' not in kwargs:
                if 'path' in kwargs:
                    kwargs['filename'] = os.path.basename(kwargs['path'])
                else:
                    kwargs['filename'] = "Unknown"

            # If this worker wants us to save this payload to the archive, let's
            # handle that now before anything else. Otherwise any subsequent
            # plugins may not be able to retrieve the files. We are however going
            # to skip saving the payload if our source is the same as the
            # connector.
            if self.archive_connector and self.archive_connector != archive_type:
                payload_hashes = self.save_payload(payload,
                                                   self.archive_connector)

            # Some workers don't need a hash to be generated, so let's only
            # generate hashes if needed. This is defined in the .stoq
            # configuration file for the worker plugin. We are also only going
            # to generate a hash if our save_payload function hasn't been
            # called. Otherwise, we will just use those results.
            if self.hashpayload:
                if payload_hashes:
                    worker_result.update(payload_hashes)
                else:
                    worker_result.update(get_hashes(payload))

        # Send our payload to the worker, and store the results
        worker_result['scan'] = self.scan(payload, **kwargs)

        worker_result['plugin'] = self.name
        worker_result['uuid'] = kwargs['uuid']
        if payload:
            worker_result['size'] = len(payload)
        worker_result['payload_id'] = 0

        # Keep track of our total count of payloads, in case yara dispatch
        # finds something
        payload_id = 1

        results['results'].append(worker_result)

        # If we want to use the dispatcher, let's do that now
        if self.dispatch:
            # Our carver, extractor, and decoder plugins will return a list of
            # set()s. Let's make sure we handle the initial payload the same
            # way, so we can simplify the below routine.
            dispatch_payloads = [(None, payload)]

            current_depth = 0

            # Track hashes of payloads so we don't handle duplicates.
            processed_sha1s = {}

            while dispatch_payloads and self.stoq.max_recursion >= current_depth:
                for dispatch_payload in dispatch_payloads:
                    # Skip over this payload if we've already processed it
                    current_hash = get_sha1(dispatch_payload[1])
                    if current_hash in processed_sha1s:
                        continue

                    processed_sha1s[current_hash] = True

                    dispatch_payloads = self.yara_dispatcher(
                        dispatch_payload[1])

                    # Something was carved, let's gather the metadata
                    if dispatch_payloads:
                        dispatch_results = self._parse_dispatch_results(
                            dispatch_payloads, **kwargs)
                        if dispatch_results:
                            # Iterate over the results, grab the sha1, and add
                            # it to the list of processed hashes. Then, add the
                            # dispatch results to the primary results
                            for index, res in enumerate(dispatch_results):
                                if res['sha1'] in processed_sha1s:
                                    continue
                                processed_sha1s[res['sha1']] = True
                                res['payload_id'] = payload_id
                                payload_id += 1
                                results['results'].append(res)

                current_depth += 1

        results['payloads'] = payload_id

        # Parse output with a template
        if self.template:
            try:
                template_path = "{}/templates".format(self.plugin_path)

                tpl_env = Environment(loader=FileSystemLoader(template_path),
                                      trim_blocks=True,
                                      lstrip_blocks=True)
                template_results = tpl_env.get_template(
                    self.template).render(results=results)
            except TemplateNotFound:
                self.stoq.log.error("Unable to load template. Does {}/{} "
                                    "exist?".format(template_path,
                                                    self.template))
            except Exception as err:
                self.stoq.log.error(str(err))

        # If we are saving the results from the worker, let's take care of
        # it. This is defined in the .stoq configuration file for the
        # worker plugin. An output_connector must also be defined.
        if self.saveresults and self.output_connector:
            # Just to ensure we have loaded a connector for output
            self.load_connector(self.output_connector)
            # If there is a template that is named after the output connector
            # pass the templated results to the connector, otherwise pass the
            # raw results
            if template_results:
                if self.template.split(".")[0] == self.output_connector:
                    self.connectors[self.output_connector].save(
                        template_results)
            else:
                # Attempt to save the results, and pass along the primary results
                # as **kwargs, otherwise just pass along the results.
                try:
                    kwargs = {'sha1': results['results'][0]['sha1']}
                    self.connectors[self.output_connector].save(
                        results, **kwargs)
                except (KeyError, IndexError):
                    self.connectors[self.output_connector].save(results)

        return results, template_results
Example #13
    def start(self, payload=None, **kwargs):
        """
        Process the payload with the worker plugin

        :param bytes payload: (optional) Payload to be processed
        :param \*\*kwargs: additional arguments that may be needed by the
                           worker plugin (i.e., username and password via HTTP)
        :type kwargs: dict or None

        :returns: Tuple of JSON results and template rendered results
        :rtype: dict and str or lists

        """

        archive_type = False
        payload_hashes = None
        template_results = None
        results = {}
        results['results'] = []
        results['plugins'] = {}
        worker_result = {}

        results['date'] = self.stoq.get_time

        # If we don't have a uuid, let's generate one
        kwargs['uuid'] = kwargs.get('uuid', self.stoq.get_uuid)

        # If we have no payload, let's try to find one to process
        if not payload and 'archive' in kwargs:
            # We are going to use the 'archive' field in kwargs to define where
            # we are going to get the file from. Once we know that, we will
            # load the appropriate plugin if required. Then, we will call
            # get_file() to grab the payload.
            archive_type = kwargs['archive']
            worker_result['archive'] = kwargs['archive']

            self.load_connector(archive_type)
            if hasattr(self.connectors[archive_type], 'get_file'):
                payload = self.connectors[archive_type].get_file(**kwargs)
            else:
                self.stoq.log.warn("Connector unable to get file..skipping")
                return False

        if payload:
            # Make sure we define this before possibly modifying the full file
            # path when/if we archive.
            if 'filename' not in kwargs:
                if 'path' in kwargs:
                    kwargs['filename'] = os.path.basename(kwargs['path'])
                    worker_result['path'] = kwargs['path']
                else:
                    kwargs['filename'] = "Unknown"

                # Make sure we save the filename in the worker results as well
                worker_result['filename'] = kwargs['filename']

            # If this worker wants us to save this payload to the archive,
            # let's handle that now before anything else. Otherwise any
            # subsequent plugins may not be able to retrieve the files. We are
            # however going to skip saving the payload if our source is the
            # same as the connector.
            if self.archive_connector and self.archive_connector != archive_type:
                payload_hashes = self.save_payload(payload, self.archive_connector)

            # Some workers don't need a hash to be generated, so let's only
            # generate hashes if needed. This is defined in the .stoq
            # configuration file for the worker plugin. We are also only going
            # to generate a hash if our save_payload function hasn't been
            # called. Otherwise, we will just use those results.
            if self.hashpayload:
                if payload_hashes:
                    worker_result.update(payload_hashes)
                else:
                    worker_result.update(get_hashes(payload))

        # Send our payload to the worker, and store the results
        worker_result['scan'] = self.scan(payload, **kwargs)

        worker_result['plugin'] = self.name

        worker_result['uuid'] = kwargs['uuid']

        if payload:
            worker_result['size'] = len(payload)

        # Preserve the original metadata that was submitted with this payload
        worker_result['source_meta'] = kwargs.copy()

        # Check to see if the keys are in the primary result dict, if so,
        # we will remove them from the source_meta key, otherwise, we will
        # leave it be. Meant to reduce duplication of data when chaining
        # plugins.
        for k, v in kwargs.items():
            if k in worker_result:
                if v == worker_result[k]:
                    worker_result['source_meta'].pop(k, None)

            # Sometimes when chaining plugins source_meta will be appended
            # but the keys should be at the root of the results. Let's make
            # sure we move them to the root rather than storing them in the
            # source_meta
            elif k in ('filename', 'puuid', 'magic', 'ssdeep', 'path', 'size'):
                worker_result[k] = v
                worker_result['source_meta'].pop(k, None)

        worker_result['payload_id'] = 0
        results['plugins'].update({"0": self.name})

        # Keep track of our total count of payloads, in case yara dispatch
        # finds something
        payload_id = 1

        results['results'].append(worker_result)

        # If we want to use the dispatcher, let's do that now
        if self.dispatch:
            # Our carver, extractor, and decoder plugins will return a list of
            # set()s. Let's make sure we handle the initial payload the same
            # way, so we can simplify the below routine.
            dispatch_payloads = [({}, payload)]

            current_depth = 0

            # Track hashes of payloads so we don't handle duplicates.
            processed_hashes = {}

            while dispatch_payloads and int(self.stoq.max_recursion) >= current_depth:
                for index, dispatch_payload in enumerate(dispatch_payloads):

                    dispatch_payloads.pop(index)

                    current_hash = dispatch_payload[0].get('sha1', get_sha1(dispatch_payload[1]))

                    # Skip over this payload if we've already processed it
                    if current_hash in processed_hashes:
                        self.stoq.log.info("Skipping duplicate hash: {}".format(current_hash))
                        continue

                    processed_hashes.setdefault(current_hash, True)
                    # We are copy()ing processed hashes so we don't dispatch
                    # payloads twice, but we still want to be able to send
                    # dispatched payloads for additional processing
                    temp_processed_hashes = processed_hashes.copy()

                    # Send the payload to the yara dispatcher
                    for yara_result in self.yara_dispatcher(dispatch_payload[1]):
                        dispatch_result = self._parse_dispatch_results(yara_result, **kwargs)

                        if dispatch_result['sha1'] in temp_processed_hashes:
                            self.stoq.log.info("Skipping duplicate hash: {}".format(dispatch_result['sha1']))
                            continue

                        temp_processed_hashes.setdefault(dispatch_result['sha1'], True)

                        dispatch_payloads.append(yara_result)

                        dispatch_result['payload_id'] = payload_id

                        if dispatch_result.get('save').lower() == 'true' and self.archive_connector:
                            self.save_payload(yara_result[1], self.archive_connector)

                        results['results'].append(dispatch_result)
                        results['plugins'].update({str(payload_id): dispatch_result['dispatcher']})

                        payload_id += 1

                current_depth += 1

        results['payloads'] = payload_id

        # If we want the results for all plugins to be returned in one
        # big json blob, combined_results must be true.
        if self.combined_results:
            results, template_results = self._save_results(results)
        else:
            # Looks like we want to save each result individually, this
            # gets complex.
            split_results = []
            split_template_results = []

            # Make sure we save the top level key/values so we can append
            # them to the new individual result dict
            result_date = results['date']
            result_payloads = results['payloads']
            result_plugins = results['plugins']

            for result in results['results']:
                # Create the new individual results dict
                plugin_result = {}
                plugin_result['date'] = result_date
                plugin_result['payloads'] = result_payloads
                plugin_result['plugins'] = result_plugins
                plugin_result['results'] = [result]

                # Because this function returns the results, we are going
                # to save the individual results as it is returned from
                # the _save_results function
                r, t = self._save_results(plugin_result)

                # Append the results to the main results list. In many cases
                # templates won't be utilized, so no sense in saving them if
                # nothing is there.
                split_results.append(r)
                if t:
                    split_template_results.append(t)

            # Replace the original results with our newly created list of
            # results.
            results = split_results
            if split_template_results:
                template_results = split_template_results

        return results, template_results
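The dispatch loop above (and its variant in Example #14) is essentially a depth-limited walk over carved payloads, deduplicated by SHA1. A simplified, self-contained sketch of that pattern, where extract() stands in for yara_dispatcher():

    import hashlib

    def process_recursively(initial_payload, extract, max_recursion=3):
        seen = set()
        queue = [initial_payload]
        results = []
        depth = 0
        while queue and depth <= max_recursion:
            next_queue = []
            for payload in queue:
                digest = hashlib.sha1(payload).hexdigest()
                # Skip payloads we have already processed
                if digest in seen:
                    continue
                seen.add(digest)
                # extract() returns the carved child payloads (a list of bytes)
                children = extract(payload)
                results.extend(children)
                next_queue.extend(children)
            queue = next_queue
            depth += 1
        return results

    print(len(process_recursively(b'outer blob', lambda p: [])))  # -> 0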
Example #14
    def start(self, payload=None, **kwargs):
        """
        Process the payload with the worker plugin

        :param bytes payload: (optional) Payload to be processed
        :param \*\*kwargs: additional arguments that may be needed by the
                           worker plugin (i.e., username and password via HTTP)
        :type kwargs: dict or None

        :returns: Tuple of JSON results and template rendered results
        :rtype: dict and str or lists

        """

        archive_type = False
        payload_hashes = None
        template_results = None
        results = {}
        results['results'] = []
        results['plugins'] = {}
        worker_result = {}

        results['date'] = self.stoq.get_time

        # If we don't have a uuid, let's generate one
        kwargs['uuid'] = kwargs.get('uuid', self.stoq.get_uuid)

        # Set the Originating uuid to that of the first payload submitted
        kwargs['ouuid'] = kwargs.get('ouuid', kwargs['uuid'])

        # If we have no payload, let's try to find one to process
        if not payload and 'archive' in kwargs:
            # We are going to use the 'archive' field in kwargs to define where
            # we are going to get the file from. Once we know that, we will
            # load the appropriate plugin if required. Then, we will call
            # get_file() to grab the payload.
            archive_type = kwargs['archive']
            worker_result['archive'] = kwargs['archive']

            self.load_connector(archive_type)
            if hasattr(self.connectors[archive_type], 'get_file'):
                payload = self.connectors[archive_type].get_file(**kwargs)
            else:
                self.stoq.log.warn("Connector unable to get file..skipping")
                return False

        if payload:
            # Make sure we define this before possibly modifying the full file
            # path when/if we archive.
            if 'filename' not in kwargs:
                if 'path' in kwargs:
                    kwargs['filename'] = os.path.basename(kwargs['path'])
                    worker_result['path'] = kwargs['path']
                else:
                    kwargs['filename'] = "Unknown"

                # Make sure we save the filename in the worker results as well
                worker_result['filename'] = kwargs['filename']

            # If this worker wants us to save this payload to the archive,
            # let's handle that now before anything else. Otherwise any
            # subsequent plugins may not be able to retrieve the files. We are
            # however going to skip saving the payload if our source is the
            # same as the connector.
            if self.archive_connector and self.archive_connector != archive_type:
                payload_hashes = self.save_payload(payload,
                                                   self.archive_connector)

            # Some workers don't need a hash to be generated, so let's only
            # generate hashes if needed. This is defined in the .stoq
            # configuration file for the worker plugin. We are also only going
            # to generate a hash if our save_payload function hasn't been
            # called. Otherwise, we will just use those results.
            if self.hashpayload:
                if payload_hashes:
                    worker_result.update(payload_hashes)
                else:
                    worker_result.update(get_hashes(payload))

        # Send our payload to the worker, and store the results
        worker_result['scan'] = self.scan(payload, **kwargs)

        worker_result['plugin'] = self.name

        worker_result['uuid'] = kwargs['uuid']

        if payload:
            worker_result['size'] = len(payload)

        # Preserve the original metadata that was submitted with this payload
        worker_result['source_meta'] = kwargs.copy()

        # Check to see if the keys are in the primary result dict, if so,
        # we will remove them from the source_meta key, otherwise, we will
        # leave it be. Meant to reduce duplication of data when chaining
        # plugins.
        for k, v in kwargs.items():
            if k in worker_result:
                if v == worker_result[k]:
                    worker_result['source_meta'].pop(k, None)

            # Sometimes when chaining plugins source_meta will be appended
            # but the keys should be at the root of the results. Let's make
            # sure we move them to the root rather than storing them in the
            # source_meta
            elif k in ('filename', 'puuid', 'magic', 'ssdeep', 'path',
                       'ouuid'):
                worker_result[k] = v
                worker_result['source_meta'].pop(k, None)

        worker_result['payload_id'] = 0
        results['plugins'].update({"0": self.name})

        # Keep track of our total count of payloads, in case yara dispatch
        # finds something
        payload_id = 1

        results['results'].append(worker_result)

        # If we want to use the dispatcher, let's do that now
        if self.dispatch:
            # Our carver, extractor, and decoder plugins will return a list of
            # set()s. Let's make sure we handle the initial payload the same
            # way, so we can simplify the below routine.
            dispatch_payloads = [({}, payload)]
            dispatch_queue = []

            current_depth = 0

            # Track hashes of payloads so we don't handle duplicates.
            processed_hashes = {}

            while dispatch_payloads and int(
                    self.stoq.max_recursion) >= current_depth:
                for index, dispatch_payload in enumerate(dispatch_payloads):

                    dispatch_payloads.pop(index)

                    current_hash = dispatch_payload[0].get(
                        'sha1', get_sha1(dispatch_payload[1]))

                    # Skip over this payload if we've already processed it
                    if current_hash in processed_hashes:
                        self.stoq.log.info(
                            "Skipping duplicate hash: {}".format(current_hash))
                        continue

                    processed_hashes.setdefault(current_hash, True)
                    # We are copy()ing processed hashes so we don't dispatch
                    # payloads twice, but we still want to be able to send
                    # dispatched payloads for additional processing
                    temp_processed_hashes = processed_hashes.copy()

                    # Send the payload to the yara dispatcher
                    for yara_result in self.yara_dispatcher(
                            dispatch_payload[1]):
                        dispatch_result = self._parse_dispatch_results(
                            yara_result, **kwargs)

                        if dispatch_result['sha1'] in temp_processed_hashes:
                            self.stoq.log.info(
                                "Skipping duplicate hash: {}".format(
                                    dispatch_result['sha1']))
                            continue

                        temp_processed_hashes.setdefault(
                            dispatch_result['sha1'], True)

                        dispatch_queue.append(yara_result)

                        dispatch_result['payload_id'] = payload_id

                        if dispatch_result.get('save').lower(
                        ) == 'true' and self.archive_connector:
                            self.save_payload(yara_result[1],
                                              self.archive_connector)

                        results['results'].append(dispatch_result)
                        results['plugins'].update(
                            {str(payload_id): dispatch_result['plugin']})

                        payload_id += 1

                dispatch_payloads = dispatch_queue.copy()
                dispatch_queue = []

                current_depth += 1

        results['payloads'] = payload_id

        # If we want the results for all plugins to be returned in one
        # big json blob, combined_results must be true.
        if self.combined_results:
            results, template_results = self._save_results(results)
        else:
            # Looks like we want to save each result individually, this
            # gets complex.
            split_results = []
            split_template_results = []

            # Make sure we save the top level key/values so we can append
            # them to the new individual result dict
            result_date = results['date']
            result_payloads = results['payloads']
            result_plugins = results['plugins']

            for result in results['results']:
                # Create the new individual results dict
                plugin_result = {}
                plugin_result['date'] = result_date
                plugin_result['payloads'] = result_payloads
                plugin_result['plugins'] = result_plugins
                plugin_result['results'] = [result]

                # Because this function returns the results, we are going
                # to save the individual results as it is returned from
                # the _save_results function
                r, t = self._save_results(plugin_result)

                # Append the results to the main results list. In many cases
                # templates won't be utilized, so no sense in saving them if
                # nothing is there.
                split_results.append(r)
                if t:
                    split_template_results.append(t)

            # Replace the original results with our newly created list of
            # results.
            results = split_results
            if split_template_results:
                template_results = split_template_results

        return results, template_results
Example #15
    def scan(self, payload, **kwargs):
        """
        Publish messages to single or multiple RabbitMQ queues for processing

        :param bytes payload: Payload to be published
        :param str path: Path to file being ingested
        :param list submission_list: List of queues to publish to
        :param int priority: Priority of message, if supported by publisher

        :returns: True

        """

        super().scan()

        opts = {}

        # For every file we ingest we are going to assign a unique
        # id so we can link everything across the scope of the ingest.
        # This will be assigned to submissions within archive files as well
        # in order to simplify correlating files post-ingest.
        if 'uuid' not in kwargs:
            kwargs['uuid'] = [self.stoq.get_uuid]

        if payload and 'sha1' not in kwargs:
            kwargs['sha1'] = get_sha1(payload)

        if 'path' in kwargs:
            kwargs['path'] = os.path.abspath(kwargs['path'])
            self.log.info("Ingesting {}".format(kwargs['path']))
        else:
            self.log.info("Ingesting {}".format(kwargs['uuid'][-1]))

        if 'submission_list' in kwargs:
            self.submission_list = kwargs['submission_list']
            kwargs.pop('submission_list')

        # Using self.stoq.worker.archive_connector in case this plugin is
        # called from another plugin. This will ensure that the correct
        # archive connector is defined when the message is published.
        if self.stoq.worker.archive_connector and self.name != self.stoq.worker.name:
            kwargs['archive'] = self.stoq.worker.archive_connector
        elif self.archive_connector:
            kwargs['archive'] = self.archive_connector
        else:
            kwargs['archive'] = "file"

        if 'priority' in kwargs:
            opts['priority'] = int(kwargs['priority'])
            kwargs.pop('priority')
        else:
            opts['priority'] = self.priority

        for routing_key in self.submission_list:
            if self.publish_connector.payload:
                payload = self.stoq.get_file(kwargs['path'])
                self.publish_connector.publish(kwargs, routing_key, payload=payload, **opts)
            else:
                self.publish_connector.publish(kwargs, routing_key, **opts)

        return True
Example #16
    def scan(self, payload, **kwargs):
        """
        Publish messages to single or multiple RabbitMQ queues for processing

        :param bytes payload: Payload to be published
        :param str path: Path to file being ingested
        :param list submission_list: List of queues to publish to
        :param int priority: Priority of message, if supported by publisher

        :returns: True

        """

        super().scan()

        opts = {}

        # For every file we ingest we are going to assign a unique
        # id so we can link everything across the scope of the ingest.
        # This will be assigned to submissions within archive files as well
        # in order to simplify correlating files post-ingest.
        if 'uuid' not in kwargs:
            kwargs['uuid'] = [self.stoq.get_uuid]

        if payload and 'sha1' not in kwargs:
            kwargs['sha1'] = get_sha1(payload)

        if 'path' in kwargs:
            kwargs['path'] = os.path.abspath(kwargs['path'])
            self.log.info("Ingesting {}".format(kwargs['path']))
        else:
            self.log.info("Ingesting {}".format(kwargs['uuid'][-1]))

        if 'submission_list' in kwargs:
            self.submission_list = kwargs['submission_list']
            kwargs.pop('submission_list')

        # Using self.stoq.worker.archive_connector in case this plugin is
        # called from another plugin. This will ensure that the correct
        # archive connector is defined when the message is published.
        if self.stoq.worker.archive_connector and self.name != self.stoq.worker.name:
            kwargs['archive'] = self.stoq.worker.archive_connector
        elif self.archive_connector:
            kwargs['archive'] = self.archive_connector
        else:
            kwargs['archive'] = "file"

        if 'priority' in kwargs:
            opts['priority'] = int(kwargs['priority'])
            kwargs.pop('priority')
        else:
            opts['priority'] = self.priority

        for routing_key in self.submission_list:
            if 'payload' in signature(
                    self.publish_connector.publish).parameters:
                payload = self.stoq.get_file(kwargs['path'])
                self.publish_connector.publish(kwargs,
                                               routing_key,
                                               payload=payload,
                                               **opts)
            else:
                self.publish_connector.publish(kwargs, routing_key, **opts)

        return True
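The scan() variant above decides whether to pass the raw payload along by inspecting the publish connector's signature. A minimal illustration of that check with inspect.signature (the publish function here is hypothetical):

    from inspect import signature

    def publish(msg, routing_key, payload=None, **opts):
        pass

    # True only when publish() accepts a 'payload' keyword argument
    print('payload' in signature(publish).parameters)  # -> True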
Example #17
    def save(self, payload, archive=False, **kwargs):
        """
        Save results to mongodb

        :param dict/bytes payload: Content to be inserted into mongodb
        :param bool archive: Define whether the payload is a binary sample
                             that should be saved in GridFS.
        :param **kwargs sha1: SHA1 hash of payload. Used when saving results
                              as well as payloads to GridFS. Automatically
                              generated if no value is provided.
        :param **kwargs *: Any additional attributes that should be added
                           to the GridFS object on insert

        """

        self.archive = archive

        # Let's attempt to save our data to mongo at most 3 times. The very
        # first time, this will always fail because we haven't made a
        # connection to mongodb or gridfs yet.
        for save_attempt in range(3):
            try:
                if self.archive:
                    # Assign the indexed _id key to be that of the sha1 for the
                    # file.  This will eliminate duplicate files stored within
                    # GridFS.
                    if save_attempt == 0:
                        kwargs['_id'] = kwargs['sha1']
                    try:
                        # Attempt to insert the payload into GridFS
                        with self.collection.new_file(**kwargs) as fp:
                            fp.write(payload)
                        break
                    except (DuplicateKeyError, FileExists):
                        # Looks like the file is a duplicate, let's just
                        # continue on.
                        break
                else:
                    # Check to see if we have a sha1 in the payload to be
                    # saved. If not, let's hash the payload with sha1.
                    try:
                        payload['_id'] = kwargs['sha1']
                    except KeyError:
                        payload['_id'] = get_sha1(payload)

                    try:
                        # Attempt to insert
                        self.collection.insert(payload)
                    except DuplicateKeyError:
                        # Looks like our insert failed due to a Duplicate key.
                        # Let's go ahead and update the results key with our
                        # new results rather than create a new record.
                        for result in payload['results']:
                            self.collection.update({'_id': payload['_id']},
                                                   {'$push':
                                                    {'results': result}})

                # Python oddity. If we do not remove this here, even though we
                # are not returning value and using it, it still overwrites the
                # original results value. If anyone knows how to avoid this,
                # please let me know and save me some sanity.
                payload.pop('_id')

                # Success..let's break out of our loop.
                break
            except Exception:
                # We probably don't have a valid MongoDB connection. Time to
                # make one.
                self.connect()

        super().save()
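The insert-then-append behavior above (insert a new document, or push onto an existing document's results array when the _id already exists) can be sketched with the modern pymongo API as follows; the connection URL and collection names are hypothetical:

    from pymongo import MongoClient
    from pymongo.errors import DuplicateKeyError

    collection = MongoClient('mongodb://localhost:27017')['stoq']['results']

    def save_result(doc):
        try:
            collection.insert_one(doc)
        except DuplicateKeyError:
            # A document with this _id already exists; append the new
            # results to its 'results' array instead of inserting.
            for result in doc['results']:
                collection.update_one({'_id': doc['_id']},
                                      {'$push': {'results': result}})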