def save(self, payload, archive=False, **kwargs):
    """
    Save results to S3

    :param bytes payload: Content to be stored in Amazon S3 container
    :param **kwargs s3bucket: Bucket name to be used
    :param **kwargs sha1: SHA1 hash to be used as a filename

    :returns: Filename used to save the payload
    :rtype: bytes

    """

    if 's3bucket' in kwargs:
        self.connect(bucket_name=kwargs['s3bucket'])
    else:
        self.connect()

    if 'sha1' in kwargs:
        filename = kwargs['sha1']
    else:
        filename = get_sha1(payload)

    key = boto.s3.key.Key(self.bucket, filename)

    try:
        key.set_contents_from_string(payload)
    except Exception as err:
        self.stoq.log.error("Unable to save file to S3: {}".format(str(err)))
        return None

    return filename
def save(self, payload, archive=False, **kwargs):
    """
    Save results to S3

    :param bytes payload: Content to be stored in Amazon S3 container
    :param **kwargs index: Bucket name to be used
    :param **kwargs sha1: SHA1 hash to be used as a filename

    :returns: Filename used to save the payload
    :rtype: bytes

    """

    s3bucket = kwargs.get('index', self.bucket_name)
    self.connect(bucket_name=s3bucket)

    filename = kwargs.get('sha1', get_sha1(payload))

    key = boto.s3.key.Key(self.bucket, filename)

    try:
        key.set_contents_from_string(payload)
    except Exception as err:
        self.log.error("Unable to save file to S3: {}".format(str(err)))
        return None

    return filename
def attachment_metadata(self, payload=None, filename=None, uuid=None):
    """
    Gather metadata for an e-mail attachment

    :param bytes payload: Attachment content
    :param str filename: Filename of the attachment
    :param list uuid: uuid chain of the parent e-mail

    :returns: Attachment metadata, or None if no payload was provided
    :rtype: dict or None

    """

    # Make sure we have a payload, otherwise return None
    if not payload or len(payload) <= 0:
        return None

    attachment_json = {}

    # Generate hashes
    attachment_json['md5'] = get_md5(payload)
    attachment_json['sha1'] = get_sha1(payload)
    attachment_json['sha256'] = get_sha256(payload)
    attachment_json['sha512'] = get_sha512(payload)
    attachment_json['ssdeep'] = get_ssdeep(payload)

    # Get magic type
    attachment_json['magic'] = get_magic(payload)

    # Get size
    attachment_json['size'] = len(payload)

    # Define the filename as provided
    attachment_json['filename'] = filename

    # Make sure we have the parent uuid generated with the original email
    attachment_json['uuid'] = uuid.copy()

    # Generate a unique ID
    attachment_json['uuid'].append(self.stoq.get_uuid)

    return attachment_json
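# A minimal, self-contained illustration of the uuid handling above:
# attachment_metadata copies the parent uuid list before appending its own
# entry, so the caller's chain is never mutated. uuid4 stands in for
# self.stoq.get_uuid here, which is an assumption about its behavior.
from uuid import uuid4

parent_uuids = ['aaaa-1111']                # uuid chain of the original email
attachment_uuids = parent_uuids.copy()      # same .copy() as above
attachment_uuids.append(str(uuid4()))       # unique id for this attachment

assert parent_uuids == ['aaaa-1111']        # parent chain is unmodified
assert len(attachment_uuids) == 2           # parent uuid + attachment uuid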
def save(self, payload, archive=False, **kwargs):
    """
    Save results to disk

    :param str payload: Content to be saved
    :param bool archive: Is this a file that is being archived?
    :param **kwargs sha1: SHA1 hash to use as a filename

    """

    if archive:
        if 'sha1' in kwargs:
            sha1 = kwargs['sha1']
        else:
            sha1 = get_sha1(payload)

        path = self.stoq.hashpath(sha1)
        self.stoq.write(payload, path=path, filename=sha1, binary=True)
        self.stoq.log.info("Saving file to disk: {}/{}".format(path, sha1))
    else:
        path = os.path.join(self.stoq.results_dir, self.parentname)
        self.stoq.write(path=path, payload=self.stoq.dumps(payload))
        self.stoq.log.info("Saving file to disk: {}".format(path))

    return True
def save(self, payload, archive=False, **kwargs):
    """
    Save results to S3

    :param bytes payload: Content to be stored in Amazon S3 container
    :param **kwargs index: Bucket name to be used
    :param **kwargs sha1: SHA1 hash to be used as a filename

    :returns: Filename used to save the payload
    :rtype: bytes

    """

    s3bucket = kwargs.get('index', self.bucket_name)
    self.connect(bucket_name=s3bucket)

    filename = kwargs.get('sha1', get_sha1(payload))

    key = boto.s3.key.Key(self.bucket, filename)

    try:
        key.set_contents_from_string(payload)
    except Exception as err:
        self.stoq.log.error("Unable to save file to S3: {}".format(str(err)))
        return None

    return filename
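# All three S3 save() variants above follow the same boto (v2) flow:
# connect, pick a filename (the caller's sha1 or one derived from the
# payload), then upload through a Key object. A standalone sketch of that
# flow, assuming boto v2 is installed and AWS credentials are available in
# the environment; the bucket name and payload are illustrative.
import hashlib

import boto
from boto.s3.key import Key

def s3_put(payload, bucket_name):
    conn = boto.connect_s3()                       # credentials from env/config
    bucket = conn.get_bucket(bucket_name)
    filename = hashlib.sha1(payload).hexdigest()   # same role as get_sha1()
    key = Key(bucket, filename)
    key.set_contents_from_string(payload)
    return filename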
def save(self, payload, archive=False, **kwargs):
    """
    Save results to disk

    :param str payload: Content to be saved
    :param bool archive: Is this a file that is being archived?
    :param str index: Directory name to save content to
    :param str sha1: SHA1 hash to use as a filename
    :param str filename: Filename to save the file as
    :param str path: Path where the file will be saved to
    :param bool use_date: Append current date to the path
    :param bool append: Allow append to output file?

    """

    if archive:
        filename = kwargs.get('sha1', get_sha1(payload))
        path = self.stoq.hashpath(filename)
        binary = kwargs.get('binary', True)
        append = kwargs.get('append', False)
    else:
        path = kwargs.get('path', None)
        if not path:
            index = kwargs.get('index', self.parentname)
            path = "{}/{}".format(self.stoq.results_dir, index)

        append = kwargs.get('append', False)
        filename = kwargs.get('filename', None)
        binary = kwargs.get('binary', False)

    if not binary:
        payload = self.stoq.dumps(payload, compactly=self.compactly)

    # Append a newline to the result, if we are appending to a file
    if append:
        payload += '\n'

    use_date = kwargs.get('use_date', False)
    if use_date:
        now = datetime.now().strftime(self.date_format)
        path = "{}/{}".format(path, now)

    fullpath = self.stoq.write(path=path, filename=filename,
                               payload=payload, binary=binary,
                               append=append)

    return fullpath
def scan(self, payload, **kwargs):
    """
    Publish messages to single or multiple RabbitMQ queues for processing

    :param bytes payload: Payload to be published
    :param **kwargs path: Path to file being ingested
    :param **kwargs user_comments: Comments associated with payload
    :param **kwargs submission_list: List of queues to publish to

    :returns: Results from scan
    :rtype: bool

    """

    super().scan()

    self.stoq.log.info("Ingesting: %s" % kwargs['path'])

    # For every file we ingest we are going to assign a unique
    # id so we can link everything across the scope of the ingest.
    # This will be assigned to submissions within archive files as well
    # in order to simplify correlating files post-ingest.
    kwargs['uuid'] = self.stoq.get_uuid

    if payload:
        kwargs['sha1'] = get_sha1(payload)

    kwargs['path'] = os.path.abspath(kwargs['path'])

    if self.user_comments:
        kwargs['user_comments'] = self.user_comments

    if 'submission_list' in kwargs:
        self.submission_list = kwargs['submission_list']
        kwargs.pop('submission_list')

    if self.stoq.worker.archive_connector:
        kwargs['archive'] = self.archive_connector
    else:
        kwargs['archive'] = "file"

    for routing_key in self.submission_list:
        self.publish_connector.publish(kwargs, routing_key)

    return True
def save(self, payload, archive=False, **kwargs):
    """
    Save results to disk

    :param str payload: Content to be saved
    :param bool archive: Is this a file that is being archived?
    :param str index: Directory name to save content to
    :param str sha1: SHA1 hash to use as a filename
    :param str filename: Filename to save the file as
    :param str path: Path where the file will be saved to
    :param bool append: Allow append to output file?

    """

    if archive:
        filename = kwargs.get('sha1', get_sha1(payload))
        path = self.stoq.hashpath(filename)
        binary = kwargs.get('binary', True)
        append = kwargs.get('append', False)
    else:
        path = kwargs.get('path', None)
        if not path:
            path = self.stoq.results_dir
            name = kwargs.get('index', self.parentname)
            path = os.path.join(path, name)

        append = kwargs.get('append', False)
        filename = kwargs.get('filename', None)
        binary = kwargs.get('binary', False)

    if not binary:
        payload = self.stoq.dumps(payload, compactly=self.compactly)

    # Append a newline to the result, if we are appending to a file
    if append:
        payload += '\n'

    fullpath = self.stoq.write(path=path, filename=filename,
                               payload=payload, binary=binary,
                               append=append)

    return fullpath
def save(self, payload, archive=False, **kwargs):
    """
    Save results to GCS

    :param bytes payload: Content to be stored in GCS
    :param **kwargs index: Bucket name to be used
    :param **kwargs sha1: SHA1 hash to be used as a filename

    :returns: Filename used to save the payload
    :rtype: bytes

    """

    if not self.conn:
        self.connect()

    bucket = kwargs.get('index', self.bucket_name)
    sha1 = kwargs.get('sha1', get_sha1(payload))
    magic = get_magic(payload)

    hashpath = '/'.join(list(sha1[:5]))
    filename = "{}/{}".format(hashpath, sha1)

    body = {
        'name': filename
    }

    content = BytesIO(payload)
    media_body = http.MediaIoBaseUpload(content, magic)

    try:
        req = self.conn.objects().insert(bucket=bucket, body=body,
                                         media_body=media_body)
        resp = req.execute()
        self.stoq.log.debug(resp)
    except Exception as err:
        self.stoq.log.error("Unable to save file to GCS: {}".format(str(err)))
        return None

    return filename
def save(self, payload, archive=False, **kwargs):
    """
    Save results to GCS

    :param bytes payload: Content to be stored in GCS
    :param **kwargs index: Bucket name to be used
    :param **kwargs sha1: SHA1 hash to be used as a filename

    :returns: Filename used to save the payload
    :rtype: bytes

    """

    if not self.conn:
        self.connect()

    bucket = kwargs.get('index', self.bucket_name)
    sha1 = kwargs.get('sha1', get_sha1(payload))
    magic = get_magic(payload)

    hashpath = '/'.join(list(sha1[:5]))
    filename = "{}/{}".format(hashpath, sha1)

    body = {'name': filename}

    content = BytesIO(payload)
    media_body = http.MediaIoBaseUpload(content, magic)

    try:
        req = self.conn.objects().insert(bucket=bucket, body=body,
                                         media_body=media_body)
        resp = req.execute()
        self.log.debug(resp)
    except Exception as err:
        self.log.error("Unable to save file to GCS: {}".format(str(err)))
        return None

    return filename
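# Both GCS variants shard objects by the payload's SHA1: the first five hex
# characters become nested path components, which keeps any single prefix
# from accumulating too many objects. A self-contained illustration of the
# layout (this sha1 happens to be that of the string "1"):
sha1 = '356a192b7913b04c54574d18c28d46e6395428ab'
hashpath = '/'.join(list(sha1[:5]))
filename = "{}/{}".format(hashpath, sha1)
assert filename == '3/5/6/a/1/356a192b7913b04c54574d18c28d46e6395428ab'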
def start(self, payload=None, **kwargs):
    """
    Process the payload with the worker plugin

    :param bytes payload: (optional) Payload to be processed
    :param \*\*kwargs: additional arguments that may be needed by the
                       worker plugin (i.e., username and password via HTTP)
    :type kwargs: dict or None

    :returns: Tuple of JSON results and template rendered results
    :rtype: dict and str

    """

    archive_type = False
    template_results = None
    payload_hashes = None

    results = {}
    results['results'] = []
    worker_result = {}

    results['date'] = self.stoq.get_time

    # If we don't have a uuid, let's generate one
    if 'uuid' not in kwargs:
        kwargs['uuid'] = self.stoq.get_uuid

    # If we have no payload, let's try to find one to process
    if not payload and 'archive' in kwargs:
        # We are going to use the 'archive' field in kwargs to define where
        # we are going to get the file from. Once we know that, we will
        # load the appropriate plugin if required. Then, we will call
        # get_file() to grab the payload.
        archive_type = kwargs['archive']
        self.load_connector(archive_type)
        if hasattr(self.connectors[archive_type], 'get_file'):
            payload = self.connectors[archive_type].get_file(**kwargs)
        else:
            self.stoq.log.warn("Connector unable to get file..skipping")
            return False

    if payload:
        # Make sure we define this before possibly modifying the full file
        # path when/if we archive.
        if 'filename' not in kwargs:
            if 'path' in kwargs:
                kwargs['filename'] = os.path.basename(kwargs['path'])
            else:
                kwargs['filename'] = "Unknown"

        # If this worker wants us to save this payload to the archive,
        # let's handle that now before anything else. Otherwise any
        # subsequent plugins may not be able to retrieve the files. We are
        # however going to skip saving the payload if our source is the
        # same as the connector.
        if self.archive_connector and self.archive_connector != archive_type:
            payload_hashes = self.save_payload(payload,
                                               self.archive_connector)

        # Some workers don't need a hash to be generated, so let's only
        # generate hashes if needed. This is defined in the .stoq
        # configuration file for the worker plugin. We are also only going
        # to generate a hash if our save_payload function hasn't been
        # called. Otherwise, we will just use those results.
        if self.hashpayload:
            if payload_hashes:
                worker_result.update(payload_hashes)
            else:
                worker_result.update(get_hashes(payload))

    # Send our payload to the worker, and store the results
    worker_result['scan'] = self.scan(payload, **kwargs)
    worker_result['plugin'] = self.name
    worker_result['uuid'] = kwargs['uuid']

    if payload:
        worker_result['size'] = len(payload)

    worker_result['payload_id'] = 0

    # Keep track of our total count of payloads, in case yara dispatch
    # finds something
    payload_id = 1

    results['results'].append(worker_result)

    # If we want to use the dispatcher, let's do that now
    if self.dispatch:
        # Our carver, extractor, and decoder plugins will return a list of
        # set()s. Let's make sure we handle the initial payload the same
        # way, so we can simplify the below routine.
        dispatch_payloads = [(None, payload)]
        current_depth = 0

        # Track hashes of payloads so we don't handle duplicates.
        processed_sha1s = {}

        while dispatch_payloads and self.stoq.max_recursion >= current_depth:
            for dispatch_payload in dispatch_payloads:
                # Skip over this payload if we've already processed it
                current_hash = get_sha1(dispatch_payload[1])
                if current_hash in processed_sha1s:
                    continue

                processed_sha1s[current_hash] = True
                dispatch_payloads = self.yara_dispatcher(dispatch_payload[1])

                # Something was carved, let's gather the metadata
                if dispatch_payloads:
                    dispatch_results = self._parse_dispatch_results(
                        dispatch_payloads, **kwargs)
                    if dispatch_results:
                        # Iterate over the results, grab the sha1, and add
                        # it to the list of processed hashes. Then, add the
                        # dispatch results to the primary results
                        for index, res in enumerate(dispatch_results):
                            if res['sha1'] in processed_sha1s:
                                continue
                            processed_sha1s[res['sha1']] = True
                            res['payload_id'] = payload_id
                            payload_id += 1
                            results['results'].append(res)

            current_depth += 1

    results['payloads'] = payload_id

    # Parse output with a template
    if self.template:
        try:
            template_path = "{}/templates".format(self.plugin_path)
            tpl_env = Environment(loader=FileSystemLoader(template_path),
                                  trim_blocks=True, lstrip_blocks=True)
            template_results = tpl_env.get_template(
                self.template).render(results=results)
        except TemplateNotFound:
            self.stoq.log.error("Unable to load template. Does {}/{} "
                                "exist?".format(template_path,
                                                self.template))
        except Exception as err:
            self.stoq.log.error(str(err))

    # If we are saving the results from the worker, let's take care of
    # it. This is defined in the .stoq configuration file for the
    # worker plugin. An output_connector must also be defined.
    if self.saveresults and self.output_connector:
        # Just to ensure we have loaded a connector for output
        self.load_connector(self.output_connector)

        # If there is a template that is named after the output connector
        # pass the templated results to the connector, otherwise pass the
        # raw results
        if template_results and \
                self.template.split(".")[0] == self.output_connector:
            self.connectors[self.output_connector].save(template_results)
        else:
            # Attempt to save the results, and pass along the primary
            # results as **kwargs, otherwise just pass along the results.
            try:
                kwargs = {'sha1': results['results'][0]['sha1']}
                self.connectors[self.output_connector].save(results,
                                                            **kwargs)
            except (KeyError, IndexError):
                self.connectors[self.output_connector].save(results)

    return results, template_results
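# The template step above builds a per-plugin Jinja2 environment and renders
# the results dict through it. A self-contained sketch of that rendering
# flow, using an in-memory DictLoader instead of the plugin's templates/
# directory; the template body here is illustrative.
from jinja2 import DictLoader, Environment

tpl_env = Environment(
    loader=DictLoader({'demo.tpl': 'sha1: {{ results.results[0].sha1 }}'}),
    trim_blocks=True, lstrip_blocks=True)

rendered = tpl_env.get_template('demo.tpl').render(
    results={'results': [{'sha1': 'deadbeef'}]})
assert rendered == 'sha1: deadbeef'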
def start(self, payload=None, **kwargs):
    """
    Process the payload with the worker plugin

    :param bytes payload: (optional) Payload to be processed
    :param \*\*kwargs: additional arguments that may be needed by the
                       worker plugin (i.e., username and password via HTTP)
    :type kwargs: dict or None

    :returns: Tuple of JSON results and template rendered results
    :rtype: dict and str or lists

    """

    archive_type = False
    payload_hashes = None
    template_results = None

    results = {}
    results['results'] = []
    results['plugins'] = {}
    worker_result = {}

    results['date'] = self.stoq.get_time

    # If we don't have a uuid, let's generate one
    kwargs['uuid'] = kwargs.get('uuid', self.stoq.get_uuid)

    # If we have no payload, let's try to find one to process
    if not payload and 'archive' in kwargs:
        # We are going to use the 'archive' field in kwargs to define where
        # we are going to get the file from. Once we know that, we will
        # load the appropriate plugin if required. Then, we will call
        # get_file() to grab the payload.
        archive_type = kwargs['archive']
        worker_result['archive'] = kwargs['archive']

        self.load_connector(archive_type)
        if hasattr(self.connectors[archive_type], 'get_file'):
            payload = self.connectors[archive_type].get_file(**kwargs)
        else:
            self.stoq.log.warn("Connector unable to get file..skipping")
            return False

    if payload:
        # Make sure we define this before possibly modifying the full file
        # path when/if we archive.
        if 'filename' not in kwargs:
            if 'path' in kwargs:
                kwargs['filename'] = os.path.basename(kwargs['path'])
                worker_result['path'] = kwargs['path']
            else:
                kwargs['filename'] = "Unknown"

        # Make sure we save the filename in the worker results as well
        worker_result['filename'] = kwargs['filename']

        # If this worker wants us to save this payload to the archive,
        # let's handle that now before anything else. Otherwise any
        # subsequent plugins may not be able to retrieve the files. We are
        # however going to skip saving the payload if our source is the
        # same as the connector.
        if self.archive_connector and self.archive_connector != archive_type:
            payload_hashes = self.save_payload(payload,
                                               self.archive_connector)

        # Some workers don't need a hash to be generated, so let's only
        # generate hashes if needed. This is defined in the .stoq
        # configuration file for the worker plugin. We are also only going
        # to generate a hash if our save_payload function hasn't been
        # called. Otherwise, we will just use those results.
        if self.hashpayload:
            if payload_hashes:
                worker_result.update(payload_hashes)
            else:
                worker_result.update(get_hashes(payload))

    # Send our payload to the worker, and store the results
    worker_result['scan'] = self.scan(payload, **kwargs)
    worker_result['plugin'] = self.name
    worker_result['uuid'] = kwargs['uuid']

    if payload:
        worker_result['size'] = len(payload)

    # Preserve the original metadata that was submitted with this payload
    worker_result['source_meta'] = kwargs.copy()

    # Check to see if the keys are in the primary result dict, if so,
    # we will remove them from the source_meta key, otherwise, we will
    # leave it be. Meant to reduce duplication of data when chaining
    # plugins.
    for k, v in kwargs.items():
        if k in worker_result:
            if v == worker_result[k]:
                worker_result['source_meta'].pop(k, None)

        # Sometimes when chaining plugins source_meta will be appended
        # but the keys should be at the root of the results. Let's make
        # sure we move them to the root rather than storing them in the
        # source_meta
        elif k in ('filename', 'puuid', 'magic', 'ssdeep', 'path', 'size'):
            worker_result[k] = v
            worker_result['source_meta'].pop(k, None)

    worker_result['payload_id'] = 0

    results['plugins'].update({"0": self.name})

    # Keep track of our total count of payloads, in case yara dispatch
    # finds something
    payload_id = 1

    results['results'].append(worker_result)

    # If we want to use the dispatcher, let's do that now
    if self.dispatch:
        # Our carver, extractor, and decoder plugins will return a list of
        # set()s. Let's make sure we handle the initial payload the same
        # way, so we can simplify the below routine.
        dispatch_payloads = [({}, payload)]
        current_depth = 0

        # Track hashes of payloads so we don't handle duplicates.
        processed_hashes = {}

        while dispatch_payloads and \
                int(self.stoq.max_recursion) >= current_depth:
            for index, dispatch_payload in enumerate(dispatch_payloads):
                dispatch_payloads.pop(index)

                current_hash = dispatch_payload[0].get(
                    'sha1', get_sha1(dispatch_payload[1]))

                # Skip over this payload if we've already processed it
                if current_hash in processed_hashes:
                    self.stoq.log.info(
                        "Skipping duplicate hash: {}".format(current_hash))
                    continue

                processed_hashes.setdefault(current_hash, True)

                # We are copy()ing processed hashes so we don't dispatch
                # payloads twice, but we still want to be able to send
                # dispatched payloads for additional processing
                temp_processed_hashes = processed_hashes.copy()

                # Send the payload to the yara dispatcher
                for yara_result in self.yara_dispatcher(dispatch_payload[1]):
                    dispatch_result = self._parse_dispatch_results(
                        yara_result, **kwargs)

                    if dispatch_result['sha1'] in temp_processed_hashes:
                        self.stoq.log.info(
                            "Skipping duplicate hash: {}".format(
                                dispatch_result['sha1']))
                        continue

                    temp_processed_hashes.setdefault(
                        dispatch_result['sha1'], True)

                    dispatch_payloads.append(yara_result)

                    dispatch_result['payload_id'] = payload_id

                    if dispatch_result.get('save', '').lower() == 'true' \
                            and self.archive_connector:
                        self.save_payload(yara_result[1],
                                          self.archive_connector)

                    results['results'].append(dispatch_result)
                    results['plugins'].update(
                        {str(payload_id): dispatch_result['dispatcher']})

                    payload_id += 1

            current_depth += 1

    results['payloads'] = payload_id

    # If we want the results for all plugins to be returned in one
    # big json blob, combined_results must be true.
    if self.combined_results:
        results, template_results = self._save_results(results)
    else:
        # Looks like we want to save each result individually, this
        # gets complex.
        split_results = []
        split_template_results = []

        # Make sure we save the top level key/values so we can append
        # them to the new individual result dict
        result_date = results['date']
        result_payloads = results['payloads']
        result_plugins = results['plugins']

        for result in results['results']:
            # Create the new individual results dict
            plugin_result = {}
            plugin_result['date'] = result_date
            plugin_result['payloads'] = result_payloads
            plugin_result['plugins'] = result_plugins
            plugin_result['results'] = [result]

            # Because this function returns the results, we are going
            # to save the individual results as it is returned from
            # the _save_results function
            r, t = self._save_results(plugin_result)

            # Append the results to the main results list. In many cases
            # templates won't be utilized, so no sense in saving them if
            # nothing is there.
            split_results.append(r)
            if t:
                split_template_results.append(t)

        # Replace the original results with our newly created list of
        # results.
        results = split_results
        if split_template_results:
            template_results = split_template_results

    return results, template_results
def start(self, payload=None, **kwargs):
    """
    Process the payload with the worker plugin

    :param bytes payload: (optional) Payload to be processed
    :param \*\*kwargs: additional arguments that may be needed by the
                       worker plugin (i.e., username and password via HTTP)
    :type kwargs: dict or None

    :returns: Tuple of JSON results and template rendered results
    :rtype: dict and str or lists

    """

    archive_type = False
    payload_hashes = None
    template_results = None

    results = {}
    results['results'] = []
    results['plugins'] = {}
    worker_result = {}

    results['date'] = self.stoq.get_time

    # If we don't have a uuid, let's generate one
    kwargs['uuid'] = kwargs.get('uuid', self.stoq.get_uuid)

    # Set the originating uuid to that of the first payload submitted
    kwargs['ouuid'] = kwargs.get('ouuid', kwargs['uuid'])

    # If we have no payload, let's try to find one to process
    if not payload and 'archive' in kwargs:
        # We are going to use the 'archive' field in kwargs to define where
        # we are going to get the file from. Once we know that, we will
        # load the appropriate plugin if required. Then, we will call
        # get_file() to grab the payload.
        archive_type = kwargs['archive']
        worker_result['archive'] = kwargs['archive']

        self.load_connector(archive_type)
        if hasattr(self.connectors[archive_type], 'get_file'):
            payload = self.connectors[archive_type].get_file(**kwargs)
        else:
            self.stoq.log.warn("Connector unable to get file..skipping")
            return False

    if payload:
        # Make sure we define this before possibly modifying the full file
        # path when/if we archive.
        if 'filename' not in kwargs:
            if 'path' in kwargs:
                kwargs['filename'] = os.path.basename(kwargs['path'])
                worker_result['path'] = kwargs['path']
            else:
                kwargs['filename'] = "Unknown"

        # Make sure we save the filename in the worker results as well
        worker_result['filename'] = kwargs['filename']

        # If this worker wants us to save this payload to the archive,
        # let's handle that now before anything else. Otherwise any
        # subsequent plugins may not be able to retrieve the files. We are
        # however going to skip saving the payload if our source is the
        # same as the connector.
        if self.archive_connector and self.archive_connector != archive_type:
            payload_hashes = self.save_payload(payload,
                                               self.archive_connector)

        # Some workers don't need a hash to be generated, so let's only
        # generate hashes if needed. This is defined in the .stoq
        # configuration file for the worker plugin. We are also only going
        # to generate a hash if our save_payload function hasn't been
        # called. Otherwise, we will just use those results.
        if self.hashpayload:
            if payload_hashes:
                worker_result.update(payload_hashes)
            else:
                worker_result.update(get_hashes(payload))

    # Send our payload to the worker, and store the results
    worker_result['scan'] = self.scan(payload, **kwargs)
    worker_result['plugin'] = self.name
    worker_result['uuid'] = kwargs['uuid']

    if payload:
        worker_result['size'] = len(payload)

    # Preserve the original metadata that was submitted with this payload
    worker_result['source_meta'] = kwargs.copy()

    # Check to see if the keys are in the primary result dict, if so,
    # we will remove them from the source_meta key, otherwise, we will
    # leave it be. Meant to reduce duplication of data when chaining
    # plugins.
    for k, v in kwargs.items():
        if k in worker_result:
            if v == worker_result[k]:
                worker_result['source_meta'].pop(k, None)

        # Sometimes when chaining plugins source_meta will be appended
        # but the keys should be at the root of the results. Let's make
        # sure we move them to the root rather than storing them in the
        # source_meta
        elif k in ('filename', 'puuid', 'magic', 'ssdeep', 'path', 'ouuid'):
            worker_result[k] = v
            worker_result['source_meta'].pop(k, None)

    worker_result['payload_id'] = 0

    results['plugins'].update({"0": self.name})

    # Keep track of our total count of payloads, in case yara dispatch
    # finds something
    payload_id = 1

    results['results'].append(worker_result)

    # If we want to use the dispatcher, let's do that now
    if self.dispatch:
        # Our carver, extractor, and decoder plugins will return a list of
        # set()s. Let's make sure we handle the initial payload the same
        # way, so we can simplify the below routine.
        dispatch_payloads = [({}, payload)]
        dispatch_queue = []
        current_depth = 0

        # Track hashes of payloads so we don't handle duplicates.
        processed_hashes = {}

        while dispatch_payloads and \
                int(self.stoq.max_recursion) >= current_depth:
            for index, dispatch_payload in enumerate(dispatch_payloads):
                dispatch_payloads.pop(index)

                current_hash = dispatch_payload[0].get(
                    'sha1', get_sha1(dispatch_payload[1]))

                # Skip over this payload if we've already processed it
                if current_hash in processed_hashes:
                    self.stoq.log.info(
                        "Skipping duplicate hash: {}".format(current_hash))
                    continue

                processed_hashes.setdefault(current_hash, True)

                # We are copy()ing processed hashes so we don't dispatch
                # payloads twice, but we still want to be able to send
                # dispatched payloads for additional processing
                temp_processed_hashes = processed_hashes.copy()

                # Send the payload to the yara dispatcher
                for yara_result in self.yara_dispatcher(dispatch_payload[1]):
                    dispatch_result = self._parse_dispatch_results(
                        yara_result, **kwargs)

                    if dispatch_result['sha1'] in temp_processed_hashes:
                        self.stoq.log.info(
                            "Skipping duplicate hash: {}".format(
                                dispatch_result['sha1']))
                        continue

                    temp_processed_hashes.setdefault(
                        dispatch_result['sha1'], True)

                    dispatch_queue.append(yara_result)

                    dispatch_result['payload_id'] = payload_id

                    if dispatch_result.get('save', '').lower() == 'true' \
                            and self.archive_connector:
                        self.save_payload(yara_result[1],
                                          self.archive_connector)

                    results['results'].append(dispatch_result)
                    results['plugins'].update(
                        {str(payload_id): dispatch_result['plugin']})

                    payload_id += 1

            dispatch_payloads = dispatch_queue.copy()
            dispatch_queue = []

            current_depth += 1

    results['payloads'] = payload_id

    # If we want the results for all plugins to be returned in one
    # big json blob, combined_results must be true.
    if self.combined_results:
        results, template_results = self._save_results(results)
    else:
        # Looks like we want to save each result individually, this
        # gets complex.
        split_results = []
        split_template_results = []

        # Make sure we save the top level key/values so we can append
        # them to the new individual result dict
        result_date = results['date']
        result_payloads = results['payloads']
        result_plugins = results['plugins']

        for result in results['results']:
            # Create the new individual results dict
            plugin_result = {}
            plugin_result['date'] = result_date
            plugin_result['payloads'] = result_payloads
            plugin_result['plugins'] = result_plugins
            plugin_result['results'] = [result]

            # Because this function returns the results, we are going
            # to save the individual results as it is returned from
            # the _save_results function
            r, t = self._save_results(plugin_result)

            # Append the results to the main results list. In many cases
            # templates won't be utilized, so no sense in saving them if
            # nothing is there.
            split_results.append(r)
            if t:
                split_template_results.append(t)

        # Replace the original results with our newly created list of
        # results.
        results = split_results
        if split_template_results:
            template_results = split_template_results

    return results, template_results
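# The dispatch loops in the start() variants above are, at their core, a
# depth-bounded breadth-first walk over carved payloads, deduplicated by
# SHA1. A stripped-down, self-contained sketch of that control flow;
# fake_dispatcher stands in for yara_dispatcher and is purely illustrative.
import hashlib

def fake_dispatcher(payload):
    # Pretend every payload longer than one byte carves into two halves.
    if len(payload) > 1:
        mid = len(payload) // 2
        yield payload[:mid]
        yield payload[mid:]

def dispatch_walk(payload, max_recursion=3):
    queue = [payload]
    seen = set()
    handled = []
    current_depth = 0
    while queue and max_recursion >= current_depth:
        next_queue = []
        for item in queue:
            digest = hashlib.sha1(item).hexdigest()
            if digest in seen:
                continue            # skip duplicate payloads
            seen.add(digest)
            handled.append(item)
            next_queue.extend(fake_dispatcher(item))
        queue = next_queue
        current_depth += 1
    return handled

print(dispatch_walk(b'abcdefgh'))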
def scan(self, payload, **kwargs):
    """
    Publish messages to single or multiple RabbitMQ queues for processing

    :param bytes payload: Payload to be published
    :param str path: Path to file being ingested
    :param list submission_list: List of queues to publish to
    :param int priority: Priority of message, if supported by publisher

    :returns: True

    """

    super().scan()

    opts = {}

    # For every file we ingest we are going to assign a unique
    # id so we can link everything across the scope of the ingest.
    # This will be assigned to submissions within archive files as well
    # in order to simplify correlating files post-ingest.
    if 'uuid' not in kwargs:
        kwargs['uuid'] = [self.stoq.get_uuid]

    if payload and 'sha1' not in kwargs:
        kwargs['sha1'] = get_sha1(payload)

    if 'path' in kwargs:
        kwargs['path'] = os.path.abspath(kwargs['path'])
        self.log.info("Ingesting {}".format(kwargs['path']))
    else:
        self.log.info("Ingesting {}".format(kwargs['uuid'][-1]))

    if 'submission_list' in kwargs:
        self.submission_list = kwargs['submission_list']
        kwargs.pop('submission_list')

    # Using self.stoq.worker.archive_connector in case this plugin is
    # called from another plugin. This will ensure that the correct
    # archive connector is defined when the message is published.
    if self.stoq.worker.archive_connector and \
            self.name != self.stoq.worker.name:
        kwargs['archive'] = self.stoq.worker.archive_connector
    elif self.archive_connector:
        kwargs['archive'] = self.archive_connector
    else:
        kwargs['archive'] = "file"

    if 'priority' in kwargs:
        opts['priority'] = int(kwargs['priority'])
        kwargs.pop('priority')
    else:
        opts['priority'] = self.priority

    for routing_key in self.submission_list:
        if self.publish_connector.payload:
            payload = self.stoq.get_file(kwargs['path'])
            self.publish_connector.publish(kwargs, routing_key,
                                           payload=payload, **opts)
        else:
            self.publish_connector.publish(kwargs, routing_key, **opts)

    return True
def scan(self, payload, **kwargs):
    """
    Publish messages to single or multiple RabbitMQ queues for processing

    :param bytes payload: Payload to be published
    :param str path: Path to file being ingested
    :param list submission_list: List of queues to publish to
    :param int priority: Priority of message, if supported by publisher

    :returns: True

    """

    super().scan()

    opts = {}

    # For every file we ingest we are going to assign a unique
    # id so we can link everything across the scope of the ingest.
    # This will be assigned to submissions within archive files as well
    # in order to simplify correlating files post-ingest.
    if 'uuid' not in kwargs:
        kwargs['uuid'] = [self.stoq.get_uuid]

    if payload and 'sha1' not in kwargs:
        kwargs['sha1'] = get_sha1(payload)

    if 'path' in kwargs:
        kwargs['path'] = os.path.abspath(kwargs['path'])
        self.log.info("Ingesting {}".format(kwargs['path']))
    else:
        self.log.info("Ingesting {}".format(kwargs['uuid'][-1]))

    if 'submission_list' in kwargs:
        self.submission_list = kwargs['submission_list']
        kwargs.pop('submission_list')

    # Using self.stoq.worker.archive_connector in case this plugin is
    # called from another plugin. This will ensure that the correct
    # archive connector is defined when the message is published.
    if self.stoq.worker.archive_connector and \
            self.name != self.stoq.worker.name:
        kwargs['archive'] = self.stoq.worker.archive_connector
    elif self.archive_connector:
        kwargs['archive'] = self.archive_connector
    else:
        kwargs['archive'] = "file"

    if 'priority' in kwargs:
        opts['priority'] = int(kwargs['priority'])
        kwargs.pop('priority')
    else:
        opts['priority'] = self.priority

    for routing_key in self.submission_list:
        if 'payload' in signature(
                self.publish_connector.publish).parameters:
            payload = self.stoq.get_file(kwargs['path'])
            self.publish_connector.publish(kwargs, routing_key,
                                           payload=payload, **opts)
        else:
            self.publish_connector.publish(kwargs, routing_key, **opts)

    return True
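# The second scan() variant above feature-detects whether the publish
# connector accepts a payload keyword by inspecting its call signature,
# instead of checking a payload attribute as the first variant does. A
# self-contained demonstration of that check; both publish functions here
# are illustrative stand-ins.
from inspect import signature

def publish_meta_only(msg, routing_key):
    pass

def publish_with_payload(msg, routing_key, payload=None):
    pass

assert 'payload' not in signature(publish_meta_only).parameters
assert 'payload' in signature(publish_with_payload).parameters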
def save(self, payload, archive=False, **kwargs):
    """
    Save results to mongodb

    :param dict/bytes payload: Content to be inserted into mongodb
    :param bool archive: Define whether the payload to be inserted as a
                         binary sample that should be saved in GridFS.
    :param **kwargs sha1: SHA1 hash of payload. Used with saving results
                          as well as payloads to GridFS. Automatically
                          generated if no value is provided.
    :param **kwargs *: Any additional attributes that should be added
                       to the GridFS object on insert

    """

    self.archive = archive

    # Let's attempt to save our data to mongo at most 3 times. The very
    # first time, this will always fail because we haven't made a
    # connection to mongodb or gridfs yet.
    for save_attempt in range(3):
        try:
            if self.archive:
                # Assign the indexed _id key to be that of the sha1 for
                # the file. This will eliminate duplicate files stored
                # within GridFS.
                if save_attempt == 0:
                    kwargs['_id'] = kwargs['sha1']
                try:
                    # Attempt to insert the payload into GridFS
                    with self.collection.new_file(**kwargs) as fp:
                        fp.write(payload)
                    break
                except (DuplicateKeyError, FileExists):
                    # Looks like the file is a duplicate, let's just
                    # continue on.
                    break
            else:
                # Check to see if we have a sha1 in the payload to be
                # saved. If not, let's hash the payload with sha1.
                try:
                    payload['_id'] = kwargs['sha1']
                except KeyError:
                    payload['_id'] = get_sha1(payload)

                try:
                    # Attempt to insert
                    self.collection.insert(payload)
                except DuplicateKeyError:
                    # Looks like our insert failed due to a duplicate key.
                    # Let's go ahead and update the results key with our
                    # new results rather than create a new record.
                    for result in payload['results']:
                        self.collection.update(
                            {'_id': payload['_id']},
                            {'$push': {'results': result}})

                # Python oddity. If we do not remove this here, even
                # though we are not returning the value and using it, it
                # still overwrites the original results value. If anyone
                # knows how to avoid this, please let me know and save me
                # some sanity.
                payload.pop('_id')

            # Success..let's break out of our loop.
            break
        except Exception:
            # We probably don't have a valid MongoDB connection. Time to
            # make one.
            self.connect()

    super().save()
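# The MongoDB save() above deduplicates results by using the sha1 as _id:
# a repeated insert raises DuplicateKeyError, which is turned into a $push
# of the new results onto the existing document. A minimal sketch of that
# insert-or-append pattern using pymongo's current API (insert_one /
# update_one rather than the deprecated insert / update above); it assumes
# a local mongod is reachable, and the database name and document are
# illustrative.
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

collection = MongoClient().stoq_demo.results

doc = {'_id': 'de' * 20, 'results': [{'plugin': 'demo'}]}
try:
    collection.insert_one(doc)
except DuplicateKeyError:
    # Document already exists, so append the new results instead
    for result in doc['results']:
        collection.update_one({'_id': doc['_id']},
                              {'$push': {'results': result}})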