Ejemplo n.º 1
0
    def try_run(self):
        """Serve tasks received over the task named pipe until told to stop.

        Loads the service class, writes the runtime manifest when it does not
        exist yet, opens the task and done named pipes, starts the service and
        then handles one task per line read from the task pipe. The outcome of
        every task is reported as a JSON line on the done pipe.

        Returns when the task pipe is closed or yields an empty message.
        """
        try:
            self.service_class = load_module_by_path(SERVICE_PATH)
        except Exception:
            self.log.error("Could not find service in path.")
            raise

        if not os.path.exists(self.runtime_service_manifest):
            # The service tag placeholder has not been substituted yet (mostly happens while debugging),
            # so render the runtime manifest from the original one here
            fallback_tag = f"{FRAMEWORK_VERSION}.{SYSTEM_VERSION}.0.dev0"
            service_tag = os.environ.get("SERVICE_TAG", fallback_tag).encode("utf-8")

            with open(self.service_manifest, "rb") as template:
                with open(self.runtime_service_manifest, "wb") as rendered:
                    rendered.writelines(
                        raw_line.replace(b"$SERVICE_TAG", service_tag)
                        for raw_line in template.readlines()
                    )

        def open_fifo(fifo_path, mode):
            # Create the named pipe on first use, then open it
            if not os.path.exists(fifo_path):
                os.mkfifo(fifo_path)
            return open(fifo_path, mode)

        # Named pipe on which tasks are received
        self.log.info('Waiting for receive task named pipe to be ready...')
        self.task_fifo = open_fifo(self.task_fifo_path, "r")

        # Named pipe on which task completion is reported
        self.log.info('Waiting for complete task named pipe to be ready...')
        self.done_fifo = open_fifo(self.done_fifo_path, "w")

        # Reload the service again with the new config parameters (if any) received from service server
        self.load_service_attributes()
        self.service.start_service()

        while self.running:
            try:
                readable, _, _ = select.select([self.task_fifo], [], [], 1)
            except ValueError:
                self.log.info('Task fifo is closed. Cleaning up...')
                return
            if not readable:
                # Nothing arrived within the one second timeout, check self.running again
                continue

            task_json_path = self.task_fifo.readline().strip()
            if not task_json_path:
                self.log.info('Received an empty message for Task fifo. Cleaning up...')
                return

            self.log.info(f"Task found in: {task_json_path}")
            with open(task_json_path, 'r') as task_file:
                task = ServiceTask(json.load(task_file))
            self.service.handle_task(task)

            # Notify task handler that processing is done
            output_prefix = os.path.join(tempfile.gettempdir(), f"{task.sid}_{task.fileinfo.sha256}")
            result_json = f"{output_prefix}_result.json"
            error_json = f"{output_prefix}_error.json"
            if os.path.exists(result_json):
                outcome = [result_json, SUCCESS]
            elif os.path.exists(error_json):
                outcome = [error_json, ERROR]
            else:
                # The service left neither a result nor an error file behind
                outcome = [None, ERROR]

            self.done_fifo.write(f"{json.dumps(outcome)}\n")
            self.done_fifo.flush()
Ejemplo n.º 2
0
    def __init__(self,
                 datastore,
                 logger,
                 classification=None,
                 redis=None,
                 persistent_redis=None,
                 metrics_name='ingester'):
        """Set up the configuration, redis connections, metrics and queues.

        Args:
            datastore: Datastore handle; kept on the instance and handed to the submission client.
            logger: Logger kept as ``self.log``.
            classification: Optional classification engine. When falsy,
                ``forge.get_classification()`` is used.
            redis: Optional non-persistent redis client. When falsy, a client is
                created from the ``core.redis.nonpersistent`` config.
            persistent_redis: Optional persistent redis client. When falsy, a client
                is created from the ``core.redis.persistent`` config.
            metrics_name: Name passed to the metrics factory.
        """
        self.datastore = datastore
        self.log = logger

        # User group cache and the state used to expire it
        # TODO are middle man instances single threaded now?
        self.cache_lock = threading.RLock()
        self._user_groups = {}
        self._user_groups_reset = time.time() // HOUR_IN_SECONDS
        self.cache = {}
        self.notification_queues = {}
        self.whitelisted = {}
        self.whitelisted_lock = threading.RLock()

        # Config values are read through a cache that refreshes them periodically
        self.config = forge.CachedObject(forge.get_config)

        # The module paths below are resolved once, here; picking up a change requires a restart
        low_priority_path = self.config.core.ingester.is_low_priority
        self.is_low_priority = load_module_by_path(low_priority_path)
        whitelist_verdict_path = self.config.core.ingester.get_whitelist_verdict
        self.get_whitelist_verdict = load_module_by_path(whitelist_verdict_path)
        whitelist_path = self.config.core.ingester.whitelist
        self.whitelist = load_module_by_path(whitelist_path)

        # The constants come from a configurable location, hence loaded at init time
        priority_constants = forge.get_constants(self.config)
        self.priority_value = priority_constants.PRIORITIES
        self.priority_range = priority_constants.PRIORITY_RANGES
        self.threshold_value = priority_constants.PRIORITY_THRESHOLDS

        # Redis connections: reuse the clients given by the caller, otherwise connect using the config
        self.redis = redis if redis else get_client(
            host=self.config.core.redis.nonpersistent.host,
            port=self.config.core.redis.nonpersistent.port,
            private=False,
        )
        self.persistent_redis = persistent_redis if persistent_redis else get_client(
            host=self.config.core.redis.persistent.host,
            port=self.config.core.redis.persistent.port,
            private=False,
        )

        # Classification engine
        self.ce = classification if classification else forge.get_classification()

        # Factory used to gather metrics
        self.counter = MetricsFactory(
            metrics_type='ingester',
            schema=Metrics,
            redis=self.redis,
            config=self.config,
            name=metrics_name,
        )

        # State. Submissions in progress live in the persistent redis so that
        # they can be recovered after a crash.
        self.scanning = Hash('m-scanning-table', self.persistent_redis)

        # Input. The dispatcher creates a record here when any submission completes.
        self.complete_queue = NamedQueue(_completeq_name, self.redis)

        # Internal. Dropped entries are placed on this queue (currently disabled).
        # self.drop_queue = NamedQueue('m-drop', self.persistent_redis)

        # Input. An external process places submission requests on this queue.
        self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.persistent_redis)

        # Output. Copy of the input traffic, so that other systems may clone it.
        self.traffic_queue = CommsQueue('submissions', self.redis)

        # Internal. Unique requests are placed in and processed from this queue.
        self.unique_queue = PriorityQueue('m-unique', self.persistent_redis)

        # Internal. Delay queue used for retrying.
        self.retry_queue = PriorityQueue('m-retry', self.persistent_redis)

        # Internal. Timeout watch queue.
        self.timeout_queue = PriorityQueue('m-timeout', self.redis)

        # Internal. Queue for processing duplicates.
        #   A duplicate (same cache key => same file and same submission parameters)
        #   is not ingested normally; a reference to it is written to a duplicate
        #   queue instead. When a file finishes, the complete method finalizes the
        #   original ingestion and every entry of the duplicate queue, so all
        #   concurrent ingestions of the same file are 'merged' into one submission.
        self.duplicate_queue = MultiQueue(self.persistent_redis)

        # Output. Submissions that should have alerts generated.
        self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.persistent_redis)

        # Helper object used to submit tasks to dispatching
        self.submit_client = SubmissionClient(
            datastore=self.datastore,
            redis=self.redis,
        )
Ejemplo n.º 3
0
def get_process_alert_message():
    """Load and return what the ``core.alerter.process_alert_message`` config path points to."""
    module_path = get_config().core.alerter.process_alert_message
    return load_module_by_path(module_path)
Ejemplo n.º 4
0
    def try_run(self):
        """Run the service once against ``FILE_PATH`` and keep its output next to that file.

        Steps:
          1. Load the service class and its manifest.
          2. Identify the input file (un-CaRTing it when it is a CaRT archive) and
             place a copy named after its sha256 in the temporary directory.
          3. Build a ``ServiceTask`` and hand it to the service.
          4. Move the produced files and ``result.json`` into
             ``<input file name>_<service name>`` beside the input file.
          5. Validate the produced result on a best-effort basis; validation
             problems are logged and do not prevent the output from being kept.

        Returns early, after logging, when ``FILE_PATH`` is not a file.

        Raises:
            Exception: when the service class cannot be loaded, or when the
                service did not produce a result json.
        """
        try:
            self.service_class = load_module_by_path(SERVICE_PATH)
        except ValueError:
            # Propagated untouched so that it skips the generic log message below
            raise
        except Exception:
            LOG.error(
                "Could not find service in path. Check your environment variables."
            )
            raise

        self.load_service_manifest()

        if not os.path.isfile(FILE_PATH):
            LOG.info(f"File not found: {FILE_PATH}")
            return

        self.file_dir = os.path.dirname(FILE_PATH)

        # Get filename and working dir
        file_name = os.path.basename(FILE_PATH)
        working_dir = os.path.join(self.file_dir,
                                   f'{file_name}_{SERVICE_NAME.lower()}')
        output_dir = os.path.join(working_dir, 'working_directory')

        # Start service
        self.service.start_service()

        # Identify the file
        file_info = self.identify.fileinfo(FILE_PATH)
        if file_info['type'] == "archive/cart" or file_info[
                'magic'] == "custom: archive/cart":
            # This is a CART file, uncart it and recreate the file info object
            original_temp = os.path.join(tempfile.gettempdir(),
                                         file_info['sha256'])
            with open(FILE_PATH, 'rb') as ifile, open(original_temp,
                                                      'wb') as ofile:
                unpack_stream(ifile, ofile)

            # The un-CaRTed content has its own hashes, rename the file accordingly
            file_info = self.identify.fileinfo(original_temp)
            target_file = os.path.join(tempfile.gettempdir(),
                                       file_info['sha256'])
            shutil.move(original_temp, target_file)
            LOG.info(
                f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing"
            )

        else:
            # Not a CaRT, copy the file to the right place to be processed
            target_file = os.path.join(tempfile.gettempdir(),
                                       file_info['sha256'])
            shutil.copyfile(FILE_PATH, target_file)

        # Create service processing task
        service_task = ServiceTask(
            dict(
                sid=get_random_id(),
                metadata={},
                service_name=SERVICE_NAME,
                service_config=self.submission_params,
                fileinfo=dict(
                    magic=file_info['magic'],
                    md5=file_info['md5'],
                    mime=file_info['mime'],
                    sha1=file_info['sha1'],
                    sha256=file_info['sha256'],
                    size=file_info['size'],
                    type=file_info['type'],
                ),
                filename=file_name,
                min_classification=forge.get_classification().UNRESTRICTED,
                max_files=501,  # TODO: get the actual value
                ttl=3600))

        LOG.info(f"Starting task with SID: {service_task.sid}")

        # Set the working directory to a directory with same parent as input file,
        # starting from a clean slate when a previous run left one behind
        if os.path.isdir(working_dir):
            shutil.rmtree(working_dir)
        os.makedirs(output_dir)

        self.service.handle_task(service_task)

        # Move the result.json and extracted/supplementary files to the working directory
        source = os.path.join(tempfile.gettempdir(), 'working_directory')
        if not os.path.exists(source):
            os.makedirs(source)

        for produced_name in os.listdir(source):
            shutil.move(os.path.join(source, produced_name), output_dir)

        # Cleanup files from the original directory created by the service base
        shutil.rmtree(source)

        result_json = os.path.join(
            tempfile.gettempdir(),
            f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

        if not os.path.exists(result_json):
            raise Exception(
                "A service error occured and no result json was found.")

        # Validate the generated result
        with open(result_json, 'r') as fh:
            try:
                result = json.load(fh)
                result.pop('temp_submission_data', None)
                for produced_file in result['response']['extracted'] + result[
                        'response']['supplementary']:
                    produced_file.pop('path', None)

                # Load heuristics
                heuristics = get_heuristics()

                # Transform heuristics and calculate score
                total_score = 0
                for section in result['result']['sections']:
                    # Ignore tag and sig safe flags since we have no connection to the safelist
                    section.pop('zeroize_on_tag_safe', None)
                    section.pop('zeroize_on_sig_safe', None)

                    if section['heuristic']:
                        heur_id = section['heuristic']['heur_id']

                        try:
                            section['heuristic'], new_tags = HeuristicHandler(
                            ).service_heuristic_to_result_heuristic(
                                section['heuristic'], heuristics)
                            for tag in new_tags:
                                section['tags'].setdefault(tag[0], [])
                                if tag[1] not in section['tags'][tag[0]]:
                                    section['tags'][tag[0]].append(tag[1])
                            total_score += section['heuristic']['score']
                        except InvalidHeuristicException:
                            section['heuristic'] = None
                        else:
                            # Only a successfully converted heuristic can be named;
                            # an invalid one was just reset to None above
                            section['heuristic']['name'] = heuristics[heur_id][
                                'name']
                result['result']['score'] = total_score

                # Add timestamps for creation, archive and expiry
                result['created'] = now_as_iso()
                result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
                result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 *
                                                 60)

                result = Result(result)

                # Print the result on console if in debug mode
                if args.debug:
                    LOG.debug(
                        f"{SERVICE_NAME.upper()}-RESULT".center(60, '-'))
                    for line in pprint.pformat(
                            result.result.as_primitives()).split('\n'):
                        LOG.debug(line)
            except Exception as e:
                # Validation is best effort: report the problem and still keep the output
                LOG.error(f"Invalid result created: {str(e)}")

        LOG.info(
            f"Cleaning up file used for temporary processing: {target_file}")
        os.unlink(target_file)

        LOG.info(
            f"Moving {result_json} to the working directory: {working_dir}/result.json"
        )
        shutil.move(result_json, os.path.join(working_dir, 'result.json'))

        LOG.info(
            f"Successfully completed task. Output directory: {working_dir}")
Ejemplo n.º 5
0
        name = name.lower()
        classpath = 'unknown'
        # noinspection PyBroadException
        try:
            classpath = settings.classpath
            cfg = settings.config
            if isinstance(cfg, str):
                # TODO: this needs testing that can only be done when a service datasource is available.
                path = cfg
                cfg = config
                for point in path.split('.'):
                    if 'enabled' in cfg:
                        if not cfg['enabled']:
                            raise SkipDatasource()
                    cfg = cfg.get(point)
            cls = load_module_by_path(classpath)
            obj = cls(LOGGER, **cfg)
            sources[name] = create_query_datasource(obj)
        except SkipDatasource:
            continue
        except Exception:
            LOGGER.exception(
                "Problem creating %s datasource (%s)", name, classpath
            )
except Exception:
    LOGGER.exception("No datasources")


# noinspection PyUnusedLocal
@hash_search_api.route("/<file_hash>/", methods=["GET"])
@api_login(required_priv=['R'])
Ejemplo n.º 6
0
    def try_run(self):
        """Serve tasks received over the task named pipe until told to stop.

        Loads the service class and its attributes, opens the task and done
        named pipes, starts the service and then handles one task per line read
        from the task pipe. The outcome of every task is reported as a JSON line
        on the done pipe.

        Returns when the task pipe is closed or yields an empty message.
        """
        try:
            self.service_class = load_module_by_path(SERVICE_PATH)
        except Exception:
            self.log.error("Could not find service in path.")
            raise

        self.load_service_attributes()

        # Named pipe on which tasks are received
        self.log.info('Waiting for receive task named pipe to be ready...')
        task_fifo_missing = not os.path.exists(TASK_FIFO_PATH)
        if task_fifo_missing:
            os.mkfifo(TASK_FIFO_PATH)
        self.task_fifo = open(TASK_FIFO_PATH, "r")

        # Named pipe on which task completion is reported
        self.log.info('Waiting for complete task named pipe to be ready...')
        done_fifo_missing = not os.path.exists(DONE_FIFO_PATH)
        if done_fifo_missing:
            os.mkfifo(DONE_FIFO_PATH)
        self.done_fifo = open(DONE_FIFO_PATH, "w")

        # Reload the service again with the new config parameters (if any) received from service server
        self.load_service_attributes(save=False)
        self.service.start_service()

        while self.running:
            try:
                readable, _, _ = select.select([self.task_fifo], [], [], 1)
            except ValueError:
                self.log.info('Task fifo is closed. Cleaning up...')
                return
            if not readable:
                # Nothing arrived within the one second timeout, check self.running again
                continue

            task_json_path = self.task_fifo.readline().strip()
            if not task_json_path:
                self.log.info(
                    'Received an empty message for Task fifo. Cleaning up...')
                return

            self.log.info(f"Task found in: {task_json_path}")
            with open(task_json_path, 'r') as task_file:
                task = ServiceTask(json.load(task_file))
            self.service.handle_task(task)

            # Notify task handler that processing is done
            output_prefix = os.path.join(
                tempfile.gettempdir(), f"{task.sid}_{task.fileinfo.sha256}")
            result_json = f"{output_prefix}_result.json"
            error_json = f"{output_prefix}_error.json"
            if os.path.exists(result_json):
                outcome = [result_json, SUCCESS]
            elif os.path.exists(error_json):
                outcome = [error_json, ERROR]
            else:
                # The service left neither a result nor an error file behind
                outcome = [None, ERROR]

            self.done_fifo.write(f"{json.dumps(outcome)}\n")
            self.done_fifo.flush()
Ejemplo n.º 7
0
    def try_run(self):
        """Register the service, then fetch and process tasks until stopped.

        The service class is loaded and registered with the tasking client. When
        the registration says not to keep alive, or ``REGISTER_ONLY`` is set, the
        handler stops right away. Otherwise the service is instantiated from the
        registration results and, while ``self.running`` is true, each task is:

          1. validated and completed with the manifest's default submission params,
          2. (when the service requires the file) downloaded and hash-checked,
          3. handed to the service, unless the download already failed,
          4. reported through ``_handle_task_result`` or ``_handle_task_error``
             according to the resulting status.

        Raises:
            Exception: when the service class cannot be loaded.
            RuntimeError: when handling a task result fails with a runtime error
                that is not recoverable.
        """
        self.status = STATUSES.INITIALIZING

        # Try to load service class
        try:
            service_class = load_module_by_path(SERVICE_PATH)
        except Exception:
            self.log.error("Could not find service in path.")
            raise

        # Load on-disk manifest for bootstrap/registration
        service_manifest = self._load_manifest()

        # Register the service
        registration = self.tasking_client.register_service(service_manifest)

        # Are we just registering?
        if not registration['keep_alive'] or REGISTER_ONLY:
            self.status = STATUSES.STOPPING
            self.stop()
            return

        # Instantiate the service based of the registration results
        self.service_config = registration.get('service_config', {})
        self.service = service_class(config=self.service_config.get('config'))
        self.service_name = self.service_config['name']
        self.service_tool_version = self.service.get_tool_version()
        self.metric_factory = MetricsFactory('service',
                                             Metrics,
                                             name=self.service_name,
                                             export_zero=False,
                                             redis=self.redis)
        file_required = self.service_config.get('file_required', True)

        # Start the service
        self.service.start_service()

        while self.running:
            # Cleanup the working directory
            self._cleanup_working_directory()

            # Get a task
            self.status = STATUSES.WAITING_FOR_TASK
            task, _ = self.tasking_client.get_task(
                self.client_id, self.service_name,
                self.service_config['version'], self.service_tool_version,
                self.metric_factory)

            if not task:
                continue

            # Reset the per-task output paths. A task that fails before being
            # processed (e.g. on a hash mismatch) must neither hit an unbound
            # name nor report the error file of the previous task.
            result_json = None
            error_json = None

            # Load Task
            try:
                # Inspect task to ensure submission parameters are given, add defaults where necessary
                params = {
                    x['name']:
                    task['service_config'].get(x['name'], x['default'])
                    for x in service_manifest.get('submission_params', [])
                }
                task['service_config'] = params
                service_task = ServiceTask(task)
                self.log.info(f"[{service_task.sid}] New task received")
            except ValueError as e:
                self.log.error(f"Invalid task received: {str(e)}")
                continue

            # Download file if needed
            if file_required:
                self.status = STATUSES.DOWNLOADING_FILE
                file_path = os.path.join(self.tasking_dir,
                                         service_task.fileinfo.sha256)
                received_file_sha256 = None
                self.log.info(
                    f"[{service_task.sid}] Downloading file: {service_task.fileinfo.sha256}"
                )
                try:
                    self.filestore.download(service_task.fileinfo.sha256,
                                            file_path)
                    received_file_sha256 = get_sha256_for_file(file_path)
                except FileStoreException:
                    self.status = STATUSES.FILE_NOT_FOUND
                    self.log.error(
                        f"[{service_task.sid}] Requested file not found in the system: {service_task.fileinfo.sha256}"
                    )

                # If the file retrieved is different from what we requested, report the error
                if received_file_sha256 and received_file_sha256 != service_task.fileinfo.sha256:
                    self.status = STATUSES.ERROR_FOUND
                    self.log.error(
                        f"[{service_task.sid}] Downloaded ({received_file_sha256}) doesn't match "
                        f"requested ({service_task.fileinfo.sha256})")

            # Process if we're not already in error
            if self.status not in [
                    STATUSES.ERROR_FOUND, STATUSES.FILE_NOT_FOUND
            ]:
                self.status = STATUSES.PROCESSING
                self.service.handle_task(service_task)

                # Check for the response from the service
                result_json = os.path.join(
                    self.tasking_dir,
                    f"{service_task.sid}_{service_task.fileinfo.sha256}_result.json"
                )
                error_json = os.path.join(
                    self.tasking_dir,
                    f"{service_task.sid}_{service_task.fileinfo.sha256}_error.json"
                )
                if os.path.exists(result_json):
                    self.status = STATUSES.RESULT_FOUND
                elif os.path.exists(error_json):
                    self.status = STATUSES.ERROR_FOUND
                else:
                    # The service produced neither file: report an error without a path
                    self.status = STATUSES.ERROR_FOUND
                    error_json = None

            # Handle the service response
            if self.status == STATUSES.RESULT_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task successfully completed")
                try:
                    self._handle_task_result(result_json, service_task)
                except RuntimeError as runtime_error:
                    if is_recoverable_runtime_error(runtime_error):
                        self.log.info(
                            f"[{service_task.sid}] Service trying to use a threadpool during shutdown, "
                            "sending recoverable error.")
                        self._handle_task_error(service_task)
                    else:
                        raise
            elif self.status == STATUSES.ERROR_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task completed with errors")
                self._handle_task_error(service_task,
                                        error_json_path=error_json)
            elif self.status == STATUSES.FILE_NOT_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task completed with errors due to missing file from filestore"
                )
                self._handle_task_error(service_task,
                                        status="FAIL_NONRECOVERABLE",
                                        error_type="EXCEPTION")