Example #1
def load_journal(name, delete_queue):
    working_dir = config.core.expiry.journal.directory
    expiry_ttl = config.core.expiry.journal.ttl * 24 * 60 * 60
    log.debug("Expiry will load journal in %s for %s bucket." %
              (working_dir, name))
    while True:
        try:
            for listed_file in os.listdir(working_dir):
                journal_file = os.path.join(working_dir, listed_file)
                if os.path.isfile(journal_file):
                    if journal_file.endswith(name):
                        cur_time = now()
                        day = "%sT00:00:00Z" % listed_file.split(".")[0]
                        file_time = iso_to_epoch(day)
                        if file_time + expiry_ttl <= cur_time:
                            with open(journal_file) as to_delete_journal:
                                count = 0
                                for line in to_delete_journal:
                                    if count % 1000 == 0:
                                        while delete_queue.length() > MAX_QUEUE_LENGTH:
                                            time.sleep(SLEEP_TIME)

                                    line = line.strip()
                                    if line:
                                        delete_queue.push(line)

                                    count += 1

                            os.unlink(journal_file)
        except OSError:
            pass

        time.sleep(SLEEP_TIME)
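
The check above converts a journal file's date-stamped name to an epoch and deletes the file once the TTL has elapsed. A minimal sketch of that test, assuming journal files are named YYYY-MM-DD.<bucket> and substituting the standard library for the project's iso_to_epoch and now helpers (journal_is_expired is a hypothetical name):

import time
from datetime import datetime, timezone

def journal_is_expired(listed_file, ttl_days, current_time=None):
    # Hypothetical helper: derive the journal's day from its file name
    # ("2023-06-01.result" -> "2023-06-01T00:00:00Z") and compare it,
    # plus the TTL, against the current epoch time.
    day = "%sT00:00:00Z" % listed_file.split(".")[0]
    file_time = datetime.strptime(day, "%Y-%m-%dT%H:%M:%SZ").replace(
        tzinfo=timezone.utc).timestamp()
    cur_time = current_time if current_time is not None else time.time()
    return file_time + ttl_days * 24 * 60 * 60 <= cur_time

# journal_is_expired("2023-06-01.result", ttl_days=15) becomes True once
# 15 full days have passed since the start of that day (UTC).
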
Example #2
    def handle_retries(self):
        tasks = []
        while self.sleep(0 if tasks else 3):
            cpu_mark = time.process_time()
            time_mark = time.time()

            # Start of ingest message
            if self.apm_client:
                self.apm_client.begin_transaction('ingest_retries')

            tasks = self.retry_queue.dequeue_range(upper_limit=isotime.now(),
                                                   num=100)

            for task in tasks:
                self.ingest_queue.push(task)

            # End of ingest message (success)
            if self.apm_client:
                elasticapm.label(retries=len(tasks))
                self.apm_client.end_transaction('ingest_retries', 'success')

            self.counter.increment_execution_time(
                'cpu_seconds',
                time.process_time() - cpu_mark)
            self.counter.increment_execution_time('busy_seconds',
                                                  time.time() - time_mark)
Example #3
    def submit(self, task: IngestTask):
        self.submit_client.submit(
            submission_obj=task.submission,
            completed_queue=_completeq_name,
        )

        self.timeout_queue.push(int(now(_max_time)), task.scan_key)
        self.log.info(
            f"[{task.ingest_id} :: {task.sha256}] Submitted to dispatcher for analysis"
        )
Example #4
def process_alerts():
    global running  # pylint: disable=W0603

    consecutive_errors = 0

    end_t = now(interval)
    while running:
        if now() > end_t:
            logger.info("Finished interval (%ds). Restarting...", interval)
            running = False
            break

        event = queue.select(alertq, commandq, timeout=1)
        if not event:
            continue

        q_name = event[0]
        message = event[1]
        if q_name == alertq_name:
            counts.increment('alert.received')
            try:
                create_alert(counts, datastore, logger, message)
                consecutive_errors = 0
            except Exception as ex:  # pylint: disable=W0703
                consecutive_errors += 1
                retries = message['retries'] = message.get('retries', 0) + 1
                if retries > max_retries:
                    logger.exception('Max retries exceeded for: %s',
                                     str(message))
                else:
                    alertq.push(message)
                    if 'Submission not finalized' not in str(ex):
                        logger.exception('Unhandled exception processing: %s',
                                         str(message))

                for x in exit_msgs:
                    if x in str(ex):
                        consecutive_errors = max_consecutive_errors + 1
                        break

            if consecutive_errors > max_consecutive_errors:
                break
Example #5
def retry(raw, scan_key, sha256, ex):  # df node def
    current_time = now()

    notice = Notice(raw)
    retries = notice.get('retries', 0) + 1

    if retries > max_retries:
        trace = ''
        if ex and type(ex) != FileStoreException:
            trace = ': ' + get_stacktrace_info(ex)
        logger.error('Max retries exceeded for %s%s', sha256, trace)
        dupq.delete(dup_prefix + scan_key)
    elif expired(current_time - seconds(notice.get('ts', current_time)), 0):
        logger.info('No point retrying expired submission for %s', sha256)
        dupq.delete(dup_prefix + scan_key)  # df pull delete
    else:
        logger.info('Requeuing %s (%s)', sha256, ex or 'unknown')
        notice.set('retries', retries)
        notice.set('retry_at', now(retry_delay))

        retryq.push(notice.raw)  # df push push
Example #6
    def _get_version_map(self):
        self.engine_map = {}
        engine_list = []
        newest_dat = 0
        oldest_dat = now()

        url = self.cfg.get('BASE_URL') + "stat/engines"
        try:
            r = self.session.get(url=url, timeout=self.timeout)
        except requests.exceptions.Timeout:
            raise Exception("Metadefender service timeout.")

        engines = r.json()

        for engine in engines:
            if self.cfg.get("MD_VERSION") == 4:
                name = self._format_engine_name(engine["eng_name"])
                version = engine['eng_ver']
                def_time = engine['def_time']
                etype = engine['engine_type']
            elif self.cfg.get("MD_VERSION") == 3:
                name = self._format_engine_name(engine["eng_name"]).replace(
                    "scanengine", "")
                version = engine['eng_ver']
                def_time = engine['def_time'].replace(" AM", "").replace(
                    " PM", "").replace("/", "-").replace(" ", "T")
                def_time = def_time[6:10] + "-" + def_time[:5] + def_time[
                    10:] + "Z"
                etype = engine['eng_type']
            else:
                raise Exception("Unknown metadefender version")

            # Compute newest DAT
            dat_epoch = iso_to_epoch(def_time)
            if dat_epoch > newest_dat:
                newest_dat = dat_epoch

            if dat_epoch < oldest_dat and dat_epoch != 0 and etype in [
                    "av", "Bundled engine"
            ]:
                oldest_dat = dat_epoch

            self.engine_map[name] = {
                'version': version,
                'def_time': iso_to_local(def_time)[:19]
            }
            engine_list.append(name)
            engine_list.append(version)
            engine_list.append(def_time)

        self.newest_dat = epoch_to_local(newest_dat)[:19]
        self.oldest_dat = epoch_to_local(oldest_dat)[:19]
        self.dat_hash = hashlib.md5("".join(engine_list)).hexdigest()
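
The MetaDefender v3 branch above rebuilds an ISO-style timestamp purely with string replacement and slicing. A worked example of that transformation, assuming the v3 API reports definition times as MM/DD/YYYY hh:mm:ss AM (the exact source format is an assumption):

def_time = "02/06/2019 10:45:00 AM"   # hypothetical v3 input

def_time = def_time.replace(" AM", "").replace(" PM", "").replace(
    "/", "-").replace(" ", "T")        # "02-06-2019T10:45:00"
def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"

print(def_time)                        # "2019-02-06T10:45:00Z"

Note that the AM/PM marker is only stripped, not folded into a 24-hour value, so afternoon definition times keep their 12-hour form.
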
Example #7
def create_fake_result(svc_id, classification, srl, hours_to_live):
    start = now()
    # Generate a random configuration key
    length = int(random.random() * 32) + 32
    conf_bytes = "".join(
        [chr(int(random.random() * 256)) for _ in xrange(length)])
    conf_key = hashlib.md5(conf_bytes).hexdigest()[:7]

    # Update result object with random values
    res_obj = copy.deepcopy(RESULT_TEMPLATE)
    res_obj['__expiry_ts__'] = now_as_iso(hours_to_live * 60 * 60)
    res_obj['created'] = now_as_iso()
    res_obj['response']['service_name'] %= svc_id
    res_obj['classification'] = res_obj['result'][
        'classification'] = classification
    res_obj['srl'] = srl

    # Create result sections
    for _ in xrange(int(random.random() * 4) + 1):
        section = copy.deepcopy(RESULT_SECTION_TEMPLATE)
        section['classification'] = classification
        section['body'] = generate_random_words(
            int(random.random() * 1024) + 32)
        section['title_text'] = generate_random_words(
            int(random.random() * 14) + 2)
        res_obj['result']['sections'].append(section)

    # Create tags
    for _ in xrange(int(random.random() * 29) + 1):
        tag = copy.deepcopy(RESULT_TAG_TEMPLATE)
        tag['classification'] = classification
        tag['type'] = generate_random_words(1).upper()
        tag['value'] = generate_random_words(int(random.random() * 7) + 1)
        res_obj['result']['tags'].append(tag)

    # Update milestones
    res_obj['response']['milestones']['service_started'] = start
    res_obj['response']['milestones']['service_completed'] = now()
    return res_obj, conf_key
Example #8
    def handle_timeouts(self):
        timeouts = []
        while self.sleep(0 if timeouts else 3):
            cpu_mark = time.process_time()
            time_mark = time.time()

            # Start of ingest message
            if self.apm_client:
                self.apm_client.begin_transaction('ingest_timeouts')

            timeouts = self.timeout_queue.dequeue_range(
                upper_limit=isotime.now(), num=100)

            for scan_key in timeouts:
                # noinspection PyBroadException
                try:
                    actual_timeout = False

                    # Remove the entry from the hash of submissions in progress.
                    entry = self.scanning.pop(scan_key)
                    if entry:
                        actual_timeout = True
                        self.log.error("Submission timed out for %s: %s",
                                       scan_key, str(entry))

                    dup = self.duplicate_queue.pop(_dup_prefix + scan_key,
                                                   blocking=False)
                    if dup:
                        actual_timeout = True

                    while dup:
                        self.log.error("Submission timed out for %s: %s",
                                       scan_key, str(dup))
                        dup = self.duplicate_queue.pop(_dup_prefix + scan_key,
                                                       blocking=False)

                    if actual_timeout:
                        self.counter.increment('timed_out')
                except Exception:
                    self.log.exception("Problem timing out %s:", scan_key)

            # End of ingest message (success)
            if self.apm_client:
                elasticapm.label(timeouts=len(timeouts))
                self.apm_client.end_transaction('ingest_timeouts', 'success')

            self.counter.increment_execution_time(
                'cpu_seconds',
                time.process_time() - cpu_mark)
            self.counter.increment_execution_time('busy_seconds',
                                                  time.time() - time_mark)
Example #9
def submit(client, notice):
    priority = notice.get('priority')
    sha256 = notice.get('sha256')

    hdr = notice.parse(description=': '.join((default_prefix, sha256 or '')),
                       **defaults)

    user = hdr.pop('submitter')
    hdr.pop('priority', None)

    path = notice.get('filename', None) or sha256
    client.submit(sha256, path, priority, user, **hdr)
    with timeouts_lock:
        timeouts.append(Timeout(now(max_time), notice.get('scan_key')))
Example #10
    def get_logged_in_user(self):
        auto_auth_uname = self.auto_auth_check()
        if auto_auth_uname is not None:
            return auto_auth_uname

        session_id = flsk_session.get("session_id", None)

        if not session_id:
            current_app.logger.debug('session_id cookie not found')
            abort(401)

        session = KV_SESSION.get(session_id)

        if not session:
            current_app.logger.debug(
                f'[{session_id}] session_id not found in redis')
            abort(401)
        else:
            cur_time = now()
            if session.get('expire_at', 0) < cur_time:
                KV_SESSION.pop(session_id)
                current_app.logger.debug(
                    f'[{session_id}] session has expired '
                    f'{session.get("expire_at", 0)} < {cur_time}')
                abort(401)
            else:
                session['expire_at'] = cur_time + session.get('duration', 3600)

        if config.ui.validate_session_ip and \
                request.headers.get("X-Forwarded-For", request.remote_addr) != session.get('ip', None):
            current_app.logger.debug(
                f'[{session_id}] X-Forwarded-For does not match session IP '
                f'{request.headers.get("X-Forwarded-For", None)} != {session.get("ip", None)}'
            )
            abort(401)

        if config.ui.validate_session_useragent and \
                request.headers.get("User-Agent", None) != session.get('user_agent', None):
            current_app.logger.debug(
                f'[{session_id}] User-Agent does not match session user_agent '
                f'{request.headers.get("User-Agent", None)} != {session.get("user_agent", None)}'
            )
            abort(401)

        KV_SESSION.set(session_id, session)

        self.extra_session_checks(session)

        return session.get("username", None)
Example #11
    def retry(self, task, scan_key, ex):
        current_time = now()

        retries = task.retries + 1

        if retries > _max_retries:
            trace = ''
            if ex:
                trace = ': ' + get_stacktrace_info(ex)
            self.log.error(
                f'[{task.ingest_id} :: {task.sha256}] Max retries exceeded {trace}'
            )
            self.duplicate_queue.delete(_dup_prefix + scan_key)
        elif self.expired(current_time - task.ingest_time.timestamp(), 0):
            self.log.info(
                f'[{task.ingest_id} :: {task.sha256}] No point retrying expired submission'
            )
            self.duplicate_queue.delete(_dup_prefix + scan_key)
        else:
            self.log.info(
                f'[{task.ingest_id} :: {task.sha256}] Requeuing ({ex or "unknown"})'
            )
            task.retries = retries
            self.retry_queue.push(int(now(_retry_delay)), task.json())
Example #12
def process_retries():  # df node def
    while running:
        raw = retryq.pop(timeout=1)  # df pull pop
        if not raw:
            continue

        retry_at = raw['retry_at']
        delay = retry_at - now()

        if delay >= 0.125:
            retryq.unpop(raw)
            time.sleep(min(delay, 1))
            continue

        ingestq.push(raw)  # df push push
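
Examples #5 and #12 work as a pair: retry stamps each notice with retry_at = now(retry_delay) and process_retries only moves it back onto the ingest queue once that moment is (almost) due. A minimal sketch of the same delay check, assuming now(offset) returns the current epoch time plus offset seconds:

import time

RETRY_DELAY = 180  # seconds, mirroring retry_delay in the surrounding examples

def now(offset=0.0):
    # Assumed behaviour of the project's now() helper: epoch seconds plus offset.
    return time.time() + offset

def ready_to_retry(raw):
    # A notice goes back to the ingest queue only when retry_at is within
    # 0.125s of being due; otherwise the worker unpops it and sleeps.
    return raw['retry_at'] - now() < 0.125

notice = {'sha256': 'deadbeef', 'retry_at': now(RETRY_DELAY)}
print(ready_to_retry(notice))  # False until roughly 180 seconds have elapsed
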
Example #13
    def check(
        self,
        task: IngestTask,
        count_miss=True
    ) -> Tuple[Optional[str], Optional[str], Optional[float], str]:
        key = self.stamp_filescore_key(task)

        with self.cache_lock:
            result = self.cache.get(key, None)

        if result:
            self.counter.increment('cache_hit_local')
            self.log.info(
                f'[{task.ingest_id} :: {task.sha256}] Local cache hit')
        else:
            result = self.datastore.filescore.get_if_exists(key)
            if result:
                self.counter.increment('cache_hit')
                self.log.info(
                    f'[{task.ingest_id} :: {task.sha256}] Remote cache hit')
            else:
                if count_miss:
                    self.counter.increment('cache_miss')
                return None, None, None, key

            with self.cache_lock:
                self.cache[key] = result

        current_time = now()
        age = current_time - result.time
        errors = result.errors

        if self.expired(age, errors):
            self.log.info(
                f"[{task.ingest_id} :: {task.sha256}] Cache hit dropped, cache has expired"
            )
            self.counter.increment('cache_expired')
            self.cache.pop(key, None)
            self.datastore.filescore.delete(key)
            return None, None, None, key
        elif self.stale(age, errors):
            self.log.info(
                f"[{task.ingest_id} :: {task.sha256}] Cache hit dropped, cache is stale"
            )
            self.counter.increment('cache_stale')
            return None, None, result.score, key

        return result.psid, result.sid, result.score, key
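
The cache decision above hinges on two predicates, expired and stale, that are not part of this excerpt. Purely as an illustration of the shape such age-based checks could take (the thresholds and the error handling are assumptions, not the project's real values):

# Illustrative thresholds only; the real values would come from configuration.
EXPIRE_AFTER = 15 * 24 * 60 * 60    # forget cached scores older than 15 days
STALE_AFTER = 1 * 24 * 60 * 60      # re-score results older than 1 day
ERROR_STALE_AFTER = 60 * 60         # sooner if the cached run had errors

def expired(age, errors):
    return age > EXPIRE_AFTER

def stale(age, errors):
    return age > (ERROR_STALE_AFTER if errors else STALE_AFTER)
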
Example #14
def send_heartbeat():
    t = now()

    up_hours = (t - start_time) / (60.0 * 60.0)

    queues = {}
    drop_p = {}

    for level in ('low', 'medium', 'critical', 'high'):
        queues[level] = uniqueq.count(*priority_range[level])
        threshold = sample_threshold[level]
        drop_p[level] = 1 - max(0, drop_chance(queues[level], threshold))

    heartbeat = {
        'hostinfo': hostinfo,
        'inflight': scanning.length(),
        'ingest': ingestq.length(),
        'ingesting': drop_p,
        'queues': queues,
        'shard': shard,
        'up_hours': up_hours,
        'waiting': submissionq.length(),
        'ingest.bytes_completed': 0,
        'ingest.bytes_ingested': 0,
        'ingest.duplicates': 0,
        'ingest.files_completed': 0,
        'ingest.skipped': 0,
        'ingest.submissions_completed': 0,
        'ingest.submissions_ingested': 0,
        'ingest.timed_out': 0,
        'ingest.whitelisted': 0,
    }

    # Send ingester stats.
    exported = ingester_counts.export()

    # Add ingester stats to our heartbeat.
    heartbeat.update(exported)

    # Send our heartbeat.
    raw = message.Message(to="*",
                          sender='middleman',
                          mtype=message.MT_INGESTHEARTBEAT,
                          body=heartbeat).as_dict()
    statusq.publish(raw)

    # Send whitelister stats.
    whitelister_counts.export()
Example #15
    def process_retries(self) -> int:
        # Start of ingest message
        if self.apm_client:
            self.apm_client.begin_transaction('ingest_msg')

        tasks = self.ingester.retry_queue.dequeue_range(
            upper_limit=isotime.now(), num=10)

        for task in tasks:
            self.ingester.ingest_queue.push(task)

        # End of ingest message (success)
        if self.apm_client:
            elasticapm.tag(retries=len(tasks))
            self.apm_client.end_transaction('ingest_retries', 'success')

        return len(tasks)
Example #16
    def rescan(self, submission: Submission, results: Dict[str, Result],
               file_infos: Dict[str, FileInfo], file_tree, errors: List[str],
               rescan_services: List[str]):
        """
        Rescan a submission started on another system.
        """
        # Reset submission processing data
        submission['times'].pop('completed')
        submission['state'] = 'submitted'

        # Set the list of service to rescan
        submission['params']['services']['rescan'] = rescan_services

        # Create the submission object
        submission_obj = Submission(submission)

        if len(submission_obj.files) == 0:
            raise SubmissionException("No files found to submit.")

        for f in submission_obj.files:
            if not self.datastore.file.exists(f.sha256):
                raise SubmissionException(
                    f"File {f.sha256} does not exist, cannot continue submission."
                )

        # Set the new expiry
        if submission_obj.params.ttl:
            submission_obj.expiry_ts = epoch_to_iso(now() +
                                                    submission_obj.params.ttl *
                                                    24 * 60 * 60)

        # Clearing runtime_excluded on initial submit or resubmit
        submission_obj.params.services.runtime_excluded = []

        # Save the submission
        self.datastore.submission.save(submission_obj.sid, submission_obj)

        # Dispatch the submission
        self.log.debug("Submission complete. Dispatching: %s",
                       submission_obj.sid)
        self.dispatcher.dispatch_bundle(submission_obj, results, file_infos,
                                        file_tree, errors)

        return submission
Example #17
def process_timeouts():  # df node def
    global timeouts  # pylint:disable=W0603

    with timeouts_lock:
        current_time = now()
        index = 0

        for t in timeouts:
            if t.time >= current_time:
                break

            index += 1

            try:
                timed_out(t.scan_key)  # df push calls
            except:  # pylint: disable=W0702
                logger.exception("Problem timing out %s:", t.scan_key)

        timeouts = timeouts[index:]
Example #18
    def process_timeouts(self):
        # Start of ingest message
        if self.apm_client:
            self.apm_client.begin_transaction('ingest_msg')

        ingester = self.ingester
        timeouts = ingester.timeout_queue.dequeue_range(
            upper_limit=isotime.now(), num=10)

        for scan_key in timeouts:
            try:
                actual_timeout = False

                # Remove the entry from the hash of submissions in progress.
                entry = ingester.scanning.pop(scan_key)
                if entry:
                    actual_timeout = True
                    self.log.error("Submission timed out for %s: %s", scan_key,
                                   str(entry))

                dup = ingester.duplicate_queue.pop(_dup_prefix + scan_key,
                                                   blocking=False)
                if dup:
                    actual_timeout = True

                while dup:
                    self.log.error("Submission timed out for %s: %s", scan_key,
                                   str(dup))
                    dup = ingester.duplicate_queue.pop(_dup_prefix + scan_key,
                                                       blocking=False)

                if actual_timeout:
                    ingester.counter.increment('ingest_timeout')
            except Exception:
                self.log.exception("Problem timing out %s:", scan_key)

        # End of ingest message (success)
        if self.apm_client:
            elasticapm.tag(timeouts=len(timeouts))
            self.apm_client.end_transaction('ingest_timeouts', 'success')

        return len(timeouts)
Example #19
def check(datastore, notice):
    key = stamp_filescore_key(notice)

    with cache_lock:
        result = cache.get(key, None)

    counter_name = 'ingest.cache_hit_local'
    if result:
        logger.info('Local cache hit')
    else:
        counter_name = 'ingest.cache_hit'

        result = datastore.get_filescore(key)
        if result:
            logger.info('Remote cache hit')
        else:
            ingester_counts.increment('ingest.cache_miss')
            return None, False, None, key

        add(key, result.get('psid', None), result['sid'], result['score'],
            result.get('errors', 0), result['time'])

    current_time = now()
    delta = current_time - result.get('time', current_time)
    errors = result.get('errors', 0)

    if expired(delta, errors):
        ingester_counts.increment('ingest.cache_expired')
        with cache_lock:
            cache.pop(key, None)
            datastore.delete_filescore(key)
        return None, False, None, key
    elif stale(delta, errors):
        ingester_counts.increment('ingest.cache_stale')
        return None, False, result['score'], key

    ingester_counts.increment(counter_name)

    return result.get('psid', None), result['sid'], result['score'], key
Example #20
    def _get_version_map(self, node: str) -> None:
        """
        Get the versions of all engines running on a given node
        :param node: The IP of the MetaDefender node
        :return: None
        """
        newest_dat = 0
        oldest_dat = now()
        engine_list = []
        active_engines = 0
        failed_states = ["removed", "temporary failed", "permanently failed"]
        url = urljoin(node, 'stat/engines')

        try:
            self.log.debug(f"_get_version_map: GET {url}")
            r = self.session.get(url=url, timeout=self.timeout)
            engines = r.json()

            for engine in engines:
                if engine['active'] and engine["state"] not in failed_states:
                    active_engines += 1

                if self.config.get("md_version") == 4:
                    name = self._format_engine_name(engine["eng_name"])
                    version = engine['eng_ver']
                    def_time = engine['def_time']
                    etype = engine['engine_type']
                elif self.config.get("md_version") == 3:
                    name = self._format_engine_name(
                        engine["eng_name"]).replace("scanengine", "")
                    version = engine['eng_ver']
                    def_time = engine['def_time'].replace(" AM", "").replace(
                        " PM", "").replace("/", "-").replace(" ", "T")
                    def_time = def_time[6:10] + "-" + def_time[:5] + def_time[
                        10:] + "Z"
                    etype = engine['eng_type']
                else:
                    raise Exception("Unknown version of MetaDefender")

                # Compute newest DAT
                dat_epoch = iso_to_epoch(def_time)
                if dat_epoch > newest_dat:
                    newest_dat = dat_epoch

                if dat_epoch < oldest_dat and dat_epoch != 0 and etype in [
                        "av", "Bundled engine"
                ]:
                    oldest_dat = dat_epoch

                self.nodes[node]['engine_map'][name] = {
                    'version': version,
                    'def_time': iso_to_local(def_time)[:19]
                }
                engine_list.append(name)
                engine_list.append(version)
                engine_list.append(def_time)

            self.nodes[node]['engine_count'] = active_engines
            self.nodes[node]['newest_dat'] = epoch_to_local(newest_dat)[:19]
            self.nodes[node]['oldest_dat'] = epoch_to_local(oldest_dat)[:19]
            self.nodes[node]['engine_list'] = "".join(engine_list)
        except exceptions.Timeout:
            raise Exception(
                f"Node ({node}) timed out after {self.timeout}s while trying to get engine version map"
            )
        except ConnectionError:
            raise Exception(
                f"Unable to connect to node ({node}) while trying to get engine version map"
            )
Example #21
def login(**_):
    """
    Log the user into the system
    
    Variables:
    None
    
    Arguments: 
    None
    
    Data Block:
    {
     "user": <UID>,
     "password": <ENCRYPTED_PASSWORD>,
     "otp": <OTP_TOKEN>,
     "apikey": <ENCRYPTED_APIKEY>,
     "u2f_response": <RESPONSE_TO_CHALLENGE_FROM_U2F_TOKEN>
    }

    Result example:
    {
     "username": <Logged in user>, # Username for the logged in user
     "privileges": ["R", "W"],     # Different privileges that the user will get for this session
     "session_duration": 60        # Time after which this session becomes invalid
                                   #   Note: The timer resets after each call
    }
    """
    data = request.json
    if not data:
        data = request.values

    user = data.get('user', None)
    password = data.get('password', None)
    apikey = data.get('apikey', None)
    u2f_response = data.get('u2f_response', None)

    if config.auth.get('encrypted_login', True):
        private_key = load_async_key(STORAGE.get_blob('id_rsa'), use_pkcs=True)

        if password and private_key:
            password = private_key.decrypt(base64.b64decode(password), "ERROR")

        if apikey and private_key:
            apikey = private_key.decrypt(base64.b64decode(apikey), "ERROR")

    try:
        otp = int(data.get('otp', 0) or 0)
    except Exception:
        raise AuthenticationException('Invalid OTP token')

    if request.environ.get("HTTP_X_REMOTE_CERT_VERIFIED", "FAILURE") == "SUCCESS":
        dn = request.environ.get("HTTP_X_REMOTE_DN")
    else:
        dn = False

    if (user and password) or dn or (user and apikey):
        auth = {
            'username': user,
            'password': password,
            'otp': otp,
            'u2f_response': u2f_response,
            'dn': dn,
            'apikey': apikey
        }

        try:
            logged_in_uname, priv = default_authenticator(auth, request, flsk_session, STORAGE)
            session_duration = config.ui.get('session_duration', 3600)
            cur_time = now()
            xsrf_token = generate_random_secret()
            current_session = {
                'duration': session_duration,
                'ip': request.headers.get("X-Forward-For", request.remote_addr),
                'privileges': priv,
                'time': int(cur_time) - (int(cur_time) % session_duration),
                'user_agent': request.headers.get("User-Agent", "Unknown user agent"),
                'username': logged_in_uname,
                'xsrf_token': xsrf_token
            }
            session_id = hashlib.sha512(str(current_session)).hexdigest()
            current_session['expire_at'] = cur_time + session_duration
            flsk_session['session_id'] = session_id
            KV_SESSION.add(session_id, current_session)
            return make_api_response({
                "username": logged_in_uname,
                "privileges": priv,
                "session_duration": config.ui.get('session_duration', 3600)
            }, cookies={'XSRF-TOKEN': xsrf_token})
        except AuthenticationException as wpe:
            return make_api_response("", wpe.message, 401)

    return make_api_response("", "Not enough information to proceed with authentication", 401)
Example #22
    def completed(self, sub):
        """Invoked when notified that a submission has completed."""
        # There is only one file in the submissions we have made
        sha256 = sub.files[0].sha256
        scan_key = sub.params.create_filescore_key(sha256)
        raw = self.scanning.pop(scan_key)

        psid = sub.params.psid
        score = sub.max_score
        sid = sub.sid

        if not raw:
            # Some other worker has already popped the scanning queue?
            self.log.warning(
                f"[{sub.metadata.get('ingest_id', 'unknown')} :: {sha256}] "
                f"Submission completed twice")
            return scan_key

        task = IngestTask(raw)
        task.submission.sid = sid

        errors = sub.error_count
        file_count = sub.file_count
        self.counter.increment('submissions_completed')
        self.counter.increment('files_completed', increment_by=file_count)
        self.counter.increment('bytes_completed', increment_by=task.file_size)

        with self.cache_lock:
            fs = self.cache[scan_key] = FileScore({
                'expiry_ts': now(self.config.core.ingester.cache_dtl * 24 * 60 * 60),
                'errors': errors,
                'psid': psid,
                'score': score,
                'sid': sid,
                'time': now(),
            })
            self.datastore.filescore.save(scan_key, fs)

        self.finalize(psid, sid, score, task)

        def exhaust() -> Iterable[IngestTask]:
            while True:
                res = self.duplicate_queue.pop(_dup_prefix + scan_key,
                                               blocking=False)
                if res is None:
                    break
                res = IngestTask(res)
                res.submission.sid = sid
                yield res

        # You may be tempted to remove the assignment to dups and use the
        # value directly in the for loop below. That would be a mistake.
        # The function finalize may push on the duplicate queue which we
        # are pulling off and so condensing those two lines creates a
        # potential infinite loop.
        dups = [dup for dup in exhaust()]
        for dup in dups:
            self.finalize(psid, sid, score, dup)

        return scan_key
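
The comment above about assigning to dups before the loop deserves a concrete illustration: finalize() may push new entries onto the very queue exhaust() is draining, so feeding the generator straight into the loop could never terminate. A toy sketch of the difference, using a plain deque in place of the Redis-backed duplicate queue:

from collections import deque

dup_queue = deque(["dup-a", "dup-b"])

def finalize(item):
    # Imagine finalize() sometimes re-queues work on the same duplicate queue.
    dup_queue.append(item + "-requeued")

def exhaust():
    while dup_queue:
        yield dup_queue.popleft()

# Safe: snapshot first, then finalize. Anything finalize() pushes stays on
# the queue for a later pass instead of being consumed immediately.
dups = [dup for dup in exhaust()]
for dup in dups:
    finalize(dup)

# Unsafe variant: "for dup in exhaust(): finalize(dup)" would keep consuming
# the items finalize() just pushed and, in this toy, never terminate.
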
Example #23
def login(**_):
    """
    Log the user into the system
    
    Variables:
    None
    
    Arguments: 
    None
    
    Data Block:
    {
     "user": <UID>,
     "password": <ENCRYPTED_PASSWORD>,
     "otp": <OTP_TOKEN>,
     "apikey": <ENCRYPTED_APIKEY>,
     "webauthn_auth_resp": <RESPONSE_TO_CHALLENGE_FROM_WEBAUTHN>
    }

    Result example:
    {
     "username": <Logged in user>, # Username for the logged in user
     "privileges": ["R", "W"],     # Different privileges that the user will get for this session
     "session_duration": 60        # Time after which this session becomes invalid
                                   #   Note: The timer resets after each call
    }
    """
    data = request.json
    if not data:
        data = request.values

    user = data.get('user', None)
    password = data.get('password', None)
    apikey = data.get('apikey', None)
    webauthn_auth_resp = data.get('webauthn_auth_resp', None)
    oauth_provider = data.get('oauth_provider', None)
    oauth_token = data.get('oauth_token', None)

    if config.auth.oauth.enabled and oauth_provider:
        oauth = current_app.extensions.get('authlib.integrations.flask_client')
        provider = oauth.create_client(oauth_provider)

        if provider:
            redirect_uri = f'https://{request.host}/login.html?provider={oauth_provider}'
            return provider.authorize_redirect(redirect_uri=redirect_uri)

    try:
        otp = int(data.get('otp', 0) or 0)
    except Exception:
        raise AuthenticationException('Invalid OTP token')

    if (user and password) or (user and apikey) or (user and oauth_token):
        auth = {
            'username': user,
            'password': password,
            'otp': otp,
            'webauthn_auth_resp': webauthn_auth_resp,
            'apikey': apikey,
            'oauth_token': oauth_token
        }

        logged_in_uname = None
        ip = request.headers.get("X-Forwarded-For", request.remote_addr)
        try:
            logged_in_uname, priv = default_authenticator(
                auth, request, flsk_session, STORAGE)
            session_duration = config.ui.session_duration
            cur_time = now()
            xsrf_token = generate_random_secret()
            current_session = {
                'duration': session_duration,
                'ip': ip,
                'privileges': priv,
                'time': int(cur_time) - (int(cur_time) % session_duration),
                'user_agent': request.headers.get("User-Agent", None),
                'username': logged_in_uname,
                'xsrf_token': xsrf_token
            }
            session_id = hashlib.sha512(
                str(current_session).encode("UTF-8")).hexdigest()
            current_session['expire_at'] = cur_time + session_duration
            flsk_session['session_id'] = session_id
            KV_SESSION.add(session_id, current_session)
            return make_api_response(
                {
                    "username": logged_in_uname,
                    "privileges": priv,
                    "session_duration": session_duration
                },
                cookies={'XSRF-TOKEN': xsrf_token})
        except AuthenticationException as wpe:
            uname = auth.get('username', '(None)')
            LOGGER.warning(
                f"Authentication failure. (U:{uname} - IP:{ip}) [{wpe}]")
            return make_api_response("", err=str(wpe), status_code=401)
        finally:
            if logged_in_uname:
                LOGGER.info(
                    f"Login successful. (U:{logged_in_uname} - IP:{ip})")

    return make_api_response(
        "", "Not enough information to proceed with authentication", 401)
Example #24
    def ingest(self, task: IngestTask):
        self.log.info(
            f"[{task.ingest_id} :: {task.sha256}] Task received for processing"
        )
        # Load a snapshot of ingest parameters as of right now.
        max_file_size = self.config.submission.max_file_size
        param = task.params

        self.counter.increment('bytes_ingested', increment_by=task.file_size)
        self.counter.increment('submissions_ingested')

        if any(len(file.sha256) != 64 for file in task.submission.files):
            self.log.error(
                f"[{task.ingest_id} :: {task.sha256}] Invalid sha256, skipped")
            self.send_notification(task,
                                   failure="Invalid sha256",
                                   logfunc=self.log.warning)
            return

        # Clean up metadata strings, since we may delete some, iterate on a copy of the keys
        for key in list(task.submission.metadata.keys()):
            value = task.submission.metadata[key]
            meta_size = len(value)
            if meta_size > self.config.submission.max_metadata_length:
                self.log.info(
                    f'[{task.ingest_id} :: {task.sha256}] '
                    f'Removing {key} from metadata because value is too big')
                task.submission.metadata.pop(key)

        if task.file_size > max_file_size and not task.params.ignore_size and not task.params.never_drop:
            task.failure = f"File too large ({task.file_size} > {max_file_size})"
            self._notify_drop(task)
            self.counter.increment('skipped')
            self.log.error(
                f"[{task.ingest_id} :: {task.sha256}] {task.failure}")
            return

        # Set the groups from the user, if they aren't already set
        if not task.params.groups:
            task.params.groups = self.get_groups_from_user(
                task.params.submitter)

        # Check if this file is already being processed
        pprevious, previous, score = None, False, None
        if not param.ignore_cache:
            pprevious, previous, score, _ = self.check(task)

        # Assign priority.
        low_priority = self.is_low_priority(task)

        priority = param.priority
        if priority < 0:
            priority = self.priority_value['medium']

            if score is not None:
                priority = self.priority_value['low']
                for level, threshold in self.threshold_value.items():
                    if score >= threshold:
                        priority = self.priority_value[level]
                        break
            elif low_priority:
                priority = self.priority_value['low']

        # Reduce the priority by an order of magnitude for very old files.
        current_time = now()
        if priority and self.expired(
                current_time - task.submission.time.timestamp(), 0):
            priority = (priority / 10) or 1

        param.priority = priority

        # Do this after priority has been assigned.
        # (So we don't end up dropping the resubmission).
        if previous:
            self.counter.increment('duplicates')
            self.finalize(pprevious, previous, score, task)
            return

        if self.drop(task):
            self.log.info(f"[{task.ingest_id} :: {task.sha256}] Dropped")
            return

        if self.is_whitelisted(task):
            self.log.info(f"[{task.ingest_id} :: {task.sha256}] Whitelisted")
            return

        self.unique_queue.push(priority, task.as_primitives())
Example #25
        def base(*args, **kwargs):
            # Login
            session_id = flsk_session.get("session_id", None)

            if not session_id:
                abort(401)

            session = KV_SESSION.get(session_id)

            if not session:
                abort(401)
            else:
                session = json.loads(session)
                cur_time = now()
                if session.get('expire_at', 0) < cur_time:
                    KV_SESSION.pop(session_id)
                    abort(401)
                else:
                    session['expire_at'] = cur_time + session.get(
                        'duration', 3600)

            if request.headers.get("X-Forward-For", None) != session.get('ip', None) or \
                    request.headers.get("User-Agent", None) != session.get('user_agent', None):
                abort(401)

            KV_SESSION.set(session_id, session)

            logged_in_uname = session.get("username", None)

            if not set(self.required_priv).intersection(
                    set(session.get("privileges", []))):
                raise AccessDeniedException(
                    "The method you've used to login does not give you access to this API."
                )

            if "E" in session.get("privileges", []) and self.check_xsrf_token and \
                    session.get('xsrf_token', "") != request.environ.get('HTTP_X_XSRF_TOKEN', ""):
                raise AccessDeniedException("Invalid XSRF token.")

            # Impersonation
            requestor = request.environ.get("HTTP_X_PROXIEDENTITIESCHAIN",
                                            None)
            temp_user = login(logged_in_uname)

            # Terms of Service
            if not request.path == "/api/v3/user/tos/%s/" % logged_in_uname:
                if not temp_user.get(
                        'agrees_with_tos', False) and config.ui.get(
                            "tos", None) is not None:
                    raise AccessDeniedException(
                        "Agree to Terms of Service before you can make any API calls."
                    )

            if requestor:
                user = None
                if ("C=" in requestor or "c=" in requestor) and dn_parser:
                    requestor_chain = [
                        dn_parser(x.replace("<", "").replace(">", ""))
                        for x in requestor.split("><")
                    ]
                    requestor_chain.reverse()
                else:
                    requestor_chain = [requestor]

                impersonator = temp_user
                merged_classification = impersonator['classification']
                for as_uname in requestor_chain:
                    user = login(as_uname)
                    if not user:
                        raise AccessDeniedException(
                            "One of the entity in the proxied "
                            "chain does not exist in our system.")
                    user['classification'] = CLASSIFICATION.intersect_user_classification(
                        user['classification'], merged_classification)
                    merged_classification = user['classification']
                    add_access_control(user)

                if user:
                    logged_in_uname = "%s(on behalf of %s)" % (
                        impersonator['uname'], user['uname'])
                else:
                    raise AccessDeniedException(
                        "Invalid proxied entities chain received.")
            else:
                impersonator = {}
                user = temp_user
            if self.require_admin and not user['is_admin']:
                raise AccessDeniedException(
                    "API %s requires ADMIN privileges" % request.path)

            #############################################
            # Special username api query validation
            #
            #    If an API call requests a username, the username has to match
            #    the logged-in user or the user has to be ADMIN
            #
            #    APIs that need this special validation must make sure the
            #    variable name for the username is passed as an optional
            #    parameter inside 'username_key'. Default: 'username'
            if self.username_key in kwargs:
                if kwargs[self.username_key] != user['uname'] \
                        and not kwargs[self.username_key] == "__global__" \
                        and not kwargs[self.username_key] == "__workflow__" \
                        and not kwargs[self.username_key].lower() == "__current__" \
                        and not user['is_admin']:
                    return make_api_response(
                        {}, "Your username does not match requested username",
                        403)

            if self.audit:
                # noinspection PyBroadException
                try:
                    json_blob = request.json
                    if not isinstance(json_blob, dict):
                        json_blob = {}
                except Exception:
                    json_blob = {}

                params_list = list(args) + \
                    ["%s=%s" % (k, v) for k, v in kwargs.iteritems() if k in AUDIT_KW_TARGET] + \
                    ["%s=%s" % (k, v) for k, v in request.args.iteritems() if k in AUDIT_KW_TARGET] + \
                    ["%s=%s" % (k, v) for k, v in json_blob.iteritems() if k in AUDIT_KW_TARGET]

                if len(params_list) != 0:
                    AUDIT_LOG.info("%s [%s] :: %s(%s)" %
                                   (logged_in_uname, user['classification'],
                                    func.func_name, ", ".join(params_list)))

            # Save user credential in user kwarg for future reference
            kwargs['user'] = user

            # Check current user quota
            quota_user = impersonator.get('uname', None) or user['uname']
            quota_id = "%s [%s] => %s" % (quota_user, str(
                uuid.uuid4()), request.path)
            count = int(RATE_LIMITER.inc(quota_user, track_id=quota_id))
            RATE_LIMITER.inc("__global__", track_id=quota_id)

            flsk_session['quota_user'] = quota_user
            flsk_session['quota_id'] = quota_id
            flsk_session['quota_set'] = True

            quota = user.get('api_quota', 10)
            if count > quota:
                if config.ui.enforce_quota:
                    LOGGER.info(
                        "User %s was prevented from using the api due to exceeded quota. [%s/%s]"
                        % (quota_user, count, quota))
                    raise QuotaExceededException(
                        "You've exceeded your maximum quota of %s " % quota)
                else:
                    LOGGER.info("Quota exceeded for user %s. [%s/%s]" %
                                (quota_user, count, quota))
            else:
                if DEBUG:
                    LOGGER.info(
                        "%s's quota is under or equal its limit. [%s/%s]" %
                        (quota_user, count, quota))

            return func(*args, **kwargs)
Example #26
is_low_priority = forge.get_is_low_priority()
max_priority = config.submissions.max.priority
max_retries = 10
max_time = 2 * 24 * 60 * 60  # Wait 2 days for responses.
max_waiting = int(config.core.dispatcher.max.inflight) / (2 * shards)
min_priority = 1
priority_value = constants.PRIORITIES
retry_delay = 180
retryq = queue.NamedQueue('m-retry-' + shard, **persistent)  # df line queue
running = True
sampling = False
selected_initial = [
    'Antivirus', 'Extraction', 'Filtering', 'Networking', 'Static Analysis'
]
stale_after_seconds = config.core.middleman.stale_after
start_time = now()
submissionq = queue.NamedQueue('m-submission-' + shard,
                               **persistent)  # df line queue
timeouts = []
timeouts_lock = RLock()
whitelist = forge.get_whitelist()
whitelisted = {}
whitelisted_lock = RLock()

dropper_threads = 1
try:
    dropper_threads = int(config.core.middleman.dropper_threads)
except AttributeError:
    logger.warning("No dropper_threads setting. Defaulting to %d.",
                   dropper_threads)
Example #27
def ingest(datastore, user_groups, raw):  # df node def
    notice = Notice(raw)

    ignore_size = notice.get('ignore_size', False)
    never_drop = notice.get('never_drop', False)
    sha256 = notice.get('sha256')
    size = notice.get('size', 0)

    # Make sure we have a submitter ...
    user = notice.get('submitter', None)
    if user is None:
        user = config.submissions.user
        notice.set('submitter', user)

    # ... and groups.
    groups = notice.get('groups', None)
    if groups is None:
        groups = user_groups.get(user, None)
        if groups is None:
            ruser = datastore.get_user(user)
            if not ruser:
                return
            groups = ruser.get('groups', [])
            user_groups[user] = groups
        notice.set('groups', groups)

    selected = notice.get('selected', None)
    if not selected:
        selected = selected_initial
        notice.set('selected', selected)
        notice.set('resubmit_to', ['Dynamic Analysis'])

    resubmit_to = notice.get('resubmit_to', None)
    if resubmit_to is None:
        notice.set('resubmit_to', [])

    ingester_counts.increment('ingest.bytes_ingested', int(size))
    ingester_counts.increment('ingest.submissions_ingested')

    if not sha256:
        send_notification(notice,
                          failure="Invalid sha256",
                          logfunc=logger.warning)
        return

    c12n = notice.get('classification', '')
    if not Classification.is_valid(c12n):
        send_notification(notice,
                          failure="Invalid classification %s" % c12n,
                          logfunc=logger.warning)
        return

    metadata = notice.get('metadata', {})
    if isinstance(metadata, dict):
        to_delete = []
        for k, v in metadata.iteritems():
            size = sys.getsizeof(v, -1)
            if isinstance(v, basestring):
                size = len(v)
            if size > config.core.middleman.max_value_size:
                to_delete.append(k)
            elif size < 0:
                to_delete.append(k)
        if to_delete:
            logger.info('Removing %s from %s', to_delete, notice.raw)
            for k in to_delete:
                metadata.pop(k, None)

    if size > config.submissions.max.size and not ignore_size and not never_drop:
        notice.set(
            'failure',
            "File too large (%d > %d)" % (size, config.submissions.max.size))
        dropq.push(notice.raw)  # df push push
        ingester_counts.increment('ingest.skipped')
        return

    pprevious, previous, score = None, False, None
    if not notice.get('ignore_cache', False):
        pprevious, previous, score, _ = check(datastore, notice)

    # Assign priority.
    low_priority = is_low_priority(notice)

    priority = notice.get('priority')
    if priority is None:
        priority = priority_value['medium']

        if score is not None:
            priority = priority_value['low']
            for level in ('critical', 'high'):
                if score >= threshold_value[level]:
                    priority = priority_value[level]
                    break
        elif low_priority:
            priority = priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and \
            expired(current_time - seconds(notice.get('ts', current_time)), 0):
        priority = (priority / 10) or 1

    notice.set('priority', priority)

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        ingester_counts.increment('ingest.duplicates')
        finalize(pprevious, previous, score, notice)  # df push calls
        return

    if drop(notice):  # df push calls
        return

    if is_whitelisted(notice):  # df push calls
        return

    uniqueq.push(priority, notice.raw)  # df push push
Example #28
def init():
    datastore = forge.get_datastore()
    datastore.commit_index('submission')

    sids = [
        x['submission.sid'] for x in datastore.stream_search(
            'submission',
            'state:submitted AND times.submitted:[NOW-1DAY TO *] '
            'AND submission.metadata.type:* '
            'AND NOT submission.description:Resubmit*')
    ]

    submissions = {}
    submitted = {}
    for submission in datastore.get_submissions(sids):
        task = Task(submission)

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            continue

        if forge.determine_ingest_queue(task.root_sha256) != ingestq_name:
            continue

        scan_key = task.scan_key
        submissions[task.sid] = submission
        submitted[scan_key] = task.sid

    # Outstanding is the set of things Riak believes are being scanned.
    outstanding = set(submitted.keys())

    # Keys is the set of things middleman believes are being scanned.
    keys = set(scanning.keys())

    # Inflight is the set of submissions middleman and Riak agree are inflight.
    inflight = outstanding.intersection(keys)

    # Missing is the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    missing = keys.difference(inflight)

    # Process the set of submissions Riak believes are incomplete but
    # middleman doesn't know about.
    for scan_key in outstanding.difference(inflight):
        sid = submitted.get(scan_key, None)

        if not sid:
            logger.info("Init: No sid found for incomplete")
            continue

        if not task.original_selected or not task.root_sha256 or not task.scan_key:
            logger.info("Init: Not root_sha256 or original_selected")
            continue

        submission = submissions[sid]

        task = Task(submission)

        if not task.metadata:
            logger.info("Init: Incomplete submission is not one of ours: %s",
                        sid)

        stype = None
        try:
            stype = task.metadata.get('type', None)
        except:  # pylint: disable=W0702
            logger.exception(
                "Init: Incomplete submission has malformed metadata: %s", sid)

        if not stype:
            logger.info("Init: Incomplete submission missing type: %s", sid)

        raw = {
            'metadata': task.metadata,
            'overrides': get_submission_overrides(task, overrides),
            'sha256': task.root_sha256,
            'type': stype,
        }
        raw['overrides']['selected'] = task.original_selected

        reinsert(datastore, " (incomplete)", Notice(raw), logger)

    r = redis.StrictRedis(persistent['host'], persistent['port'],
                          persistent['db'])

    # Duplicates is the set of sha256s where a duplicate queue exists.
    duplicates = [
        x.replace(dup_prefix, '', 1) for x in r.keys(dup_prefix + '*')
    ]

    # Process the set of duplicates where no scanning or riak entry exists.
    for scan_key in set(duplicates).difference(outstanding.union(keys)):
        raw = dupq.pop(dup_prefix + scan_key, blocking=False)
        if not raw:
            logger.warning("Init: Couldn't pop off dup queue (%s)", scan_key)
            dupq.delete(dup_prefix + scan_key)
            continue

        reinsert(datastore, " (missed duplicate)", Notice(raw), logger)

    while True:
        res = completeq.pop(blocking=False)
        if not res:
            break

        scan_key = completed(Task(res))
        try:
            missing.remove(scan_key)
        except:  # pylint: disable=W0702
            pass

    # Process the set of submissions middleman thinks are in flight but
    # according to Riak are not incomplete.
    for scan_key in missing:
        raw = scanning.pop(scan_key)
        if raw:
            reinsert(datastore, '', Notice(raw), logger, retry_all=False)

    # Set up time outs for all inflight submissions.
    expiry_time = now(max_time)
    for scan_key in inflight:
        # No need to lock. We're the only thing running at this point.
        timeouts.append(Timeout(scan_key, expiry_time))

    signal.signal(signal.SIGINT, interrupt)
    signal.signal(signal.SIGTERM, interrupt)

    datastore.close()
Example #29
def test_isotime_epoch():
    epoch_date = now(200)

    assert epoch_date == local_to_epoch(epoch_to_local(epoch_date))
    assert epoch_date == iso_to_epoch(epoch_to_iso(epoch_date))
    assert isinstance(epoch_date, float)
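
The round-trip assertions above pin down what the isotime helpers must do. A standard-library-only sketch of conversions consistent with them (up to float rounding), assuming now(offset) is simply epoch seconds plus an offset and the local/ISO helpers are thin datetime wrappers:

import time
from datetime import datetime, timezone

def now(offset=0.0):
    return time.time() + offset

def epoch_to_iso(t):
    return datetime.fromtimestamp(t, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

def iso_to_epoch(ts):
    return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
        tzinfo=timezone.utc).timestamp()

def epoch_to_local(t):
    return datetime.fromtimestamp(t).strftime("%Y-%m-%d %H:%M:%S.%f")

def local_to_epoch(ts):
    return datetime.strptime(ts, "%Y-%m-%d %H:%M:%S.%f").timestamp()

epoch_date = now(200)
assert abs(epoch_date - local_to_epoch(epoch_to_local(epoch_date))) < 1e-3
assert abs(epoch_date - iso_to_epoch(epoch_to_iso(epoch_date))) < 1e-3
assert isinstance(epoch_date, float)
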
Example #30
def completed(task):  # df node def
    sha256 = task.root_sha256

    psid = task.psid
    score = task.score
    sid = task.sid

    scan_key = task.scan_key

    with ScanLock(scan_key):
        # Remove the entry from the hash of submissions in progress.
        raw = scanning.pop(scan_key)  # df pull pop
        if not raw:
            logger.warning("Untracked submission (score=%d) for: %s %s",
                           int(score), sha256, str(task.metadata))

            # Not a result we care about. We are notified for every
            # submission that completes. Some submissions will not be ours.
            if task.metadata:
                stype = None
                try:
                    stype = task.metadata.get('type', None)
                except:  # pylint: disable=W0702
                    logger.exception("Malformed metadata: %s:", sid)

                if not stype:
                    return scan_key

                if (task.description or '').startswith(default_prefix):
                    raw = {
                        'metadata': task.metadata,
                        'overrides': get_submission_overrides(task, overrides),
                        'sha256': sha256,
                        'type': stype,
                    }

                    finalize(psid, sid, score, Notice(raw))
            return scan_key

        errors = task.raw.get('error_count', 0)
        file_count = task.raw.get('file_count', 0)
        ingester_counts.increment('ingest.submissions_completed')
        ingester_counts.increment('ingest.files_completed', file_count)
        ingester_counts.increment('ingest.bytes_completed', int(task.size
                                                                or 0))

        notice = Notice(raw)

        with cache_lock:
            _add(scan_key, psid, sid, score, errors, now())

        finalize(psid, sid, score, notice)  # df push calls

        def exhaust():
            while True:
                res = dupq.pop(  # df pull pop
                    dup_prefix + scan_key, blocking=False)
                if res is None:
                    break
                yield res

        # You may be tempted to remove the assignment to dups and use the
        # value directly in the for loop below. That would be a mistake.
        # The function finalize may push on the duplicate queue which we
        # are pulling off and so condensing those two lines creates a
        # potential infinite loop.
        dups = [dup for dup in exhaust()]
        for dup in dups:
            finalize(psid, sid, score, Notice(dup))

    return scan_key