def retry(raw, scan_key, sha256, ex):  # df node def
    current_time = now()

    notice = Notice(raw)
    retries = notice.get('retries', 0) + 1

    if retries > max_retries:
        trace = ''
        if ex and type(ex) != FileStoreException:
            trace = ': ' + get_stacktrace_info(ex)
        logger.error('Max retries exceeded for %s%s', sha256, trace)
        dupq.delete(dup_prefix + scan_key)
    elif expired(current_time - seconds(notice.get('ts', current_time)), 0):
        logger.info('No point retrying expired submission for %s', sha256)
        dupq.delete(dup_prefix + scan_key)  # df pull delete
    else:
        logger.info('Requeuing %s (%s)', sha256, ex or 'unknown')
        notice.set('retries', retries)
        notice.set('retry_at', now(retry_delay))

        retryq.push(notice.raw)  # df push push
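# A minimal, self-contained sketch (not part of middleman) of the three-way
# decision retry() makes above: give up once max_retries is exceeded, give up
# on expired submissions, otherwise requeue with a delay. The names and the
# default values below are hypothetical illustrations, not the real config.
import time


def _example_retry_decision(retries, ts, max_retries=10,
                            max_age=24 * 60 * 60, retry_delay=180):
    """Return ('drop'|'expired'|'retry', earliest_retry_epoch_or_None)."""
    now_epoch = time.time()
    if retries > max_retries:
        return 'drop', None                  # too many attempts; give up
    if now_epoch - ts > max_age:
        return 'expired', None               # too old to bother retrying
    return 'retry', now_epoch + retry_delay  # requeue after a delay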
def dropper():  # df node def
    datastore = forge.get_datastore()

    while running:
        raw = dropq.pop(timeout=1)  # df pull pop
        if not raw:
            continue

        notice = Notice(raw)
        send_notification(notice)

        c12n = notice.get('classification', config.core.middleman.classification)
        expiry = now_as_iso(86400)
        sha256 = notice.get('sha256')

        datastore.save_or_freshen_file(sha256, {'sha256': sha256}, expiry, c12n)

    datastore.close()
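# dropper() above follows the blocking pop-with-timeout worker pattern: block
# for at most a second so the `running` flag is rechecked regularly and the
# thread can shut down cleanly. A stdlib sketch of the same loop shape, with
# Queue standing in for dropq (everything here is illustrative only):
import Queue  # Python 2 stdlib; named `queue` in Python 3


def _example_worker(work_queue, is_running, handle):
    while is_running():
        try:
            item = work_queue.get(timeout=1)  # analogous to dropq.pop(timeout=1)
        except Queue.Empty:
            continue                          # nothing to do; recheck is_running
        handle(item)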
def ingest(datastore, user_groups, raw):  # df node def
    notice = Notice(raw)

    ignore_size = notice.get('ignore_size', False)
    never_drop = notice.get('never_drop', False)
    sha256 = notice.get('sha256')
    size = notice.get('size', 0)

    # Make sure we have a submitter ...
    user = notice.get('submitter', None)
    if user is None:
        user = config.submissions.user
        notice.set('submitter', user)

    # ... and groups.
    groups = notice.get('groups', None)
    if groups is None:
        groups = user_groups.get(user, None)
        if groups is None:
            ruser = datastore.get_user(user)
            if not ruser:
                return
            groups = ruser.get('groups', [])
            user_groups[user] = groups
        notice.set('groups', groups)

    selected = notice.get('selected', None)
    if not selected:
        selected = selected_initial
        notice.set('selected', selected)
        notice.set('resubmit_to', ['Dynamic Analysis'])

    resubmit_to = notice.get('resubmit_to', None)
    if resubmit_to is None:
        notice.set('resubmit_to', [])

    ingester_counts.increment('ingest.bytes_ingested', int(size))
    ingester_counts.increment('ingest.submissions_ingested')

    if not sha256:
        send_notification(notice, failure="Invalid sha256", logfunc=logger.warning)
        return

    c12n = notice.get('classification', '')
    if not Classification.is_valid(c12n):
        send_notification(notice, failure="Invalid classification %s" % c12n,
                          logfunc=logger.warning)
        return

    metadata = notice.get('metadata', {})
    if isinstance(metadata, dict):
        to_delete = []
        for k, v in metadata.iteritems():
            # Use a distinct name so the file size in `size` is not clobbered
            # before the max-size check below.
            value_size = sys.getsizeof(v, -1)
            if isinstance(v, basestring):
                value_size = len(v)
            if value_size > config.core.middleman.max_value_size:
                to_delete.append(k)
            elif value_size < 0:
                to_delete.append(k)
        if to_delete:
            logger.info('Removing %s from %s', to_delete, notice.raw)
            for k in to_delete:
                metadata.pop(k, None)

    if size > config.submissions.max.size and not ignore_size and not never_drop:
        notice.set(
            'failure',
            "File too large (%d > %d)" % (size, config.submissions.max.size))
        dropq.push(notice.raw)  # df push push
        ingester_counts.increment('ingest.skipped')
        return

    pprevious, previous, score = None, False, None
    if not notice.get('ignore_cache', False):
        pprevious, previous, score, _ = check(datastore, notice)

    # Assign priority.
    low_priority = is_low_priority(notice)

    priority = notice.get('priority')
    if priority is None:
        priority = priority_value['medium']
        if score is not None:
            priority = priority_value['low']
            for level in ('critical', 'high'):
                if score >= threshold_value[level]:
                    priority = priority_value[level]
                    break
        elif low_priority:
            priority = priority_value['low']

    # Reduce the priority by an order of magnitude for very old files.
    current_time = now()
    if priority and \
            expired(current_time - seconds(notice.get('ts', current_time)), 0):
        priority = (priority / 10) or 1

    notice.set('priority', priority)

    # Do this after priority has been assigned.
    # (So we don't end up dropping the resubmission).
    if previous:
        ingester_counts.increment('ingest.duplicates')
        finalize(pprevious, previous, score, notice)  # df push calls
        return

    if drop(notice):  # df push calls
        return

    if is_whitelisted(notice):  # df push calls
        return

    uniqueq.push(priority, notice.raw)  # df push push
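# The priority assignment in ingest() above maps a cached score onto named
# priority bands, testing the highest thresholds first. A self-contained
# sketch of that mapping; the tables below are made-up stand-ins for the
# configured priority_value/threshold_value:
_EXAMPLE_PRIORITY = {'low': 100, 'medium': 200, 'high': 300, 'critical': 400}
_EXAMPLE_THRESHOLD = {'high': 500, 'critical': 1000}


def _example_assign_priority(score, low_priority=False):
    priority = _EXAMPLE_PRIORITY['medium']
    if score is not None:
        priority = _EXAMPLE_PRIORITY['low']
        for level in ('critical', 'high'):    # highest matching band wins
            if score >= _EXAMPLE_THRESHOLD[level]:
                priority = _EXAMPLE_PRIORITY[level]
                break
    elif low_priority:
        priority = _EXAMPLE_PRIORITY['low']
    return priority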
def submitter():  # df node def
    client = forge.get_submission_service()
    datastore = forge.get_datastore()

    while running:
        try:
            raw = submissionq.pop(timeout=1)  # df pull pop
            if not raw:
                continue

            # noinspection PyBroadException
            try:
                sha256 = raw['sha256']
            except Exception:  # pylint: disable=W0703
                logger.exception("Malformed entry on submission queue:")
                continue

            if not sha256:
                logger.error("Malformed entry on submission queue: %s", raw)
                continue

            notice = Notice(raw)
            if drop(notice):  # df push calls
                continue

            if is_whitelisted(notice):  # df push calls
                continue

            pprevious, previous, score = None, False, None
            if not notice.get('ignore_cache', False):
                pprevious, previous, score, scan_key = check(datastore, notice)

            if previous:
                if not notice.get('resubmit_to', []) and not pprevious:
                    logger.warning(
                        "No psid for what looks like a resubmission of %s: %s",
                        sha256, scan_key)
                finalize(pprevious, previous, score, notice)  # df push calls
                continue

            with ScanLock(scan_key):
                if scanning.exists(scan_key):
                    logger.debug('Duplicate %s', sha256)
                    ingester_counts.increment('ingest.duplicates')
                    dupq.push(dup_prefix + scan_key, notice.raw)  # df push push
                    continue

                scanning.add(scan_key, notice.raw)  # df push add

            ex = return_exception(submit, client, notice)
            if not ex:
                continue

            ingester_counts.increment('ingest.error')

            should_retry = True
            tex = type(ex)
            if tex == FileStoreException:
                ex = tex("Problem with file: %s" % sha256)
            elif tex == CorruptedFileStoreException:
                logger.error(
                    "Submission failed due to corrupted filestore: %s",
                    ex.message)
                should_retry = False
            else:
                trace = get_stacktrace_info(ex)
                logger.error("Submission failed: %s", trace)

            raw = scanning.pop(scan_key)
            if not raw:
                logger.error('No scanning entry for %s', sha256)
                continue

            if not should_retry:
                continue

            retry(raw, scan_key, sha256, ex)

            if tex == riak.RiakError:
                raise ex  # pylint: disable=E0702
        except Exception:  # pylint:disable=W0703
            logger.exception("Unexpected error")
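# submitter() above relies on a lock-then-mark idiom to deduplicate
# concurrent scans: under a per-key lock, either observe an in-flight scan
# (and queue the duplicate) or atomically register as the scanner. A local,
# in-process sketch of that idiom using a plain dict and threading.Lock;
# the real code uses distributed primitives (ScanLock, scanning, dupq):
import threading

_example_scanning = {}
_example_mutex = threading.Lock()


def _example_begin_scan(scan_key, payload, duplicates):
    """Return True if the caller owns the scan, False if it is a duplicate."""
    with _example_mutex:
        if scan_key in _example_scanning:
            duplicates.append((scan_key, payload))  # analogous to dupq.push
            return False
        _example_scanning[scan_key] = payload       # analogous to scanning.add
        return True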