def run(self):
    db = get_db()
    update_timer = StopWatch()

    # Only restrict the query when we just want documents that have never been indexed.
    query = {}
    if self.fts_config.only_missing_indicies:
        query[FTS_KEY] = {"$eq": None}

    bulk_op = db.stix.initialize_unordered_bulk_op()
    update_count = 0
    for doc in db.stix.find(query):
        fts_data = document_prose(doc)
        # Skip documents whose stored FTS text already matches the freshly generated text.
        if FTS_KEY in doc and fts_data[FTS_KEY] == doc[FTS_KEY]:
            continue
        update_count += 1
        bulk_op.find({'_id': doc['_id']}).update({'$set': fts_data})
        # Flush the bulk operation every PAGE_SIZE updates and start a fresh one.
        if not update_count % STIXFts.PAGE_SIZE:
            bulk_op.execute()
            bulk_op = db.stix.initialize_unordered_bulk_op()

    # Flush any remaining updates that didn't fill a full page.
    if update_count % STIXFts.PAGE_SIZE:
        bulk_op.execute()

    log_activity("system", 'FTS', 'INFO', "%s : Updated %d of %d objects in %dms" % (
        'Missing Insert' if self.fts_config.only_missing_indicies else 'Full Rebuild',
        update_count, db.stix.count(), update_timer.ms()))
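# Illustrative note (assumption, not confirmed by the code above): document_prose is expected to
# return a dict keyed by FTS_KEY containing the searchable text for the document (plus anything
# else that should be $set), e.g. something like {FTS_KEY: 'malicious domain evil.example.com ...'},
# which is why a document is skipped when its stored FTS text already equals the regenerated text.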
def run(self):
    def build_activity_message(num_of_duplicates):
        if num_of_duplicates:
            return 'Deduped %d Observables ' % num_of_duplicates
        return "No Observable duplicates found"

    messages = []
    last_run_at = self.config.task.last_run_at
    elapsed = StopWatch()
    try:
        try:
            rehash.rehash(last_run_at)
        except Exception:
            pass  # Rehash failed; continue with the existing hashes.

        original_to_duplicates = STIXDedup.find_duplicates(self.config.only_local_ns)
        for original, duplicates in original_to_duplicates.iteritems():
            try:
                self.merge_object(original, duplicates)
            except Exception as e:
                log_error(e, 'adapters/dedup/dedup', 'Failed to merge %s' % original)

        messages.append(build_activity_message(len(original_to_duplicates)))
        messages.insert(0, 'Online Dedup in %ds: ' % int(elapsed.sec()))
        log_activity('system', 'DEDUP', 'INFO', "\n \t".join(messages))
    except Exception as e:
        log_activity('system', 'DEDUP', 'ERROR', e.message)
def generate_error_message(username, message, e, elapsed):
    log_activity(username, 'INCIDENT INGEST', 'ERROR', message)
    log_error(e, 'adapters/incident/import', 'Import Failed')
    return JsonResponse({
        'duration': int(elapsed.ms()),
        'messages': [message],
        'state': 'error'
    }, status=500)
def ajax_create_incidents(request, username):
    is_valid = is_valid_request(request)
    if is_valid[0] is False:
        return JsonResponse({}, status=is_valid[1])
    try:
        user = Repository_User.objects.get(username=username)
    except DoesNotExist:
        return JsonResponse({'messages': 'User does not exist'}, status=403)

    ip = None
    drafts = []
    elapsed = StopWatch()
    try:
        raw_data = REGEX_LINE_DELIMETER.split(request.read())
        reader = get_dict_reader(raw_data)
        data = [row for row in reader]
        drafts, drafts_validation = draft_wrapper(data)
        ip = DedupInboxProcessor(validate=False, user=user)
        upsert_drafts(ip, drafts, user)
        ip.run()
        duration = int(elapsed.ms())

        # Clean up the temporary drafts and collect validation output before reporting success.
        remove_drafts(drafts)
        validate_csv_field_names(reader, ip)
        build_validation_message(ip, drafts_validation, drafts, data)

        log_activity(username, 'INCIDENT INGEST', 'INFO',
                     build_activity_message(ip.saved_count, duration, ip.filter_messages,
                                            ip.validation_result))
        return JsonResponse({
            'count': ip.saved_count,
            'duration': duration,
            'messages': ip.filter_messages,
            'state': 'success',
            'validation_result': ip.validation_result
        }, status=202)
    except (KeyError, ValueError, InboxError) as e:
        remove_drafts(drafts)
        count = ip.saved_count if isinstance(ip, DedupInboxProcessor) else 0
        duration = int(elapsed.ms())
        messages = [e.message]
        validation_result = ip.validation_result if isinstance(ip, DedupInboxProcessor) else {}
        log_activity(username, 'INCIDENT INGEST', 'WARN',
                     build_activity_message(count, duration, messages, validation_result))
        return JsonResponse({
            'count': count,
            'duration': duration,
            'messages': messages,
            'state': 'invalid',
            'validation_result': validation_result
        }, status=400)
    except Exception as e:
        if e.message == 'line contains NULL byte':
            return generate_error_message(username, 'Unable to parse file', e, elapsed)
        return generate_error_message(username, e.message, e, elapsed)
def run(self):
    def build_activity_message(min_date, objects, compositions, time_ms):
        def summarise(into, summary_template, items):
            num_items = len(items)
            into.append(summary_template % num_items)

        namespace_filter_text = 'in %s namespace,' % LOCAL_ALIAS.upper()
        if not self.retention_config.only_local_ns:
            namespace_filter_text = 'not in %s namespace,' % LOCAL_ALIAS.upper()
        messages = [
            'Objects created before %s which are %s are candidates for deletion' % (
                min_date.strftime("%Y-%m-%d %H:%M:%S"), namespace_filter_text)
        ]
        summarise(messages, 'Found %d objects with insufficient back links or sightings', objects)
        summarise(messages, 'Found %d orphaned observable compositions', compositions)
        messages.append('In %dms' % time_ms)
        return "\n".join(messages)

    namespace_filter = LOCAL_NAMESPACE
    if not self.retention_config.only_local_ns:
        namespace_filter = {'$ne': LOCAL_NAMESPACE}

    timer = StopWatch()
    try:
        current_date = datetime.utcnow()
        STIXPurge.wait_for_background_jobs_completion(current_date)
        minimum_date = current_date - relativedelta(months=self.retention_config.max_age_in_months)

        # Get old items that don't have enough back links and sightings (excluding observable compositions):
        objects_to_delete = self.get_purge_candidates(minimum_date, namespace_filter)
        # Look for any observable compositions that were orphaned on the previous call to run:
        orphaned_observable_compositions_to_delete = \
            STIXPurge._get_orphaned_external_observable_compositions(current_date)

        ids_to_delete = objects_to_delete + orphaned_observable_compositions_to_delete
        for page_index in range(0, len(ids_to_delete), self.PAGE_SIZE):
            try:
                chunk_ids = ids_to_delete[page_index:page_index + self.PAGE_SIZE]
                STIXPurge.remove(chunk_ids)
            except Exception as e:
                log_activity('system', 'AGEING', 'ERROR', e.message)
    except Exception as e:
        log_activity('system', 'AGEING', 'ERROR', e.message)
    else:
        log_activity('system', 'AGEING', 'INFO',
                     build_activity_message(minimum_date, objects_to_delete,
                                            orphaned_observable_compositions_to_delete, timer.ms()))
def run(self):
    previous_failures = set()

    def _process_bulk_op():
        for blo in dict_bls.keys():
            c = db.stix_backlinks_mod.find_one({'_id': blo})
            existing_bls = {}
            if c:
                existing_bls = c['value']
            for new_bl in dict_bls[blo]:
                existing_bls[new_bl['id']] = new_bl['type']
            try:
                db.stix_backlinks_mod.update({'_id': blo}, {'$set': {'value': existing_bls}}, upsert=True)
            except Exception as e:
                if blo not in previous_failures:
                    log_activity("system", "Backlink", "ERROR",
                                 "processing %s length of backlinks: %d error %s)" % (
                                     blo, len(existing_bls), e.message))
                    previous_failures.add(blo)

    db = get_db()
    db.stix_backlinks_mod.update({"_id": "max_created_on"}, {'value': datetime.utcnow()}, True)
    # Make sure the background process doesn't continue:
    db.stix_backlinks.update({"_id": "max_created_on"},
                             {'value': datetime.utcnow() + timedelta(days=5)}, True)

    update_timer = StopWatch()
    dict_bls = {}
    for doc in db.stix.find({}):
        if 'data' in doc and 'edges' in doc['data']:
            for edge in doc['data']['edges'].keys():
                dict_bls.setdefault(edge, []).append({"id": doc['_id'], "type": doc['type']})
        if not (len(dict_bls) % STIXBacklinks.PAGE_SIZE):
            _process_bulk_op()
            dict_bls = {}
    if len(dict_bls):
        _process_bulk_op()

    # In case something adds to stix_backlinks between the drop and rename, try a few times
    for i in range(0, 5):
        db.stix_backlinks.drop()
        try:
            db.stix_backlinks_mod.rename("stix_backlinks")
        except OperationFailure:
            sleep(0.2)
            continue
        else:
            break

    log_activity("system", 'Backlink', 'INFO', "%s : Updated for %d objects in %ds" % (
        'Full Rebuild', db.stix.count(), update_timer.sec()))
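# Illustrative only (ids and type codes are made up): the rebuilt stix_backlinks collection ends up
# with one document per referenced object, whose 'value' maps each referring object's id to its
# type, e.g.:
#
#     {
#         '_id': 'observable-1234',              # the object being pointed at (an edge target)
#         'value': {
#             'indicator-5678': 'ind',           # referrer id -> referrer type
#             'observable-composition-9012': 'obs',
#         },
#     }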
def process_stix(stream, user, extract_id, file_name):
    elapsed = StopWatch()

    def log_extract_activity_message(message):
        duration = int(elapsed.ms())
        return "@ %dms : %s\n" % (duration, message)

    def process_draft_obs():
        # draft_indicator['observables'] contains all observables for the indicator; observable_ids
        # holds just the ones that were inboxed (i.e. not de-duped). If an observable was not
        # de-duped, drop its id as it confuses the builder, and the missing id also gives us a
        # quick way to tell drafts apart.
        for obs in draft_indicator['observables']:
            if obs['id'] in observable_ids:  # Is it a draft?
                del obs['id']
                obs['title'] = obs['objectType'] + ":" + obs['title']

    def remove_from_db(ids):
        PAGE_SIZE = 100
        for page_index in range(0, len(ids), PAGE_SIZE):
            try:
                chunk_ids = ids[page_index:page_index + PAGE_SIZE]
                STIXPurge.remove(chunk_ids)
            except Exception:
                pass

    log_message = log_extract_activity_message("DedupInboxProcessor parse")
    extract_store.update(extract_id, "PROCESSING", "", [])
    try:
        ip = DedupInboxProcessor(validate=False, user=user, streams=[(stream, None)])
    except (InboxError, EntitiesForbidden, XMLSyntaxError) as e:
        extract_store.update(
            extract_id, "FAILED",
            "Error parsing stix file: %s content from parser was %s" % (e.message, stream.buf), [])
        return

    log_message += log_extract_activity_message("DedupInboxProcessor run & dedup")
    ip.run()

    indicators = [inbox_item for _, inbox_item in ip.contents.iteritems()
                  if inbox_item.api_object.ty == 'ind']
    if not len(indicators):
        extract_store.update(extract_id, "FAILED",
                             "No indicators found when parsing file %s" % file_name, [])
        return

    indicator_ids = [id_ for id_, inbox_item in ip.contents.iteritems()
                     if inbox_item.api_object.ty == 'ind']
    observable_ids = {id_ for id_, inbox_item in ip.contents.iteritems()
                      if inbox_item.api_object.ty == 'obs'}

    log_message += log_extract_activity_message("Create drafts from inboxed objects")
    try:
        for indicator in indicators:
            draft_indicator = EdgeObject.load(indicator.id).to_draft()
            process_draft_obs()
            Draft.upsert('ind', draft_indicator, user)
    finally:
        # The observables were fully inboxed, but we want them only to exist as drafts, so remove from db
        log_message += log_extract_activity_message("Delete inboxed objects")
        remove_from_db(indicator_ids + list(observable_ids))

    extract_store.update(extract_id, "COMPLETE",
                         "Found %d indicators" % len(indicator_ids), indicator_ids)
    log_message += log_extract_activity_message("Redirect user to visualiser")
    log_activity(user.username, 'EXTRACT', 'INFO', log_message)
def ajax_import(request, username):
    if not request.method == 'POST':
        return JsonResponse({}, status=405)
    if not request.META.get('HTTP_ACCEPT') == 'application/json':
        return JsonResponse({}, status=406)
    if request.META.get('CONTENT_TYPE') not in {'application/xml', 'text/xml'}:
        return JsonResponse({}, status=415)
    try:
        request.user = Repository_User.objects.get(username=username)
    except DoesNotExist:
        return JsonResponse({}, status=403)

    elapsed = StopWatch()
    ip = None
    try:
        ip = DedupInboxProcessor(user=request.user, streams=[(request, None)])
        ip.run()
        duration = int(elapsed.ms())
        if len(ip.filter_messages) == 0 and ip.message:
            ip.filter_messages.append(ip.message)
        log_activity(username, 'DEDUP', 'INFO',
                     build_activity_message(ip.saved_count, duration, ip.filter_messages,
                                            ip.validation_result))
        return JsonResponse({
            'count': ip.saved_count,
            'duration': duration,
            'messages': ip.filter_messages,
            'state': 'success',
            'validation_result': ip.validation_result
        }, status=202)
    except (XMLSyntaxError, EntitiesForbidden, InboxError) as e:
        count = ip.saved_count if isinstance(ip, DedupInboxProcessor) else 0
        duration = int(elapsed.ms())
        messages = [e.message]
        validation_result = ip.validation_result if isinstance(ip, DedupInboxProcessor) else {}
        log_activity(username, 'DEDUP', 'WARN',
                     build_activity_message(count, duration, messages, validation_result))
        return JsonResponse({
            'count': count,
            'duration': duration,
            'messages': messages,
            'state': 'invalid',
            'validation_result': validation_result
        }, status=400)
    except Exception as e:
        log_activity(username, 'DEDUP', 'ERROR', e.message)
        log_error(e, 'adapters/dedup/import', 'Import failed')
        return JsonResponse({
            'duration': int(elapsed.ms()),
            'messages': [e.message],
            'state': 'error'
        }, status=500)
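# Minimal client sketch (illustrative; the endpoint URL and authentication are assumptions, only
# the method and header checks come from ajax_import above): the view accepts a POSTed STIX XML
# body with an 'application/json' Accept header, and rejects anything else with 405/406/415 before
# parsing begins.
#
#     import requests
#     requests.post('https://repo.example.org/adapter/dedup/import/<username>/',
#                   data=open('stix_package.xml', 'rb').read(),
#                   headers={'Accept': 'application/json',
#                            'Content-Type': 'application/xml'})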