def generate_daterange(report):
    """
    Creates a date_range timestamp with the format YYYY-MM-DDTHH:MM:SS
    based on the begin and end dates, for easier parsing in Kibana.

    TODO: Move to utils to avoid duplication w/ elastic?
    """
    metadata = report["report_metadata"]
    begin_date = human_timestamp_to_datetime(metadata["begin_date"])
    end_date = human_timestamp_to_datetime(metadata["end_date"])
    begin_date_human = begin_date.strftime("%Y-%m-%dT%H:%M:%S")
    end_date_human = end_date.strftime("%Y-%m-%dT%H:%M:%S")
    date_range = [begin_date_human, end_date_human]
    logger.debug("date_range is {}".format(date_range))
    return date_range
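# Usage sketch (illustrative, not part of the original module): the input
# shape below is an assumption inferred from the keys generate_daterange()
# reads, and the timestamps are hypothetical.
def _example_generate_daterange():
    sample_report = {
        "report_metadata": {
            "begin_date": "2019-01-01 00:00:00",
            "end_date": "2019-01-01 23:59:59",
        }
    }
    # Expected result: ["2019-01-01T00:00:00", "2019-01-01T23:59:59"]
    return generate_daterange(sample_report)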
def save_report_to_s3(self, report, report_type):
    """
    Saves a parsed report to S3 as JSON under a date-partitioned key
    (year=/month=/day=), copying selected report metadata to the object.
    """
    report_date = human_timestamp_to_datetime(
        report["report_metadata"]["begin_date"])
    report_id = report["report_metadata"]["report_id"]
    path_template = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json"
    object_path = path_template.format(self.bucket_path,
                                       report_type,
                                       report_date.year,
                                       report_date.month,
                                       report_date.day,
                                       report_id)
    logger.debug("Saving {0} report to s3://{1}/{2}".format(
        report_type,
        self.bucket_name,
        object_path))
    object_metadata = {
        k: v
        for k, v in report["report_metadata"].items()
        if k in self.metadata_keys
    }
    self.bucket.put_object(Body=json.dumps(report),
                           Key=object_path,
                           Metadata=object_metadata)
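# Key-layout sketch (illustrative; all values are hypothetical): shows the
# object key that path_template above produces for an aggregate report
# dated 2019-01-15. The year=/month=/day= segments follow the Hive
# partitioning convention, which lets tools such as AWS Athena prune
# partitions by date.
def _example_object_path():
    path_template = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json"
    # -> "dmarc-reports/aggregate/year=2019/month=01/day=15/abc123.json"
    return path_template.format("dmarc-reports", "aggregate",
                                2019, 1, 15, "abc123")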
def save_forensic_report_to_elasticsearch(forensic_report,
                                          index_suffix=None,
                                          monthly_indexes=False,
                                          number_of_shards=1,
                                          number_of_replicas=1):
    """
    Saves a parsed DMARC forensic report to Elasticsearch

    Args:
        forensic_report (OrderedDict): A parsed forensic report
        index_suffix (str): The suffix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily indexes
        number_of_shards (int): The number of shards to use in the index
        number_of_replicas (int): The number of replicas to use in the index

    Raises:
        AlreadySaved
    """
    logger.debug("Saving forensic report to Elasticsearch")
    forensic_report = forensic_report.copy()
    sample_date = None
    if forensic_report["parsed_sample"]["date"] is not None:
        sample_date = forensic_report["parsed_sample"]["date"]
        sample_date = human_timestamp_to_datetime(sample_date)
    original_headers = forensic_report["parsed_sample"]["headers"]
    headers = OrderedDict()
    for original_header in original_headers:
        headers[original_header.lower()] = original_headers[original_header]

    arrival_date_human = forensic_report["arrival_date_utc"]
    arrival_date = human_timestamp_to_datetime(arrival_date_human)

    # Check for an existing document with the same identifying fields
    search = Search(index="dmarc_forensic*")
    arrival_query = {"match": {"arrival_date": arrival_date}}
    q = Q(arrival_query)

    from_ = None
    to_ = None
    subject = None
    if "from" in headers:
        from_ = headers["from"]
        from_query = {"match": {"sample.headers.from": from_}}
        q = q & Q(from_query)
    if "to" in headers:
        to_ = headers["to"]
        to_query = {"match": {"sample.headers.to": to_}}
        q = q & Q(to_query)
    if "subject" in headers:
        subject = headers["subject"]
        subject_query = {"match": {"sample.headers.subject": subject}}
        q = q & Q(subject_query)

    search.query = q
    existing = search.execute()

    if len(existing) > 0:
        raise AlreadySaved("A forensic sample to {0} from {1} "
                           "with a subject of {2} and arrival date of {3} "
                           "already exists in "
                           "Elasticsearch".format(to_,
                                                  from_,
                                                  subject,
                                                  arrival_date_human))

    parsed_sample = forensic_report["parsed_sample"]
    sample = _ForensicSampleDoc(
        raw=forensic_report["sample"],
        headers=headers,
        headers_only=forensic_report["sample_headers_only"],
        date=sample_date,
        subject=forensic_report["parsed_sample"]["subject"],
        filename_safe_subject=parsed_sample["filename_safe_subject"],
        body=forensic_report["parsed_sample"]["body"])

    for address in forensic_report["parsed_sample"]["to"]:
        sample.add_to(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["reply_to"]:
        sample.add_reply_to(display_name=address["display_name"],
                            address=address["address"])
    for address in forensic_report["parsed_sample"]["cc"]:
        sample.add_cc(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["bcc"]:
        sample.add_bcc(display_name=address["display_name"],
                       address=address["address"])
    for attachment in forensic_report["parsed_sample"]["attachments"]:
        sample.add_attachment(filename=attachment["filename"],
                              content_type=attachment["mail_content_type"],
                              sha256=attachment["sha256"])
    try:
        forensic_doc = _ForensicReportDoc(
            feedback_type=forensic_report["feedback_type"],
            user_agent=forensic_report["user_agent"],
            version=forensic_report["version"],
            original_mail_from=forensic_report["original_mail_from"],
            arrival_date=arrival_date,
            domain=forensic_report["reported_domain"],
            original_envelope_id=forensic_report["original_envelope_id"],
            authentication_results=forensic_report["authentication_results"],
            delivery_results=forensic_report["delivery_result"],
            source_ip_address=forensic_report["source"]["ip_address"],
            source_country=forensic_report["source"]["country"],
            source_reverse_dns=forensic_report["source"]["reverse_dns"],
            source_base_domain=forensic_report["source"]["base_domain"],
            authentication_mechanisms=forensic_report[
                "authentication_mechanisms"],
            auth_failure=forensic_report["auth_failure"],
            dkim_domain=forensic_report["dkim_domain"],
            original_rcpt_to=forensic_report["original_rcpt_to"],
            sample=sample)

        index = "dmarc_forensic"
        if index_suffix:
            index = "{0}_{1}".format(index, index_suffix)
        if monthly_indexes:
            index_date = arrival_date.strftime("%Y-%m")
        else:
            index_date = arrival_date.strftime("%Y-%m-%d")
        index = "{0}-{1}".format(index, index_date)
        index_settings = dict(number_of_shards=number_of_shards,
                              number_of_replicas=number_of_replicas)
        create_indexes([index], index_settings)
        forensic_doc.meta.index = index
        try:
            forensic_doc.save()
        except Exception as e:
            raise ElasticsearchError("Elasticsearch error: {0}".format(
                e.__str__()))
    except KeyError as e:
        raise InvalidForensicReport(
            "Forensic report missing required field: {0}".format(
                e.__str__()))
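# Usage sketch (illustrative): saving a parsed forensic report while
# treating an AlreadySaved duplicate as non-fatal. `parsed_report` is a
# hypothetical dict in the shape this module's parser produces.
def _example_save_forensic_report(parsed_report):
    try:
        save_forensic_report_to_elasticsearch(parsed_report,
                                              monthly_indexes=True)
    except AlreadySaved as e:
        # The duplicate check above matches on to/from/subject and
        # arrival_date, so re-processing the same mailbox is safe.
        logger.info("Skipping duplicate forensic report: {0}".format(e))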
def save_aggregate_report_to_elasticsearch(aggregate_report,
                                           index_suffix=None,
                                           monthly_indexes=False,
                                           number_of_shards=1,
                                           number_of_replicas=1):
    """
    Saves a parsed DMARC aggregate report to Elasticsearch

    Args:
        aggregate_report (OrderedDict): A parsed aggregate report
        index_suffix (str): The suffix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily indexes
        number_of_shards (int): The number of shards to use in the index
        number_of_replicas (int): The number of replicas to use in the index

    Raises:
        AlreadySaved
    """
    logger.debug("Saving aggregate report to Elasticsearch")
    aggregate_report = aggregate_report.copy()
    metadata = aggregate_report["report_metadata"]
    org_name = metadata["org_name"]
    report_id = metadata["report_id"]
    domain = aggregate_report["policy_published"]["domain"]
    begin_date = human_timestamp_to_datetime(metadata["begin_date"])
    end_date = human_timestamp_to_datetime(metadata["end_date"])
    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%S")
    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%S")
    if monthly_indexes:
        index_date = begin_date.strftime("%Y-%m")
    else:
        index_date = begin_date.strftime("%Y-%m-%d")
    aggregate_report["begin_date"] = begin_date
    aggregate_report["end_date"] = end_date
    date_range = [aggregate_report["begin_date"],
                  aggregate_report["end_date"]]

    # Check for an existing document with the same identifying fields
    org_name_query = Q(dict(match=dict(org_name=org_name)))
    report_id_query = Q(dict(match=dict(report_id=report_id)))
    domain_query = Q(dict(match={"published_policy.domain": domain}))
    begin_date_query = Q(dict(match=dict(date_range=begin_date)))
    end_date_query = Q(dict(match=dict(date_range=end_date)))

    search = Search(index="dmarc_aggregate*")
    query = org_name_query & report_id_query & domain_query
    query = query & begin_date_query & end_date_query
    search.query = query

    existing = search.execute()
    if len(existing) > 0:
        raise AlreadySaved("An aggregate report ID {0} from {1} about {2} "
                           "with a date range of {3} UTC to {4} UTC already "
                           "exists in "
                           "Elasticsearch".format(report_id,
                                                  org_name,
                                                  domain,
                                                  begin_date_human,
                                                  end_date_human))

    published_policy = _PublishedPolicy(
        domain=aggregate_report["policy_published"]["domain"],
        adkim=aggregate_report["policy_published"]["adkim"],
        aspf=aggregate_report["policy_published"]["aspf"],
        p=aggregate_report["policy_published"]["p"],
        sp=aggregate_report["policy_published"]["sp"],
        pct=aggregate_report["policy_published"]["pct"],
        fo=aggregate_report["policy_published"]["fo"])

    for record in aggregate_report["records"]:
        agg_doc = _AggregateReportDoc(
            xml_schema=aggregate_report["xml_schema"],
            org_name=metadata["org_name"],
            org_email=metadata["org_email"],
            org_extra_contact_info=metadata["org_extra_contact_info"],
            report_id=metadata["report_id"],
            date_range=date_range,
            errors=metadata["errors"],
            published_policy=published_policy,
            source_ip_address=record["source"]["ip_address"],
            source_country=record["source"]["country"],
            source_reverse_dns=record["source"]["reverse_dns"],
            source_base_domain=record["source"]["base_domain"],
            message_count=record["count"],
            disposition=record["policy_evaluated"]["disposition"],
            dkim_aligned=record["policy_evaluated"]["dkim"] is not None and
            record["policy_evaluated"]["dkim"].lower() == "pass",
            spf_aligned=record["policy_evaluated"]["spf"] is not None and
            record["policy_evaluated"]["spf"].lower() == "pass",
            header_from=record["identifiers"]["header_from"],
            envelope_from=record["identifiers"]["envelope_from"],
            envelope_to=record["identifiers"]["envelope_to"])

        for override in record["policy_evaluated"][
                "policy_override_reasons"]:
            agg_doc.add_policy_override(type_=override["type"],
                                        comment=override["comment"])

        for dkim_result in record["auth_results"]["dkim"]:
            agg_doc.add_dkim_result(domain=dkim_result["domain"],
                                    selector=dkim_result["selector"],
                                    result=dkim_result["result"])

        for spf_result in record["auth_results"]["spf"]:
            agg_doc.add_spf_result(domain=spf_result["domain"],
                                   scope=spf_result["scope"],
                                   result=spf_result["result"])

        index = "dmarc_aggregate"
        if index_suffix:
            index = "{0}_{1}".format(index, index_suffix)
        index = "{0}-{1}".format(index, index_date)
        index_settings = dict(number_of_shards=number_of_shards,
                              number_of_replicas=number_of_replicas)
        create_indexes([index], index_settings)
        agg_doc.meta.index = index

        try:
            agg_doc.save()
        except Exception as e:
            raise ElasticsearchError("Elasticsearch error: {0}".format(
                e.__str__()))