Example #1
0
def fix_data_in_s3(fields, bucket, s3_client, crash_data):
    """Remove the given fields from a raw crash object stored in S3.

    The raw crash is fetched from ``bucket``, every key listed in ``fields``
    that is present in it is deleted, and the object is uploaded back to the
    same key only when at least one field was removed. A one-line status is
    echoed in either case.

    :arg fields: iterable of raw crash keys to delete
    :arg bucket: name of the bucket holding the raw crash
    :arg s3_client: client object providing ``get_object`` and
        ``upload_fileobj``
    :arg crash_data: mapping with a ``"crashid"`` entry

    """
    crashid = crash_data["crashid"]

    # Key layout: v2/raw_crash/<first 3 chars of crash id>/<YYYYMMDD>/<crash id>
    key_parts = {
        "entropy": crashid[:3],
        "date": date_from_ooid(crashid).strftime("%Y%m%d"),
        "crashid": crashid,
    }
    path = "v2/raw_crash/%(entropy)s/%(date)s/%(crashid)s" % key_parts

    body = s3_client.get_object(Bucket=bucket, Key=path)["Body"]
    data = json.loads(body.read())

    # Delete one field at a time so a name repeated in ``fields`` is only
    # removed (and counted) once.
    removed = 0
    for name in fields:
        if name not in data:
            continue
        del data[name]
        removed += 1

    if not removed:
        click.echo("# s3: raw crash was fine")
        return

    payload = io.BytesIO(dict_to_str(data).encode("utf-8"))
    s3_client.upload_fileobj(Fileobj=payload, Bucket=bucket, Key=path)
    click.echo("# s3: fixed raw crash")
Example #2
0
def check_elasticsearch(supersearch, crash_ids):
    """Check Elasticsearch and return the set of missing crash ids.

    Crash ids should all be on the same day: only the first crash id is used
    to derive the date range that is searched.

    :arg supersearch: object whose ``get(**params)`` returns a mapping with a
        ``"hits"`` list; each hit is a mapping with a ``"uuid"`` key
    :arg crash_ids: a single crash id string or a sequence of crash id strings

    :returns: set of the given crash ids that are absent from the search
        hits; an empty set if no crash ids were given

    """
    crash_ids = [crash_ids] if isinstance(crash_ids, str) else crash_ids
    if not crash_ids:
        # Nothing to verify; bail out before indexing into an empty sequence
        # and before issuing a pointless search request.
        return set()

    crash_date = date_from_ooid(crash_ids[0])

    # The datestamp in the crashid doesn't match the processed date sometimes especially
    # when the crash came in at the end of the day.
    start_date = (crash_date - datetime.timedelta(days=5)).strftime("%Y-%m-%d")
    end_date = (crash_date + datetime.timedelta(days=5)).strftime("%Y-%m-%d")

    # NOTE(review): no "_results_number" is sent, so the number of hits is
    # whatever the search service returns by default -- confirm it can cover
    # len(crash_ids), otherwise present ids may be reported as missing.
    params = {
        "uuid": crash_ids,
        "date": [">=%s" % start_date, "<=%s" % end_date],
        "_columns": ["uuid"],
        "_facets": [],
        "_facets_size": 0,
    }
    search_results = supersearch.get(**params)

    crash_ids_in_es = {hit["uuid"] for hit in search_results["hits"]}
    return set(crash_ids) - crash_ids_in_es
Example #3
0
 def _get_datestamp(self, crashid):
     """Return the datestamp that ``date_from_ooid`` extracts from ``crashid``.

     :raises CrashidMissingDatestamp: if ``date_from_ooid`` returns ``None``
         for the crash id
     """
     datestamp = date_from_ooid(crashid)
     if datestamp is not None:
         return datestamp

     # We should never hit this situation unless the crashid is not valid
     raise CrashidMissingDatestamp('%s is missing datestamp' % crashid)
Example #4
0
 def _get_datestamp(self, crashid):
     """Look up the datestamp of a crashid, raising when there is none.

     The value comes from ``date_from_ooid``; a ``None`` result is turned
     into a ``CrashidMissingDatestamp`` exception.
     """
     found = date_from_ooid(crashid)
     has_no_datestamp = found is None
     if has_no_datestamp:
         # Only expected for crashids that are not valid.
         raise CrashidMissingDatestamp('%s is missing datestamp' % crashid)
     return found
Example #5
0
    def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        """Fill in the time-related fields of ``processed_crash``.

        Reads ``submitted_timestamp``, ``timestamp``, ``CrashTime``,
        ``StartupTime``, ``InstallTime`` and ``SecondsSinceLastCrash`` from
        ``raw_crash`` and writes ``submitted_timestamp``, ``date_processed``,
        ``crash_time``, ``client_crash_date``, ``install_age``, ``uptime`` and
        ``last_crash`` onto ``processed_crash``. Problems found along the way
        are appended to ``processor_meta.processor_notes``.
        """
        notes = processor_meta.processor_notes

        # When the raw crash has no submitted timestamp, use whatever
        # date_from_ooid() derives from the crash id.
        processed_crash.submitted_timestamp = raw_crash.get(
            "submitted_timestamp", date_from_ooid(raw_crash.uuid)
        )
        if isinstance(processed_crash.submitted_timestamp, str):
            processed_crash.submitted_timestamp = datetime_from_isodate_string(
                processed_crash.submitted_timestamp
            )
        processed_crash.date_processed = processed_crash.submitted_timestamp

        # defaultCrashTime: must have crashed before date processed
        submitted_tuple = processed_crash.submitted_timestamp.timetuple()
        submitted_epoch = int(time.mktime(submitted_tuple))

        # "timestamp" is the old name for crash time
        try:
            timestamp_epoch = int(raw_crash.get("timestamp", submitted_epoch))
        except ValueError:
            timestamp_epoch = 0
            notes.append('non-integer value of "timestamp"')

        try:
            crash_time_value = self._get_truncate_or_warn(
                raw_crash, "CrashTime", notes, timestamp_epoch, 10
            )
            crash_time = int(crash_time_value)
        except ValueError:
            crash_time = 0
            notes.append(
                'non-integer value of "CrashTime" (%s)' % raw_crash.CrashTime
            )

        processed_crash.crash_time = crash_time
        if crash_time == submitted_epoch:
            # The crash time is just the submission-time fallback.
            notes.append("client_crash_date is unknown")

        # StartupTime: must have started up some time before crash
        try:
            startup_time = int(raw_crash.get("StartupTime", crash_time))
        except ValueError:
            startup_time = 0
            notes.append('non-integer value of "StartupTime"')

        # InstallTime: must have installed some time before startup
        try:
            install_time = int(raw_crash.get("InstallTime", startup_time))
        except ValueError:
            install_time = 0
            notes.append('non-integer value of "InstallTime"')

        processed_crash.client_crash_date = datetime.datetime.fromtimestamp(
            crash_time, tz=UTC
        )
        processed_crash.install_age = crash_time - install_time
        # Uptime is clamped so it never goes negative.
        processed_crash.uptime = max(0, crash_time - startup_time)

        try:
            last_crash = int(raw_crash.SecondsSinceLastCrash)
        except (KeyError, TypeError, ValueError):
            last_crash = None
            notes.append('non-integer value of "SecondsSinceLastCrash"')
        if last_crash and last_crash > MAXINT:
            last_crash = None
            notes.append(
                '"SecondsSinceLastCrash" larger than MAXINT - set to NULL'
            )
        processed_crash.last_crash = last_crash
Example #6
0
def get_datestamp(crashid):
    """Return the datestamp that ``date_from_ooid`` parses out of a crashid.

    :returns: datetime

    :raises CrashIDMissingDatestamp: if ``date_from_ooid`` returns ``None``
        for the crash id

    """
    parsed = date_from_ooid(crashid)
    if parsed is not None:
        return parsed

    # We should never hit this situation unless the crashid is not valid
    raise CrashIDMissingDatestamp("%s is missing datestamp" % crashid)
Example #7
0
 def create_processed_crash_in_es(self, es_conn, crash_id):
     """Index a minimal processed crash document for ``crash_id``.

     The document has an empty raw crash and a processed crash with a fixed
     signature. The index name is the connection's index template formatted
     with the date ``date_from_ooid`` derives from the crash id. The
     connection is refreshed after indexing.
     """
     crash_date = date_from_ooid(crash_id)

     processed_crash = {
         "uuid": crash_id,
         "signature": "OOM | Small",
         "date_processed": crash_date,
     }
     document = {
         "crash_id": crash_id,
         "raw_crash": {},
         "processed_crash": processed_crash,
     }

     target_index = crash_date.strftime(es_conn.get_index_template())
     doc_type = es_conn.get_doctype()
     with es_conn() as client:
         client.index(
             index=target_index, doc_type=doc_type, body=document, id=crash_id
         )
     es_conn.refresh()
Example #8
0
def check_elasticsearch(supersearch, crash_ids):
    """Check Elasticsearch and return the set of missing crash ids.

    Crash ids should all be on the same day: only the first crash id is used
    to derive the date range that is searched.

    :arg supersearch: object whose ``get(**params)`` returns a mapping with a
        ``"hits"`` list; each hit is a mapping with a ``"uuid"`` key
    :arg crash_ids: a single crash id string or a sequence of crash id strings

    :returns: set of the given crash ids that are absent from the search
        hits; an empty set if no crash ids were given

    """
    crash_ids = [crash_ids] if isinstance(crash_ids, str) else crash_ids
    if not crash_ids:
        # Nothing to verify; bail out before indexing into an empty sequence
        # and before issuing a pointless search request.
        return set()

    crash_date = date_from_ooid(crash_ids[0])

    # NOTE(review): the "date" filter spans only the crash id's date and the
    # following day; a crash stored under an earlier date would be reported
    # as missing -- confirm this window is wide enough.
    start_date = crash_date.strftime("%Y-%m-%d")
    end_date = (crash_date + datetime.timedelta(days=1)).strftime("%Y-%m-%d")

    params = {
        "uuid": crash_ids,
        "date": [">=%s" % start_date, "<=%s" % end_date],
        # Ask for as many results as there are ids being checked.
        "_results_number": len(crash_ids),
        "_columns": ["uuid"],
        "_facets": [],
        "_facets_size": 0,
    }
    search_results = supersearch.get(**params)

    crash_ids_in_es = {hit["uuid"] for hit in search_results["hits"]}
    return set(crash_ids) - crash_ids_in_es
Example #9
0
 def _get_base(self, crash_id):
     """Return the base path components for ``crash_id``.

     The date comes from ``date_from_ooid``; when that yields a falsy value,
     the result of ``utc_now()`` is used instead.

     :returns: two-item list of ``self.config.fs_root`` and the date
         formatted as year, month and day digits
     """
     when = date_from_ooid(crash_id) or utc_now()
     day_dir = "%4d%02d%02d" % (when.year, when.month, when.day)
     return [self.config.fs_root, day_dir]
Example #10
0
def test_date_from_ooid():
    """date_from_ooid parses a trailing datestamp and rejects a bogus one."""
    # Crash id ending in 181210 -> 2018-12-10 (UTC)
    dated_id = "3efa014e-a9e9-405d-ae7e-9def54181210"
    parsed = ooid.date_from_ooid(dated_id)
    assert parsed == datetime.datetime(2018, 12, 10, tzinfo=UTC)

    # Crash id whose last six characters are not a date
    undated_id = "3efa014e-a9e9-405d-ae7e-9def54ffffff"
    parsed = ooid.date_from_ooid(undated_id)
    assert parsed is None
Example #11
0
def test_date_from_ooid():
    """Check date_from_ooid on a dated crash id and on an undated one."""
    expected = datetime.datetime
    good = '3efa014e-a9e9-405d-ae7e-9def54181210'
    bad = '3efa014e-a9e9-405d-ae7e-9def54ffffff'

    # Trailing "181210" is read as 2018-12-10 in UTC.
    assert ooid.date_from_ooid(good) == expected(2018, 12, 10, tzinfo=UTC)

    # Trailing "ffffff" is not a date, so nothing is returned.
    assert ooid.date_from_ooid(bad) is None
Example #12
0
 def _get_base(self, crash_id):
     """Build the ``[fs_root, date]`` base for a crash id.

     Uses the date from ``date_from_ooid(crash_id)`` and falls back to
     ``utc_now()`` when that result is falsy.
     """
     stamp = date_from_ooid(crash_id)
     stamp = stamp if stamp else utc_now()
     ymd = (stamp.year, stamp.month, stamp.day)
     date_formatted = "%4d%02d%02d" % ymd
     return [self.config.fs_root, date_formatted]
    def action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        """Compute submission, crash, startup and install time fields.

        Values are read from ``raw_crash`` and the results are stored on
        ``processed_crash`` (``submitted_timestamp``, ``date_processed``,
        ``crash_time``, ``client_crash_date``, ``install_age``, ``uptime``,
        ``last_crash``). A note is appended to
        ``processor_meta.processor_notes`` whenever a value cannot be
        converted to an integer, when the crash time equals the submission
        time, or when ``SecondsSinceLastCrash`` exceeds ``MAXINT``.
        """
        note_list = processor_meta.processor_notes

        # Default for a missing 'submitted_timestamp' is the result of
        # date_from_ooid() on the crash's uuid.
        fallback_submitted = date_from_ooid(raw_crash.uuid)
        processed_crash.submitted_timestamp = raw_crash.get(
            'submitted_timestamp', fallback_submitted
        )
        if isinstance(processed_crash.submitted_timestamp, str):
            # String timestamps are converted before further use.
            processed_crash.submitted_timestamp = datetime_from_isodate_string(
                processed_crash.submitted_timestamp
            )
        processed_crash.date_processed = processed_crash.submitted_timestamp

        # defaultCrashTime: must have crashed before date processed
        submitted_seconds = time.mktime(
            processed_crash.submitted_timestamp.timetuple()
        )
        submitted_timestamp_as_epoch = int(submitted_seconds)

        try:
            # 'timestamp' is the old name for crash time
            legacy_crash_time = int(
                raw_crash.get('timestamp', submitted_timestamp_as_epoch)
            )
        except ValueError:
            legacy_crash_time = 0
            note_list.append('non-integer value of "timestamp"')

        try:
            crash_time = int(
                self._get_truncate_or_warn(
                    raw_crash, 'CrashTime', note_list, legacy_crash_time, 10
                )
            )
        except ValueError:
            crash_time = 0
            note_list.append(
                'non-integer value of "CrashTime" (%s)' % raw_crash.CrashTime
            )
        processed_crash.crash_time = crash_time

        if crash_time == submitted_timestamp_as_epoch:
            note_list.append("client_crash_date is unknown")

        # StartupTime: must have started up some time before crash
        try:
            startup_epoch = int(raw_crash.get('StartupTime', crash_time))
        except ValueError:
            startup_epoch = 0
            note_list.append('non-integer value of "StartupTime"')

        # InstallTime: must have installed some time before startup
        try:
            install_epoch = int(raw_crash.get('InstallTime', startup_epoch))
        except ValueError:
            install_epoch = 0
            note_list.append('non-integer value of "InstallTime"')

        client_crash_date = datetime.datetime.fromtimestamp(crash_time, UTC)
        processed_crash.client_crash_date = client_crash_date
        processed_crash.install_age = crash_time - install_epoch
        # Never report a negative uptime.
        processed_crash.uptime = max(0, crash_time - startup_epoch)

        try:
            seconds_since_last = int(raw_crash.SecondsSinceLastCrash)
        except (KeyError, TypeError, ValueError):
            seconds_since_last = None
            note_list.append('non-integer value of "SecondsSinceLastCrash"')
        if seconds_since_last and seconds_since_last > MAXINT:
            seconds_since_last = None
            note_list.append(
                '"SecondsSinceLastCrash" larger than MAXINT - set to NULL'
            )
        processed_crash.last_crash = seconds_since_last