Example 1
    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        crash_id = raw_crash.uuid
        old_processed_crash = self.crashstore.get_unredacted_processed(crash_id)

        for key, value in old_processed_crash.iteritems():
            if 'date_processed' in key:
                processed_crash[key] = date_to_string(
                    string_to_datetime(value) - self.config.time_delta
                )
                print processed_crash.uuid, value, processed_crash[key]
            else:
                if key != 'uptime' and key != 'crash_time' and (
                   'time' in key or "date" in key or 'Date' in key
                ):
                    value = date_to_string(string_to_datetime(value))
                processed_crash[key] = value
        processor_meta.processor_notes.append(
            'DateProcessedTimeMachine has pushed date_processed into the past'
            ' by "%s" (D HH:MM:SS)' %  to_str(self.config.time_delta)
        )
        processor_meta.processor_notes.append(
            'Original processor_notes: %s'
            % old_processed_crash['processor_notes']
        )
        return True
Example 2
def test_string_datetime_with_timezone():
    date = "2001-11-30T12:34:56Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)
    assert res.strftime('%H') == '12'
    # because it's a timezone aware datetime
    assert res.tzname() == 'UTC'
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # plus 3 hours east of Zulu means minus 3 hours on UTC
    date = "2001-11-30T12:10:56+03:00"
    res = datetimeutil.string_to_datetime(date)
    expected = datetime.datetime(2001, 11, 30, 12 - 3, 10, 56, tzinfo=UTC)
    assert res == expected

    # similar example
    date = "2001-11-30T12:10:56-01:30"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12 + 1, 10 + 30, 56, tzinfo=UTC)

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC)
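These assertions imply that string_to_datetime always returns a timezone-aware datetime normalized to UTC, whatever offset the input carries. A minimal sketch of such a helper, assuming python-dateutil is available (the real socorro.lib.datetimeutil implementation may differ in detail):

import datetime
from dateutil import parser  # assumption: python-dateutil is installed

UTC = datetime.timezone.utc

def string_to_datetime_sketch(value):
    # Hypothetical re-implementation for illustration only.
    if isinstance(value, datetime.datetime):
        # Naive datetimes become UTC-aware; aware ones are converted to UTC.
        if value.tzinfo is None:
            return value.replace(tzinfo=UTC)
        return value.astimezone(UTC)
    if isinstance(value, (list, tuple)):
        # Support the separated ["YYYY-mm-dd", "HH:MM:SS"] form.
        value = "T".join(value)
    parsed = parser.parse(value)  # raises ValueError on invalid input
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=UTC)
    return parsed.astimezone(UTC)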
Example 3
def check_type(param, datatype):
    """
    Make sure that param is of type datatype and return it.

    If param is None, return it.
    If param is an instance of datatype, return it.
    If param is not an instance of datatype and is not None, cast it as
    datatype and return it.
    """
    if param is None:
        return param

    if datatype == "str" and not isinstance(param, basestring):
        try:
            param = str(param)
        except ValueError:
            param = str()

    elif datatype == "int" and not isinstance(param, int):
        try:
            param = int(param)
        except ValueError:
            param = int()

    elif datatype == "bool" and not isinstance(param, bool):
        param = str(param).lower() in ("true", "t", "1", "y", "yes")

    elif datatype == "datetime" and not isinstance(param, datetime):
        try:
            param = dtutil.string_to_datetime(param)
        except ValueError:
            param = None

    elif datatype == "date" and not isinstance(param, date):
        try:
            param = dtutil.string_to_datetime(param).date()
        except ValueError:
            param = None

    elif datatype == "timedelta" and not isinstance(param, timedelta):
        try:
            param = dtutil.strHoursToTimeDelta(param)
        except ValueError:
            param = None

    elif datatype == "json" and isinstance(param, basestring):
        try:
            param = json.loads(param)
        except ValueError:
            param = None

    elif datatype == "float" and not isinstance(param, float):
        try:
            param = float(param)
        except ValueError:
            param = float()

    return param
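Given those fallbacks, a quick usage sketch (hypothetical calls; the results follow directly from the branches above):

check_type(None, "int")            # None passes through untouched
check_type("42", "int")            # 42
check_type("not a number", "int")  # 0 -- int() fallback on ValueError
check_type("yes", "bool")          # True, via the truthy whitelist
check_type("2001-11-03", "date")   # datetime.date(2001, 11, 3)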
Example 4
def check_type(param, datatype):
    """
    Make sure that param is of type datatype and return it.

    If param is None, return it.
    If param is an instance of datatype, return it.
    If param is not an instance of datatype and is not None, cast it as
    datatype and return it.
    """
    if param is None:
        return param

    if datatype == "str" and not isinstance(param, basestring):
        try:
            param = str(param)
        except ValueError:
            param = str()

    elif datatype == "int" and not isinstance(param, int):
        try:
            param = int(param)
        except ValueError:
            param = int()

    elif datatype == "bool" and not isinstance(param, bool):
        param = str(param).lower() in ("true", "t", "1", "y", "yes")

    elif datatype == "datetime" and not isinstance(param, datetime):
        try:
            param = dtutil.string_to_datetime(param)
        except ValueError:
            param = None

    elif datatype == "date" and not isinstance(param, date):
        try:
            param = dtutil.string_to_datetime(param).date()
        except ValueError:
            param = None

    elif datatype == "timedelta" and not isinstance(param, timedelta):
        try:
            param = dtutil.strHoursToTimeDelta(param)
        except ValueError:
            param = None

    elif datatype == "json" and isinstance(param, basestring):
        try:
            param = json.loads(param)
        except ValueError:
            param = None

    elif datatype == "float" and not isinstance(param, float):
        try:
            param = float(param)
        except ValueError:
            param = float()

    return param
Example 5
    def query(self, from_date, to_date, json_query):
        """
        Send a query directly to ElasticSearch and return the result.
        """
        # Default dates
        now = dtutil.utc_now().date()
        lastweek = now - datetime.timedelta(7)

        from_date = dtutil.string_to_datetime(from_date) or lastweek
        to_date = dtutil.string_to_datetime(to_date) or now
        daterange = self.generate_list_of_indexes(from_date, to_date)

        # -
        # This code is here to avoid failing queries caused by missing
        # indexes. It should not happen on prod, but doing this makes
        # sure users will never see a 500 Error because of this eventuality.
        # -

        # Iterate until we can return an actual result and not an error
        can_return = False
        while not can_return:
            if not daterange:
                # This is probably wrong and should be raising an error instead
                http_response = "{}"
                break

            uri = "/%s/_search" % ",".join(daterange)

            with self.http:
                http_response = self.http.post(uri, json_query)

            # If there has been an error,
            # then we get a dict instead of some json.
            if isinstance(http_response, dict):
                data = http_response["error"]["data"]

                # If an index is missing,
                # try to remove it from the list of indexes and retry.
                if (http_response["error"]["code"] == 404
                        and data.find("IndexMissingException") >= 0):
                    index = data[data.find("[[") + 2:data.find("]")]
                    daterange.remove(index)
                else:
                    error = 'Unexpected error from elasticsearch: %s'
                    raise UnexpectedElasticsearchError(error % data)
            else:
                can_return = True

        return (http_response, "text/json")
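The slicing that recovers the missing index name from the error payload is worth tracing; with a hypothetical error string it behaves like this:

data = "IndexMissingException[[socorro_20120408] missing]"
index = data[data.find("[[") + 2:data.find("]")]
# index == "socorro_20120408"; it is removed from daterange and the
# query is retried against the remaining indexes.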
Example 6
    def query(self, from_date, to_date, json_query):
        """
        Send a query directly to ElasticSearch and return the result.
        """
        # Default dates
        now = dtutil.utc_now().date()
        lastweek = now - datetime.timedelta(7)

        from_date = dtutil.string_to_datetime(from_date) or lastweek
        to_date = dtutil.string_to_datetime(to_date) or now
        daterange = self.generate_list_of_indexes(from_date, to_date)

        # -
        # This code is here to avoid failing queries caused by missing
        # indexes. It should not happen on prod, but doing this makes
        # sure users will never see a 500 Error because of this eventuality.
        # -

        # Iterate until we can return an actual result and not an error
        can_return = False
        while not can_return:
            if not daterange:
                # This is probably wrong and should be raising an error instead
                http_response = "{}"
                break

            uri = "/%s/_search" % ",".join(daterange)

            with self.http:
                http_response = self.http.post(uri, json_query)

            # If there has been an error,
            # then we get a dict instead of some json.
            if isinstance(http_response, dict):
                data = http_response["error"]["data"]

                # If an index is missing,
                # try to remove it from the list of indexes and retry.
                if (http_response["error"]["code"] == 404 and
                    data.find("IndexMissingException") >= 0):
                    index = data[data.find("[[") + 2:data.find("]")]
                    daterange.remove(index)
                else:
                    error = 'Unexpected error from elasticsearch: %s'
                    raise UnexpectedElasticsearchError(error % data)
            else:
                can_return = True

        return (http_response, "text/json")
Example 7
def convert_to_type(value, data_type):
    if data_type == "str" and not isinstance(value, str):
        value = str(value)
    # yes, 'enum' is being converted to a string
    elif data_type == "enum" and not isinstance(value, str):
        value = str(value)
    elif data_type == "int" and not isinstance(value, int):
        value = int(value)
    elif data_type == "bool" and not isinstance(value, bool):
        value = str(value).lower() in ("true", "t", "1", "y", "yes")
    elif data_type == "datetime" and not isinstance(value, datetime.datetime):
        value = datetimeutil.string_to_datetime(value)
    elif data_type == "date" and not isinstance(value, datetime.date):
        value = datetimeutil.string_to_datetime(value).date()
    return value
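As a usage sketch (hypothetical inputs), note that the bool branch treats anything outside its whitelist as False:

convert_to_type("123", "int")          # 123
convert_to_type("t", "bool")           # True
convert_to_type("no", "bool")          # False -- not in the whitelist
convert_to_type("2001-11-03", "date")  # datetime.date(2001, 11, 3)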
Example 8
def convert_to_type(value, data_type):
    if data_type == 'str' and not isinstance(value, basestring):
        value = str(value)
    # yes, 'enum' is being converted to a string
    elif data_type == 'enum' and not isinstance(value, basestring):
        value = str(value)
    elif data_type == 'int' and not isinstance(value, int):
        value = int(value)
    elif data_type == 'bool' and not isinstance(value, bool):
        value = str(value).lower() in ('true', 't', '1', 'y', 'yes')
    elif data_type == 'datetime' and not isinstance(value, datetime.datetime):
        value = datetimeutil.string_to_datetime(value)
    elif data_type == 'date' and not isinstance(value, datetime.date):
        value = datetimeutil.string_to_datetime(value).date()
    return value
Example 9
def convert_to_type(value, data_type):
    if data_type == 'str' and not isinstance(value, basestring):
        value = str(value)
    # yes, 'enum' is being converted to a string
    elif data_type == 'enum' and not isinstance(value, basestring):
        value = str(value)
    elif data_type == 'int' and not isinstance(value, int):
        value = int(value)
    elif data_type == 'bool' and not isinstance(value, bool):
        value = str(value).lower() in ('true', 't', '1', 'y', 'yes')
    elif data_type == 'datetime' and not isinstance(value, datetime.datetime):
        value = datetimeutil.string_to_datetime(value)
    elif data_type == 'date' and not isinstance(value, datetime.date):
        value = datetimeutil.string_to_datetime(value).date()
    return value
Example 10
    def update_crashstats_signature(self, signature, report_date, report_build):
        with transaction_context(self.database) as connection:
            # Pull the data from the db. If it's there, then do an update. If it's
            # not there, then do an insert.
            try:
                sql = """
                SELECT signature, first_build, first_date
                FROM crashstats_signature
                WHERE signature=%s
                """
                sig = single_row_sql(connection, sql, (signature,))

                sql = """
                UPDATE crashstats_signature
                SET first_build=%s, first_date=%s
                WHERE signature=%s
                """
                params = (
                    min(sig[1], int(report_build)),
                    min(sig[2], string_to_datetime(report_date)),
                    sig[0]
                )

            except SQLDidNotReturnSingleRow:
                sql = """
                INSERT INTO crashstats_signature (signature, first_build, first_date)
                VALUES (%s, %s, %s)
                """
                params = (signature, report_build, report_date)

            execute_no_results(connection, sql, params)
Example 11
def check_type(param, datatype):
    """
    Make sure that param is of type datatype and return it.

    If param is None, return it.
    If param is an instance of datatype, return it.
    If param is not an instance of datatype and is not None, cast it as
    datatype and return it.
    """
    if param is None:
        return param

    if datatype == "str" and not isinstance(param, basestring):
        try:
            param = str(param)
        except ValueError:
            param = str()

    elif datatype == "int" and not isinstance(param, int):
        try:
            param = int(param)
        except ValueError:
            param = int()

    elif datatype == "datetime" and not isinstance(param, datetime):
        try:
            param = dtutil.string_to_datetime(param)
        except ValueError:
            param = None

    return param
Example 12
    def main(self):
        storage = self.config.elasticsearch_storage_class(self.config)

        crash_file = open(self.config.processed_crash_file)
        processed_crash = json.load(crash_file)

        crash_file = open(self.config.raw_crash_file)
        raw_crash = json.load(crash_file)

        crash_date = string_to_datetime(processed_crash['date_processed'])
        es_index = storage.get_index_for_crash(crash_date)
        es_doctype = self.config.elasticsearch_doctype
        crash_id = processed_crash['uuid']

        storage.save_raw_and_processed(
            raw_crash,
            None,
            processed_crash,
            crash_id
        )

        try:
            # Verify the crash has been inserted
            crash = storage.es.get(
                es_index,
                es_doctype,
                crash_id
            )
            assert crash['exists']

        finally:
            # Clean up created index.
            storage.es.delete_index(es_index)
Example 13
    def test_get_index_for_crash_dynamic_name(self):
        """Test a dynamic (date-based) index name.
        """

        # The crashstorage class looks for '%' in the index name; if that
        # symbol is present, it will attempt to generate a new date-based
        # index name. Since the test base config doesn't use this pattern,
        # we need to specify it now.
        modified_config = self.get_tuned_config(
            ESCrashStorage,
            {'resource.elasticsearch.elasticsearch_index':
                'socorro_integration_test_reports%Y%m%d'}
        )
        es_storage = ESCrashStorage(config=modified_config)

        # The date is used to generate the name of the index; it must be a
        # datetime object.
        date = string_to_datetime(
            a_processed_crash['client_crash_date']
        )
        index = es_storage.get_index_for_crash(date)

        # The base index name is obtained from the test base class and the
        # date is appended to it according to pattern specified above.
        ok_(type(index) is str)
        eq_(index, 'socorro_integration_test_reports20120408')
Example 14
    def update_crashstats_signature(self, signature, report_date,
                                    report_build):
        with transaction_context(self.database) as connection:
            # Pull the data from the db. If it's there, then do an update. If it's
            # not there, then do an insert.
            try:
                sql = """
                SELECT signature, first_build, first_date
                FROM crashstats_signature
                WHERE signature=%s
                """
                sig = single_row_sql(connection, sql, (signature, ))

                sql = """
                UPDATE crashstats_signature
                SET first_build=%s, first_date=%s
                WHERE signature=%s
                """
                params = (min(sig[1], int(report_build)),
                          min(sig[2], string_to_datetime(report_date)), sig[0])

            except SQLDidNotReturnSingleRow:
                sql = """
                INSERT INTO crashstats_signature (signature, first_build, first_date)
                VALUES (%s, %s, %s)
                """
                params = (signature, report_build, report_date)

            execute_no_results(connection, sql, params)
Example 15
    def test_get_index_for_crash_dynamic_name(self):
        """Test a dynamic (date-based) index name """

        # The crashstorage class looks for '%' in the index name; if that
        # symbol is present, it will attempt to generate a new date-based
        # index name. Since the test base config doesn't use this pattern,
        # we need to specify it now.
        modified_config = self.get_tuned_config(
            ESCrashStorage,
            {
                "resource.elasticsearch.elasticsearch_index":
                "socorro_integration_test_reports%Y%m%d"
            },
        )
        es_storage = ESCrashStorage(config=modified_config)

        # The date is used to generate the name of the index; it must be a
        # datetime object.
        date = string_to_datetime(a_processed_crash["client_crash_date"])
        index = es_storage.get_index_for_crash(date)

        # The base index name is obtained from the test base class and the
        # date is appended to it according to pattern specified above.
        assert type(index) is str
        assert index == "socorro_integration_test_reports20120408"
Example 16
def test_string_to_datetime():
    """
    Test datetimeutil.string_to_datetime()
    """
    # Empty date
    date = ""
    try:
        res = datetimeutil.string_to_datetime(date)
        raise AssertionError("expect this to raise ValueError")
    except ValueError:
        pass

    # already a date
    date = datetime.datetime.utcnow()
    res = datetimeutil.string_to_datetime(date)

    eq_(res, date.replace(tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')
    eq_(res.strftime('%z'), '+0000')

    # YY-mm-dd date
    date = "2001-11-03"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 3, tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')  # timezone aware

    # and naughty YY-m-d date
    date = "2001-1-3"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 1, 3, tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')  # timezone aware

    # Commented out because I don't think `YY-mm-dd+HH:ii:ss` is a
    # valid date format.
    ## YY-mm-dd+HH:ii:ss date
    #date = "2001-11-30+12:34:56"
    #try:
    #    res = datetimeutil.string_to_datetime(date)
    #except ValueError:
    #    res = None
    #expected = datetime(2001, 11, 30, 12, 34, 56)
    #assert res == expected, "Date is %s, %s expected." % (date, expected)

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC))

    # Separated date
    date = ["2001-11-30", "12:34:56"]
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC))

    # Invalid date
    date = "2001-11-32"
    try:
        res = datetimeutil.string_to_datetime(date)
        raise AssertionError("should have raise a ValueError")
    except ValueError:
        pass
Example 17
    def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
        """Test an index attempt that fails because of a bogus number field.

        Expected behavior is to remove that field and retry indexing.

        """
        es_storage = ESCrashStorage(config=self.config)

        crash_id = a_processed_crash['uuid']
        raw_crash = {}
        processed_crash = {
            'date_processed': '2012-04-08 10:56:41.558922',
            'bogus-field': 1234567890,
            'foo': 'bar',
        }

        def mock_index(*args, **kwargs):
            if 'bogus-field' in kwargs['body']['processed_crash']:
                raise elasticsearch.exceptions.TransportError(
                    400,
                    'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                    '9300]][indices:data/write/index]]; nested: '
                    'MapperParsingException[failed to parse '
                    '[processed_crash.bogus-field]]; nested: '
                    'NumberFormatException[For input string: '
                    '"18446744073709480735"]; '
                )

            return True

        es_class_mock().index.side_effect = mock_index

        # Submit a crash and ensure that it succeeds.
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(raw_crash),
            dumps=None,
            processed_crash=deepcopy(processed_crash),
            crash_id=crash_id
        )

        expected_doc = {
            'crash_id': crash_id,
            'removed_fields': 'processed_crash.bogus-field',
            'processed_crash': {
                'date_processed': string_to_datetime(
                    '2012-04-08 10:56:41.558922'
                ),
                'foo': 'bar',
            },
            'raw_crash': {},
        }
        es_class_mock().index.assert_called_with(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            body=expected_doc,
            id=crash_id
        )
Example 18
    def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
        """Test an index attempt that fails because of a bogus number field.

        Expected behavior is to remove that field and retry indexing.

        """
        es_storage = ESCrashStorage(config=self.config)

        crash_id = SAMPLE_PROCESSED_CRASH["uuid"]
        raw_crash = {}
        processed_crash = {
            "date_processed": date_to_string(utc_now()),
            # NOTE(willkg): This needs to be a key that's in super_search_fields, but is
            # rejected by our mock_index call--this is wildly contrived.
            "version": 1234567890,
            "uuid": crash_id,
        }

        def mock_index(*args, **kwargs):
            if "version" in kwargs["body"]["processed_crash"]:
                raise elasticsearch.exceptions.TransportError(
                    400,
                    ("RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                     "9300]][indices:data/write/index]]; nested: "
                     "MapperParsingException[failed to parse "
                     "[processed_crash.version]]; nested: "
                     "NumberFormatException[For input string: "
                     '"18446744073709480735"]; '),
                )

            return True

        es_class_mock().index.side_effect = mock_index

        # Submit a crash and ensure that it succeeds.
        es_storage.save_processed_crash(
            raw_crash=deepcopy(raw_crash),
            processed_crash=deepcopy(processed_crash),
        )

        expected_doc = {
            "crash_id": crash_id,
            "removed_fields": "processed_crash.version",
            "processed_crash": {
                "date_processed":
                string_to_datetime(processed_crash["date_processed"]),
                "uuid":
                crash_id,
            },
            "raw_crash": {},
        }
        es_class_mock().index.assert_called_with(
            index=self.es_context.get_index_for_date(utc_now()),
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            body=expected_doc,
            id=crash_id,
        )
Example 19
    def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
        """Test an index attempt that fails because of a bogus number field.

        Expected behavior is to remove that field and retry indexing.

        """
        es_storage = ESCrashStorage(config=self.config)

        crash_id = a_processed_crash['uuid']
        raw_crash = {}
        processed_crash = {
            'date_processed': '2012-04-08 10:56:41.558922',
            'bogus-field': 1234567890,
            'foo': 'bar',
        }

        def mock_index(*args, **kwargs):
            if 'bogus-field' in kwargs['body']['processed_crash']:
                raise elasticsearch.exceptions.TransportError(
                    400,
                    'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                    '9300]][indices:data/write/index]]; nested: '
                    'MapperParsingException[failed to parse '
                    '[processed_crash.bogus-field]]; nested: '
                    'NumberFormatException[For input string: '
                    '"18446744073709480735"]; '
                )

            return True

        es_class_mock().index.side_effect = mock_index

        # Submit a crash and ensure that it succeeds.
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(raw_crash),
            dumps=None,
            processed_crash=deepcopy(processed_crash),
            crash_id=crash_id
        )

        expected_doc = {
            'crash_id': crash_id,
            'removed_fields': 'processed_crash.bogus-field',
            'processed_crash': {
                'date_processed': string_to_datetime(
                    '2012-04-08 10:56:41.558922'
                ),
                'foo': 'bar',
            },
            'raw_crash': {},
        }
        es_class_mock().index.assert_called_with(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            body=expected_doc,
            id=crash_id
        )
Example 20
    def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
        """Test an index attempt that fails because of a bogus number field.

        Expected behavior is to remove that field and retry indexing.

        """
        es_storage = ESCrashStorage(config=self.config)

        crash_id = a_processed_crash["uuid"]
        raw_crash = {}
        processed_crash = {
            "date_processed": "2012-04-08 10:56:41.558922",
            "bogus-field": 1234567890,
            "foo": "bar",
        }

        def mock_index(*args, **kwargs):
            if "bogus-field" in kwargs["body"]["processed_crash"]:
                raise elasticsearch.exceptions.TransportError(
                    400,
                    ("RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                     "9300]][indices:data/write/index]]; nested: "
                     "MapperParsingException[failed to parse "
                     "[processed_crash.bogus-field]]; nested: "
                     "NumberFormatException[For input string: "
                     '"18446744073709480735"]; '),
                )

            return True

        es_class_mock().index.side_effect = mock_index

        # Submit a crash and ensure that it succeeds.
        es_storage.save_raw_and_processed(
            raw_crash=deepcopy(raw_crash),
            dumps=None,
            processed_crash=deepcopy(processed_crash),
            crash_id=crash_id,
        )

        expected_doc = {
            "crash_id": crash_id,
            "removed_fields": "processed_crash.bogus-field",
            "processed_crash": {
                "date_processed":
                string_to_datetime("2012-04-08 10:56:41.558922"),
                "foo": "bar",
            },
            "raw_crash": {},
        }
        es_class_mock().index.assert_called_with(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            body=expected_doc,
            id=crash_id,
        )
Example 21
    def clean(self, value):
        if any(itertools.imap(value.startswith, ('>=', '<='))):
            op = value[:2]
            value = value[2:]
        elif any(itertools.imap(value.startswith, ('=', '>', '<'))):
            op = value[:1]
            value = value[1:]
        else:
            op = '='
        return (op, string_to_datetime(value).date())
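A usage sketch of the operator parsing (hypothetical calls on an instance providing this method):

field.clean(">=2001-11-03")  # ('>=', datetime.date(2001, 11, 3))
field.clean("<2001-11-03")   # ('<', datetime.date(2001, 11, 3))
field.clean("2001-11-03")    # ('=', datetime.date(2001, 11, 3)), default op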
Example 22
    def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
        """Test an index attempt that fails because of a bogus number field.
        Expected behavior is to remove that field and retry indexing.
        """
        # ESCrashStorage uses the "limited backoff" transaction executor.
        # In real life this will retry operational exceptions over time, but
        # in unit tests, we just want it to hurry up and fail.
        backoff_config = self.config
        backoff_config['backoff_delays'] = [0, 0, 0]
        backoff_config['wait_log_interval'] = 0

        es_storage = ESCrashStorage(config=self.config)

        crash_id = a_processed_crash['uuid']
        raw_crash = {}
        processed_crash = {
            'date_processed': '2012-04-08 10:56:41.558922',
            'bogus-field': 1234567890,
            'foo': 'bar',
        }

        def mock_index(*args, **kwargs):
            if 'bogus-field' in kwargs['body']['processed_crash']:
                raise elasticsearch.exceptions.TransportError(
                    400,
                    'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                    '9300]][indices:data/write/index]]; nested: '
                    'MapperParsingException[failed to parse '
                    '[processed_crash.bogus-field]]; nested: '
                    'NumberFormatException[For input string: '
                    '"18446744073709480735"]; ')

            return True

        es_class_mock().index.side_effect = mock_index

        # Submit a crash and ensure that it succeeds.
        es_storage.save_raw_and_processed(raw_crash, None, processed_crash,
                                          crash_id)

        expected_doc = {
            'crash_id': crash_id,
            'removed_fields': 'processed_crash.bogus-field',
            'processed_crash': {
                'date_processed':
                string_to_datetime('2012-04-08 10:56:41.558922'),
                'foo': 'bar',
            },
            'raw_crash': {},
        }
        es_class_mock().index.assert_called_with(
            index=self.config.elasticsearch.elasticsearch_index,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
            body=expected_doc,
            id=crash_id)
Example 23
    def test_cron_job(self, exacttarget_mock):
        config_manager = self._setup_config_manager()
        et_mock = exacttarget_mock.return_value

        # Make get_subscriber raise an exception
        list_service = et_mock.list.return_value = mock.Mock()
        list_service.get_subscriber = mock.Mock(
            side_effect=exacttarget.NewsletterException()
        )

        with config_manager.context() as config:
            tab = crontabber.CronTabber(config)
            tab.run_all()

            information = self._load_structure()
            assert information['automatic-emails']
            assert not information['automatic-emails']['last_error']
            assert information['automatic-emails']['last_success']
            self.assertEqual(et_mock.trigger_send.call_count, 4)

            last_email = u'z\[email protected]'

            # Verify the last call to trigger_send
            fields = {
                'EMAIL_ADDRESS_': last_email,
                'EMAIL_FORMAT_': 'H',
                'TOKEN': last_email
            }

            et_mock.trigger_send.assert_called_with('socorro_dev_test', fields)

            # Verify that user's data was updated
            conf = config.crontabber['class-AutomaticEmailsCronApp']
            es = SuperS().es(
                urls=conf.elasticsearch.elasticsearch_urls,
                timeout=conf.elasticsearch.elasticsearch_timeout,
            )
            search = es.indexes(conf.elasticsearch.elasticsearch_emails_index)
            search = search.doctypes('emails')
            es.get_es().refresh()

            emails_list = (
                '*****@*****.**',
                '"Quidam" <*****@*****.**>',
                '*****@*****.**'
            )
            search = search.filter(_id__in=emails_list)
            res = search.values_list('last_sending')
            self.assertEqual(len(res), 3)
            now = utc_now()
            for row in res:
                date = string_to_datetime(row[0])
                self.assertEqual(date.year, now.year)
                self.assertEqual(date.month, now.month)
                self.assertEqual(date.day, now.day)
Example 24
    def test_cron_job(self, exacttarget_mock):
        config_manager = self._setup_config_manager()
        et_mock = exacttarget_mock.return_value

        # Make get_subscriber raise an exception
        list_service = et_mock.list.return_value = mock.Mock()
        list_service.get_subscriber = mock.Mock(
            side_effect=exacttarget.NewsletterException()
        )

        with config_manager.context() as config:
            tab = crontabber.CronTabber(config)
            tab.run_all()

            information = self._load_structure()
            assert information['automatic-emails']
            assert not information['automatic-emails']['last_error']
            assert information['automatic-emails']['last_success']
            self.assertEqual(et_mock.trigger_send.call_count, 4)

            last_email = u'z\[email protected]'

            # Verify the last call to trigger_send
            fields = {
                'EMAIL_ADDRESS_': last_email,
                'EMAIL_FORMAT_': 'H',
                'TOKEN': last_email
            }

            et_mock.trigger_send.assert_called_with('socorro_dev_test', fields)

            # Verify that user's data was updated
            conf = config.crontabber['class-AutomaticEmailsCronApp']
            es = SuperS().es(
                urls=conf.elasticsearch.elasticsearch_urls,
                timeout=conf.elasticsearch.elasticsearch_timeout,
            )
            search = es.indexes(conf.elasticsearch.elasticsearch_emails_index)
            search = search.doctypes('emails')
            es.get_es().refresh()

            emails_list = (
                '*****@*****.**',
                '"Quidam" <*****@*****.**>',
                '*****@*****.**'
            )
            search = search.filter(_id__in=emails_list)
            res = search.values_list('last_sending')
            self.assertEqual(len(res), 3)
            now = utc_now()
            for row in res:
                date = string_to_datetime(row[0])
                self.assertEqual(date.year, now.year)
                self.assertEqual(date.month, now.month)
                self.assertEqual(date.day, now.day)
Example 25
def test_string_datetime_with_timezone():
    date = "2001-11-30T12:34:56Z"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC))
    eq_(res.strftime('%H'), '12')
    # because it's a timezone aware datetime
    ok_(res.tzname())
    eq_(res.strftime('%Z'), 'UTC')
    eq_(res.strftime('%z'), '+0000')

    # plus 3 hours east of Zulu means minus 3 hours on UTC
    date = "2001-11-30T12:10:56+03:00"
    res = datetimeutil.string_to_datetime(date)
    expected = datetime.datetime(2001, 11, 30, 12 - 3, 10, 56, tzinfo=UTC)
    eq_(res, expected)

    # similar example
    date = "2001-11-30T12:10:56-01:30"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12 + 1, 10 + 30, 56, tzinfo=UTC))

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456Z"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC))

    docstring = """
        * 2012-01-10T12:13:14
        * 2012-01-10T12:13:14.98765
        * 2012-01-10T12:13:14.98765+03:00
        * 2012-01-10T12:13:14.98765Z
        * 2012-01-10 12:13:14
        * 2012-01-10 12:13:14.98765
        * 2012-01-10 12:13:14.98765+03:00
        * 2012-01-10 12:13:14.98765Z
    """.strip().splitlines()
    examples = [x.replace('*', '').strip() for x in docstring]
    for example in examples:
        res = datetimeutil.string_to_datetime(example)
        ok_(res.tzinfo)
        ok_(isinstance(res, datetime.datetime))
Example 26
    def get_index_for_crash(self, processed_crash):
        """return the submission URL for a crash, based on the submission URL
        in config and the date of the crash"""
        index = self.config.elasticsearch_index
        crash_date = datetimeutil.string_to_datetime(processed_crash["date_processed"])

        if not index:
            return None
        elif "%" in index:
            index = crash_date.strftime(index)

        return index
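For example, with a date-based pattern in the configuration, the crash's date_processed drives the final index name (hypothetical values):

# assuming config.elasticsearch_index == "socorro_reports_%Y%m%d"
crash = {"date_processed": "2012-04-08 10:56:41.558922"}
storage.get_index_for_crash(crash)  # -> "socorro_reports_20120408"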
Example 27
    def update_crashstats_signature(self, signature, report_date, report_build):
        report_build = int(report_build)
        report_date = string_to_datetime(report_date)
        try:
            sig = Signature.objects.get(signature=signature)
            sig.first_build = min(report_build, sig.first_build)
            sig.first_date = min(report_date, sig.first_date)
        except Signature.DoesNotExist:
            sig = Signature.objects.create(
                signature=signature, first_build=report_build, first_date=report_date
            )
        sig.save()
Example 28
def test_string_to_datetime():
    """
    Test datetimeutil.string_to_datetime()
    """
    # Empty date
    date = ""
    with pytest.raises(ValueError):
        res = datetimeutil.string_to_datetime(date)

    # already a date
    date = datetime.datetime.utcnow()
    res = datetimeutil.string_to_datetime(date)

    assert res == date.replace(tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # YY-mm-dd date
    date = "2001-11-03"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # and naughty YY-m-d date
    date = "2001-1-3"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 1, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001,
                                    11,
                                    30,
                                    12,
                                    34,
                                    56,
                                    123456,
                                    tzinfo=UTC)

    # Separated date
    date = ["2001-11-30", "12:34:56"]
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)

    # Invalid date
    date = "2001-11-32"
    with pytest.raises(ValueError):
        datetimeutil.string_to_datetime(date)
Example 29
    def format_dates_in_crash(self, processed_crash):
        # HBase returns dates in a format that elasticsearch does not
        # understand. To keep our elasticsearch mapping simple, we
        # transform all dates to a recognized format.
        for attr in processed_crash:
            try:
                processed_crash[attr] = datetimeutil.date_to_string(
                    datetimeutil.string_to_datetime(processed_crash[attr]))
            except (ValueError, TypeError, ISO8601Error):
                # the attribute is not a date
                pass

        return processed_crash
Example 30
  def post(self, *args):
    " Webpy method receives inputs from uri "
    errors = []
    email_form = self.email_form()
    if email_form.validates():
      product    = email_form['product'].value
      versions   = tuple([x.strip() for x in email_form['versions'].value.split(',')])
      signature  = email_form['signature'].value
      subject    = email_form['subject'].value
      body       = email_form['body'].value
      start_date = string_to_datetime(email_form['start_date'].value)
      end_date   = string_to_datetime(email_form['end_date'].value)
      author     = email_form['author'].value

      logger.info("%s is creating an email campaign for %s %s crashes in [%s] Between %s and %s" %(author, product, versions, signature, start_date, end_date))

      connection = self.database.connection()
      try:
        cursor = connection.cursor()
        campaign_id, full_email_rows = self.create_email_campaign(cursor, product, versions, signature, subject, body, start_date, end_date, author)
        logger.info('full_email_rows: %s' % full_email_rows)
        email_addresses = [row['email'] for row in full_email_rows]
        logger.info('email_addresses: %s' % email_addresses)
        email_contact_ids = self.save_campaign_contacts(cursor, campaign_id, email_addresses)
        logger.info('email_contact_ids: %s' % email_contact_ids)

        connection.commit()

        return {'campaign_id': campaign_id}
      finally:
        connection.close()
    else:
      web.badrequest()
      for field in ['product', 'versions', 'signature', 'subject', 'body', 'start_date', 'end_date', 'author']:
        if email_form[field].note:
          # Example "product: Required"
          errors.append("%s: %s" % (field, email_form[field].note))
      logger.info("Bad Request. %s" % ', '.join(errors))
      return {'message': ', '.join(errors)}
Example 31
    def get_index_for_crash(self, processed_crash):
        """return the submission URL for a crash, based on the submission URL
        in config and the date of the crash"""
        index = self.config.elasticsearch_index
        crash_date = datetimeutil.string_to_datetime(
            processed_crash['date_processed'])

        if not index:
            return None
        elif '%' in index:
            index = crash_date.strftime(index)

        return index
Example 32
  def post(self, *args):
    " Webpy method receives inputs from uri "
    errors = []
    email_form = self.email_form()
    if email_form.validates():
      product    = email_form['product'].value
      versions   = tuple([x.strip() for x in email_form['versions'].value.split(',')])
      signature  = email_form['signature'].value
      subject    = email_form['subject'].value
      body       = email_form['body'].value
      start_date = string_to_datetime(email_form['start_date'].value)
      end_date   = string_to_datetime(email_form['end_date'].value)
      author     = email_form['author'].value

      logger.info("%s is creating an email campaign for %s %s crashes in [%s] Between %s and %s" %(author, product, versions, signature, start_date, end_date))

      connection = self.database.connection()
      try:
        cursor = connection.cursor()
        campaign_id, full_email_rows = self.create_email_campaign(cursor, product, versions, signature, subject, body, start_date, end_date, author)
        logger.info('full_email_rows: %s' % full_email_rows)
        email_addresses = [row['email'] for row in full_email_rows]
        logger.info('email_addresses: %s' % email_addresses)
        email_contact_ids = self.save_campaign_contacts(cursor, campaign_id, email_addresses)
        logger.info('email_contact_ids: %s' % email_contact_ids)

        connection.commit()

        return {'campaign_id': campaign_id}
      finally:
        connection.close()
    else:
      web.badrequest()
      for field in ['product', 'versions', 'signature', 'subject', 'body', 'start_date', 'end_date', 'author']:
        if email_form[field].note:
          # Example "product: Required"
          errors.append("%s: %s" % (field, email_form[field].note))
      logger.info("Bad Request. %s" % ', '.join(errors))
      return {'message': ', '.join(errors)}
Example 33
    def save_processed_crash(self, raw_crash, processed_crash):
        """Save processed crash report to Elasticsearch"""
        index_name = self.es_context.get_index_for_date(
            string_to_datetime(processed_crash["date_processed"]))
        es_doctype = self.config.elasticsearch.elasticsearch_doctype
        crash_id = processed_crash["uuid"]

        supersearch_fields_keys = self.get_keys_for_fields()
        try:
            mapping_keys = self.get_keys_for_mapping(index_name, es_doctype)
        except NotFoundError:
            mapping_keys = None
        all_valid_keys = supersearch_fields_keys
        if mapping_keys:
            # If there are mapping_keys, then the index exists already and we
            # should make sure we're not indexing anything that's not in that
            # mapping
            all_valid_keys = all_valid_keys & mapping_keys

        # Copy the crash structures so we can mutate them later and remove everything
        # that's not a valid key for the index
        raw_crash = {
            key: value
            for key, value in copy.deepcopy(raw_crash).items()
            if "raw_crash.%s" % key in all_valid_keys
        }
        processed_crash = {
            key: value
            for key, value in copy.deepcopy(processed_crash).items()
            if "processed_crash.%s" % key in all_valid_keys
        }

        # Clean up and redact raw and processed crash data
        self.prepare_processed_crash(raw_crash, processed_crash)

        # Capture crash data size metrics--do this only after we've cleaned up
        # the crash data
        self.capture_crash_metrics(raw_crash, processed_crash)

        crash_document = {
            "crash_id": processed_crash["uuid"],
            "processed_crash": processed_crash,
            "raw_crash": raw_crash,
        }

        self._submit_crash_to_elasticsearch(
            crash_id=crash_id,
            es_doctype=es_doctype,
            index_name=index_name,
            crash_document=crash_document,
        )
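The dict comprehensions above drop any field that is not declared in the super search fields or the live index mapping; a small illustration with hypothetical values:

all_valid_keys = {"processed_crash.uuid", "processed_crash.date_processed"}
processed_crash = {"uuid": "xyz", "date_processed": "2020-01-01", "junk": 1}
kept = {
    key: value
    for key, value in processed_crash.items()
    if "processed_crash.%s" % key in all_valid_keys
}
# kept == {"uuid": "xyz", "date_processed": "2020-01-01"}; "junk" is dropped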
Example 34
def test_string_datetime_with_timezone():
    date = "2001-11-30T12:34:56Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)
    assert res.strftime('%H') == '12'
    # because it's a timezone aware datetime
    assert res.tzname() == 'UTC'
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # plus 3 hours east of Zulu means minus 3 hours on UTC
    date = "2001-11-30T12:10:56+03:00"
    res = datetimeutil.string_to_datetime(date)
    expected = datetime.datetime(2001, 11, 30, 12 - 3, 10, 56, tzinfo=UTC)
    assert res == expected

    # similar example
    date = "2001-11-30T12:10:56-01:30"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001,
                                    11,
                                    30,
                                    12 + 1,
                                    10 + 30,
                                    56,
                                    tzinfo=UTC)

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001,
                                    11,
                                    30,
                                    12,
                                    34,
                                    56,
                                    123456,
                                    tzinfo=UTC)
Example 35
    def format_dates_in_crash(self, processed_crash):
        # HBase returns dates in a format that elasticsearch does not
        # understand. To keep our elasticsearch mapping simple, we
        # transform all dates to a recognized format.
        for attr in processed_crash:
            try:
                processed_crash[attr] = datetimeutil.date_to_string(
                    datetimeutil.string_to_datetime(
                        processed_crash[attr]
                    )
                )
            except (ValueError, TypeError, ISO8601Error):
                # the attribute is not a date
                pass

        return processed_crash
Example 36
    def _submit_crash_to_elasticsearch(self, crash_id, crash_document):
        """submit a crash report to elasticsearch.

        Generate the index name from the date of the crash report, verify that
        index already exists, and if it doesn't create it and set its mapping.
        Lastly index the crash report.
        """
        if not self.config.elasticsearch_urls:
            return

        crash_date = datetimeutil.string_to_datetime(
            crash_document['processed_crash']['date_processed']
        )
        es_index = self.get_index_for_crash(crash_date)
        es_doctype = self.config.elasticsearch_doctype

        try:
            # We first need to ensure that the index already exists in ES.
            # If it doesn't, we create it and put its mapping.
            if es_index not in self.indices_cache:
                self.create_socorro_index(es_index)

                # Cache the list of existing indices to avoid HTTP requests
                self.indices_cache.add(es_index)

            self.es.index(
                es_index,
                es_doctype,
                crash_document,
                id=crash_id,
                replication='async'
            )
        except pyelasticsearch.exceptions.ConnectionError:
            self.logger.critical('%s may not have been submitted to '
                                 'elasticsearch due to a connection error',
                                 crash_id)
            raise
        except pyelasticsearch.exceptions.Timeout:
            self.logger.critical('%s may not have been submitted to '
                                 'elasticsearch due to a timeout',
                                 crash_id)
            raise
        except pyelasticsearch.exceptions.ElasticHttpError, e:
            self.logger.critical(u'%s may not have been submitted to '
                                 'elasticsearch due to the following: %s',
                                 crash_id, e)
            raise
Example 37
    def reconstitute_datetimes(processed_crash):
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            try:
                processed_crash[a_key] = string_to_datetime(
                    processed_crash[a_key])
            except KeyError:
                # not there? we don't care
                pass
Example 38
    def reconstitute_datetimes(processed_crash):
        # FIXME(willkg): These should be specified in super_search_fields.py
        # and not hard-coded
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            if a_key not in processed_crash:
                continue

            processed_crash[a_key] = string_to_datetime(processed_crash[a_key])
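A usage sketch (hypothetical crash dict): only the listed keys are converted, and absent keys are simply skipped:

crash = {"date_processed": "2012-04-08 10:56:41.558922", "signature": "Foo::bar"}
reconstitute_datetimes(crash)
# crash["date_processed"] is now a timezone-aware datetime;
# "signature" is untouched.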
Example 39
    def reconstitute_datetimes(processed_crash):
        # FIXME(willkg): These should be specified in super_search_fields.py
        # and not hard-coded
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            if a_key not in processed_crash:
                continue

            processed_crash[a_key] = string_to_datetime(processed_crash[a_key])
Example 40
def test_string_to_datetime():
    """
    Test datetimeutil.string_to_datetime()
    """
    # Empty date
    date = ""
    with pytest.raises(ValueError):
        res = datetimeutil.string_to_datetime(date)

    # already a date
    date = datetime.datetime.utcnow()
    res = datetimeutil.string_to_datetime(date)

    assert res == date.replace(tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # YY-mm-dd date
    date = "2001-11-03"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # and naughty YY-m-d date
    date = "2001-1-3"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 1, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC)

    # Separated date
    date = ["2001-11-30", "12:34:56"]
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)

    # Invalid date
    date = "2001-11-32"
    with pytest.raises(ValueError):
        datetimeutil.string_to_datetime(date)
Example 41
    def _submit_crash_to_elasticsearch(self, crash_id, crash_document):
        """submit a crash report to elasticsearch.

        Generate the index name from the date of the crash report, verify that
        index already exists, and if it doesn't create it and set its mapping.
        Lastly index the crash report.
        """
        if not self.config.elasticsearch_urls:
            return

        crash_date = datetimeutil.string_to_datetime(
            crash_document['processed_crash']['date_processed'])
        es_index = self.get_index_for_crash(crash_date)
        es_doctype = self.config.elasticsearch_doctype

        try:
            # We first need to ensure that the index already exists in ES.
            # If it doesn't, we create it and put its mapping.
            if es_index not in self.indices_cache:
                self.create_socorro_index(es_index)

                # Cache the list of existing indices to avoid HTTP requests
                self.indices_cache.add(es_index)

            self.es.index(
                es_index,
                es_doctype,
                crash_document,
                id=crash_id,
            )
        except pyelasticsearch.exceptions.ConnectionError:
            self.logger.critical(
                '%s may not have been submitted to '
                'elasticsearch due to a connection error', crash_id)
            raise
        except pyelasticsearch.exceptions.Timeout:
            self.logger.critical(
                '%s may not have been submitted to '
                'elasticsearch due to a timeout', crash_id)
            raise
        except pyelasticsearch.exceptions.ElasticHttpError, e:
            self.logger.critical(
                u'%s may not have been submitted to '
                'elasticsearch due to the following: %s', crash_id, e)
            raise
Example 42
def reconstitute_datetimes(processed_crash):
    datetime_fields = [
        'submitted_timestamp',
        'date_processed',
        'client_crash_date',
        'started_datetime',
        'startedDateTime',
        'completed_datetime',
        'completeddatetime',
    ]
    for a_key in datetime_fields:
        try:
            processed_crash[a_key] = string_to_datetime(
                processed_crash[a_key]
            )
        except KeyError:
            # not there? we don't care
            pass
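
Because a missing key is silently skipped, the function can run over any processed crash and converts only the fields that are present, in place. A quick usage sketch (field values chosen for illustration):

processed_crash = {
    'date_processed': '2012-04-08 10:56:41.558922',
    'product': 'Firefox',  # untouched: not in datetime_fields
}
reconstitute_datetimes(processed_crash)
# processed_crash['date_processed'] is now a timezone-aware datetime;
# 'product' is left alone, and absent fields raise nothing.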
Example n. 43
def testSendAllEmails():
    context = getDummyContext()

    testContacts = ["*****@*****.**", "*****@*****.**"]
    crash_date = string_to_datetime("2011-09-01")
    contacts = [(0, testContacts[0], "abc", "ooid1", crash_date), (0, testContacts[1], "abc", "ooid2", crash_date)]
    subject = "email subject"
    body = "email body"

    dummySmtp = expect.DummyObjectWithExpectations()
    # no variables
    noVarBody = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keQ==\n'
    dummySmtp.expect("sendmail", (context.fromEmailAddress, [testContacts[0]], noVarBody % testContacts[0]), {}, None)
    dummySmtp.expect("sendmail", (context.fromEmailAddress, [testContacts[1]], noVarBody % testContacts[1]), {}, None)

    sender = es.EmailSender(context)
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: "sent"}

    # FIXME
    #  # unsubscribe variable
    #  unsubVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvYWJj\n'
    #  unsubVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvZGVm\n'
    #  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], unsubVarBody1 % testContacts[0]), {}, None)
    #  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], unsubVarBody2 % testContacts[1]), {}, None)
    #
    #  body = 'email body *|UNSUBSCRIBE_URL|*'
    #  contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    #  print contacted_emails
    # assert contacted_emails == [testContacts[0], testContacts[1]]

    # email_address variable
    emailVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAxQGV4YW1wbGUuY29t\n'
    emailVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAyQGV4YW1wbGUuY29t\n'
    dummySmtp.expect(
        "sendmail", (context.fromEmailAddress, [testContacts[0]], emailVarBody1 % testContacts[0]), {}, None
    )
    dummySmtp.expect(
        "sendmail", (context.fromEmailAddress, [testContacts[1]], emailVarBody2 % testContacts[1]), {}, None
    )

    body = "email body *|EMAIL_ADDRESS|*"
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: "sent"}
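
The expected MIME messages carry base64-encoded payloads, which is why the plain body text never appears literally in the constants. Decoding the static payload from noVarBody confirms it is just the body:

import base64

# 'ZW1haWwgYm9keQ==' is the payload portion of noVarBody above.
assert base64.b64decode('ZW1haWwgYm9keQ==') == b'email body'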
Example n. 44
    def main(self):
        storage_config = self.get_config_context()
        storage = self.config.elasticsearch_storage_class(storage_config)

        # Create the supersearch fields.
        storage.es.bulk_index(
            index=storage_config.elasticsearch_default_index,
            doc_type='supersearch_fields',
            docs=SUPERSEARCH_FIELDS.values(),
            id_field='name',
            refresh=True,
        )

        crash_file = open(self.config.processed_crash_file)
        processed_crash = json.load(crash_file)

        crash_file = open(self.config.raw_crash_file)
        raw_crash = json.load(crash_file)

        crash_date = string_to_datetime(processed_crash['date_processed'])
        es_index = storage.get_index_for_crash(crash_date)
        es_doctype = storage_config.elasticsearch_doctype
        crash_id = processed_crash['uuid']

        storage.save_raw_and_processed(
            raw_crash,
            None,
            processed_crash,
            crash_id
        )

        try:
            # Verify the crash has been inserted
            crash = storage.es.get(
                es_index,
                es_doctype,
                crash_id
            )
            assert crash['exists']

        finally:
            # Clean up created index.
            storage.es.delete_index(es_index)
            storage.es.delete_index(storage_config.elasticsearch_default_index)
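
One nit in the snippet above: both crash files are opened without ever being closed, and crash_file is rebound between the two loads. A sketch of the same two loads as a drop-in for the body of main(), using context managers so the handles are closed deterministically:

# Same behavior as the open()/json.load() pairs above.
with open(self.config.processed_crash_file) as crash_file:
    processed_crash = json.load(crash_file)
with open(self.config.raw_crash_file) as crash_file:
    raw_crash = json.load(crash_file)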
Example n. 45
def testSendAllEmails():
  context = getDummyContext()

  testContacts = ['*****@*****.**', '*****@*****.**']
  crash_date = string_to_datetime('2011-09-01')
  contacts = [
    (0, testContacts[0], 'abc', 'ooid1', crash_date),
    (0, testContacts[1], 'abc', 'ooid2', crash_date)
  ]
  subject = 'email subject'
  body = 'email body'

  dummySmtp = expect.DummyObjectWithExpectations()
  # no variables
  noVarBody = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keQ==\n'
  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], noVarBody % testContacts[0]), {}, None)
  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], noVarBody % testContacts[1]), {}, None)

  sender = es.EmailSender(context)
  contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
  assert contacted_emails == {0: 'sent'}

# FIXME
#  # unsubscribe variable
#  unsubVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvYWJj\n'
#  unsubVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvZGVm\n'
#  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], unsubVarBody1 % testContacts[0]), {}, None)
#  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], unsubVarBody2 % testContacts[1]), {}, None)
#
#  body = 'email body *|UNSUBSCRIBE_URL|*'
#  contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
#  print contacted_emails
  #assert contacted_emails == [testContacts[0], testContacts[1]]

  # email_address variable
  emailVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAxQGV4YW1wbGUuY29t\n'
  emailVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAyQGV4YW1wbGUuY29t\n'
  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], emailVarBody1 % testContacts[0]), {}, None)
  dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], emailVarBody2 % testContacts[1]), {}, None)

  body = 'email body *|EMAIL_ADDRESS|*'
  contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
  assert contacted_emails == {0: 'sent'}
Example n. 46
    def main(self):
        es_storage = self.config.elasticsearch_storage_class(self.config)
        hb_storage = self.config.hbase_storage_class(self.config)
        hb_client = HBaseConnectionForCrashReports(
            self.config.hbase_host,
            self.config.hbase_port,
            self.config.hbase_timeout,
        )

        current_date = self.config.end_date

        one_day = datetime.timedelta(days=1)
        for i in range(self.config.duration):
            day = current_date.strftime('%y%m%d')
            self.config.logger.info('backfilling crashes for %s', day)

            reports = hb_client.get_list_of_processed_json_for_date(
                day,
                number_of_retries=5
            )

            for report in reports:
                processed_crash = json.loads(report)

                # HBase returns dates in a format that elasticsearch does not
                # understand. To keep our elasticsearch mapping simple, we
                # transform all dates to a recognized format.
                for attr in processed_crash:
                    try:
                        processed_crash[attr] = datetimeutil.date_to_string(
                            datetimeutil.string_to_datetime(
                                processed_crash[attr]
                            )
                        )
                    except (ValueError, TypeError, ISO8601Error):
                        # the attribute is not a date
                        pass
                # print processed_crash['uuid']
                es_storage.save_processed(processed_crash)
            current_date -= one_day

        return 0
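
The normalization is a parse/serialize round trip: whatever date format HBase hands back is parsed with string_to_datetime and re-emitted by date_to_string, so every document reaches Elasticsearch in one consistent format. For a single value the transformation looks like this (sample value is illustrative):

raw_value = '2012-04-08 10:56:41.558922'
normalized = datetimeutil.date_to_string(
    datetimeutil.string_to_datetime(raw_value)
)
# Non-date attributes raise ValueError/TypeError/ISO8601Error instead
# and are skipped by the except clause in the loop above.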
Example n. 47
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Submit a crash report to elasticsearch.
        """

        # Massage the crash such that the date_processed field is formatted
        # in the fashion of our established mapping.
        # First create a datetime object from the string in the crash report.
        crash_date = datetimeutil.string_to_datetime(
            crash_document['processed_crash']['date_processed']
        )
        # Then convert it back to a string with the expected formatting.
        crash_date_with_t = datetimeutil.date_to_string(crash_date)
        # Finally, re-insert that string back into the report for indexing.
        crash_document['processed_crash']['date_processed'] = crash_date_with_t

        # Obtain the index name.
        es_index = self.get_index_for_crash(crash_date)
        es_doctype = self.config.elasticsearch.elasticsearch_doctype
        crash_id = crash_document['crash_id']

        # Attempt to create the index; it's OK if it already exists.
        if es_index not in self.indices_cache:
            index_creator = IndexCreator(config=self.config)
            index_creator.create_socorro_index(es_index)

        # Submit the crash for indexing.
        try:
            connection.index(
                index=es_index,
                doc_type=es_doctype,
                body=crash_document,
                id=crash_id
            )

        except elasticsearch.exceptions.ElasticsearchException as e:
            self.config.logger.critical(
                'Submission to Elasticsearch failed for %s (%s)',
                crash_id,
                e,
                exc_info=True
            )
            raise
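
The name crash_date_with_t hints at what the massage fixes: re-serializing through date_to_string swaps the space separator for ISO 8601's 'T', matching the established mapping. In isolation, and with the output shape being an assumption based on the variable name:

crash_date = datetimeutil.string_to_datetime('2012-04-08 10:56:41.558922')
crash_date_with_t = datetimeutil.date_to_string(crash_date)
# expected shape: '2012-04-08T10:56:41.558922+00:00' or similar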
Example n. 48
def reconstitute_datetimes(processed_crash):
    """Convert string values to datetimes for specified fields

    This operates in-place.

    """
    # FIXME(willkg): These should be specified in super_search_fields.py
    # and not hard-coded
    datetime_fields = [
        "submitted_timestamp",
        "date_processed",
        "client_crash_date",
        "started_datetime",
        "startedDateTime",
        "completed_datetime",
        "completeddatetime",
    ]
    for a_key in datetime_fields:
        if a_key not in processed_crash:
            continue

        processed_crash[a_key] = string_to_datetime(processed_crash[a_key])
Example n. 49
    def main(self):
        storage = self.config.elasticsearch_storage_class(self.config)

        crash_file = open(self.config.processed_crash_file)
        processed_crash = json.load(crash_file)

        crash_file = open(self.config.raw_crash_file)
        raw_crash = json.load(crash_file)

        crash_date = string_to_datetime(processed_crash["date_processed"])
        es_index = storage.get_index_for_crash(crash_date)
        es_doctype = self.config.elasticsearch_doctype
        crash_id = processed_crash["uuid"]

        storage.save_raw_and_processed(raw_crash, None, processed_crash, crash_id)

        # Verify the crash has been inserted
        es = pyelasticsearch.ElasticSearch(self.config.elasticsearch_urls)

        crash = es.get(es_index, es_doctype, crash_id)
        assert crash["exists"]

        print "Success - %s/%s/%s" % (es_index, es_doctype, crash_id)
Example n. 50
    def test_email_cannot_be_sent_twice(self, exacttarget_mock):
        config_manager = self._setup_config_manager(
            restrict_products=['NightlyTrain']
        )
        et_mock = exacttarget_mock.return_value

        # Prepare failures
        _failures = []
        _email_sent = []

        class SomeRandomError(Exception):
            pass

        def trigger_send(template, fields):
            email = fields['EMAIL_ADDRESS_']
            if email == '*****@*****.**' and email not in _failures:
                _failures.append(email)
                raise SomeRandomError('This is an error. ')
            else:
                _email_sent.append(email)

        et_mock.trigger_send = trigger_send

        with config_manager.context() as config:
            tab = crontabber.CronTabber(config)
            tab.run_all()

            information = self._load_structure()
            assert information['automatic-emails']
            assert information['automatic-emails']['last_error']
            self.assertEqual(
                information['automatic-emails']['last_error']['type'],
                str(SomeRandomError)
            )

            # Verify that user's data was updated, but not all of it
            self.assertEqual(_email_sent, ['*****@*****.**', '*****@*****.**'])
            emails_list = (
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**'
            )

            conf = config.crontabber['class-AutomaticEmailsCronApp']
            es = SuperS().es(
                urls=conf.elasticsearch.elasticsearch_urls,
                timeout=conf.elasticsearch.elasticsearch_timeout,
            )
            search = es.indexes(
                conf.elasticsearch.elasticsearch_emails_index
            )
            search = search.doctypes('emails')
            es.get_es().refresh()

            search = search.filter(_id__in=emails_list)
            res = search.execute()
            self.assertEqual(res.count, 2)

            now = utc_now()
            for row in res.results:
                assert row['_id'] in ('*****@*****.**', '*****@*****.**')
                date = string_to_datetime(row['_source']['last_sending'])
                self.assertEqual(date.year, now.year)
                self.assertEqual(date.month, now.month)
                self.assertEqual(date.day, now.day)

            # Run crontabber again and verify that all users are updated,
            # and emails are not sent twice
            state = tab.job_database['automatic-emails']
            self._wind_clock(state, hours=1)
            tab.job_database['automatic-emails'] = state

            tab.run_all()

            information = self._load_structure()
            assert information['automatic-emails']
            assert not information['automatic-emails']['last_error']
            assert information['automatic-emails']['last_success']

            # Verify that users were not sent an email twice
            self.assertEqual(_email_sent, [
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**',
                '*****@*****.**'
            ])
Example n. 51
def test_string_datetime_with_timezone_variations(ts, timezone):
    res = datetimeutil.string_to_datetime(ts)
    # NOTE(willkg): isodate.tzinfo.FixedOffset doesn't define __eq__, so we compare the
    # reprs of them. :(
    assert repr(res.tzinfo) == repr(timezone)
    assert isinstance(res, datetime.datetime)
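
The pytest.mark.parametrize decorator was cropped out of this snippet, so ts and timezone arrive undefined here. A hypothetical reconstruction, assuming isodate's FixedOffset (the class whose missing __eq__ the comment works around); the parameter sets and the datetimeutil import path are illustrative, not the project's actual ones:

import datetime

import pytest
from isodate.tzinfo import UTC, FixedOffset

from socorro.lib import datetimeutil  # assumption: module location


# Hypothetical parameter sets; the real ones are not shown above.
@pytest.mark.parametrize('ts, timezone', [
    ('2001-11-30T12:34:56Z', UTC),
    ('2001-11-30T12:10:56+03:00', FixedOffset(3, 0, '+03:00')),
])
def test_string_datetime_with_timezone_variations(ts, timezone):
    res = datetimeutil.string_to_datetime(ts)
    assert repr(res.tzinfo) == repr(timezone)
    assert isinstance(res, datetime.datetime)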
Example n. 52
    def query(self, from_date, to_date, json_query):
        """
        Send a query directly to ElasticSearch and return the result.
        """
        # Default dates
        now = dtutil.utc_now().date()
        lastweek = now - timedelta(7)

        from_date = dtutil.string_to_datetime(from_date) or lastweek
        to_date = dtutil.string_to_datetime(to_date) or now

        # Create the indexes to use for querying.
        daterange = []
        delta_day = to_date - from_date
        for delta in range(0, delta_day.days + 1):
            day = from_date + timedelta(delta)
            index = "socorro_%s" % day.strftime("%y%m%d")
            # Cache protection for limiting the number of HTTP calls
            if index not in self.cache or not self.cache[index]:
                daterange.append(index)

        can_return = False

        # -
        # This code avoids failing queries caused by missing indexes.
        # That should not happen in production, but this guard makes sure
        # users never see a 500 error because of it.
        # -

        # Iterate until we can return an actual result and not an error
        while not can_return:
            if not daterange:
                http_response = "{}"
                break

            datestring = ",".join(daterange)
            uri = "/%s/_search" % datestring

            with self.http:
                http_response = self.http.post(uri, json_query)

            # If there has been an error, we get a dict
            # instead of a JSON string.
            if isinstance(http_response, dict):
                data = http_response["error"]["data"]

                # If an index is missing,
                # try to remove it from the list of indexes and retry.
                if (http_response["error"]["code"] == 404 and
                    data.find("IndexMissingException") >= 0):
                    index = data[data.find("[[") + 2:data.find("]")]

                    # Cache protection for limiting the number of HTTP calls
                    self.cache[index] = True

                    daterange.remove(index)
            else:
                can_return = True

        return (http_response, "text/json")
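
The fan-out over daily indexes is easy to verify in isolation: the loop is inclusive on both ends, yielding one socorro_YYMMDD index per day in the range. A standalone check:

import datetime
from datetime import timedelta

from_date = datetime.date(2001, 11, 29)
to_date = datetime.date(2001, 11, 30)

daterange = []
for delta in range((to_date - from_date).days + 1):
    day = from_date + timedelta(delta)
    daterange.append('socorro_%s' % day.strftime('%y%m%d'))

assert daterange == ['socorro_011129', 'socorro_011130']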
Example n. 53
        'json_dump': 'stackwalker output',
    },
    'upload_file_minidump_flash2': {
        'things': 'untouched',
        'json_dump': 'stackwalker output',
    },
    'upload_file_minidump_browser': {
        'things': 'untouched',
        'json_dump': 'stackwalker output',
    },
}

a_processed_crash_with_no_stackwalker = deepcopy(a_processed_crash)

a_processed_crash_with_no_stackwalker['date_processed'] = \
    string_to_datetime('2012-04-08 10:56:41.558922')
a_processed_crash_with_no_stackwalker['client_crash_date'] = \
    string_to_datetime('2012-04-08 10:52:42.0')
a_processed_crash_with_no_stackwalker['completeddatetime'] = \
    string_to_datetime('2012-04-08 10:56:50.902884')
a_processed_crash_with_no_stackwalker['started_datetime'] = \
    string_to_datetime('2012-04-08 10:56:50.440752')
a_processed_crash_with_no_stackwalker['startedDateTime'] = \
    string_to_datetime('2012-04-08 10:56:50.440752')

del a_processed_crash_with_no_stackwalker['json_dump']
del a_processed_crash_with_no_stackwalker['upload_file_minidump_flash1'][
    'json_dump'
]
del a_processed_crash_with_no_stackwalker['upload_file_minidump_flash2'][
    'json_dump'