def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
    crash_id = raw_crash.uuid
    old_processed_crash = self.crashstore.get_unredacted_processed(crash_id)
    for key, value in old_processed_crash.iteritems():
        if 'date_processed' in key:
            processed_crash[key] = date_to_string(
                string_to_datetime(value) - self.config.time_delta
            )
            print processed_crash.uuid, value, processed_crash[key]
        else:
            if key != 'uptime' and key != 'crash_time' and (
                'time' in key or "date" in key or 'Date' in key
            ):
                value = date_to_string(string_to_datetime(value))
            processed_crash[key] = value

    processor_meta.processor_notes.append(
        'DateProcessedTimeMachine has pushed date_processed into the past'
        ' by "%s" (D HH:MM:SS)' % to_str(self.config.time_delta)
    )
    processor_meta.processor_notes.append(
        'Original processor_notes: %s' % old_processed_crash['processor_notes']
    )
    return True
def test_string_datetime_with_timezone():
    date = "2001-11-30T12:34:56Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)
    assert res.strftime('%H') == '12'  # because it's a timezone aware datetime
    assert res.tzname() == 'UTC'
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # plus 3 hours east of Zulu means minus 3 hours on UTC
    date = "2001-11-30T12:10:56+03:00"
    res = datetimeutil.string_to_datetime(date)
    expected = datetime.datetime(2001, 11, 30, 12 - 3, 10, 56, tzinfo=UTC)
    assert res == expected

    # similar example
    date = "2001-11-30T12:10:56-01:30"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12 + 1, 10 + 30, 56, tzinfo=UTC)

    # YY-mm-dd+HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456Z"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC)
def check_type(param, datatype):
    """
    Make sure that param is of type datatype and return it.

    If param is None, return it.
    If param is an instance of datatype, return it.
    If param is not an instance of datatype and is not None, cast it as
    datatype and return it.
    """
    if param is None:
        return param

    if datatype == "str" and not isinstance(param, basestring):
        try:
            param = str(param)
        except ValueError:
            param = str()
    elif datatype == "int" and not isinstance(param, int):
        try:
            param = int(param)
        except ValueError:
            param = int()
    elif datatype == "bool" and not isinstance(param, bool):
        param = str(param).lower() in ("true", "t", "1", "y", "yes")
    elif datatype == "datetime" and not isinstance(param, datetime):
        try:
            param = dtutil.string_to_datetime(param)
        except ValueError:
            param = None
    elif datatype == "date" and not isinstance(param, date):
        try:
            param = dtutil.string_to_datetime(param).date()
        except ValueError:
            param = None
    elif datatype == "timedelta" and not isinstance(param, timedelta):
        try:
            param = dtutil.strHoursToTimeDelta(param)
        except ValueError:
            param = None
    elif datatype == "json" and isinstance(param, basestring):
        try:
            param = json.loads(param)
        except ValueError:
            param = None
    elif datatype == "float" and not isinstance(param, float):
        try:
            param = float(param)
        except ValueError:
            param = float()

    return param
def query(self, from_date, to_date, json_query):
    """
    Send a query directly to ElasticSearch and return the result.
    """
    # Default dates
    now = dtutil.utc_now().date()
    lastweek = now - datetime.timedelta(7)

    from_date = dtutil.string_to_datetime(from_date) or lastweek
    to_date = dtutil.string_to_datetime(to_date) or now

    daterange = self.generate_list_of_indexes(from_date, to_date)

    # -
    # This code is here to avoid failing queries caused by missing
    # indexes. It should not happen on prod, but doing this makes
    # sure users will never see a 500 Error because of this eventuality.
    # -

    # Iterate until we can return an actual result and not an error
    can_return = False
    while not can_return:
        if not daterange:
            # This is probably wrong and should be raising an error instead
            http_response = "{}"
            break

        uri = "/%s/_search" % ",".join(daterange)

        with self.http:
            http_response = self.http.post(uri, json_query)

        # If there has been an error,
        # then we get a dict instead of some json.
        if isinstance(http_response, dict):
            data = http_response["error"]["data"]

            # If an index is missing,
            # try to remove it from the list of indexes and retry.
            if (http_response["error"]["code"] == 404 and
                    data.find("IndexMissingException") >= 0):
                index = data[data.find("[[") + 2:data.find("]")]
                daterange.remove(index)
            else:
                error = 'Unexpected error from elasticsearch: %s'
                raise UnexpectedElasticsearchError(error % data)
        else:
            can_return = True

    return (http_response, "text/json")
def convert_to_type(value, data_type):
    if data_type == "str" and not isinstance(value, str):
        value = str(value)
    # yes, 'enum' is being converted to a string
    elif data_type == "enum" and not isinstance(value, str):
        value = str(value)
    elif data_type == "int" and not isinstance(value, int):
        value = int(value)
    elif data_type == "bool" and not isinstance(value, bool):
        value = str(value).lower() in ("true", "t", "1", "y", "yes")
    elif data_type == "datetime" and not isinstance(value, datetime.datetime):
        value = datetimeutil.string_to_datetime(value)
    elif data_type == "date" and not isinstance(value, datetime.date):
        value = datetimeutil.string_to_datetime(value).date()
    return value
def convert_to_type(value, data_type):
    if data_type == 'str' and not isinstance(value, basestring):
        value = str(value)
    # yes, 'enum' is being converted to a string
    elif data_type == 'enum' and not isinstance(value, basestring):
        value = str(value)
    elif data_type == 'int' and not isinstance(value, int):
        value = int(value)
    elif data_type == 'bool' and not isinstance(value, bool):
        value = str(value).lower() in ('true', 't', '1', 'y', 'yes')
    elif data_type == 'datetime' and not isinstance(value, datetime.datetime):
        value = datetimeutil.string_to_datetime(value)
    elif data_type == 'date' and not isinstance(value, datetime.date):
        value = datetimeutil.string_to_datetime(value).date()
    return value
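# Usage sketch for convert_to_type() above (added for illustration, not part of
# the original module). It assumes datetimeutil.string_to_datetime parses ISO 8601
# strings into timezone-aware datetimes, as the tests in this collection show:
#
#   convert_to_type("123", "int")          -> 123
#   convert_to_type("t", "bool")           -> True
#   convert_to_type("2012-01-10", "date")  -> datetime.date(2012, 1, 10)
#   convert_to_type(5, "enum")             -> "5"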
def update_crashstats_signature(self, signature, report_date, report_build):
    with transaction_context(self.database) as connection:
        # Pull the data from the db. If it's there, then do an update. If it's
        # not there, then do an insert.
        try:
            sql = """
            SELECT signature, first_build, first_date
            FROM crashstats_signature
            WHERE signature=%s
            """
            sig = single_row_sql(connection, sql, (signature,))
            sql = """
            UPDATE crashstats_signature
            SET first_build=%s, first_date=%s
            WHERE signature=%s
            """
            params = (
                min(sig[1], int(report_build)),
                min(sig[2], string_to_datetime(report_date)),
                sig[0]
            )
        except SQLDidNotReturnSingleRow:
            sql = """
            INSERT INTO crashstats_signature (signature, first_build, first_date)
            VALUES (%s, %s, %s)
            """
            params = (signature, report_build, report_date)
        execute_no_results(connection, sql, params)
def check_type(param, datatype):
    """
    Make sure that param is of type datatype and return it.

    If param is None, return it.
    If param is an instance of datatype, return it.
    If param is not an instance of datatype and is not None, cast it as
    datatype and return it.
    """
    if param is None:
        return param

    if datatype == "str" and not isinstance(param, basestring):
        try:
            param = str(param)
        except ValueError:
            param = str()
    elif datatype == "int" and not isinstance(param, int):
        try:
            param = int(param)
        except ValueError:
            param = int()
    elif datatype == "datetime" and not isinstance(param, datetime):
        try:
            param = dtutil.string_to_datetime(param)
        except ValueError:
            param = None

    return param
def main(self):
    storage = self.config.elasticsearch_storage_class(self.config)

    crash_file = open(self.config.processed_crash_file)
    processed_crash = json.load(crash_file)

    crash_file = open(self.config.raw_crash_file)
    raw_crash = json.load(crash_file)

    crash_date = string_to_datetime(processed_crash['date_processed'])

    es_index = storage.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch_doctype
    crash_id = processed_crash['uuid']

    storage.save_raw_and_processed(
        raw_crash, None, processed_crash, crash_id
    )

    try:
        # Verify the crash has been inserted
        crash = storage.es.get(
            es_index,
            es_doctype,
            crash_id
        )
        assert crash['exists']
    finally:
        # Clean up created index.
        storage.es.delete_index(es_index)
def test_get_index_for_crash_dynamic_name(self):
    """Test a dynamic (date-based) index name."""
    # The crashstorage class looks for '%' in the index name; if that
    # symbol is present, it will attempt to generate a new date-based
    # index name. Since the test base config doesn't use this pattern,
    # we need to specify it now.
    modified_config = self.get_tuned_config(
        ESCrashStorage,
        {'resource.elasticsearch.elasticsearch_index':
            'socorro_integration_test_reports%Y%m%d'}
    )
    es_storage = ESCrashStorage(config=modified_config)

    # The date is used to generate the name of the index; it must be a
    # datetime object.
    date = string_to_datetime(
        a_processed_crash['client_crash_date']
    )
    index = es_storage.get_index_for_crash(date)

    # The base index name is obtained from the test base class and the
    # date is appended to it according to pattern specified above.
    ok_(type(index) is str)
    eq_(index, 'socorro_integration_test_reports20120408')
def update_crashstats_signature(self, signature, report_date, report_build):
    with transaction_context(self.database) as connection:
        # Pull the data from the db. If it's there, then do an update. If it's
        # not there, then do an insert.
        try:
            sql = """
            SELECT signature, first_build, first_date
            FROM crashstats_signature
            WHERE signature=%s
            """
            sig = single_row_sql(connection, sql, (signature, ))
            sql = """
            UPDATE crashstats_signature
            SET first_build=%s, first_date=%s
            WHERE signature=%s
            """
            params = (min(sig[1], int(report_build)),
                      min(sig[2], string_to_datetime(report_date)),
                      sig[0])
        except SQLDidNotReturnSingleRow:
            sql = """
            INSERT INTO crashstats_signature (signature, first_build, first_date)
            VALUES (%s, %s, %s)
            """
            params = (signature, report_build, report_date)
        execute_no_results(connection, sql, params)
def test_get_index_for_crash_dynamic_name(self):
    """Test a dynamic (date-based) index name"""
    # The crashstorage class looks for '%' in the index name; if that
    # symbol is present, it will attempt to generate a new date-based
    # index name. Since the test base config doesn't use this pattern,
    # we need to specify it now.
    modified_config = self.get_tuned_config(
        ESCrashStorage,
        {
            "resource.elasticsearch.elasticsearch_index": "socorro_integration_test_reports%Y%m%d"
        },
    )
    es_storage = ESCrashStorage(config=modified_config)

    # The date is used to generate the name of the index; it must be a
    # datetime object.
    date = string_to_datetime(a_processed_crash["client_crash_date"])
    index = es_storage.get_index_for_crash(date)

    # The base index name is obtained from the test base class and the
    # date is appended to it according to pattern specified above.
    assert type(index) is str
    assert index == "socorro_integration_test_reports20120408"
def test_string_to_datetime():
    """
    Test datetimeutil.string_to_datetime()
    """
    # Empty date
    date = ""
    try:
        res = datetimeutil.string_to_datetime(date)
        raise AssertionError("expect this to raise ValueError")
    except ValueError:
        pass

    # already a date
    date = datetime.datetime.utcnow()
    res = datetimeutil.string_to_datetime(date)
    eq_(res, date.replace(tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')
    eq_(res.strftime('%z'), '+0000')

    # YY-mm-dd date
    date = "2001-11-03"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 3, tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')  # timezone aware

    # and naughty YY-m-d date
    date = "2001-1-3"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 1, 3, tzinfo=UTC))
    eq_(res.strftime('%Z'), 'UTC')  # timezone aware

    # Commented out because I don't think `YY-mm-dd+HH:ii:ss` is a
    # valid date format.
    ## YY-mm-dd+HH:ii:ss date
    #date = "2001-11-30+12:34:56"
    #try:
    #    res = datetimeutil.string_to_datetime(date)
    #except ValueError:
    #    res = None
    #expected = datetime(2001, 11, 30, 12, 34, 56)
    #assert res == expected, "Date is %s, %s expected." % (date, expected)

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC))

    # Separated date
    date = ["2001-11-30", "12:34:56"]
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC))

    # Invalid date
    date = "2001-11-32"
    try:
        res = datetimeutil.string_to_datetime(date)
        raise AssertionError("should have raised a ValueError")
    except ValueError:
        pass
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash['uuid']
    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        'bogus-field': 1234567890,
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                '9300]][indices:data/write/index]]; nested: '
                'MapperParsingException[failed to parse '
                '[processed_crash.bogus-field]]; nested: '
                'NumberFormatException[For input string: '
                '"18446744073709480735"]; '
            )

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.
    es_storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_id
    )

    expected_doc = {
        'crash_id': crash_id,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime(
                '2012-04-08 10:56:41.558922'
            ),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id
    )
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)

    crash_id = SAMPLE_PROCESSED_CRASH["uuid"]
    raw_crash = {}
    processed_crash = {
        "date_processed": date_to_string(utc_now()),
        # NOTE(willkg): This needs to be a key that's in super_search_fields, but is
        # rejected by our mock_index call--this is wildly contrived.
        "version": 1234567890,
        "uuid": crash_id,
    }

    def mock_index(*args, **kwargs):
        if "version" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.version]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.
    es_storage.save_processed_crash(
        raw_crash=deepcopy(raw_crash),
        processed_crash=deepcopy(processed_crash),
    )

    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.version",
        "processed_crash": {
            "date_processed": string_to_datetime(processed_crash["date_processed"]),
            "uuid": crash_id,
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.es_context.get_index_for_date(utc_now()),
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash["uuid"]
    raw_crash = {}
    processed_crash = {
        "date_processed": "2012-04-08 10:56:41.558922",
        "bogus-field": 1234567890,
        "foo": "bar",
    }

    def mock_index(*args, **kwargs):
        if "bogus-field" in kwargs["body"]["processed_crash"]:
            raise elasticsearch.exceptions.TransportError(
                400,
                (
                    "RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:"
                    "9300]][indices:data/write/index]]; nested: "
                    "MapperParsingException[failed to parse "
                    "[processed_crash.bogus-field]]; nested: "
                    "NumberFormatException[For input string: "
                    '"18446744073709480735"]; '
                ),
            )

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.
    es_storage.save_raw_and_processed(
        raw_crash=deepcopy(raw_crash),
        dumps=None,
        processed_crash=deepcopy(processed_crash),
        crash_id=crash_id,
    )

    expected_doc = {
        "crash_id": crash_id,
        "removed_fields": "processed_crash.bogus-field",
        "processed_crash": {
            "date_processed": string_to_datetime("2012-04-08 10:56:41.558922"),
            "foo": "bar",
        },
        "raw_crash": {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id,
    )
def clean(self, value):
    if any(itertools.imap(value.startswith, ('>=', '<='))):
        op = value[:2]
        value = value[2:]
    elif any(itertools.imap(value.startswith, ('=', '>', '<'))):
        op = value[:1]
        value = value[1:]
    else:
        op = '='
    return (op, string_to_datetime(value).date())
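# Hypothetical calls against the clean() method above (the surrounding form class
# isn't shown in this excerpt, so the values are illustrative only):
#
#   self.clean('>=2012-01-01')  ->  ('>=', datetime.date(2012, 1, 1))
#   self.clean('<2012-01-01')   ->  ('<', datetime.date(2012, 1, 1))
#   self.clean('2012-01-01')    ->  ('=', datetime.date(2012, 1, 1))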
def test_indexing_bogus_number_field(self, es_class_mock, es_client_mock):
    """Test an index attempt that fails because of a bogus number field.

    Expected behavior is to remove that field and retry indexing.
    """
    # ESCrashStorage uses the "limited backoff" transaction executor.
    # In real life this will retry operational exceptions over time, but
    # in unit tests, we just want it to hurry up and fail.
    backoff_config = self.config
    backoff_config['backoff_delays'] = [0, 0, 0]
    backoff_config['wait_log_interval'] = 0

    es_storage = ESCrashStorage(config=self.config)

    crash_id = a_processed_crash['uuid']
    raw_crash = {}
    processed_crash = {
        'date_processed': '2012-04-08 10:56:41.558922',
        'bogus-field': 1234567890,
        'foo': 'bar',
    }

    def mock_index(*args, **kwargs):
        if 'bogus-field' in kwargs['body']['processed_crash']:
            raise elasticsearch.exceptions.TransportError(
                400,
                'RemoteTransportException[[i-f94dae31][inet[/172.31.1.54:'
                '9300]][indices:data/write/index]]; nested: '
                'MapperParsingException[failed to parse '
                '[processed_crash.bogus-field]]; nested: '
                'NumberFormatException[For input string: '
                '"18446744073709480735"]; ')

        return True

    es_class_mock().index.side_effect = mock_index

    # Submit a crash and ensure that it succeeds.
    es_storage.save_raw_and_processed(raw_crash, None, processed_crash, crash_id)

    expected_doc = {
        'crash_id': crash_id,
        'removed_fields': 'processed_crash.bogus-field',
        'processed_crash': {
            'date_processed': string_to_datetime('2012-04-08 10:56:41.558922'),
            'foo': 'bar',
        },
        'raw_crash': {},
    }
    es_class_mock().index.assert_called_with(
        index=self.config.elasticsearch.elasticsearch_index,
        doc_type=self.config.elasticsearch.elasticsearch_doctype,
        body=expected_doc,
        id=crash_id)
def test_cron_job(self, exacttarget_mock):
    config_manager = self._setup_config_manager()
    et_mock = exacttarget_mock.return_value

    # Make get_subscriber raise an exception
    list_service = et_mock.list.return_value = mock.Mock()
    list_service.get_subscriber = mock.Mock(
        side_effect=exacttarget.NewsletterException()
    )

    with config_manager.context() as config:
        tab = crontabber.CronTabber(config)
        tab.run_all()
        information = self._load_structure()
        assert information['automatic-emails']
        assert not information['automatic-emails']['last_error']
        assert information['automatic-emails']['last_success']
        self.assertEqual(et_mock.trigger_send.call_count, 4)

        last_email = u'z\[email protected]'

        # Verify the last call to trigger_send
        fields = {
            'EMAIL_ADDRESS_': last_email,
            'EMAIL_FORMAT_': 'H',
            'TOKEN': last_email
        }
        et_mock.trigger_send.assert_called_with('socorro_dev_test', fields)

        # Verify that user's data was updated
        conf = config.crontabber['class-AutomaticEmailsCronApp']
        es = SuperS().es(
            urls=conf.elasticsearch.elasticsearch_urls,
            timeout=conf.elasticsearch.elasticsearch_timeout,
        )
        search = es.indexes(conf.elasticsearch.elasticsearch_emails_index)
        search = search.doctypes('emails')
        es.get_es().refresh()

        emails_list = (
            '*****@*****.**',
            '"Quidam" <*****@*****.**>',
            '*****@*****.**'
        )
        search = search.filter(_id__in=emails_list)
        res = search.values_list('last_sending')
        self.assertEqual(len(res), 3)

        now = utc_now()
        for row in res:
            date = string_to_datetime(row[0])
            self.assertEqual(date.year, now.year)
            self.assertEqual(date.month, now.month)
            self.assertEqual(date.day, now.day)
def test_string_datetime_with_timezone():
    date = "2001-11-30T12:34:56Z"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC))
    eq_(res.strftime('%H'), '12')  # because it's a timezone aware datetime
    ok_(res.tzname())
    eq_(res.strftime('%Z'), 'UTC')
    eq_(res.strftime('%z'), '+0000')

    # plus 3 hours east of Zulu means minus 3 hours on UTC
    date = "2001-11-30T12:10:56+03:00"
    res = datetimeutil.string_to_datetime(date)
    expected = datetime.datetime(2001, 11, 30, 12 - 3, 10, 56, tzinfo=UTC)
    eq_(res, expected)

    # similar example
    date = "2001-11-30T12:10:56-01:30"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12 + 1, 10 + 30, 56, tzinfo=UTC))

    # YY-mm-dd+HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456Z"
    res = datetimeutil.string_to_datetime(date)
    eq_(res, datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC))

    docstring = """
        * 2012-01-10T12:13:14
        * 2012-01-10T12:13:14.98765
        * 2012-01-10T12:13:14.98765+03:00
        * 2012-01-10T12:13:14.98765Z
        * 2012-01-10 12:13:14
        * 2012-01-10 12:13:14.98765
        * 2012-01-10 12:13:14.98765+03:00
        * 2012-01-10 12:13:14.98765Z
    """.strip().splitlines()
    examples = [x.replace('*', '').strip() for x in docstring]
    for example in examples:
        res = datetimeutil.string_to_datetime(example)
        ok_(res.tzinfo)
        ok_(isinstance(res, datetime.datetime))
def get_index_for_crash(self, processed_crash):
    """return the submission URL for a crash, based on the submission URL
    in config and the date of the crash"""
    index = self.config.elasticsearch_index
    crash_date = datetimeutil.string_to_datetime(
        processed_crash["date_processed"]
    )
    if not index:
        return None
    elif "%" in index:
        index = crash_date.strftime(index)
    return index
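# A minimal, self-contained sketch (not Socorro code) of the index-naming rule used
# by get_index_for_crash() above: when the configured index template contains '%',
# the crash date is substituted via strftime. The template and date are illustrative.
import datetime

index_template = "socorro_reports%Y%m%d"
crash_date = datetime.datetime(2012, 4, 8, 10, 56, 41)
index_name = crash_date.strftime(index_template) if "%" in index_template else index_template
assert index_name == "socorro_reports20120408"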
def update_crashstats_signature(self, signature, report_date, report_build):
    report_build = int(report_build)
    report_date = string_to_datetime(report_date)
    try:
        sig = Signature.objects.get(signature=signature)
        sig.first_build = min(report_build, sig.first_build)
        sig.first_date = min(report_date, sig.first_date)
    except Signature.DoesNotExist:
        sig = Signature.objects.create(
            signature=signature,
            first_build=report_build,
            first_date=report_date
        )
    sig.save()
def test_string_to_datetime():
    """
    Test datetimeutil.string_to_datetime()
    """
    # Empty date
    date = ""
    with pytest.raises(ValueError):
        res = datetimeutil.string_to_datetime(date)

    # already a date
    date = datetime.datetime.utcnow()
    res = datetimeutil.string_to_datetime(date)
    assert res == date.replace(tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'
    assert res.strftime('%z') == '+0000'

    # YY-mm-dd date
    date = "2001-11-03"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # and naughty YY-m-d date
    date = "2001-1-3"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 1, 3, tzinfo=UTC)
    assert res.strftime('%Z') == 'UTC'  # timezone aware

    # YY-mm-dd HH:ii:ss.S date
    date = "2001-11-30 12:34:56.123456"
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, 123456, tzinfo=UTC)

    # Separated date
    date = ["2001-11-30", "12:34:56"]
    res = datetimeutil.string_to_datetime(date)
    assert res == datetime.datetime(2001, 11, 30, 12, 34, 56, tzinfo=UTC)

    # Invalid date
    date = "2001-11-32"
    with pytest.raises(ValueError):
        datetimeutil.string_to_datetime(date)
def format_dates_in_crash(self, processed_crash):
    # HBase returns dates in a format that elasticsearch does not
    # understand. To keep our elasticsearch mapping simple, we
    # transform all dates to a recognized format.
    for attr in processed_crash:
        try:
            processed_crash[attr] = datetimeutil.date_to_string(
                datetimeutil.string_to_datetime(processed_crash[attr]))
        except (ValueError, TypeError, ISO8601Error):
            # the attribute is not a date
            pass

    return processed_crash
def post(self, *args):
    " Webpy method receives inputs from uri "
    errors = []
    email_form = self.email_form()
    if email_form.validates():
        product = email_form['product'].value
        versions = tuple([x.strip() for x in email_form['versions'].value.split(',')])
        signature = email_form['signature'].value
        subject = email_form['subject'].value
        body = email_form['body'].value
        start_date = string_to_datetime(email_form['start_date'].value)
        end_date = string_to_datetime(email_form['end_date'].value)
        author = email_form['author'].value

        logger.info(
            "%s is creating an email campaign for %s %s crashes in [%s] Between %s and %s"
            % (author, product, versions, signature, start_date, end_date))

        connection = self.database.connection()
        try:
            cursor = connection.cursor()
            campaign_id, full_email_rows = self.create_email_campaign(
                cursor, product, versions, signature, subject, body,
                start_date, end_date, author)
            logger.info('full_email_rows: %s' % full_email_rows)
            email_addresses = [row['email'] for row in full_email_rows]
            logger.info('email_addresses: %s' % email_addresses)
            email_contact_ids = self.save_campaign_contacts(
                cursor, campaign_id, email_addresses)
            logger.info('email_contact_ids: %s' % email_contact_ids)

            connection.commit()

            return {'campaign_id': campaign_id}
        finally:
            connection.close()
    else:
        web.badrequest()
        for field in ['product', 'versions', 'signature', 'subject',
                      'body', 'start_date', 'end_date', 'author']:
            if email_form[field].note:
                # Example "product: Required"
                errors.append("%s: %s" % (field, email_form[field].note))
        logger.info("Bad Request. %s" % ', '.join(errors))
        return {'message': ', '.join(errors)}
def get_index_for_crash(self, processed_crash):
    """return the submission URL for a crash, based on the submission URL
    in config and the date of the crash"""
    index = self.config.elasticsearch_index
    crash_date = datetimeutil.string_to_datetime(
        processed_crash['date_processed'])
    if not index:
        return None
    elif '%' in index:
        index = crash_date.strftime(index)
    return index
def save_processed_crash(self, raw_crash, processed_crash):
    """Save processed crash report to Elasticsearch"""
    index_name = self.es_context.get_index_for_date(
        string_to_datetime(processed_crash["date_processed"])
    )
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = processed_crash["uuid"]

    supersearch_fields_keys = self.get_keys_for_fields()
    try:
        mapping_keys = self.get_keys_for_mapping(index_name, es_doctype)
    except NotFoundError:
        mapping_keys = None
    all_valid_keys = supersearch_fields_keys
    if mapping_keys:
        # If there are mapping_keys, then the index exists already and we
        # should make sure we're not indexing anything that's not in that
        # mapping
        all_valid_keys = all_valid_keys & mapping_keys

    # Copy the crash structures so we can mutate them later and remove everything
    # that's not a valid key for the index
    raw_crash = {
        key: value
        for key, value in copy.deepcopy(raw_crash).items()
        if "raw_crash.%s" % key in all_valid_keys
    }
    processed_crash = {
        key: value
        for key, value in copy.deepcopy(processed_crash).items()
        if "processed_crash.%s" % key in all_valid_keys
    }

    # Clean up and redact raw and processed crash data
    self.prepare_processed_crash(raw_crash, processed_crash)

    # Capture crash data size metrics--do this only after we've cleaned up
    # the crash data
    self.capture_crash_metrics(raw_crash, processed_crash)

    crash_document = {
        "crash_id": processed_crash["uuid"],
        "processed_crash": processed_crash,
        "raw_crash": raw_crash,
    }

    self._submit_crash_to_elasticsearch(
        crash_id=crash_id,
        es_doctype=es_doctype,
        index_name=index_name,
        crash_document=crash_document,
    )
def format_dates_in_crash(self, processed_crash):
    # HBase returns dates in a format that elasticsearch does not
    # understand. To keep our elasticsearch mapping simple, we
    # transform all dates to a recognized format.
    for attr in processed_crash:
        try:
            processed_crash[attr] = datetimeutil.date_to_string(
                datetimeutil.string_to_datetime(
                    processed_crash[attr]
                )
            )
        except (ValueError, TypeError, ISO8601Error):
            # the attribute is not a date
            pass

    return processed_crash
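# Usage sketch for format_dates_in_crash() above (values are illustrative). Any
# attribute that string_to_datetime can parse is round-tripped through
# date_to_string so that all dates end up in one consistent string form;
# attributes that aren't dates raise and are left untouched:
#
#   crash = {"date_processed": "2012-04-08 10:56:41.558922", "signature": "foo"}
#   crash = self.format_dates_in_crash(crash)
#   # crash["date_processed"] is now a normalized date string; "signature" is unchanged.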
def _submit_crash_to_elasticsearch(self, crash_id, crash_document):
    """submit a crash report to elasticsearch.

    Generate the index name from the date of the crash report, verify that
    index already exists, and if it doesn't create it and set its mapping.
    Lastly index the crash report.
    """
    if not self.config.elasticsearch_urls:
        return

    crash_date = datetimeutil.string_to_datetime(
        crash_document['processed_crash']['date_processed']
    )
    es_index = self.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch_doctype

    try:
        # We first need to ensure that the index already exists in ES.
        # If it doesn't, we create it and put its mapping.
        if es_index not in self.indices_cache:
            self.create_socorro_index(es_index)

            # Cache the list of existing indices to avoid HTTP requests
            self.indices_cache.add(es_index)

        self.es.index(
            es_index,
            es_doctype,
            crash_document,
            id=crash_id,
            replication='async'
        )
    except pyelasticsearch.exceptions.ConnectionError:
        self.logger.critical('%s may not have been submitted to '
                             'elasticsearch due to a connection error',
                             crash_id)
        raise
    except pyelasticsearch.exceptions.Timeout:
        self.logger.critical('%s may not have been submitted to '
                             'elasticsearch due to a timeout',
                             crash_id)
        raise
    except pyelasticsearch.exceptions.ElasticHttpError, e:
        self.logger.critical(u'%s may not have been submitted to '
                             'elasticsearch due to the following: %s',
                             crash_id, e)
        raise
def reconstitute_datetimes(processed_crash):
    datetime_fields = [
        'submitted_timestamp',
        'date_processed',
        'client_crash_date',
        'started_datetime',
        'startedDateTime',
        'completed_datetime',
        'completeddatetime',
    ]
    for a_key in datetime_fields:
        try:
            processed_crash[a_key] = string_to_datetime(
                processed_crash[a_key])
        except KeyError:
            # not there? we don't care
            pass
def reconstitute_datetimes(processed_crash):
    # FIXME(willkg): These should be specified in super_search_fields.py
    # and not hard-coded
    datetime_fields = [
        'submitted_timestamp',
        'date_processed',
        'client_crash_date',
        'started_datetime',
        'startedDateTime',
        'completed_datetime',
        'completeddatetime',
    ]
    for a_key in datetime_fields:
        if a_key not in processed_crash:
            continue

        processed_crash[a_key] = string_to_datetime(processed_crash[a_key])
def _submit_crash_to_elasticsearch(self, crash_id, crash_document):
    """submit a crash report to elasticsearch.

    Generate the index name from the date of the crash report, verify that
    index already exists, and if it doesn't create it and set its mapping.
    Lastly index the crash report.
    """
    if not self.config.elasticsearch_urls:
        return

    crash_date = datetimeutil.string_to_datetime(
        crash_document['processed_crash']['date_processed'])
    es_index = self.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch_doctype

    try:
        # We first need to ensure that the index already exists in ES.
        # If it doesn't, we create it and put its mapping.
        if es_index not in self.indices_cache:
            self.create_socorro_index(es_index)

            # Cache the list of existing indices to avoid HTTP requests
            self.indices_cache.add(es_index)

        self.es.index(
            es_index,
            es_doctype,
            crash_document,
            id=crash_id,
        )
    except pyelasticsearch.exceptions.ConnectionError:
        self.logger.critical(
            '%s may not have been submitted to '
            'elasticsearch due to a connection error', crash_id)
        raise
    except pyelasticsearch.exceptions.Timeout:
        self.logger.critical(
            '%s may not have been submitted to '
            'elasticsearch due to a timeout', crash_id)
        raise
    except pyelasticsearch.exceptions.ElasticHttpError, e:
        self.logger.critical(
            u'%s may not have been submitted to '
            'elasticsearch due to the following: %s', crash_id, e)
        raise
def reconstitute_datetimes(processed_crash):
    datetime_fields = [
        'submitted_timestamp',
        'date_processed',
        'client_crash_date',
        'started_datetime',
        'startedDateTime',
        'completed_datetime',
        'completeddatetime',
    ]
    for a_key in datetime_fields:
        try:
            processed_crash[a_key] = string_to_datetime(
                processed_crash[a_key]
            )
        except KeyError:
            # not there? we don't care
            pass
def testSendAllEmails():
    context = getDummyContext()

    testContacts = ["*****@*****.**", "*****@*****.**"]
    crash_date = string_to_datetime("2011-09-01")
    contacts = [(0, testContacts[0], "abc", "ooid1", crash_date),
                (0, testContacts[1], "abc", "ooid2", crash_date)]
    subject = "email subject"
    body = "email body"

    dummySmtp = expect.DummyObjectWithExpectations()

    # no variables
    noVarBody = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keQ==\n'
    dummySmtp.expect("sendmail", (context.fromEmailAddress, [testContacts[0]], noVarBody % testContacts[0]), {}, None)
    dummySmtp.expect("sendmail", (context.fromEmailAddress, [testContacts[1]], noVarBody % testContacts[1]), {}, None)

    sender = es.EmailSender(context)
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: "sent"}

    # FIXME
    # # unsubscribe variable
    # unsubVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvYWJj\n'
    # unsubVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvZGVm\n'
    # dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], unsubVarBody1 % testContacts[0]), {}, None)
    # dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], unsubVarBody2 % testContacts[1]), {}, None)
    #
    # body = 'email body *|UNSUBSCRIBE_URL|*'
    # contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    # print contacted_emails
    # assert contacted_emails == [testContacts[0], testContacts[1]]

    # email_address variable
    emailVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAxQGV4YW1wbGUuY29t\n'
    emailVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAyQGV4YW1wbGUuY29t\n'
    dummySmtp.expect(
        "sendmail", (context.fromEmailAddress, [testContacts[0]], emailVarBody1 % testContacts[0]), {}, None
    )
    dummySmtp.expect(
        "sendmail", (context.fromEmailAddress, [testContacts[1]], emailVarBody2 % testContacts[1]), {}, None
    )

    body = "email body *|EMAIL_ADDRESS|*"
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: "sent"}
def main(self):
    storage_config = self.get_config_context()
    storage = self.config.elasticsearch_storage_class(storage_config)

    # Create the supersearch fields.
    storage.es.bulk_index(
        index=storage_config.elasticsearch_default_index,
        doc_type='supersearch_fields',
        docs=SUPERSEARCH_FIELDS.values(),
        id_field='name',
        refresh=True,
    )

    crash_file = open(self.config.processed_crash_file)
    processed_crash = json.load(crash_file)

    crash_file = open(self.config.raw_crash_file)
    raw_crash = json.load(crash_file)

    crash_date = string_to_datetime(processed_crash['date_processed'])

    es_index = storage.get_index_for_crash(crash_date)
    es_doctype = storage_config.elasticsearch_doctype
    crash_id = processed_crash['uuid']

    storage.save_raw_and_processed(
        raw_crash, None, processed_crash, crash_id
    )

    try:
        # Verify the crash has been inserted
        crash = storage.es.get(
            es_index,
            es_doctype,
            crash_id
        )
        assert crash['exists']
    finally:
        # Clean up created index.
        storage.es.delete_index(es_index)
        storage.es.delete_index(storage_config.elasticsearch_default_index)
def testSendAllEmails():
    context = getDummyContext()

    testContacts = ['*****@*****.**', '*****@*****.**']
    crash_date = string_to_datetime('2011-09-01')
    contacts = [
        (0, testContacts[0], 'abc', 'ooid1', crash_date),
        (0, testContacts[1], 'abc', 'ooid2', crash_date)
    ]
    subject = 'email subject'
    body = 'email body'

    dummySmtp = expect.DummyObjectWithExpectations()

    # no variables
    noVarBody = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keQ==\n'
    dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], noVarBody % testContacts[0]), {}, None)
    dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], noVarBody % testContacts[1]), {}, None)

    sender = es.EmailSender(context)
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: 'sent'}

    # FIXME
    # # unsubscribe variable
    # unsubVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvYWJj\n'
    # unsubVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSBodHRwOi8vZXhhbXBsZS5jb20vdW5zdWJzY3JpYmUvZGVm\n'
    # dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], unsubVarBody1 % testContacts[0]), {}, None)
    # dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], unsubVarBody2 % testContacts[1]), {}, None)
    #
    # body = 'email body *|UNSUBSCRIBE_URL|*'
    # contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    # print contacted_emails
    # assert contacted_emails == [testContacts[0], testContacts[1]]

    # email_address variable
    emailVarBody1 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAxQGV4YW1wbGUuY29t\n'
    emailVarBody2 = 'Content-Type: text/plain; charset="utf-8"\nMIME-Version: 1.0\nContent-Transfer-Encoding: base64\nFrom: [email protected]\nSubject: email subject\nTo: %s\n\nZW1haWwgYm9keSAyQGV4YW1wbGUuY29t\n'
    dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[0]], emailVarBody1 % testContacts[0]), {}, None)
    dummySmtp.expect('sendmail', (context.fromEmailAddress, [testContacts[1]], emailVarBody2 % testContacts[1]), {}, None)

    body = 'email body *|EMAIL_ADDRESS|*'
    contacted_emails = sender.send_all_emails(contacts, subject, body, dummySmtp)
    assert contacted_emails == {0: 'sent'}
def main(self):
    es_storage = self.config.elasticsearch_storage_class(self.config)
    hb_storage = self.config.hbase_storage_class(self.config)
    hb_client = HBaseConnectionForCrashReports(
        self.config.hbase_host,
        self.config.hbase_port,
        self.config.hbase_timeout,
    )

    current_date = self.config.end_date
    date = current_date.strftime('%y%m%d')
    one_day = datetime.timedelta(days=1)

    for i in range(self.config.duration):
        day = current_date.strftime('%y%m%d')
        self.config.logger.info('backfilling crashes for %s', day)
        reports = hb_client.get_list_of_processed_json_for_date(
            day,
            number_of_retries=5
        )

        for report in reports:
            processed_crash = json.loads(report)

            # HBase returns dates in a format that elasticsearch does not
            # understand. To keep our elasticsearch mapping simple, we
            # transform all dates to a recognized format.
            for attr in processed_crash:
                try:
                    processed_crash[attr] = datetimeutil.date_to_string(
                        datetimeutil.string_to_datetime(
                            processed_crash[attr]
                        )
                    )
                except (ValueError, TypeError, ISO8601Error):
                    # the attribute is not a date
                    pass

            # print processed_crash['uuid']
            es_storage.save_processed(processed_crash)

        current_date -= one_day

    return 0
def _submit_crash_to_elasticsearch(self, connection, crash_document):
    """Submit a crash report to elasticsearch.
    """
    # Massage the crash such that the date_processed field is formatted
    # in the fashion of our established mapping.
    # First create a datetime object from the string in the crash report.
    crash_date = datetimeutil.string_to_datetime(
        crash_document['processed_crash']['date_processed']
    )
    # Then convert it back to a string with the expected formatting.
    crash_date_with_t = datetimeutil.date_to_string(crash_date)
    # Finally, re-insert that string back into the report for indexing.
    crash_document['processed_crash']['date_processed'] = crash_date_with_t

    # Obtain the index name.
    es_index = self.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch.elasticsearch_doctype
    crash_id = crash_document['crash_id']

    # Attempt to create the index; it's OK if it already exists.
    if es_index not in self.indices_cache:
        index_creator = IndexCreator(config=self.config)
        index_creator.create_socorro_index(es_index)

    # Submit the crash for indexing.
    try:
        connection.index(
            index=es_index,
            doc_type=es_doctype,
            body=crash_document,
            id=crash_id
        )
    except elasticsearch.exceptions.ElasticsearchException as e:
        self.config.logger.critical(
            'Submission to Elasticsearch failed for %s (%s)',
            crash_id,
            e,
            exc_info=True
        )
        raise
def reconstitute_datetimes(processed_crash):
    """Convert string values to datetimes for specified fields

    This operates in-place.

    """
    # FIXME(willkg): These should be specified in super_search_fields.py
    # and not hard-coded
    datetime_fields = [
        "submitted_timestamp",
        "date_processed",
        "client_crash_date",
        "started_datetime",
        "startedDateTime",
        "completed_datetime",
        "completeddatetime",
    ]
    for a_key in datetime_fields:
        if a_key not in processed_crash:
            continue

        processed_crash[a_key] = string_to_datetime(processed_crash[a_key])
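# Usage sketch for reconstitute_datetimes() above (field values are illustrative).
# Only the hard-coded keys are converted, and the conversion happens in place:
#
#   crash = {"date_processed": "2012-04-08 10:56:41.558922", "product": "Firefox"}
#   reconstitute_datetimes(crash)
#   # crash["date_processed"] is now a timezone-aware datetime; "product" is untouched.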
def main(self):
    storage = self.config.elasticsearch_storage_class(self.config)

    crash_file = open(self.config.processed_crash_file)
    processed_crash = json.load(crash_file)

    crash_file = open(self.config.raw_crash_file)
    raw_crash = json.load(crash_file)

    crash_date = string_to_datetime(processed_crash["date_processed"])

    es_index = storage.get_index_for_crash(crash_date)
    es_doctype = self.config.elasticsearch_doctype
    crash_id = processed_crash["uuid"]

    storage.save_raw_and_processed(raw_crash, None, processed_crash, crash_id)

    # Verify the crash has been inserted
    es = pyelasticsearch.ElasticSearch(self.config.elasticsearch_urls)
    crash = es.get(es_index, es_doctype, crash_id)
    assert crash["exists"]

    print "Success - %s/%s/%s" % (es_index, es_doctype, crash_id)
def test_email_cannot_be_sent_twice(self, exacttarget_mock):
    config_manager = self._setup_config_manager(
        restrict_products=['NightlyTrain']
    )
    et_mock = exacttarget_mock.return_value

    # Prepare failures
    _failures = []
    _email_sent = []

    class SomeRandomError(Exception):
        pass

    def trigger_send(template, fields):
        email = fields['EMAIL_ADDRESS_']
        if email == '*****@*****.**' and email not in _failures:
            _failures.append(email)
            raise SomeRandomError('This is an error. ')
        else:
            _email_sent.append(email)

    et_mock.trigger_send = trigger_send

    with config_manager.context() as config:
        tab = crontabber.CronTabber(config)
        tab.run_all()
        information = self._load_structure()
        assert information['automatic-emails']
        assert information['automatic-emails']['last_error']
        self.assertEqual(
            information['automatic-emails']['last_error']['type'],
            str(SomeRandomError)
        )

        # Verify that user's data was updated, but not all of it
        self.assertEqual(_email_sent, ['*****@*****.**', '*****@*****.**'])
        emails_list = (
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**'
        )
        conf = config.crontabber['class-AutomaticEmailsCronApp']
        es = SuperS().es(
            urls=conf.elasticsearch.elasticsearch_urls,
            timeout=conf.elasticsearch.elasticsearch_timeout,
        )
        search = es.indexes(
            conf.elasticsearch.elasticsearch_emails_index
        )
        search = search.doctypes('emails')
        es.get_es().refresh()
        search = search.filter(_id__in=emails_list)
        res = search.execute()
        self.assertEqual(res.count, 2)

        now = utc_now()
        for row in res.results:
            assert row['_id'] in ('*****@*****.**', '*****@*****.**')
            date = string_to_datetime(row['_source']['last_sending'])
            self.assertEqual(date.year, now.year)
            self.assertEqual(date.month, now.month)
            self.assertEqual(date.day, now.day)

        # Run crontabber again and verify that all users are updated,
        # and emails are not sent twice
        state = tab.job_database['automatic-emails']
        self._wind_clock(state, hours=1)
        tab.job_database['automatic-emails'] = state

        tab.run_all()
        information = self._load_structure()
        assert information['automatic-emails']
        assert not information['automatic-emails']['last_error']
        assert information['automatic-emails']['last_success']

        # Verify that users were not sent an email twice
        self.assertEqual(_email_sent, [
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**'
        ])
def test_string_datetime_with_timezone_variations(ts, timezone):
    res = datetimeutil.string_to_datetime(ts)
    # NOTE(willkg): isodate.tzinfo.FixedOffset doesn't define __eq__, so we compare the
    # reprs of them. :(
    assert repr(res.tzinfo) == repr(timezone)
    assert isinstance(res, datetime.datetime)
def query(self, from_date, to_date, json_query):
    """
    Send a query directly to ElasticSearch and return the result.
    """
    # Default dates
    now = dtutil.utc_now().date()
    lastweek = now - timedelta(7)

    from_date = dtutil.string_to_datetime(from_date) or lastweek
    to_date = dtutil.string_to_datetime(to_date) or now

    # Create the indexes to use for querying.
    daterange = []
    delta_day = to_date - from_date
    for delta in range(0, delta_day.days + 1):
        day = from_date + timedelta(delta)
        index = "socorro_%s" % day.strftime("%y%m%d")
        # Cache protection for limiting the number of HTTP calls
        if index not in self.cache or not self.cache[index]:
            daterange.append(index)

    can_return = False

    # -
    # This code is here to avoid failing queries caused by missing
    # indexes. It should not happen on prod, but doing this makes
    # sure users will never see a 500 Error because of this eventuality.
    # -

    # Iterate until we can return an actual result and not an error
    while not can_return:
        if not daterange:
            http_response = "{}"
            break

        datestring = ",".join(daterange)
        uri = "/%s/_search" % datestring

        with self.http:
            http_response = self.http.post(uri, json_query)

        # If there has been an error,
        # then we get a dict instead of some json.
        if isinstance(http_response, dict):
            data = http_response["error"]["data"]

            # If an index is missing,
            # try to remove it from the list of indexes and retry.
            if (http_response["error"]["code"] == 404 and
                    data.find("IndexMissingException") >= 0):
                index = data[data.find("[[") + 2:data.find("]")]

                # Cache protection for limiting the number of HTTP calls
                self.cache[index] = True

                try:
                    daterange.remove(index)
                except Exception:
                    raise
        else:
            can_return = True

    return (http_response, "text/json")
        'json_dump': 'stackwalker output',
    },
    'upload_file_minidump_flash2': {
        'things': 'untouched',
        'json_dump': 'stackwalker output',
    },
    'upload_file_minidump_browser': {
        'things': 'untouched',
        'json_dump': 'stackwalker output',
    },
}

a_processed_crash_with_no_stackwalker = deepcopy(a_processed_crash)
a_processed_crash_with_no_stackwalker['date_processed'] = \
    string_to_datetime('2012-04-08 10:56:41.558922')
a_processed_crash_with_no_stackwalker['client_crash_date'] = \
    string_to_datetime('2012-04-08 10:52:42.0')
a_processed_crash_with_no_stackwalker['completeddatetime'] = \
    string_to_datetime('2012-04-08 10:56:50.902884')
a_processed_crash_with_no_stackwalker['started_datetime'] = \
    string_to_datetime('2012-04-08 10:56:50.440752')
a_processed_crash_with_no_stackwalker['startedDateTime'] = \
    string_to_datetime('2012-04-08 10:56:50.440752')

del a_processed_crash_with_no_stackwalker['json_dump']
del a_processed_crash_with_no_stackwalker['upload_file_minidump_flash1'][
    'json_dump'
]
del a_processed_crash_with_no_stackwalker['upload_file_minidump_flash2'][
    'json_dump'