def run(self):
    """Process every row produced by the configured query since a start date.

    The start date is ``config.days_into_past`` days ago when that setting
    is non-zero; otherwise it is the job's recorded ``last_success``, or
    the current time when no such record can be read.  Each row yielded by
    ``self._iterator(query)`` is handed to ``self.inner_transaction``
    through ``self.database_transaction_executor``.
    """
    days_back = self.config.days_into_past
    if days_back:
        # an explicit, non-zero look-back window takes precedence
        last_run = utc_now() - datetime.timedelta(days=days_back)
    else:
        try:
            last_run = self.job_information["last_success"]
        except (KeyError, TypeError):
            # KeyError: the job has never run successfully
            # TypeError: self.job_information is None
            # either way this is treated as the very first run
            last_run = utc_now()

    # bugzilla runs on PST, so the date is expressed in its time zone
    pacific = tz.gettz("PST8PDT")
    since = last_run.astimezone(pacific).strftime("%Y-%m-%d")
    query = self.config.query % since

    for row in self._iterator(query):
        bug_id, status, resolution, short_desc, signature_set = row
        try:
            # every row gets a transaction of its own
            self.database_transaction_executor(
                self.inner_transaction,
                bug_id,
                status,
                resolution,
                short_desc,
                signature_set,
            )
        except NothingUsefulHappened:
            # deliberately ignored: carry on with the next row
            pass
def _normal_jobs_iter(self):
    """Endlessly yield job tuples owned by this process that have not started.

    Rows (id, uuid, priority) come from the 'jobs' table where the owner is
    this process and the started datetime is null.  A new batch is fetched
    only when the previous batch is used up and the polling interval has
    elapsed since the last fetch; while nothing is available, None is
    yielded.  This generator is perpetual - it never raises StopIteration
    on its own.
    """
    get_normal_job_sql = (
        "select"
        " j.id,"
        " j.uuid,"
        " priority "
        "from"
        " jobs j "
        "where"
        " j.owner = %d"
        " and j.starteddatetime is null "
        "order by queueddatetime"
        " limit %d" % (self.processor_id, self.config.batchJobLimit))

    pending = []
    last_query_timestamp = utc_now()
    while True:
        polling_threshold = utc_now() - self.config.pollingInterval
        if not pending and last_query_timestamp < polling_threshold:
            # the batch is exhausted and enough time has passed: get more
            pending = self.transaction(
                execute_query_fetchall,
                get_normal_job_sql
            )
            last_query_timestamp = utc_now()

        if not pending:
            # nothing to hand out on this pass
            yield None
            continue

        # drain the current batch in the order the query returned it
        while pending:
            yield pending.pop(0)
def run(self):
    """Run the configured query from a start date and process each row.

    A non-zero ``config.days_into_past`` sets the start date that many
    days back from now.  Otherwise the job's ``last_success`` value is
    used, and the current time stands in when that value is unavailable.
    """
    if self.config.days_into_past:
        # a non-zero setting always wins
        look_back = datetime.timedelta(days=self.config.days_into_past)
        last_run = utc_now() - look_back
    else:
        try:
            last_run = self.job_information['last_success']
        except (KeyError, TypeError):
            # KeyError if the job never ran successfully,
            # TypeError if self.job_information is None;
            # in both cases this counts as the first run of the job
            last_run = utc_now()

    # bugzilla runs on PST, so we need to communicate in its time zone
    bugzilla_zone = tz.gettz('PST8PDT')
    last_run_formatted = last_run.astimezone(bugzilla_zone).strftime('%Y-%m-%d')
    query = self.config.query % last_run_formatted

    for record in self._iterator(query):
        (bug_id, status, resolution, short_desc, signature_set) = record
        try:
            # one transaction per record
            self.database_transaction_executor(
                self.inner_transaction,
                bug_id, status, resolution, short_desc, signature_set
            )
        except NothingUsefulHappened:
            # intentionally swallowed; continue with the next record
            pass
def test_new_crashes(self):
    """new_crashes() for Firefox 43.0.1 yields a_firefox_processed_crash only.

    Three crashes are indexed: a_processed_crash, a_firefox_processed_crash
    and a copy of the latter with a different uuid and a date_processed of
    one day ago.
    """
    new_crash_source = ESNewCrashSource(self.config)

    # index the two reference crashes unchanged
    for crash in (a_processed_crash, a_firefox_processed_crash):
        self.index_crash(
            crash,
            raw_crash=a_raw_crash,
            crash_id=crash['uuid']
        )

    # a variation of the Firefox crash: new uuid, processed a day ago
    variant = deepcopy(a_firefox_processed_crash)
    variant['uuid'] = variant['uuid'].replace('a', 'e')
    variant['date_processed'] = utc_now() - datetime.timedelta(days=1)
    self.index_crash(
        variant,
        raw_crash=a_raw_crash,
        crash_id=variant['uuid']
    )
    self.refresh_index()

    # both reference crashes must be retrievable from the index
    index_name = self.config.elasticsearch.elasticsearch_index
    assert self.connection.get(
        index=index_name, id=a_processed_crash['uuid'])
    assert self.connection.get(
        index=index_name, id=a_firefox_processed_crash['uuid'])

    since = utc_now() - datetime.timedelta(days=1)
    generator = new_crash_source.new_crashes(since, 'Firefox', ['43.0.1'])
    eq_(list(generator), [a_firefox_processed_crash['uuid']])
def test_no_new_crashes(self):
    """new_crashes() for Firefox 43.0.1 stays empty before and after indexing
    a_processed_crash.
    """
    new_crash_source = ESNewCrashSource(self.config)
    self.health_check()

    def firefox_crash_ids():
        # ids reported for Firefox 43.0.1, starting one day ago
        since = utc_now() - datetime.timedelta(days=1)
        return list(
            new_crash_source.new_crashes(since, 'Firefox', ['43.0.1'])
        )

    eq_(firefox_crash_ids(), [])

    self.index_crash(
        a_processed_crash,
        raw_crash=a_raw_crash,
        crash_id=a_processed_crash['uuid']
    )
    self.refresh_index()

    # Same check now that a processed crash is indexed, notably one
    # under a different name and version.
    eq_(firefox_crash_ids(), [])
def test_delete_old_indices(self):
    """IndexCleaner.delete_old_indices() removes the old indices and keeps
    the index for now and the alias for last week.
    """
    def create_index(name):
        # create the index and record its name in self.indices
        self.index_client.create(name, {})
        self.indices.append(name)

    # Old indices that are expected to be deleted.
    create_index('socorro200142')
    create_index('socorro200000')

    # An old index reachable through an alias.
    create_index('socorro200201_20030101')
    self.index_client.put_alias(
        index='socorro200201_20030101',
        name='socorro200201',
    )

    # A recent index reachable through an alias.
    last_week_index = self.get_index_for_date(
        utc_now() - datetime.timedelta(weeks=1)
    )
    create_index('socorro_some_aliased_index')
    self.index_client.put_alias(
        index='socorro_some_aliased_index',
        name=last_week_index,
    )

    # A recent index that should not be deleted.
    now_index = self.get_index_for_date(utc_now())
    create_index(now_index)

    # Sanity check: everything created above must exist.
    old_names = ('socorro200142', 'socorro200000', 'socorro200201')
    recent_names = (now_index, last_week_index)
    for name in old_names + recent_names:
        assert self.index_client.exists(name)

    api = IndexCleaner(self.config)
    api.delete_old_indices()

    # The recent ones are still there.
    for name in recent_names:
        ok_(self.index_client.exists(name))

    # The old ones are gone.
    for name in old_names:
        ok_(not self.index_client.exists(name))
def test_delete_old_indices(self):
    """After IndexCleaner.delete_old_indices(), old indices no longer exist
    while the current index and last week's alias still do.
    """
    client = self.index_client

    # Two plain old indices, expected to be removed.
    for old_name in ('socorro200142', 'socorro200000'):
        client.create(old_name, {})
        self.indices.append(old_name)

    # One old index published under an alias.
    client.create('socorro200201_20030101', {})
    self.indices.append('socorro200201_20030101')
    client.put_alias(
        index='socorro200201_20030101',
        name='socorro200201',
    )

    # One recent index published under last week's name.
    a_week_ago = utc_now() - datetime.timedelta(weeks=1)
    last_week_index = self.get_index_for_date(a_week_ago)
    client.create('socorro_some_aliased_index', {})
    self.indices.append('socorro_some_aliased_index')
    client.put_alias(
        index='socorro_some_aliased_index',
        name=last_week_index,
    )

    # One recent index that must survive.
    now_index = self.get_index_for_date(utc_now())
    client.create(now_index, {})
    self.indices.append(now_index)

    # These fail if an index was not correctly created.
    assert client.exists('socorro200142')
    assert client.exists('socorro200000')
    assert client.exists('socorro200201')
    assert client.exists(now_index)
    assert client.exists(last_week_index)

    cleaner = IndexCleaner(self.config)
    cleaner.delete_old_indices()

    # Recent indices remain.
    ok_(client.exists(now_index))
    ok_(client.exists(last_week_index))

    # Old indices have been deleted.
    ok_(not client.exists('socorro200142'))
    ok_(not client.exists('socorro200000'))
    ok_(not client.exists('socorro200201'))
def test_mapping(self, mapping):
    """Check a mapping by indexing recent crash reports with it.

    A temporary index is created in elasticsearch from `mapping`, then a
    sample of crash reports taken from the indices of the last week is
    inserted into it.  An elasticsearch failure in any of those steps is
    re-raised as BadArgumentError('storage_mapping'): the mapping either
    does not validate against elasticsearch's rules or is not compatible
    with the data currently stored.  When nothing is raised the mapping is
    likely correct.

    The temporary index is deleted afterwards in every case.

    Meant to be used wherever the `storage_mapping` of a Super Search
    Field can change.
    """
    temp_index = 'socorro_mapping_test'
    es_connection = self.get_connection()

    # Import at runtime to avoid dependency circle.
    from socorro.external.es.index_creator import IndexCreator
    index_creator = IndexCreator(self.config)

    try:
        index_creator.create_index(temp_index, mapping)

        # sample some documents from the indices of the past seven days
        now = datetimeutil.utc_now()
        current_indices = self.generate_list_of_indexes(
            now - datetime.timedelta(days=7),
            now,
        )
        es_config = self.config.elasticsearch
        crashes_sample = es_connection.search(
            index=current_indices,
            doc_type=es_config.elasticsearch_doctype,
            size=es_config.mapping_test_crash_number,
        )
        crashes = [hit['_source'] for hit in crashes_sample['hits']['hits']]

        # re-index every sampled document under the candidate mapping
        for crash in crashes:
            es_connection.index(
                index=temp_index,
                doc_type=self.config.elasticsearch.elasticsearch_doctype,
                body=crash,
            )
    except elasticsearch.exceptions.ElasticsearchException as e:
        raise BadArgumentError(
            'storage_mapping',
            msg='Indexing existing data in Elasticsearch failed with the '
                'new mapping. Error is: %s' % str(e),
        )
    finally:
        try:
            index_creator.get_index_client().delete(temp_index)
        except elasticsearch.exceptions.NotFoundError:
            # The index does not exist (for example because its creation
            # failed), so there is nothing to clean up.
            pass
def test_basic_run(self):
    """With days_to_keep=1 the clean-raw-adi-logs job succeeds and only the
    row dated today is left in raw_adi_logs.
    """
    cur = self.conn.cursor()

    # Insert one row dated yesterday and one dated today.
    today = utc_now().date()
    yesterday = today - datetime.timedelta(days=1)
    statement = """
        INSERT INTO raw_adi_logs
        (report_date, product_name, count)
        VALUES
        (%(first)s, 'WinterFox', 11),
        (%(second)s, 'WinterFox', 23)
    """
    cur.execute(statement, {'first': yesterday, 'second': today})
    self.conn.commit()

    # Run all crontabber jobs with the test configuration.
    config_manager = self._setup_config_manager(days_to_keep=1)
    with config_manager.context() as config:
        tab = CronTabber(config)
        tab.run_all()

        # The job must be recorded as successful, without error.
        job_state = self._load_structure()['clean-raw-adi-logs']
        assert job_state
        assert not job_state['last_error']
        assert job_state['last_success']

    # Exactly one row must remain, and it must be today's.
    cur.execute("""
        SELECT report_date FROM raw_adi_logs
    """)
    (remaining_row,) = cur.fetchall()
    eq_(remaining_row[0], today)
def get_signatures(self, **kwargs):
    """Return top crashers by signatures.

    See http://socorro.readthedocs.org/en/latest/middleware.html#tcbs

    Raises BadArgumentError when the requested duration exceeds 30 days.
    """
    default_duration = datetime.timedelta(7)
    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("crash_type", "all", "str"),
        ("to_date", datetimeutil.utc_now(), "datetime"),
        ("duration", default_duration, "timedelta"),
        ("os", None, "str"),
        ("limit", 100, "int"),
        ("date_range_type", None, "str")
    ]
    params = external_common.parse_arguments(filters, kwargs)
    params.logger = logger

    # twoPeriodTopCrasherComparison() derives its start date by taking
    # to_date - duration, so the duration is capped here
    longest_allowed = datetime.timedelta(30)
    if params.duration > longest_allowed:
        raise BadArgumentError('Duration too long. Max 30 days.')

    with self.get_connection() as connection:
        return tcbs.twoPeriodTopCrasherComparison(connection, params)
def test_create_release_with_beta_number_null(self):
    """create_release() accepts beta_number=None but raises
    MissingArgumentError for beta_number=0.
    """
    self._insert_release_channels()
    service = Releases(config=self.config)

    build_id = datetimeutil.utc_now().strftime('%Y%m%d%H%M')
    params = {
        'product': 'Firefox',
        'version': '1.0',
        'update_channel': 'beta',
        'build_id': build_id,
        'platform': 'Windows',
        'beta_number': None,
        'release_channel': 'Beta',
        'throttle': 1,
    }

    # a null beta number is fine
    ok_(service.create_release(**params))

    # but a zero beta number is rejected
    params['beta_number'] = 0
    assert_raises(
        MissingArgumentError,
        service.create_release,
        **params
    )
def test_basic_run(self):
    """With days_to_keep=1 the clean-raw-adi job succeeds and only the row
    dated today is left in raw_adi.
    """
    cur = self.conn.cursor()

    # Insert one row dated yesterday and one dated today.
    today = utc_now().date()
    yesterday = today - datetime.timedelta(days=1)
    statement = """
        INSERT INTO raw_adi
        (date, product_name, adi_count)
        VALUES
        (%(first)s, 'WinterFox', 11),
        (%(second)s, 'WinterFox', 23)
    """
    cur.execute(statement, {'first': yesterday, 'second': today})
    self.conn.commit()

    # Run all crontabber jobs with the test configuration.
    config_manager = self._setup_config_manager(days_to_keep=1)
    with config_manager.context() as config:
        tab = CronTabber(config)
        tab.run_all()

        # The job must be recorded as successful, without error.
        job_state = self._load_structure()['clean-raw-adi']
        assert job_state
        assert not job_state['last_error']
        assert job_state['last_success']

    # Exactly one row must remain, and it must be today's.
    cur.execute("""
        SELECT date FROM raw_adi
    """)
    (remaining_row,) = cur.fetchall()
    eq_(remaining_row[0], today)
def test_create_release(self):
    """POSTing a release to '/releases/release/' answers with data True."""
    self._insert_release_channels()
    self._insert_products()

    config_manager = self._setup_config_manager()
    with config_manager.context() as config:
        app = middleware_app.MiddlewareApp(config)
        app.main()
        server = middleware_app.application

        build_id = datetimeutil.utc_now().strftime('%Y%m%d%H%M')
        payload = {
            'product': 'Firefox',
            'version': '1.0',
            'update_channel': 'beta',
            'build_id': build_id,
            'platform': 'Windows',
            'beta_number': '1',
            'release_channel': 'Beta',
            'throttle': '1'
        }
        response = self.post(server, '/releases/release/', payload)
        eq_(response.data, True)
def POST(self, *args):
    """Accept a submitted crash, throttle it and save it when appropriate.

    Returns "Discarded=1\\n" or "Unsupported=1\\n" when the throttle result
    is DISCARD or IGNORE; otherwise the crash is saved and
    "CrashID=<prefix><crash_id>\\n" is returned.
    """
    raw_crash, dumps = self._get_raw_crash_from_form()

    current_timestamp = utc_now()
    raw_crash.submitted_timestamp = current_timestamp.isoformat()
    # legacy - ought to be removed someday
    raw_crash.timestamp = time.time()

    # reuse the submitted id only when that is allowed and one was sent
    if self.accept_submitted_crash_id and 'uuid' in raw_crash:
        crash_id = raw_crash.uuid
        self.logger.info('%s received with existing crash_id:', crash_id)
    else:
        crash_id = createNewOoid(current_timestamp)
        raw_crash.uuid = crash_id
        self.logger.info('%s received', crash_id)

    # reuse the submitted throttle decision only under the same conditions
    if ('legacy_processing' in raw_crash
            and self.accept_submitted_legacy_processing):
        raw_crash.legacy_processing = int(raw_crash.legacy_processing)
    else:
        throttle_result = self.throttler.throttle(raw_crash)
        raw_crash.legacy_processing, raw_crash.throttle_rate = throttle_result

    decision = raw_crash.legacy_processing
    if decision == DISCARD:
        self.logger.info('%s discarded', crash_id)
        return "Discarded=1\n"
    if decision == IGNORE:
        self.logger.info('%s ignored', crash_id)
        return "Unsupported=1\n"

    raw_crash.type_tag = self.dump_id_prefix.strip('-')
    self.crash_storage.save_raw_crash(raw_crash, dumps, crash_id)
    self.logger.info('%s accepted', crash_id)
    return "CrashID=%s%s\n" % (self.dump_id_prefix, crash_id)
def _current_slot(self):
    """Return two strings for the current time: the zero-padded hour and
    "<minute>_<slice>", where slice is the second divided (integer
    division) by config.minute_slice_interval.
    """
    now = utc_now()
    hour_part = "%02d" % now.hour
    slice_number = now.second // self.config.minute_slice_interval
    minute_part = "%02d_%02d" % (now.minute, slice_number)
    return [hour_part, minute_part]
def POST(self, *args):
    """Accept a submitted crash, save it and return its id.

    The response is "CrashID=<type_tag><crash_id>\\n".
    """
    raw_crash, dumps = self._get_raw_crash_from_form()

    current_timestamp = utc_now()
    raw_crash.submitted_timestamp = current_timestamp.isoformat()
    # legacy - ought to be removed someday
    raw_crash.timestamp = time.time()

    # reuse the submitted id only when that is allowed and one was sent
    if self.config.accept_submitted_crash_id and 'crash_id' in raw_crash:
        crash_id = raw_crash.crash_id
        self.logger.info('%s received with existing crash_id:', crash_id)
    else:
        crash_id = createNewOoid(current_timestamp)
        raw_crash.crash_id = crash_id
        self.logger.info('%s received', crash_id)

    raw_crash.type_tag = self.type_tag
    self.crash_storage.save_raw_crash(raw_crash, dumps, crash_id)
    self.logger.info('%s accepted', crash_id)
    return "CrashID=%s%s\n" % (self.type_tag, crash_id)
def test_basic_run(self):
    """With days_to_keep=1 the clean-missing-symbols job succeeds and only
    the row dated today is left in missing_symbols.
    """
    cur = self.conn.cursor()

    # Insert one row dated yesterday and one dated today.
    today = utc_now().date()
    yesterday = today - datetime.timedelta(days=1)
    statement = """
        INSERT INTO missing_symbols
        (date_processed, debug_file, debug_id, code_file, code_id)
        VALUES
        (%(first)s, 'foo.pdb', '0420', 'foo.py', '123'),
        (%(second)s, 'bar.pdb', '65EA9', 'bar.py', null)
    """
    cur.execute(statement, {'first': yesterday, 'second': today})
    self.conn.commit()

    # Run all crontabber jobs with the test configuration.
    config_manager = self._setup_config_manager(days_to_keep=1)
    with config_manager.context() as config:
        tab = CronTabber(config)
        tab.run_all()

        # The job must be recorded as successful, without error.
        job_state = self._load_structure()['clean-missing-symbols']
        assert job_state
        assert not job_state['last_error']
        assert job_state['last_success']

    # Exactly one row must remain, and it must be today's.
    cur.execute("""
        SELECT date_processed FROM missing_symbols
    """)
    (remaining_row,) = cur.fetchall()
    eq_(remaining_row[0], today)
def POST(self, *args):
    """Accept a submitted crash, report its size, and save it if accepted.

    Steps, in order:
      1. read the crash from the form and stamp it with the current time;
      2. keep the submitted crash id when allowed and present, otherwise
         create a new one;
      3. keep the submitted throttle decision when allowed and present,
         otherwise ask the throttler;
      4. send the request size to the metrics backend (best effort);
      5. stop early for DISCARD / IGNORE, otherwise save the crash.

    Returns "Discarded=1\\n", "Unsupported=1\\n" or
    "CrashID=<prefix><crash_id>\\n".
    """
    raw_crash, dumps = self._get_raw_crash_from_form()

    current_timestamp = utc_now()
    raw_crash.submitted_timestamp = current_timestamp.isoformat()
    # legacy - ought to be removed someday
    raw_crash.timestamp = time.time()

    if (not self.accept_submitted_crash_id or 'uuid' not in raw_crash):
        crash_id = createNewOoid(current_timestamp)
        raw_crash.uuid = crash_id
        self.logger.info('%s received', crash_id)
    else:
        crash_id = raw_crash.uuid
        self.logger.info('%s received with existing crash_id:', crash_id)

    if ('legacy_processing' not in raw_crash
            or not self.accept_submitted_legacy_processing):
        raw_crash.legacy_processing, raw_crash.throttle_rate = (
            self.throttler.throttle(raw_crash))
    else:
        raw_crash.legacy_processing = int(raw_crash.legacy_processing)

    try:
        # We want to capture the crash report size, but need to
        # differentiate between compressed vs. uncompressed data as well as
        # accepted vs. rejected data.
        crash_report_size = self._get_content_length()
        is_compressed = self._is_content_gzipped()
        is_accepted = (raw_crash.legacy_processing in (ACCEPT, DEFER))

        size_key = '_'.join([
            'crash_report_size',
            'accepted' if is_accepted else 'rejected',
            'compressed' if is_compressed else 'uncompressed',
        ])
        # (the former empty-dict initialisation was never read and is gone)
        metrics_data = {size_key: crash_report_size}
        self.metrics.capture_stats(metrics_data)
    except Exception:
        # We *never* want metrics reporting to prevent saving a crash, so
        # we catch everything and log an error.
        self.logger.error('metrics kicked up exception', exc_info=True)

    if raw_crash.legacy_processing == DISCARD:
        self.logger.info('%s discarded', crash_id)
        return "Discarded=1\n"
    if raw_crash.legacy_processing == IGNORE:
        self.logger.info('%s ignored', crash_id)
        return "Unsupported=1\n"

    raw_crash.type_tag = self.dump_id_prefix.strip('-')

    # Save crash to storage.
    self.crash_storage.save_raw_crash(raw_crash, dumps, crash_id)

    # Return crash id to http client.
    self.logger.info('%s accepted', crash_id)
    return "CrashID=%s%s\n" % (self.dump_id_prefix, crash_id)
def test_utc_now():
    """datetimeutil.utc_now() returns a datetime carrying UTC tzinfo."""
    res = datetimeutil.utc_now()
    # zone name first, then numeric offset
    expectations = (
        ('%Z', 'UTC'),
        ('%z', '+0000'),
    )
    for directive, expected in expectations:
        eq_(res.strftime(directive), expected)
    ok_(res.tzinfo)
def test_no_new_crashes(self):
    """No crash ids are reported for Firefox 43.0.1, neither on a fresh
    index nor once a_processed_crash has been indexed.
    """
    new_crash_source = ESNewCrashSource(self.config)
    self.health_check()

    one_day = datetime.timedelta(days=1)
    product = 'Firefox'
    versions = ['43.0.1']

    generator = new_crash_source.new_crashes(
        utc_now() - one_day, product, versions)
    eq_(list(generator), [])

    self.index_crash(
        a_processed_crash,
        raw_crash=a_raw_crash,
        crash_id=a_processed_crash['uuid'])
    self.refresh_index()

    # Same check now that a processed crash exists in the index,
    # but notably under a different name and version.
    generator = new_crash_source.new_crashes(
        utc_now() - one_day, product, versions)
    eq_(list(generator), [])
def test_get_parameters_date_defaults(self):
    """get_parameters() always yields two 'date' bounds.

    Checked with no date, with only an upper bound, with only a lower
    bound and with both bounds given.
    """
    with _get_config_manager().context() as config:
        search = SearchBaseWithFields(
            config=config,
        )

    now = datetimeutil.utc_now()
    to_string = datetimeutil.date_to_string

    def check_two_bounds(params):
        ok_('date' in params)
        eq_(len(params['date']), 2)

    def check_bounds(params, operators, dates):
        # operators / dates are given as (first bound, second bound)
        check_two_bounds(params)
        eq_(params['date'][0].operator, operators[0])
        eq_(params['date'][1].operator, operators[1])
        eq_(params['date'][0].value.date(), dates[0])
        eq_(params['date'][1].value.date(), dates[1])

    # Nothing passed: defaults are used for both bounds.
    check_two_bounds(search.get_parameters())

    # Only the high value passed: the low one is seven days earlier.
    args = {
        'date': '<%s' % to_string(now)
    }
    check_bounds(
        search.get_parameters(**args),
        ('<', '>='),
        (now.date(), now.date() - datetime.timedelta(days=7)),
    )

    # Only the low value passed: the high one is today.
    pasttime = now - datetime.timedelta(days=10)
    args = {
        'date': '>=%s' % to_string(pasttime)
    }
    check_bounds(
        search.get_parameters(**args),
        ('<=', '>='),
        (now.date(), pasttime.date()),
    )

    # Both values passed.
    pasttime = now - datetime.timedelta(days=10)
    args = {
        'date': [
            '<%s' % to_string(now),
            '>%s' % to_string(pasttime),
        ]
    }
    check_bounds(
        search.get_parameters(**args),
        ('<', '>'),
        (now.date(), pasttime.date()),
    )
def test_new_crashes(self):
    """Of three indexed crashes, only a_firefox_processed_crash is reported
    by new_crashes() for Firefox 43.0.1 starting one day ago.
    """
    new_crash_source = ESNewCrashSource(self.config)
    one_day = datetime.timedelta(days=1)

    def index(crash):
        # every crash is stored together with the same raw crash
        self.index_crash(
            crash,
            raw_crash=a_raw_crash,
            crash_id=crash['uuid']
        )

    index(a_processed_crash)
    index(a_firefox_processed_crash)

    # a second Firefox crash: different uuid, processed one day ago
    other_firefox_processed_crash = deepcopy(a_firefox_processed_crash)
    changed_uuid = other_firefox_processed_crash['uuid'].replace('a', 'e')
    other_firefox_processed_crash['uuid'] = changed_uuid
    other_firefox_processed_crash['date_processed'] = utc_now() - one_day
    index(other_firefox_processed_crash)

    self.refresh_index()

    # the two reference crashes can be fetched back
    assert self.connection.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=a_processed_crash['uuid']
    )
    assert self.connection.get(
        index=self.config.elasticsearch.elasticsearch_index,
        id=a_firefox_processed_crash['uuid']
    )

    generator = new_crash_source.new_crashes(
        utc_now() - one_day,
        'Firefox',
        ['43.0.1']
    )
    eq_(list(generator), [a_firefox_processed_crash['uuid']])
def _get_base(self, crash_id):
    """Override of the base method that names the daily file system root
    directory.

    Where the default class uses a YYYYMMDD form, this class uses a plain
    two-digit DD form.  That is what makes directories recycle: on the
    first day of a new month we are back in the directory structures
    created on the first day of the previous month.

    The day is taken from the crash_id, or from the current time when the
    crash_id yields no date.
    """
    date = dateFromOoid(crash_id) or utc_now()
    day_of_month = "%02d" % (date.day,)
    return [self.config.fs_root, day_of_month]
def get_missing_fields(self):
    """Return a list of all missing fields in our database.

    Take the list of all fields that were indexed in the last two weeks
    and do a diff with the list of known fields.

    NOTE(review): this block only gathers the existing fields into
    `all_existing_fields`; no diff or return statement is present in it -
    confirm against the complete function.
    """
    now = datetimeutil.utc_now()
    two_weeks_ago = now - datetime.timedelta(weeks=2)
    indices = self.generate_list_of_indexes(two_weeks_ago, now)

    es_connection = self.get_connection()
    index_client = elasticsearch.client.IndicesClient(es_connection)

    doctype = self.config.elasticsearch.elasticsearch_doctype

    def parse_mapping(mapping, namespace):
        """Return a set of all fields in a mapping. Parse the mapping
        recursively.

        Nested field names are joined to their namespace with a dot.
        """
        fields = set()

        for key in mapping:
            field = mapping[key]
            if namespace:
                field_full_name = '.'.join((namespace, key))
            else:
                field_full_name = key

            if 'properties' in field:
                # an object field: descend into its sub-fields
                fields.update(
                    parse_mapping(
                        field['properties'],
                        field_full_name
                    )
                )
            else:
                fields.add(field_full_name)

        return fields

    all_existing_fields = set()
    for index in indices:
        try:
            mapping = index_client.get_mapping(
                index=index,
            )
            properties = mapping[index]['mappings'][doctype]['properties']
            all_existing_fields.update(parse_mapping(properties, None))
        except elasticsearch.exceptions.NotFoundError as e:
            # If an index does not exist, this should not fail
            self.config.logger.warning(
                'Missing index in elasticsearch while running '
                'SuperSearchFields.get_missing_fields, error is: %s',
                str(e)
            )
def get_adu_by_signature(self, **kwargs):
    """Return a list of ADUs and crash counts by signature and ADU date.

    start_date defaults to one week ago and end_date to today.  Raises
    MissingArgumentError when start_date, end_date, signature or channel
    is empty, and BadArgumentError when the range exceeds 365 days.
    """
    today = datetimeutil.utc_now().date()
    a_week_ago = today - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", a_week_ago, "date"),
        ("end_date", today, "date"),
        ("signature", None, "str"),
        ("channel", None, "str"),
        ("product_name", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # NOTE(review): product_name is used by the query below but is not
    # part of this required list - confirm that is intended.
    required = ("start_date", "end_date", "signature", "channel")
    for name in required:
        if not params[name]:
            raise MissingArgumentError(name)

    requested_span = params.end_date - params.start_date
    if requested_span > datetime.timedelta(days=365):
        raise BadArgumentError('Duration too long. Max 365 days.')

    sql_query = """
        SELECT
            product_name,
            signature,
            adu_date::TEXT,
            build_date::TEXT,
            buildid::TEXT,
            crash_count,
            adu_count,
            os_name,
            channel
        FROM crash_adu_by_build_signature
        WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
        AND product_name = %(product_name)s
        AND channel = %(channel)s
        AND signature = %(signature)s
        ORDER BY buildid
    """

    error_message = (
        "Failed to retrieve crash ADU by build signature from PostgreSQL"
    )
    hits = self.query(
        sql_query, params, error_message=error_message
    ).zipped()

    return {
        "hits": hits,
        "total": len(hits)
    }
def _get_base(self, crash_id):
    """Define the daily file system root directory name, overriding the
    base method.

    The default class uses a YYYYMMDD form; this class substitutes a
    simple DD form.  This is the mechanism of directory recycling: on the
    first day of a new month we return to the same directory structures
    that were created on the first day of the previous month.
    """
    crash_date = dateFromOoid(crash_id)
    if crash_date:
        day = crash_date.day
    else:
        # no date could be read from the crash_id: use the current day
        day = utc_now().day
    return [self.config.fs_root, "%02d" % (day, )]
def createNewOoid(timestamp=None, depth=None):
    """Create a new Ooid for a given time, to be stored at a given depth.

    timestamp: its year-month-day is encoded in the ooid. If none, the
               current day is used
    depth: the expected storage depth, encoded in the ooid. If none, the
           defaultDepth is used; must lie between 1 and 4

    returns a new opaque id string made of the start of a random uuid4
    followed by the encoded depth and date
    """
    timestamp = timestamp or utc_now().date()
    depth = depth or defaultDepth
    assert 1 <= depth <= 4

    # the last 7 characters of the uuid are replaced by depth + YYMMDD
    random_part = str(uu.uuid4())[:-7]
    date_part = "%02d%02d%02d" % (
        timestamp.year % 100, timestamp.month, timestamp.day)
    return "%s%d%s" % (random_part, depth, date_part)
def uuidToOoid(uuid,timestamp=None, depth= None):
    """Create an ooid from a 32-hex-digit string in regular uuid format.

    uuid: must be a uuid in the expected format:
          xxxxxxxx-xxxx-xxxx-xxxx-xxxxx7777777
    timestamp: its year-month-day is encoded in the ooid. If none, the
               current day is used
    depth: the expected storage depth, encoded in the ooid. If none, the
           defaultDepth is used; must lie between 1 and 4

    returns a new opaque id string holding the provided uuid minus its
    last 7 characters, followed by the encoded depth and date
    """
    timestamp = timestamp or utc_now().date()
    depth = depth or defaultDepth
    assert 1 <= depth <= 4

    kept_part = uuid[:-7]
    date_part = "%02d%02d%02d" % (
        timestamp.year % 100, timestamp.month, timestamp.day)
    return "%s%d%s" % (kept_part, depth, date_part)
def test_get_with_indices(self, mocked_es):
    """post() passes the expected index list to the elasticsearch search.

    Covered: default indices, 'ALL', an explicit list, and default indices
    when the index name is a date-based schema.
    """
    mocked_connection = mock.Mock()
    mocked_es.Elasticsearch.return_value = mocked_connection
    search = mocked_connection.search
    es_config = self.api.config.elasticsearch

    # Default indices.
    self.api.post(query='{}')
    search.assert_called_with(
        body={},
        index=[es_config.elasticsearch_index],
        doc_type=es_config.elasticsearch_doctype
    )

    # All indices: no index and no doc_type are passed.
    self.api.post(query='{}', indices=['ALL'])
    search.assert_called_with(body={})

    # Forced indices are passed through untouched.
    forced = ['socorro_201801', 'socorro_200047', 'not_an_index']
    self.api.post(
        query='{}',
        indices=['socorro_201801', 'socorro_200047', 'not_an_index']
    )
    search.assert_called_with(
        body={},
        index=forced,
        doc_type=es_config.elasticsearch_doctype
    )

    # Default indices with an index schema based on dates.
    index_schema = 'socorro_%Y%W'
    config = self.get_base_config(es_index=index_schema)
    api = Query(config=config)

    now = datetimeutil.utc_now()
    indices = api.generate_list_of_indexes(
        now - datetime.timedelta(days=7), now)

    api.post(query='{}')
    search.assert_called_with(
        body={},
        index=indices,
        doc_type=api.config.elasticsearch.elasticsearch_doctype
    )
def get_adu_by_signature(self, **kwargs):
    """Return ADUs and crash counts by signature and ADU date.

    The default range runs from one week ago to today.  An empty
    start_date, end_date, signature or channel raises
    MissingArgumentError; a range above 365 days raises BadArgumentError.
    The result is a dict with the rows under "hits" and their number under
    "total".
    """
    end_default = datetimeutil.utc_now().date()
    start_default = end_default - datetime.timedelta(weeks=1)

    filters = [
        ("start_date", start_default, "date"),
        ("end_date", end_default, "date"),
        ("signature", None, "str"),
        ("channel", None, "str"),
        ("product_name", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    # NOTE(review): product_name feeds the query below but is not checked
    # here - confirm that is intended.
    for mandatory in ("start_date", "end_date", "signature", "channel"):
        if not params[mandatory]:
            raise MissingArgumentError(mandatory)

    one_year = datetime.timedelta(days=365)
    if (params.end_date - params.start_date) > one_year:
        raise BadArgumentError('Duration too long. Max 365 days.')

    sql_query = """
        SELECT
            product_name,
            signature,
            adu_date::TEXT,
            build_date::TEXT,
            buildid::TEXT,
            crash_count,
            adu_count,
            os_name,
            channel
        FROM crash_adu_by_build_signature
        WHERE adu_date BETWEEN %(start_date)s AND %(end_date)s
        AND product_name = %(product_name)s
        AND channel = %(channel)s
        AND signature = %(signature)s
        ORDER BY buildid
    """
    error_message = (
        "Failed to retrieve crash ADU by build signature from PostgreSQL")

    results = self.query(sql_query, params, error_message=error_message)
    rows = results.zipped()
    return {"hits": rows, "total": len(rows)}
def test_update_release_missingargumenterror(self):
    """create_release() raises MissingArgumentError for an empty product."""
    self._insert_release_channels()
    service = Releases(config=self.config)

    build_id = datetimeutil.utc_now().strftime('%Y%m%d%H%M')
    params = {
        'product': '',  # deliberately empty
        'version': '1.0',
        'update_channel': 'beta',
        'build_id': build_id,
        'platform': 'Windows',
        'beta_number': 1,
        'release_channel': 'Beta',
        'throttle': 1,
    }
    assert_raises(MissingArgumentError, service.create_release, **params)
def post(self, **kwargs):
    '''Run a custom query against elasticsearch.

    The `query` parameter must be a JSON document.  `indices` selects
    where to search: nothing means the indices of the last week, the
    single value 'ALL' means no index restriction, anything else is used
    as given.

    Raises MissingArgumentError when `query` is empty, BadArgumentError
    when it is not valid JSON, and ResourceNotFound when elasticsearch
    reports a missing index.

    NOTE(review): this block stores the search response in `results` but
    contains no return statement - confirm against the complete function.
    '''
    params = external_common.parse_arguments(self.filters, kwargs)

    if not params.query:
        raise MissingArgumentError('query')

    try:
        query = json.loads(params.query)
    except ValueError:
        raise BadArgumentError(
            'query',
            msg="Invalid JSON value for parameter 'query'"
        )

    # Set indices.
    indices = []
    if not params.indices:
        # By default, use the indices covering the last week.
        today = datetimeutil.utc_now()
        last_week = today - datetime.timedelta(days=7)

        indices = self.generate_list_of_indexes(last_week, today)
    elif len(params.indices) == 1 and params.indices[0] == 'ALL':
        # If we want all indices, just do nothing.
        pass
    else:
        indices = params.indices

    # index and doc_type are only sent when the search is restricted
    search_args = {}
    if indices:
        search_args['index'] = indices
        search_args['doc_type'] = (
            self.config.elasticsearch.elasticsearch_doctype
        )

    connection = self.get_connection()

    try:
        results = connection.search(
            body=query,
            **search_args
        )
    except elasticsearch.exceptions.NotFoundError as e:
        # the name of the missing index is extracted from the error text
        missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
        raise ResourceNotFound(
            "elasticsearch index '%s' does not exist" % missing_index
        )
def test_get_parameters_date_defaults(self):
    """The 'date' parameter always comes back as two bounds, with missing
    bounds filled in by get_parameters().
    """
    with _get_config_manager().context() as config:
        search = SearchBaseWithFields(config=config, )

    now = datetimeutil.utc_now()
    today = now.date()
    now_text = datetimeutil.date_to_string(now)

    # Nothing passed at all.
    params = search.get_parameters()
    ok_('date' in params)
    eq_(len(params['date']), 2)

    # Upper bound only: the lower bound is seven days before.
    params = search.get_parameters(**{'date': '<%s' % now_text})
    ok_('date' in params)
    eq_(len(params['date']), 2)
    upper, lower = params['date'][0], params['date'][1]
    eq_(upper.operator, '<')
    eq_(lower.operator, '>=')
    eq_(upper.value.date(), today)
    eq_(lower.value.date(), today - datetime.timedelta(days=7))

    # Lower bound only: the upper bound is today.
    pasttime = now - datetime.timedelta(days=10)
    past_text = datetimeutil.date_to_string(pasttime)
    params = search.get_parameters(**{'date': '>=%s' % past_text})
    ok_('date' in params)
    eq_(len(params['date']), 2)
    upper, lower = params['date'][0], params['date'][1]
    eq_(upper.operator, '<=')
    eq_(lower.operator, '>=')
    eq_(upper.value.date(), today)
    eq_(lower.value.date(), pasttime.date())

    # Both bounds.
    pasttime = now - datetime.timedelta(days=10)
    both = [
        '<%s' % datetimeutil.date_to_string(now),
        '>%s' % datetimeutil.date_to_string(pasttime),
    ]
    params = search.get_parameters(**{'date': both})
    ok_('date' in params)
    eq_(len(params['date']), 2)
    upper, lower = params['date'][0], params['date'][1]
    eq_(upper.operator, '<')
    eq_(lower.operator, '>')
    eq_(upper.value.date(), today)
    eq_(lower.value.date(), pasttime.date())
def test_create_release(self):
    """Creating a fully specified beta release returns a truthy result."""
    self._insert_release_channels()
    releases = Releases(config=self.config)

    # The build id is the current UTC time down to the minute.
    release = {
        'product': 'Firefox',
        'version': '1.0',
        'update_channel': 'beta',
        'build_id': datetimeutil.utc_now().strftime('%Y%m%d%H%M'),
        'platform': 'Windows',
        'beta_number': 1,
        'release_channel': 'Beta',
        'throttle': 1,
    }

    ok_(releases.create_release(**release))
def test_basic_run_no_errors(self):
    """Run the crontabber with a succeeding mocked Popen and inspect the
    three shell commands that were issued for the 'modulelist' job.
    """
    # Every command handed to the mocked Popen is recorded in this list.
    commands_sent = []
    self.Popen.side_effect = functools.partial(
        mocked_Popen,
        _commands_sent=commands_sent,
        _exit_code=0,
        _stdout='Bla bla',
        _stderr='',
    )

    with self._setup_config_manager().context() as config:
        CronTabber(config).run_all()

        information = self._load_structure()
        assert information['modulelist']
        last_error = information['modulelist']['last_error']
        if last_error:
            raise AssertionError(last_error)

        assert len(commands_sent) == 3
        first, second, third = commands_sent

        yesterday = utc_now() - datetime.timedelta(days=1)
        yesterday_fmt = yesterday.strftime('%Y%m%d')
        output_name = 'modulelist-%s-%s' % (yesterday_fmt, yesterday_fmt)

        for fragment in (
            'PIG_CLASSPATH=/some/place pig',
            '-param start_date=%s' % yesterday_fmt,
            '-param end_date=%s' % yesterday_fmt,
            '/some/place/modulelist.pig',
        ):
            ok_(fragment in first)

        for fragment in (
            'PIG_CLASSPATH=/some/place hadoop fs -getmerge',
            output_name,
            '/some/other/place/%s-modulelist.txt' % (yesterday_fmt, ),
        ):
            ok_(fragment in second)

        ok_('PIG_CLASSPATH=/some/place hadoop fs ' in third)
        # NOTE(review): this re-checks `second`, not `third` -- possibly a
        # copy/paste slip; kept as-is pending confirmation.
        ok_(output_name in second)

        # note that all jobs spew out 'Bla bla' on stdout
        config.logger.info.assert_called_with('Bla bla')
def test_create_release(self):
    """A complete set of release parameters is accepted by create_release."""
    self._insert_release_channels()
    service = Releases(config=self.config)

    # Build id derived from the current UTC time (minute precision).
    build_id = datetimeutil.utc_now().strftime('%Y%m%d%H%M')

    result = service.create_release(
        product='Firefox',
        version='1.0',
        update_channel='beta',
        build_id=build_id,
        platform='Windows',
        beta_number=1,
        release_channel='Beta',
        throttle=1
    )

    ok_(result)
def delete_old_indices(self):
    """Delete crash indices that are older than the retention policy.

    The retention policy is read from ``self.config.retention_policy`` and
    is expressed in weeks. Indices whose (possibly aliased) name does not
    match the configured index regex are left alone.
    """
    es_config = self.config.elasticsearch

    retention = datetime.timedelta(weeks=self.config.retention_policy)
    # Drop the timezone so the limit compares against strptime() results.
    time_limit = (utc_now() - retention).replace(tzinfo=None)

    index_client = es_config.elasticsearch_class(es_config).indices_client()

    status = index_client.status()
    indices = status['indices'].keys()
    aliases = index_client.get_aliases()

    for index in indices:
        name = index

        # Some indices look like 'socorro%Y%W_%Y%M%d', but they are
        # aliased to the expected format of 'socorro%Y%W'. In such cases,
        # work with the alias instead of the index name.
        if index in aliases and 'aliases' in aliases[index]:
            alias_names = aliases[index]['aliases'].keys()
            if alias_names:
                name = alias_names[0]

        if re.match(es_config.elasticsearch_index_regex, name):
            # strptime() does not take the week part of the name into
            # account, so the weeks are read from the last two characters
            # and added separately.
            index_date = datetime.datetime.strptime(
                name,
                es_config.elasticsearch_index
            )
            index_date += datetime.timedelta(weeks=int(name[-2:]))

            if index_date < time_limit:
                index_client.delete(name)  # Bad index! Go away!
        # Otherwise this doesn't look like a crash index, so skip it.
def post(self, **kwargs):
    """Return the result of a custom query.

    The keyword arguments are parsed against ``self.filters``. ``query``
    is mandatory and must contain valid JSON. ``indices`` is optional: when
    absent, the indices generated for the last seven days are used, and the
    single value ``'ALL'`` means no index restriction is sent at all.

    Raises:
        MissingArgumentError: no ``query`` was given.
        BadArgumentError: ``query`` could not be decoded as JSON.
        ResourceNotFound: the search hit an index that does not exist.
    """
    params = external_common.parse_arguments(self.filters, kwargs)

    if not params.query:
        raise MissingArgumentError('query')

    try:
        query = json.loads(params.query)
    except ValueError:
        raise BadArgumentError(
            'query',
            msg="Invalid JSON value for parameter 'query'")

    # Set indices.
    indices = []
    if not params.indices:
        # By default, use the indices covering the last seven days.
        today = datetimeutil.utc_now()
        last_week = today - datetime.timedelta(days=7)
        indices = self.generate_list_of_indexes(last_week, today)
    elif len(params.indices) == 1 and params.indices[0] == 'ALL':
        # If we want all indices, just do nothing.
        pass
    else:
        indices = params.indices

    search_args = {}
    if indices:
        search_args['index'] = indices
        search_args['doc_type'] = (
            self.config.elasticsearch.elasticsearch_doctype)

    connection = self.get_connection()

    try:
        results = connection.search(body=query, **search_args)
    except elasticsearch.exceptions.NotFoundError as e:
        # The ``as`` form works on Python 2.6+ and Python 3; the previous
        # comma form is a syntax error on Python 3.
        found = re.findall(BAD_INDEX_REGEX, e.error)
        if not found:
            # No index name could be extracted from the error: re-raise the
            # original exception instead of failing with an IndexError.
            raise
        raise ResourceNotFound("elasticsearch index '%s' does not exist"
                               % found[0])
    # NOTE(review): `results` is neither used nor returned in the visible
    # code -- confirm whether the rest of this method lives elsewhere.
def POST(self, *args):
    """Accept a crash submission and hand it to the crash storage.

    The raw crash is read from the submitted form, stamped with the
    submission time, given a crash id (a new one unless a submitted 'uuid'
    is accepted), throttled, and then saved.

    Returns one of "Discarded=1\\n", "Unsupported=1\\n" or
    "CrashID=<prefix><crash_id>\\n".
    """
    raw_crash, dumps = self._get_raw_crash_from_form()

    received_at = utc_now()
    raw_crash.submitted_timestamp = received_at.isoformat()
    # legacy - ought to be removed someday
    raw_crash.timestamp = time.time()

    if self.accept_submitted_crash_id and 'uuid' in raw_crash:
        # Reuse the id that came with the submission.
        crash_id = raw_crash.uuid
        self.logger.info('%s received with existing crash_id:', crash_id)
    else:
        crash_id = createNewOoid(received_at)
        raw_crash.uuid = crash_id
        self.logger.info('%s received', crash_id)

    if ('legacy_processing' in raw_crash
            and self.accept_submitted_legacy_processing):
        # Trust the throttling decision that came with the submission.
        raw_crash.legacy_processing = int(raw_crash.legacy_processing)
    else:
        raw_crash.legacy_processing, raw_crash.throttle_rate = (
            self.throttler.throttle(raw_crash)
        )

    if raw_crash.legacy_processing == DISCARD:
        self.logger.info('%s discarded', crash_id)
        return "Discarded=1\n"
    if raw_crash.legacy_processing == IGNORE:
        self.logger.info('%s ignored', crash_id)
        return "Unsupported=1\n"

    raw_crash.type_tag = self.dump_id_prefix.strip('-')

    self.crash_storage.save_raw_crash(raw_crash, dumps, crash_id)
    self.logger.info('%s accepted', crash_id)
    return "CrashID=%s%s\n" % (self.dump_id_prefix, crash_id)
def test_update_release_missingargumenterror(self):
    """create_release raises MissingArgumentError for an empty product."""
    self._insert_release_channels()
    releases = Releases(config=self.config)

    release = {
        'product': '',  # deliberately empty
        'version': '1.0',
        'update_channel': 'beta',
        'build_id': datetimeutil.utc_now().strftime('%Y%m%d%H%M'),
        'platform': 'Windows',
        'beta_number': 1,
        'release_channel': 'Beta',
        'throttle': 1,
    }

    assert_raises(MissingArgumentError, releases.create_release, **release)
def test_create_release_with_beta_number_null(self):
    """A beta_number of None is accepted, but a beta_number of 0 is not."""
    self._insert_release_channels()
    releases = Releases(config=self.config)

    release = {
        'product': 'Firefox',
        'version': '1.0',
        'update_channel': 'beta',
        'build_id': datetimeutil.utc_now().strftime('%Y%m%d%H%M'),
        'platform': 'Windows',
        'beta_number': None,
        'release_channel': 'Beta',
        'throttle': 1,
    }
    ok_(releases.create_release(**release))

    # but...
    release['beta_number'] = 0
    assert_raises(MissingArgumentError, releases.create_release, **release)
def get(self, **kwargs):
    """Return GC crashes per build ID.

    'product' and 'version' are required. 'from_date' and 'to_date' are
    optional and default to one week ago and today (UTC) respectively.

    Returns a dict with 'hits' (a list of result rows) and 'total' (the
    number of rows). Raises MissingArgumentError for the first required
    argument that is missing or empty.
    """
    missing = [
        name for name in ['product', 'version'] if not kwargs.get(name)
    ]
    if missing:
        raise MissingArgumentError(missing[0])

    today = datetimeutil.utc_now().date()
    a_week_ago = today - datetime.timedelta(weeks=1)

    params = external_common.parse_arguments(
        [
            ("product", None, "str"),
            ("version", None, "str"),
            ("from_date", a_week_ago, "date"),
            ("to_date", today, "date"),
        ],
        kwargs
    )

    sql = (
        " /* socorro.external.postgresql.gccrashes.GCCrashes.get */"
        " SELECT build::text, sum(gc_count_madu)"
        " FROM gccrashes"
        " JOIN product_versions"
        " USING (product_version_id)"
        " WHERE product_name = %(product)s"
        " AND version_string = %(version)s"
        " AND report_date BETWEEN %(from_date)s AND %(to_date)s"
        " AND build IS NOT NULL"
        " GROUP BY build"
        " ORDER BY build "
    )

    # Because we don't return a list of dicts, we turn it into a
    # pure list first so it becomes a list of tuples.
    rows = list(self.query(sql, params))
    return {'hits': rows, 'total': len(rows)}
def get_signatures(self, **kwargs):
    """Return top crashers by signatures.

    See https://socorro.readthedocs.io/en/latest/middleware.html#tcbs

    Raises BadArgumentError when the requested duration exceeds 30 days.
    """
    longest_allowed = datetime.timedelta(30)

    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("crash_type", "all", "str"),
        ("to_date", datetimeutil.utc_now(), "datetime"),
        ("duration", datetime.timedelta(7), "timedelta"),
        ("os", None, "str"),
        ("limit", 100, "int"),
        ("date_range_type", None, "str"),
    ]
    params = external_common.parse_arguments(filters, kwargs)
    params.logger = logger

    # what the twoPeriodTopCrasherComparison() function does is that it
    # makes a start date from taking the to_date - duration
    if params.duration > longest_allowed:
        # NOTE(review): BadArgumentError is elsewhere called with a
        # parameter name plus msg= -- confirm this positional message is
        # what is intended.
        raise BadArgumentError('Duration too long. Max 30 days.')

    with self.get_connection() as connection:
        return tcbs.twoPeriodTopCrasherComparison(connection, params)
def test_crashes_comments_with_data(self):
    """Insert two reports with comments and fetch the comments of one
    signature through the middleware app.
    """
    config_manager = self._setup_config_manager()

    now = datetimeutil.utc_now()
    # Template of the form '%s-YYMMDD'; the remaining %s is filled in below.
    uuid_template = "%%s-%s" % now.strftime("%y%m%d")

    insert_reports = (
        " INSERT INTO reports"
        " (id, date_processed, uuid, signature, user_comments)"
        " VALUES"
        " ( 1, %s, %s, 'sig1', 'crap' ),"
        " ( 2, %s, %s, 'sig2', 'great' ); "
    )
    cursor = self.conn.cursor()
    cursor.execute(
        insert_reports,
        (now, uuid_template % "a1", now, uuid_template % "a2")
    )
    self.conn.commit()

    with config_manager.context() as config:
        middleware_app.MiddlewareApp(config).main()
        server = middleware_app.application

        query = {'signature': 'sig1', 'from': now, 'to': now}
        response = self.get(server, '/crashes/comments/', query)

        eq_(response.data['total'], 1)
        eq_(response.data['hits'][0]['user_comments'], 'crap')
def get(self, **kwargs):
    """Return GC crashes per build ID.

    Requires 'product' and 'version'; the optional 'from_date'/'to_date'
    range defaults to the last week up to today (UTC). The result is a dict
    holding the rows under 'hits' and their count under 'total'.
    """
    for required in ('product', 'version'):
        if not kwargs.get(required):
            raise MissingArgumentError(required)

    to_default = datetimeutil.utc_now().date()
    from_default = to_default - datetime.timedelta(weeks=1)

    filters = [
        ("product", None, "str"),
        ("version", None, "str"),
        ("from_date", from_default, "date"),
        ("to_date", to_default, "date"),
    ]
    params = external_common.parse_arguments(filters, kwargs)

    gc_crashes_sql = (
        " /* socorro.external.postgresql.gccrashes.GCCrashes.get */ "
        "SELECT build::text, sum(gc_count_madu) "
        "FROM gccrashes "
        "JOIN product_versions USING (product_version_id) "
        "WHERE product_name = %(product)s "
        "AND version_string = %(version)s "
        "AND report_date BETWEEN %(from_date)s AND %(to_date)s "
        "AND build IS NOT NULL "
        "GROUP BY build "
        "ORDER BY build "
    )
    result = self.query(gc_crashes_sql, params)

    # Because we don't return a list of dicts, we turn it into a
    # pure list first so it becomes a list of tuples.
    hits = list(result)
    return {'hits': hits, 'total': len(hits)}
class PGPVNewCrashSource(PGQueryNewCrashSource):
    """New-crash source that selects crash ids for one configured date.

    The default query selects uuids from reports_clean whose date_processed
    falls within the configured date (inclusive) and the following day
    (exclusive), joined to product versions whose build/sunset range
    contains that date. Presumably the parent class substitutes
    ``self.data`` into the three ``%s`` placeholders -- verify against
    PGQueryNewCrashSource.
    """
    required_config = Namespace()
    required_config.crash_id_query = change_default(
        PGQueryNewCrashSource,
        'crash_id_query',
        "select uuid "
        "from reports_clean rc join product_versions pv "
        " on rc.product_version_id = pv.product_version_id "
        "where "
        "%s <= date_processed and date_processed < %s "
        "and %s between pv.build_date and pv.sunset_date"
    )
    required_config.add_option(
        'date',
        doc="a date in the form YYYY-MM-DD",
        # Yesterday (UTC), computed once when the class body is executed.
        default=(utc_now() - timedelta(1)).date(),
        from_string_converter=string_to_datetime
    )

    def __init__(self, config, name, quit_check_callback=None):
        """Set up the parent source and the three query parameters."""
        super(PGPVNewCrashSource, self).__init__(
            config,
            name,
            quit_check_callback
        )
        window_start = config.date
        window_end = window_start + timedelta(1)  # add a day
        self.data = (window_start, window_end, window_start)
def setUp(self):
    """Build the fixture data: a base date, ten random uuids, and the
    date-stamped (and depth-stamped) ids derived from them.
    """
    base = dt.datetime(2008, 12, 25, tzinfo=UTC)
    self.baseDate = base
    self.depths = [4, 4, 3, 3, 3, 2, 2, 2, 1, 1]

    # Two ids whose last eight characters are fixed, non-date suffixes.
    self.badooid0 = "%s%s" % (str(uu.uuid4())[:-8], 'ffeea1b2')
    self.badooid1 = "%s%s" % (str(uu.uuid4())[:-8], 'f3eea1b2')

    self.rawuuids = [str(uu.uuid4()) for _ in range(10)]
    assert len(self.depths) == len(self.rawuuids)

    # Last eight characters replaced by the base date as YYYYMMDD.
    self.yyyyoids = [
        "%s%4d%02d%02d" % (raw[:-8], base.year, base.month, base.day)
        for raw in self.rawuuids
    ]
    # Last seven characters replaced by a depth digit plus YYMMDD.
    self.dyyoids = [
        "%s%d%02d%02d%02d" % (
            raw[:-7], depth, base.year % 100, base.month, base.day
        )
        for raw, depth in zip(self.rawuuids, self.depths)
    ]

    today = utc_now()
    # Midnight (UTC) of the current day.
    self.nowstamp = dt.datetime(
        today.year, today.month, today.day, tzinfo=UTC
    )
    self.xmas05 = dt.datetime(2005, 12, 25, tzinfo=UTC)
def delete_old_indices(self):
    """Remove crash indices that fall outside the retention window.

    The window is ``self.config.retention_policy`` weeks back from now.
    An index is judged by its alias when it has one, and is only considered
    if that name matches the configured crash-index regex.
    """
    weeks_kept = datetime.timedelta(weeks=self.config.retention_policy)
    # Made naive so it is comparable with what strptime() returns.
    oldest_allowed = (utc_now() - weeks_kept).replace(tzinfo=None)

    es_class = self.config.elasticsearch.elasticsearch_class(
        self.config.elasticsearch)
    client = es_class.indices_client()

    index_names = client.status()['indices'].keys()
    aliases = client.get_aliases()

    for candidate in index_names:
        # Some indices look like 'socorro%Y%W_%Y%M%d', but they are
        # aliased to the expected format of 'socorro%Y%W'. In such cases,
        # replace the index with the alias.
        if candidate in aliases and 'aliases' in aliases[candidate]:
            candidate_aliases = aliases[candidate]['aliases'].keys()
            if candidate_aliases:
                candidate = candidate_aliases[0]

        looks_like_crash_index = re.match(
            self.config.elasticsearch.elasticsearch_index_regex,
            candidate)
        if not looks_like_crash_index:
            # This index doesn't look like a crash index, let's skip it.
            continue

        # This won't take the week part of our indices into account...
        year_start = datetime.datetime.strptime(
            candidate,
            self.config.elasticsearch.elasticsearch_index)
        # So we need to get that differently, and then add it to the date.
        week_offset = datetime.timedelta(weeks=int(candidate[-2:]))

        if year_start + week_offset < oldest_allowed:
            client.delete(candidate)  # Bad index! Go away!
def test_test_mapping(self):
    """Exercise the API's test_mapping() with good and bad mappings.

    A valid mapping yields None; a mapping with an unknown storage type,
    and one whose type cannot index the existing data, both raise
    BadArgumentError.
    """
    # First test a valid mapping.
    valid_mapping = self.api.get_mapping()
    ok_(self.api.test_mapping(valid_mapping) is None)

    # Insert an invalid storage mapping.
    unknown_type_field = {
        'name': 'fake_field',
        'storage_mapping': {'type': 'unkwown'},
    }
    assert_raises(
        BadArgumentError,
        self.api.test_mapping,
        self.api.get_mapping(unknown_type_field),
    )

    # Test with a correct mapping but with data that cannot be indexed.
    crash = {
        'date_processed': datetimeutil.utc_now(),
        'product': 'WaterWolf',
    }
    self.index_crash(crash)
    self.refresh_index()

    product_as_long = {
        'name': 'product',
        'storage_mapping': {'type': 'long'},
    }
    assert_raises(
        BadArgumentError,
        self.api.test_mapping,
        self.api.get_mapping(product_as_long),
    )
def setUp(self):
    """Populate the products and product_versions tables with fake data.

    Every product version gets a build date 30 days in the past and a
    sunset date 30 days in the future, relative to the current UTC time.
    """
    super(IntegrationTestReleases, self).setUp()

    cursor = self.connection.cursor()

    # Insert data
    now = datetimeutil.utc_now()
    month = datetime.timedelta(days=30)
    dates = {
        "build_date": now - month,
        "sunset_date": now + month,
    }

    insert_products = (
        " INSERT INTO products"
        " (product_name, sort, release_name)"
        " VALUES"
        " ( 'Firefox', 1, 'firefox' ),"
        " ( 'FennecAndroid', 2, 'fennecandroid' ),"
        " ( 'Thunderbird', 3, 'thunderbird' ); "
    )
    cursor.execute(insert_products)

    # The date placeholders are filled in with Python string formatting
    # before the statement is executed.
    insert_versions = (
        " INSERT INTO product_versions"
        " (product_version_id, product_name, major_version, release_version,"
        " version_string, version_sort, build_date, sunset_date,"
        " featured_version, build_type)"
        " VALUES"
        " ( 1, 'Firefox', '15.0', '15.0', '15.0a1', '000000150a1',"
        " '%(build_date)s', '%(sunset_date)s', 't', 'Nightly' ),"
        " ( 2, 'Firefox', '14.0', '14.0', '14.0a2', '000000140a2',"
        " '%(build_date)s', '%(sunset_date)s', 'f', 'Aurora' ),"
        " ( 3, 'Firefox', '13.0', '13.0', '13.0b1', '000000130b1',"
        " '%(build_date)s', '%(sunset_date)s', 't', 'Beta' ),"
        " ( 4, 'FennecAndroid', '15.0', '15.0', '15.0a1', '000000150a1',"
        " '%(build_date)s', '%(sunset_date)s', 't', 'Nightly' ),"
        " ( 5, 'FennecAndroid', '14.0', '14.0', '14.0a1', '000000140a1',"
        " '%(build_date)s', '%(sunset_date)s', 'f', 'Aurora' ),"
        " ( 6, 'Thunderbird', '15.0', '15.0', '15.0a1', '000000150a1',"
        " '%(build_date)s', '%(sunset_date)s', 't', 'Nightly' ),"
        " ( 7, 'Firefox', '24.5', '24.5.0esr', '24.5.0esr', '024005000x000',"
        " '%(build_date)s', '%(sunset_date)s', 'f', 'ESR' )"
        " ; "
    )
    cursor.execute(insert_versions % dates)

    self.connection.commit()