def __init__(self, search, crash, project=None, dry_run=True,
             explain=False, saved=False, logdf=False):
    """Wrap a crash for the report API.

    :param search: Search object carrying the Context (strategy,
        thresholds, index).
    :param crash: one of: a string database id (loaded from ES),
        a plain dict, a Crash, or an ESCrash.
    :param project: project to associate; may instead come from the
        crash itself (reconciled by fix_project()).
    :param dry_run: if True, the report must not be saved.
    :param explain: request bucketing explanations from the strategy.
    :param saved: whether the crash already exists in ES; only
        honoured for dict/Crash inputs — string ids and ESCrash
        instances are always treated as saved.
    :param logdf: include per-frame log document-frequency data when
        REST-ifying.
    :raises TypeError: if ``crash`` is not one of the accepted types.
    """
    self.came_from = search
    context = search.context
    self.context = context
    # NOTE(review): isinstance order matters; if Crash/ESCrash are
    # dict subclasses the dict branch shadows the later ones —
    # confirm the class hierarchy before reordering.
    if isinstance(crash, string_types):
        # A bare database id: fetch the stored crash from ES.
        self.crash = ESCrash(self.context.index, crash)
        self.saved = True
    elif isinstance(crash, dict):
        self.crash = Crash(crash)
        self.saved = saved
    elif isinstance(crash, ESCrash):
        self.crash = crash
        self.saved = True
    elif isinstance(crash, Crash):
        self.crash = crash
        self.saved = saved
    else:
        # Previously an unsupported type fell through silently,
        # leaving self.crash unset and causing a confusing
        # AttributeError later in validate().
        raise TypeError("crash must be a string id, dict, Crash or "
                        "ESCrash; got %s" % (crash.__class__.__name__,))
    self.strategy = context.strategy
    self.dry_run = dry_run
    self.ran = False
    self.validate()
    self.project = project
    self.fix_project()
    self.thresholds = context.thresholds
    self.index = context.index
    self.explain = explain
    self.fix_crash()
    self.logdf = logdf
def get_reports_by_bucket(response, threshold):
    """Group the crashes in an ElasticSearch response by bucket.

    Returns a plain dict mapping bucket_id => list of Crash objects.
    """
    grouped = defaultdict(list)
    for hit in response['hits']['hits']:
        report = Crash(hit['_source'])
        grouped[report.get_bucket_id(threshold)].append(report)
    return dict(grouped)
def de_elastify(d):
    """Convert a raw ElasticSearch document dict into a Crash.

    (Not an ESCrash: to get one of those, call ESCrash() on the
    result.)
    """
    # Re-hydrate the fields that ES stores in serialized form.
    for key, convert in (('buckets', ESBuckets), ('date', parse_es_date)):
        if key in d:
            d[key] = convert(d[key])
    return Crash(d)
def get_a_bunch_of_crashes(self, date_range_start, limit):
    """Page through the search endpoint, collecting up to ``limit`` crashes.

    Fetches in fixed-size pages starting at ``date_range_start``.
    """
    page_size = 100
    collected = []
    offset = 0
    while offset < limit:
        params = {
            'from': offset,
            'since': date_range_start,
            'size': page_size,
        }
        response = requests.get(self.path_to('*', 'search'), params=params)
        response.raise_for_status()
        collected.extend(Crash(raw) for raw in response.json())
        offset += page_size
    return collected
def test_es_add(self):
    """Round-trip an example crash through ESCrash.

    Checks that ESCrash deduplicates live instances (constructing
    from the same data or the same id yields the *same* object),
    and that a crash re-fetched from Elasticsearch compares equal
    to the original fixture data.
    """
    import gc
    es = ESCrash.es
    # Start from a clean index; ignore "does not exist" errors.
    es.indices.delete(index='crashes', ignore=[400, 404])
    mycrash = ESCrash(self.exampleCrash1)
    mycrash_dupe = ESCrash(self.exampleCrash1)
    # Identity, not just equality: ESCrash caches live instances.
    assert mycrash is mycrash_dupe
    mycrash_another = ESCrash('exampleCrash1')
    assert mycrash is mycrash_another
    # Drop every live reference so the cached instance can be
    # collected, forcing the next lookup to hit Elasticsearch.
    del mycrash
    del mycrash_another
    del mycrash_dupe
    gc.collect()
    es.indices.flush(index='crashes')
    time.sleep(1)  # give ES a moment to make the flush visible
    fetched_from_es = ESCrash('exampleCrash1')
    fetched_from_es_undone = Crash(fetched_from_es)
    assert fetched_from_es_undone == self.exampleCrash1
    # NOTE(review): this write is never asserted on — it looks like
    # the follow-up check was removed or the test was truncated.
    fetched_from_es['cpu'] = 'amd64'
def get_crash(self, database_id, project):
    """Fetch one crash from Elasticsearch, annotating frames with logdf.

    :param database_id: id of the crash document in ES.
    :param project: unused in this body — presumably kept for
        interface compatibility with other backends; TODO confirm.
    :return: a Crash; each frame with a non-empty function also gets
        a 'logdf' key (negative log2 of the function's relative
        document frequency across the index — rarer is larger).
    :raises KeyError: if no crash with that id exists.
    """
    self._connect_to_elasticsearch()
    crash = None
    try:
        crash = ESCrash(database_id, index=self.es_index)
    except NotFoundError as e:
        # Translate the ES-specific error into a plain KeyError.
        raise KeyError(database_id)
    # Ask ES for term statistics on the whole-function field so we
    # can compute how common each function is across all crashes.
    response = self.es.termvectors(index=self.es_index, doc_type='crash',
                                   id=database_id,
                                   fields='stacktrace.function.whole',
                                   term_statistics=True,
                                   offsets=False,
                                   positions=False)
    #with open('termvectors', 'wb') as termvectorsfile:
    #print(json.dumps(response, indent=2), file=termvectorsfile)
    if 'stacktrace.function.whole' in response['term_vectors']:
        vectors = response['term_vectors']['stacktrace.function.whole']
        all_doc_count = float(vectors['field_statistics']['doc_count'])
        # Detach from ES so the logdf annotations below are not
        # written back to the index.
        crash = Crash(crash)
        # Sometimes there's extra functions on top of the stack for
        # logging/cleanup/handling/rethrowing/whatever that get called
        # after the fault but before the trace is generated, and are
        # present for multiple crash locations. So except on the
        # full detail page, we don't want to display them.
        # This is for that.
        for frame in crash['stacktrace']:
            if 'function' in frame and frame['function']:
                function = frame['function']
                # assumes every stack function appears verbatim in
                # the term vectors — TODO verify analyzer settings
                term = vectors['terms'][function]
                relativedf = float(term['doc_freq']) / all_doc_count
                logdf = -1.0 * math.log(relativedf, 2)
                #print(logdf, file=sys.stderr)
                frame['logdf'] = logdf
    return crash
def ingest(self, crash, dryrun=False):
    """Normalize a crash report and assign it to buckets.

    ``crash`` may be a simple dictionary or a pre-existing Crash
    instance.

    :return: the saved crash
    :rtype Crash:
    :raises IdenticalReportError:
    """
    normalized = Crash(crash)
    # Sanity-check that Crash() coerced the trace into our types.
    if 'stacktrace' in normalized:
        trace = normalized['stacktrace']
        assert isinstance(trace, Stacktrace)
        top_frame = trace[0]
        assert isinstance(top_frame, Stackframe)
        if 'address' in top_frame:
            assert isinstance(top_frame['address'], basestring)
    if not dryrun:
        return self.bucketer.assign_save_buckets(normalized)
    # Dry run: compute buckets but persist nothing.
    normalized['buckets'] = self.bucketer.assign_buckets(normalized)
    return normalized
# NOTE(review): this fragment is the interior of a larger loop — it
# reads `bucket`/`bucketdir` and the accumulators (`buckets`,
# `crashes`, `oracle`, `no_stacktrace`, `bugs_total`) from the
# enclosing scope, which is outside this view.
assert os.path.isdir(bucketdir)
buglist = os.listdir(bucketdir)
#if len(buglist) < 2:
    #continue
buckets.append(bucket)
INFO(bucket)
for bugdir in buglist:
    # Each entry in the bucket directory is one bug's directory.
    bugdir = os.path.join(bucketdir, bugdir)
    INFO(bugdir)
    assert os.path.isdir(bugdir)
    #print repr(os.listdir(bugdir))
    if len(os.listdir(bugdir)) >= 1:
        # Derive the crash's database id from the directory name.
        database_id = 'launchpad:' + os.path.basename(bugdir)
        try:
            INFO("Disk: " + database_id)
            crashdata = Crash.load_from_file(bugdir)
        except IOError as e:
            if "No stacktrace" in str(e):
                # Skip (but count) bugs that have no stack trace.
                no_stacktrace += 1
                continue
            else:
                raise
        crashes[database_id] = crashdata
        # The oracle records which bucket this crash *should* land in.
        oracledata = Crash({
            'database_id': database_id,
            'bucket': bucket,
        })
        oracle[database_id] = oracledata
        bugs_total += 1
        # e.g. 'launchpad:12345' -> '12345'
        match = re.match(r'[^:]+:(\d+)$', database_id)
        sql_id = match.group(1)
class Report(object):
    """Object representing the API functionality for an individual crash."""

    def __init__(self, search, crash, project=None, dry_run=True,
                 explain=False, saved=False, logdf=False):
        """Wrap a crash for the report API.

        :param search: Search object carrying the Context.
        :param crash: a string database id, a dict, a Crash, or an
            ESCrash.
        :param project: project to associate; may instead come from
            the crash itself (reconciled by fix_project()).
        :param dry_run: if True, save() must not be called.
        :param explain: request bucketing explanations.
        :param saved: whether the crash already exists in ES; only
            honoured for dict/Crash inputs.
        :param logdf: include per-frame logdf data when REST-ifying.
        :raises TypeError: if ``crash`` is not an accepted type.
        """
        self.came_from = search
        context = search.context
        self.context = context
        if isinstance(crash, string_types):
            # A bare database id: fetch the stored crash from ES.
            self.crash = ESCrash(self.context.index, crash)
            self.saved = True
        elif isinstance(crash, dict):
            self.crash = Crash(crash)
            self.saved = saved
        elif isinstance(crash, ESCrash):
            self.crash = crash
            self.saved = True
        elif isinstance(crash, Crash):
            self.crash = crash
            self.saved = saved
        else:
            # Previously an unsupported type fell through silently,
            # leaving self.crash unset and causing a confusing
            # AttributeError later in validate().
            raise TypeError("crash must be a string id, dict, Crash or "
                            "ESCrash; got %s"
                            % (crash.__class__.__name__,))
        self.strategy = context.strategy
        self.dry_run = dry_run
        self.ran = False
        self.validate()
        self.project = project
        self.fix_project()
        self.thresholds = context.thresholds
        self.index = context.index
        self.explain = explain
        self.fix_crash()
        self.logdf = logdf

    def fix_crash(self):
        """Replace raw fields on the crash with API wrapper objects
        (ReportProject, ReportType, ReportBucket)."""
        if isinstance(self.crash, ESCrash):
            self.crash = self.crash.as_crash()
        # Imported here rather than at module level, presumably to
        # break an import cycle — confirm before hoisting.
        from partycrasher.api.report_bucket import ReportBucket
        from partycrasher.api.report_project import ReportProject
        from partycrasher.api.report_type import ReportType
        from partycrasher.api.search import Search
        # BUG FIX: the failure message previously read
        # `context.__class__.__name__`, but no `context` name exists
        # in this scope, so a failing assert raised NameError instead
        # of the intended AssertionError.
        assert isinstance(self.context, Context), \
            self.context.__class__.__name__
        self.crash['project'] = ReportProject(
            search=Search(context=self.context),
            project=self.crash['project'])
        self.crash['type'] = ReportType(
            search=Search(context=self.context),
            report_type=self.crash['type'])
        if 'buckets' in self.crash:
            # list() because we replace values during iteration.
            for k, v in list(self.crash['buckets'].items()):
                if isinstance(v, Bucket):
                    self.crash['buckets'][k] = ReportBucket(
                        search=Search(context=self.context),
                        id=v['id'],
                        threshold=v['threshold'])

    def fix_project(self):
        """Reconcile self.project with the crash's own project field.

        :raises NoProjectSpecifiedError: if neither is set.
        :raises ProjectMismatchError: if both are set and disagree.
        """
        crash_project = None
        if 'project' in self.crash:
            if isinstance(self.crash['project'], Project):
                crash_project = self.crash['project'].name
            else:
                crash_project = self.crash['project']
        if crash_project is None:
            if self.project is None:
                raise NoProjectSpecifiedError(self.project, self.crash)
            else:
                # Crash lacks a project; take it from the argument.
                self.crash['project'] = self.project
                return self.project
        else:
            if self.project is None:
                self.project = crash_project
                return crash_project
            else:
                # both not none
                if crash_project != self.project:
                    raise ProjectMismatchError(self.project, self.crash)
                else:
                    return self.project

    def validate(self):
        """Do some extra runtime checking that should be unnecessary
        if the Crash class is operating correctly."""
        true_crash = self.crash
        if 'stacktrace' in true_crash:
            assert isinstance(true_crash['stacktrace'], Stacktrace)
            assert isinstance(true_crash['stacktrace'][0], Stackframe)
            if 'address' in true_crash['stacktrace'][0]:
                assert isinstance(
                    true_crash['stacktrace'][0]['address'],
                    string_types), (
                    "address must be a string instead of %s"
                    % (true_crash['stacktrace'][0]['address'].__class__))

    def search(self, explain=None):
        """Run the search (at most once; the result is cached).

        :raises RuntimeError: if the crash is already saved and no
            explanation was requested (nothing to search for).
        """
        #error("Searching with explain=" + str(explain))
        if explain is not None:
            self.explain = explain
        del explain
        if not self.ran:
            if (not self.explain) and self.saved:
                raise RuntimeError(
                    "Requested search but there was no reason to search")
            self.es_result = self.strategy.query(self.crash, self.explain)
            self.ran = True
        return self.es_result

    def save(self):
        """Save the crash with assigned buckets to ES."""
        assert not self.dry_run
        assert not self.saved
        if 'buckets' not in self.crash:
            self.assign_buckets()
        self.crash['buckets'].create()
        saved_crash = ESCrash(crash=self.crash, index=self.index)
        assert saved_crash is not None
        self.crash = saved_crash
        self.saved = True
        return saved_crash

    def assign_buckets(self):
        """Assigns buckets to this crash and returns the assigned buckets."""
        assert 'buckets' not in self.crash
        self.search()
        buckets = self.strategy.matching_buckets(self.thresholds,
                                                 self.es_result)
        if 'force_bucket' in self.crash:
            warn("Warning: overriding buckets to %s with force_bucket!"
                 % (self.crash['force_bucket']))
            # Override every threshold's bucket, but keep top_match.
            for key in buckets:
                if key != 'top_match':
                    buckets[key] = self.crash['force_bucket']
        assert isinstance(buckets, Buckets)
        assert 'top_match' in buckets
        self.crash["buckets"] = buckets
        return buckets

    @property
    def assigned_buckets(self):
        """Returns the buckets assigned to this crash."""
        if 'buckets' not in self.crash:
            return self.assign_buckets()
        return self.crash['buckets']

    @property
    def explanation(self):
        """
        Returns the explanation of why it would be bucketed
        now the way it would. This is not necessarily the original
        bucketing.
        """
        if self.explain:
            self.search()
            return self.es_result.explanation
        else:
            return None

    @property
    def auto_summary(self):
        """
        Returns the summary of the explanation of why it would be
        bucketed now the way it would. This is not necessarily the
        original bucketing.
        """
        if self.explain:
            self.search()
            return self.es_result.explanation_summary
        else:
            return None

    def compare(self, other_report):
        """
        Returns an explanation summary comparing two reports.
        """
        # BUG FIX: this was decorated @property despite taking an
        # argument, so any access raised TypeError instead of the
        # intended NotImplementedError; the code after the raise was
        # unreachable and has been removed.
        raise NotImplementedError(
            "Report comparisons not currently implemented.")

    def crash_with_termvectors(self):
        """Returns the crash with logdf information included."""
        assert self.saved
        database_id = self.crash['database_id']
        response = self.context.index.termvectors(
            doc_type='crash',
            id=database_id,
            fields='stacktrace.function.whole',
            term_statistics=True,
            offsets=False,
            positions=False)
        #with open('termvectors', 'wb') as termvectorsfile:
        #print(json.dumps(response, indent=2), file=termvectorsfile)
        if isinstance(self.crash, ESCrash):
            # Detach so the logdf annotations are not saved back.
            crash = self.crash.as_crash()
        else:
            crash = self.crash
        if 'stacktrace.function.whole' in response['term_vectors']:
            vectors = response['term_vectors']['stacktrace.function.whole']
            all_doc_count = float(vectors['field_statistics']['doc_count'])
            # Sometimes there's extra functions on top of the stack for
            # logging/cleanup/handling/rethrowing/whatever that get called
            # after the fault but before the trace is generated, and are
            # present for multiple crash locations. So except on the
            # full detail page, we don't want to display them.
            # This is for that.
            for frame in crash['stacktrace']:
                if 'function' in frame and frame['function']:
                    function = frame['function']
                    term = vectors['terms'][function]
                    relativedf = float(term['doc_freq']) / all_doc_count
                    logdf = -1.0 * math.log(relativedf, 2)
                    #print(logdf, file=sys.stderr)
                    frame['logdf'] = logdf
        return crash

    def restify_(self):
        """Build the REST-serializable dict for this report."""
        assert self.project is not None
        if self.logdf:
            crash = self.crash_with_termvectors()
        else:
            crash = self.crash
        d = {
            'report': crash,
            'saved': self.saved,
        }
        if self.explain:
            d['explanation'] = self.explanation
            d['auto_summary'] = self.auto_summary
        return d

    @property
    def database_id(self):
        """The crash's unique id in the database."""
        return self.crash['database_id']
def search(self, query_string, since=None, until=None, project=None,
           from_=None, size=None, sort=None, order=None):
    """Full-text search over stored crash reports.

    Builds an ES bool query from the given filters and returns the
    matching crashes as a list of Crash objects.
    """
    must_clauses = [
        {
            "query_string": {
                "query": query_string,
                # This is necessary due to how we tokenize things
                # which is not on whitespace I.E. if the user
                # searched for CamelCaseThing it will be interpreted
                # as a search for Camel AND Case AND Thing rather
                # than Camel OR Case OR Thing
                "default_operator": "AND",
            }
        },
    ]
    body = {"query": {"bool": {"must": must_clauses}}}
    if sort is not None:
        body["sort"] = [
            {sort: {"order": "desc" if order is None else order}}
        ]
    if project is not None:
        must_clauses.append({"term": {"project": project}})
    if since is not None or until is not None:
        bounds = {}
        if since is not None:
            bounds['gt'] = since.isoformat()
        if until is not None:
            bounds['lt'] = until.isoformat()
        must_clauses.append({"range": {"date": bounds}})
    if from_ is not None:
        body["from"] = from_
    if size is not None:
        body["size"] = size
    try:
        r = self.es.search(index=self.es_index, body=body)
    except RequestError as e:
        # TODO: use logger
        print(e.info, file=sys.stderr)
        raise
    except TransportError as e:
        # TODO: use logger
        print(e.info, file=sys.stderr)
        raise
    #print(json.dumps(raw_hits, indent=2), file=sys.stderr)
    return [Crash(hit['_source']) for hit in r['hits']['hits']]
class TestCrash(unittest.TestCase):
    """Integration tests for Crash/ESCrash against a local Elasticsearch."""

    # Shared fixture: a realistic Ubuntu apport crash report for
    # nbd-server, including a seven-frame stack trace.
    exampleCrash1 = Crash({
        'database_id': 'exampleCrash1',
        'project': 'Ubuntu',
        'CrashCounter': '1',
        'ExecutablePath': '/bin/nbd-server',
        'NonfreeKernelModules': 'fglrx',
        'Package': 'nbd-server 1:2.9.3-3ubuntu1',
        'PackageArchitecture': 'i386',
        'ProcCmdline': '/bin/nbd-server',
        'ProcCwd': '/',
        'ProcEnviron': 'PATH=/sbin:/bin:/usr/sbin:/usr/bin',
        'Signal': '11',
        'SourcePackage': 'nbd',
        'StacktraceTop': '\xa0?? ()',
        'Title': 'nbd-server crashed with SIGSEGV',
        'Uname': 'Linux mlcochff 2.6.22-7-generic #1 SMP Mon Jun 25 17:33:14 GMT 2007 i686 GNU/Linux',
        'cpu': 'i386',
        'date': datetime.datetime(2007, 6, 27, 12, 4, 43),
        'os': 'Ubuntu 7.10',
        'stacktrace': Stacktrace([
            Stackframe({
                'address': u'0x0804cbd3',
                'args': u'argc=',
                'depth': 0,
                'extra': [
                    u'\tserve = (SERVER *) 0x0',
                    u'\tservers = (GArray *) 0x8051418',
                    u'\terr = (GError *) 0x0'
                ],
                'file': u'nbd-server.c:1546',
                'function': u'main'
            }),
            # Frames below main could not be resolved by the debugger.
            Stackframe({
                'address': u'0xb7cfcebc',
                'args': u'',
                'depth': 1,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000001',
                'args': u'',
                'depth': 2,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff544',
                'args': u'',
                'depth': 3,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff54c',
                'args': u'',
                'depth': 4,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xb7f1b898',
                'args': u'',
                'depth': 5,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000000',
                'args': u'',
                'depth': 6,
                'function': u'??'
            })
        ]),
        'type': 'Crash'
    })

    def test_es_reachable_working(self):
        """Elasticsearch must be reachable: create and delete a
        throwaway index, tolerating already-exists/missing errors."""
        es = Elasticsearch(hosts=['localhost'])
        es.indices.create(index='test-index', ignore=400)
        es.indices.delete(index='test-index', ignore=[400, 404])

    def test_es_add(self):
        """Round-trip the example crash through ESCrash.

        Checks that ESCrash deduplicates live instances and that a
        crash re-fetched from Elasticsearch compares equal to the
        original fixture data.
        """
        import gc
        es = ESCrash.es
        # Start from a clean index; ignore "does not exist" errors.
        es.indices.delete(index='crashes', ignore=[400, 404])
        mycrash = ESCrash(self.exampleCrash1)
        mycrash_dupe = ESCrash(self.exampleCrash1)
        # Identity, not just equality: ESCrash caches live instances.
        assert mycrash is mycrash_dupe
        mycrash_another = ESCrash('exampleCrash1')
        assert mycrash is mycrash_another
        # Drop every live reference so the cached instance can be
        # collected, forcing the next lookup to hit Elasticsearch.
        del mycrash
        del mycrash_another
        del mycrash_dupe
        gc.collect()
        es.indices.flush(index='crashes')
        time.sleep(1)  # give ES a moment to make the flush visible
        fetched_from_es = ESCrash('exampleCrash1')
        fetched_from_es_undone = Crash(fetched_from_es)
        assert fetched_from_es_undone == self.exampleCrash1
        # NOTE(review): this write is never asserted on — it looks
        # like the follow-up check was removed or truncated.
        fetched_from_es['cpu'] = 'amd64'
def as_crash(self):
    """Return a modifyable copy that won't save updates to ES."""
    # Deep-copy the backing dict so edits never reach the index.
    snapshot = deepcopy(self._d)
    return Crash(snapshot)