コード例 #1
0
ファイル: report.py プロジェクト: pombredanne/partycrasher
 def __init__(self,
              search,
              crash,
              project=None,
              dry_run=True,
              explain=False,
              saved=False,
              logdf=False,
              ):
     """Build a report around ``crash`` using the context of ``search``.

     :param search: object this report came from; its ``context`` supplies
         the index, strategy and thresholds used below.
     :param crash: a string (handed to ``ESCrash`` together with the
         context index), a plain dict, an ``ESCrash`` or a ``Crash``.
     :param project: project to check against / assign to the crash.
     :param dry_run: stored on the report as ``self.dry_run``.
     :param explain: stored on the report as ``self.explain``.
     :param saved: whether the crash is already saved; only honoured for
         dict/``Crash`` input, string and ``ESCrash`` input force ``True``.
     :param logdf: stored on the report as ``self.logdf``.
     :raises TypeError: if ``crash`` is none of the supported types.
     """
     self.came_from = search
     context = search.context
     self.context = context
     if isinstance(crash, string_types):
         self.crash = ESCrash(self.context.index, crash)
         self.saved = True
     elif isinstance(crash, dict):
         self.crash = Crash(crash)
         self.saved = saved
     elif isinstance(crash, ESCrash):
         self.crash = crash
         self.saved = True
     elif isinstance(crash, Crash):
         self.crash = crash
         self.saved = saved
     else:
         # Without this branch self.crash was never set and the failure
         # only showed up later as an AttributeError inside validate().
         raise TypeError(
             "crash must be a string, dict, ESCrash or Crash, not %s"
             % crash.__class__.__name__)
     self.strategy = context.strategy
     self.dry_run = dry_run
     self.ran = False
     self.validate()
     # fix_project() reads self.project, so it must be set first.
     self.project = project
     self.fix_project()
     self.thresholds = context.thresholds
     self.index = context.index
     self.explain = explain
     self.fix_crash()
     self.logdf = logdf
コード例 #2
0
ファイル: __init__.py プロジェクト: abramhindle/partycrasher
def get_reports_by_bucket(response, threshold):
    """
    Group the crashes of an ElasticSearch response by bucket id.

    Each hit's ``_source`` is wrapped in a ``Crash``; the result is a plain
    dict mapping bucket_id => list of crashes, where the bucket id is the
    one the crash reports for ``threshold``.
    """
    grouped = defaultdict(list)
    for entry in response['hits']['hits']:
        crash = Crash(entry['_source'])
        grouped[crash.get_bucket_id(threshold)].append(crash)
    # Hand back a regular dict so missing keys raise instead of
    # silently creating empty buckets.
    return dict(grouped)
コード例 #3
0
def get_reports_by_bucket(response, threshold):
    """
    Group the crashes of an ElasticSearch response by bucket id.

    Each hit's ``_source`` is wrapped in a ``Crash``; the result is a plain
    dict mapping bucket_id => list of crashes, where the bucket id is the
    one the crash reports for ``threshold``.
    """
    grouped = defaultdict(list)
    for entry in response['hits']['hits']:
        crash = Crash(entry['_source'])
        grouped[crash.get_bucket_id(threshold)].append(crash)
    # Hand back a regular dict so missing keys raise instead of
    # silently creating empty buckets.
    return dict(grouped)
コード例 #4
0
 def de_elastify(d):
     """Turn a dict into a Crash, converting its ES-specific fields.

     ``d`` is updated in place: ``buckets`` (if present) is wrapped in
     ESBuckets and ``date`` (if present) is run through parse_es_date.
     The result is a Crash, not an ESCrash; call ESCrash() for that.
     """
     converters = (
         ('buckets', ESBuckets),
         ('date', parse_es_date),
     )
     for field, convert in converters:
         if field in d:
             d[field] = convert(d[field])
     return Crash(d)
コード例 #5
0
ファイル: client.py プロジェクト: pombredanne/partycrasher
 def get_a_bunch_of_crashes(self, date_range_start, limit):
     """Fetch up to ``limit`` crashes from the search endpoint, page by page.

     :param date_range_start: sent as the ``since`` query parameter.
     :param limit: maximum number of crashes to request in total.
     :return: list of ``Crash`` objects built from the JSON responses.
     :raises requests.HTTPError: if any page request returns an error
         status (via ``raise_for_status``).
     """
     bunch = []
     step = 100
     for from_ in range(0, limit, step):
         query = {
             'from': from_,
             'since': date_range_start,
             # Cap the last page so the total never exceeds `limit`;
             # always asking for `step` over-fetched whenever `limit`
             # was not a multiple of `step`.
             'size': min(step, limit - from_),
         }
         response = requests.get(self.path_to('*', 'search'), params=query)
         response.raise_for_status()
         bunch.extend(Crash(crash) for crash in response.json())
     return bunch
コード例 #6
0
 def test_es_add(self):
     # Stores the example crash through ESCrash, then reads it back and
     # compares it with the original.
     import gc
     es = ESCrash.es
     # Start without a 'crashes' index; ignore=[400, 404] is passed so the
     # delete presumably tolerates the index not existing (TODO confirm).
     es.indices.delete(index='crashes', ignore=[400, 404])
     mycrash = ESCrash(self.exampleCrash1)
     mycrash_dupe = ESCrash(self.exampleCrash1)
     # Building an ESCrash twice from the same crash must yield the very
     # same object, not merely an equal one.
     assert mycrash is mycrash_dupe
     # The same holds when it is requested by string (apparently its id).
     mycrash_another = ESCrash('exampleCrash1')
     assert mycrash is mycrash_another
     # Drop every local reference and force a collection; presumably so
     # the lookup below cannot reuse a live in-memory instance
     # (NOTE(review): confirm against ESCrash's caching).
     del mycrash
     del mycrash_another
     del mycrash_dupe
     gc.collect()
     # Flush the index and wait a second before reading back.
     es.indices.flush(index='crashes')
     time.sleep(1)
     fetched_from_es = ESCrash('exampleCrash1')
     # Convert back to a plain Crash and compare with the original.
     fetched_from_es_undone = Crash(fetched_from_es)
     assert fetched_from_es_undone == self.exampleCrash1
     # Write a field on the fetched ESCrash; what is checked afterwards is
     # not visible in this excerpt.
     fetched_from_es['cpu'] = 'amd64'
コード例 #7
0
    def get_crash(self, database_id, project):
        """Look up a crash by id and annotate its frames with ``logdf``.

        :param database_id: id of the crash to fetch.
        :param project: accepted but not used by this method.
        :return: the crash; when term vectors for the function field are
            available it is a ``Crash`` copy whose frames carry ``logdf``,
            otherwise the ``ESCrash`` as fetched.
        :raises KeyError: if no crash with ``database_id`` exists.
        """
        self._connect_to_elasticsearch()
        try:
            crash = ESCrash(database_id, index=self.es_index)
        except NotFoundError:
            raise KeyError(database_id)

        field = 'stacktrace.function.whole'
        response = self.es.termvectors(index=self.es_index,
                                       doc_type='crash',
                                       id=database_id,
                                       fields=field,
                                       term_statistics=True,
                                       offsets=False,
                                       positions=False)

        term_vectors = response['term_vectors']
        if field not in term_vectors:
            # No statistics for this document: return it untouched.
            return crash

        vectors = term_vectors[field]
        total_docs = float(vectors['field_statistics']['doc_count'])
        crash = Crash(crash)

        # Frames for logging/cleanup/rethrow helpers often sit on top of
        # the stack and are shared by many unrelated crashes. The logdf
        # score (negative log2 of the function's relative document
        # frequency) lets views other than the full detail page hide them.
        for frame in crash['stacktrace']:
            if 'function' not in frame or not frame['function']:
                continue
            stats = vectors['terms'][frame['function']]
            share = float(stats['doc_freq']) / total_docs
            frame['logdf'] = -1.0 * math.log(share, 2)

        return crash
コード例 #8
0
    def ingest(self, crash, dryrun=False):
        """
        Wrap ``crash`` in a Crash, sanity-check it and assign buckets.

        :param crash: a plain dictionary or an existing Crash instance.
        :param dryrun: when true, buckets are assigned on the in-memory
            crash only and nothing is saved.
        :return: the crash with buckets (dry run), or whatever the
            bucketer returns from assigning and saving buckets.
        :raises IdenticalReportError: listed by the original
            documentation; presumably raised by the bucketer when saving.
        """
        true_crash = Crash(crash)

        if 'stacktrace' in true_crash:
            stacktrace = true_crash['stacktrace']
            assert isinstance(stacktrace, Stacktrace)
            top_frame = stacktrace[0]
            assert isinstance(top_frame, Stackframe)
            if 'address' in top_frame:
                assert isinstance(top_frame['address'], basestring)

        if not dryrun:
            return self.bucketer.assign_save_buckets(true_crash)

        true_crash['buckets'] = self.bucketer.assign_buckets(true_crash)
        return true_crash
コード例 #9
0
 # Excerpt: registers one bucket directory and loads every bug directory
 # inside it. The enclosing function/loop is not visible here.
 assert os.path.isdir(bucketdir)
 buglist = os.listdir(bucketdir)
 # Disabled filter that would have skipped buckets with fewer than 2 bugs:
 #if len(buglist) < 2:
 #continue
 buckets.append(bucket)
 INFO(bucket)
 for bugdir in buglist:
     # Rebind the bare entry name to its path below the bucket directory.
     bugdir = os.path.join(bucketdir, bugdir)
     INFO(bugdir)
     assert os.path.isdir(bugdir)
     #print repr(os.listdir(bugdir))
     # Only bug directories that contain at least one entry are loaded.
     if len(os.listdir(bugdir)) >= 1:
         # The directory name doubles as the bug id.
         database_id = 'launchpad:' + os.path.basename(bugdir)
         try:
             INFO("Disk: " + database_id)
             crashdata = Crash.load_from_file(bugdir)
         except IOError as e:
             # Bugs without a stacktrace are counted and skipped; any
             # other I/O error is propagated.
             if "No stacktrace" in str(e):
                 no_stacktrace += 1
                 continue
             else:
                 raise
         crashes[database_id] = crashdata
         # Oracle entry: records which bucket this bug was found in.
         oracledata = Crash({
             'database_id': database_id,
             'bucket': bucket,
         })
         oracle[database_id] = oracledata
         bugs_total += 1
         # Extract the numeric part after the prefix.
         # NOTE(review): match is None (and .group fails) when the
         # directory name is not purely digits -- confirm that cannot happen.
         match = re.match(r'[^:]+:(\d+)$', database_id)
         sql_id = match.group(1)
コード例 #10
0
ファイル: report.py プロジェクト: pombredanne/partycrasher
class Report(object):
    """Object representing the API functionality for an individual crash.

    A report wraps one crash together with the context of the search it
    came from, and layers bucket assignment, explanations and saving on
    top of it.
    """

    def __init__(
        self,
        search,
        crash,
        project=None,
        dry_run=True,
        explain=False,
        saved=False,
        logdf=False,
    ):
        """Build a report around ``crash`` using the context of ``search``.

        :param search: object this report came from; its ``context``
            supplies the index, strategy and thresholds.
        :param crash: a string (handed to ``ESCrash`` together with the
            context index), a plain dict, an ``ESCrash`` or a ``Crash``.
        :param project: project to check against / assign to the crash.
        :param dry_run: when true, :meth:`save` refuses to run.
        :param explain: whether explanations should be produced.
        :param saved: whether the crash is already saved; only honoured
            for dict/``Crash`` input, string and ``ESCrash`` input force
            ``True``.
        :param logdf: when true, :meth:`restify_` reports the crash with
            per-frame ``logdf`` values.
        :raises TypeError: if ``crash`` is none of the supported types.
        """
        self.came_from = search
        context = search.context
        self.context = context
        if isinstance(crash, string_types):
            self.crash = ESCrash(self.context.index, crash)
            self.saved = True
        elif isinstance(crash, dict):
            self.crash = Crash(crash)
            self.saved = saved
        elif isinstance(crash, ESCrash):
            self.crash = crash
            self.saved = True
        elif isinstance(crash, Crash):
            self.crash = crash
            self.saved = saved
        else:
            # Without this branch self.crash was never set and the failure
            # only showed up later as an AttributeError inside validate().
            raise TypeError(
                "crash must be a string, dict, ESCrash or Crash, not %s"
                % crash.__class__.__name__)
        self.strategy = context.strategy
        self.dry_run = dry_run
        self.ran = False
        self.validate()
        # fix_project() reads self.project, so it must be set first.
        self.project = project
        self.fix_project()
        self.thresholds = context.thresholds
        self.index = context.index
        self.explain = explain
        self.fix_crash()
        self.logdf = logdf

    def fix_crash(self):
        """Replace project, type and bucket values on the crash with their
        Report* wrapper objects.

        An ``ESCrash`` is first swapped for the result of ``as_crash()`` so
        the edits below are made on that object instead.
        """
        if isinstance(self.crash, ESCrash):
            self.crash = self.crash.as_crash()
        # Imported at call time rather than module level -- presumably to
        # avoid circular imports between the api modules (TODO confirm).
        from partycrasher.api.report_bucket import ReportBucket
        from partycrasher.api.report_project import ReportProject
        from partycrasher.api.report_type import ReportType
        from partycrasher.api.search import Search
        # The message used to reference an undefined local `context`, which
        # turned a failed assertion into a NameError.
        assert isinstance(self.context, Context), \
            self.context.__class__.__name__

        self.crash['project'] = ReportProject(
            search=Search(context=self.context), project=self.crash['project'])
        self.crash['type'] = ReportType(search=Search(context=self.context),
                                        report_type=self.crash['type'])
        if 'buckets' in self.crash:
            # Iterate over a snapshot because entries are replaced in place.
            for k, v in list(self.crash['buckets'].items()):
                if isinstance(v, Bucket):
                    self.crash['buckets'][k] = ReportBucket(
                        search=Search(context=self.context),
                        id=v['id'],
                        threshold=v['threshold'])

    def fix_project(self):
        """Reconcile the project given to the report with the crash's own.

        Whichever of the two is missing is filled in from the other.

        :return: the agreed project.
        :raises NoProjectSpecifiedError: if neither side names a project.
        :raises ProjectMismatchError: if both do and they differ.
        """
        crash_project = None
        if 'project' in self.crash:
            crash_project = self.crash['project']
            if isinstance(crash_project, Project):
                crash_project = crash_project.name

        if crash_project is None:
            if self.project is None:
                raise NoProjectSpecifiedError(self.project, self.crash)
            self.crash['project'] = self.project
            return self.project

        if self.project is None:
            self.project = crash_project
            return crash_project

        # Both are set: they have to agree.
        if crash_project != self.project:
            raise ProjectMismatchError(self.project, self.crash)
        return self.project

    def validate(self):
        """Do some extra runtime checking that should be unnecessary if the
        Crash class is operating correctly."""
        true_crash = self.crash
        if 'stacktrace' in true_crash:
            assert isinstance(true_crash['stacktrace'], Stacktrace)
            assert isinstance(true_crash['stacktrace'][0], Stackframe)
            if 'address' in true_crash['stacktrace'][0]:
                assert isinstance(
                    true_crash['stacktrace'][0]['address'], string_types), (
                        "address must be a string instead of %s" %
                        (true_crash['stacktrace'][0]['address'].__class__))

    def search(self, explain=None):
        """Run the search (at most once) and return its result.

        :param explain: when not None, overrides ``self.explain`` first.
        :return: the strategy's query result; later calls return the
            result of the first run.
        :raises RuntimeError: if the crash is already saved and no
            explanation was requested, i.e. there is nothing to search for.
        """
        if explain is not None:
            self.explain = explain
        del explain
        if not self.ran:
            if (not self.explain) and self.saved:
                raise RuntimeError(
                    "Requested search but there was no reason to search")
            self.es_result = self.strategy.query(self.crash, self.explain)
            self.ran = True
        # Previously only the first call returned the result; repeated
        # calls fell through and returned None.
        return self.es_result

    def save(self):
        """Save the crash with assigned buckets to ES.

        Buckets are assigned first if the crash has none. Must not be
        called on a dry-run or already saved report.

        :return: the ``ESCrash`` created for the crash.
        """
        assert not self.dry_run
        assert not self.saved
        if 'buckets' not in self.crash:
            self.assign_buckets()
        self.crash['buckets'].create()
        saved_crash = ESCrash(crash=self.crash, index=self.index)
        assert saved_crash is not None
        self.crash = saved_crash
        self.saved = True
        return saved_crash

    def assign_buckets(self):
        """Assigns buckets to this crash and returns the assigned buckets.

        A ``force_bucket`` entry on the crash overrides every assigned
        bucket except ``top_match``.
        """
        assert 'buckets' not in self.crash
        self.search()
        buckets = self.strategy.matching_buckets(self.thresholds,
                                                 self.es_result)
        if 'force_bucket' in self.crash:
            warn("Warning: overriding buckets to %s with force_bucket!" %
                 (self.crash['force_bucket']))
            for key in buckets:
                if key != 'top_match':
                    buckets[key] = self.crash['force_bucket']
        assert isinstance(buckets, Buckets)
        assert 'top_match' in buckets
        self.crash["buckets"] = buckets
        return buckets

    @property
    def assigned_buckets(self):
        """Returns the buckets assigned to this crash, assigning them first
        if the crash has none yet."""
        if 'buckets' not in self.crash:
            return self.assign_buckets()
        return self.crash['buckets']

    @property
    def explanation(self):
        """
        Returns the explanation of why it would be bucketed now the way it would.
        This is not necessarily the original bucketing.
        None when explanations were not requested.
        """
        if self.explain:
            self.search()
            return self.es_result.explanation
        else:
            return None

    @property
    def auto_summary(self):
        """
        Returns the summary of the explanation of why it would be bucketed
        now the way it would. This is not necessarily the original bucketing.
        None when explanations were not requested.
        """
        if self.explain:
            self.search()
            return self.es_result.explanation_summary
        else:
            return None

    # Deliberately a plain method: as a property it could never receive
    # `other_report`, so any access failed with a TypeError.
    def compare(self, other_report):
        """
        Returns an explanation summary comparing two reports.

        :raises NotImplementedError: always; comparisons are not
            implemented yet.
        """
        raise NotImplementedError(
            "Report comparisons not currently implemented.")

    def crash_with_termvectors(self):
        """Returns the crash with logdf information included.

        Requires a saved crash. Each frame with a non-empty ``function``
        gets a ``logdf`` value: the negative base-2 logarithm of the share
        of documents that contain that function term.
        """
        assert self.saved
        database_id = self.crash['database_id']
        response = self.context.index.termvectors(
            doc_type='crash',
            id=database_id,
            fields='stacktrace.function.whole',
            term_statistics=True,
            offsets=False,
            positions=False)

        if isinstance(self.crash, ESCrash):
            crash = self.crash.as_crash()
        else:
            crash = self.crash

        if 'stacktrace.function.whole' in response['term_vectors']:
            vectors = response['term_vectors']['stacktrace.function.whole']

            all_doc_count = float(vectors['field_statistics']['doc_count'])

            # Sometimes there's extra functions on top of the stack for
            # logging/cleanup/handling/rethrowing/whatever that get called
            # after the fault but before the trace is generated, and are
            # present for multiple crash locations. So except on the
            # full detail page, we don't want to display them.
            # This is for that.
            for frame in crash['stacktrace']:
                if 'function' in frame and frame['function']:
                    function = frame['function']
                    term = vectors['terms'][function]
                    relativedf = float(term['doc_freq']) / all_doc_count
                    logdf = -1.0 * math.log(relativedf, 2)
                    frame['logdf'] = logdf

        return crash

    def restify_(self):
        """Return the dict form of this report.

        Contains ``report`` and ``saved``, plus ``explanation`` and
        ``auto_summary`` when explanations are enabled.
        """
        assert self.project is not None
        if self.logdf:
            crash = self.crash_with_termvectors()
        else:
            crash = self.crash
        d = {
            'report': crash,
            'saved': self.saved,
        }
        if self.explain:
            d['explanation'] = self.explanation
            d['auto_summary'] = self.auto_summary
        return d

    @property
    def database_id(self):
        """The ``database_id`` field of the wrapped crash."""
        return self.crash['database_id']
コード例 #11
0
    def search(self,
               query_string,
               since=None,
               until=None,
               project=None,
               from_=None,
               size=None,
               sort=None,
               order=None):
        """Run a query-string search and return the hits as crashes.

        :param query_string: user query; terms are combined with AND.
        :param since: optional lower date bound (exclusive), needs
            ``isoformat()``.
        :param until: optional upper date bound (exclusive), needs
            ``isoformat()``.
        :param project: optional project the hits must belong to.
        :param from_: optional offset of the first hit.
        :param size: optional maximum number of hits.
        :param sort: optional field to sort by.
        :param order: sort order, "desc" when omitted; only used with
            ``sort``.
        :return: list of ``Crash`` objects, one per hit.
        :raises RequestError, TransportError: re-raised after the error
            info has been printed to stderr.
        """
        must_clauses = [
            {
                "query_string": {
                    "query": query_string,
                    # Tokenization does not split on whitespace only: a
                    # search for CamelCaseThing is analysed into Camel,
                    # Case and Thing. AND makes all parts mandatory
                    # instead of matching any of them.
                    "default_operator": "AND",
                }
            },
        ]
        es_query = {"query": {"bool": {"must": must_clauses}}}

        if sort is not None:
            if order is None:
                order = "desc"
            es_query["sort"] = [{sort: {"order": order}}]

        if project is not None:
            must_clauses.append({"term": {"project": project}})

        if since is not None or until is not None:
            date_bounds = {}
            if since is not None:
                date_bounds['gt'] = since.isoformat()
            if until is not None:
                date_bounds['lt'] = until.isoformat()
            must_clauses.append({"range": {"date": date_bounds}})

        for key, value in (("from", from_), ("size", size)):
            if value is not None:
                es_query[key] = value

        try:
            r = self.es.search(index=self.es_index, body=es_query)
        except (RequestError, TransportError) as e:
            # TODO: use logger
            print(e.info, file=sys.stderr)
            raise

        return [Crash(hit['_source']) for hit in r['hits']['hits']]
コード例 #12
0
class TestCrash(unittest.TestCase):
    """Tests that store and fetch crashes through an Elasticsearch instance.

    The visible tests talk to Elasticsearch directly (``localhost`` and
    ``ESCrash.es``), so they need a reachable server to run.
    """

    # Fixture: one crash report with a seven-frame stacktrace, used as the
    # reference value by the tests below.
    exampleCrash1 = Crash({
        'database_id':
        'exampleCrash1',
        'project':
        'Ubuntu',
        'CrashCounter':
        '1',
        'ExecutablePath':
        '/bin/nbd-server',
        'NonfreeKernelModules':
        'fglrx',
        'Package':
        'nbd-server 1:2.9.3-3ubuntu1',
        'PackageArchitecture':
        'i386',
        'ProcCmdline':
        '/bin/nbd-server',
        'ProcCwd':
        '/',
        'ProcEnviron':
        'PATH=/sbin:/bin:/usr/sbin:/usr/bin',
        'Signal':
        '11',
        'SourcePackage':
        'nbd',
        'StacktraceTop':
        '\xa0?? ()',
        'Title':
        'nbd-server crashed with SIGSEGV',
        'Uname':
        'Linux mlcochff 2.6.22-7-generic #1 SMP Mon Jun 25 17:33:14 GMT 2007 i686 GNU/Linux',
        'cpu':
        'i386',
        'date':
        datetime.datetime(2007, 6, 27, 12, 4, 43),
        'os':
        'Ubuntu 7.10',
        'stacktrace':
        Stacktrace([
            Stackframe({
                'address':
                u'0x0804cbd3',
                'args':
                u'argc=',
                'depth':
                0,
                'extra': [
                    u'\tserve = (SERVER *) 0x0',
                    u'\tservers = (GArray *) 0x8051418',
                    u'\terr = (GError *) 0x0'
                ],
                'file':
                u'nbd-server.c:1546',
                'function':
                u'main'
            }),
            Stackframe({
                'address': u'0xb7cfcebc',
                'args': u'',
                'depth': 1,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000001',
                'args': u'',
                'depth': 2,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff544',
                'args': u'',
                'depth': 3,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xbfeff54c',
                'args': u'',
                'depth': 4,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0xb7f1b898',
                'args': u'',
                'depth': 5,
                'function': u'??'
            }),
            Stackframe({
                'address': u'0x00000000',
                'args': u'',
                'depth': 6,
                'function': u'??'
            })
        ]),
        'type':
        'Crash'
    })

    def test_es_reachable_working(self):
        # Smoke test: create and then delete a scratch index on the local
        # Elasticsearch; the listed status codes are passed as `ignore`.
        es = Elasticsearch(hosts=['localhost'])
        es.indices.create(index='test-index', ignore=400)
        es.indices.delete(index='test-index', ignore=[400, 404])

    def test_es_add(self):
        # Stores the example crash through ESCrash, then reads it back and
        # compares it with the fixture.
        import gc
        es = ESCrash.es
        # Start without a 'crashes' index.
        es.indices.delete(index='crashes', ignore=[400, 404])
        mycrash = ESCrash(self.exampleCrash1)
        mycrash_dupe = ESCrash(self.exampleCrash1)
        # Building an ESCrash twice from the same crash must yield the very
        # same object, not merely an equal one.
        assert mycrash is mycrash_dupe
        # The same holds when it is requested by its database id.
        mycrash_another = ESCrash('exampleCrash1')
        assert mycrash is mycrash_another
        # Drop every local reference and force a collection; presumably so
        # the lookup below cannot reuse a live in-memory instance
        # (NOTE(review): confirm against ESCrash's caching).
        del mycrash
        del mycrash_another
        del mycrash_dupe
        gc.collect()
        # Flush the index and wait a second before reading back.
        es.indices.flush(index='crashes')
        time.sleep(1)
        fetched_from_es = ESCrash('exampleCrash1')
        # Convert back to a plain Crash and compare with the fixture.
        fetched_from_es_undone = Crash(fetched_from_es)
        assert fetched_from_es_undone == self.exampleCrash1
        # Write a field on the fetched ESCrash; what is checked afterwards
        # is not visible in this excerpt.
        fetched_from_es['cpu'] = 'amd64'
コード例 #13
0
 def as_crash(self):
     """Build a plain Crash from a deep copy of this object's data.

     The copy can be modified freely; changes to it are not saved to ES.
     """
     return Crash(deepcopy(self._d))