def test_get_db_from_config(self):
    """The session from ``get_db_from_config(dev=True)`` holds no Update rows."""
    session = get_db_from_config(dev=True)
    update_count = session.query(Update).count()
    # The count doubles as the assertion message so a failure shows the value.
    assert update_count == 0, update_count
def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``, or 0.0 if ``whole`` is zero."""
    if not whole:
        return 0.0
    return float(part) / whole * 100


def _is_proventester(user):
    """Tell whether ``user`` belongs to a group named 'proventesters'."""
    return any(group.name == 'proventesters' for group in user.groups)


def _print_top(counts, limit=50):
    """Print the ``limit`` highest entries of a ``{name: count}`` mapping."""
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    for name, count in ranked[:limit]:
        print(" * %s (%d)" % (name, count))


def _print_release_stats(data, critpath_pkgs):
    """Print the report for one release from its collected ``data`` dict."""
    print(" * %d updates" % data['num_updates'])
    print(" * %d packages updated" % (len(data['packages'])))
    for status in statuses:
        print(" * %d %s updates" % (data['num_%s' % status], status))
    for update_type in types:
        print(" * %d %s updates (%0.2f%%)" % (
            data['num_%s' % update_type], update_type,
            _percent(data['num_%s' % update_type], data['num_updates'])))
    print(" * %d bugs resolved" % len(data['bugs']))
    print(" * %d critical path updates (%0.2f%%)" % (
        data['num_critpath'],
        _percent(data['num_critpath'], data['num_updates'])))
    print(" * %d approved critical path updates" % (
        data['num_critpath_approved']))
    print(" * %d unapproved critical path updates" % (
        data['num_critpath_unapproved']))
    print(" * %d updates received feedback (%0.2f%%)" % (
        data['num_feedback'],
        _percent(data['num_feedback'], data['num_updates'])))
    print(" * %d +0 comments" % data['0'])
    print(" * %d +1 comments" % data['1'])
    print(" * %d -1 comments" % data['-1'])
    print(" * %d unique authenticated karma submitters" % (
        len(data['karma'])))
    print(" * %d proventesters" % len(data['proventesters']))
    print(" * %d +1's from proventesters" % data['proventesters_1'])
    print(" * %d -1's from proventesters" % data['proventesters_-1'])

    if data['num_critpath']:
        conflicted = data['conflicted_proventesters']
        print(" * %d critpath updates with conflicting proventesters (%0.2f%% of critpath)" % (
            len(conflicted), _percent(len(conflicted), data['num_critpath'])))
        for u in sorted(conflicted):
            print(' <li><a href="%s">%s</a></li>' % (u, u.split('/')[-1]))

        negative = data['critpath_positive_karma_negative_proventesters']
        print(" * %d critpath updates with positive karma and negative proventester feedback (%0.2f%% of critpath)" % (
            len(negative), _percent(len(negative), data['num_critpath'])))
        for u in sorted(negative):
            print(' <li><a href="%s">%s</a></li>' % (u, u.split('/')[-1]))

        positive = data['critpath_positive_karma_including_proventesters']
        print(" * %d critpath updates with positive karma and positive proventester feedback (%0.2f%% of critpath)" % (
            len(positive), _percent(len(positive), data['num_critpath'])))

    print(" * %d anonymous users gave feedback (%0.2f%%)" % (
        data['num_anon_feedback'],
        _percent(data['num_anon_feedback'],
                 data['num_anon_feedback'] + sum(data['karma'].values()))))
    # NOTE: 'num_tested' does not take into account updates that reach
    # stablekarma before being pushed to testing, so no "stable updates that
    # went through testing" ratio is reported.
    print(" * %d stable updates reached the stable karma threshold (%0.2f%%)" % (
        data['num_stablekarma'],
        _percent(data['num_stablekarma'], data['num_stable'])))
    print(" * %d stable updates reached the minimum time in testing threshold (%0.2f%%)" % (
        data['num_testingtime'],
        _percent(data['num_testingtime'], data['num_stable'])))
    print(" * %d went from testing to stable *without* karma (%0.2f%%)" % (
        data['num_tested_without_karma'],
        _percent(data['num_tested_without_karma'], data['num_tested'])))
    print(" * %d updates were pushed to stable with negative karma (%0.2f%%)" % (
        data['stable_with_negative_karma'],
        _percent(data['stable_with_negative_karma'], data['num_stable'])))
    print(" * %d critical path updates pushed to stable *without* karma" % (
        len(data['critpath_without_karma'])))

    print(" * Time spent in testing:")
    deltas = data['deltas']
    if deltas:
        # mean/median/mode are undefined (and used to crash) with no samples.
        print(" * mean = %d days" % (data['accumulative'].days // len(deltas)))
        print(" * median = %d days" % (deltas[len(deltas) // 2].days))
        print(" * mode = %d days" % (
            sorted(data['occurrences'].items(), key=itemgetter(1))[-1][0]))

    print("Out of %d packages updated, the top 50 were:" % (
        len(data['packages'])))
    _print_top(data['packages'])

    print("Out of %d update submitters, the top 50 were:" % (
        len(data['submitters'])))
    _print_top(data['submitters'])

    print("Out of %d critical path updates, the top 50 updated were:" % (
        len(data['critpath_pkgs'])))
    _print_top(data['critpath_pkgs'])

    critpath_not_updated = set(
        pkg for pkg in critpath_pkgs if pkg not in data['critpath_pkgs'])
    print("Out of %d critical path packages, %d were never updated:" % (
        len(critpath_pkgs), len(critpath_not_updated)))
    for pkg in sorted(critpath_not_updated):
        print(' * %s' % pkg)


def main(releases=None):
    """Collect and print per-release update metrics from the database.

    For every Release (optionally restricted to the names in ``releases``)
    that has at least one Update, this walks the updates and their comments,
    accumulates counters into a per-release dict and prints a report.  A
    cross-release feedback and commenter summary is printed at the end.

    Relies on the module-level ``statuses`` and ``types`` sequences, and on
    ``get_db_from_config``, ``get_critpath_pkgs``, ``header`` and
    ``short_url`` being defined elsewhere in this file.

    :param releases: optional collection of release names to report on;
        a falsy value means every release.
    :returns: None; all output goes to stdout.
    """
    db = get_db_from_config()

    stats = {}  # {release: {'stat': ...}}
    feedback = 0  # total number of updates that received feedback
    karma = defaultdict(int)  # {username: # of karma submissions}
    num_updates = db.query(Update).count()

    for release in db.query(Release).all():
        if releases and release.name not in releases:
            continue
        updates = db.query(Update).filter_by(release=release)
        critpath_pkgs = get_critpath_pkgs(release.name.lower())
        total = updates.count()
        if not total:
            continue
        print(header(release.long_name))
        stats[release.name] = {
            'num_updates': total,
            'num_tested': 0,
            'num_tested_without_karma': 0,
            'num_feedback': 0,
            'num_anon_feedback': 0,
            'critpath_pkgs': defaultdict(int),
            'num_critpath': 0,
            'num_critpath_approved': 0,
            'num_critpath_unapproved': 0,
            'num_stablekarma': 0,
            'num_testingtime': 0,
            'critpath_without_karma': set(),
            'conflicted_proventesters': [],
            'critpath_positive_karma_including_proventesters': [],
            'critpath_positive_karma_negative_proventesters': [],
            'stable_with_negative_karma': updates.filter(
                and_(Update.status == UpdateStatus.stable,
                     Update.karma < 0)).count(),
            'bugs': set(),
            'karma': defaultdict(int),
            'deltas': [],
            'occurrences': {},
            'accumulative': timedelta(),
            'packages': defaultdict(int),
            'proventesters': set(),
            'proventesters_1': 0,
            'proventesters_0': 0,
            'proventesters_-1': 0,
            'submitters': defaultdict(int),
            # for tracking number of types of karma
            '1': 0,
            '0': 0,
            '-1': 0,
        }
        data = stats[release.name]

        for status in statuses:
            data['num_%s' % status] = updates.filter(
                and_(Update.status == UpdateStatus.from_string(status))).count()

        for update_type in types:
            data['num_%s' % update_type] = updates.filter(
                Update.type == UpdateType.from_string(update_type)).count()

        for update in updates.all():
            assert update.user, update.title
            data['submitters'][update.user.name] += 1
            for build in update.builds:
                data['packages'][build.package.name] += 1
                if build.package.name in critpath_pkgs:
                    data['critpath_pkgs'][build.package.name] += 1
            for bug in update.bugs:
                data['bugs'].add(bug.bug_id)

            feedback_done = False
            testingtime_done = False
            stablekarma_done = False

            for comment in update.comments:
                if not comment.user:
                    print('Error: None comment for %s' % update.title)
                    # Everything below dereferences comment.user, so an
                    # author-less comment cannot be counted.
                    continue
                if comment.user.name == 'autoqa':
                    continue

                from_bodhi = comment.user.name == 'bodhi'

                # Track the # of +1's, -1's, and +0's.
                if not from_bodhi:
                    data[str(comment.karma)] += 1

                if _is_proventester(comment.user):
                    data['proventesters'].add(comment.user.name)
                    data['proventesters_%d' % comment.karma] += 1

                if update.status == UpdateStatus.stable and not stablekarma_done:
                    if comment.text == 'This update has reached the stable karma threshold and will be pushed to the stable updates repository':
                        data['num_stablekarma'] += 1
                        stablekarma_done = True
                    elif comment.text and comment.text.endswith('days in testing and can be pushed to stable now if the maintainer wishes'):
                        data['num_testingtime'] += 1
                        # One flag covers both thresholds: only the first
                        # matching bodhi notice is counted per update.
                        stablekarma_done = True

                # For figuring out if an update has received feedback or not
                if (not feedback_done and not from_bodhi
                        and comment.karma != 0 and not comment.anonymous):
                    data['num_feedback'] += 1  # per-release tracking of feedback
                    feedback += 1  # total number of updates that have received feedback
                    feedback_done = True  # so we don't run this for each comment

                # Tracking per-author karma & anonymous feedback
                if not from_bodhi:
                    if comment.anonymous:
                        # @@: should we track anon +0 comments as "feedback"?
                        if comment.karma != 0:
                            data['num_anon_feedback'] += 1
                    else:
                        author = comment.user.name
                        data['karma'][author] += 1
                        karma[author] += 1

                if (not testingtime_done and
                        comment.text == 'This update has been pushed to testing'):
                    # Time in testing = first "pushed to stable" notice minus
                    # this "pushed to testing" notice.
                    for othercomment in update.comments:
                        if othercomment.text == 'This update has been pushed to stable':
                            delta = othercomment.timestamp - comment.timestamp
                            data['deltas'].append(delta)
                            data['occurrences'][delta.days] = \
                                data['occurrences'].get(delta.days, 0) + 1
                            data['accumulative'] += delta
                            testingtime_done = True
                            break

            if update.critpath:
                if update.critpath_approved or update.status == UpdateStatus.stable:
                    data['num_critpath_approved'] += 1
                elif update.status in (UpdateStatus.testing,
                                       UpdateStatus.pending):
                    data['num_critpath_unapproved'] += 1
                data['num_critpath'] += 1
                if update.status == UpdateStatus.stable and update.karma == 0:
                    data['critpath_without_karma'].add(update)

                # Proventester metrics
                proventester_karma = defaultdict(int)  # {username: karma}
                for comment in update.comments:
                    if comment.user and _is_proventester(comment.user):
                        proventester_karma[comment.user.name] += comment.karma
                positive_proventesters = sum(
                    1 for total_karma in proventester_karma.values()
                    if total_karma > 0)
                negative_proventesters = sum(
                    1 for total_karma in proventester_karma.values()
                    if total_karma < 0)

                # Conflicting proventesters
                if positive_proventesters and negative_proventesters:
                    data['conflicted_proventesters'].append(short_url(update))

                # Track updates with overall positive karma, including positive
                # karma from a proventester
                if update.karma > 0 and positive_proventesters:
                    data['critpath_positive_karma_including_proventesters'].append(
                        short_url(update))

                # Track updates with overall positive karma, including negative
                # karma from a proventester
                if update.karma > 0 and negative_proventesters:
                    data['critpath_positive_karma_negative_proventesters'].append(
                        short_url(update))

            if testingtime_done:
                data['num_tested'] += 1
                if not feedback_done:
                    data['num_tested_without_karma'] += 1

        data['deltas'].sort()
        _print_release_stats(data, critpath_pkgs)

    print('')
    print('')
    print("Out of %d total updates, %d received feedback (%0.2f%%)" % (
        num_updates, feedback, _percent(feedback, num_updates)))
    print("Out of %d total unique commenters, the top 50 were:" % (
        len(karma)))
    _print_top(karma)
def load_sqlalchemy_db():
    """Migrate a pickled Bodhi 1 dump into the SQLAlchemy database.

    Reads the pickle file named by ``sys.argv[2]`` and creates Release,
    Update, User, Package, Build, Bug, CVE, Comment and Group rows for every
    update whose title is not already in the database.  An optional
    ``--release A,B`` command-line argument restricts loading to those
    release names.  The work is committed with ``transaction.commit()`` and
    row counts are printed at the end.

    The dump is either a list of update dictionaries (legacy format) or a
    dict of the form ``{'updates': [...], 'releases': [...]}``.

    :raises Exception: if an update carries an unknown request, type or
        status string.
    """
    print("\nLoading pickled database %s" % sys.argv[2])
    # SECURITY: pickle.load() can execute arbitrary code embedded in the file;
    # only ever feed this a dump from a trusted source.
    with open(sys.argv[2], 'rb') as pickled:
        data = pickle.load(pickled)

    import transaction

    from bodhi.models import Release, Update, Build, Comment, User, Bug, CVE
    from bodhi.models import Package, Group
    from bodhi.models import UpdateType, UpdateStatus, UpdateRequest

    from sqlalchemy.orm.exc import NoResultFound

    # Caches for quick lookup
    releases = {}
    packages = {}
    users = {}
    critpath = {}
    aliases = []

    # Old request names, and the string -> enum tables for the new models.
    legacy_requests = {'move': 'stable', 'push': 'testing',
                       'unpush': 'obsolete'}
    request_enums = {
        'stable': UpdateRequest.stable,
        'testing': UpdateRequest.testing,
    }
    type_enums = {
        'bugfix': UpdateType.bugfix,
        'newpackage': UpdateType.newpackage,
        'enhancement': UpdateType.enhancement,
        'security': UpdateType.security,
    }
    status_enums = {
        'pending': UpdateStatus.pending,
        'testing': UpdateStatus.testing,
        'obsolete': UpdateStatus.obsolete,
        'stable': UpdateStatus.stable,
        'unpushed': UpdateStatus.unpushed,
    }

    db = get_db_from_config()

    # Allow filtering of releases to load
    whitelist = []
    if '--release' in sys.argv:
        whitelist = sys.argv[sys.argv.index('--release') + 1].split(',')
        print('whitelist = %r' % whitelist)

    # Legacy format was just a list of update dictionaries
    # Now we'll pull things out into an organized dictionary:
    # {'updates': [], 'releases': []}
    if isinstance(data, dict):
        for release in data['releases']:
            try:
                db.query(Release).filter_by(name=release['name']).one()
            except NoResultFound:
                # These keys are not passed on to Release(); tolerate dumps
                # that lack them.
                release.pop('metrics', None)
                release.pop('locked', None)
                r = Release(**release)
                r.stable_tag = "%s-updates" % r.dist_tag
                r.testing_tag = "%s-testing" % r.stable_tag
                r.candidate_tag = "%s-candidate" % r.stable_tag
                r.pending_testing_tag = "%s-pending" % r.testing_tag
                r.pending_stable_tag = "%s-pending" % r.stable_tag
                r.override_tag = "%s-override" % r.dist_tag
                db.add(r)
        data = data['updates']

    progress = ProgressBar(widgets=[SimpleProgress(), Percentage(), Bar()])

    for u in progress(data):
        release_name = u['release'][0]
        try:
            release = releases[release_name]
        except KeyError:
            try:
                release = db.query(Release).filter_by(name=release_name).one()
            except NoResultFound:
                release = Release(name=release_name,
                                  long_name=u['release'][1],
                                  id_prefix=u['release'][2],
                                  dist_tag=u['release'][3])
                db.add(release)
            releases[release_name] = release
            # Fetch the critpath list once per release that will be loaded.
            if not whitelist or release.name in whitelist:
                critpath[release.name] = get_critpath_pkgs(release.name.lower())
                print('%s critpath packages for %s' % (
                    len(critpath[release.name]), release.name))

        if whitelist and release.name not in whitelist:
            continue

        ## Backwards compatibility
        u['request'] = legacy_requests.get(u['request'], u['request'])
        if u['approved'] not in (True, False):
            u['approved'] = None
        if 'update_id' in u:
            u['updateid'] = u['update_id']
            u['alias'] = u['update_id']
            if u['alias']:
                # The alias ends in "-<year>-<number>".
                year, number = u['alias'].split('-')[-2:]
                aliases.append((int(year), int(number)))
        if 'date_modified' not in u:
            u['date_modified'] = None

        # Port to new enum types
        if u['request']:
            if u['request'] not in request_enums:
                raise Exception("Unknown request: %s" % u['request'])
            u['request'] = request_enums[u['request']]

        if u['type'] not in type_enums:
            raise Exception("Unknown type: %r" % u['type'])
        u['type'] = type_enums[u['type']]

        if u['status'] not in status_enums:
            raise Exception("Unknown status: %r" % u['status'])
        u['status'] = status_enums[u['status']]

        # Updates already present (matched by title) are left untouched.
        try:
            db.query(Update).filter_by(title=u['title']).one()
        except NoResultFound:
            pass
        else:
            continue

        update = Update(title=u['title'],
                        date_submitted=u['date_submitted'],
                        date_pushed=u['date_pushed'],
                        date_modified=u['date_modified'],
                        release=release,
                        old_updateid=u['updateid'],
                        alias=u['updateid'],
                        pushed=u['pushed'],
                        notes=u['notes'],
                        karma=u['karma'],
                        type=u['type'],
                        status=u['status'],
                        request=u['request'],
                        )
        db.add(update)
        db.flush()

        try:
            user = users[u['submitter']]
        except KeyError:
            try:
                user = db.query(User).filter_by(name=u['submitter']).one()
            except NoResultFound:
                user = User(name=u['submitter'])
                db.add(user)
                db.flush()
            users[u['submitter']] = user
        user.updates.append(update)

        ## Create Package and Build objects
        for pkg, nvr in u['builds']:
            try:
                package = packages[pkg]
            except KeyError:
                try:
                    package = db.query(Package).filter_by(name=pkg).one()
                except NoResultFound:
                    package = Package(name=pkg)
                    db.add(package)
                packages[pkg] = package
            if package.name in critpath[update.release.name]:
                update.critpath = True
            try:
                build = db.query(Build).filter_by(nvr=nvr).one()
            except NoResultFound:
                build = Build(nvr=nvr, package=package)
                db.add(build)
                # NOTE(review): only newly created builds are attached; a
                # build that already exists is left untouched -- confirm.
                update.builds.append(build)

        ## Create all Bugzilla objects for this update
        for bug_num, bug_title, security, parent in u['bugs']:
            try:
                bug = db.query(Bug).filter_by(bug_id=bug_num).one()
            except NoResultFound:
                bug = Bug(bug_id=bug_num, security=security, parent=parent,
                          title=bug_title)
                db.add(bug)
            update.bugs.append(bug)

        ## Create all CVE objects for this update
        for cve_id in u['cves']:
            try:
                cve = db.query(CVE).filter_by(cve_id=cve_id).one()
            except NoResultFound:
                cve = CVE(cve_id=cve_id)
                db.add(cve)
            update.cves.append(cve)

        ## Create all Comments for this update
        for c in u['comments']:
            try:
                timestamp, author, text, karma, anonymous = c
            except ValueError:
                # Older 4-tuples carry no flag: an '@' in the author marks
                # the comment as anonymous.
                timestamp, author, text, karma = c
                anonymous = '@' in author

            comment = Comment(timestamp=timestamp, text=text,
                              karma=karma, anonymous=anonymous)
            db.add(comment)
            db.flush()
            update.comments.append(comment)

            name = u'anonymous' if anonymous else author
            group = None
            if not anonymous and ' (' in name:
                # Authors may be recorded as "name (group)".
                split = name.split(' (')
                name = split[0]
                group = split[1][:-1]
                assert group, name

            try:
                user = users[name]
            except KeyError:
                try:
                    user = db.query(User).filter_by(name=name).one()
                except NoResultFound:
                    user = User(name=name)
                    db.add(user)
                    db.flush()
                users[name] = user

            comment.user = user

            if group:
                try:
                    group = db.query(Group).filter_by(name=group).one()
                except NoResultFound:
                    group = Group(name=group)
                    db.add(group)
                    db.flush()
                # A user comments many times; record the membership once.
                if group not in user.groups:
                    user.groups.append(group)

        db.flush()

    # Hack to get the Bodhi2 alias generator working with bodhi1 data.
    # The new generator assumes that the alias is assigned at submission time,
    # as opposed to push time.
    if aliases:
        year, number = max(aliases)
        print('Highest alias = %r %r' % (year, number))
        up = db.query(Update).filter_by(
            alias=u'FEDORA-%s-%s' % (year, number)).one()
        print(up.title)
        up.date_submitted = up.date_pushed

    db.flush()
    transaction.commit()

    print("\nDatabase migration complete!")
    print(" * %d updates" % db.query(Update).count())
    print(" * %d builds" % db.query(Build).count())
    print(" * %d comments" % db.query(Comment).count())
    print(" * %d users" % db.query(User).count())
    print(" * %d bugs" % db.query(Bug).count())
    print(" * %d CVEs" % db.query(CVE).count())