def AllAffsCSV(file, hlist):
    """Write one CSV row per (email, employer entry) to `file`.

    Columns: email, name, company, date_to, source.
      * date_to is left empty when the entry's date is later than `yesterday`.
      * source is 'domain' when the third element of the employer entry is
        truthy and 'config' otherwise.
    Every address returned by ReverseAlias() for an email gets the same row
    written again under that address.

    file  -- writable file object handed to csv.writer, or None (no-op).
    hlist -- iterable of objects whose `.email` is a sequence of addresses.
    """
    if file is None:
        return
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(['email', 'name', 'company', 'date_to', 'source'])
    # Unique addresses over all hackers, sorted.  A set built from a
    # generator avoids the quadratic list concatenation of sum(..., []).
    emails = sorted(set(addr for hacker in hlist for addr in hacker.email))
    for email in emails:
        if email == '*****@*****.**':
            continue
        email = RemapEmail(email)
        name = LookupEmail(email).name
        for date, empl, domain in MapToEmployer(email, 2):
            datestr = '' if date > yesterday else str(date)
            # sanitise names: " and \ are replaced by '.'
            emplstr = empl.name.replace('"', '.').replace('\\', '.')
            source = 'domain' if domain else 'config'
            writer.writerow([
                email_encode(email), email_encode(name), emplstr, datestr,
                source
            ])
            for em in ReverseAlias(email):
                if em in emails:
                    # The message names the variables to inspect from the
                    # debugger prompt that opens right after it.
                    print('This is bad, reverse email already in emails, check: `em`, `email`, `emails`')
                    pdb.set_trace()
                # Alias rows carry `source` too, so every row has the same
                # five fields as the header (it was missing here before).
                writer.writerow([
                    email_encode(em), email_encode(name), emplstr, datestr,
                    source
                ])
def AllFilesCSV(file, hlist, FileFilter, InvertFilter):
    """Write one CSV row per changed file of every totaled patch.

    Columns: email, name, date, affiliation, file, added, removed, changed.
    A commit is emitted only once even if it shows up under several hackers.
    Files whose third data field is 0 are skipped.  When `FileFilter` is
    truthy, its .search() result (as a boolean) is compared with
    `InvertFilter`, and files for which the two are equal are skipped.

    Does nothing when `file` is None.
    """
    if file is None:
        return
    filter_cache = {}   # filename -> bool(FileFilter.search(filename))
    seen_commits = {}   # commits already written
    out = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    out.writerow(['email', 'name', 'date', 'affiliation', 'file',
                  'added', 'removed', 'changed'])
    for hacker in hlist:
        for patch in hacker.patches:
            if not patch.totaled or patch.commit in seen_commits:
                continue
            employer = patch.author.emailemployer(patch.email, patch.date)
            address = patch.email
            author_name = patch.author.name
            when = str(patch.date)
            affiliation = employer.name.replace('"', '.').replace('\\', '.')
            for (filename, filedata) in patch.files.iteritems():
                if filedata[2] == 0:
                    continue
                if FileFilter:
                    try:
                        hit = filter_cache[filename]
                    except KeyError:
                        hit = bool(FileFilter.search(filename))
                        filter_cache[filename] = hit
                    if hit == InvertFilter:
                        continue
                out.writerow([email_encode(address),
                              email_encode(author_name),
                              when, affiliation, filename,
                              filedata[0], filedata[1], filedata[2]])
            seen_commits[patch.commit] = True
def ReportLine(text, count, pct):
    """Emit one report row with a count and a percentage.

    Nothing is written when `count` is 0.  The text row always goes to
    Outfile; when HTMLfile is set, an HTML row built from TRow is written
    as well and the global HTMLclass is XOR-ed with 1, which changes the
    HClasses entry used by the next HTML row.
    """
    global HTMLclass
    if count == 0:
        return
    plain_row = '%-80s %4d (%.1f%%)\n' % (text, count, pct)
    Outfile.write(email_encode(plain_row))
    if not HTMLfile:
        return
    html_row = TRow % (HClasses[HTMLclass], text, count, pct)
    HTMLfile.write(email_encode(html_row))
    HTMLclass = HTMLclass ^ 1
def ReportLineStr(text, count, extra):
    """Emit one report row with a count and a free-form trailing string.

    Nothing is written when `count` is 0.  The text row always goes to
    Outfile; when HTMLfile is set, an HTML row built from TRowStr is
    written as well and the global HTMLclass is XOR-ed with 1, which
    changes the HClasses entry used by the next HTML row.
    """
    global HTMLclass
    if count == 0:
        return
    plain_row = '%-80s %4d %s\n' % (text, count, extra)
    Outfile.write(email_encode(plain_row))
    if not HTMLfile:
        return
    html_row = TRowStr % (HClasses[HTMLclass], text, count, extra)
    HTMLfile.write(email_encode(html_row))
    HTMLclass = HTMLclass ^ 1
def ReportLine(text, count, pct):
    """Write a "text / count / percent" row to the report outputs.

    A zero `count` suppresses the row entirely.  Outfile always receives
    the plain-text form.  If HTMLfile is truthy it receives the TRow form,
    styled with HClasses[HTMLclass], after which the low bit of the global
    HTMLclass is toggled.
    """
    global HTMLclass
    if count == 0:
        return
    fields = (text, count, pct)
    Outfile.write(email_encode('%-80s %4d (%.1f%%)\n' % fields))
    if HTMLfile:
        css_class = HClasses[HTMLclass]
        HTMLfile.write(email_encode(TRow % ((css_class,) + fields)))
        HTMLclass = HTMLclass ^ 1
def ReportLineStr(text, count, extra):
    """Write a "text / count / extra string" row to the report outputs.

    A zero `count` suppresses the row entirely.  Outfile always receives
    the plain-text form.  If HTMLfile is truthy it receives the TRowStr
    form, styled with HClasses[HTMLclass], after which the low bit of the
    global HTMLclass is toggled.
    """
    global HTMLclass
    if count == 0:
        return
    fields = (text, count, extra)
    Outfile.write(email_encode('%-80s %4d %s\n' % fields))
    if HTMLfile:
        css_class = HClasses[HTMLclass]
        HTMLfile.write(email_encode(TRowStr % ((css_class,) + fields)))
        HTMLclass = HTMLclass ^ 1
def store_patch(patch):
    """Record a non-merge patch in ChangeSets and FileTypes.

    Appends one row to ChangeSets:
        [commit, date, email, domain, author, employer,
         added, removed, max(added, removed)]
    and one row per entry of patch.filetypes to FileTypes:
        [commit, filetype, added, removed]

    Merge patches (patch.merge truthy) are ignored.
    """
    if patch.merge:
        return
    employer = patch.author.emailemployer(patch.email, patch.date)
    employer = employer.name.replace('"', '.').replace('\\', '.')
    # Replace ", \ and ' in one chain.  Previously the result of the
    # "/\ replacement was overwritten by a second assignment that started
    # again from patch.author.name, so only ' was actually replaced.
    author = patch.author.name.replace('"', '.').replace('\\', '.')
    author = email_encode(author.replace("'", '.'))
    try:
        domain = patch.email.split('@')[1]
    except (IndexError, AttributeError):
        # No '@' in the address (or not a string): keep the best-effort
        # fallback of using the raw value as the domain.
        domain = patch.email
    ChangeSets.append([patch.commit, str(patch.date),
                       email_encode(patch.email), domain, author, employer,
                       patch.added, patch.removed,
                       max(patch.added, patch.removed)])
    for (filetype, (added, removed)) in patch.filetypes.iteritems():
        FileTypes.append([patch.commit, filetype, added, removed])
def MapToEmployer(email, unknown=0):
    """Return the list of (end_date, employer) entries for `email`.

    Lookup order in EmailToEmployer:
      1. the normalised full address;
      2. each suffix of the domain, from the last two labels up to the
         whole domain.
    If nothing matches, a single-entry list dated `nextyear` is built
    according to `unknown`:
      0 -- employer named after the address itself
      1 -- employer named by GetHackerDomain(domain, email)
      2 -- the '(Unknown)' employer
    Any other value prints a message and returns None.
    An address without '@' maps to the 'Funky' employer.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.strip().lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass

    namedom = email.split('@')
    if len(namedom) < 2:
        print('Oops...funky email %s' % email_encode(email))
        return [(nextyear, GetEmployer('Funky'))]

    labels = namedom[1].split('.')
    # Seed addr with the full domain.  For a single-label domain such as
    # 'user@localhost' the loop below runs zero times, and addr used to be
    # unbound when unknown == 1.  When the loop does run, its last
    # iteration yields this same value.
    addr = namedom[1]
    for dots in range(len(labels) - 2, -1, -1):
        addr = '.'.join(labels[dots:])
        try:
            return EmailToEmployer[addr]
        except KeyError:
            pass
    #
    # We don't know who they work for.
    #
    if unknown == 0:
        return [(nextyear, GetEmployer(email))]
    elif unknown == 1:
        return [(nextyear, GetEmployer(GetHackerDomain(addr, email)))]
    elif unknown == 2:
        return [(nextyear, GetEmployer('(Unknown)'))]
    else:
        print("Unsupported unknown parameter handling value")
        return None
def MapToEmployer(email, unknown=0):
    """Return the list of (end_date, employer, from_domain) entries for `email`.

    Lookup order in EmailToEmployer:
      1. the normalised full address;
      2. each suffix of the domain, from the last two labels up to the
         whole domain.
    If nothing matches, a single-entry list dated `nextyear` with a False
    third element is built according to `unknown`:
      0 -- employer named after the address itself
      1 -- employer named by GetHackerDomain(domain, email)
      2 -- the '(Unknown)' employer
    Any other value prints a message and returns None.
    An address without '@' maps to the 'Funky' employer.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.strip().lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass

    namedom = email.split('@')
    if len(namedom) < 2:
        print('Oops...funky email %s' % email_encode(email))
        return [(nextyear, GetEmployer('Funky'), False)]

    labels = namedom[1].split('.')
    # Seed addr with the full domain.  For a single-label domain such as
    # 'user@localhost' the loop below runs zero times, and addr used to be
    # unbound when unknown == 1.  When the loop does run, its last
    # iteration yields this same value.
    addr = namedom[1]
    for dots in range(len(labels) - 2, -1, -1):
        addr = '.'.join(labels[dots:])
        try:
            return EmailToEmployer[addr]
        except KeyError:
            pass
    #
    # We don't know who they work for.
    #
    if unknown == 0:
        return [(nextyear, GetEmployer(email), False)]
    elif unknown == 1:
        return [(nextyear, GetEmployer(GetHackerDomain(addr, email)), False)]
    elif unknown == 2:
        return [(nextyear, GetEmployer('(Unknown)'), False)]
    else:
        print("Unsupported unknown parameter handling value")
        return None
def store_patch(patch):
    """Record a non-merge patch in ChangeSets and FileTypes.

    Appends one row to ChangeSets:
        [commit, date, email, domain, author, employer,
         added, removed, max(added, removed)]
    and one row per entry of patch.filetypes to FileTypes:
        [commit, filetype, added, removed]

    Merge patches (patch.merge truthy) are ignored.
    """
    if patch.merge:
        return
    employer = patch.author.emailemployer(patch.email, patch.date)
    employer = employer.name.replace('"', '.').replace('\\', '.')
    # Replace ", \ and ' in one chain.  Previously the result of the
    # "/\ replacement was overwritten by a second assignment that started
    # again from patch.author.name, so only ' was actually replaced.
    author = patch.author.name.replace('"', '.').replace('\\', '.')
    author = email_encode(author.replace("'", '.'))
    try:
        domain = patch.email.split('@')[1]
    except (IndexError, AttributeError):
        # No '@' in the address (or not a string): keep the best-effort
        # fallback of using the raw value as the domain.
        domain = patch.email
    ChangeSets.append([
        patch.commit,
        str(patch.date),
        email_encode(patch.email), domain, author, employer, patch.added,
        patch.removed,
        max(patch.added, patch.removed)
    ])
    for (filetype, (added, removed)) in patch.filetypes.iteritems():
        FileTypes.append([patch.commit, filetype, added, removed])
def ReportByFileType(hacker_list): total = {} total_by_hacker = {} BeginReport('Developer contributions by type') for h in hacker_list: by_hacker = {} for patch in h.patches: # Get a summary by hacker for (filetype, (added, removed)) in patch.filetypes.iteritems(): if by_hacker.has_key(filetype): by_hacker[filetype][patch.ADDED] += added by_hacker[filetype][patch.REMOVED] += removed else: by_hacker[filetype] = [added, removed] # Update the totals if total.has_key(filetype): total[filetype][patch.ADDED] += added total[filetype][patch.REMOVED] += removed else: total[filetype] = [added, removed, []] # Print a summary by hacker print email_encode(h.full_name_with_aff()) for filetype, counters in by_hacker.iteritems(): print '\t', filetype, counters h_added = by_hacker[filetype][patch.ADDED] h_removed = by_hacker[filetype][patch.REMOVED] total[filetype][2].append( [h.full_name_with_aff(), h_added, h_removed]) # Print the global summary BeginReport('Contributions by type and developers') for filetype, (added, removed, hackers) in total.iteritems(): print filetype, added, removed for h, h_added, h_removed in hackers: print email_encode('\t%s: [%d, %d]' % (h, h_added, h_removed)) # Print the very global summary BeginReport('General contributions by type') for filetype, (added, removed, hackers) in total.iteritems(): print filetype, added, removed
def ReportByFileType (hacker_list): total = {} total_by_hacker = {} BeginReport ('Developer contributions by type') for h in hacker_list: by_hacker = {} for patch in h.patches: # Get a summary by hacker for (filetype, (added, removed)) in patch.filetypes.iteritems(): if by_hacker.has_key(filetype): by_hacker[filetype][patch.ADDED] += added by_hacker[filetype][patch.REMOVED] += removed else: by_hacker[filetype] = [added, removed] # Update the totals if total.has_key(filetype): total[filetype][patch.ADDED] += added total[filetype][patch.REMOVED] += removed else: total[filetype] = [added, removed, []] # Print a summary by hacker print email_encode(h.full_name_with_aff()) for filetype, counters in by_hacker.iteritems(): print '\t', filetype, counters h_added = by_hacker[filetype][patch.ADDED] h_removed = by_hacker[filetype][patch.REMOVED] total[filetype][2].append ([h.full_name_with_aff(), h_added, h_removed]) # Print the global summary BeginReport ('Contributions by type and developers') for filetype, (added, removed, hackers) in total.iteritems(): print filetype, added, removed for h, h_added, h_removed in hackers: print email_encode('\t%s: [%d, %d]' % (h, h_added, h_removed)) # Print the very global summary BeginReport ('General contributions by type') for filetype, (added, removed, hackers) in total.iteritems(): print filetype, added, removed
def OutputCSV(file):
    """Write every entry of PeriodCommitHash as a CSV row to `file`.

    Columns: Name, Email, Affliation, Date, Added, Removed, Changed,
    Changesets (header spelling kept as emitted).  Does nothing when
    `file` is None.
    """
    if file is None:
        return

    def scrub(value):
        # sanitise names " is common and \" sometimes too
        return value.replace('"', '.').replace('\\', '.')

    out = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    out.writerow(['Name', 'Email', 'Affliation', 'Date', 'Added',
                  'Removed', 'Changed', 'Changesets'])
    for _key, stat in PeriodCommitHash.items():
        employer_name = scrub(stat.employer.name)
        author_name = email_encode(scrub(stat.name))
        out.writerow([author_name, stat.email, employer_name, stat.date,
                      stat.added, stat.removed, stat.changed,
                      stat.changesets])
def AllAffsCSV(file, hlist):
    """Write one CSV row per (email, employer entry) to `file`.

    Columns: email, name, company, date_to.  date_to is left empty when
    the entry's date is later than `yesterday`.  Every address returned by
    ReverseAlias() for an email gets the same row written again under that
    address.  Does nothing when `file` is None.
    """
    if file is None:
        return
    out = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    out.writerow(['email', 'name', 'company', 'date_to'])
    # Collect the unique addresses of all hackers, then sort them.
    unique = set()
    for hacker in hlist:
        unique.update(hacker.email)
    emails = sorted(unique)
    for email in emails:
        if email == '*****@*****.**':
            continue
        email = RemapEmail(email)
        name = LookupEmail(email).name
        for date, empl in MapToEmployer(email, 2):
            datestr = '' if date > yesterday else str(date)
            emplstr = empl.name.replace('"', '.').replace('\\', '.')
            out.writerow([email_encode(email), email_encode(name),
                          emplstr, datestr])
            for em in ReverseAlias(email):
                if em in emails:
                    # The message names the variables to inspect from the
                    # debugger prompt that opens right after it.
                    print('This is bad, reverse email already in emails, check: `em`, `email`, `emails`')
                    pdb.set_trace()
                out.writerow([email_encode(em), email_encode(name),
                              emplstr, datestr])
def AllFilesCSV(file, hlist, FileFilter, InvertFilter):
    """Dump per-file change counts of all totaled patches as CSV.

    Columns: email, name, date, affiliation, file, added, removed, changed.
    Each commit is written at most once.  Files whose third data field is 0
    are left out.  With a truthy `FileFilter`, a file is left out when the
    boolean result of FileFilter.search(filename) equals `InvertFilter`.

    Does nothing when `file` is None.
    """
    if file is None:
        return
    search_results = {}   # filename -> cached boolean filter result
    done = {}             # commit -> True once its rows are written
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    columns = ['email', 'name', 'date', 'affiliation', 'file', 'added',
               'removed', 'changed']
    writer.writerow(columns)
    for hacker in hlist:
        for patch in hacker.patches:
            if not patch.totaled or patch.commit in done:
                continue
            empl = patch.author.emailemployer(patch.email, patch.date)
            email, aname = patch.email, patch.author.name
            datestr = str(patch.date)
            emplstr = empl.name.replace('"', '.').replace('\\', '.')
            for (filename, filedata) in patch.files.iteritems():
                if filedata[2] == 0:
                    continue
                if FileFilter:
                    if filename not in search_results:
                        search_results[filename] = bool(
                            FileFilter.search(filename))
                    if search_results[filename] == InvertFilter:
                        continue
                writer.writerow([
                    email_encode(email), email_encode(aname), datestr,
                    emplstr, filename,
                    filedata[0], filedata[1], filedata[2],
                ])
            done[patch.commit] = True
def DumpDB():
    """Write a text dump of HackersByName to 'database.dump'.

    Hackers are written in sorted name order.  For each one: a summary
    line (id, name, patch count, added/removed, sign-off count), then every
    email address followed by its employer entries (end date and employer
    name), then the versions list when it is non-empty.  The file is
    opened with mode 'w', so an existing dump is overwritten.
    """
    # The `with` block guarantees the dump is flushed and closed; the
    # handle was previously never closed.
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName.keys()):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' %
                      (h.id, h.name, len(h.patches), h.added, h.removed,
                       len(h.signoffs)))
            # h.employer is indexed in parallel with h.email.
            for i, addr in enumerate(h.email):
                out.write('\t%s -> \n' % (email_encode(addr)))
                for date, empl, dom in h.employer[i]:
                    out.write('\t\t %d-%d-%d %s\n' %
                              (date.year, date.month, date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Register that `email` belongs to `employer` until `end`.

    The address is lower-cased and the employer resolved via GetEmployer().
    EmailToEmployer[email] holds (end_date, employer) tuples; the new entry
    is inserted in front of the first entry with a later date, or appended
    if there is none.  A warning is printed for each existing entry that
    has the same end date, but the new entry is still added.

    end -- end date of the affiliation; None means `nextyear`.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    new_entry = (end, GetEmployer(employer))
    try:
        entries = EmailToEmployer[email]
    except KeyError:
        # First mapping for this address.
        EmailToEmployer[email] = [new_entry]
        return
    for pos, (date, _xempl) in enumerate(entries):
        if date == end:  # probably both nextyear
            print('WARNING: duplicate email/empl for %s' % (email_encode(email)))
        if date > end:
            entries.insert(pos, new_entry)
            return
    entries.append(new_entry)
def DumpDB():
    """Write a text dump of HackersByName to 'database.dump'.

    Hackers are written in sorted name order.  For each one: a summary
    line (id, name, patch count, added/removed, sign-off count), then every
    email address followed by its employer entries (end date and employer
    name), then the versions list when it is non-empty.  The file is
    opened with mode 'w', so an existing dump is overwritten.
    """
    # The `with` block guarantees the dump is flushed and closed; the
    # handle was previously never closed.
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName.keys()):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' %
                      (h.id, h.name, len(h.patches), h.added, h.removed,
                       len(h.signoffs)))
            # h.employer is indexed in parallel with h.email.
            for i, addr in enumerate(h.email):
                out.write('\t%s -> \n' % (email_encode(addr)))
                for date, empl in h.employer[i]:
                    out.write('\t\t %d-%d-%d %s\n' %
                              (date.year, date.month, date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
def OutputCSV(file):
    """Dump the statistics held in PeriodCommitHash as CSV rows.

    One row per value of PeriodCommitHash with the columns Name, Email,
    Affliation, Date, Added, Removed, Changed, Changesets (header spelling
    kept as emitted).  `file` may be None, in which case nothing happens.
    """
    if file is None:
        return
    header = ['Name', 'Email', 'Affliation', 'Date', 'Added', 'Removed',
              'Changed', 'Changesets']
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(header)
    for _key, stat in PeriodCommitHash.items():
        # sanitise names " is common and \" sometimes too
        empl_name = stat.employer.name.replace('"', '.')
        empl_name = empl_name.replace('\\', '.')
        author_name = stat.name.replace('"', '.')
        author_name = email_encode(author_name.replace('\\', '.'))
        row = [author_name, stat.email, empl_name, stat.date, stat.added,
               stat.removed, stat.changed, stat.changesets]
        writer.writerow(row)
def AddEmailEmployerMapping(email, employer, end=nextyear, domain=False):
    """Register that `email` belongs to `employer` until `end`.

    The address is lower-cased and the employer resolved via GetEmployer().
    EmailToEmployer[email] holds (end_date, employer, domain) tuples; the
    new entry is inserted in front of the first entry with a later date,
    or appended if there is none.  A warning is printed for each existing
    entry that has the same end date, but the new entry is still added.

    end    -- end date of the affiliation; None means `nextyear`.
    domain -- stored unchanged as the third element of the entry.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    new_entry = (end, GetEmployer(employer), domain)
    try:
        entries = EmailToEmployer[email]
    except KeyError:
        # First mapping for this address.
        EmailToEmployer[email] = [new_entry]
        return
    for pos, (date, _xempl, _dom) in enumerate(entries):
        if date == end:  # probably both nextyear
            print('WARNING: duplicate email/empl for %s' % (email_encode(email)))
        if date > end:
            entries.insert(pos, new_entry)
            return
    entries.append(new_entry)
def ReportAll(hlist, cscount):
    """Report every developer that has patches, most-first per ComparePCount.

    For each developer with at least one patch a ReportLine() row is
    emitted (patch count and its share of `cscount`) and a
    "<name with affiliation, tab-separated>\\t<count>" line is written to
    'alldevs.txt'.  Stops once ListCount developers have been reported.

    hlist   -- list of hackers; it is sorted IN PLACE with ComparePCount.
    cscount -- total changeset count used as the percentage denominator.
    """
    ulist = hlist   # same list object: the caller's list gets reordered
    ulist.sort(ComparePCount)
    count = 0
    BeginReport('All developers')
    # `with` closes alldevs.txt even if reporting raises part-way through.
    with open('alldevs.txt', 'w') as alldevsFile:
        for h in ulist:
            pcount = len(h.patches)
            if pcount > 0:
                ReportLine(h.full_name_with_aff(), pcount,
                           (pcount * 100.0) / cscount)
                alldevsFile.write(
                    email_encode('%s\t%d\n' %
                                 (h.full_name_with_aff_tabs(), pcount)))
                count += 1
            if count >= ListCount:
                break
    EndReport()
def ReportUnknowns(hlist, cscount):
    """Report developers for whom IsUnknown() is true.

    For each such developer with at least one patch a ReportLine() row is
    emitted (patch count and its share of `cscount`) and a
    "<name with affiliation, tab-separated>\\t<count>" line is written to
    'unknowns.txt'.  Stops once ListCount developers have been reported.

    hlist   -- list of hackers (left unmodified; a filtered copy is sorted).
    cscount -- total changeset count used as the percentage denominator.
    """
    #
    # Trim the list to just the unknowns; try to work properly whether
    # mapping to (Unknown) is happening or not.
    #
    ulist = [h for h in hlist if IsUnknown(h)]
    ulist.sort(ComparePCount)
    count = 0
    BeginReport('Developers with unknown affiliation')
    # `with` closes unknowns.txt even if reporting raises part-way through.
    with open('unknowns.txt', 'w') as unknownsFile:
        for h in ulist:
            pcount = len(h.patches)
            if pcount > 0:
                ReportLine(h.full_name_with_aff(), pcount,
                           (pcount * 100.0) / cscount)
                unknownsFile.write(
                    email_encode('%s\t%d\n' %
                                 (h.full_name_with_aff_tabs(), pcount)))
                count += 1
            if count >= ListCount:
                break
    EndReport()
def EndReport(text=None):
    """Finish the current report section.

    A truthy `text` is written to Outfile followed by a newline.  When
    HTMLfile is set, the closing '</table>' markup is written to it.
    """
    if text:
        closing_line = '%s\n' % (text, )
        Outfile.write(email_encode(closing_line))
    if not HTMLfile:
        return
    HTMLfile.write('</table>\n\n')
def GripeAboutAuthorName(name):
    """Print a one-time complaint about `name` being used as an author name.

    Names already present in GripedAuthorNames are ignored, so each name
    is reported only once.
    """
    if name not in GripedAuthorNames:
        GripedAuthorNames.append(name)
        message = '%s is an author name, probably not what you want' % (name)
        print(email_encode(message))
def AddEmailAlias(variant, canonical):
    """Map the address `variant` to `canonical` in EmailAliases.

    If `variant` already has an alias, a notice goes to stderr and the
    existing entry is overwritten.
    """
    already_known = variant in EmailAliases
    if already_known:
        notice = 'Duplicate email alias for %s\n' % (email_encode(variant))
        sys.stderr.write(notice)
    EmailAliases[variant] = canonical
def Write(stuff):
    """Write `stuff` to Outfile after passing it through email_encode()."""
    encoded = email_encode(stuff)
    Outfile.write(encoded)
def Write(stuff):
    """Send email_encode(stuff) to the report output file, Outfile."""
    payload = email_encode(stuff)
    Outfile.write(payload)
def __init__(self, name, email, employer, date):
    """Create a statistics record with all counters at zero.

    `name` and `email` are stored after passing through email_encode();
    `employer` and `date` are stored as given.
    """
    self.name = email_encode(name)
    self.email = email_encode(email)
    self.employer = employer
    self.date = date
    # Counters start at zero.
    self.added = 0
    self.removed = 0
    self.changed = 0
    self.changesets = 0
def __init__(self, name, email, employer, date):
    """Set up the record: encoded name/email, employer, date, zero counters.

    `name` and `email` go through email_encode() before being stored.
    """
    for counter in ('added', 'removed', 'changed', 'changesets'):
        setattr(self, counter, 0)
    self.name = email_encode(name)
    self.email = email_encode(email)
    self.employer = employer
    self.date = date
def AddEmailAlias(variant, canonical):
    """Record `canonical` as the canonical address for `variant`.

    A variant that is already in EmailAliases triggers a message on
    stderr; its entry is then replaced by the new one.
    """
    if variant in EmailAliases:
        sys.stderr.write(
            'Duplicate email alias for %s\n' % (email_encode(variant)))
    EmailAliases[variant] = canonical
def EndReport(text=None):
    """Close the current report section.

    Writes `text` plus a newline to Outfile when `text` is truthy, and
    writes the closing '</table>' markup to HTMLfile when HTMLfile is set.
    """
    if text:
        trailer = email_encode('%s\n' % (text, ))
        Outfile.write(trailer)
    if HTMLfile:
        table_end = '</table>\n\n'
        HTMLfile.write(table_end)
def GripeAboutAuthorName(name):
    """Complain on stdout, once per name, that `name` is an author name.

    GripedAuthorNames remembers the names already reported.
    """
    already_griped = name in GripedAuthorNames
    if already_griped:
        return
    GripedAuthorNames.append(name)
    complaint = '%s is an author name, probably not what you want' % (name)
    print(email_encode(complaint))