def buildNetworks(project, startDate, stopDate, window, overlap, binary=False, removeNonAgents=False):
    """Slide a window across [startDate, stopDate) and build the email and
    source-code networks for every window position.

    project - the masterproject to work on
    startDate - the first date to start
    stopDate - the last date to process
    window - the width of the sliding window, in weeks
    overlap - the overlap between windows, in weeks
    binary, removeNonAgents - forwarded unchanged to the network builders
    """
    windowStart = startDate
    devMap = {}
    while windowStart < stopDate:
        windowEnd = windowStart + timeutil.makeTimeDelta(weeks=window)
        buildEmailNetwork(project, windowStart, windowEnd,
                          binary=binary, removeNonAgents=removeNonAgents)
        # thread the developer map through each window so identities stay
        # consistent across successive source-code networks
        devMap = buildSourceCodeNetwork(project, windowStart, windowEnd, devMap,
                                        binary=binary, removeNonAgents=removeNonAgents)
        # consecutive windows advance by (window - overlap) weeks
        windowStart = windowStart + timeutil.makeTimeDelta(weeks=window-overlap)
def dumpNetworks(project, startDate, stopDate, window, overlap):
    """Dump person-file, file-file, and person-person co-commit link counts
    for each sliding window to flat text files named "<master>.d.NNN".

    project - name of the MasterProject to process
    startDate - date of the first window
    stopDate - date at which to stop generating windows
    window - width of each window, in weeks
    overlap - overlap between consecutive windows, in weeks
    """
    currentDate = startDate
    # only the first master project matching the name is used
    mp = MasterProject.select(MasterProject.q.name == project)[0]
    ctr = 0  # window counter, embedded in each output filename
    while currentDate < stopDate:
        thisLinks = {}      # link key ("pX-fY", "fX-fY", "pX-pY") -> count
        userFileLinks = {}  # file id -> list of person ids who touched it
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=window)
        # every commit inside this window belonging to the master project
        for com in CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                        CVSCommit.q.startDate <= nextDate,
                                        CVSCommit.q.projectID == Project.q.id,
                                        Project.q.masterProjectID == mp.id)):
            try:
                pid = com.user.persons[0].id
            except IndexError:
                # the committer has no linked person record - skip the commit
                log.warn("index error on user %d - ctr %d", com.userID, ctr)
                continue
            thisCommitFiles = [f.id for f in com.files]
            thisCommitFiles.sort()
            # person-file links, plus remember who touched each file
            for f in thisCommitFiles:
                hkey = "p%d-f%d" % (pid, f)
                thisLinks[hkey] = thisLinks.get(hkey,0) + 1
                userFileLinks[f] = userFileLinks.get(f,[]) + [pid]
            # file-file co-commit links (j starts at i, so every file also
            # gets a "fX-fX" self-link)
            for i in xrange(0, len(thisCommitFiles)):
                for j in xrange(i, len(thisCommitFiles)):
                    hkey="f%d-f%d" % (thisCommitFiles[i], thisCommitFiles[j])
                    thisLinks[hkey] = thisLinks.get(hkey,0) + 1
        # person-person links: pairs of people who touched the same file
        for ufl in userFileLinks.values():
            uflNoDupe = list(Set(ufl))
            # NOTE(review): Set iteration order is arbitrary, so the pair
            # ordering inside "pX-pY" keys is not stable between runs - confirm
            # downstream consumers don't care
            for i in xrange(0, len(uflNoDupe)):
                for j in xrange(i, len(uflNoDupe)):
                    hkey = "p%d-p%d" % (uflNoDupe[i], uflNoDupe[j])
                    thisLinks[hkey] = thisLinks.get(hkey,0) + 1
        f = open("%s.d.%03d" % (mp.name.replace(os.sep,"_"), ctr), "w")
        f.write("# StartDate: %s\n" % (currentDate))
        # NOTE(review): this writes the overall stopDate rather than this
        # window's end (nextDate) - looks like a bug, confirm intent
        f.write("# StopDate: %s\n" % (stopDate))
        for key,val in thisLinks.iteritems():
            f.write("%s, %d\n" % (key, val))
        f.close()
        ctr = ctr + 1
        # consecutive windows advance by (window - overlap) weeks
        currentDate = currentDate + timeutil.makeTimeDelta(weeks=window-overlap)
def dumpMailList(mlname, firstDate=None, lastDate=None, delta=7):
    """Print per-interval message and thread counts for a mailing list and
    return them as four parallel lists:
    [days since MINDATE, message count, new threads, replies].

    mlname - name of the mailing list to process
    firstDate - first date to consider (defaults to the earliest dated message)
    lastDate - last date to consider (defaults to the latest dated message)
    delta - bin width, in days

    Raises KeyError when the name matches zero or more than one list.
    """
    ml = MailList.select(MailList.q.name==mlname)
    if ml.count() < 1:
        raise KeyError("""Mailing List "%s" not found""" % (mlname))
    if ml.count() > 1:
        raise KeyError("""Mailing List "%s" specifies multiple lists""" % (mlname))
    ml = ml[0]
    # ignore all the messages we can't get a date for
    messages = MailMessage.select(AND(MailMessage.q.listID==ml.id,
                                      MailMessage.q.date!=None),
                                  orderBy=MailMessage.q.date)
    if not firstDate:
        firstDate = messages[0].date
    if not lastDate:
        lastDate = messages.reversed()[0].date
    print firstDate, lastDate
    # snap the start date back to midnight of its day
    firstDate = timeutil.makeDateTimeFromShortString("%04d%02d%02d" % (firstDate.year, firstDate.month, firstDate.day))
    bins = [[],[],[],[]]
    while firstDate < lastDate:
        nextDate = firstDate + timeutil.makeTimeDelta(days=delta)
        # the messages falling inside this bin: [firstDate, nextDate)
        messages = MailMessage.select(AND(MailMessage.q.listID==ml.id,
                                          MailMessage.q.date >= firstDate,
                                          MailMessage.q.date < nextDate))
        numMessages = messages.count()
        newThreads = 0  # messages that start a thread (no reply-to)
        oldThreads = 0  # messages replying to an existing thread
        for msg in messages:
            if msg.replyTo == None:
                newThreads = newThreads + 1
            else:
                oldThreads = oldThreads + 1
        print "%04d-%02d-%02d, %d, %d, %d" % (firstDate.year, firstDate.month, firstDate.day, numMessages, newThreads, oldThreads)
        bins[0].append(abs(firstDate-MINDATE).days)
        bins[1].append(numMessages)
        bins[2].append(newThreads)
        bins[3].append(oldThreads)
        firstDate = nextDate
    return bins
def buildData(weeks, start, stop, overlap):
    """Build an agentxagent network in weeks intervals, also spit out some CSV
    files with statistics for each of the agents.

    For each window this writes:
      - agentNetworkNN.xml  (DyNetML serialization)
      - agentNetworkNN.dl / .gwt (via external dynetml_export / dl2gwt.py)
      - agentNetworkNN.dbf  (per-agent statistics)
      - agentNetworkNN.csv  (per-agent statistics)

    NOTE: several functions in this file share the name buildData; the last
    definition wins at import time.

    @param weeks - the number of weeks to use for each interval
    @param start - the date to start
    @param stop - the date to stop
    @param overlap - number of weeks to overlap analysis
    """
    agents = User.select()
    currentDate = start
    users = {}       # user name -> per-window statistics record
    lagUsers1 = {}   # the users dict from one window back
    lagUsers2 = {}   # the users dict from two windows back
    ctr = 0
    while (currentDate < stop):
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        log.info("Processing data from %s to %s", currentDate, nextDate)
        # shift the lag snapshots back one window
        lagUsers2 = lagUsers1
        lagUsers1 = users
        # BUG FIX: rebind users to a brand-new dict instead of resetting the
        # old one in place.  The original map(lambda ...: users.__setitem__(...))
        # mutated the very dict that lagUsers1 now aliases, wiping out last
        # window's "dev" flags so devLag1/devLag2 were always 0.
        users = {}
        for agent in agents:
            users[agent.name] = {"name": agent.name, "id": agent.id,
                                 "dev": 0, "projects": 0, "commits": 0,
                                 "devLag1": 0, "devLag2": 0, "files": 0,
                                 "totalCommits": 0, "totalProjects": 0,
                                 "totalFiles": 0, "commitTime": 0}
        log.info("Building global data on users")
        # fill in some of the stat information for all of the users
        for user in users.itervalues():
            # get the total commits to this point:
            totalCommits = CVSCommit.select(AND(CVSCommit.q.startDate > start,
                                                CVSCommit.q.startDate <= stopDate,
                                                CVSCommit.q.userID == user["id"])).count()
            totalProjects = Project.select(AND(Project.q.id == CVSCommit.q.projectID,
                                               CVSCommit.q.startDate > start,
                                               CVSCommit.q.startDate <= stopDate,
                                               CVSCommit.q.userID == user["id"]),
                                           distinct=True).count()
            totalFiles = File.select(AND(CVSCommit.q.startDate > start,
                                         CVSCommit.q.startDate <= stopDate,
                                         CVSCommit.q.userID == user["id"],
                                         FileCommit.q.cvsCommitID == CVSCommit.q.id,
                                         FileCommit.q.fileID == File.q.id),
                                     distinct=True).count()
            # commitTime: days between the user's first commit after start and
            # the end of this window
            try:
                firstCommit = CVSCommit.select(AND(CVSCommit.q.startDate > start,
                                                   CVSCommit.q.userID == user["id"]),
                                               orderBy=CVSCommit.q.startDate,
                                               limit=1)[0]
            except IndexError:
                # ROBUSTNESS FIX: users with no commits at all previously
                # raised an unhandled IndexError here
                firstCommit = None
            if firstCommit is not None and firstCommit.startDate < stopDate:
                commitTime = (stopDate - firstCommit.startDate).days
            else:
                commitTime = 0
            users[user["name"]]["totalCommits"] = totalCommits
            users[user["name"]]["totalProjects"] = totalProjects
            users[user["name"]]["totalFiles"] = totalFiles
            users[user["name"]]["commitTime"] = commitTime
            # lagged activity flags from the previous two windows
            if lagUsers1.has_key(user["name"]):
                users[user["name"]]['devLag1'] = lagUsers1[user["name"]]["dev"]
            else:
                users[user["name"]]['devLag1'] = 0
            if lagUsers2.has_key(user["name"]):
                users[user["name"]]['devLag2'] = lagUsers2[user["name"]]["dev"]
            else:
                users[user["name"]]['devLag2'] = 0
        projects = {}  # project name -> names of users active on it
        # create the basic network
        network = DynamicNetwork()
        metaMatrix = MetaMatrix()
        network.addMetaMatrix(metaMatrix)
        devs = NodeSet(id="agent", type="agent")
        metaMatrix.addNodeSet(devs)
        graph = Graph(sourceType=devs, targetType=devs, directed=False)
        metaMatrix.addGraph(graph)
        # users with at least one commit inside this window
        activeUsers = User.select(AND(User.q.id == CVSCommit.q.userID,
                                      CVSCommit.q.startDate >= currentDate,
                                      CVSCommit.q.startDate <= stopDate),
                                  distinct=True)
        log.info("Building additional data on %d active users", activeUsers.count())
        for user in activeUsers:
            users[user.name]["dev"] = 1
            projs = Project.select(AND(CVSCommit.q.userID == user.id,
                                       CVSCommit.q.startDate >= currentDate,
                                       CVSCommit.q.startDate <= stopDate,
                                       CVSCommit.q.projectID == Project.q.id),
                                   distinct=True)
            log.debug("user: %s - active projects: %d", user.name, projs.count())
            users[user.name]["projects"] = projs.count()
            commits = CVSCommit.select(AND(CVSCommit.q.userID == user.id,
                                           CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.startDate <= stopDate))
            users[user.name]["commits"] = commits.count()
            # BUG FIX: key was misspelled "file", so the "files" column in the
            # DBF and CSV output below was always zero
            users[user.name]["files"] = File.select(AND(CVSCommit.q.startDate > currentDate,
                                                        CVSCommit.q.startDate <= stopDate,
                                                        CVSCommit.q.userID == user.id,
                                                        FileCommit.q.cvsCommitID == CVSCommit.q.id,
                                                        FileCommit.q.fileID == File.q.id),
                                                    distinct=True).count()
            for proj in projs:
                if not projects.has_key(proj.name):
                    projects[proj.name] = []
                projects[proj.name].append(user.name)
        # create nodes for each of the agents
        userNodes = {}
        for u in users.iterkeys():
            userNodes[u] = Node(id=u)
            devs.addNode(userNodes[u])
        # link the nodes together in a clique per shared project
        for p in projects.itervalues():
            if len(p) <= 1:
                continue
            for i in xrange(len(p)):
                for j in xrange(i+1,len(p)):
                    e = graph.getEdge(userNodes[p[i]], userNodes[p[j]])
                    if e:
                        e.value = e.value + 1
                    else:
                        e = Edge(source=userNodes[p[i]], target=userNodes[p[j]],
                                 type="int", value=1)
                        graph.addEdge(e)
        # serialize the DyNetML network
        fn = "agentNetwork%02d.xml" % (ctr)
        s = network.toXml().serialize(format=1)
        f = open(fn, "w")
        f.write(s)
        f.close()
        # now create the GWT file from the network
        outputFile = "agentNetwork%02d%s.dl" % (ctr, graph.id)
        p = subprocess.Popen("/home/pwagstro/bin/dynetml_export -m dl -o agentNetwork%02d %s" % (ctr, fn), shell=True)
        sts = os.waitpid(p.pid, 0)
        os.rename(outputFile, "agentNetwork%02d.dl" % (ctr))
        p = subprocess.Popen("/usr/bin/python2.4 dl2gwt.py agentNetwork%02d.dl" % (ctr), shell=True)
        sts = os.waitpid(p.pid, 0)
        # write the definition of the dbf file
        # dbfs have a limit of 11 characters for the title of each row
        dbfn=dbf_new()
        dbfn.add_field("id",'N',5)
        dbfn.add_field("name",'C',80)
        dbfn.add_field("dev",'N',2)
        dbfn.add_field("devLag1", 'N', 2)
        dbfn.add_field("devLag2", 'N', 2)
        dbfn.add_field("projects", 'N', 3)
        dbfn.add_field("commits", 'N', 5)
        dbfn.add_field("files", 'N', 5)
        dbfn.add_field("totalCommit", 'N', 5)
        dbfn.add_field("totalFiles", 'N', 5)
        dbfn.add_field("totalProj", 'N', 5)
        dbfn.add_field("commitTime", 'N', 5)
        dbfn.write("agentNetwork%02d.dbf" % (ctr))
        # write the DBF file
        dbft = Dbf()
        dbft.openFile("agentNetwork%02d.dbf" % (ctr), readOnly=0)
        ctr2 = 1
        for key,val in users.iteritems():
            rec = DbfRecord(dbft)
            rec['id'] = ctr2
            rec['name'] = key
            rec['dev'] = val['dev']
            rec['devLag1'] = val['devLag1']
            rec['devLag2'] = val['devLag2']
            rec['projects'] = val['projects']
            rec['commits'] = val['commits']
            rec['files'] = val['files']
            rec['totalCommit'] = val['totalCommits']
            rec['totalFiles'] = val['totalFiles']
            rec['totalProj'] = val['totalProjects']
            rec['commitTime'] = val['commitTime']
            rec.store()
            ctr2 = ctr2 + 1
        dbft.close()
        # dump out the stats to a CSV file too
        fn = "agentNetwork%02d.csv" % (ctr)
        f = open(fn, "w")
        writer = csv.writer(f)
        ctr2 = 1
        writer.writerow(["#ctr", "name", "dev", "projects", "commits", "files", "totalCommits", "totalFiles", "totalProjects", "commitTime"])
        for item in [[key, val["dev"], val["projects"], val["commits"], val["files"], val["totalCommits"], val["totalFiles"], val["totalProjects"], val["commitTime"]] for key,val in users.iteritems()]:
            writer.writerow([ctr2] + item)
            ctr2 = ctr2 + 1
        f.close()
        # flush the ORM cache before the next window
        expire_all()
        currentDate = nextDate
        ctr = ctr + 1
def buildData(weeks, overlap, start, stop, project=None):
    """Build agent-x-agent networks where two agents are linked whenever they
    touched the same file within a window; write DyNetML (.xml), DL/GWT, and
    raw (.dat) output for every window.

    NOTE: several functions in this file share the name buildData; the last
    definition wins at import time.

    @param weeks - width of each window, in weeks
    @param overlap - overlap between consecutive windows, in weeks
    @param start - date of the first window
    @param stop - date at which to stop
    @param project - optional project name limiting users and commits
    """
    if project:
        log.info("Only generating data from project %s", project)
    currentDate = start
    ctr = 0
    devs = NodeSet(id="agent", type="agent")
    userNodes = {}  # User.id -> Node, shared across all windows
    # create all of the nodes for the users
    # do this only once
    # hack for getting only evolution users...
    if project != None:
        projectId = Project.select(Project.q.name == project)[0].id
        users = User.select(AND(User.q.id == CVSCommit.q.userID,
                                CVSCommit.q.projectID == projectId),
                            distinct=True)
    else:
        users = User.select()
    for user in users:
        userNodes[user.id] = Node(id=user.name)
        devs.addNode(userNodes[user.id])
    ctr = 0
    while (currentDate < stop):
        fileUsers = {}  # filename -> set of user ids who touched it
        # the devs node set is reused, so drop last window's edges
        devs.clearEdges()
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        log.info("Starting Date: %s - Ending Date: %s", currentDate, stopDate)
        # set up the meta matrix stuff
        network = DynamicNetwork()
        metaMatrix = MetaMatrix()
        network.addMetaMatrix(metaMatrix)
        files = NodeSet(id="resource", type="resource")
        metaMatrix.addNodeSet(devs)
        metaMatrix.addNodeSet(files)
        agentGraph = Graph(sourceType=devs, targetType=devs, directed=False)
        metaMatrix.addGraph(agentGraph)
        # get all of the CVS Commit information
        # NOTE(review): other functions in this file filter on startDate only;
        # this one also requires the commit's stopDate <= window end - confirm
        if project:
            commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.stopDate <= stopDate,
                                           CVSCommit.q.projectID == projectId),
                                       distinct=True)
        else:
            # NOTE(review): hard-coded project id 141 when no project is given
            # - presumably the "evolution users" hack mentioned above; confirm
            commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.stopDate <= stopDate,
                                           CVSCommit.q.projectID == 141),
                                       distinct=True)
        log.debug("Commits: %d", commits.count())
        for com in commits:
            for fl in com.files:
                filename = com.project.name + "/" + fl.name
                # skip files with blacklisted extensions or names
                base, ext = os.path.splitext(filename)
                if ext in BADEXTS:
                    log.debug("Extension ignore file: %s", filename)
                    continue
                path, fn = os.path.split(filename)
                if fn in BADFILES:
                    log.debug("Name ignore file: %s", filename)
                    continue
                if not fileUsers.has_key(filename):
                    fileUsers[filename] = Set()
                fileUsers[filename].add(com.userID)
        # clique the users who touched each file; edge value counts shared files
        for val in fileUsers.itervalues():
            flList = list(val)
            for i in xrange(0,len(flList)):
                for j in xrange(i+1, len(flList)):
                    e = agentGraph.getEdge(userNodes[flList[i]], userNodes[flList[j]])
                    if e:
                        e.value = e.value + 1
                    else:
                        e = Edge(source=userNodes[flList[i]], target=userNodes[flList[j]],
                                 type="int", value=1)
                        agentGraph.addEdge(e)
        # serialize the DyNetML network
        fn = "agentFileNetwork%02d.xml" % (ctr)
        log.info("Writing network to file %s - %d nodes, %d edges", fn, len(devs), len(agentGraph))
        s = network.toXml().serialize(format=1)
        f = open(fn,"w")
        f.write(s)
        f.close()
        # now create the GWT file from the network
        outputFile = "agentFileNetwork%02d%s.dl" % (ctr, agentGraph.id)
        p = subprocess.Popen("/home/pwagstro/bin/dynetml_export -m dl -o agentFileNetwork%02d %s" % (ctr, fn), shell=True)
        sts = os.waitpid(p.pid, 0)
        os.rename(outputFile, "agentFileNetwork%02d.dl" % (ctr))
        p = subprocess.Popen("/usr/bin/python2.4 dl2gwt.py agentFileNetwork%02d.dl" % (ctr), shell=True)
        sts = os.waitpid(p.pid, 0)
        # also dump the raw edge list, converted to DOS line endings
        fn = "agentFileNetwork%02d.dat" % (ctr)
        log.info("Writing network to raw file - %s", fn)
        f = open(fn,"w")
        agentGraph.dumpRaw(f)
        f.close()
        p = subprocess.Popen("unix2dos %s" % (fn), shell=True)
        sts = os.waitpid(p.pid, 0)
        ctr = ctr + 1
        currentDate = nextDate
        # flush the ORM cache before the next window
        expire_all()
def buildData(weeks, start, stop, overlap): currentDate = start ctr = 0 while (currentDate < stop): nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap) stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks) network = DynamicNetwork() metaMatrix = MetaMatrix() network.addMetaMatrix(metaMatrix) devs = NodeSet(id="agent", type="agent") projs = NodeSet(id="resource", type="resource") metaMatrix.addNodeSet(devs) metaMatrix.addNodeSet(projs) graph = Graph(sourceType=devs, targetType=projs, directed=False) metaMatrix.addGraph(graph) fnBase = "gnome%02d" % (ctr) fn = "%s.xml" % (fnBase) cvsFn = "%s.csv" % (fnBase) print "Date: %s - output: %s" % (currentDate, fn) commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate, CVSCommit.q.startDate <= stopDate)) # add in all the nodes into the network for p in Project.select(): projNode = Node(id=p.name) projs.addNode(projNode) for commit in commits: user = commit.user proj = commit.project userNode = devs[user.name] projNode = projs[proj.name] if not userNode: userNode = Node(id=user.name) devs.addNode(userNode) if not projNode: projNode = Node(id=proj.name) projs.addNode(projNode) e = graph.getEdge(userNode, projNode) if e: e.value = e.value + 1 else: e = Edge(source=userNode, target=projNode, type="int", value=1) graph.addEdge(e) log.info("writing CSV file to %s", cvsFn) f = open(cvsFn,"wb") writer = csv.writer(f) writer.writerow(["# name", "devs", "commits"]) for nd in projs.iternodes(): numAgents = len(nd.targetEdges) numCommits = sum([x.value for x in nd.targetEdges]) writer.writerow([nd.id, numAgents, numCommits]) f.close() log.info("removing isolates") projs.removeIsolates() devs.removeIsolates() log.info("serializing network to %s", fn) s = network.toXml().serialize(format=1) f = open(fn,"w") f.write(s) f.close() expire_all() ctr = ctr + 1 currentDate = nextDate
def loadFile(filename, maillist, fromHack=False, purge=False, purge_only=False):
    """Loads and archive of mailing list messages into the database.

    Right now this function does not handle running multiple times over the
    same mailing list.  That's an outsanding bug.

    @param filename: - the filename to load
    @param maillist: a dbobjects.MailList object to set as the list object
    @param fromHack: when True, rebuild the From address from "user at host"
        style obfuscation found in some archives
    @param purge: when True, delete previously loaded rows for this archive
        and reload it
    @param purge_only: when True, purge (if requested) and return immediately
    @return: the number of messages loaded

    The in-reply to isn't specified anymore, instead, the following SQL
    command will hopefully load all of the data and set everything right.

    UPDATE mail_message set message_parent=a.mail_message_id FROM (SELECT
    a.mail_message_id FROM mail_message a where a.message_id = in_reply_to)
    AS a WHERE message_parent is null and in_reply_to is not null;
    """
    nummsgs = 0
    referencesRE = re.compile(r"(<[^>]+>)")
    log.info("processing file %s", filename)
    shortFN = os.path.split(filename)[1]
    archive = MailFileArchive.select(AND(MailFileArchive.q.filename==shortFN,
                                         MailFileArchive.q.listID==maillist.id))
    # FIXME: this is an outstanding bug that needs to be addressed, basically
    # we can't double load a file, in the future we should check to see if the
    # entries have already been handled
    if archive.count() > 0:
        if not purge:
            log.error("Archive %s has already been loaded. For right now, we don't handle this, in the future, we will.", filename)
            return 0
        else:
            log.warn("Archive %s has already been loaded, proceeding with purge", filename)
            # delete recipient and reference rows first (they point at
            # mail_message rows), then the messages themselves
            query = """DELETE FROM mail_message_to WHERE mail_message_id IN (select mail_message_id from mail_message where mail_file_archive_id=%d)""" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            query = """DELETE FROM mail_message_reference WHERE mail_message_id IN (select mail_message_id from mail_message where mail_file_archive_id=%d)""" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            query = "DELETE FROM mail_message WHERE mail_file_archive_id=%d" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            archive = archive[0]
    else:
        archive = None
    if purge_only:
        log.info("purge only called, returning")
        return 0
    # try to get the month from archive
    # assumes filenames end in "-<year>-<month>[.ext][.gz]" - TODO confirm
    short = os.path.splitext(shortFN)
    if short[1] == '.gz':
        short = os.path.splitext(short[0])
    month = short[0].split("-")[-1]
    year = short[0].split("-")[-2]
    # build the start and stop dates for the archive
    startDate=timeutil.makeDateTimeFromShortString("%04d%02d01" % (int(year), timeutil.getMonth(month)))
    # stop one second before the first day of the next month
    stopDate=timeutil.addMonths(startDate,1) - timeutil.makeTimeDelta(seconds=1)
    if not archive:
        archive = MailFileArchive(filename=shortFN, list=maillist,
                                  startDate=startDate, stopDate=stopDate)
    mbox = mailutil.MailList(filename)
    msg = mbox.next()
    lastDate = None
    while msg != None:
        log.debug("processing message: %s", msg['Message-Id'])
        # split each address header into (name, email) pairs
        fromList = [x for x in rfc822.AddressList(msg['From']).addresslist]
        toList = [x[1].lower() for x in rfc822.AddressList(msg['To']).addresslist]
        toNames = [x[0].lower() for x in rfc822.AddressList(msg['To']).addresslist]
        ccList = [x[1].lower() for x in rfc822.AddressList(msg['cc']).addresslist]
        ccNames = [x[0].lower() for x in rfc822.AddressList(msg['cc']).addresslist]
        try:
            msgFrom = fromList[0][1].lower()
        except:
            # NOTE(review): bare except - also swallows errors other than a
            # missing/empty From header
            log.warn("From not properly defined")
            msgFrom = "*****@*****.**"
        try:
            msgFromName = fromList[0][0].lower()
        except:
            log.warn("From name not properly defined")
            msgFromName = None
        if fromHack:
            # some archives obfuscate addresses as "user at host"
            msgFrom = msg['From'].replace(" at ","@").split()[0]
        try:
            timestamp = timeutil.makeDateTimeFromTuple(rfc822.parsedate(msg['date']))
        except:
            log.warn("Error parsing date: %s - setting to None", msg['date'])
            timestamp = None
        try:
            messageId = msg['Message-Id'].split(";")[0]
        except:
            messageId = None
        if not messageId:
            # synthesize an id so the row can still be stored and referenced
            messageId = "::CVSMINER::-"+random_string(length=64)
        # FIXME: messageID should be a little more robust in searching out
        # properly formatted messages
        pl = deList(msg.get_payload())
        # pl = str(msg.get_payload())
        if hasattr(pl,"append"):
            # multipart message: concatenate the part payloads into one body
            log.debug("is list")
            tmpPl = ""
            for payload in pl:
                tmpPl = tmpPl + payload.get_payload()
            pl = tmpPl
        if msg['In-Reply-To']:
            replyTo = msg['In-Reply-To'][:255].split(";")[0].strip()
        else:
            replyTo = None
        # truncate the header fields to the database column width (255)
        if msgFrom:
            msgFrom = msgFrom[:255]
        if msgFromName:
            msgFromName = msgFromName[:255]
        if msg['Subject']:
            subject = msg['Subject'][:255]
        else:
            subject = "::CVSMINER:: Subject Not Defined"
        if messageId:
            messageId = messageId[:255]
        try:
            m = create_mail_message(fromemail=msgFrom, fromname=msgFromName,
                                    subject=subject, body=pl, date=timestamp,
                                    messageid=messageId, maillist=maillist,
                                    archive=archive, replyto=replyTo)
        except UnicodeError:
            # skip messages whose text can't be stored
            log.error("Unable to parse message no matter how hard I try...")
            msg = mbox.next()
            continue
        # map all of the references for the message
        if msg['References']:
            map(lambda x: create_mail_reference(message=m, reference=x),
                referencesRE.findall(msg['References']))
        # seen is a dict that we use to track already captured email
        # addresses
        seen = {}
        for recip in zip(toList, toNames):
            if not seen.has_key(recip[0]):
                try:
                    mr = create_mail_recipient(message=m, toemail=recip[0],
                                               toname=recip[1], isto=True)
                    seen[recip[0]] = 1
                except UnicodeDecodeError:
                    pass
        for recip in zip(ccList,ccNames):
            if not seen.has_key(recip[0]):
                try:
                    mr = create_mail_recipient(message=m, toemail=recip[0],
                                               toname=recip[1], isto=False)
                    seen[recip[0]] = 1
                except UnicodeDecodeError:
                    pass
        msg = mbox.next()
        nummsgs = nummsgs + 1
    return nummsgs
def buildData(weeks, start, stop, overlap):
    """Build an agentxagent network in weeks intervals, also spit out some CSV
    files with statistics for each of the agents.

    (This variant actually computes per-window developer activity and churn
    statistics and plots them to communityGrowth.pdf and retentionRates.pdf
    via PyX; the `graph` name below is the PyX graph module.)

    @param weeks - the number of weeks to use for each interval
    @param start - the date to start
    @param stop - the date to stop
    @param overlap - number of weeks to overlap analysis
    """
    currentDate = start
    ctr = 0
    lastActiveUsers = None   # active-user name set from the previous window
    allActiveUsers = Set()   # every user ever seen active so far
    rows = []                # one statistics dict per window
    while (currentDate < stop):
        thisRow = {}
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        # names of users with at least one commit inside this window
        activeUsers = Set([x.name for x in User.select(AND(User.q.id == CVSCommit.q.userID,
                                                           CVSCommit.q.startDate >= currentDate,
                                                           CVSCommit.q.startDate <= stopDate),
                                                       distinct=True)])
        if lastActiveUsers:
            # churn relative to the previous window
            hamInactive = len(lastActiveUsers.difference(activeUsers))  # went quiet
            hamActive = len(activeUsers.difference(lastActiveUsers))    # (re)activated
            newActives = len(activeUsers.difference(allActiveUsers))    # first time seen
            try:
                percentDrop = float(hamInactive)/float(len(lastActiveUsers))
            except:
                percentDrop = 0.0
        else:
            # first window: no previous window to compare against
            hamInactive = None
            hamActive = None
            newActives = None
            percentDrop = None
        lastActiveUsers = activeUsers
        allActiveUsers = allActiveUsers.union(activeUsers)
        thisRow["hamInactive"] = hamInactive
        thisRow["hamActive"] = hamActive
        thisRow["newActives"] = newActives
        thisRow["percentDrop"] = percentDrop
        thisRow["allActives"] = len(allActiveUsers)
        rows.append(thisRow)
        currentDate = nextDate
        ctr = ctr + 1
    # plot total community size over time
    g = graph.graphxy(width=8, key=graph.key.key(pos="mr", hinside=0),
                      x=graph.axis.linear(min=0, max=len(rows), title="Time Period"),
                      y=graph.axis.linear(min=0, max=rows[-1]["allActives"]+50, title="Developers"))
    # NOTE(review): zip truncates here - range(len(rows)-1) is paired with
    # rows[1:-1] (len(rows)-2 values), silently dropping the last pair and
    # skipping the first row; confirm this is intentional
    dlist = graph.data.list(zip(range(len(rows)-1), [x["allActives"] for x in rows[1:-1]]),
                            x=1, y=2, title="total developers")
    g.plot([dlist], [graph.style.line([color.rgb.red, style.linestyle.solid, style.linewidth.thick])])
    g.writePDFfile("communityGrowth.pdf")
    # plot window-to-window retention rate (1 - fraction that went inactive)
    g = graph.graphxy(width=8, key=graph.key.key(pos="mr", hinside=0),
                      x=graph.axis.linear(min=0, max=len(rows), title="Time Period"),
                      y=graph.axis.linear(min=0, max=1, title="Proportion of Developers"))
    dlist = graph.data.list(zip(range(len(rows)-1), [1-x["percentDrop"] for x in rows[1:-1]]),
                            x=1, y=2, title="Retention Rate")
    g.plot([dlist], [graph.style.line([color.palette.Rainbow, style.linestyle.solid, style.linewidth.thick])])
    g.writePDFfile("retentionRates.pdf")