Example no. 1
0
def buildNetworks(project, startDate, stopDate, window, overlap, binary=False, removeNonAgents=False):
    """Walk a sliding window over the date range and build the networks.

    project - the master project to work on
    startDate - the first date to start
    stopDate - the last date to process
    window - the width of the sliding window, in weeks
    overlap - the overlap between consecutive windows, in weeks
    binary, removeNonAgents - passed through to the network builders
    """
    carriedDevMap = {}
    windowStart = startDate
    while windowStart < stopDate:
        windowEnd = windowStart + timeutil.makeTimeDelta(weeks=window)
        buildEmailNetwork(project, windowStart, windowEnd, binary=binary, removeNonAgents=removeNonAgents)
        # the developer map is threaded from one window into the next
        carriedDevMap = buildSourceCodeNetwork(project, windowStart, windowEnd, carriedDevMap, binary=binary, removeNonAgents=removeNonAgents)
        # advance by window - overlap weeks so successive windows overlap
        windowStart = windowStart + timeutil.makeTimeDelta(weeks=window-overlap)
Example no. 2
0
def dumpNetworks(project, startDate, stopDate, window, overlap):
    """Dump per-window weighted link counts for a master project.

    For each sliding window of ``window`` weeks (advancing by
    ``window - overlap`` weeks) the commits of every sub-project of the
    master project named ``project`` are scanned and three kinds of
    weighted links are accumulated:

      * "p<person>-f<file>"   - person committed to a file
      * "f<file>-f<file>"     - files touched in the same commit
      * "p<person>-p<person>" - people who touched the same file

    Each window is written to "<project>.d.<ctr>" as "key, count" lines.
    """
    currentDate = startDate
    mp = MasterProject.select(MasterProject.q.name == project)[0]
    ctr = 0
    while currentDate < stopDate:
        thisLinks = {}       # hash key -> accumulated link weight for this window
        userFileLinks = {}   # file id -> person ids that touched it (with duplicates)
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=window)
        # all commits in this window belonging to any sub-project of mp
        for com in CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                        CVSCommit.q.startDate <= nextDate,
                                        CVSCommit.q.projectID == Project.q.id,
                                        Project.q.masterProjectID == mp.id)):
            try:
                pid = com.user.persons[0].id
            except IndexError:
                # commit user has no linked person record; skip the commit
                log.warn("index error on user %d - ctr %d", com.userID, ctr)
                continue
            thisCommitFiles = [f.id for f in com.files]
            thisCommitFiles.sort()
            for f in thisCommitFiles:
                hkey = "p%d-f%d" % (pid, f)
                thisLinks[hkey] = thisLinks.get(hkey,0) + 1
                userFileLinks[f] = userFileLinks.get(f,[]) + [pid]
                
            # file-file co-commit links; note xrange(i, ...) also counts the
            # self pair (i == j) for every file in the commit
            for i in xrange(0, len(thisCommitFiles)):
                for j in xrange(i, len(thisCommitFiles)):
                    hkey="f%d-f%d" % (thisCommitFiles[i], thisCommitFiles[j])
                    thisLinks[hkey] = thisLinks.get(hkey,0) + 1


        # person-person links via shared files (duplicates removed per file);
        # again the i == j self pair is included
        for ufl in userFileLinks.values():
            uflNoDupe = list(Set(ufl))
            for i in xrange(0, len(uflNoDupe)):
                for j in xrange(i, len(uflNoDupe)):
                    hkey = "p%d-p%d" % (uflNoDupe[i], uflNoDupe[j])
                    thisLinks[hkey] = thisLinks.get(hkey,0) + 1
            
        f = open("%s.d.%03d" % (mp.name.replace(os.sep,"_"), ctr), "w")
        f.write("# StartDate: %s\n" % (currentDate))
        # NOTE(review): this records the overall stopDate, not this window's
        # end (nextDate) - confirm whether that is intended
        f.write("# StopDate: %s\n" % (stopDate))
        for key,val in thisLinks.iteritems():
            f.write("%s, %d\n" % (key, val))
        f.close()
        ctr = ctr + 1
        currentDate = currentDate + timeutil.makeTimeDelta(weeks=window-overlap)
Example no. 3
0
def dumpMailList(mlname, firstDate=None, lastDate=None, delta=7):
    """Print and return per-bin message statistics for one mailing list.

    mlname - name of the mailing list; must match exactly one MailList
    firstDate - start of the first bin (defaults to the earliest dated message)
    lastDate - upper bound of the scan (defaults to the latest dated message)
    delta - width of each bin, in days

    Prints one "YYYY-MM-DD, numMessages, newThreads, oldThreads" line per
    bin and returns four parallel lists:
    [bin starts as days since MINDATE, message counts, new-thread counts,
    reply counts].

    Raises KeyError when the name matches zero or several lists.
    """
    ml = MailList.select(MailList.q.name==mlname)
    if ml.count() < 1:
        raise KeyError("""Mailing List "%s" not found""" % (mlname))
    if ml.count() > 1:
        raise KeyError("""Mailing List "%s" specifies multiple lists""" % (mlname))
    ml = ml[0]

    # ignore all the messages we can't get a date for
    messages = MailMessage.select(AND(MailMessage.q.listID==ml.id,
                                      MailMessage.q.date!=None),
                                  orderBy=MailMessage.q.date)
    if not firstDate:
        firstDate = messages[0].date
    if not lastDate:
        # result set is ordered by date, so the last element is the newest
        lastDate = messages.reversed()[0].date
    print firstDate, lastDate

    # truncate firstDate to midnight so the bins start on a day boundary
    firstDate = timeutil.makeDateTimeFromShortString("%04d%02d%02d" %
                                                     (firstDate.year,
                                                      firstDate.month,
                                                      firstDate.day))

    # bins[0] = bin start (days since MINDATE), bins[1] = message count,
    # bins[2] = thread-starting messages, bins[3] = replies
    bins = [[],[],[],[]]
    
    while firstDate < lastDate:
        nextDate = firstDate + timeutil.makeTimeDelta(days=delta)
        messages = MailMessage.select(AND(MailMessage.q.listID==ml.id,
                                          MailMessage.q.date >= firstDate,
                                          MailMessage.q.date < nextDate))
        numMessages = messages.count()
        newThreads = 0
        oldThreads = 0
        for msg in messages:
            # a message with no replyTo starts a new thread
            if msg.replyTo == None:
                newThreads = newThreads + 1
            else:
                oldThreads = oldThreads + 1
        print "%04d-%02d-%02d, %d, %d, %d" % (firstDate.year,
                                              firstDate.month,
                                              firstDate.day,
                                              numMessages,
                                              newThreads,
                                              oldThreads)
        bins[0].append(abs(firstDate-MINDATE).days)
        bins[1].append(numMessages)
        bins[2].append(newThreads)
        bins[3].append(oldThreads)
        
        firstDate = nextDate

    return bins
Example no. 4
0
def buildData(weeks, start, stop, overlap):
    """Build an agentxagent network in weeks intervals, also spit
    out some CSV files with statistics for each of the agents.

    For each window this writes agentNetworkNN.xml (DynetML),
    agentNetworkNN.dl / .gwt (via external converter tools),
    agentNetworkNN.dbf and agentNetworkNN.csv with per-agent statistics.

    @param weeks - the number of weeks to use for each interval
    @param start - the date to start
    @param stop - the date to stop
    @param overlap - number of weeks to overlap analysis
    """
    agents = User.select()
    currentDate = start
    users = {}       # this window's per-user stat dicts, keyed by user name
    lagUsers1 = {}   # stats from the previous window
    lagUsers2 = {}   # stats from two windows back
    ctr = 0
    while (currentDate < stop):
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        log.info("Processing data from %s to %s", currentDate, nextDate)
        # rotate the lagged snapshots, then rebuild this window's stats in a
        # FRESH dict.  BUGFIX: the original reset the same dict object in
        # place after "lagUsers1 = users", so the lag dicts aliased the
        # current window and devLag1/devLag2 always read back 0.
        lagUsers2 = lagUsers1
        lagUsers1 = users
        users = {}
        for agent in agents:
            users[agent.name] = {"name": agent.name,
                                 "id": agent.id,
                                 "dev": 0,
                                 "projects": 0,
                                 "commits": 0,
                                 "devLag1": 0,
                                 "devLag2": 0,
                                 "files": 0,
                                 "totalCommits": 0,
                                 "totalProjects": 0,
                                 "totalFiles": 0,
                                 "commitTime": 0}

        log.info("Building global data on users")
        # fill in the cumulative stats (from overall start to this window's end)
        for user in users.itervalues():
            # get the total commits to this point:
            totalCommits = CVSCommit.select(AND(CVSCommit.q.startDate > start,
                                                CVSCommit.q.startDate <= stopDate,
                                                CVSCommit.q.userID == user["id"])).count()
            totalProjects = Project.select(AND(Project.q.id == CVSCommit.q.projectID,
                                               CVSCommit.q.startDate > start,
                                               CVSCommit.q.startDate <= stopDate,
                                               CVSCommit.q.userID == user["id"]),
                                           distinct=True).count()
            totalFiles = File.select(AND(CVSCommit.q.startDate > start,
                                         CVSCommit.q.startDate <= stopDate,
                                         CVSCommit.q.userID == user["id"],
                                         FileCommit.q.cvsCommitID == CVSCommit.q.id,
                                         FileCommit.q.fileID == File.q.id),
                                     distinct=True).count()
            # days between the user's first commit and this window's end;
            # guarded so users with no commits at all don't raise IndexError
            firstCommits = CVSCommit.select(AND(CVSCommit.q.startDate > start,
                                                CVSCommit.q.userID == user["id"]),
                                            orderBy=CVSCommit.q.startDate,
                                            limit=1)
            commitTime = 0
            if firstCommits.count() > 0:
                firstCommit = firstCommits[0]
                if firstCommit.startDate < stopDate:
                    commitTime = (stopDate - firstCommit.startDate).days
            users[user["name"]]["totalCommits"] = totalCommits
            users[user["name"]]["totalProjects"] = totalProjects
            users[user["name"]]["totalFiles"] = totalFiles
            users[user["name"]]["commitTime"] = commitTime
            # lagged developer-activity flags from one and two windows back
            if lagUsers1.has_key(user["name"]):
                users[user["name"]]['devLag1'] = lagUsers1[user["name"]]["dev"]
            else:
                users[user["name"]]['devLag1'] = 0
            if lagUsers2.has_key(user["name"]):
                users[user["name"]]['devLag2'] = lagUsers2[user["name"]]["dev"]
            else:
                users[user["name"]]['devLag2'] = 0
            
        projects = {}   # project name -> names of users active on it this window
        # create the basic network
        network = DynamicNetwork()
        metaMatrix = MetaMatrix()
        network.addMetaMatrix(metaMatrix)
        devs = NodeSet(id="agent", type="agent")
        metaMatrix.addNodeSet(devs)
        graph = Graph(sourceType=devs, targetType=devs, directed=False)
        metaMatrix.addGraph(graph)

        activeUsers = User.select(AND(User.q.id == CVSCommit.q.userID,
                                       CVSCommit.q.startDate >= currentDate,
                                       CVSCommit.q.startDate <= stopDate), distinct=True)
        log.info("Building additional data on %d active users", activeUsers.count())
        for user in activeUsers:
            users[user.name]["dev"] = 1
            projs  = Project.select(AND(CVSCommit.q.userID == user.id,
                                        CVSCommit.q.startDate >= currentDate,
                                        CVSCommit.q.startDate <= stopDate,
                                        CVSCommit.q.projectID == Project.q.id),
                                    distinct=True)
            log.debug("user: %s - active projects: %d", user.name, projs.count())
            users[user.name]["projects"] = projs.count()
            commits = CVSCommit.select(AND(CVSCommit.q.userID == user.id,
                                           CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.startDate <= stopDate))
            users[user.name]["commits"] = commits.count()
            # BUGFIX: this was stored under the key "file", so the "files"
            # column written to the DBF/CSV output was always 0
            users[user.name]["files"] = File.select(AND(CVSCommit.q.startDate > currentDate,
                                                        CVSCommit.q.startDate <= stopDate,
                                                        CVSCommit.q.userID == user.id,
                                                        FileCommit.q.cvsCommitID == CVSCommit.q.id,
                                                        FileCommit.q.fileID == File.q.id),
                                                    distinct=True).count()
            
            for proj in projs:
                if not projects.has_key(proj.name):
                    projects[proj.name] = []
                projects[proj.name].append(user.name)


        # create nodes for each of the agents
        userNodes = {}
        for u in users.iterkeys():
            userNodes[u] = Node(id=u)
            devs.addNode(userNodes[u])

        # link users who were active on the same project into a clique,
        # incrementing the edge weight once per shared project
        for p in projects.itervalues():
            if len(p) <= 1:
                continue
            for i in xrange(len(p)):
                for j in xrange(i+1,len(p)):
                    e = graph.getEdge(userNodes[p[i]], userNodes[p[j]])
                    if e:
                        e.value = e.value + 1
                    else:
                        e = Edge(source=userNodes[p[i]], target=userNodes[p[j]],
                                 type="int", value=1)
                        graph.addEdge(e)
        fn = "agentNetwork%02d.xml" % (ctr)
        s = network.toXml().serialize(format=1)
        f = open(fn, "w")
        f.write(s)
        f.close()

        # now create the GWT file from the network via external converters
        outputFile = "agentNetwork%02d%s.dl" % (ctr, graph.id)
        p = subprocess.Popen("/home/pwagstro/bin/dynetml_export -m dl -o agentNetwork%02d %s" % (ctr, fn), shell=True)
        sts = os.waitpid(p.pid, 0)
        os.rename(outputFile, "agentNetwork%02d.dl" % (ctr))
        p = subprocess.Popen("/usr/bin/python2.4 dl2gwt.py agentNetwork%02d.dl" % (ctr), shell=True)
        sts = os.waitpid(p.pid, 0)
        
        # write the definition of the dbf file
        # dbfs have a limit of 11 characters for the title of each row
        dbfn=dbf_new()
        dbfn.add_field("id",'N',5)
        dbfn.add_field("name",'C',80)
        dbfn.add_field("dev",'N',2)
        dbfn.add_field("devLag1", 'N', 2)
        dbfn.add_field("devLag2", 'N', 2)
        dbfn.add_field("projects", 'N', 3)
        dbfn.add_field("commits", 'N', 5)
        dbfn.add_field("files", 'N', 5)
        dbfn.add_field("totalCommit", 'N', 5)
        dbfn.add_field("totalFiles", 'N', 5)
        dbfn.add_field("totalProj", 'N', 5)
        dbfn.add_field("commitTime", 'N', 5)
        dbfn.write("agentNetwork%02d.dbf" % (ctr))

        # write the DBF file
        dbft = Dbf()
        dbft.openFile("agentNetwork%02d.dbf" % (ctr), readOnly=0)
        # dbft.reportOn()
        ctr2 = 1
        for key,val in users.iteritems():
            rec = DbfRecord(dbft)
            rec['id'] = ctr2
            rec['name'] = key
            rec['dev'] = val['dev']
            rec['devLag1'] = val['devLag1']
            rec['devLag2'] = val['devLag2']
            rec['projects'] = val['projects']
            rec['commits'] = val['commits']
            rec['files'] = val['files']
            rec['totalCommit'] = val['totalCommits']
            rec['totalFiles'] = val['totalFiles']
            rec['totalProj'] = val['totalProjects']
            rec['commitTime'] = val['commitTime']
            rec.store()
            ctr2 = ctr2 + 1
        dbft.close()

        # dump out the stats to a CSV file too
        fn = "agentNetwork%02d.csv" % (ctr)
        f = open(fn, "w")
        writer = csv.writer(f)
        ctr2 = 1
        writer.writerow(["#ctr", "name", "dev", "projects", "commits", "files", "totalCommits", "totalFiles", "totalProjects", "commitTime"])
        for item in [[key, val["dev"], val["projects"], val["commits"], val["files"], val["totalCommits"],
                      val["totalFiles"], val["totalProjects"], val["commitTime"]] for key,val in users.iteritems()]:
            writer.writerow([ctr2] + item)
            ctr2 = ctr2 + 1
        f.close()

        # flush cached SQLObject rows before the next window
        expire_all()
        currentDate = nextDate
        ctr = ctr + 1
Example no. 5
0
def buildData(weeks, overlap, start, stop, project=None):
    """Build agent x agent networks where two developers are linked when
    they touched the same (non-ignored) file within the window.

    weeks - width of each window, in weeks
    overlap - overlap between consecutive windows, in weeks
    start, stop - overall date range to walk
    project - optional project name; restricts users and commits to it

    For each window this writes agentFileNetworkNN.xml (DynetML),
    agentFileNetworkNN.dl / .gwt (via external converters) and
    agentFileNetworkNN.dat (raw dump, converted with unix2dos).
    """
    if project:
        log.info("Only generating data from project %s", project)
    currentDate = start
    ctr = 0
    devs = NodeSet(id="agent", type="agent")
    userNodes = {}   # user id -> Node, shared across all windows
    
    # create all of the nodes for the users
    # do this only once
    # hack for getting only evolution users...
    # NOTE(review): this branch tests "project != None" while the commit
    # query below tests plain truthiness - an empty string would take this
    # branch but not the one below
    if project != None:
        projectId = Project.select(Project.q.name == project)[0].id
        users = User.select(AND(User.q.id == CVSCommit.q.userID,
                                CVSCommit.q.projectID == projectId), distinct=True)
    else:
        users = User.select()
    for user in users:
        userNodes[user.id] = Node(id=user.name)
        devs.addNode(userNodes[user.id])

    ctr = 0
    while (currentDate < stop):
        fileUsers = {}   # filename -> Set of user ids that committed to it
        # the devs NodeSet is reused across windows, so drop the old edges
        devs.clearEdges()
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        log.info("Starting Date: %s - Ending Date: %s", currentDate, stopDate)

        # set up the meta matrix stuff
        network = DynamicNetwork()
        metaMatrix = MetaMatrix()
        network.addMetaMatrix(metaMatrix)
        files = NodeSet(id="resource", type="resource")
        metaMatrix.addNodeSet(devs)
        metaMatrix.addNodeSet(files)

        agentGraph = Graph(sourceType=devs, targetType=devs, directed=False)
        metaMatrix.addGraph(agentGraph)

        # get all of the CVS Commit information
        if project:
            commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.stopDate <= stopDate,
                                           CVSCommit.q.projectID == projectId), distinct=True)
        else:
            # NOTE(review): hard-coded project id 141 when no project is
            # given - presumably a leftover hack; confirm before reuse
            commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                           CVSCommit.q.stopDate <= stopDate,
                                           CVSCommit.q.projectID == 141), distinct=True)
        log.debug("Commits: %d", commits.count())
        for com in commits:
            for fl in com.files:
                filename = com.project.name + "/" + fl.name
                base, ext = os.path.splitext(filename)
                # skip files whose extension or basename is blacklisted
                if ext in BADEXTS:
                    log.debug("Extension ignore file: %s", filename)
                    continue
                path, fn = os.path.split(filename)
                if fn in BADFILES:
                    log.debug("Name ignore file: %s", filename)
                    continue
                if not fileUsers.has_key(filename):
                    fileUsers[filename] = Set()                    
                fileUsers[filename].add(com.userID)

        # clique-link every pair of users that touched the same file,
        # bumping the edge weight once per shared file
        for val in fileUsers.itervalues():
            flList = list(val)
            for i in xrange(0,len(flList)):
                for j in xrange(i+1, len(flList)):
                    e = agentGraph.getEdge(userNodes[flList[i]], userNodes[flList[j]])
                    if e:
                        e.value = e.value + 1
                    else:
                        e = Edge(source=userNodes[flList[i]],
                                 target=userNodes[flList[j]],
                                 type="int", value=1)
                        agentGraph.addEdge(e)

        fn = "agentFileNetwork%02d.xml" % (ctr)
        log.info("Writing network to file %s - %d nodes, %d edges", fn, len(devs), len(agentGraph))
        s = network.toXml().serialize(format=1)
        f = open(fn,"w")
        f.write(s)
        f.close()

        # now create the GWT file from the network
        outputFile = "agentFileNetwork%02d%s.dl" % (ctr, agentGraph.id)
        p = subprocess.Popen("/home/pwagstro/bin/dynetml_export -m dl -o agentFileNetwork%02d %s" % (ctr, fn), shell=True)
        sts = os.waitpid(p.pid, 0)
        os.rename(outputFile, "agentFileNetwork%02d.dl" % (ctr))
        p = subprocess.Popen("/usr/bin/python2.4 dl2gwt.py agentFileNetwork%02d.dl" % (ctr), shell=True)
        sts = os.waitpid(p.pid, 0)


        fn = "agentFileNetwork%02d.dat" % (ctr)
        log.info("Writing network to raw file - %s", fn)
        f = open(fn,"w")
        agentGraph.dumpRaw(f)
        f.close()
        
        p = subprocess.Popen("unix2dos %s" % (fn), shell=True)
        sts = os.waitpid(p.pid, 0)

        ctr = ctr + 1
        currentDate = nextDate
        # flush cached SQLObject rows before the next window
        expire_all()
Example no. 6
0
def buildData(weeks, start, stop, overlap):
    """Build agent x resource (developer x project) networks per window.

    weeks - width of each window, in weeks
    start, stop - overall date range to walk
    overlap - overlap between consecutive windows, in weeks

    For each window writes gnomeNN.xml (DynetML, with isolates removed)
    and gnomeNN.csv with per-project developer and commit counts.
    """
    currentDate = start
    ctr = 0
    while (currentDate < stop):
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        network = DynamicNetwork()
        metaMatrix = MetaMatrix()
        network.addMetaMatrix(metaMatrix)
        devs = NodeSet(id="agent", type="agent")
        projs = NodeSet(id="resource", type="resource")
        metaMatrix.addNodeSet(devs)
        metaMatrix.addNodeSet(projs)
        graph = Graph(sourceType=devs, targetType=projs, directed=False)
        metaMatrix.addGraph(graph)
        fnBase = "gnome%02d" % (ctr)
        fn = "%s.xml" % (fnBase)
        cvsFn = "%s.csv" % (fnBase)
        print "Date: %s - output: %s" % (currentDate, fn)
        commits = CVSCommit.select(AND(CVSCommit.q.startDate >= currentDate,
                                       CVSCommit.q.startDate <= stopDate))

        # add in all the nodes into the network
        for p in Project.select():
            projNode = Node(id=p.name)
            projs.addNode(projNode)

        for commit in commits:
            user = commit.user
            proj = commit.project
            
            # look up (or lazily create) the nodes for this commit's user
            # and project, then bump the edge weight between them by one
            userNode = devs[user.name]
            projNode = projs[proj.name]
            if not userNode:
                userNode = Node(id=user.name)
                devs.addNode(userNode)
            if not projNode:
                projNode = Node(id=proj.name)
                projs.addNode(projNode)
            e = graph.getEdge(userNode, projNode)
            if e:
                e.value = e.value + 1
            else:
                e = Edge(source=userNode, target=projNode, type="int", value=1)
                graph.addEdge(e)

        log.info("writing CSV file to %s", cvsFn)
        f = open(cvsFn,"wb")
        writer = csv.writer(f)
        writer.writerow(["# name", "devs", "commits"])
        for nd in projs.iternodes():
            # one row per project: distinct committers and total commits
            numAgents = len(nd.targetEdges)
            numCommits = sum([x.value for x in nd.targetEdges])
            writer.writerow([nd.id, numAgents, numCommits])
        f.close()

        log.info("removing isolates")
        # drop nodes with no edges before serializing the XML
        projs.removeIsolates()
        devs.removeIsolates()

        log.info("serializing network to %s", fn)
        s = network.toXml().serialize(format=1)
        f = open(fn,"w")
        f.write(s)
        f.close()
        
        # flush cached SQLObject rows before the next window
        expire_all()
        ctr = ctr + 1
        currentDate = nextDate
Example no. 7
0
def loadFile(filename, maillist, fromHack=False, purge=False, purge_only=False):
    """Loads an archive of mailing list messages into the database. Right
    now this function does not handle running multiple times over the
    same mailing list.  That's an outstanding bug.

    @param filename: the mbox archive to load (may end in .gz; the name's
        last two "-" separated components are taken as year and month)
    @param maillist: a dbobjects.MailList object to set as the list object
    @param fromHack: rewrite obfuscated "user at host" From headers
    @param purge: delete previously loaded rows for this archive first
    @param purge_only: purge and return without loading anything

    Returns the number of messages loaded (0 when skipped or purge_only).

    The in-reply-to isn't specified anymore, instead, the following SQL
    command will hopefully load all of the data and set everything right.
    
    UPDATE mail_message set message_parent=a.mail_message_id
      FROM (SELECT a.mail_message_id FROM mail_message a where a.message_id = in_reply_to) AS a
     WHERE message_parent is null and in_reply_to is not null;
    """
    nummsgs = 0
    # matches each "<...>" token in a References: header
    referencesRE = re.compile(r"(<[^>]+>)")

    log.info("processing file %s", filename)

    shortFN = os.path.split(filename)[1]
    archive = MailFileArchive.select(AND(MailFileArchive.q.filename==shortFN,
                                     MailFileArchive.q.listID==maillist.id))

    # FIXME: this is an outstanding bug that needs to be addressed, basically
    # we can't double load a file, in the future we should check to see if the
    # entries have already been handled
    if archive.count() > 0:
        if not purge:
            log.error("Archive %s has already been loaded.  For right now, we don't handle this, in the future, we will.", filename)
            return 0
        else:
            log.warn("Archive %s has already been loaded, proceeding with purge", filename)
            # delete the child rows (recipients, references) before the
            # messages themselves; the interpolated archive id is an int,
            # so this is not an injection vector
            query = """DELETE FROM mail_message_to WHERE mail_message_id IN
                                   (select mail_message_id from mail_message where mail_file_archive_id=%d)""" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            query = """DELETE FROM mail_message_reference WHERE mail_message_id IN
                                   (select mail_message_id from mail_message where mail_file_archive_id=%d)""" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            query = "DELETE FROM mail_message WHERE mail_file_archive_id=%d" % (archive[0].id)
            log.debug("executing query: %s", query)
            MailMessage._connection.query(query)
            archive = archive[0]
    else:
        archive = None
    if purge_only:
        log.info("purge only called, returning")
        return 0
    # try to get the month from archive
    short = os.path.splitext(shortFN)
    if short[1] == '.gz':
        short = os.path.splitext(short[0])
    # last two "-" separated name components hold the archive period
    month = short[0].split("-")[-1]
    year = short[0].split("-")[-2]

    # build the start and stop dates for the archive
    # (first instant of the month through one second before the next month)
    startDate=timeutil.makeDateTimeFromShortString("%04d%02d01" % (int(year), timeutil.getMonth(month)))
    stopDate=timeutil.addMonths(startDate,1) - timeutil.makeTimeDelta(seconds=1)

    if not archive:
        archive = MailFileArchive(filename=shortFN, list=maillist,
                                  startDate=startDate, stopDate=stopDate)
    
    mbox = mailutil.MailList(filename)
    msg = mbox.next()
    lastDate = None
    while msg != None:
        log.debug("processing message: %s", msg['Message-Id'])
        # addresslist yields (name, email) pairs
        fromList =  [x for x in rfc822.AddressList(msg['From']).addresslist]
        toList = [x[1].lower() for x in rfc822.AddressList(msg['To']).addresslist]
        toNames = [x[0].lower() for x in rfc822.AddressList(msg['To']).addresslist]
        ccList = [x[1].lower() for x in rfc822.AddressList(msg['cc']).addresslist]
        ccNames = [x[0].lower() for x in rfc822.AddressList(msg['cc']).addresslist]
        try:
            msgFrom = fromList[0][1].lower()
        except:
            # NOTE(review): bare except - also hides errors other than a
            # missing/empty From header
            log.warn("From not properly defined")
            msgFrom = "*****@*****.**"
        try:
            msgFromName = fromList[0][0].lower()
        except:
            log.warn("From name not properly defined")
            msgFromName = None

        if fromHack:
            # some archives obfuscate addresses as "user at host"
            msgFrom = msg['From'].replace(" at ","@").split()[0]

        try:
            timestamp = timeutil.makeDateTimeFromTuple(rfc822.parsedate(msg['date']))
        except:
            log.warn("Error parsing date: %s - setting to None", msg['date'])
            timestamp = None

        try:
            messageId = msg['Message-Id'].split(";")[0]
        except:
            messageId = None
        if not messageId:
            # synthesize a unique id so the row can still be stored
            messageId = "::CVSMINER::-"+random_string(length=64)
        # FIXME: messageID should be a little more robust in searching out
        # properly formatted messages

        pl = deList(msg.get_payload())
        # pl = str(msg.get_payload())
        if hasattr(pl,"append"):
            # multipart message: concatenate the part bodies
            log.debug("is list")
            tmpPl = ""
            for payload in pl:
                tmpPl = tmpPl + payload.get_payload()
            pl = tmpPl

        if msg['In-Reply-To']:
            replyTo = msg['In-Reply-To'][:255].split(";")[0].strip()
        else:
            replyTo = None
            
        # truncate the fields to the database column width (255 chars)
        if msgFrom: msgFrom = msgFrom[:255]
        if msgFromName: msgFromName = msgFromName[:255]
        if msg['Subject']:
            subject = msg['Subject'][:255]
        else:
            subject = "::CVSMINER:: Subject Not Defined"
        if messageId: messageId = messageId[:255]

        try:
            m = create_mail_message(fromemail=msgFrom, fromname=msgFromName, subject=subject, body=pl,
                                    date=timestamp, messageid=messageId, maillist=maillist,
                                    archive=archive, replyto=replyTo)
        except UnicodeError:
            # undecodable message: skip it entirely (not counted in nummsgs)
            log.error("Unable to parse message no matter how hard I try...")
            msg = mbox.next()
            continue
                

        # map all of the references for the message
        if msg['References']: map(lambda x: create_mail_reference(message=m, reference=x), referencesRE.findall(msg['References']))

        # seen is a dict that we use to track already captured email
        # addresses
        seen = {}
        for recip in zip(toList, toNames):
            if not seen.has_key(recip[0]):
                try:
                    mr = create_mail_recipient(message=m, toemail=recip[0], toname=recip[1], isto=True)
                    seen[recip[0]] = 1
                except UnicodeDecodeError:
                    pass
        for recip in zip(ccList,ccNames):
            if not seen.has_key(recip[0]):
                try:
                    
                    mr = create_mail_recipient(message=m, toemail=recip[0], toname=recip[1], isto=False)
                    seen[recip[0]] = 1
                except UnicodeDecodeError:
                    pass
            
        msg = mbox.next()
        nummsgs = nummsgs + 1
    return nummsgs
Example no. 8
0
def buildData(weeks, start, stop, overlap):
    """Track developer activity over sliding windows and plot community
    growth and retention with PyX.

    @param weeks - the number of weeks to use for each interval
    @param start - the date to start
    @param stop - the date to stop
    @param overlap - number of weeks to overlap analysis

    Writes communityGrowth.pdf (cumulative developer count per window)
    and retentionRates.pdf (1 - fraction of the previous window's
    developers that went inactive).
    """
    currentDate = start
    ctr = 0
    lastActiveUsers = None    # names active in the previous window (None on first pass)
    allActiveUsers = Set()    # every name seen active so far
    rows = []
    while (currentDate < stop):
        thisRow = {}
        nextDate = currentDate + timeutil.makeTimeDelta(weeks=weeks-overlap)
        stopDate = currentDate + timeutil.makeTimeDelta(weeks=weeks)
        activeUsers = Set([x.name for x in User.select(AND(User.q.id == CVSCommit.q.userID,
                                                       CVSCommit.q.startDate >= currentDate,
                                                       CVSCommit.q.startDate <= stopDate),
                                                       distinct=True)])
        if lastActiveUsers:
            # churn relative to the previous window:
            # hamInactive = dropped out, hamActive = newly active vs last
            # window, newActives = never seen active before
            hamInactive = len(lastActiveUsers.difference(activeUsers))
            hamActive = len(activeUsers.difference(lastActiveUsers))
            newActives = len(activeUsers.difference(allActiveUsers))
            try:
                percentDrop = float(hamInactive)/float(len(lastActiveUsers))
            except:
                # NOTE(review): bare except - presumably guards a division
                # by zero, though lastActiveUsers is truthy here
                percentDrop = 0.0
        else:
            # first window: no previous window to compare against
            hamInactive = None
            hamActive = None
            newActives = None
            percentDrop = None

        lastActiveUsers = activeUsers
        allActiveUsers = allActiveUsers.union(activeUsers)

        thisRow["hamInactive"] = hamInactive
        thisRow["hamActive"] = hamActive
        thisRow["newActives"] = newActives
        thisRow["percentDrop"] = percentDrop
        thisRow["allActives"] = len(allActiveUsers)

        rows.append(thisRow)
        currentDate = nextDate
        ctr = ctr + 1

    # cumulative developer count over time
    g = graph.graphxy(width=8,
                      key=graph.key.key(pos="mr", hinside=0),
                      x=graph.axis.linear(min=0, max=len(rows), title="Time Period"),
                      y=graph.axis.linear(min=0, max=rows[-1]["allActives"]+50, title="Developers"))
    # NOTE(review): range(len(rows)-1) has one more element than
    # rows[1:-1]; zip silently truncates, so the final period is dropped -
    # confirm whether rows[1:] was intended here and below
    dlist = graph.data.list(zip(range(len(rows)-1), [x["allActives"] for x in rows[1:-1]]),
                            x=1, y=2, title="total developers")
    
    g.plot([dlist], [graph.style.line([color.rgb.red, style.linestyle.solid, style.linewidth.thick])])
    g.writePDFfile("communityGrowth.pdf")

    # retention rate per window (rows[0] is skipped: its percentDrop is None)
    g = graph.graphxy(width=8,
                      key=graph.key.key(pos="mr", hinside=0),
                      x=graph.axis.linear(min=0, max=len(rows), title="Time Period"),
                      y=graph.axis.linear(min=0, max=1, title="Proportion of Developers"))
    dlist = graph.data.list(zip(range(len(rows)-1), [1-x["percentDrop"] for x in rows[1:-1]]),
                            x=1, y=2, title="Retention Rate")
    g.plot([dlist], [graph.style.line([color.palette.Rainbow, style.linestyle.solid, style.linewidth.thick])])
    g.writePDFfile("retentionRates.pdf")