def weightplot(self, clip=1):
    """Plot summed edit shares over all 0/1 weightings for random pages.

    The page list comes from ``db.WikiDatabase().getallfetched()``; it is
    shuffled in place and only the first ``clip`` rows are processed.  For
    each page, ``editsharedata`` is evaluated for every 0/1 setting of the
    first six weights, once with the seventh weight set to 0 and once with
    it set to 1.  Within each of those two groups the results are summed
    position by position, sorted ascending, and both series are handed to
    ``self.specialtrajectory``, whose return value is printed.

    Args:
        clip: number of (shuffled) pages to plot.  Defaults to 1.

    Raises:
        AssertionError: if the summed series and the label list returned by
            ``editsharedata`` differ in length.
    """
    import itertools

    dtb = db.WikiDatabase()
    pages = dtb.getallfetched()
    # Single-argument print call: same output under the Python 2 print
    # statement, and still valid syntax under Python 3.
    print("got %d" % len(pages))

    # All 2**6 settings of the first six weights; the seventh is toggled
    # separately below so the two groups can be compared.
    base_weights = list(itertools.product([0, 1], repeat=6))
    shuffle(pages)

    xaxisname = "Username"
    # Previously written with a trailing comma, which made the axis label a
    # one-element tuple instead of a string.
    yaxisname = "Edit share"

    for row in pages[:clip]:
        article = row[0].decode('utf-8')
        pageid = row[1]
        domain = row[2]
        title = "Editors of " + safefilename(article)
        title += ", " + domain + str(pageid) + ", by edit count"
        filename = domain + str(pageid) + "editcW"
        dbdata = dtb.getuserchange2(pageid, domain)
        dat = WikiDataHandle()

        labels = []
        # runs[d] collects one share list per base weighting, for seventh
        # weight == d.
        runs = [[], []]
        for base in base_weights:
            for last in (0, 1):
                labels, shares = dat.editsharedata(
                    pageid, domain, weights=list(base + (last,)),
                    namesort=True, dbdata=dbdata)
                runs[last].append(shares)

        if not labels:
            # Nothing to plot; the old code crashed here when unpacking the
            # result of zip() over an empty list.
            print("skipping %s: no edit share data" % filename)
            continue

        series = [[], []]
        for group, shares_per_run in enumerate(runs):
            # Column-wise sum over all weightings in this group.
            summed = [sum(column) for column in zip(*shares_per_run)]
            assert len(summed) == len(labels)
            # NOTE: each series is sorted on its own, so position k in one
            # series need not belong to the same editor as position k in
            # the other.  The labels themselves are not plotted.
            series[group] = tuple(sorted(summed))

        xpoints = list(range(len(series[-1])))
        print(self.specialtrajectory(xpoints, series[0],
                                     xpoints, series[1],
                                     None, None, None,
                                     xaxisname, yaxisname, None,
                                     title, filename,
                                     width=20, height=12))
def talkplots(self):
    """Plot article and talk-page trajectories side by side for each page pair.

    ``WikiDataHandle.talkpages()`` supplies four parallel sequences (titles,
    article page ids, talk page ids, domains).  For every pair the
    un-normalised trajectory data of both pages is fetched, each value
    series is divided by its own maximum, timestamps are converted to hours
    since the earlier of the two first timestamps, and the result is passed
    to ``self.talktrajectory``, whose return value is printed.

    Pairs whose trajectory data cannot be fetched, or whose timestamp lists
    are empty, are skipped with a short notice.
    """

    def scale_to_peak(values):
        """Divide ``values`` in place by their maximum, if it is non-zero."""
        if len(values) == 0:
            return
        peak = max(values)
        if not peak:
            return
        # Float divisor: with integer data, Python 2's "/" would floor every
        # value below the peak to 0.
        divisor = float(peak)
        for idx, value in enumerate(values):
            values[idx] = value / divisor

    dat = WikiDataHandle()
    titles, pids, tids, domains = dat.talkpages()
    for title, pageid, talkid, domain in zip(titles, pids, tids, domains):
        print("%s %s" % (title, pageid))
        try:
            x1, t1, _ = dat.trajectorydata(pageid, domain, normalise=False)
            x2, t2, growth = dat.trajectorydata(talkid, domain,
                                                normalise=False)
        except Exception as err:
            # Best effort: one bad page must not abort the whole batch, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("skipping %s: %r" % (pageid, err))
            continue

        if len(x1) == 0 or len(x2) == 0:
            # No timestamps means no creation time to measure from.
            print("skipping %s: empty trajectory" % (pageid,))
            continue

        for values in (t1, t2, growth):
            scale_to_peak(values)

        # Both x axes are measured from whichever page appeared first.
        creation = min(x1[0], x2[0])
        for stamps in (x1, x2):
            for idx, stamp in enumerate(stamps):
                stamps[idx] = (stamp - creation).total_seconds() / 3600

        plot_title = (title.decode('utf-8').replace('/', '')
                      + " article vs talk page trajectories")
        xaxisname = "Hours since creation"
        taxisname = "Change"
        gaxisname = "Article page size"
        filename = domain + str(pageid) + " Special Combo"
        print(self.talktrajectory(x1, t1, x2, t2, growth,
                                  xaxisname, taxisname, gaxisname,
                                  plot_title, filename,
                                  width=20, height=12))
def plot(self, title, pageid, domain, trajectory=True, editcount=True,
         editshare=True, weights=None):
    """Produce the requested plots for one page and return their results.

    Args:
        title: page title as a UTF-8 byte string.  If it cannot be decoded,
            ``"id" + str(pageid)`` is used in its place.
        pageid: page identifier; used in plot titles and file names.
        domain: domain as a UTF-8 byte string.
        trajectory: when true, draw the trajectory plot.
        editcount: when true, draw the edit-count bar chart.
        editshare: when true, draw the edit-share bar chart.
        weights: weights passed to ``editsharedata``.  When ``None`` (the
            default) ``self.weights`` is used.  This argument used to be
            accepted but ignored.

    Returns:
        list: one entry per plot drawn, in the order trajectory, edit count,
        edit share; each entry is whatever ``self.trajectory`` /
        ``self.barchart`` returned (presumably a file name - the entries are
        concatenated onto a string for logging).
    """
    filenames = []

    def record(name):
        """Store a finished plot and announce it on stdout and the debug log."""
        filenames.append(name)
        print("plotted %s" % (name,))
        logDebug("plotted " + name)

    dat = WikiDataHandle()
    try:
        title = title.decode('utf-8')
    except Exception:
        # Undecodable or missing title: fall back to an id-based label.
        title = "id" + str(pageid)
    domain = domain.decode('utf-8')

    if trajectory:
        xpoints, tpoints, gpoints = dat.trajectorydata(pageid, domain)
        record(self.trajectory(
            xpoints, tpoints, gpoints,
            "Hours since creation", "Edit distance from final",
            "Article size",
            "Trajectory of " + safefilename(title) + ", "
            + domain + str(pageid),
            domain + str(pageid) + "traj",
            width=13, height=8))

    if editcount:
        xlabels, ypoints = dat.editcountdata(pageid, domain)
        record(self.barchart(
            xlabels, ypoints, "Username", "Edit count",
            "Editors of " + safefilename(title) + ", "
            + domain + str(pageid) + ", by edit count",
            domain + str(pageid) + "editc",
            width=13, height=8))

    if editshare:
        # Honour an explicit ``weights`` argument; otherwise keep the
        # instance-level weights the method has always used.
        share_weights = self.weights if weights is None else weights
        xlabels, ypoints = dat.editsharedata(pageid, domain, share_weights)
        record(self.barchart(
            xlabels, ypoints, "Username", "Edit share",
            "Editors of " + safefilename(title) + ", "
            + domain + str(pageid) + ", by share",
            domain + str(pageid) + "share",
            width=13, height=8))

    return filenames