def cumulative_registrations(width=1000, height=700): registrations = [float(x[0]) / 1000.0 for x in UserRegistration.objects.values_list("when")] registrations.sort() total = 0 cudist = {} for i in registrations: total += 1 cudist[i] = total print make_filename("registrations") r.png(file=make_filename("registrations"), width=width, height=height) r.plot(c(cudist.keys()), c([x for x in cudist.values()]), xlab="", ylab="", axes=False, cex=0.3) days = 60 print total # print len(r.seq(min(registrations),max(registrations),24*60*60*days)) r.axis( 1, r.seq(min(registrations), max(registrations), 24 * 60 * 60 * days), [ "%s %d" % (months[x.tm_mon], x.tm_year) for x in [time.localtime(b) for b in xrange(min(registrations), max(registrations), 24 * 60 * 60 * days)] ], ) # label the max r.axis( 2, r.c(r.seq(0, ((total / 5000) + 1) * 5000, 5000), total), r.c(r.seq(0, ((total / 5000) + 1) * 5000, 5000), total), ) r.title("cumulative listit registrations (august 2008-may 2011)") # r.lines(c([float(x) for x in cudist.keys()]), c([x for x in cudist.values()]),xlab=r.c(),ylab=r.c())) r("dev.off()")
def time_till_deletehist(notes, width=1024, height=768): delete_logs = lambda n: ActivityLog.objects.filter(owner=n.owner, noteid=n.jid, action="note-delete") if type(notes) == QuerySet: notes = notes.filter(deleted=1) else: notes = [n for n in notes if n.deleted] vals = [] for n in notes: dl = delete_logs(n) if dl.count() == 0: continue if float(dl[0].when) / 1000 - float(n.created) / 1000 > 0: vals.append(float(dl[0].when) / 1000 - float(n.created) / 1000) r.png(file=make_filename("ttd"), width=width, height=height) breaks = r.c( 0, 60 * 60, 2 * 60 * 60, 24 * 60 * 60, 2 * 24 * 60 * 60, r.seq(3 * 24 * 60 * 60, max(vals) + 24 * 60 * 60, 24 * 60 * 60), ) rr = r.hist(c(vals), breaks=breaks, axes=False, xlab="", ylab="", ylim=r.c(0, 10)) r("dev.off()") r.png(file=make_filename("ttd"), width=width, height=height) r.barplot(rr[1]) print len(breaks) print len([x / 60 * 60 for x in breaks]) r.axis(1, at=r.seq(0, len(breaks) - 1), labels=[x / (24 * 60 * 60) for x in breaks]) r("dev.off()") return rr
def time_till_deletehist(notes, width=1024, height=768): delete_logs = lambda n: ActivityLog.objects.filter( owner=n.owner, noteid=n.jid, action='note-delete') if type(notes) == QuerySet: notes = notes.filter(deleted=1) else: notes = [n for n in notes if n.deleted] vals = [] for n in notes: dl = delete_logs(n) if dl.count() == 0: continue if float(dl[0].when) / 1000 - float(n.created) / 1000 > 0: vals.append(float(dl[0].when) / 1000 - float(n.created) / 1000) r.png(file=make_filename('ttd'), width=width, height=height) breaks = r.c( 0, 60 * 60, 2 * 60 * 60, 24 * 60 * 60, 2 * 24 * 60 * 60, r.seq(3 * 24 * 60 * 60, max(vals) + 24 * 60 * 60, 24 * 60 * 60)) rr = r.hist(c(vals), breaks=breaks, axes=False, xlab='', ylab='', ylim=r.c(0, 10)) r('dev.off()') r.png(file=make_filename('ttd'), width=width, height=height) r.barplot(rr[1]) print len(breaks) print len([x / 60 * 60 for x in breaks]) r.axis(1, at=r.seq(0, len(breaks) - 1), labels=[x / (24 * 60 * 60) for x in breaks]) r('dev.off()') return rr
def plot_(rhos, filename):
    """Plot the non-None entries of ``rhos`` to a 1280x1024 PNG.

    :param rhos: iterable of values; ``None`` entries are dropped.
    :param filename: name passed through ``cap.make_filename``.
    :returns: the list of values that were plotted.
    """
    from jv3.study.ca_plot import make_filename
    kept = []
    for rho in rhos:
        if rho is not None:
            kept.append(rho)
    target = cap.make_filename(filename)
    r.png(target, width=1280, height=1024)
    r.plot(c(kept), ylab='')
    r('dev.off()')
    return kept
def sTime(filename, user, title='title'):
    """Scatter-plot a user's note activity by time of week and time of day.

    Every add/save/delete log whose note still exists becomes one point:
    x is milliseconds since the start of the week (Monday 00:00) and y is
    milliseconds since midnight, both derived from the log's ``when``.

    :param filename: passed through ``cap.make_filename``; the PNG is written
        under '/var/listit/www-ssl/_studywolfe/' with '.png' appended.
    :param user: owner whose notes and activity logs are plotted.
    :param title: plot title; the placeholder 'title' selects a generated one.
    """
    def to_datetime(msec):
        return dd.fromtimestamp(float(msec) / 1000.0)

    def ms_into_week(dt):
        secs = ((dt.weekday() * 24 + dt.hour) * 60 + dt.minute) * 60 + dt.second
        return int(secs * 1000 + float(dt.microsecond) / 1000.0)

    def ms_into_day(dt):
        secs = (dt.hour * 60 + dt.minute) * 60 + dt.second
        return int(secs * 1000 + float(dt.microsecond) / 1000.0)

    filename = cap.make_filename(filename)
    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    points = {'note-add': r.c(), 'note-save': r.c(), 'note-delete': r.c()}

    for log in allLogs:
        matches = notes.filter(jid=log.noteid)
        if len(matches) >= 1:
            # Only logs whose note still exists (deleted or not) are plotted.
            note = matches[0]
            acted, _born = to_datetime(log.when), to_datetime(note.created)
            row = c([ms_into_week(acted), ms_into_day(acted)])
            points[log.action] = r.rbind(points[log.action], row)

    r.png(file='/var/listit/www-ssl/_studywolfe/' + filename + '.png',
          w=2000, h=1000)
    if title == 'title':
        title = "#Notes:#Logs:Email:ID -- " + ":".join(
            [str(notes.count()), str(allLogs.count()), user.email,
             str(user.id)])

    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun", "Mon"]
    hourNames = ['midnight', '1am', '2am', '3am', '4am', '5am', '6am', '7am',
                 '8am', '9am', '10am', '11am', 'noon',
                 '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm',
                 '9pm', '10pm', '11pm', 'midnight']

    r.plot(points['note-add'], cex=8.0, col="green", pch='o',
           xlab="Day Of Week", ylab="Hour of Day", main=title, axes=False,
           xlim=r.c(0, 7 * 24 * 3600 * 1000), ylim=r.c(0, 24 * 3600 * 1000))
    r.points(points['note-save'], cex=4.0, col="purple", pch=17)
    r.points(points['note-delete'], cex=4.0, col="dark red", pch='x')

    day_ticks = [float(d * 24 * 60 * 60 * 1000.0) for d in range(8)]
    hour_ticks = [float(h * 60 * 60 * 1000.0) for h in range(25)]
    r.axis(1, at=c(day_ticks), labels=c(list(dayNames)), tck=1)
    r.axis(2, at=c(hour_ticks), labels=c(list(hourNames)), tck=1)
    devoff()
def plot_(rhos, filename):
    """Plot the non-None entries of ``rhos`` to a 1280x1024 PNG.

    :param rhos: iterable of values; ``None`` entries are dropped.
    :param filename: name passed through ``cap.make_filename``.
    :returns: the list of values that were plotted.
    """
    from jv3.study.ca_plot import make_filename
    kept = []
    for rho in rhos:
        if rho is not None:
            kept.append(rho)
    target = cap.make_filename(filename)
    r.png(target, width=1280, height=1024)
    r.plot(c(kept), ylab='')
    r('dev.off()')
    return kept
def count_log_histograms(counts, filename='log_counts.png'):
    """Bar-plot the base-10 logarithm of ``cap.count(counts)`` to a PNG.

    :param counts: values handed to ``cap.count``; their min..max range is
        also used for the bar names.
    :param filename: name passed through ``cap.make_filename``.
    """
    tallies = cap.count(counts)
    target = cap.make_filename(filename)
    r.png(target, width=1280, height=1024)

    # log10(x) computed as ln(x) / ln(10) through R
    ln10 = r.log(10)[0]
    logged = []
    for tally in tallies:
        logged.append(r.log(tally)[0] / ln10)

    bar_options = {
        "axis.lty": 1,
        "names.arg": c(xrange(min(counts), max(counts) + 1)),
    }
    r.barplot(c(logged), ylim=c([0, max(logged)]), **bar_options)
    r('dev.off()')
def edit_recency(notes, action='note-save', filename='recency', width=1280,
                 height=1024):
    """Plot a histogram of how long after creation notes received an action.

    For every activity log of type ``action`` belonging to one of ``notes``,
    the elapsed time since the note's ``created`` value (both divided by
    1000) is binned at 1 min, 1 hr, 1-6 days and 2-12 weeks.

    :param notes: QuerySet of notes; ``notes[0].owner`` selects the user.
    :param action: activity-log action to measure.
    :param filename: name passed through ``make_filename``.
    :param width: PNG width passed to ``r.png``.
    :param height: PNG height passed to ``r.png``.
    """
    user = notes[0].owner

    def tosecs(msec):
        # Convert before dividing so an integer value is not floor-divided
        # (matches the float(x) / 1000 form used elsewhere in this file).
        return float(msec) / 1000

    creations = dict(
        (jid, tosecs(created))
        for jid, created in notes.values_list('jid', 'created'))
    actlogs = ActivityLog.objects.filter(
        owner=user, action=action,
        noteid__in=[n.jid for n in notes]).values('when', 'noteid')

    # noteid -> list of action times
    actlogd = {}
    for entry in actlogs:
        actlogd.setdefault(entry['noteid'], []).append(tosecs(entry['when']))

    # Flatten linearly instead of reduce(list + list), which is quadratic.
    edits_since_creation_all = []
    for jid, born in creations.items():
        edits_since_creation_all.extend(
            when - born for when in actlogd.get(jid, []))

    day = 24 * 60 * 60
    breaks = ([0, 60, 60 * 60] +
              [i * day for i in range(1, 7)] +
              [i * 7 * day for i in range(2, 13)])
    breaklabels = (['1 min', '1 hr'] +
                   ["%d days" % x for x in range(1, 7)] +
                   ["%d weeks" % x for x in range(2, 13)])

    # Keep only values strictly inside the binned range.
    limit = max(breaks)
    edits_since_creation_all = [
        x for x in edits_since_creation_all if 0 < x < limit]

    r.png(file=make_filename(filename), width=width, height=height)
    r.hist(
        c(edits_since_creation_all),
        breaks=c(breaks),
        labels=c(breaklabels),
        freq=True,
        xlab='',
        ylab='',
        main='frequency of edits to notes (measured in time since creation) %s'
        % user.email)
    r('dev.off()')
def plotTukeyHSD(feature_name,fmla): import StringIO tsdres = StringIO.StringIO() name_aov = cap.make_filename('aov_%s' % feature_name) name_tsd = cap.make_filename('tsd_%s' % feature_name) r.png(name_aov) aov = r('aov')(fmla) print "AOV" print >>tsdres,r.summary(aov) r.plot(aov) r('dev.off()') tsd = r('TukeyHSD')(aov) print "TSD" print >>tsdres,tsd print r.summary(tsd) r.png(name_tsd) r.plot(tsd) r('dev.off()') return name_aov,name_tsd,tsdres.getvalue(),aov,tsd
def sBar(filename, user, title='title'):
    """Stacked bar plot of a user's note actions grouped by day of week.

    Each weekday gets GROUP_TYPES bars: adds, saves within the past week of
    the note's creation, later saves, deletes within the past week, later
    deletes.  Within a bar, counts are split by the weekday on which the
    note was created.

    :param filename: name passed through ``make_filename``.
    :param user: owner whose notes and activity logs are plotted.
    :param title: plot title; the placeholder 'title' selects a generated
        one (previously the argument was always overwritten).
    """
    from jv3.study.ca_plot import make_filename

    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5  # add, edit, edit, del, del

    def aveSize(a, b):
        # a = quantity of something per b elements; 0 when there are none
        return int(float(a) / float(b)) if b != 0 else 0

    def wksToIndex(rowWeek, colWeek):
        return rowWeek + colWeek * COL_SEGMENTS

    def msecToDate(msec):
        return datetime.datetime.fromtimestamp(float(msec) / 1000.0)

    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS * GROUP_TYPES)

    # Note ids already counted as edited, per weekday, split old/new.
    nOldEdit = [set() for _ in range(ROW_GROUPS)]
    nNewEdit = [set() for _ in range(ROW_GROUPS)]
    # [number of notes, total text length] for each weekday
    createdSize = [[0, 0] for _ in range(ROW_GROUPS)]

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:
            # Only process logs whose note still exists (deleted or not).
            continue
        note = noteArr[0]
        actDate, birthDate = msecToDate(log.when), msecToDate(note.created)
        actDay, birthDay = actDate.weekday(), birthDate.weekday()
        firstRecord = min(actDate, birthDate)
        lastTime = max(actDate, birthDate)
        startOfDay = dd(lastTime.year, lastTime.month, lastTime.day)
        actTD = firstRecord - startOfDay
        # Both .created and .when fall within (current day + 6 previous days)
        actInPastWk = math.fabs(actTD.days) <= 6

        if log.action == 'note-add':
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + 0)] += 1
            text = log.noteText
            # Non-empty text with fewer than 100 newlines feeds the average.
            if text is not None and len(text) > 0 and text.count('\n') < 100:
                createdSize[actDay][0] += 1
                createdSize[actDay][1] += len(text)
        elif log.action == 'note-save':
            # Split saves: within the note's first week vs. later.
            addVal = 1 if actInPastWk else 2
            seen = nNewEdit if actInPastWk else nOldEdit
            if log.noteid in seen[actDay]:
                continue  # already recorded this note for its time-frame
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1
            seen[actDay].add(log.noteid)
        elif log.action == 'note-delete':
            addVal = 3 if actInPastWk else 4
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1

    r.png(file=make_filename(filename), w=1000, h=500)
    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
    colors = r.c("red", 'orange', 'yellow', 'green', 'blue', 'grey', 'brown')
    if title == 'title':
        title = ("#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" +
                 str(allLogs.count()) + ":" + user.email + ":" + str(user.id))

    # aveSize guards the no-qualifying-notes case, which previously raised
    # ZeroDivisionError here.
    aveWidth = aveSize(sum(elt[1] for elt in createdSize),
                       sum(elt[0] for elt in createdSize))
    widths = []
    for elt in createdSize:
        widths.extend([aveSize(elt[1], elt[0]),
                       aveWidth, aveWidth, aveWidth, aveWidth])
    axisNames = []
    for i in range(ROW_GROUPS):
        axisNames.extend([str(widths[i * 5]), "", "", str(dayNames[i]), ""])

    r.barplot(data, main=title, ylab='# Action Logs', beside=False,
              col=colors, space=r.c(3, 1, 0.1, 1, .1), names=axisNames,
              width=c(widths))
    devoff()
def cumulative_time_till_delete(notes, filename='ttd', width=1280, height=800, xlim_days=90): tlim = 24 * 60 * 60 * xlim_days deletions = dict([(x[0], float(x[1] / 1000)) for x in ActivityLog.objects.filter( owner=notes[0].owner, noteid__in=[x.jid for x in notes], action='note-delete').values_list('noteid', 'when')]) creations = dict([(x[0], float(x[1]) / 1000) for x in notes.values_list('jid', 'created')]) d1 = [(deletions[d] - creations[d]) for d in deletions.keys() if d in creations.keys()] d1.sort() durations = [D for D in d1 if D > 0 and D < 24 * 60 * 60 * xlim_days] print "durations: ", len(durations) total = 0 cudist = {} for i in durations: total += 1 cudist[i] = total print make_filename(filename) r.png(file=make_filename(filename), width=width, height=height) r.plot(c(cudist.keys()), c([x * 1.0 / len(d1) for x in cudist.values()]), xlab='', ylab='', axes=False, xlim=r.c(0, tlim)) BUCKET = 24 * 60 * 60 breaks = r.seq(0, tlim + BUCKET, BUCKET) r.axis(1, breaks, [x / (24 * 60 * 60) for x in breaks]) r.lines(r.c(0, 7000.0), r.c(0, 1000.0), lwd=2, col='black') r.title( 'percentage of deleted notes deleted after X days after it was created (user: %s) ' % notes[0].owner.email) r('dev.off()')
def count_histograms(counts, filename='counts.png'):
    """Bar-plot ``cap.count(counts)`` to a 1280x1024 PNG.

    :param counts: values handed to ``cap.count``; their min..max range is
        also used for the bar names.
    :param filename: name passed through ``cap.make_filename``.
    """
    tallies = cap.count(counts)
    target = cap.make_filename(filename)
    r.png(target, width=1280, height=1024)
    bar_options = {
        "axis.lty": 1,
        "names.arg": c(xrange(min(counts), max(counts) + 1)),
    }
    r.barplot(c(tallies), ylim=c([0, max(tallies)]), **bar_options)
    r('dev.off()')
def cumulative_time_till_delete(notes, filename="ttd", width=1280, height=800, xlim_days=90): tlim = 24 * 60 * 60 * xlim_days deletions = dict( [ (x[0], float(x[1] / 1000)) for x in ActivityLog.objects.filter( owner=notes[0].owner, noteid__in=[x.jid for x in notes], action="note-delete" ).values_list("noteid", "when") ] ) creations = dict([(x[0], float(x[1]) / 1000) for x in notes.values_list("jid", "created")]) d1 = [(deletions[d] - creations[d]) for d in deletions.keys() if d in creations.keys()] d1.sort() durations = [D for D in d1 if D > 0 and D < 24 * 60 * 60 * xlim_days] print "durations: ", len(durations) total = 0 cudist = {} for i in durations: total += 1 cudist[i] = total print make_filename(filename) r.png(file=make_filename(filename), width=width, height=height) r.plot( c(cudist.keys()), c([x * 1.0 / len(d1) for x in cudist.values()]), xlab="", ylab="", axes=False, xlim=r.c(0, tlim), ) BUCKET = 24 * 60 * 60 breaks = r.seq(0, tlim + BUCKET, BUCKET) r.axis(1, breaks, [x / (24 * 60 * 60) for x in breaks]) r.lines(r.c(0, 7000.0), r.c(0, 1000.0), lwd=2, col="black") r.title("percentage of deleted notes deleted after X days after it was created (user: %s) " % notes[0].owner.email) r("dev.off()")
def cumulative_registrations(width=1000, height=700): registrations = [ float(x[0]) / 1000.0 for x in UserRegistration.objects.values_list('when') ] registrations.sort() total = 0 cudist = {} for i in registrations: total += 1 cudist[i] = total print make_filename('registrations') r.png(file=make_filename('registrations'), width=width, height=height) r.plot(c(cudist.keys()), c([x for x in cudist.values()]), xlab='', ylab='', axes=False, cex=0.3) days = 60 print total # print len(r.seq(min(registrations),max(registrations),24*60*60*days)) r.axis(1, r.seq(min(registrations), max(registrations), 24 * 60 * 60 * days), [ "%s %d" % (months[x.tm_mon], x.tm_year) for x in [ time.localtime(b) for b in xrange(min(registrations), max(registrations), 24 * 60 * 60 * days) ] ]) # label the max r.axis(2, r.c(r.seq(0, ((total / 5000) + 1) * 5000, 5000), total), r.c(r.seq(0, ((total / 5000) + 1) * 5000, 5000), total)) r.title('cumulative listit registrations (august 2008-may 2011)') # r.lines(c([float(x) for x in cudist.keys()]), c([x for x in cudist.values()]),xlab=r.c(),ylab=r.c())) r('dev.off()')
def plot_notelife_with_color(basedir, users, color_fn):#, firstHalf=True): i=0 print "Start time: ", time.gmtime() startTime = time.time() ##start, end = (0,len(u)/2) if firstHalf else (len(u)/2, len(u)-1) for user in users: uNotes = Note.objects.filter(owner=user) uLogs = ActivityLog.objects.filter(owner=user, action__in=['note-add','note-save','note-delete']) if (((uNotes.count() >= 120) or (uLogs.count() >= 120)) and ((uNotes.count() >= 50) and (uLogs.count() >= 50))): lifelineFlatCollapsedCompareColor(cap.make_filename(basedir + "userid-"+str(user.id)), uNotes, color_function=color_fn) i += 1 pass pass print "Users processed: ", str(i) , " out of: ", str(len(users)) print "Finish time: ", time.gmtime() finishTime = time.time()
def bTime(filename, user, title='title'):
    """Stacked bar plot of a user's note actions per hour of day.

    Each of the 24 hour columns stacks three counts: adds, saves and
    deletes, taken from logs whose note still exists.

    :param filename: name passed through ``cap.make_filename``.
    :param user: owner whose notes and activity logs are plotted.
    :param title: plot title; the placeholder 'title' selects a generated one.
    """
    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 3, 24, 1

    def hrsToIndex(colSeg, group, hrOfDay):
        return colSeg + (hrOfDay * GROUP_TYPES + group) * COL_SEGMENTS

    # row of the matrix that each action type is counted in
    segment_of = {'note-add': 0, 'note-save': 1, 'note-delete': 2}

    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS * GROUP_TYPES)

    for log in allLogs:
        matches = notes.filter(jid=log.noteid)
        if len(matches) >= 1:
            # Only logs whose note still exists (deleted or not) are counted.
            note = matches[0]
            aDate = wUtil.msecToDate(log.when)
            bDate = wUtil.msecToDate(note.created)
            aDay, aHour = aDate.weekday(), aDate.hour
            segment = segment_of.get(log.action)
            if segment is not None:
                data[hrsToIndex(segment, 0, aHour)] += 1

    r.png(file=cap.make_filename(filename), w=2000, h=1000)
    if title == 'title':
        title = "#Notes:#Logs:Email:ID -- " + ":".join(
            [str(notes.count()), str(allLogs.count()), user.email,
             str(user.id)])

    hourNames = ['midnight', '1am', '2am', '3am', '4am', '5am', '6am', '7am',
                 '8am', '9am', '10am', '11am', 'noon',
                 '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm',
                 '9pm', '10pm', '11pm']
    r.barplot(data, main=title, ylab='# Action Logs', beside=False,
              names=hourNames, col=r.c('green', 'orange', 'black'))
    devoff()
def get_distribution_of_types(ss, filename=None, width=1600, height=1200):
    """Tabulate the labels of all notes in ``ss`` and optionally plot them.

    :param ss: mapping with a 'notes' entry; also passed to ``note_labels``.
    :param filename: when not None, a sorted bar plot with a legend is
        written to ``make_filename(filename)``.
    :param width: PNG width passed to ``r.png``.
    :param height: PNG height passed to ``r.png``.
    :returns: ``(t, freq)`` where ``t`` is the R table of labels (sorted in
        decreasing order when plotted) and ``freq`` an ``nltk.FreqDist``.
    """
    from jv3.study.ca_plot import make_filename

    def all_labels(notes):
        # concatenate the per-note label sequences into one
        per_note = [note_labels(ss, n) for n in notes]
        return reduce(lambda acc, more: acc + more, per_note)

    plotting = filename is not None
    if plotting:
        r.png(file=make_filename(filename), width=width, height=height)
    t = r.table(r.factor(c(all_labels(ss['notes']))))
    if plotting:
        t = r.sort(t, decreasing=True)
        r.barplot(t, names=t, col='white')
        r.legend(40, r.max(t), r.names(t), cex=0.90)
        r('dev.off()')
    # frequency distribution of the same labels, for comparing max and min
    return t, nltk.FreqDist(all_labels(ss['notes']))
def plot_note_words_hist_modfilename(notes, filename="num_words", width=1024, height=800, soft_max=300): user = notes[0].owner nwords = [ ca.note_words(n2vals(x))['note_words'] for x in notes.values("contents") ] nchars_ = [len(x["contents"].strip()) for x in notes.values("contents")] nchars = [x for x in nchars_ if x < soft_max] r.png(file='/dev/null', width=width, height=height) breaks = [x for x in xrange(soft_max)] nchars = r.hist(c(nchars), breaks=c(breaks))[1] r('dev.off()') r.png(file=make_filename(filename), width=width, height=height) nwords_ = [x for x in nwords] nwords = [x for x in nwords if x < soft_max] hh = r.hist(c(nwords), breaks=c(breaks), labels=c(breaks), freq=True, xlab='', ylab='', main='length of notes (in words) %s (%d)' % (user.email, len(notes))) # print len(breaks)," ", len(nchars) print nchars r.lines(c(breaks[:-1]), nchars, col='green') r.text( r.c(3.0 / 4.0 * soft_max), r.c(3.0 / 4.8 * max(hh[1]) + 0.1 * max(hh[1])), "notes min-median-mode-max: %f %f %f %f" % (min(nwords_), median(nwords_), ca.mode(nwords_), max(nwords_))) r.text( r.c(3.0 / 4.0 * soft_max), r.c(3.0 / 4.8 * max(hh[1])), "char min-median-mode-max: %f %f %f %f" % (min(nchars_), median(nchars_), ca.mode(nchars_), max(nchars_))) r('dev.off()') return hh
def edit_recency(notes, action="note-save", filename="recency", width=1280,
                 height=1024):
    """Plot a histogram of how long after creation notes received an action.

    For every activity log of type ``action`` belonging to one of ``notes``,
    the elapsed time since the note's ``created`` value (both divided by
    1000) is binned at 1 min, 1 hr, 1-6 days and 2-12 weeks.

    :param notes: QuerySet of notes; ``notes[0].owner`` selects the user.
    :param action: activity-log action to measure.
    :param filename: name passed through ``make_filename``.
    :param width: PNG width passed to ``r.png``.
    :param height: PNG height passed to ``r.png``.
    """
    user = notes[0].owner

    def tosecs(msec):
        # Convert before dividing so an integer value is not floor-divided
        # (matches the float(x) / 1000 form used elsewhere in this file).
        return float(msec) / 1000

    creations = dict(
        (jid, tosecs(created))
        for jid, created in notes.values_list("jid", "created"))
    actlogs = ActivityLog.objects.filter(
        owner=user, action=action,
        noteid__in=[n.jid for n in notes]).values("when", "noteid")

    # noteid -> list of action times
    actlogd = {}
    for entry in actlogs:
        actlogd.setdefault(entry["noteid"], []).append(tosecs(entry["when"]))

    # Flatten linearly instead of reduce(list + list), which is quadratic.
    edits_since_creation_all = []
    for jid, born in creations.items():
        edits_since_creation_all.extend(
            when - born for when in actlogd.get(jid, []))

    day = 24 * 60 * 60
    breaks = ([0, 60, 60 * 60] +
              [i * day for i in range(1, 7)] +
              [i * 7 * day for i in range(2, 13)])
    breaklabels = (["1 min", "1 hr"] +
                   ["%d days" % x for x in range(1, 7)] +
                   ["%d weeks" % x for x in range(2, 13)])

    # Keep only values strictly inside the binned range.
    limit = max(breaks)
    edits_since_creation_all = [
        x for x in edits_since_creation_all if 0 < x < limit]

    r.png(file=make_filename(filename), width=width, height=height)
    r.hist(
        c(edits_since_creation_all),
        breaks=c(breaks),
        labels=c(breaklabels),
        freq=True,
        xlab="",
        ylab="",
        main="frequency of edits to notes (measured in time since creation) %s"
        % user.email,
    )
    r("dev.off()")
def get_distribution_of_types(ss, filename=None, width=1600, height=1200):
    """Tabulate the labels of all notes in ``ss`` and optionally plot them.

    :param ss: mapping with a 'notes' entry; also passed to ``note_labels``.
    :param filename: when not None, a sorted bar plot with a legend is
        written to ``make_filename(filename)``.
    :param width: PNG width passed to ``r.png``.
    :param height: PNG height passed to ``r.png``.
    :returns: ``(t, freq)`` where ``t`` is the R table of labels (sorted in
        decreasing order when plotted) and ``freq`` an ``nltk.FreqDist``.
    """
    from jv3.study.ca_plot import make_filename

    def all_labels(notes):
        # concatenate the per-note label sequences into one
        per_note = [note_labels(ss, n) for n in notes]
        return reduce(lambda acc, more: acc + more, per_note)

    plotting = filename is not None
    if plotting:
        r.png(file=make_filename(filename), width=width, height=height)
    t = r.table(r.factor(c(all_labels(ss['notes']))))
    if plotting:
        t = r.sort(t, decreasing=True)
        r.barplot(t, names=t, col='white')
        r.legend(40, r.max(t), r.names(t), cex=0.90)
        r('dev.off()')
    # frequency distribution of the same labels, for comparing max and min
    return t, nltk.FreqDist(all_labels(ss['notes']))
def plot_note_words_hist_modfilename(notes, filename="num_words", width=1024, height=800, soft_max=300): user = notes[0].owner nwords = [ca.note_words(n2vals(x))["note_words"] for x in notes.values("contents")] nchars_ = [len(x["contents"].strip()) for x in notes.values("contents")] nchars = [x for x in nchars_ if x < soft_max] r.png(file="/dev/null", width=width, height=height) breaks = [x for x in xrange(soft_max)] nchars = r.hist(c(nchars), breaks=c(breaks))[1] r("dev.off()") r.png(file=make_filename(filename), width=width, height=height) nwords_ = [x for x in nwords] nwords = [x for x in nwords if x < soft_max] hh = r.hist( c(nwords), breaks=c(breaks), labels=c(breaks), freq=True, xlab="", ylab="", main="length of notes (in words) %s (%d)" % (user.email, len(notes)), ) # print len(breaks)," ", len(nchars) print nchars r.lines(c(breaks[:-1]), nchars, col="green") r.text( r.c(3.0 / 4.0 * soft_max), r.c(3.0 / 4.8 * max(hh[1]) + 0.1 * max(hh[1])), "notes min-median-mode-max: %f %f %f %f" % (min(nwords_), median(nwords_), ca.mode(nwords_), max(nwords_)), ) r.text( r.c(3.0 / 4.0 * soft_max), r.c(3.0 / 4.8 * max(hh[1])), "char min-median-mode-max: %f %f %f %f" % (min(nchars_), median(nchars_), ca.mode(nchars_), max(nchars_)), ) r("dev.off()") return hh
def plot_notelife_with_color(basedir, users, color_fn): #, firstHalf=True): i = 0 print "Start time: ", time.gmtime() startTime = time.time() ##start, end = (0,len(u)/2) if firstHalf else (len(u)/2, len(u)-1) for user in users: uNotes = Note.objects.filter(owner=user) uLogs = ActivityLog.objects.filter( owner=user, action__in=['note-add', 'note-save', 'note-delete']) if (((uNotes.count() >= 120) or (uLogs.count() >= 120)) and ((uNotes.count() >= 50) and (uLogs.count() >= 50))): lifelineFlatCollapsedCompareColor( cap.make_filename(basedir + "userid-" + str(user.id)), uNotes, color_function=color_fn) i += 1 pass pass print "Users processed: ", str(i), " out of: ", str(len(users)) print "Finish time: ", time.gmtime() finishTime = time.time()
def plotVerUrl(filename, user): numLines = lambda txt: len(txt.splitlines()) - txt.splitlines().count("") aveSize = ( lambda a, b: int(float(a) / float(b)) if b != 0 else 0 ) ## a=quantity of something per how many b elts, if no b, return 0 wksToIndex = lambda rowWeek, colWeek: rowWeek + (colWeek) * COL_SEGMENTS msecToDate = lambda msec: datetime.datetime.fromtimestamp(float(msec) / 1000.0) DAY_IN_MS = 1000 * 60 * 60 * 24 COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5 ## add, edit,edit, del,del notes = user.note_owner.all().exclude(jid="-1") allLogs = ActivityLog.objects.filter(owner=user, action__in=["note-add", "note-save", "note-delete"]) noteD = {"note-add": [], "note-save": [], "note-delete": []} nD, nDCount = {}, {} ##[0 for i in range(0,maxVer+1)] for log in allLogs: if log.noteid == -1: continue noteArr = notes.filter(jid=log.noteid) if len(noteArr) < 1: ## Processing logs for which we still continue ## have the note (deleted or not) note = noteArr[0] noteD[log.action].append(log) pass for noteLog in noteD["note-add"]: if noteLog.noteid == -1: continue noteVer = sum([1 if log.noteid == noteLog.noteid else 0 for log in noteD["note-save"]]) note = notes.filter(jid=noteLog.noteid)[0] ##addAmt = len(notes.filter(jid=noteLog.noteid)[0].contents) addAmt = ( 1 if len(grabUrls(note.contents)) > 0 else 0 ) ##1 if note.contents.find('http://') != -1 or note.contents.find('.comq/') != -1 else 0 if addAmt > 5000: continue ## Skip atrociously long notes if noteVer in nD: nD[noteVer] += addAmt nDCount[noteVer] += 1 else: nD[noteVer] = addAmt nDCount[noteVer] = 1 maxVer = 0 for k, v in nD.items(): maxVer = max(maxVer, k) print maxVer nData = [0 for i in range(0, min(maxVer + 1, 200))] nDataC = [0 for i in range(0, min(maxVer + 1, 200))] for k, v in nD.items(): ## k=ver, v=count of whatever's being counted if k > 200: continue ## skip notes more than 400 versions - emax bug... 
nData[k] = float(v) ##/float(nDCount[k]) ## quantity / note average for version=k nDataC[k] += nDCount[k] ## Count of notes at each version level print nDataC r.png(file=cap.make_filename(filename), w=1000, h=500) dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"] colors = r.c("red", "orange", "yellow", "green", "blue", "grey", "brown") title = ( "#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" + str(allLogs.count()) + ":" + user.email + ":" + str(user.id) ) r.barplot( c(r.rbind(nDataC, nData)), main=title, ylab="# Somethings", beside=True, col=r.c("grey", "green") ) ##, col=colors, space=r.c(3,1,0.1,1,.1), names=axisNames, width=c(widths)) devoff()
def plot_xyz(filename, x, y, z, xl="x", yl="y", title="title"):
    """Scatter-plot ``y`` against ``x`` to a 3200x1600 PNG, sizing points by ``z``.

    :param filename: name passed through ``make_filename``.
    :param x: x coordinates handed to ``r.plot``.
    :param y: y coordinates handed to ``r.plot``.
    :param z: point sizes, passed as ``cex``.
    :param xl: x-axis label.
    :param yl: y-axis label.
    :param title: main plot title.
    """
    target = make_filename(filename)
    r.png(file=target, w=3200, h=1600)
    r.plot(x, y, cex=z, xlab=xl, ylab=yl, main=title)
    devoff()
def get_distribution_of_days_active(user_ids,
                                    user_feature=wuw.user_percent_active_days,
                                    filename='daysdist'):
    """Plot a histogram of one per-user feature value across ``user_ids``.

    :param user_ids: iterable of ids handed one by one to ``user_feature``.
    :param user_feature: callable returning a dict; its first value is used.
    :param filename: name passed through ``cap.make_filename``.
    :returns: the value returned by ``r.hist``.
    """
    r.png(cap.make_filename(filename))
    samples = []
    for uid in user_ids:
        samples.append(user_feature(uid).values()[0])
    # empty vectors suppress the axis labels and the title
    rhi = r.hist(c(samples), xlab=r.c(), ylab=r.c(), main=r.c())
    r('dev.off()')
    return rhi
def sTime(filename, user, title='title'):
    """Scatter-plot a user's note activity by time of week and time of day.

    Every add/save/delete log whose note still exists becomes one point:
    x is milliseconds since the start of the week (Monday 00:00) and y is
    milliseconds since midnight, both derived from the log's ``when``.

    :param filename: passed through ``cap.make_filename``; the PNG is written
        under '/var/listit/www-ssl/_studywolfe/' with '.png' appended.
    :param user: owner whose notes and activity logs are plotted.
    :param title: plot title; the placeholder 'title' selects a generated one.
    """
    def to_datetime(msec):
        return dd.fromtimestamp(float(msec) / 1000.0)

    def ms_into_week(dt):
        secs = ((dt.weekday() * 24 + dt.hour) * 60 + dt.minute) * 60 + dt.second
        return int(secs * 1000 + float(dt.microsecond) / 1000.0)

    def ms_into_day(dt):
        secs = (dt.hour * 60 + dt.minute) * 60 + dt.second
        return int(secs * 1000 + float(dt.microsecond) / 1000.0)

    filename = cap.make_filename(filename)
    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    points = {'note-add': r.c(), 'note-save': r.c(), 'note-delete': r.c()}

    for log in allLogs:
        matches = notes.filter(jid=log.noteid)
        if len(matches) >= 1:
            # Only logs whose note still exists (deleted or not) are plotted.
            note = matches[0]
            acted, _born = to_datetime(log.when), to_datetime(note.created)
            row = c([ms_into_week(acted), ms_into_day(acted)])
            points[log.action] = r.rbind(points[log.action], row)

    r.png(file='/var/listit/www-ssl/_studywolfe/' + filename + '.png',
          w=2000, h=1000)
    if title == 'title':
        title = "#Notes:#Logs:Email:ID -- " + ":".join(
            [str(notes.count()), str(allLogs.count()), user.email,
             str(user.id)])

    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun", "Mon"]
    hourNames = ['midnight', '1am', '2am', '3am', '4am', '5am', '6am', '7am',
                 '8am', '9am', '10am', '11am', 'noon',
                 '1pm', '2pm', '3pm', '4pm', '5pm', '6pm', '7pm', '8pm',
                 '9pm', '10pm', '11pm', 'midnight']

    r.plot(points['note-add'], cex=8.0, col="green", pch='o',
           xlab="Day Of Week", ylab="Hour of Day", main=title, axes=False,
           xlim=r.c(0, 7 * 24 * 3600 * 1000), ylim=r.c(0, 24 * 3600 * 1000))
    r.points(points['note-save'], cex=4.0, col="purple", pch=17)
    r.points(points['note-delete'], cex=4.0, col="dark red", pch='x')

    day_ticks = [float(d * 24 * 60 * 60 * 1000.0) for d in range(8)]
    hour_ticks = [float(h * 60 * 60 * 1000.0) for h in range(25)]
    r.axis(1, at=c(day_ticks), labels=c(list(dayNames)), tck=1)
    r.axis(2, at=c(hour_ticks), labels=c(list(hourNames)), tck=1)
    devoff()
def lifelineFlatCollapsedCompareColor(filename, notes, title='Note Lifelines',
                                      color_function=black, YMAG=1.0):
    """Plot one vertical lifeline per note, notes ordered by creation time.

    x is the rank of the note when all known notes are sorted by birth time;
    y is milliseconds elapsed since that note's birth.  Births are plotted at
    y=0, edits at (edit time - birth), deletions at (death - birth), and a
    line coloured by ``color_function(text2vals(text))`` joins birth and
    death for every note whose death time and text are known.

    NOTE: a second definition with the same name appears later in this file;
    after import that later definition is the one bound to the name.

    filename       -- base name handed to cap.make_filename()
    notes          -- queryset of notes (``.filter``, ``.count`` and
                      ``.values_list`` are used); notes[0].owner selects the
                      activity logs
    title          -- prefix of the plot title
    color_function -- maps text2vals(note text) to a colour for r.lines
    YMAG           -- multiplier for the upper y limit (whenmax - whenmin)

    Returns a list of (label, count) tuples describing data quality.
    """
    DAY_MS = 24 * 60 * 60 * 1000.0
    WEEK_MS = 7 * 24 * 60 * 60 * 1000.0

    owner = notes[0].owner
    allLogs = ActivityLog.objects.filter(
        owner=owner, action__in=['note-add', 'note-save', 'note-delete'])
    firstBirth = float(min([row[0] for row in notes.values_list('created')]))
    title = "%s -- %s %s %s " % (title, str(notes.count()), owner.email, owner.id)

    out_path = cap.make_filename(filename)
    print("saving to %s " % out_path)
    r.png(file=out_path, w=3200, h=1600)  ## 3200x1600, 9600x4800, 12.8x6.4

    # Seed everything from the note rows, then let the logs refine it.
    texts, births, jid2note, deaths = {}, {}, {}, {}
    for n in notes:
        texts[n.jid] = n.contents
        births[n.jid] = float(n.created)
        jid2note[n.jid] = n

    whenmax = 0
    whenmin = time.time() * 1000.0
    for log in allLogs:
        when = float(log.when)
        if log.action == 'note-add':
            births[log.noteid] = when
        if (log.action in ['note-add', 'note-save']
                and log.noteText is not None and log.noteid not in texts):
            texts[log.noteid] = log.noteText
        if log.action == 'note-delete':
            deaths[log.noteid] = when
        whenmax = max(whenmax, when)
        whenmin = min(whenmin, when)
    print("whenmax  %s - %s %s" % (whenmin, int(whenmax),
                                   (whenmax - whenmin) / DAY_MS))

    # Notes that were never deleted "die" at the time of the last log.
    for n in notes.filter(deleted=False):
        deaths[n.jid] = whenmax

    # Rank notes by birth time; the rank is the x coordinate.
    births_ordered = sorted(births.items(), key=lambda pair: pair[1])
    jid2idx = dict((jid, idx) for idx, (jid, _btime) in enumerate(births_ordered))

    print("computing edits")
    edits_ = ca.note_edits_for_user(owner)
    print("Color function")
    colors = dict((jid, color_function(text2vals(text)))
                  for jid, text in texts.items())
    print("--")

    births_r, deletes_r, edits_r, edit_dirs_r = r.c(), r.c(), r.c(), r.c()
    # Left empty on purpose: the original only ever filled this vector in a
    # commented-out line, so the birth markers keep R's default colour.
    colors_r = r.c()
    edit_delta = 3
    print("BIRTHS= %d " % len(births))
    print("DEATHS %d " % len(deaths))
    for njid in births:
        idx = jid2idx[njid]
        births_r = r.rbind(births_r, c([idx, 0]))
        lifetime = deaths[njid] - births[njid] if njid in deaths else whenmax - whenmin
        deletes_r = r.rbind(deletes_r, c([idx, lifetime]))
        for edit_action in edits_[njid] if njid in edits_ else []:
            edits_r = r.rbind(
                edits_r, c([idx, float(edit_action['when']) - births[njid]]))
            edit_dirs_r = r.c(
                edit_dirs_r,
                pch_of_delta(edit_action['initial'], edit_action['final']))

    r.plot(births_r, cex=2.0, col=colors_r, pch='o', xlab='Created date',
           ylab='Action date', main=title, xlim=r.c(0, notes.count()),
           ylim=r.c(0, YMAG * (whenmax - whenmin)), axes=False)
    r.points(edits_r, col='black', pch=edit_dirs_r, cex=edit_delta)
    r.points(deletes_r, cex=2.0, col='black', pch='x')

    # NOTE(review): these ticks are absolute week positions (epoch based)
    # while ylim above is relative to each note's birth, so they appear to
    # fall outside the plotted range -- confirm the intended axis.
    yWeeks = range(int(msecToWeek(firstBirth)), int(msecToWeek(whenmax)), 1)
    r.axis(2, at=c([float(week * WEEK_MS) for week in yWeeks]),
           labels=c([int(week) - 2012 for week in yWeeks]), tck=1)

    duds = []
    for jid in births:
        if jid in deaths and jid in colors:
            x = float(jid2idx[jid])
            # Start at y=0, where the birth marker sits (the original started
            # the segment at y = note index, a leftover from the absolute plot).
            r.lines(c([x, x]), c([0.0, float(deaths[jid] - births[jid])]),
                    col=colors[jid])
        else:
            duds.append(jid)
    print("skipped %d whose death time was not known " % len(duds))

    goodness_stats = [
        ("notes", len(births)),
        ("missing note", len([j for j in births if j not in jid2note])),
        ("missing text", len([j for j in births if j not in texts])),
        ("missing note-delete times",
         len([j for j in births if j in jid2note and j not in deaths])),
        ("negative lifetime",
         len([j for j in births if j in deaths and deaths[j] - births[j] < 0])),
    ]

    lifetimes = [deaths[j] - births[j] for j in births if j in deaths]
    if lifetimes:  # median() of an empty list is not attempted
        print("avg start-end %d " % (median(lifetimes) / DAY_MS))
    devoff()
    return goodness_stats
def plot_xyz(filename, x, y, z, xl="x", yl="y", title="title"):
    """Plot y against x into a 3200x1600 PNG, sizing each marker by z.

    filename -- base name handed to make_filename() for the output file
    x, y     -- coordinates forwarded unchanged to r.plot
    z        -- forwarded as r.plot's ``cex`` argument
    xl, yl   -- axis labels
    title    -- main title of the plot
    """
    target = make_filename(filename)
    r.png(file=target, w=3200, h=1600)
    plot_options = dict(cex=z, xlab=xl, ylab=yl, main=title)
    r.plot(x, y, **plot_options)
    devoff()
def mmmPlot(filename, notes, title='title'):
    """Plot note creation time (x) against action time (y) for one owner.

    Adds, deletes, edits and searches are drawn as points at
    (note.created, action time); a vertical line joins each note's creation
    to its death.  Notes that are not deleted get "now" as death time and a
    green line, deleted notes a black one.  Both axes run from the
    module-level ``firstBirth`` to now.

    NOTE: a second definition with the same name appears later in this file;
    after import that later definition is the one bound to the name.

    filename -- base name handed to cap.make_filename()
    notes    -- queryset of notes; notes[0].owner selects logs, edits and
                searches
    title    -- accepted for interface compatibility; always replaced by a
                "#Notes:#SaveLogs:#Dels:Email" summary
    """
    global firstBirth  # only read here; must be set elsewhere in the module

    WEEK_MS = 7 * 24 * 60 * 60 * 1000.0
    weeks_per_msec = 1.0 / (1000.0 * 60 * 60 * 24 * 7)

    owner = notes[0].owner
    allLogs = ActivityLog.objects.filter(
        owner=owner, action__in=['note-add', 'note-delete'])
    saveLogCount = ActivityLog.objects.filter(
        owner=owner, action='note-save').count()
    deletedCount = sum([n.deleted for n in notes])
    title = ("#Notes:#SaveLogs:#Dels:Email -- " + str(notes.count()) + ":" +
             str(saveLogCount) + ":" + str(deletedCount) + ":" + owner.email)

    ## Shortened to just the two, since note-edit added later
    points = {'note-add': r.c(), 'note-delete': r.c()}
    births, deaths = {}, {}
    today = time.time() * 1000.0
    r.png(file=cap.make_filename(filename), w=6400, h=3200)  ## 3200x1600, 9600x4800, 12.8x6.4

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:
            continue
        note = noteArr[0]
        births[note.jid] = note.created
        if not note.deleted and note.jid not in deaths:
            deaths[note.jid] = today
        if log.action == 'note-delete':
            deaths[note.jid] = float(log.when)
        points[log.action] = r.rbind(
            points[log.action], c([float(note.created), float(log.when)]))

    r.plot(points['note-add'], cex=2.0, col="green", pch='o',
           xlab="Created Date", ylab="Action Date", main=title,
           xlim=r.c(firstBirth, today), ylim=r.c(firstBirth, today),
           axes=False)

    # Both axes share the same weekly ticks, labelled relative to week 2012.
    weeks = range(int(firstBirth * weeks_per_msec), int(today * weeks_per_msec), 1)
    tick_positions = [float(week * WEEK_MS) for week in weeks]
    tick_labels = [int(week) - 2012 for week in weeks]
    r.axis(1, at=c(tick_positions), labels=c(tick_labels), tck=1)
    r.axis(2, at=c(tick_positions), labels=c(tick_labels), tck=1)

    edits_ = ca.note_edits_for_user(owner)
    searches = searches_by_note_jid(owner)
    points['note-edit'] = r.c()
    points['note-searches'] = r.c()
    edit_dir, edit_delta, edit_col = r.c(), r.c(), r.c()
    for n in notes:  ## wCom: not all notes eval'd here may have note-add events !!
        created = float(n.created)
        for edit_action in edits_[n.jid] if n.jid in edits_ else []:
            spot = c([created, float(edit_action['when'])])
            initial, final = edit_action['initial'], edit_action['final']
            # Each edit is drawn twice at the same spot: a big outer icon ...
            points['note-edit'] = r.rbind(points['note-edit'], spot)
            edit_dir = r.c(edit_dir, pch_of_delta(initial, final))
            edit_delta = r.c(edit_delta, 14)
            edit_col = r.c(edit_col, col_of_edit(initial, 'outer'))
            # ... and a small inner icon on top of it.
            points['note-edit'] = r.rbind(points['note-edit'], spot)
            edit_dir = r.c(edit_dir, pch_of_innerEdit(initial, final))
            edit_delta = r.c(edit_delta, 7)
            edit_col = r.c(edit_col,
                           col_of_edit(initial, 'inner', edit_action['when']))
        # The searches mapping is looked up with the string form of the jid.
        for search in searches.get(str(n.jid), []):
            print("searches %s" % (search,))
            points['note-searches'] = r.rbind(
                points['note-searches'], c([created, float(search['when'])]))

    r.points(points['note-searches'], cex=5.0, col="dark green", pch='o')
    r.points(points['note-edit'], col=edit_col, pch=edit_dir, cex=edit_delta)
    r.points(points['note-delete'], cex=5.0, col="dark red", pch='x')

    for jid in births:
        if jid in deaths:
            # A death time equal to "now" marks a note that is still alive.
            color = 'green' if (today - deaths[jid] < 0.001) else 'black'
            born = float(births[jid])
            r.lines(c([born, born]), c([born, float(deaths[jid])]),
                    col=color, lwd=3)
    devoff()
def sBar(filename, user, title='title'):
    """Stacked bar plot of a user's note activity per weekday.

    A 7 x 35 count matrix is filled from the user's 'note-add', 'note-save'
    and 'note-delete' logs.  The row is the weekday the note was created;
    the column is ``action weekday * 5 + kind`` where kind is 0 = add,
    1 = save within a week, 2 = older save, 3 = delete within a week,
    4 = older delete.  "Within a week" means the earlier of (creation,
    action) lies at most 6 days from midnight of the later one.  A note's
    saves are counted once per action weekday and recency class.

    The first bar of every weekday group is as wide as the average character
    count of the notes added that day (only add logs with non-empty text of
    fewer than 100 newlines count); the other four bars use the overall
    average.

    filename -- base name handed to make_filename()
    user     -- owner of the notes and logs
    title    -- accepted for interface compatibility; always replaced by a
                "#Notes:#Logs:Email:ID" summary
    """
    from jv3.study.ca_plot import make_filename

    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5  ## add, edit,edit, del,del

    def aveSize(total, count):
        # Integer average; 0 when there is nothing to average over.
        return int(float(total) / float(count)) if count != 0 else 0

    def msecToDate(msec):
        return datetime.datetime.fromtimestamp(float(msec) / 1000.0)

    def cellIndex(birthDay, column):
        # Single linear index into ``data`` -- presumably R's column-major
        # layout (row + column * nrow); TODO confirm against the r bridge.
        return birthDay + column * COL_SEGMENTS

    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS * GROUP_TYPES)
    # Note ids whose save was already counted, per action weekday.
    recentEdits = [set() for _ in range(ROW_GROUPS)]
    oldEdits = [set() for _ in range(ROW_GROUPS)]
    # [number of notes, number of characters] per action weekday.
    createdSize = [[0, 0] for _ in range(ROW_GROUPS)]

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:
            continue  # only logs whose note row still exists (deleted or not)
        note = noteArr[0]
        actDate, birthDate = msecToDate(log.when), msecToDate(note.created)
        actDay, birthDay = actDate.weekday(), birthDate.weekday()
        earlier, later = min(actDate, birthDate), max(actDate, birthDate)
        startOfDay = dd(later.year, later.month, later.day)
        ## Both .created and .when happened within (current day + 6 previous days)
        actInPastWk = abs((earlier - startOfDay).days) <= 6

        if log.action == 'note-add':
            data[cellIndex(birthDay, actDay * GROUP_TYPES + 0)] += 1
            text = log.noteText
            if text and text.count('\n') < 100:
                createdSize[actDay][0] += 1
                createdSize[actDay][1] += len(text)
        elif log.action == 'note-save':
            seen = recentEdits[actDay] if actInPastWk else oldEdits[actDay]
            if log.noteid in seen:
                continue  # already recorded this note for its time-frame
            seen.add(log.noteid)
            kind = 1 if actInPastWk else 2
            data[cellIndex(birthDay, actDay * GROUP_TYPES + kind)] += 1
        elif log.action == 'note-delete':
            kind = 3 if actInPastWk else 4
            data[cellIndex(birthDay, actDay * GROUP_TYPES + kind)] += 1

    r.png(file=make_filename(filename), w=1000, h=500)
    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
    colors = r.c("red", 'orange', 'yellow', 'green', 'blue', 'grey', 'brown')
    title = ("#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" +
             str(allLogs.count()) + ":" + user.email + ":" + str(user.id))

    # aveSize() guards the case where no add log carried usable text; the
    # previous inline division raised ZeroDivisionError there.
    aveWidth = aveSize(sum(chars for _count, chars in createdSize),
                       sum(count for count, _chars in createdSize))
    widths, axisNames = [], []
    for dayName, (count, chars) in zip(dayNames, createdSize):
        dayWidth = aveSize(chars, count)
        widths.extend([dayWidth, aveWidth, aveWidth, aveWidth, aveWidth])
        axisNames.extend([str(dayWidth), "", "", str(dayName), ""])

    r.barplot(data, main=title, ylab='# Action Logs', beside=False,
              col=colors, space=r.c(3, 1, 0.1, 1, .1), names=axisNames,
              width=c(widths))
    devoff()
def compareEditsBinWeek(filename, notes, title='Note Lifelines',
                        color_function=one_or_no_url_redblk, YMAG=0.5):
    """Plot late edits binned by (creation weekday, edit weekday).

    Every edit returned by ca.note_edits_for_user() that happened more than
    one week after its note was created becomes one 'x': the x coordinate is
    ``creation weekday * 7 + edit weekday`` (0..48), the y coordinate is the
    delay between creation and edit in milliseconds, the colour encodes the
    edit weekday.  Grey vertical lines separate the 49 bins, purple ones the
    seven creation weekdays; the y axis has one tick per day.

    NOTE: a second definition with the same name appears later in this file;
    after import that later definition is the one bound to the name.

    filename       -- base name handed to cap.make_filename()
    notes          -- queryset of notes; notes[0].owner selects the edits
    title          -- prefix of the plot title
    color_function -- unused (its only consumers were commented-out drawing
                      calls); kept so existing callers keep working
    YMAG           -- unused; kept so existing callers keep working
    """
    DAY_MS = 1000 * 3600 * 24
    WEEK_MS = 7 * DAY_MS
    weekdayColors = ['red', 'orange', 'yellow', 'green', 'blue', 'grey', 'brown']

    owner = notes[0].owner
    title = "%s -- %s %s %s " % (title, str(notes.count()), owner.email, owner.id)
    out_path = cap.make_filename(filename)
    print("saving to %s " % out_path)
    r.png(file=out_path, w=3200, h=1600)  ## 3200x1600, 9600x4800, 12.8x6.4

    edits_ = ca.note_edits_for_user(owner)
    bin_edits, edit_col = r.c(), r.c()
    maxDelta = 0
    for n in notes:
        if n.jid not in edits_:
            continue
        noteDOW = wUtil.msecToDate(n.created).weekday()
        for edit_action in edits_[n.jid]:
            timeDelta = edit_action['when'] - n.created
            if timeDelta > WEEK_MS:
                editDOW = wUtil.msecToDate(edit_action['when']).weekday()
                bin_edits = r.rbind(
                    bin_edits,
                    c([int(noteDOW * 7 + editDOW), float(timeDelta)]))
                # Appended together with the point so colours stay aligned.
                edit_col = r.c(edit_col, weekdayColors[editDOW])
                maxDelta = max(float(timeDelta), maxDelta)

    r.plot(bin_edits, cex=3.0, col=edit_col, pch='x', xlab="Created Date",
           ylab="Action Date", main=title, xlim=r.c(0, 48),
           ylim=c([0, float(maxDelta)]), axes=False)
    ## 1 below, 2 left
    r.axis(1, at=c(range(0, 49, 7)),
           labels=c(['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']),
           col='grey')

    # Daily ticks labelled in days.  maxDelta is a float once a point exists
    # and range() needs integer arguments, hence the int().
    yTicks = range(0, int(maxDelta), DAY_MS)
    yNames = [float(tick) / DAY_MS for tick in yTicks]
    r.axis(2, at=c(yTicks), labels=c(yNames))

    # Grey bin lines first, purple weekday separators last: both sets share
    # positions, so whichever is drawn last stays visible.
    for i in range(0, 49):
        r.abline(v=float(i) - 0.5, col='gray')
    for i in range(0, 49, 7):
        r.abline(v=float(i) - 0.5, col='purple')
    devoff()
def lifelineFlatCollapsedCompareColor(filename, notes, title='Note Lifelines',
                                      color_function=black, YMAG=1.0):
    """Plot one vertical lifeline per note, notes ordered by creation time.

    x is the rank of the note when all known notes are sorted by birth time;
    y is milliseconds elapsed since that note's birth.  Births are plotted at
    y=0, edits at (edit time - birth), deletions at (death - birth), and a
    line coloured by ``color_function(text2vals(text))`` joins birth and
    death for every note whose death time and text are known.

    NOTE: an earlier definition with the same name appears in this file;
    this later one is the binding that remains after import.

    filename       -- base name handed to cap.make_filename()
    notes          -- queryset of notes (``.filter``, ``.count`` and
                      ``.values_list`` are used); notes[0].owner selects the
                      activity logs
    title          -- prefix of the plot title
    color_function -- maps text2vals(note text) to a colour for r.lines
    YMAG           -- multiplier for the upper y limit (whenmax - whenmin)

    Returns a list of (label, count) tuples describing data quality.
    """
    DAY_MS = 24 * 60 * 60 * 1000.0
    WEEK_MS = 7 * 24 * 60 * 60 * 1000.0

    owner = notes[0].owner
    allLogs = ActivityLog.objects.filter(
        owner=owner, action__in=['note-add', 'note-save', 'note-delete'])
    firstBirth = float(min([row[0] for row in notes.values_list('created')]))
    title = "%s -- %s %s %s " % (title, str(notes.count()), owner.email, owner.id)

    out_path = cap.make_filename(filename)
    print("saving to %s " % out_path)
    r.png(file=out_path, w=3200, h=1600)  ## 3200x1600, 9600x4800, 12.8x6.4

    # Seed everything from the note rows, then let the logs refine it.
    texts, births, jid2note, deaths = {}, {}, {}, {}
    for n in notes:
        texts[n.jid] = n.contents
        births[n.jid] = float(n.created)
        jid2note[n.jid] = n

    whenmax = 0
    whenmin = time.time() * 1000.0
    for log in allLogs:
        when = float(log.when)
        if log.action == 'note-add':
            births[log.noteid] = when
        if (log.action in ['note-add', 'note-save']
                and log.noteText is not None and log.noteid not in texts):
            texts[log.noteid] = log.noteText
        if log.action == 'note-delete':
            deaths[log.noteid] = when
        whenmax = max(whenmax, when)
        whenmin = min(whenmin, when)
    print("whenmax  %s - %s %s" % (whenmin, int(whenmax),
                                   (whenmax - whenmin) / DAY_MS))

    # Notes that were never deleted "die" at the time of the last log.
    for n in notes.filter(deleted=False):
        deaths[n.jid] = whenmax

    # Rank notes by birth time; the rank is the x coordinate.
    births_ordered = sorted(births.items(), key=lambda pair: pair[1])
    jid2idx = dict((jid, idx) for idx, (jid, _btime) in enumerate(births_ordered))

    print("computing edits")
    edits_ = ca.note_edits_for_user(owner)
    print("Color function")
    colors = dict((jid, color_function(text2vals(text)))
                  for jid, text in texts.items())
    print("--")

    births_r, deletes_r, edits_r, edit_dirs_r = r.c(), r.c(), r.c(), r.c()
    # Left empty on purpose: the original only ever filled this vector in a
    # commented-out line, so the birth markers keep R's default colour.
    colors_r = r.c()
    edit_delta = 3
    print("BIRTHS= %d " % len(births))
    print("DEATHS %d " % len(deaths))
    for njid in births:
        idx = jid2idx[njid]
        births_r = r.rbind(births_r, c([idx, 0]))
        lifetime = deaths[njid] - births[njid] if njid in deaths else whenmax - whenmin
        deletes_r = r.rbind(deletes_r, c([idx, lifetime]))
        for edit_action in edits_[njid] if njid in edits_ else []:
            edits_r = r.rbind(
                edits_r, c([idx, float(edit_action['when']) - births[njid]]))
            edit_dirs_r = r.c(
                edit_dirs_r,
                pch_of_delta(edit_action['initial'], edit_action['final']))

    r.plot(births_r, cex=2.0, col=colors_r, pch='o', xlab='Created date',
           ylab='Action date', main=title, xlim=r.c(0, notes.count()),
           ylim=r.c(0, YMAG * (whenmax - whenmin)), axes=False)
    r.points(edits_r, col='black', pch=edit_dirs_r, cex=edit_delta)
    r.points(deletes_r, cex=2.0, col='black', pch='x')

    # NOTE(review): these ticks are absolute week positions (epoch based)
    # while ylim above is relative to each note's birth, so they appear to
    # fall outside the plotted range -- confirm the intended axis.
    yWeeks = range(int(msecToWeek(firstBirth)), int(msecToWeek(whenmax)), 1)
    r.axis(2, at=c([float(week * WEEK_MS) for week in yWeeks]),
           labels=c([int(week) - 2012 for week in yWeeks]), tck=1)

    duds = []
    for jid in births:
        if jid in deaths and jid in colors:
            x = float(jid2idx[jid])
            # Start at y=0, where the birth marker sits (the original started
            # the segment at y = note index, a leftover from the absolute plot).
            r.lines(c([x, x]), c([0.0, float(deaths[jid] - births[jid])]),
                    col=colors[jid])
        else:
            duds.append(jid)
    print("skipped %d whose death time was not known " % len(duds))

    goodness_stats = [
        ("notes", len(births)),
        ("missing note", len([j for j in births if j not in jid2note])),
        ("missing text", len([j for j in births if j not in texts])),
        ("missing note-delete times",
         len([j for j in births if j in jid2note and j not in deaths])),
        ("negative lifetime",
         len([j for j in births if j in deaths and deaths[j] - births[j] < 0])),
    ]

    lifetimes = [deaths[j] - births[j] for j in births if j in deaths]
    if lifetimes:  # median() of an empty list is not attempted
        print("avg start-end %d " % (median(lifetimes) / DAY_MS))
    devoff()
    return goodness_stats
def compareEditsBinWeek(filename, notes, title='Note Lifelines',
                        color_function=one_or_no_url_redblk, YMAG=0.5):
    """Plot late edits binned by (creation weekday, edit weekday).

    Every edit returned by ca.note_edits_for_user() that happened more than
    one week after its note was created becomes one 'x': the x coordinate is
    ``creation weekday * 7 + edit weekday`` (0..48), the y coordinate is the
    delay between creation and edit in milliseconds, the colour encodes the
    edit weekday.  Grey vertical lines separate the 49 bins, purple ones the
    seven creation weekdays; the y axis has one tick per day.

    NOTE: an earlier definition with the same name appears in this file;
    this later one is the binding that remains after import.

    filename       -- base name handed to cap.make_filename()
    notes          -- queryset of notes; notes[0].owner selects the edits
    title          -- prefix of the plot title
    color_function -- unused (its only consumers were commented-out drawing
                      calls); kept so existing callers keep working
    YMAG           -- unused; kept so existing callers keep working
    """
    DAY_MS = 1000 * 3600 * 24
    WEEK_MS = 7 * DAY_MS
    weekdayColors = ['red', 'orange', 'yellow', 'green', 'blue', 'grey', 'brown']

    owner = notes[0].owner
    title = "%s -- %s %s %s " % (title, str(notes.count()), owner.email, owner.id)
    out_path = cap.make_filename(filename)
    print("saving to %s " % out_path)
    r.png(file=out_path, w=3200, h=1600)  ## 3200x1600, 9600x4800, 12.8x6.4

    edits_ = ca.note_edits_for_user(owner)
    bin_edits, edit_col = r.c(), r.c()
    maxDelta = 0
    for n in notes:
        if n.jid not in edits_:
            continue
        noteDOW = wUtil.msecToDate(n.created).weekday()
        for edit_action in edits_[n.jid]:
            timeDelta = edit_action['when'] - n.created
            if timeDelta > WEEK_MS:
                editDOW = wUtil.msecToDate(edit_action['when']).weekday()
                bin_edits = r.rbind(
                    bin_edits,
                    c([int(noteDOW * 7 + editDOW), float(timeDelta)]))
                # Appended together with the point so colours stay aligned.
                edit_col = r.c(edit_col, weekdayColors[editDOW])
                maxDelta = max(float(timeDelta), maxDelta)

    r.plot(bin_edits, cex=3.0, col=edit_col, pch='x', xlab="Created Date",
           ylab="Action Date", main=title, xlim=r.c(0, 48),
           ylim=c([0, float(maxDelta)]), axes=False)
    ## 1 below, 2 left
    r.axis(1, at=c(range(0, 49, 7)),
           labels=c(['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']),
           col='grey')

    # Daily ticks labelled in days.  maxDelta is a float once a point exists
    # and range() needs integer arguments, hence the int().
    yTicks = range(0, int(maxDelta), DAY_MS)
    yNames = [float(tick) / DAY_MS for tick in yTicks]
    r.axis(2, at=c(yTicks), labels=c(yNames))

    # Grey bin lines first, purple weekday separators last: both sets share
    # positions, so whichever is drawn last stays visible.
    for i in range(0, 49):
        r.abline(v=float(i) - 0.5, col='gray')
    for i in range(0, 49, 7):
        r.abline(v=float(i) - 0.5, col='purple')
    devoff()
def mmmPlot(filename, notes, title='title'):
    """Plot note creation time (x) against action time (y) for one owner.

    Adds, deletes, edits and searches are drawn as points at
    (note.created, action time); a vertical line joins each note's creation
    to its death.  Notes that are not deleted get "now" as death time and a
    green line, deleted notes a black one.  Both axes run from the
    module-level ``firstBirth`` to now.

    NOTE: an earlier definition with the same name appears in this file;
    this later one is the binding that remains after import.

    filename -- base name handed to cap.make_filename()
    notes    -- queryset of notes; notes[0].owner selects logs, edits and
                searches
    title    -- accepted for interface compatibility; always replaced by a
                "#Notes:#SaveLogs:#Dels:Email" summary
    """
    global firstBirth  # only read here; must be set elsewhere in the module

    WEEK_MS = 7 * 24 * 60 * 60 * 1000.0
    weeks_per_msec = 1.0 / (1000.0 * 60 * 60 * 24 * 7)

    owner = notes[0].owner
    allLogs = ActivityLog.objects.filter(
        owner=owner, action__in=['note-add', 'note-delete'])
    saveLogCount = ActivityLog.objects.filter(
        owner=owner, action='note-save').count()
    deletedCount = sum([n.deleted for n in notes])
    title = ("#Notes:#SaveLogs:#Dels:Email -- " + str(notes.count()) + ":" +
             str(saveLogCount) + ":" + str(deletedCount) + ":" + owner.email)

    ## Shortened to just the two, since note-edit added later
    points = {'note-add': r.c(), 'note-delete': r.c()}
    births, deaths = {}, {}
    today = time.time() * 1000.0
    r.png(file=cap.make_filename(filename), w=6400, h=3200)  ## 3200x1600, 9600x4800, 12.8x6.4

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:
            continue
        note = noteArr[0]
        births[note.jid] = note.created
        if not note.deleted and note.jid not in deaths:
            deaths[note.jid] = today
        if log.action == 'note-delete':
            deaths[note.jid] = float(log.when)
        points[log.action] = r.rbind(
            points[log.action], c([float(note.created), float(log.when)]))

    r.plot(points['note-add'], cex=2.0, col="green", pch='o',
           xlab="Created Date", ylab="Action Date", main=title,
           xlim=r.c(firstBirth, today), ylim=r.c(firstBirth, today),
           axes=False)

    # Both axes share the same weekly ticks, labelled relative to week 2012.
    weeks = range(int(firstBirth * weeks_per_msec), int(today * weeks_per_msec), 1)
    tick_positions = [float(week * WEEK_MS) for week in weeks]
    tick_labels = [int(week) - 2012 for week in weeks]
    r.axis(1, at=c(tick_positions), labels=c(tick_labels), tck=1)
    r.axis(2, at=c(tick_positions), labels=c(tick_labels), tck=1)

    edits_ = ca.note_edits_for_user(owner)
    searches = searches_by_note_jid(owner)
    points['note-edit'] = r.c()
    points['note-searches'] = r.c()
    edit_dir, edit_delta, edit_col = r.c(), r.c(), r.c()
    for n in notes:  ## wCom: not all notes eval'd here may have note-add events !!
        created = float(n.created)
        for edit_action in edits_[n.jid] if n.jid in edits_ else []:
            spot = c([created, float(edit_action['when'])])
            initial, final = edit_action['initial'], edit_action['final']
            # Each edit is drawn twice at the same spot: a big outer icon ...
            points['note-edit'] = r.rbind(points['note-edit'], spot)
            edit_dir = r.c(edit_dir, pch_of_delta(initial, final))
            edit_delta = r.c(edit_delta, 14)
            edit_col = r.c(edit_col, col_of_edit(initial, 'outer'))
            # ... and a small inner icon on top of it.
            points['note-edit'] = r.rbind(points['note-edit'], spot)
            edit_dir = r.c(edit_dir, pch_of_innerEdit(initial, final))
            edit_delta = r.c(edit_delta, 7)
            edit_col = r.c(edit_col,
                           col_of_edit(initial, 'inner', edit_action['when']))
        # The searches mapping is looked up with the string form of the jid.
        for search in searches.get(str(n.jid), []):
            print("searches %s" % (search,))
            points['note-searches'] = r.rbind(
                points['note-searches'], c([created, float(search['when'])]))

    r.points(points['note-searches'], cex=5.0, col="dark green", pch='o')
    r.points(points['note-edit'], col=edit_col, pch=edit_dir, cex=edit_delta)
    r.points(points['note-delete'], cex=5.0, col="dark red", pch='x')

    for jid in births:
        if jid in deaths:
            # A death time equal to "now" marks a note that is still alive.
            color = 'green' if (today - deaths[jid] < 0.001) else 'black'
            born = float(births[jid])
            r.lines(c([born, born]), c([born, float(deaths[jid])]),
                    col=color, lwd=3)
    devoff()