from itertools import groupby
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.neighbors import KernelDensity

from Annotation import Annotation

sns.set(color_codes=True, style="whitegrid")

A = Annotation('data/exportMedium.json')

# Collect the annotation durations per proposal flag.  groupby() only merges
# adjacent items, so the annotations are sorted by the same key first.
# NOTE(review): A.durationToSec presumably converts the raw durations to
# seconds -- confirm against the Annotation class.
key = itemgetter('proposalFlag')
g = dict(
    (flag,
     A.durationToSec([annotation['duration'] for annotation in annotations]))
    for flag, annotations in groupby(sorted(A.all, key=key), key=key))


def stats(label):
    """Return ``(mean, standard deviation, median)`` of ``g[label]``.

    Raises ``KeyError`` when no annotation carries the flag ``label``.
    """
    durations = g[label]
    return np.mean(durations), np.std(durations), np.median(durations)


labels = ['proposal', 'no proposal', 'wrong proposal']
# Keep the statistics instead of discarding them: a bare map() call is lazy
# on Python 3 and would never invoke stats() at all.
labelStats = dict(zip(labels, map(stats, labels)))

# Column vector (1000 x 1) of evenly spaced points from -10 to 300.
# NOTE(review): presumably the evaluation grid for a KernelDensity fit.
xPlot = np.linspace(-10, 300, 1000)[:, np.newaxis]
# One matplotlib colour code per proposal flag.
c = {'no proposal': 'g', 'proposal': 'b', 'wrong proposal': 'r'}
# plt.figure() (lower case) registers the figure with pyplot and makes it the
# current one; plt.Figure() only builds a detached Figure object.
fig = plt.figure()
# now = datetime.now()
# deltas = reduce(lambda lst, d: lst+[d-lst.pop()]+[d],
#                 tmpDates[1:], [tmpDates[0]])[:-1]+[now-now]
from datetime import timedelta


def _splitIntoSessions(entries, gap):
    """Group ``(date, duration, flag)`` entries into sessions.

    An entry opens a new session when its date lies more than ``gap`` after
    the date of the entry before it; otherwise it joins the current session.
    Assumes ``entries`` is ordered by date -- TODO confirm at the call site.

    Raises ``IndexError`` when ``entries`` is empty.
    """
    sessions = [[entries[0]]]
    for date, duration, flag in entries[1:]:
        entry = (date, duration, flag)
        if date - sessions[-1][-1][0] > gap:
            sessions.append([entry])
        else:
            sessions[-1].append(entry)
    return sessions


def secToHour(sec):
    """Convert seconds to hours.

    Divides by a float so integer input is not floor-divided on Python 2.
    """
    return sec / 3600.0


# A pause longer than five minutes between two entries starts a new session.
threshold = timedelta(minutes=5)
sessions = _splitIntoSessions(dateDurations, threshold)
# Span of each session: date of its last entry minus date of its first entry.
sesssionDurations = [session[-1][0] - session[0][0] for session in sessions]

totalSessionDuration = sum(sesssionDurations, timedelta()).total_seconds()
totalAnnotationDuration = A.durationToSec(
    sum(map(itemgetter(1), dateDurations)))

# plt.figure() makes the new figure current; plt.Figure() would leave the
# plt.* calls below drawing on whatever figure was current before.
fig = plt.figure()
bar_width = 0.5
label = ['Total annotation time', 'Time spent to pick label']
# Both bars start at x=0, so the second one is drawn on top of the first.
# width/align are explicit because the default alignment differs between
# matplotlib versions.
plt.bar(0, secToHour(totalSessionDuration), width=bar_width, align='edge',
        label=label[0], color='r')
plt.bar(0, secToHour(totalAnnotationDuration), width=bar_width, align='edge',
        label=label[1], color='b')
plt.ylabel('Hours')
plt.xticks([0 + bar_width / 2], ['Annotation time'])
# Keep the automatic y-limits and clamp the x-axis to the bar itself.
x1, x2, y1, y2 = plt.axis()
plt.axis((0, 0.5, y1, y2))
plt.legend(frameon=True, loc='best', fancybox=True, prop={'size': 12})
docAnnos = A.perDocument(1, ['document', 'annotations'])

# One (word count, proposalFlag, duration) record per annotation.
# NOTE(review): len(preprocessing(document)) is presumably the number of
# words in the document -- confirm against preprocessing().
records = [
    (len(preprocessing(document)),
     annotation['proposalFlag'],
     annotation['duration'])
    for document, annotations in docAnnos
    for annotation in annotations
]

# blob maps each proposalFlag (expected: 'proposal', 'no proposal',
# 'wrong proposal') to the list of its records.  groupby() only merges
# adjacent items, so the records are sorted by flag first.
byFlag = itemgetter(1)
blob = dict(
    (flag, list(group))
    for flag, group in groupby(sorted(records, key=byFlag), key=byFlag))


def smooth(c, limit=100):
    """Return the ``(count, seconds)`` pairs of ``c`` with ``seconds < limit``.

    ``limit`` defaults to the 100 second cutoff that was hard-coded before.
    """
    return [pair for pair in c if pair[1] < limit]


# Per flag: (word count, converted duration) pairs with long durations
# dropped by smooth().
plotContent = dict(
    (flag, smooth([(count, A.durationToSec(duration))
                   for count, _, duration in rows]))
    for flag, rows in blob.items())

labels = ['proposal', 'no proposal', 'wrong proposal']
sns.set(font_scale=1.5)
fig = plt.figure()
# Enlarge the default figure size by 50%.
fig.set_size_inches(fig.get_size_inches() * 1.5)
for label in labels:
    # Transpose the pairs into one tuple of counts and one of seconds.
    count, sec = zip(*plotContent[label])
    data = pd.DataFrame()