from __future__ import division from Annotation import Annotation from nltk import FreqDist from operator import itemgetter import matplotlib.pyplot as plt import numpy as np import seaborn as sns sns.set(color_codes=True, style="whitegrid") A = Annotation('data/exportMedium.json') allLabels = map(lambda a: a[0][0], A.ofAll(['labels'])) dist = FreqDist(allLabels) keys = [u'Neg', u'Neut', u'Pos', u'No Sent', u'Undecided', u'Irrelevant'] index = np.arange(len(keys)) bar_width = 0.5 fig = plt.Figure() plt.bar(index, map(lambda k: dist[k] / len(allLabels), keys), bar_width) plt.xlabel('Label') plt.ylabel('% of annotations') plt.xticks(index + bar_width / 2, keys) plt.legend()
# Split the chronologically ordered annotations into working sessions.
from operator import itemgetter, concat, add
from itertools import groupby
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import copy
import seaborn as sns

sns.set(style="whitegrid", color_codes=True)

A = Annotation('data/exportMedium.json')

# (timestamp, duration, proposalFlag) triples sorted by timestamp.
dateDurations = sorted(
    [(datetime.strptime(dateString, "%Y-%m-%d %H:%M:%S"),
      int(durationString),
      flag)
     for dateString, durationString, flag in A.ofAll(
         ['dateTime', 'duration', 'proposalFlag'])],
    key=itemgetter(0))

# Two consecutive annotations that are more than this far apart belong to
# different sessions.
threshold = timedelta(minutes=5)

# discover sessions: a list of sessions, each session being a list of
# (timestamp, duration, proposalFlag) triples in chronological order.
# A plain loop replaces the former reduce/lambda fold: it avoids the
# Python-2-only tuple-parameter lambda, does not copy the list on every step,
# and simply yields [] when there are no annotations.
sessions = []
for record in dateDurations:
    if sessions and record[0] - sessions[-1][-1][0] <= threshold:
        # Close enough to the previous annotation: same session.
        sessions[-1].append(record)
    else:
        # First record, or the gap exceeds the threshold: start a new session.
        sessions.append([record])
# Export annotation counts as small .tex snippets.
from itertools import groupby
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sns.set(color_codes=True, style="whitegrid")


def save(name, value, directory='/home/kai/Dropbox/MA/thesis/const'):
    """Write *value* to the file ``<directory>/<name>.tex``.

    An existing file is overwritten.

    Args:
        name: File name without the ``.tex`` extension.
        value: Text to write.
        directory: Target directory; defaults to the location that was
            previously hard-coded in the function body.
    """
    path = os.path.join(directory, '{}.tex'.format(name))
    with open(path, 'w') as handle:
        handle.write(value)


A = Annotation('data/exportMedium.json')
annotations = A.ofAll(['labels', 'proposalFlag'])
perDocument = A.perDocument(3, ['dateTime'])

save('numberOfAnnotations', str(len(annotations)))
# save('numberOfDocuments', str(len(perDocument)))

# Group the annotations by their second field (the proposal flag).  groupby
# only merges adjacent items, hence the sort by the same key first.
_flag_of = itemgetter(1)
flags = {k: list(v)
         for k, v in groupby(sorted(annotations, key=_flag_of), key=_flag_of)}

# A flag value that never occurs in the data counts as 0 rather than raising
# a KeyError.
save('numberOfProposals', str(len(flags.get('proposal', []))))
save('numberOfNoProposals', str(len(flags.get('no proposal', []))))
save('numberOfWrongProposals', str(len(flags.get('wrong proposal', []))))