예제 #1
0
 def test_IsAnnotation_TrueNegative(self):
     # Annotation.IsAnnotation must return a falsy value for each of
     # these strings; they are checked in the listed order.
     rejected_inputs = (
         "Allegro",
         "Ludwig van Beethoven",
         "1 A Test",
         "A: A Test",
     )
     for candidate in rejected_inputs:
         self.assertFalse(Annotation.IsAnnotation(candidate))
예제 #2
0
    def from_xml(cls, xml_tree):
        """Build a Room from an XML element.

        The ``name`` and ``numberofdoors`` attributes of ``xml_tree`` are
        stored exactly as ``xml_tree.get()`` returns them (no conversion).
        Every direct child whose tag equals ``Annotation.get_tag()``
        (case-insensitively) is parsed with ``Annotation.from_xml``; when
        several children match, the last one is kept.

        Returns the populated Room.
        """
        room = Room()
        room.name = xml_tree.get('name')
        room.numberofdoors = xml_tree.get('numberofdoors')

        # Iterate the element itself to visit its direct children:
        # Element.getchildren() was removed from xml.etree.ElementTree in
        # Python 3.9.
        for potential_annot in xml_tree:
            if potential_annot.tag.lower() == Annotation.get_tag().lower():
                room.annotation = Annotation.from_xml(potential_annot)

        return room
예제 #3
0
    def __init__(self, Dataset_Name):
        """
        Build the top-level GUI for one dataset.

        Per the original author's note, the GUI is only responsible for
        producing a configuration file; the subsequent processing logic is
        left to OpenCV.

        Dataset_Name: name of the dataset folder. It is resolved two
            directory levels above the directory that contains this file.
        """
        super().__init__()
        # 1: parameter initialisation
        self.Dataset_Name = Dataset_Name
        self.mission_name = tk.StringVar()

        # 2: file path initialisation
        # ROOT_DIR is the directory containing this file (the lib folder).
        ROOT_DIR = os.path.dirname(os.path.realpath(__file__))
        # Top-level folder of the dataset.
        self.target_file = os.path.abspath(ROOT_DIR + "/../../" +
                                           str(self.Dataset_Name))

        # Paths of the original dataset (used later to build image paths).
        self.dataset_path = self.target_file + "/dataset"
        self.images_path = self.dataset_path + "/images"
        self.annotations_path = self.dataset_path + "/image_annotations"

        # Paths of the augmented dataset.
        self.aug_path = self.target_file + "/aug"
        self.aug_images_path = self.aug_path + "/images"
        self.aug_annotations_path = self.aug_path + "/image_annotations"

        # Report an already existing dataset, then make sure every folder
        # is present. makedirs(exist_ok=True) creates the whole tree for a
        # new project and also repairs a dataset folder that exists but is
        # missing some sub-folders, which the previous mkdir chain skipped.
        if os.path.exists(self.target_file):
            print("已经存在{}数据集,基于这个进行处理".format(self.Dataset_Name))
        for folder in (self.images_path, self.annotations_path,
                       self.aug_images_path, self.aug_annotations_path):
            os.makedirs(folder, exist_ok=True)

        # Draw the GUI and initialise the bounding-box generator.
        self.drawGUI()

        # Initialise the helper classes.
        self.dataAugment = DataAugment(dataset_path=self.dataset_path,
                                       aug_path=self.aug_path)
        self.dataProcesser = DataProcesser(dataset_path=self.dataset_path)
        # Dataset generation works on the augmented data.
        self.generateDataset = GenerateDataset(self.aug_path)
        self.annotation = Annotation(self.Dataset_Name)
예제 #4
0
    def from_xml_local(cls, xml_tree):
        """Build a Location from an XML element without resolving its room.

        ``name`` is copied from the attribute of the same name;
        ``isbeacon`` and ``isplacement`` are True only when the attribute
        text is exactly ``'true'``. Every direct child whose tag equals
        ``Annotation.get_tag()`` (case-insensitively) is parsed with
        ``Annotation.from_xml``; the last match is kept. ``room`` is stored
        as the raw ``room`` attribute value.

        Returns the populated Location.
        """
        loc = Location()
        loc.name = xml_tree.get('name')
        loc.isbeacon = xml_tree.get('isbeacon') == 'true'
        loc.isplacement = xml_tree.get('isplacement') == 'true'

        # Iterate the element itself to visit its direct children:
        # Element.getchildren() was removed from xml.etree.ElementTree in
        # Python 3.9.
        for potential_annot in xml_tree:
            if potential_annot.tag.lower() == Annotation.get_tag().lower():
                loc.annotation = Annotation.from_xml(potential_annot)

        loc.room = xml_tree.get('room')

        return loc
예제 #5
0
 def test_depth(self):
     # Each row is (expected depth, annotation text, second argument,
     # third argument); rows are checked in order.
     cases = (
         (1, "1: Exposition", 1, 4),
         (2, "2: First Subject Group", 1, 4),
         (3, "3: Theme a", 1, 4),
         (4, "4: Sentence", 1, 4),
         (5, "5: Presentation", 1, 4),
         (6, "6: Basic idea", 1, 4),
         (5, "5: Continuation", 6, 1),
         (6, "6: Fragmentation", 6, 1),
     )
     for expected_depth, text, second, third in cases:
         self.assertEqual(expected_depth,
                          Annotation(text, second, third).depth)
예제 #6
0
    def annotations(self):
        """
        Generator over the Annotation nodes linked to this object.

        Runs an OQL query for "annotation" nodes whose
        linkage.computed_from equals this object's id, one page at a time
        starting at page 1, and yields every result after converting it
        with Annotation.load_annotation().
        """
        self.logger.debug("In annotations().")

        oql = '"annotation"[node_type] && "{}"[linkage.computed_from]'.format(self.id)

        run_query = iHMPSession.get_session().get_osdf().oql_query

        # Function-scope import (NOTE(review): presumably to avoid a
        # circular import - confirm).
        from Annotation import Annotation

        page_no = 0
        while True:
            page_no += 1
            response = run_query(WgsAssembledSeqSet.namespace, oql,
                                 page=page_no)
            reported = response['result_count']
            documents = response['results']

            for document in documents:
                yield Annotation.load_annotation(document)

            # Stop when this page alone accounts for the reported count.
            # NOTE(review): the count is re-read on every page, so nothing
            # is accumulated across pages - confirm whether 'result_count'
            # is per page or a grand total.
            if reported - len(documents) < 1:
                break
예제 #7
0
 def getAnnotations(self) -> [Annotation]:
     """Split the stored text on commas into one Annotation per piece.

     Every ", " in the stored text is first collapsed to "," (the stored
     text itself is overwritten with the collapsed form); each piece is
     then paired with this object's measure and beat.
     """
     collapsed = self.__text.replace(", ", ",")
     self.__text = collapsed
     result = []
     for piece in collapsed.split(","):
         result.append(Annotation(piece, self.__measure, self.__beat))
     return result
예제 #8
0
    def from_xml(cls, xml_tree):
        """Build a Location from an XML element and resolve its Room.

        ``name`` is copied from the attribute of the same name;
        ``isbeacon`` and ``isplacement`` are True only when the attribute
        text is exactly ``'true'``. Every direct child whose tag equals
        ``Annotation.get_tag()`` (case-insensitively) is parsed with
        ``Annotation.from_xml``; the last match is kept. The ``room``
        attribute is looked up with ``Room.objects(name=...).get()``.

        Raises NoSuchRoomException when the lookup result is not a Room.
        NOTE(review): the lookup itself may raise before that check when no
        room matches - confirm against the query API in use.
        """
        loc = Location()
        loc.name = xml_tree.get('name')
        loc.isbeacon = xml_tree.get('isbeacon') == 'true'
        loc.isplacement = xml_tree.get('isplacement') == 'true'

        # Iterate the element itself to visit its direct children:
        # Element.getchildren() was removed from xml.etree.ElementTree in
        # Python 3.9.
        for potential_annot in xml_tree:
            if potential_annot.tag.lower() == Annotation.get_tag().lower():
                loc.annotation = Annotation.from_xml(potential_annot)

        room = xml_tree.get('room')
        room_doc = Room.objects(name=room).get()
        # isinstance (rather than an exact type comparison) also accepts
        # Room subclasses.
        if not isinstance(room_doc, Room):
            raise NoSuchRoomException()
        loc.room = room_doc

        return loc
예제 #9
0
 def readXml(self):
     """Process every "Annotation" element of self.tree and return self.annList.

     Elements whose "Type" attribute is the string "None" are skipped.
     For each remaining element a fresh Annotation is passed, in order,
     through updateAnnInformation, getAnnCoordinate, computeBorder and
     storeAnn.
     """
     for node in self.tree.iter("Annotation"):
         if node.get("Type") == "None":
             continue
         annotation = Annotation()
         self.updateAnnInformation(node, annotation)
         self.getAnnCoordinate(node, annotation)
         self.computeBorder(annotation)
         self.storeAnn(annotation)
     return self.annList
예제 #10
0
def index():
    """Serve the index page (non-POST) or annotation results as JSON (POST).

    For a POST, ``latitude``, ``longitude`` and ``date`` are read from the
    form (each defaulting to the string "null"). The third space-separated
    token of ``date`` is appended to ``'2016-04-'`` to form the query date.
    At most the first 21 results of ``Annotation.anntation`` are serialised
    via their ``toJSON()`` method.

    Returns a JSON string, or a ``(JSON error body, 400)`` pair when
    ``date`` has fewer than three space-separated tokens.
    """
    if request.method != "POST":
        return render_template("index.html")

    latitude = request.form.get("latitude", "null")
    longitude = request.form.get("longitude", "null")
    date_parts = request.form.get("date", "null").split(' ')
    if len(date_parts) < 3:
        # Previously a missing or malformed date raised IndexError here,
        # which surfaced as an unhandled server error.
        return json.dumps({"error": "missing or malformed 'date'"}), 400
    query_date = '2016-04-' + date_parts[2]

    tweets_file = './data/tweets_processed.csv'
    stop_words_file = './data/stop-word-list.csv'
    ann = Annotation()
    ann.initialize_data(tweets_file, stop_words_file)
    print(query_date)
    # 'anntation' (sic) is the method name exposed by the Annotation class.
    result = ann.anntation(latitude, longitude, query_date)[0:21]
    return json.dumps([r.toJSON() for r in result])
예제 #11
0
파일: Vcf.py 프로젝트: sebatlab/SV2
 def load_genotypes(self,
                    Structural_Variant=None,
                    SVs=None,
                    Ped=None,
                    ids=None,
                    gen=None,
                    no_anno=None,
                    tmp_dir=None):
     """Fill self.quals, self.filters, self.genotypes and self.allele_freq.

     For every locus in ``SVs`` the variant's (med_ref, med_alt) and
     (standard_filter, denovo_filter) pairs are stored, and one genotype
     string per entry of ``ids`` is appended to ``self.genotypes[locus]``.
     The genotype defaults to './.'; it becomes '.' when the locus is on
     chrX/chrY, the sample has an entry in ``Ped.males`` and
     ``Structural_Variant.par[locus]`` is False (NOTE(review): presumably
     male calls outside the pseudoautosomal region - confirm). A genotype
     recorded in ``Variant.gt`` overrides both defaults.

     ``self.allele_freq[locus]`` is kept as
     ``[sum of integer alleles, number of alleles counted]``; '.' alleles
     are ignored.

     When ``no_anno`` is False, an Annotation is built from the unique
     (chrom, start, end) triples of ``Structural_Variant.raw`` and stored
     in ``self.Annotations``.
     """
     svs = BedTool(
         list({(format_chrom(x[0]), x[1], x[2])
               for x in Structural_Variant.raw})).sort()
     # Deliberately `== False`: the None default must not enable annotation.
     if no_anno == False:
         Annot = Annotation()
         Annot.check_overlap(svs, Structural_Variant.raw, gen, tmp_dir)
         self.Annotations = Annot
     for locus in SVs:
         Variant = SVs[locus]
         self.quals[locus] = Variant.med_ref, Variant.med_alt
         self.filters[locus] = (Variant.standard_filter,
                                Variant.denovo_filter)
         for sample_id in ids:
             gt = './.'
             if (locus[0] in ('chrX', 'chrY')
                     and Ped.males.get(sample_id) is not None
                     and Structural_Variant.par[locus] == False):
                 gt = '.'
             called = Variant.gt.get(locus + (sample_id, ))
             if called is not None:
                 gt = called
                 # The first ':'-separated field holds the '/'-separated
                 # alleles.
                 for allele in gt.split(':')[0].split('/'):
                     if allele == '.':
                         continue
                     counts = self.allele_freq.get(locus)
                     if counts is None:
                         self.allele_freq[locus] = [int(allele), 1]
                     else:
                         self.allele_freq[locus] = [
                             counts[0] + int(allele),
                             counts[1] + 1
                         ]
             if self.genotypes.get(locus) is None:
                 self.genotypes[locus] = [gt]
             else:
                 self.genotypes[locus].append(gt)
예제 #12
0
def generate_crop_synset(synset_annotation_folder,images_folder,target_folder):
    """Crop the images described by every annotation file of one synset.

    Each entry of ``synset_annotation_folder`` is loaded as an Annotation;
    its image is opened from
    ``images_folder + folder + '/' + filename + ".JPEG"`` and handed to
    ``save_crops`` (scale=True, p=0.2). Folder arguments are concatenated
    as given, so they need their trailing separator.

    An IOError while opening or cropping one image counts it as not found.
    An OSError anywhere else in the loop stops processing and prints
    "<folder> not found". A three-line summary is always printed.

    Returns [crops saved, images cropped, images not found].
    """
    cropped_total = 0
    crops_total = 0
    missing_total = 0
    try:
        for xml_name in os.listdir(synset_annotation_folder):
            image_annotation = Annotation(synset_annotation_folder + xml_name)
            try:
                image_path = (images_folder + image_annotation.folder + '/'
                              + image_annotation.filename + ".JPEG")
                image = Image.open(image_path)
                crops_total += image_annotation.save_crops(
                    image, target_folder, scale=True, p=0.2)
                cropped_total += 1
            except IOError:
                missing_total += 1
    except OSError:
        print(synset_annotation_folder + " not found")
    print('{} images cropped'.format(cropped_total))
    print('{} images with annotations not found in this synset'.format(
        missing_total))
    print('{} synset crops saved'.format(crops_total))
    return [crops_total, cropped_total, missing_total]
예제 #13
0
 def read_arg_annotation(self, path):
     """Parse argument-component annotations from the file at ``path``.

     A line is used when it starts with ``T<digits>``, any one character,
     one of Claim/Premise/MajorClaim, and then two digit runs and a
     remainder, each preceded by any one character. Other lines are
     ignored.

     Returns a dict mapping the ``T<digits>`` id to
     ``Annotation(type, start, end, text)``; start and end are passed as
     the matched strings. A repeated id keeps the last line seen.
     """
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     capture_anns_pattern = re.compile(
         r'(T\d+).(Claim|Premise|MajorClaim).(\d+).(\d+).(.+)')
     temp_anns = {}  # ann_id : Annotation
     with open(path, 'r') as anns:
         # Stream the file and match each line once; the trailing greedy
         # group runs to the end of the line, so a line holds one match.
         for line in anns:
             match = capture_anns_pattern.match(line)
             if match is None:
                 continue
             ann_id, ann, start, end, span = match.groups()
             temp_anns[ann_id] = Annotation(ann, start, end, span)
     return temp_anns
예제 #14
0
    def __setitem__(self, key, value):
        """Set an attribute or an annotation value through item assignment.

        Keys listed in ``MutationData.attributes`` are written straight
        into the instance ``__dict__``. Any other key updates the ``value``
        of the existing annotation, or, when there is none, logs a warning
        and creates ``Annotation(value)``.
        """
        if key in MutationData.attributes:
            self.__dict__[key] = value
            return

        if key in self.annotations:
            self.annotations[key].value = value
            return

        # Logger.warn() is a deprecated alias that was removed in
        # Python 3.13; warning() is the supported spelling.
        logging.getLogger(__name__).warning("Attempting to create an annotation using dictionary output.  Cannot determine annotation source, but creating it anyway.")
        self.annotations[key] = Annotation(value)
예제 #15
0
 def UpdateVideoDetails(self, fileName, videoFilePath, annotationFilePath,
                        currentFrameNbr):
     """Load a video and its annotation file and reset the player widgets.

     Opens ``videoFilePath`` with cv2.VideoCapture, reads its frame count,
     loads the annotations, and enables or disables the controls to match.
     When the video has at least one frame, the playback thread is started
     and the player jumps to frame 1.
     """
     # The file name is shown only while the video is not yet processed.
     caption = "File: " + fileName if self.videoIsProcessed == False else " "
     self.lblFileName.config(text=caption)
     self.videoFilePath = videoFilePath
     self.cvVideo = cv2.VideoCapture(videoFilePath)
     self.annotationFilePath = annotationFilePath
     self.totalFrames = int(self.cvVideo.get(cv2.CAP_PROP_FRAME_COUNT))
     if self.totalFrames > 0:
         self.loadedFrames = True
     self.currentFrameNbr = currentFrameNbr
     self.annotations = Annotation(self.annotationFilePath,
                                   self.totalFrames)
     # Enable the bounding-box checkbox only when the annotation object
     # reports itself as valid.
     checkbox_state = 'normal' if self.annotations.IsValid() else 'disabled'
     self.chkAnnotations.configure(state=checkbox_state)
     # Set the player status.
     if self.totalFrames == 0:  # the video has no frames to play
         self.statusPlayer = StatusPlayer.FAILED
         self.btnPlayPause.config(state="disabled")
         return
     # The video has frames to play.
     self.statusPlayer = StatusPlayer.NOT_STARTED
     self.btnPlayPause.config(state="normal")
     # Create the pause event and the thread that plays from the current
     # frame on.
     self.eventPause = threading.Event()
     self.threadPlayOn = threading.Thread(target=self.PlayOn, args=[])
     self.eventPause.set()
     self.threadPlayOn.start()
     # Disable buttons on the player and pass the new action.
     self.callBack_PlayPauseBtn_Clicked(True, StatusPlayer.NOT_STARTED)
     # Enable the navigation buttons.
     self.ChangeNavButtonsStatus(True)
     # Define the starting frame.
     self.GoToFrame(1)
예제 #16
0
def init_annotation_set(video_path, video_resolution, initial_bbs,
                        ann_scale_factor):
    """ Create an AnnotationSet holding one Annotation per initial box.

    The video name is the part of ``video_path`` after its last '/'.
    Each box is cloned, scaled by ``1 / ann_scale_factor`` and its
    ``xywh()`` values are added as an entry of the matching annotation.
    """
    video_name = video_path.rsplit('/', 1)[-1]
    blank_annotations = [Annotation() for _ in initial_bbs]
    ann_set = AnnotationSet(video_name, video_resolution, blank_annotations)
    for position, bounding_box in enumerate(initial_bbs):
        rescaled = bounding_box.clone()
        rescaled.scale_space(1 / ann_scale_factor)
        ann_set.annotations[position].add_entry(*rescaled.xywh())
    return ann_set
예제 #17
0
 def GetAnnotDetails(self, annotUrl, totalFrames):
     """Describe the local annotation file named by ``annotUrl``.

     The file name is the part of ``annotUrl`` after its last '/'.
     ``self.directory`` is walked and, for every directory containing a
     file of that name, nine entries are appended to the result:
     directory, file name, annotated frames with percentage, number of
     classes, comma-separated classes, first annotated frame, last
     annotated frame, smallest object and largest object.

     Returns an empty list when ``annotUrl`` is '' or no file is found.
     """
     details = []
     if annotUrl == '':
         return details
     # Locate the physical file locally.
     annotationName = annotUrl[annotUrl.rfind('/') + 1:]
     for dirpath, _, filenames in os.walk(self.directory):
         if annotationName not in filenames:
             continue
         myAnnot = Annotation(os.path.join(dirpath, annotationName),
                              totalFrames)
         classes = myAnnot.GetClassesObjects()
         (nmbAnnotedFrames, minFrame, maxFrame, minObj,
          maxObj) = myAnnot.GetNumberOfAnnotatedFrames()
         # A video without frames used to raise ZeroDivisionError here.
         if totalFrames:
             percentage = 100 * nmbAnnotedFrames / totalFrames
         else:
             percentage = 0.0
         details.append(dirpath)
         details.append(annotationName)
         # annotated frames and their percentage
         details.append('%s (%.2f%%)' % (nmbAnnotedFrames, percentage))
         details.append(len(classes))  # number of classes
         details.append(', '.join('%s' % a for a in classes))  # classes
         details.append('frame %s' % minFrame)  # first annotation
         details.append('frame %s' % maxFrame)  # last annotation
         # min obj, per the original note laid out as (area,frame,x,y,r,b)
         details.append('Area: %s / Frame: %s' % (minObj[0], minObj[1]))
         # max obj, per the original note laid out as (area,frame,x,y,r,b)
         details.append('Area: %s / Frame: %s' % (maxObj[0], maxObj[1]))
     return details
예제 #18
0
 def test_IsAnnotation_TruePositive(self):
     # Annotation.IsAnnotation must return a truthy value for each of
     # these strings; they are checked in the listed order.
     accepted_inputs = (
         "1: Exposition, 2: First Subject Group, 3: Theme a, 4: Sentence, 5: Presentation, 6: Basic idea",
         "6: Basic idea",
         "5: Continuation, 6: Fragmentation",
         "6: Cadence",
         "2: Transition, 3: Theme a",
         "4: X",
         "401: X, 502: X",
         "20: Second Subject Group, 3: Theme b, 4: Sentence, 5: Presentation, 6: Basic idea",
     )
     for candidate in accepted_inputs:
         self.assertTrue(Annotation.IsAnnotation(candidate))
예제 #19
0
 def find_subjectivity_lexicon(self):
     """Return one Annotation per subjectivity clue found in the sentences.

     Each sentence's lower-cased text is analysed with
     ``Essay.sc.analyse_sentence``. For every (word, type, polarity)
     triple, the clue is positioned at the first occurrence of the word
     in the lower-cased sentence, offset by the start of the sentence's
     span.
     """
     clues = []
     for sentence in self.sentences:
         span = sentence.span
         lowered = sentence.text.lower()
         triples = Essay.sc.analyse_sentence(lowered)
         for (word, subj_type, pol) in triples:  # TODO: use polarity
             position = lowered.find(word)
             clues.append(
                 Annotation(subj_type, span.start + position,
                            span.start + position + len(word), word))
     return clues
예제 #20
0
    def test_parse_Beethoven_SonateNo1(self):
        # Parse the example score next to this test file and compare the
        # first nine annotations against the expected ones.
        parser = MusicXmlParser()
        here = os.path.dirname(os.path.abspath(__file__))
        parser.load(here + "/../examples/op2no1movt1.musicxml")
        parser.parse()

        first_nine = parser.getAnnotations()[:9]

        expected_fields = (
            ("1: Exposition", 1, 4),
            ("2: First Subject Group", 1, 4),
            ("3: Theme a", 1, 4),
            ("4: Sentence", 1, 4),
            ("5: Presentation", 1, 4),
            ("6: Basic idea", 1, 4),
            ("6: Basic idea", 4, 1),
            ("5: Continuation", 6, 1),
            ("6: Fragmentation", 6, 1),
        )
        expected = [Annotation(text, second, third)
                    for text, second, third in expected_fields]
        self.assertEqual(expected, first_nine)
예제 #21
0
    def _read_annotations(self, directory_path, ann_file_prefix):
        """ Read every annotation file of a folder into a list.

        Files whose name starts with ``ann_file_prefix`` are selected. The
        integer between the prefix and the last '.' of the name, minus one,
        is the list position of the Annotation that reads the file.
        ``directory_path`` is concatenated with the file name as given, so
        it needs its trailing separator.
        """
        selected = [entry for entry in listdir(directory_path)
                    if entry.startswith(ann_file_prefix)]

        annotations = [Annotation() for _ in selected]
        prefix_length = len(ann_file_prefix)
        for entry in selected:
            object_number = int(entry[prefix_length:entry.rfind('.')])
            annotations[object_number - 1].read(directory_path + entry)

        return annotations
예제 #22
0
 def test_getAnnotations_ManyAnnotations_CorrectString_NoSpaceAfterComma(
         self):
     # The group text is the pieces joined by "," with no space after the
     # comma; one Annotation per piece is expected.
     pieces = [
         "1: Exposition",
         "2: First Subject Group",
         "3: Theme a",
         "4: Sentence",
         "5: Presentation",
         "6: Basic idea",
     ]
     grp = AnnotationGroup(",".join(pieces), 1, 1)
     expected = [Annotation(piece, 1, 1) for piece in pieces]
     self.assertEqual(expected, grp.getAnnotations())
예제 #23
0
    def createAnnotation(self,
                         annotationName,
                         annotationValue,
                         annotationSource="Unknown",
                         annotationDataType="String",
                         annotationDescription="",
                         newRequired=None,
                         tags=None,
                         number=None):
        """
        Create (or overwrite) an annotation on this mutation.

        newRequired implies that this cannot update an existing value.  If a
        different value already exists, a DuplicateAnnotationException is
        raised; re-creating with the same value only logs a warning.  When
        newRequired is None, the instance default (self._new_required) is
        used.

        This method must be called to add an annotation to a mutation.  Do
        not use: mut['new_annotation_name'] = 'annotation_value'

        Names listed in MutationData.attributes are set as attributes and
        annotationSource, annotationDataType, annotationDescription, tags
        and number are ignored for them.
        """
        tags = [] if tags is None else tags
        is_new_required = self._new_required if newRequired is None else newRequired

        already_annotated = (annotationName in self.annotations
                             and annotationName not in MutationData.attributes)
        if is_new_required and already_annotated:
            if annotationValue == self.annotations[annotationName].value:
                # Logger.warn() is a deprecated alias removed in Python 3.13.
                logging.getLogger(__name__).warning(
                    "Attempting to create an annotation multiple times, but with the same value: "
                    + str(annotationName) + "  :  " + str(annotationValue))
            else:
                # str() keeps a non-string name from masking this exception
                # with a TypeError during concatenation.
                raise DuplicateAnnotationException(
                    'Attempting to create an annotation multiple times (' +
                    str(annotationName) + ') with old, new values of (' +
                    str(self.annotations[annotationName].value) + ", " +
                    str(annotationValue) + ")")
        if annotationName in MutationData.attributes:
            # Attributes go through item assignment; the annotation
            # metadata arguments are ignored for them.
            self[annotationName] = annotationValue
        else:
            self.annotations[annotationName] = Annotation(
                annotationValue,
                annotationSource,
                annotationDataType,
                annotationDescription,
                tags=tags,
                number=number)
예제 #24
0
 def find_arguing_lexicon(self):
     """Return one Annotation per arguing-lexicon fragment in the sentences.

     Each sentence's lower-cased text is passed to
     ``Essay.al.SentenceFragment``, whose result maps a key to a
     collection of matched fragments. Every fragment is positioned at its
     first occurrence in the lower-cased sentence, offset by the start of
     the sentence's span, and labelled with its key.
     """
     found = []
     for sentence in self.sentences:
         span = sentence.span
         lowered = sentence.text.lower()
         fragments_by_key = Essay.al.SentenceFragment(lowered)
         for key, fragments in fragments_by_key.items():
             for fragment in fragments:
                 position = lowered.find(fragment)
                 found.append(
                     Annotation(key, span.start + position,
                                span.start + position + len(fragment),
                                fragment))
     return found
예제 #25
0
 def read_all_opinion_finder_annotations(self):
     """Attach opinion-finder annotations to the essays in self.essay_dict.

     For every document of ``self.doclist`` the annotation file path is
     assembled from the ArgumentativeEssays path constants. Each line of
     an existing file must match
     ``argumentative_essays_<essayNNN>.txt_<start>_<end><TAB><subj|obj>``
     followed by a newline; it becomes
     ``Annotation('opinion_finder', start, end, classification)``. The
     collected list is added to the essay named by the id captured from
     the file's lines. Missing files are reported with print().

     Raises IndexError when a line does not match the pattern.
     """
     # Raw string: '\d' in a plain literal is an invalid escape sequence;
     # \t and \n are now regex escapes matching the same characters.
     capture_pattern = r'argumentative_essays_(essay\d{1,3}).txt_(\d*)_(\d*)\t(subj|obj)\n'
     for doc in self.doclist:
         path_to_annotation = str(
             ArgumentativeEssays.path_to_opinion_finder +
             ArgumentativeEssays.opinion_finder_database +
             ArgumentativeEssays.argumentative_essays_input + doc + '.txt' +
             ArgumentativeEssays.annotation_output)
         if not os.path.exists(path_to_annotation):
             print('could not find: ', path_to_annotation)
             continue
         doc_id = None
         temp_list = []
         with open(path_to_annotation, 'r') as anno_file:
             for line in anno_file:
                 (doc_id, start, end,
                  classification) = re.findall(capture_pattern, line).pop()
                 temp_list.append(
                     Annotation('opinion_finder', start, end,
                                classification))
         if doc_id is None:
             # Empty file: previously this reused the id left over from an
             # earlier document, or failed on an unbound name.
             continue
         self.essay_dict[doc_id].add_annotation(temp_list)
예제 #26
0
from __future__ import division
from Annotation import Annotation as Annotation
from operator import itemgetter, concat, add
from itertools import groupby
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import copy

import seaborn as sns
# Use seaborn's "whitegrid" style with colour codes enabled.
sns.set(style="whitegrid", color_codes=True)

# NOTE(review): presumably loads the exported annotations from this JSON
# file - confirm against the Annotation class.
A = Annotation('data/exportMedium.json')

# (datetime, duration as int, proposalFlag) triples built from the
# 'dateTime', 'duration' and 'proposalFlag' fields returned by A.ofAll,
# sorted by the datetime.
dateDurations = sorted([(datetime.strptime(
    dateString, "%Y-%m-%d %H:%M:%S"), int(durationString), flag)
                        for dateString, durationString, flag in A.ofAll(
                            ['dateTime', 'duration', 'proposalFlag'])],
                       key=itemgetter(0))

# discover sessions
# calculate the time between two consecutive
# tmpDates = copy.deepcopy(dates)
# now = datetime.now()
# deltas = reduce(lambda lst, d: lst+[d-lst.pop()]+[d],
#                 tmpDates[1:], [tmpDates[0]])[:-1]+[now-now]

# Subtracting two datetimes yields a timedelta; the two values below are
# five minutes apart, so the threshold is a five-minute timedelta.
threshold = datetime(2016, 12, 1, 13, 5, 0) - datetime(2016, 12, 1, 13, 0, 0)
sessions = reduce(
예제 #27
0
from Annotation import Annotation
from itertools import groupby
from operator import itemgetter
from sklearn.neighbors import KernelDensity

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns
# Use seaborn's "whitegrid" style with colour codes enabled.
sns.set(color_codes=True, style="whitegrid")

# NOTE(review): presumably loads the exported annotations from this JSON
# file - confirm against the Annotation class.
A = Annotation('data/exportMedium.json')

# Map each 'proposalFlag' value to A.durationToSec(...) of the 'duration'
# fields of the annotations carrying that flag. A.all is sorted by the
# flag first because groupby only merges adjacent items with equal keys.
key = itemgetter('proposalFlag')
g = dict((flag, A.durationToSec(map(itemgetter('duration'), annotations)))
         for flag, annotations in groupby(sorted(A.all, key=key), key=key))


def stats(label):
    """Return (mean, standard deviation, median) of the values in g[label]."""
    mean = np.mean(g[label])
    deviation = np.std(g[label])
    median = np.median(g[label])
    return mean, deviation, median


labels = ['proposal', 'no proposal', 'wrong proposal']

# NOTE(review): the result is discarded. On Python 3 map() is lazy, so
# stats() would never actually be called by this line.
map(stats, labels)

# 1000 evenly spaced values from -10 to 300, reshaped into a column
# (shape (1000, 1)) by the added np.newaxis axis.
xPlot = np.linspace(-10, 300, 1000)[:, np.newaxis]

# One single-letter code per proposal flag (presumably matplotlib colour
# codes - confirm where c is used).
c = {'no proposal': 'g', 'proposal': 'b', 'wrong proposal': 'r'}

# NOTE(review): plt.Figure() instantiates the Figure class directly rather
# than calling pyplot's figure() - confirm this is intended.
fig = plt.Figure()
예제 #28
0
 def add_annotations_from_xml(self, xml):
     """Parse an XML string and add one annotation per <annotation> element.

     Every element named 'annotation' anywhere in the parsed document is
     converted with ``Annotation.fromxml(self, node)`` and passed to
     ``self.add_annotation``.
     """
     from xml.dom import minidom
     document = minidom.parseString(xml)
     annotation_nodes = document.getElementsByTagName('annotation')
     for annotation_node in annotation_nodes:
         parsed = Annotation.fromxml(self, annotation_node)
         self.add_annotation(parsed)
예제 #29
0
 def test_getAnnotations_SingleAnnotation_CorrectString(self):
     # A group built from a single annotation string must yield exactly
     # one Annotation built from the same three arguments.
     text, second, third = "6: Basic idea", 1, 1
     grp = AnnotationGroup(text, second, third)
     expected = [Annotation(text, second, third)]
     self.assertEqual(expected, grp.getAnnotations())
예제 #30
0
 def getAnnotation(self, annotationName):
     """ Return the Annotation instance for a name, not just its value.

     Names listed in MutationData.attributes are wrapped on the fly in an
     Annotation whose source is "__ATTR__"; any other name is looked up in
     self.annotations (KeyError when absent).
     """
     if annotationName not in MutationData.attributes:
         return self.annotations[annotationName]
     attribute_value = self.__dict__[annotationName]
     return Annotation(attribute_value, "__ATTR__")
예제 #31
0
import numpy as np
import re

import seaborn as sns
# Use seaborn's "white" style with colour codes enabled.
sns.set(style="white", color_codes=True)


def preprocessing(raw_document):
    """Lower-case a document, blank out URLs and non-letters, and tokenize it.

    A URL is ``http`` followed by at least one character, up to and
    including the next whitespace character (or the end of the text).
    Every URL, and every remaining character that is neither an ASCII
    letter nor whitespace, is replaced by a single space before the text
    is passed to ``word_tokenize``.
    """
    noise_pattern = r'(http.+?(\s|$))' + r'|[^A-Za-z\s]'
    cleaned = re.sub(noise_pattern, ' ', raw_document.lower())
    return word_tokenize(cleaned)


# NOTE(review): presumably loads the exported annotations from this JSON
# file - confirm against the Annotation class.
A = Annotation('data/exportMedium.json')

# Iterated below as (document, annotations) pairs.
docAnnos = A.perDocument(1, ['document', 'annotations'])

# blob is a dictionary keyed by proposalFlag (per the original note the
# flags are 'proposal', 'no proposal' and 'wrong proposal'). Each value is
# a list of (number of tokens in the document, proposalFlag, duration)
# tuples. The tuples are sorted by flag first because groupby only merges
# adjacent items with equal keys.
blob = dict((k, list(v))
            for k, v in groupby(sorted([(len(preprocessing(document)),
                                         annotation['proposalFlag'],
                                         annotation['duration'])
                                        for (document, annotations) in docAnnos
                                        for annotation in annotations],
                                       key=itemgetter(1)),
                                key=itemgetter(1)))

plotContent = dict((
예제 #32
0
from __future__ import division
from Annotation import Annotation
from nltk import FreqDist
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns
# Use seaborn's "whitegrid" style with colour codes enabled.
sns.set(color_codes=True, style="whitegrid")

# NOTE(review): presumably loads the exported annotations from this JSON
# file - confirm against the Annotation class.
A = Annotation('data/exportMedium.json')

# First label of the first element of every entry returned by A.ofAll.
# Built as a real list: on Python 3 map() returns a one-shot iterator, so
# FreqDist would exhaust it and len(allLabels) below would raise TypeError.
allLabels = [entry[0][0] for entry in A.ofAll(['labels'])]

dist = FreqDist(allLabels)

keys = [u'Neg', u'Neut', u'Pos', u'No Sent', u'Undecided', u'Irrelevant']

index = np.arange(len(keys))

bar_width = 0.5

# NOTE(review): plt.Figure() instantiates the Figure class directly rather
# than calling pyplot's figure() - confirm this is intended.
fig = plt.Figure()
# Bar heights are each label's share of all labels, as an explicit list
# so the call works on both Python 2 and Python 3.
plt.bar(index, [dist[label] / len(allLabels) for label in keys], bar_width)

plt.xlabel('Label')
plt.ylabel('% of annotations')
plt.xticks(index + bar_width / 2, keys)
plt.legend()