Example #1
0
    def get(self, doc_id):
        '''Count coreference paraphrases for each Wikipedia entity in a document.

        Fetches the document's entities from WpEntitiesService and its
        'paraphrases' mapping from CoreferenceCountsService, normalizes every
        string with title_confirmation.preprocess, and records for each entity
        the size of the paraphrase list it belongs to. An entity is matched
        against the paraphrase keys first, and only then against the paraphrase
        values; entities that match neither are left out of the result.

        :param doc_id: the id of the article
        :return: a dict of the form {doc_id: {entity: count}, 'status': 200}
        '''
        entities = WpEntitiesService().nestedGet(doc_id)
        coreferences = CoreferenceCountsService().get(doc_id).get(doc_id, {})
        doc_paraphrases = coreferences.get('paraphrases', {})

        preprocess = title_confirmation.preprocess

        # Values are materialized as real lists so that len() and repeated
        # membership tests behave the same on Python 2 and Python 3 (where
        # map() returns a one-shot iterator).
        paraphrases = {
            preprocess(key): [preprocess(mention) for mention in mentions]
            for key, mentions in doc_paraphrases.items()
        }

        counts = {}
        for raw_entity in entities:
            entity = preprocess(raw_entity)

            # Direct hit: the entity is itself a paraphrase key.
            if entity in paraphrases:
                counts[entity] = len(paraphrases[entity])
                continue

            # Otherwise use the first paraphrase group that lists the entity
            # among its mentions; no match simply means no count is recorded.
            for mentions in paraphrases.values():
                if entity in mentions:
                    counts[entity] = len(mentions)
                    break

        return {doc_id: counts, 'status': 200}
Example #2
0
    def get(self, doc_id, entity):
        '''Return the sentences of a document that refer to the given entity.

        A sentence is selected when either
        (a) it holds a mention belonging to a coreference chain in which some
            mention, once preprocessed, equals the preprocessed entity, or
        (b) the entity string occurs as a substring of the preprocessed sentence.

        :param doc_id: the id of the article
        :param entity: the entity string to look for
        :return: a dict of the form
                 {'status': 200, entity: [{'sentiment': ..., 'sentence': ...}, ...]}
                 with sentences listed in document order
        '''
        parse = ParsedJsonService().nestedGet(doc_id)
        document = (parse.get('root') or {}).get('document') or {}
        sentences = document.get('sentences', {})
        if sentences is None:
            return {'status': 200, entity: []}

        sents = asList(sentences.get('sentence', []))
        # The coreference section is treated like the sentences section above:
        # a None value means "nothing there" rather than an error.
        corefs = asList((document.get('coreference') or {}).get('coreference', []))

        preprocess = title_confirmation.preprocess
        # Normalize every sentence's tokens to a list once, so that sentences
        # with a single token are handled the same way everywhere below.
        tokens_by_sentence = [asList(sent['tokens']['token']) for sent in sents]
        sents_processed = [' '.join([token['word'] for token in tokens])
                           for tokens in tokens_by_sentence]
        sents_preprocessed = [' '.join([preprocess(token['word']) for token in tokens])
                              for tokens in tokens_by_sentence]
        target = preprocess(entity)

        sentences_to_add = set()
        for coref in corefs:
            mentions = asList(coref['mention'])
            for mention in mentions:
                try:
                    # Mention offsets are 1-based; 'end' is used as an
                    # exclusive bound after the shift.
                    tokens = tokens_by_sentence[int(mention['sentence']) - 1]
                    span = tokens[int(mention['start']) - 1:int(mention['end']) - 1]
                    mention_string = ' '.join([preprocess(token['word']) for token in span])
                    if mention_string == target:
                        # One matching mention pulls in every sentence of the chain.
                        chain_sentences = [int(m['sentence']) - 1 for m in mentions]
                        sentences_to_add.update(chain_sentences)
                        break
                except TypeError:
                    # Best effort: skip mentions whose fields are missing or
                    # malformed instead of failing the whole request.
                    continue

        # NOTE(review): this substring test uses the raw entity against the
        # *preprocessed* sentences, while the mention test above compares the
        # preprocessed entity. Kept as-is to preserve results; confirm intent.
        sentences_to_add.update(
            index for index, text in enumerate(sents_preprocessed) if entity in text)

        return {'status': 200,
                entity: [{'sentiment': sent.get('@sentiment'),
                          'sentence': sents_processed[index]}
                         for index, sent in enumerate(sents)
                         if index in sentences_to_add]}
Example #3
0
 def get(self, doc_id):
     '''Return the preprocessed term-head string of every sentence in a document.

     Fetches the parsed document from ParsedJsonService. A non-200 response is
     passed through unchanged. For a non-empty document, each sentence's
     'parse' value is wrapped in MrgSentence, its term head is read via
     nodes.getTermHead().getString(), and the result is normalized with
     title_confirmation.preprocess.

     :param doc_id: the id of the article
     :return: {'status': 200, doc_id: [head, ...]} on success,
              {'status': 400, 'message': ...} when isEmptyDoc reports an empty
              document, or the upstream response when its status is not 200
     '''
     json_response = ParsedJsonService().get(doc_id)
     if json_response['status'] != 200:
         return json_response

     parsed_doc = json_response[doc_id]
     if isEmptyDoc(parsed_doc):
         return {'status': 400,
                 'message': "No sentences found"}

     sentences = asList(parsed_doc.get('root', {})
                                  .get('document', {})
                                  .get('sentences', {})
                                  .get('sentence', []))
     heads = [
         title_confirmation.preprocess(
             MrgSentence(sentence.get('parse', '')).nodes.getTermHead().getString())
         for sentence in sentences
     ]
     return {'status': 200, doc_id: heads}