def get(self, doc_id):
    '''
    Given a doc id, accesses wp entities and then cross-references them
    against the document's coreference paraphrase groups.

    An entity is counted when its preprocessed form is either the key of a
    paraphrase group or one of the members of a group; the count is the
    number of members in that group. Entities matching no group are omitted.

    :param doc_id: the id of the article
    :return: dict mapping doc_id to {preprocessed entity: group size},
             plus 'status': 200
    '''
    preprocess = title_confirmation.preprocess

    entities = WpEntitiesService().nestedGet(doc_id)
    coreferences = CoreferenceCountsService().get(doc_id).get(doc_id, {})

    # Build concrete lists here rather than relying on map() returning a
    # list: on Python 3 map() yields a one-shot iterator, which breaks both
    # the repeated membership tests and len() below.
    paraphrases = {
        preprocess(mention): [preprocess(alias) for alias in aliases]
        for mention, aliases in coreferences.get('paraphrases', {}).items()
    }

    counts = {}
    for raw_entity in entities:
        entity = preprocess(raw_entity)

        # Direct hit: the entity is itself the key of a paraphrase group.
        if entity in paraphrases:
            counts[entity] = len(paraphrases[entity])
            continue

        # Otherwise use the first group that lists the entity as a member.
        # No match simply means the entity is left out of the result.
        group = next(
            (aliases for aliases in paraphrases.values() if entity in aliases),
            None,
        )
        if group is not None:
            counts[entity] = len(group)

    return {doc_id: counts, 'status': 200}
def get(self, doc_id, entity):
    '''
    Return the sentences of a document that refer to the given entity.

    A sentence is selected when it belongs to a coreference chain that
    contains a mention whose preprocessed text equals the preprocessed
    entity, or when the entity string occurs in the sentence's preprocessed
    text.

    :param doc_id: the id of the article
    :param entity: the entity string to look for
    :return: dict with 'status': 200 and, under the entity key, a list of
             {'sentiment': ..., 'sentence': ...} dicts in document order
    '''
    preprocess = title_confirmation.preprocess

    parse = ParsedJsonService().nestedGet(doc_id)
    document = (parse.get('root') or {}).get('document') or {}
    sentences = document.get('sentences', {})
    if sentences is None:
        return {'status': 200, entity: []}

    sents = asList(sentences.get('sentence', []))
    # 'coreference' may be present but None, just like 'sentences' above;
    # treat that as "no coreference chains" instead of failing on .get().
    corefs = asList((document.get('coreference') or {}).get('coreference', []))

    # Normalise every sentence's tokens once via asList so that a sentence
    # holding a single token is handled the same as one holding several.
    sent_tokens = [asList(sent['tokens']['token']) for sent in sents]
    sents_processed = [' '.join([token['word'] for token in tokens])
                       for tokens in sent_tokens]
    sents_preprocessed = [' '.join([preprocess(token['word']) for token in tokens])
                          for tokens in sent_tokens]

    target = preprocess(entity)
    sentences_to_add = set()
    for coref in corefs:
        mentions = asList(coref['mention'])
        for mention in mentions:
            try:
                tokens = sent_tokens[int(mention['sentence']) - 1]
                # Mention offsets are 1-based; the slice stops before
                # index end - 1.
                span = tokens[int(mention['start']) - 1:int(mention['end']) - 1]
                mention_string = ' '.join([preprocess(token['word']) for token in span])
                matched = mention_string == target
                chain_sentences = ([int(other['sentence']) - 1 for other in mentions]
                                   if matched else [])
            except TypeError:
                # Best effort: a mention with malformed fields is skipped.
                continue
            if matched:
                # One matching mention pulls in every sentence of the chain.
                sentences_to_add.update(chain_sentences)
                break

    # NOTE(review): this substring test uses the raw entity against the
    # preprocessed sentence text, whereas the coreference match above uses
    # the preprocessed entity - confirm which is intended.
    sentences_to_add.update(
        index for index, text in enumerate(sents_preprocessed) if entity in text
    )

    return {'status': 200,
            entity: [{'sentiment': sent.get('@sentiment', None),
                      'sentence': sents_processed[index]}
                     for index, sent in enumerate(sents)
                     if index in sentences_to_add]}
def get(self, doc_id):
    '''
    Return one preprocessed head string per sentence of a parsed document.

    For each sentence, an MrgSentence is built from the sentence's 'parse'
    field and the string of nodes.getTermHead() is run through
    title_confirmation.preprocess.

    :param doc_id: the id of the article
    :return: the ParsedJsonService response unchanged when its status is
             not 200; {'status': 400, 'message': ...} when isEmptyDoc
             reports an empty document; otherwise {'status': 200,
             doc_id: [head strings]}
    '''
    jsonResponse = ParsedJsonService().get(doc_id)
    if jsonResponse['status'] != 200:
        return jsonResponse

    parsed_doc = jsonResponse[doc_id]
    if isEmptyDoc(parsed_doc):
        return {'status': 400, 'message': "No sentences found"}

    sentences = asList(parsed_doc.get('root', {})
                                 .get('document', {})
                                 .get('sentences', {})
                                 .get('sentence', []))
    heads = [
        title_confirmation.preprocess(
            MrgSentence(sentence.get('parse', '')).nodes.getTermHead().getString()
        )
        for sentence in sentences
    ]
    return {'status': 200, doc_id: heads}