def find_coincidences(cls, tokens):
    """Collect the links of every site whose map keys match ``tokens``.

    Every object returned by ``cls.objects.all()`` is inspected.  Each key
    of the object's ``map_link`` mapping is normalised with ``only_nouns``
    and then tested against ``tokens`` with ``contains_any``; when that test
    is truthy, the link stored under the key is collected.

    Args:
        cls: Model class exposing an ``objects`` manager whose ``all()``
            yields objects with a ``map_link`` mapping of key -> link.
        tokens: Tokens to look for; handed to ``contains_any`` unchanged.

    Returns:
        A list of matching links, in iteration order.  A link appears once
        per matching key, so duplicates are possible.
    """
    coincidences = []
    for site in cls.objects.all():
        # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
        # ``iteritems()`` raises AttributeError on Python 3.
        link_map = site.map_link
        for key, link in link_map.items():
            # Strip the key down to its relevant terms before comparing.
            # NOTE(review): exact output of only_nouns is defined elsewhere.
            normalized_key = only_nouns(key)
            if contains_any(tokens, normalized_key):
                coincidences.append(link)
    return coincidences
def tokens(self):
    """Break ``self.content`` into per-sentence lists of unique terms.

    The content is split on ``'.'``; each piece has its punctuation marks
    padded with spaces and is then passed to ``only_nouns``.  Pieces for
    which ``only_nouns`` returns something falsy are dropped, and the
    remaining results are de-duplicated.

    Returns:
        A list with one entry per kept sentence, each entry being a list of
        the distinct items returned by ``only_nouns`` for that sentence.
        Because duplicates are removed through a ``set``, the order of items
        inside each entry is not guaranteed.
    """
    # Marks that get surrounded by spaces so they stand apart from words.
    # The '.' entry never matches here (splitting on '.' already removed
    # every period) but is kept so the substitutions mirror the full list.
    punctuation = (',', ';', '.', ':', '"', "'")

    texts = []
    for raw_sentence in self.content.split('.'):
        spaced = raw_sentence
        for mark in punctuation:
            spaced = spaced.replace(mark, ' ' + mark + ' ')

        # Keep only the relevant terms of the sentence.
        # NOTE(review): presumably nouns/adjectives -- confirm in only_nouns.
        terms = only_nouns(spaced)
        if terms:
            # Drop repeated terms within the sentence.
            texts.append(list(set(terms)))

    # TODO: Reduce the matrix dimension to 2 x 2
    return texts