Example #1
def find_coincidences(cls, tokens):
    # Search every Site object for map entries whose key shares at least
    # one token with the given token sequence, and collect the matching links.
    # (cls.objects.all() suggests this is presumably a @classmethod on a Django model.)
    sites = cls.objects.all()
    coincidences = []
    for site in sites:
        site_map = site.map_link  # renamed to avoid shadowing the built-in `map`
        for key, link in site_map.items():  # .iteritems() is Python 2 only
            key = only_nouns(key)  # strip everything except nouns/adjectives
            if contains_any(tokens, key):
                coincidences.append(link)
    return coincidences
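
Both examples lean on two helpers, only_nouns and contains_any, that are not shown on this page. The sketch below is an assumption about what they might look like: the names come from the examples, but the bodies are placeholders (a real only_nouns would filter by part of speech, e.g. with nltk.pos_tag, rather than keeping every word).

import re

def only_nouns(text):
    # Assumed behaviour: return the content words of `text` as a list.
    # Placeholder: keeps lower-cased alphabetic words only; swap in real
    # part-of-speech filtering for production use.
    return [word.lower() for word in re.findall(r'[A-Za-z]+', text)]

def contains_any(tokens, words):
    # Assumed behaviour: True if any search token appears in `words`.
    token_set = set(token.lower() for token in tokens)
    return any(word in token_set for word in words)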
Example #2
def tokens(self):
    # Put spaces around punctuation marks so they become separate tokens.
    document = self.content.split('.')
    document_filtered = []
    for sentence in document:
        sentence_filtered = sentence
        # Note: '.' never appears here because the content was already
        # split on it, so that replacement is effectively a no-op.
        for mark in (',', ';', '.', ':', '"', "'"):
            sentence_filtered = sentence_filtered.replace(mark, ' {} '.format(mark))
        document_filtered.append(sentence_filtered)

    # Keep only the nouns and adjectives of each sentence.
    new_document = []
    for sentence in document_filtered:
        new_sentence = only_nouns(sentence)
        if new_sentence:
            new_document.append(new_sentence)

    # Drop empty sentences and duplicate words within each sentence.
    # (only_nouns is assumed to return a list of words here; with a plain
    # string, set() would split it into characters instead.)
    texts = [list(set(sentence)) for sentence in new_document if sentence]
    # TODO: Reduce the matrix dimension to 2 x 2
    return texts
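
As a hypothetical usage sketch (the Document class below is a stand-in, not part of the original project, and it relies on the only_nouns sketch above), tokens() yields one de-duplicated word list per sentence:

class Document(object):
    def __init__(self, content):
        self.content = content

Document.tokens = tokens  # attach the method above for this sketch

doc = Document("The quick brown fox. The lazy dog, the quick fox.")
for words in doc.tokens():
    # One list of unique words per sentence; ordering is arbitrary
    # because of the set() pass.
    print(words)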