Esempio n. 1
0
 def create_index(self, url):
     """Build (or rebuild) the stored word index for the page at ``url``.

     The page is fetched and converted to text, an in-memory index is built
     from that text, and the result is persisted as ``Page``, ``Word`` and
     ``Match`` rows.

     If a ``Page`` for ``url`` already exists, its stored fields are
     refreshed from the newly fetched content and its previous ``Match``
     rows are removed (along with any ``Word`` left without pages) before
     the new matches are written.

     Args:
         url: Address of the page to index; also used as the lookup key
             for the ``Page`` row.
     """
     converter = HtmlToTextConverter()
     title, content = converter.transform_html_into_text(url)
     index_builder = BuildIndex(content)
     index = index_builder.get_index()
     number_of_words = index_builder.get_number_of_words()

     # Keep the try body down to the single lookup so that an
     # ObjectDoesNotExist raised during cleanup is not mistaken for
     # "page not indexed yet" (which would create a duplicate Page).
     try:
         document = Page.objects.get(url=url)
     except ObjectDoesNotExist:
         document = Page(url=url,
                         number_of_words=number_of_words,
                         title=title,
                         content=content)
         document.save()
     else:
         # Re-indexing: bring the stored page in line with the content the
         # new matches are derived from, otherwise it would stay stale.
         document.number_of_words = number_of_words
         document.title = title
         document.content = content
         document.save()
         for old_match in Match.objects.filter(page=document):
             orphan_candidate = old_match.word
             old_match.delete()
             # Drop words that no longer occur on any page.
             if not orphan_candidate.pages.all().exists():
                 orphan_candidate.delete()

     # NOTE(review): assumes ``index`` is a dict-like mapping of word ->
     # iterable of positions, as implied by the original keys()/[] access.
     for term, term_positions in index.items():
         positions = " ".join(str(x) for x in term_positions)
         word, _created = Word.objects.get_or_create(value=term)
         match = Match(word=word, page=document, positions=positions)
         match.save()