예제 #1
0
    def test_get_synonyms(self):
        """Check get_synonyms() on the original page and on each alias.

        Four pages are saved with ``original_page`` set to the
        ``http://www.google.com`` page, plus two unrelated pages that must
        not show up in any result.
        """
        primary_url = "http://www.google.com"
        primary_page = IndexedPage(url=primary_url)
        primary_page.save()

        alias_urls = [
            "www.google.com",
            "https://www.google.com",
            "google.com",
            "http://google.com",
        ]
        unrelated_urls = ["facebook.com", "math.org"]
        for alias_url in alias_urls:
            IndexedPage(url=alias_url, original_page=primary_page).save()
        for unrelated_url in unrelated_urls:
            IndexedPage(url=unrelated_url).save()

        # Asked on the original page: exactly the aliases come back.
        wanted = set(alias_urls)
        found = set(page.url for page in primary_page.get_synonyms())
        self.assertSetEqual(wanted, found)

        # Asked on an alias: every other alias plus the original come back.
        for alias_url in alias_urls:
            alias_page = IndexedPage.objects.get(pk=alias_url)
            siblings = set(alias_urls) ^ set([alias_url])
            wanted = siblings | set([primary_url])
            found = set(page.url for page in alias_page.get_synonyms())
            self.assertSetEqual(wanted, found)
 def test_get_words_with_repetition(self):
     """Saving the same words repeatedly must leave one entry per word.

     Each of four words is saved three times for a single page. The test
     then expects the page to hold as many entries as there are distinct
     words, and get_words() to expose every one of those words.
     """
     url = "http://www.google.com"
     google_page = IndexedPage(url=url)
     google_page.save()
     word_list = ["google", "feeling", "lucky", "search"] * 3
     for word in word_list:
         WordFromIndexedPage(word=word,
                             indexed_page=google_page,
                             offsets_in_indexedPage=str([1])).save()
     self.assertEqual(len(google_page.words.all()), len(set(word_list)))
     # Materialise the words once: a lazy map() object (Python 3) would be
     # consumed by the first membership checks, making later ones fail.
     all_words = [element.word for element in google_page.get_words()]
     for word in word_list:
         self.assertIn(word, all_words)
 def test_get_words_no_words(self):
     """A page saved without any words must report no words at all."""
     url = "http://www.google.com"
     google_page = IndexedPage(url=url)
     google_page.save()

     # Deliberately no WordFromIndexedPage rows are created for this page.
     self.assertEqual(len(google_page.words.all()), 0)
     # The previous version looped over an empty list and therefore never
     # asserted anything about get_words(); check its result explicitly.
     all_words = [element.word for element in google_page.get_words()]
     self.assertEqual(all_words, [])
    def test_set_and_check_attributes_duplicated_locations(self):
        """Offsets that contain duplicates must round-trip via set/get.

        Every offset handed to set_offsets() has to be present in what
        get_offsets() returns, and the number of offsets must be unchanged.
        """
        page = IndexedPage(url="http://www.google.com")
        page.save()

        offsets = [2, 5, 7, 7, 2, 2, 11]
        entry = WordFromIndexedPage(word="google", indexed_page=page)
        entry.set_offsets(offsets)
        entry.save()

        # Read the offsets back from the same object that was just saved.
        stored = entry.get_offsets()
        for offset in offsets:
            self.assertIn(offset, stored)
        self.assertEqual(len(offsets), len(stored))
예제 #5
0
 def test_set_and_check_attributes_duplicated_locations(self):
     """Offsets that contain duplicates must round-trip via set/get.

     Every offset handed to set_offsets() has to be present in what
     get_offsets() returns, and the number of offsets must be unchanged.
     """
     page = IndexedPage(url="http://www.google.com")
     page.save()

     offsets = [2, 5, 7, 7, 2, 2, 11]
     entry = WordFromIndexedPage(word="google", indexed_page=page)
     entry.set_offsets(offsets)
     entry.save()

     # Read the offsets back from the same object that was just saved.
     stored = entry.get_offsets()
     for offset in offsets:
         self.assertIn(offset, stored)
     self.assertEqual(len(offsets), len(stored))
예제 #6
0
 def test_get_words_with_repetition(self):
     """Saving the same words repeatedly must leave one entry per word.

     Each of four words is saved three times for a single page. The test
     then expects the page to hold as many entries as there are distinct
     words, and get_words() to expose every one of those words.
     """
     url = "http://www.google.com"
     google_page = IndexedPage(url=url)
     google_page.save()
     word_list = ["google", "feeling", "lucky", "search"] * 3
     for word in word_list:
         WordFromIndexedPage(
             word=word,
             indexed_page=google_page,
             offsets_in_indexedPage=str([1])).save()
     self.assertEqual(len(google_page.words.all()), len(set(word_list)))
     # Materialise the words once: a lazy map() object (Python 3) would be
     # consumed by the first membership checks, making later ones fail.
     all_words = [element.word for element in google_page.get_words()]
     for word in word_list:
         self.assertIn(word, all_words)
예제 #7
0
 def test_get_words_no_words(self):
     """A page saved without any words must report no words at all."""
     url = "http://www.google.com"
     google_page = IndexedPage(url=url)
     google_page.save()

     # Deliberately no WordFromIndexedPage rows are created for this page.
     self.assertEqual(len(google_page.words.all()), 0)
     # The previous version looped over an empty list and therefore never
     # asserted anything about get_words(); check its result explicitly.
     all_words = [element.word for element in google_page.get_words()]
     self.assertEqual(all_words, [])
    def test_get_synonyms(self):
        """Check get_synonyms() on the original page and on each alias.

        Four pages are saved with ``original_page`` set to the
        ``http://www.google.com`` page, plus two unrelated pages that must
        not show up in any result.
        """
        primary_url = "http://www.google.com"
        primary_page = IndexedPage(url=primary_url)
        primary_page.save()

        alias_urls = [
            "www.google.com",
            "https://www.google.com",
            "google.com",
            "http://google.com",
        ]
        unrelated_urls = ["facebook.com", "math.org"]
        for alias_url in alias_urls:
            IndexedPage(url=alias_url, original_page=primary_page).save()
        for unrelated_url in unrelated_urls:
            IndexedPage(url=unrelated_url).save()

        # Asked on the original page: exactly the aliases come back.
        wanted = set(alias_urls)
        found = set(page.url for page in primary_page.get_synonyms())
        self.assertSetEqual(wanted, found)

        # Asked on an alias: every other alias plus the original come back.
        for alias_url in alias_urls:
            alias_page = IndexedPage.objects.get(pk=alias_url)
            siblings = set(alias_urls) ^ set([alias_url])
            wanted = siblings | set([primary_url])
            found = set(page.url for page in alias_page.get_synonyms())
            self.assertSetEqual(wanted, found)
예제 #9
0
    def test_synonyms_logic_invariant(self):
        """Each alias page points at the original; the original at nothing.

        One page is saved as the original and four more are saved with
        ``original_page`` referring to it; every stored page is then checked.
        """
        primary_url = "http://www.google.com"
        primary_page = IndexedPage(url=primary_url)
        primary_page.save()

        alias_urls = [
            "www.google.com",
            "https://www.google.com",
            "google.com",
            "http://google.com",
        ]
        for alias_url in alias_urls:
            IndexedPage(url=alias_url, original_page=primary_page).save()

        # Walk every stored page and check its link to the original.
        for page in IndexedPage.objects.all():
            if page == primary_page:
                self.assertIsNone(
                    page.original_page,
                    "original_page's orginal_page should be none")
            else:
                self.assertEqual(primary_page, page.original_page,
                                 "original not set!")
    def test_synonyms_logic_invariant(self):
        """Each alias page points at the original; the original at nothing.

        One page is saved as the original and four more are saved with
        ``original_page`` referring to it; every stored page is then checked.
        """
        primary_url = "http://www.google.com"
        primary_page = IndexedPage(url=primary_url)
        primary_page.save()

        alias_urls = [
            "www.google.com",
            "https://www.google.com",
            "google.com",
            "http://google.com",
        ]
        for alias_url in alias_urls:
            IndexedPage(url=alias_url, original_page=primary_page).save()

        # Walk every stored page and check its link to the original.
        for page in IndexedPage.objects.all():
            if page == primary_page:
                self.assertIsNone(
                    page.original_page,
                    "original_page's orginal_page should be none")
            else:
                self.assertEqual(primary_page, page.original_page,
                                 "original not set!")
예제 #11
0
from searchEngine.models import WordFromIndexedPage, IndexedPage

# Scratch script: save one page, attach two words to it, then print what the
# page reports back. Every print call takes a single argument so the output
# is the same under Python 2 and Python 3.
googlePage = IndexedPage(url="http://www.google.com")
googlePage.save()
print(googlePage)

googleWord = "google"
googleWord2 = "google2"

# NOTE(review): the test snippets in this collection pass the page as
# `indexed_page=`; confirm which keyword the current model accepts.
googleWordLocation = WordFromIndexedPage(indexedPage=googlePage,
                                         word=googleWord)
googleWordLocation.set_offsets([1])
googleWordLocation.save()

googleWord2Location = WordFromIndexedPage(indexedPage=googlePage,
                                          word=googleWord2)
googleWord2Location.set_offsets([1])
googleWord2Location.save()

print("--" * 100)

# Pre-format into one string: print("a", b) would print a tuple on Python 2.
print("googlePage.words: %s" % (googlePage.get_words(),))
print(len(googlePage.words.all()))
예제 #12
0
from searchEngine.models import WordFromIndexedPage, IndexedPage

# Scratch script: save one page, attach two words to it, then print what the
# page reports back. Every print call takes a single argument so the output
# is the same under Python 2 and Python 3.
googlePage = IndexedPage(url="http://www.google.com")
googlePage.save()
print(googlePage)

googleWord = "google"
googleWord2 = "google2"


# NOTE(review): the test snippets in this collection pass the page as
# `indexed_page=`; confirm which keyword the current model accepts.
googleWordLocation = WordFromIndexedPage(indexedPage=googlePage, word=googleWord)
googleWordLocation.set_offsets([1])
googleWordLocation.save()

googleWord2Location = WordFromIndexedPage(indexedPage=googlePage, word=googleWord2)
googleWord2Location.set_offsets([1])
googleWord2Location.save()

print("--" * 100)

# Pre-format into one string: print("a", b) would print a tuple on Python 2.
print("googlePage.words: %s" % (googlePage.get_words(),))
print(len(googlePage.words.all()))