def test_get_synonyms(self):
    original_google_url = "http://www.google.com"
    original_google_page = IndexedPage(url=original_google_url)
    original_google_page.save()
    other_google_urls = [
        "www.google.com",
        "https://www.google.com",
        "google.com",
        "http://google.com",
    ]
    noise_urls = ["facebook.com", "math.org"]
    for url in other_google_urls:
        IndexedPage(url=url, original_page=original_google_page).save()
    for url in noise_urls:
        IndexedPage(url=url).save()

    # The original page's synonyms are exactly its secondary pages.
    expected = set(other_google_urls)
    actual = {e.url for e in original_google_page.get_synonyms()}
    self.assertSetEqual(expected, actual)

    # Each secondary page's synonyms are every other secondary page
    # plus the original, excluding itself.
    for url in other_google_urls:
        secondary_page = IndexedPage.objects.get(pk=url)
        expected = (set(other_google_urls) - {url}) | {original_google_url}
        actual = {e.url for e in secondary_page.get_synonyms()}
        self.assertSetEqual(expected, actual)

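# For context: a minimal sketch of the get_synonyms() behaviour the test above
# relies on (an assumption for illustration, not the actual model code). An
# original page's synonyms are its secondary pages; a secondary page's synonyms
# are every other secondary page plus the original, excluding itself:
#
#     def get_synonyms(self):
#         if self.original_page is None:
#             return list(IndexedPage.objects.filter(original_page=self))
#         siblings = IndexedPage.objects.filter(
#             original_page=self.original_page).exclude(pk=self.pk)
#         return list(siblings) + [self.original_page]
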
def test_get_words_with_repetition(self):
    url = "http://www.google.com"
    google_page = IndexedPage(url=url)
    google_page.save()
    word_list = ["google", "feeling", "lucky", "search"] * 3
    for word in word_list:
        WordFromIndexedPage(
            word=word,
            indexed_page=google_page,
            offsets_in_indexedPage=str([1])).save()

    # Saving a repeated word must not create a duplicate row, but every
    # word (repeated or not) must still be reachable through get_words().
    self.assertEqual(len(google_page.words.all()), len(set(word_list)))
    all_words = [element.word for element in google_page.get_words()]
    for word in word_list:
        self.assertIn(word, all_words)

def test_get_words_no_words(self):
    url = "http://www.google.com"
    google_page = IndexedPage(url=url)
    google_page.save()
    word_list = []
    for word in word_list:
        WordFromIndexedPage(
            word=word,
            indexed_page=google_page,
            offsets_in_indexedPage=str([1])).save()

    # A page with no words must report an empty word set.
    self.assertEqual(len(google_page.words.all()), len(word_list))
    all_words = [element.word for element in google_page.get_words()]
    for word in word_list:
        self.assertIn(word, all_words)

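# For context: the two tests above only require that get_words() yield objects
# exposing a .word attribute; a minimal sketch consistent with them (assumed,
# not the actual model code) would simply return the related manager's rows:
#
#     def get_words(self):
#         return self.words.all()
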
def test_set_and_check_attributes_duplicated_locations(self):
    url = "http://www.google.com"
    google_page = IndexedPage(url=url)
    google_page.save()
    our_word = WordFromIndexedPage(word="google", indexed_page=google_page)
    locations_list = [2, 5, 7, 7, 2, 2, 11]
    our_word.set_offsets(locations_list)
    our_word.save()

    # Checking that the locations are retrievable, duplicates included.
    retrieved_offsets = our_word.get_offsets()
    for location in locations_list:
        self.assertIn(location, retrieved_offsets)
    self.assertEqual(len(locations_list), len(retrieved_offsets))

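# For context: a minimal sketch of the offset round-trip the test above checks
# (assumed, not the actual model code). Offsets are stored in the
# offsets_in_indexedPage text field as the string repr of a list, duplicates
# preserved, and parsed back on read:
#
#     import ast
#
#     def set_offsets(self, offsets):
#         self.offsets_in_indexedPage = str(list(offsets))
#
#     def get_offsets(self):
#         return ast.literal_eval(self.offsets_in_indexedPage)
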
def test_synonyms_logic_invariant(self):
    original_google_url = "http://www.google.com"
    original_google_page = IndexedPage(url=original_google_url)
    original_google_page.save()
    other_google_urls = [
        "www.google.com",
        "https://www.google.com",
        "google.com",
        "http://google.com",
    ]
    for url in other_google_urls:
        IndexedPage(url=url, original_page=original_google_page).save()

    # Every secondary page must point back to the original, and the
    # original itself must have no original_page.
    for page in IndexedPage.objects.all():
        if page != original_google_page:
            self.assertEqual(original_google_page, page.original_page,
                             "original not set!")
        else:
            self.assertIsNone(page.original_page,
                              "the original page's original_page should be None")

# Manual smoke test, meant for a Django shell session (Python 2 print syntax).
from searchEngine.models import WordFromIndexedPage, IndexedPage

googlePage = IndexedPage(url="http://www.google.com")
googlePage.save()
print googlePage

googleWord = "google"
googleWord2 = "google2"

# keyword matches the indexed_page field used in the tests above
googleWordLocation = WordFromIndexedPage(indexed_page=googlePage, word=googleWord)
googleWordLocation.set_offsets([1])
googleWordLocation.save()

googleWord2Location = WordFromIndexedPage(indexed_page=googlePage, word=googleWord2)
googleWord2Location.set_offsets([1])
googleWord2Location.save()

print "--" * 100
print "googlePage.words:", googlePage.get_words()
print len(googlePage.words.all())