def testFileConnector(self):
    """Fetch, parse and persist one TripAdvisor review-detail page.

    Loads the ``trip_review_detail`` page config from
    ``../config/tripadvisor.json``, downloads a single review page, runs
    ``parser.parse_page`` on the HTML and hands every parsed record to
    ``database.save_data``.

    This is a smoke test: nothing is asserted about the parsed or stored
    values, so it fails only if one of the steps raises.  It needs network
    access and whatever backend ``database.save_data`` writes to.
    """
    model = "trip_review_detail"
    webconfigs = config.load_pages("../config/tripadvisor.json")
    web_config = config.get_model_config(webconfigs, model)
    test_page = "http://www.tripadvisor.com.sg/ShowUserReviews-g294265-d2516429-r165190313-Auld_Alliance-Singapore.html#REVIEWS"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    # parse_page returns a (data, links) pair; the links are not needed here.
    records, _links = parser.parse_page(html, web_config)
    for record in records:
        database.save_data(record, web_config["model"], web_config)
def testParser_trip_review_detail(self):
    """Parse a live TripAdvisor review-detail page and print the result.

    Loads the ``trip_review_detail`` page config from
    ``../config/tripadvisor.json``, downloads one review page, runs
    ``parser.parse_page`` on it, then prints the parsed data, the links
    and the length of each.

    Nothing is asserted about the output, so the test fails only if one of
    the steps raises.  Requires network access.
    """
    pages_config = config.load_pages("../config/tripadvisor.json")
    model = "trip_review_detail"
    page_config = config.get_model_config(pages_config, model)
    test_page = "http://www.tripadvisor.com.sg/ShowUserReviews-g294265-d2516429-r139369230-Auld_Alliance-Singapore.html#REVIEWS"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    data, links = parser.parse_page(html, page_config)
    print(data)
    print(links)
    print(len(data))
    print(len(links))
def testParser_hungry_list(self):
    """Parse the live HungryGoWhere review-list page and print the result.

    Loads the ``hungrygowhere_review_list`` page config from
    ``../config/hungrygowhere.json``, downloads the reviews index page,
    runs ``parser.parse_page`` on it, then prints the links followed by
    the number of data items and the number of links.

    Nothing is asserted about the output, so the test fails only if one of
    the steps raises.  Requires network access.
    """
    pages_config = config.load_pages("../config/hungrygowhere.json")
    model = "hungrygowhere_review_list"
    # NOTE(review): assumes config.get_model_config returns the page entry
    # whose "model" value equals `model`, as the other tests in this file
    # use it -- confirm against the helper's implementation.
    page_config = config.get_model_config(pages_config, model)
    test_page = "http://www.hungrygowhere.com/reviews/"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    data, links = parser.parse_page(html, page_config)
    print(links)
    print(len(data))
    print(len(links))
def testParser_trip_review(self):
    """Parse a live TripAdvisor restaurant-review page and print the result.

    Loads the ``trip_review`` page config from
    ``../config/tripadvisor.json``, downloads one restaurant review page,
    runs ``parser.parse_page`` on it, then prints the links followed by
    the number of data items and the number of links.

    Nothing is asserted about the output, so the test fails only if one of
    the steps raises.  Requires network access.
    """
    pages_config = config.load_pages("../config/tripadvisor.json")
    model = "trip_review"
    # NOTE(review): assumes config.get_model_config returns the page entry
    # whose "model" value equals `model`, as the other tests in this file
    # use it -- confirm against the helper's implementation.
    page_config = config.get_model_config(pages_config, model)
    test_page = "http://www.tripadvisor.com.sg/Restaurant_Review-g294265-d2516429-Reviews-Auld_Alliance-Singapore.html"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    data, links = parser.parse_page(html, page_config)
    print(links)
    print(len(data))
    print(len(links))
def testParser_trip_main(self):
    """Parse the live TripAdvisor Singapore hotels page and check counts.

    Loads the ``trip_mainpage`` page config from
    ``../config/tripadvisor.json``, downloads the hotels listing page and
    runs ``parser.parse_page`` on it.  The number of data items and links
    is printed, then asserted to be 30 and 32 respectively.

    Requires network access; the expected counts are tied to the content
    of the live page.
    """
    pages_config = config.load_pages("../config/tripadvisor.json")
    model = "trip_mainpage"
    # NOTE(review): assumes config.get_model_config returns the page entry
    # whose "model" value equals `model`, as the other tests in this file
    # use it -- confirm against the helper's implementation.
    page_config = config.get_model_config(pages_config, model)
    test_page = "http://www.tripadvisor.com.sg/Hotels-g294265-Singapore-Hotels.html"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    data, links = parser.parse_page(html, page_config)
    print(len(data))
    print(len(links))
    self.assertEqual(len(data), 30)
    self.assertEqual(len(links), 32)
def testParser_hungry_detail(self):
    """Parse a live HungryGoWhere review-detail page and print the result.

    Loads the ``hungrygowhere_review_detail`` page config from
    ``../config/hungrygowhere.json``, downloads one review page, runs
    ``parser.parse_page`` on it, then prints the links, the parsed data
    and the length of each.

    Nothing is asserted about the output, so the test fails only if one of
    the steps raises.  Requires network access.
    """
    pages_config = config.load_pages("../config/hungrygowhere.json")
    model = "hungrygowhere_review_detail"
    # NOTE(review): assumes config.get_model_config returns the page entry
    # whose "model" value equals `model`, as the other tests in this file
    # use it -- confirm against the helper's implementation.
    page_config = config.get_model_config(pages_config, model)
    test_page = "http://www.hungrygowhere.com/singapore/je_crab_specialist_tampines/review/id-1f340200/"

    response = urllib2.urlopen(test_page)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even if the read fails.
        response.close()

    data, links = parser.parse_page(html, page_config)
    print(links)
    print(data)
    print(len(data))
    print(len(links))