def test_scrape(self):
    """Check that scrape() pulls every expected field out of the fixture page.

    Runs scrape() over ``self.raw_html`` with five XPath expressions (claim
    link, title, form build id, cover image, description) and compares each
    returned value with the value expected from the fixture.
    """
    get_book_url, book_title, new_form_id, image_url, book_description = scrape(
        self.raw_html,
        "//a[contains(@class, 'twelve-days-claim')]/@href",
        "//div[contains(@class, 'dotd-title')]",
        "//input[@type='hidden'][starts-with(@id, 'form')][starts-with(@value, 'form')]/@value",
        "//img[contains(@class, 'bookimage')]/@src",
        "//*[@id='deal-of-the-day']/div/div/div[2]/div[3]"
    )

    expected_description = (
        "Today's free eBook is simple - it shows you how to get started "
        "with Machine Learning using R. Taking you through some potentially "
        "tricky concepts and mathematics, you'll soon learn how to apply "
        "Machine Learning principles to produce some real-world wins. "
        "What's holding you back? Try your hand with some neat Machine "
        "Learning techniques and tools today."
    )

    # Use the TestCase assertion helpers throughout: unlike bare ``assert``
    # they are not stripped under ``python -O`` and they report both values
    # when a comparison fails.
    self.assertEqual('/freelearning-claim/13250/21478', get_book_url)
    self.assertEqual('Machine Learning with R', book_title)
    self.assertEqual('form-7fb58741bd0c31a875c542ffc232268d', new_form_id)
    self.assertEqual(
        '//d1ldz4te4covpm.cloudfront.net/sites/default/files/imagecache/dotd_main_image/2148OS.jpg',
        image_url
    )
    self.assertEqual(expected_description, book_description)
def post(self):
    """Claim the current free book and send a notification e-mail.

    Steps:
      1. Fetch the page at ``url`` and scrape the claim link, title, form
         build id, cover image URL and description from it.
      2. Log in by POSTing the url-encoded ``login_details`` to ``url``.
      3. Request the claim link through the same ``opener``.
      4. Send the e-mail and write ``done`` to the response.

    Responds with status 500 when no claim link could be scraped and with
    status 401 when the login response body contains ``login_error``.
    """
    request_timeout = 45  # seconds, applied to every opener.open() call

    # Scrape the essential information from the landing page.
    raw_html = curl(url)
    get_book_url, book_title, new_form_id, image_url, book_description = scrape(
        raw_html,
        "//a[contains(@class, 'twelve-days-claim')]/@href",
        "//div[contains(@class, 'dotd-title')]",
        "//input[@type='hidden'][starts-with(@id, 'form')][starts-with(@value, 'form')]/@value",
        "//img[contains(@class, 'bookimage')]/@src",
        "//*[@id='deal-of-the-day']/div/div/div[2]/div[3]"
    )

    if new_form_id:
        # NOTE: this updates the shared ``login_details`` mapping in place,
        # so the most recently scraped form id is reused by later requests
        # whose scrape yields no form id.
        login_details["form_build_id"] = new_form_id

    # Pass the values as logging arguments so formatting only happens when
    # the record is actually emitted.
    logging.info(
        'get_book_url: %s \n book_title: %s \n new_form_id: %s \n image_url: %s',
        get_book_url, book_title, new_form_id, image_url
    )
    logging.info('book_description: \n %s', book_description)

    if not get_book_url:
        # Without a claim link there is nothing to grab; stop before logging
        # in rather than failing later on the URL concatenation.
        logging.error('claim link not found')
        self.error(500)
        return

    # Log in.
    login_payload = urllib.urlencode(login_details)
    login_request = urllib2.Request(
        url,
        login_payload,
        {'content-type': 'application/x-www-form-urlencoded'}
    )
    login_response = opener.open(login_request, timeout=request_timeout)
    try:
        login_body = login_response.read()
    finally:
        login_response.close()

    if login_error in login_body:
        logging.error('login failed')
        self.error(401)
        return

    # Grab the book. The body is read (and discarded) so the request is
    # fully consumed before the connection is closed.
    claim_response = opener.open(
        'https://www.packtpub.com' + get_book_url, timeout=request_timeout)
    try:
        claim_response.read()
    finally:
        claim_response.close()

    # The scraped image URL is protocol-relative, hence the added scheme.
    send_email(book_title, book_description, 'https:' + image_url)
    self.response.write('done')