def test_get_nothing__content_is_empty(self):
    """Empty page content yields no image URL.

    Passes an empty string and the ``photo-display`` pattern to
    ``get_image_url_from_raw_html`` and expects ``None`` back.
    """
    raw_content = ''
    regex_text = 'id="photo-display" src="(?P<src>.+?)"'

    response = get_image_url_from_raw_html(raw_content, regex_text)

    # Identity check: the contract under test is "returns None", not
    # "returns something that happens to compare equal to None".
    self.assertIsNone(response)
def test_get_nothing__not_found(self):
    """Content without a matching tag yields no image URL.

    The HTML fixture contains no ``id="photo-display"`` attribute, so the
    pattern cannot match and ``get_image_url_from_raw_html`` is expected
    to return ``None``.
    """
    regex_text = 'id="photo-display" src="(?P<src>.+?)"'
    raw_content = '<html><body>I like unit testing</body></html>'

    response = get_image_url_from_raw_html(raw_content, regex_text)

    # Identity check: the contract under test is "returns None", not
    # "returns something that happens to compare equal to None".
    self.assertIsNone(response)
def test_get_url(self):
    """A tag matching the pattern yields the URL held in its ``src``.

    NOTE(review): assumes ``get_image_url_from_raw_html`` returns the text
    captured by the pattern's ``src`` group unchanged -- confirm against
    its definition.
    """
    regex_text = 'id="photo-display" src="(?P<src>.+?)"'
    expected_result = (
        "http://s3.amazonaws.com/twitpic/photos/large/29483605.png"
        "?AWSAccessKeyId=AKIAJF3XCCKACR3QDMOA"
        "&Expires=1316184447"
        "&Signature=gOXGQ9bmYWhdFvdOhKls658AwnY%3D"
    )
    # The fixture is built locally so the test is self-contained;
    # previously ``raw_content`` was never assigned inside this method.
    # The tag carries exactly the attribute sequence the pattern looks for.
    raw_content = (
        '<html><body>'
        '<img class="photo" id="photo-display" src="{0}" alt="photo">'
        '</body></html>'
    ).format(expected_result)

    response = get_image_url_from_raw_html(raw_content, regex_text)

    self.assertEqual(expected_result, response)
def find_image_url_in_page(url):
    """Fetch a photo page and return the URL of the image it displays.

    The page is requested through the module's ``client``. The response is
    then tested against each known photo service in a fixed order, and the
    first service that recognises it supplies the regular expression used
    to pull the image URL out of the page content.

    Args:
        url: Address of the photo page to open.

    Returns:
        Whatever ``get_image_url_from_raw_html`` returns for the first
        matching service, or ``None`` when no service recognises the
        response.
    """
    response, content = client.request(url)

    # (response predicate, extraction pattern) pairs. Order matters: the
    # first predicate that accepts the response wins, exactly like the
    # if/elif chain this table replaces. Patterns are raw strings so that
    # ``\d`` and ``\s`` reach the regex engine without relying on Python's
    # handling of unrecognised string escapes.
    #
    # NOTE: pic.twitter.com support is intentionally disabled. Its entry
    # used is_pic_twitter_response with the pattern
    #   src="(?P<src>.+?)" alt="pic.twitter.com.
    # and sat between lockerz and yfrog.
    extractors = (
        (is_twitpic_response,
         r'id="photo-display" src="(?P<src>.+?)"'),
        (is_lockerz_response,
         r'id="photo" src="(?P<src>.+?)"'),
        (is_yfrog_response,
         r'id="main_image" src="(?P<src>.+?)"'),
        (is_molome_response,
         r'src="(?P<src>.+?)" alt="Photo"'),
        (is_picplz_response,
         r'src="(?P<src>.+?)" width="\d+" height="\d+" id="mainImage"'),
        (is_instagram_response,
         r'class="photo" src="(?P<src>.+?)"'),
        (is_flickr_response,
         r'src="(?P<src>.+?)" alt="photo"'),
        (is_twitgoo_response,
         r'id="fullsize" src="(?P<src>.+?)"'),
        (is_imgly_response,
         r'id="the-image" src="(?P<src>.+?)"'),
        (is_mobypicture_response,
         r'src="(?P<src>.+?)" id="main_picture"'),
        (is_owly_response,
         r'</?a+\s+href+[^>]+title="View original size"+[^>]*>+\s+'
         r'</?img\s+src="(?P<src>.+?)"'),
        (is_brizzly_response,
         r'</?div+\s+class="picture r"+[^>]*>+\s+'
         r'</?a+\s+href+[^>]*><img+\s+src="(?P<src>.+?)"'),
    )

    for recognises, regex_text in extractors:
        if recognises(response):
            return get_image_url_from_raw_html(content, regex_text)

    # No known photo service produced this response.
    return None