Exemple #1
0
 def test_get_nothing__content_is_empty(self):
     """Extracting from an empty page should give ``None``."""
     pattern = 'id="photo-display" src="(?P<src>.+?)"'
     empty_page = ''
     found = get_image_url_from_raw_html(empty_page, pattern)
     self.assertEqual(None, found)
Exemple #2
0
 def test_get_nothing__not_found(self):
     """A page without the photo element should give ``None``."""
     page_without_photo = '<html><body>I like unit testing</body></html>'
     pattern = 'id="photo-display" src="(?P<src>.+?)"'
     found = get_image_url_from_raw_html(page_without_photo, pattern)
     self.assertEqual(None, found)
Exemple #3
0
 def test_get_url(self):
     """A page containing the photo element should give its ``src`` URL.

     The page fixture is built locally so the test does not depend on a
     ``raw_content`` name defined outside this method.
     """
     regex_text='id="photo-display" src="(?P<src>.+?)"'
     expected_result = "http://s3.amazonaws.com/twitpic/photos/large/29483605.png?AWSAccessKeyId=AKIAJF3XCCKACR3QDMOA&Expires=1316184447&Signature=gOXGQ9bmYWhdFvdOhKls658AwnY%3D"
     # Minimal page fragment that the pattern above matches exactly once.
     # NOTE(review): assumes get_image_url_from_raw_html returns the text
     # captured by the ``src`` group unchanged -- confirm against the helper.
     raw_content = (
         '<html><body><img id="photo-display" src="%s" /></body></html>'
         % expected_result
     )
     response = get_image_url_from_raw_html( raw_content, regex_text )
     self.assertEqual( expected_result, response)
Exemple #4
0
def find_image_url_in_page(url):
    """
    Open the page, find the photo in the page, and return url of the photo
    image.

    The response is tested against each supported photo service in a fixed
    order. The first service whose check accepts the response supplies the
    regular expression (capturing the address in the named group ``src``)
    that is handed to ``get_image_url_from_raw_html`` together with the
    page content.

    Args:
        url: Address of the page to request.

    Returns:
        The value returned by ``get_image_url_from_raw_html`` for the first
        matching service, or ``None`` when no service check accepts the
        response.
    """
    response, content = client.request(url)

    # Ordered (service check, pattern) pairs; order matters because only the
    # first accepting check is used. Patterns are raw strings so that regex
    # escapes such as \d and \s are never read as string escape sequences.
    services = (
        (is_twitpic_response,
         r'id="photo-display" src="(?P<src>.+?)"'),
        (is_lockerz_response,
         r'id="photo" src="(?P<src>.+?)"'),
        # Disabled in the original code, kept here for reference:
        # (is_pic_twitter_response,
        #  r'src="(?P<src>.+?)" alt="pic.twitter.com.'),
        (is_yfrog_response,
         r'id="main_image" src="(?P<src>.+?)"'),
        (is_molome_response,
         r'src="(?P<src>.+?)" alt="Photo"'),
        (is_picplz_response,
         r'src="(?P<src>.+?)" width="\d+" height="\d+" id="mainImage"'),
        (is_instagram_response,
         r'class="photo" src="(?P<src>.+?)"'),
        (is_flickr_response,
         r'src="(?P<src>.+?)" alt="photo"'),
        (is_twitgoo_response,
         r'id="fullsize" src="(?P<src>.+?)"'),
        (is_imgly_response,
         r'id="the-image" src="(?P<src>.+?)"'),
        (is_mobypicture_response,
         r'src="(?P<src>.+?)" id="main_picture"'),
        (is_owly_response,
         r'</?a+\s+href+[^>]+title="View original size"+[^>]*>+\s+</?img\s+src="(?P<src>.+?)"'),
        (is_brizzly_response,
         r'</?div+\s+class="picture r"+[^>]*>+\s+</?a+\s+href+[^>]*><img+\s+src="(?P<src>.+?)"'),
    )

    for accepts, regex_text in services:
        if accepts(response):
            return get_image_url_from_raw_html(content, regex_text)

    # No supported service recognised the response.
    return None