def test_list_newer_album_photos(self, mocked_get):
    """A recent-format album url should yield one caption/url dict per photo.

    The first expected caption carries the "intro || " prefix in front of
    the photo's own caption.

    NOTE(review): ``mocked_get`` is presumably injected by a patch decorator
    that is not visible in this chunk; it is not used directly here.
    """
    photos = list_album_photos("bla")

    expected = [
        {"caption": "intro || photo 1 caption", "url": u"pic1.jpg"},
        {"caption": u"photo 2 caption", "url": u"pic2.jpg"},
        {"caption": u"photo 3 caption", "url": u"pic3.jpg"},
    ]
    self.assertEqual(expected, photos)
def download_album(name, path, url):
    """Download every photo of an album into ``path`` unless it already exists.

    If ``path`` already exists the album is treated as downloaded: a message
    is logged and nothing else happens. Otherwise the photo list is fetched
    with ``list_album_photos(url)``; an empty list returns silently without
    creating the directory.

    Each photo is saved as ``"<index> - <original filename>"``, where the
    index starts at 1 and the original filename is the last ``/``-separated
    segment of the photo url. A photo whose request raises
    ``http.DownloadError`` is skipped, but its index is still consumed.
    A caption is written with ``write_caption`` for every saved file whose
    path does not end in ``.gif``.

    Args:
        name: Album name, used only in log messages.
        path: Target directory for the album.
        url: Album page url passed to ``list_album_photos``.
    """
    # Guard: an existing directory means there is nothing to do.
    if os.path.exists(path):
        log.info(u'Photo album "{0}" already downloaded, skipping...'.format(name))
        return

    photos = list_album_photos(url)
    if not photos:
        return

    log.info(u'Downloading: "{0}"'.format(name))
    os.makedirs(path)

    # Numbering starts at 1 and advances for every listed photo, including
    # the ones that fail to download.
    for position, photo in enumerate(photos, 1):
        photo_url = photo['url']
        orig_filename = photo_url.split('/')[-1]
        target = os.path.join(path, '{0} - {1}'.format(position, orig_filename))

        try:
            response = http.get(photo_url, stream=True)
        except http.DownloadError:
            continue

        with open(target, 'wb') as out:
            shutil.copyfileobj(response.raw, out)

        # Captions are not written for .gif files.
        if target.endswith('.gif'):
            continue
        write_caption(target, photo['caption'])
def test_list_album_photos(self, mocked_get):
    """A photo album url should yield one caption/url dict per photo.

    The first expected caption is the album introduction joined to the first
    photo's caption with " || ".

    NOTE(review): ``mocked_get`` is presumably injected by a patch decorator
    that is not visible in this chunk; it is not used directly here.
    """
    first_caption = (
        u"NASA's Cassini Spacecraft is now reaching the end of its four-year prime mission "
        "(on June 30th), and about to enter into its extended mission. What a nice excuse "
        "for a retrospective of some of the great images sent back home by Cassini over "
        "the past four years. || The Sun is on the opposite side, so all of Saturn is "
        "backlit. Courtesy NASA/JPL-Caltech"
    )

    photos = list_album_photos("bla")

    expected = [
        {
            "caption": first_caption,
            "url": u"http://cache.boston.com/universal/site_graphics/blogs/bigpicture/saturn_05_30/cassini1.jpg",
        },
        {
            "caption": u"Swirls in Saturn's cloud-tops. Courtesy NASA/JPL-Caltech",
            "url": u"http://cache.boston.com/universal/site_graphics/blogs/bigpicture/saturn_05_30/cassini2.jpg",
        },
        {
            "caption": u"The surface of Saturn's moon Dione, up close. Courtesy NASA/JPL-Caltech",
            "url": u"http://cache.boston.com/universal/site_graphics/blogs/bigpicture/saturn_05_30/cassini3.jpg",
        },
    ]
    self.assertEqual(expected, photos)
def test_list_album_photos_removed_photos(self, mocked_get):
    """An album that contains removed photos should still list 41 photos."""
    # e.g. http://www.boston.com/bigpicture/2010/01/earthquake_in_haiti.html
    photos = list_album_photos("bla")
    photo_count = len(photos)
    self.assertEqual(41, photo_count)
def test_list_album_photos_containing_youtube(self, mocked_get):
    """An album with an embedded youtube video should list exactly 1 photo."""
    # e.g. http://www.boston.com/bigpicture/2008/10/nachtweys_wish_awareness_of_xd.html
    photos = list_album_photos("bla")
    photo_count = len(photos)
    self.assertEqual(1, photo_count)
def test_list_album_photos_non_gallery(self, mocked_get):
    """Scraping a page that is not a gallery should give an empty list."""
    # e.g. http://www.boston.com/bigpicture/2008/10/a_quick_note_1.html
    photos = list_album_photos("bla")
    self.assertEqual([], photos)