def fetch_image(self, url, referer_url, terms):
    """
    Download the image referenced by the target URL and store it, provided
    it meets the format, size and metadata requirements.

    The image is rejected (with a gserror.FetchError) when any of the
    following holds: the URL matches GeoSpider._RE_FILTER, the download
    fails, the same content (by SHA-224 digest) was already seen or is
    already on disk under self.path, the data is not a JPEG that can be
    opened, the image is smaller than the configured minimum, or the EXIF
    header / coordinates cannot be extracted.

    Args:
        url: A string with the uniform resource locator of the image
        referer_url: Forwarded unchanged to GeoSpiderDocument
        terms: Forwarded unchanged to GeoSpiderDocument

    Raises:
        gserror.FetchError: For every rejection or failure listed above,
            and for errors raised while saving the document.
    """
    logging.info('Started - fetch_image')

    if re.search(GeoSpider._RE_FILTER, url):
        raise gserror.FetchError('regular expression filter error')

    # Download the raw bytes. The response is always closed, even when
    # read() fails, so the crawler does not leak one socket per image.
    try:
        response = urllib2.urlopen(url)
        try:
            img_data = response.read()
        finally:
            response.close()
    except urllib2.URLError as e:
        # HTTPError is a URLError subclass that carries a status code.
        if hasattr(e, 'code'):
            raise gserror.FetchError('{0} {1}'.format(e.code, GeoSpider.http_status_message(e.code)))
        raise gserror.FetchError('url error')
    except socket.timeout:
        raise gserror.FetchError('socket timeout')
    except socket.error:
        raise gserror.FetchError('socket error')

    # The content digest doubles as the image identity and file name.
    img_name = hashlib.sha224(img_data).hexdigest()

    if img_name in self.seen_images:
        raise gserror.FetchError('previously crawled image error')
    # Recorded before validation so that rejected content is not
    # processed again when reached through another URL.
    self.seen_images.add(img_name)

    try:
        img = Image.open(StringIO.StringIO(img_data))
    except IOError:
        raise gserror.FetchError('could not open the image error')

    if img.format != "JPEG":
        raise gserror.FetchError('incorrect image format %s' % img.format)

    if img.size[0] < GeoSpider._MIN_IMAGE_WIDTH or img.size[1] < GeoSpider._MIN_IMAGE_HEIGHT:
        raise gserror.FetchError('the image size (%d,%d) does not reach the minimum threshold' % img.size)

    try:
        exif_header = gsutils.get_exif_header(img)
    except gserror.EmptyExifHeaderError:
        raise gserror.FetchError('exif header not found')

    # One-element tuple: keeps %-formatting unambiguous for any value type.
    logging.info(termcolor.colored('exif-header: %s' % (exif_header,), 'green'))

    try:
        latlng = gsutils.get_latlng(exif_header)
    except gserror.EmptyLatLngError:
        raise gserror.FetchError('coordinates not found')

    # Index explicitly so the message works for a list as well as a tuple.
    logging.info(termcolor.colored('image coordinates: (%.6f, %.6f)' % (latlng[0], latlng[1]), 'magenta'))
    logging.info(termcolor.colored('image size: (%d,%d)' % img.size, 'magenta'))
    logging.info(termcolor.colored('image name: {0}.jpg'.format(img_name), 'magenta'))

    img_path = os.path.join(self.path, '{0}.jpg'.format(img_name))
    if os.path.isfile(img_path):
        raise gserror.FetchError('previously storaged image error')

    gsdoc = GeoSpiderDocument(img_name, latlng, url, referer_url, terms, img, img_data, exif_header)

    # Every failure while saving is reported to the caller as a FetchError.
    try:
        self.save(gsdoc)
    except gserror.GeocodeResponseError as e:
        raise gserror.FetchError('google api error {0}'.format(e))
    except urllib2.URLError:
        raise gserror.FetchError('could not connect to google map api')
    except ValueError:
        raise gserror.FetchError('value error could not save the xml document')
    except IOError:
        raise gserror.FetchError('input/output error')
    except socket.timeout:
        raise gserror.FetchError('socket timeout')
    except socket.error:
        raise gserror.FetchError('socket error')

    logging.info('Finished - fetch_image')
google_map_url = "http://maps.googleapis.com/maps/api/staticmap?center=0,0&zoom=1&size=640x480&sensor=false" google_map_params = "" for root, dirs, files in os.walk(dirpath): for name in files: filename = os.path.join(root,name) img = None exif_header = None try: img = Image.open(filename) except: img = None if img: img_cnt = img_cnt + 1 try: exif_header = gsutils.get_exif_header(img) except: exif_header = None if exif_header: try: latlng = gsutils.get_latlng(exif_header) except: latlng = [None,None] if latlng[0] and latlng[1]: google_map_params = google_map_params + "&markers=color:red%7C" + "%.6f,%.6f" % (latlng[0],latlng[1]) img_gps_cnt = img_gps_cnt + 1 exif_model = exif_header.get("Model", '') if len(exif_model) == 0: exif_model = 'None' exif_model = exif_model.translate(__CONTROL_CH, __CONTROL_CH[:32])