def test_is_s3_url(self):
    """is_s3_url accepts amazonaws.com S3 hosts and rejects a look-alike."""
    for s3_url in (
        'https://s3.amazonaws.com/adlkfj',
        'https://s3-us-west-2.amazonaws.com/adlkfj',
    ):
        self.assertTrue(md5s3stash.is_s3_url(s3_url))
    # Note the host below is "amazonas.com", not "amazonaws.com".
    self.assertFalse(md5s3stash.is_s3_url('https://s3.amazonas.com/adlkfj'))
def link_is_to_image(url, auth=None):
    """Check whether ``url`` is served with an image content type.

    A HEAD request is tried first. If it succeeds but reports a non-image
    type, a slower GET is issued, because a server has been seen to answer
    HEAD with 'text/html' while returning 'image/jpeg' for GET.

    Args:
        url: Address to probe; redirects are followed.
        auth: Optional value forwarded as ``auth`` to ``requests``. It is
            never sent when ``md5s3stash.is_s3_url(url)`` is true.

    Returns:
        True when a response has status 200 and a content type whose
        top-level type is "image". False for a non-200 status, a missing
        content-type header, or any other type.
    """
    # Build the request options once so HEAD and the GET fallback agree:
    # credentials are withheld from S3 URLs for *every* request, not just
    # the initial HEAD.
    request_kwargs = {"allow_redirects": True}
    if not md5s3stash.is_s3_url(url):
        request_kwargs["auth"] = auth

    # HEAD first (cheap), then GET only if HEAD reported a non-image type.
    for fetch in (requests.head, requests.get):
        response = fetch(url, **request_kwargs)
        if response.status_code != 200:
            return False
        content_type = response.headers.get("content-type", None)
        if not content_type:
            return False
        if content_type.split("/", 1)[0].lower() == "image":
            return True
    return False
def link_is_to_image(doc_id, url, auth=None):
    '''Check whether ``url`` is served with an image content type.

    A HEAD request is tried first; if it does not return 200 the URL is
    fetched with GET instead, since many servers do not support HEAD. When
    a successful HEAD reports a non-image type, a GET is also tried because
    a server has been seen to answer HEAD with 'text/html' while returning
    'image/jpeg' for GET.

    Args:
        doc_id: Identifier attached to any ``ImageHTTPError`` raised.
        url: Address to probe; redirects are followed.
        auth: Optional value forwarded as ``auth`` to ``requests``. It is
            never sent when ``md5s3stash.is_s3_url(url)`` is true.

    Returns:
        True when the content type's top-level type is "image"; False when
        the content-type header is missing or names another type.

    Raises:
        ImageHTTPError: a GET request returned a status other than 200.
    '''
    # Build the request options once so that every request (HEAD and both
    # GET fallbacks) treats S3 URLs the same way: no credentials are sent.
    request_kwargs = {'allow_redirects': True}
    if not md5s3stash.is_s3_url(url):
        request_kwargs['auth'] = auth

    response = requests.head(url, **request_kwargs)
    # A server has been seen returning 403 (with a text/html content type)
    # to HEAD, so a failed HEAD is not treated as final.
    # requests raises on its own if it cannot connect.
    already_used_get = False
    if response.status_code != 200:
        # many servers do not support HEAD requests, try get
        response = requests.get(url, **request_kwargs)
        already_used_get = True
        if response.status_code != 200:
            raise ImageHTTPError(
                'HTTP ERROR: {}'.format(response.status_code),
                doc_id=doc_id)

    content_type = response.headers.get('content-type', None)
    if not content_type:
        return False
    reg_type = content_type.split('/', 1)[0].lower()

    # Situation where a server returned 'text/html' to HEAD requests but
    # returned 'image/jpeg' for GET: try a slower GET if not image type.
    # Skip it when the answer already came from a GET -- repeating the
    # identical request would only download the resource a second time.
    if reg_type != 'image' and not already_used_get:
        response = requests.get(url, **request_kwargs)
        if response.status_code != 200:
            raise ImageHTTPError(
                'HTTP ERROR: {}'.format(response.status_code),
                doc_id=doc_id)
        content_type = response.headers.get('content-type', None)
        if not content_type:
            return False
        reg_type = content_type.split('/', 1)[0].lower()
    return reg_type == 'image'
def link_is_to_image(doc_id, url, auth=None):
    '''Report whether ``url`` responds with an image content type.

    Strategy: issue a HEAD request; fall back to GET when HEAD does not
    return 200 (many servers do not support HEAD), or when HEAD succeeds
    but reports a non-image type (a server has been seen to answer HEAD
    with 'text/html' while returning 'image/jpeg' for GET).

    Args:
        doc_id: Identifier attached to any ``ImageHTTPError`` raised.
        url: Address to probe; redirects are followed.
        auth: Optional value forwarded as ``auth`` to ``requests``. It is
            withheld whenever ``md5s3stash.is_s3_url(url)`` is true.

    Returns:
        True when the content type's top-level type is "image"; False when
        the content-type header is absent or names another type.

    Raises:
        ImageHTTPError: a GET request returned a status other than 200.
    '''
    # Decide once whether credentials accompany the requests, so the HEAD
    # and every GET fallback handle S3 URLs identically.
    options = {'allow_redirects': True}
    if not md5s3stash.is_s3_url(url):
        options['auth'] = auth

    def get_or_raise():
        # GET the URL, converting any non-200 status into ImageHTTPError.
        reply = requests.get(url, **options)
        if reply.status_code != 200:
            raise ImageHTTPError(
                'HTTP ERROR: {}'.format(reply.status_code), doc_id=doc_id)
        return reply

    def top_level_type(reply):
        # Return e.g. 'image' for 'image/jpeg', or None without a header.
        content_type = reply.headers.get('content-type', None)
        if not content_type:
            return None
        return content_type.split('/', 1)[0].lower()

    response = requests.head(url, **options)
    # A failed HEAD is not final: one server returns 403 (with text/html)
    # to HEAD. requests raises by itself if it cannot connect.
    head_succeeded = response.status_code == 200
    if not head_succeeded:
        response = get_or_raise()

    reg_type = top_level_type(response)
    if reg_type is None:
        return False

    # Only re-fetch with GET when the non-image answer came from HEAD; if
    # GET was already used, asking again would just repeat the download.
    if reg_type != 'image' and head_succeeded:
        reg_type = top_level_type(get_or_raise())
        if reg_type is None:
            return False
    return reg_type == 'image'
def test_is_s3_url(self):
    """Verify is_s3_url on two amazonaws.com S3 URLs and one near-miss."""
    is_s3_url = md5s3stash.is_s3_url

    plain_host = 'https://s3.amazonaws.com/adlkfj'
    self.assertTrue(is_s3_url(plain_host))

    region_host = 'https://s3-us-west-2.amazonaws.com/adlkfj'
    self.assertTrue(is_s3_url(region_host))

    # Host reads "amazonas.com" rather than "amazonaws.com".
    lookalike_host = 'https://s3.amazonas.com/adlkfj'
    self.assertFalse(is_s3_url(lookalike_host))