def get_link_content(link):
    """Fetch *link* and return its readable, control-character-free body text.

    Returns None (after logging a warning) for missing pages (404),
    invalid URLs, missing/non-HTML content types, or empty bodies.
    Raises for any other non-200 status.
    """
    try:
        response = requests.get(link)

        # BUG FIX: the original tested ``status_code == 400`` while
        # logging "404"; per the log message the intent is to skip
        # missing pages, so test for 404.
        if response.status_code == 404:
            logging.warn(u"404 {}".format(link))
            return None

        if response.status_code != 200:
            raise Exception(u"Unable to fetch release content: {0}".format(link))
    except requests.exceptions.InvalidURL as e:
        logging.warn(u"Invalid link {0}: {1}".format(link, unicode(e)))
        return None

    content_type = response.headers.get('content-type')

    if not content_type:
        logging.warn(u"Response did not contain a Content-Type header: {0}".format(link))
        return None

    (mime_type, mime_subtype, mt_params) = parse_mime_type(content_type)

    # Only HTML/XHTML bodies are worth extracting readable text from.
    if mime_type != 'text' or mime_subtype not in ('html', 'xhtml'):
        logging.warn(u"Skipping non-HTML link: {0}".format(link))
        return None

    if len(response.content) == 0:
        logging.warn(u"Server returned an empty body: {0}".format(link))
        return None

    (title, body) = readability_extract(response.content)

    return kill_control_characters(body)
def _http_get_json(self, url):
    """GET ``url`` via the shared HTTP helper and decode a JSON body.

    Parameters
    ----------
    url
        The URL to GET.

    Returns
    -------
    Data structure parsed from the JSON HTTP response.

    Exceptions
    ----------
    * PythonKCMeetupsBadJson
    * PythonKCMeetupsBadResponse
    * PythonKCMeetupsMeetupDown
    * PythonKCMeetupsNotJson
    * PythonKCMeetupsRateLimitExceeded
    """
    response = self._http_get(url)

    declared_type = response.headers['content-type']
    subtype = mimeparse.parse_mime_type(declared_type)[1]
    if subtype not in ('json', 'javascript'):
        raise PythonKCMeetupsNotJson(declared_type)

    try:
        return json.loads(response.content)
    except ValueError as err:
        raise PythonKCMeetupsBadJson(err)
def get_best_handler(cls, mimetype):
    """Return the handler and score that best fit the mimetype.

    Args:
        mimetype (tuple):
            A parsed mimetype 3-tuple of type, subtype, and parameters,
            as returned by :py:func:`mimeparse.parse_mime_type`.

    Returns:
        tuple:
        ``(best_score, mimetype_handler)``, or ``(0, None)`` when no
        handler matched.
    """
    winner = (0, None)

    for candidate in _registered_mimetype_handlers:
        for pattern in candidate.supported_mimetypes:
            try:
                match_score = score_match(
                    mimeparse.parse_mime_type(pattern), mimetype)
            except ValueError:
                # Skip unparsable registered patterns.
                continue

            if match_score > winner[0]:
                winner = (match_score, candidate)

    return winner
def serialize(self, obj, accept=None, **opts):
    '''serialize(obj) -> content, content_type

    Serialize an object to text.
    '''
    accept = accept or self.default_content_type
    content_type, fmt = self.get_format(accept)
    serializer = getattr(self, 'to_%s' % fmt)

    # Ugly hack to pull the parameter portion out of the Accept header:
    # keep the first comma-separated part naming our content type,
    # falling back to the bare content type (the '*/*' case).
    matching = [
        part
        for part in accept.split(',')
        if part.startswith(content_type)
    ]
    header_value = matching[0] if matching else content_type

    for key, value in mimeparse.parse_mime_type(header_value)[2].items():
        opts.setdefault(key, value)

    return self.SerializedContainer(serializer(obj, **opts), content_type)
def require_representation(self, req):
    """Require raw representation dictionary from falcon request object.

    This does not perform any field parsing or validation but only uses
    allowed content-encoding handler to decode content body.

    Note:
        Currently only JSON is allowed as content type.

    Args:
        req (falcon.Request): request object

    Returns:
        dict: raw dictionary of representation supplied in request body

    Raises:
        falcon.HTTPUnsupportedMediaType: when the Content-Type header
            is missing/unparsable, or names anything other than JSON.
    """
    try:
        type_, subtype, _ = parse_mime_type(req.content_type)
        content_type = '/'.join((type_, subtype))
    # BUG FIX: was a bare ``except:``, which also swallowed SystemExit
    # and KeyboardInterrupt.  ``Exception`` still covers both a missing
    # header (TypeError on None) and an unparsable one (ValueError).
    except Exception:
        raise falcon.HTTPUnsupportedMediaType(
            description="Invalid Content-Type header: {}".format(
                req.content_type
            )
        )

    if content_type == 'application/json':
        body = req.stream.read()
        return json.loads(body.decode('utf-8'))
    else:
        raise falcon.HTTPUnsupportedMediaType(
            description="only JSON supported, got: {}".format(content_type)
        )
def get_best_handler(cls, mimetype):
    """Return the handler and score that best fit the mimetype.

    Args:
        mimetype (unicode):
            The mimetype to find the best handler for.

    Returns:
        tuple:
        ``(best_score, mimetype_handler)``, or ``(0, None)`` if no
        handler was found.
    """
    best = (0, None)

    for handler in _registered_mimetype_handlers:
        for supported in handler.supported_mimetypes:
            try:
                current = score_match(
                    mimeparse.parse_mime_type(supported), mimetype)
            except ValueError:
                # Unparsable supported-mimetype entry; ignore it.
                continue

            if current > best[0]:
                best = (current, handler)

    return best
def for_type(cls, attachment):
    """Return the handler that is the best fit for the provided mimetype.

    Returns ``None`` when the attachment has no mimetype, the mimetype
    cannot be parsed, no handler matches, or instantiation fails.
    """
    if not attachment.mimetype:
        return None

    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    # BUG FIX: narrowed from a bare ``except:``, which also swallowed
    # SystemExit and KeyboardInterrupt.
    except Exception:
        logging.error('Unable to parse MIME type "%s" for %s',
                      attachment.mimetype, attachment)
        return None

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as 'octet-stream'
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment.get_review_request(), attachment)
        except ObjectDoesNotExist as e:
            logging.error('Unable to load review UI for %s: %s',
                          attachment, e)
        except Exception as e:
            logging.error('Error instantiating '
                          'FileAttachmentReviewUI %r: %s',
                          handler, e)

    return None
def get_best_handler(cls, mimetype):
    """Return the Review UI and score that best fit the mimetype.

    Args:
        mimetype (unicode):
            The mimetype to find a Review UI for.

    Returns:
        tuple:
        ``(best_score, review_ui)``, or ``(0, None)`` if one could not
        be found.
    """
    top = (0, None)

    for ui in _file_attachment_review_uis:
        for pattern in ui.supported_mimetypes:
            try:
                value = score_match(
                    mimeparse.parse_mime_type(pattern), mimetype)
            except ValueError:
                continue

            if value > top[0]:
                top = (value, ui)

    return top
def for_type(cls, attachment):
    """Return the handler that is the best fit for provided mimetype.

    Falls back to ('application', 'octet-stream', {}) when the declared
    mimetype cannot be parsed, and to a plain MimetypeHandler when no
    specific handler loads.
    """
    if not attachment.mimetype:
        return None

    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    # BUG FIX: narrowed from a bare ``except:``, which also swallowed
    # SystemExit and KeyboardInterrupt.
    except Exception:
        # BUG FIX: the log arguments were swapped relative to the
        # format string ('MIME type "%s" for %s').
        logging.warning('Unable to parse MIME type "%s" for %s',
                        attachment.mimetype, attachment)
        mimetype = ('application', 'octet-stream', {})

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as `octet-stream`
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment, mimetype)
        except Exception as e:
            logging.error('Unable to load Mimetype Handler for %s: %s',
                          attachment, e)

    return MimetypeHandler(attachment, mimetype)
def _test_parse_mime_type(self, args, expected):
    """Assert that parsing *args* matches *expected*.

    ``expected is None`` means the input must raise
    ``MimeTypeParseException``; otherwise it is compared (as a tuple)
    against the parse result.
    """
    if expected is None:
        self.assertRaises(mimeparse.MimeTypeParseException,
                          mimeparse.parse_mime_type, args)
        return

    want = tuple(expected)
    got = mimeparse.parse_mime_type(args)
    self.assertEqual(want, got,
                     "Expected: '%s' but got %s" % (want, got))
def __init__(self, http, postproc, uri,
             method='GET',
             body=None,
             headers=None,
             methodId=None,
             resumable=None):
    """Constructor for an HttpRequest.

    Args:
      http: httplib2.Http, the transport object to use to make a request
      postproc: callable, called on the HTTP response and content to transform
                it into a data object before returning, or raising an exception
                on an error.
      uri: string, the absolute URI to send the request to
      method: string, the HTTP method to use
      body: string, the request body of the HTTP request,
      headers: dict, the HTTP request headers
      methodId: string, a unique identifier for the API method being called.
      resumable: MediaUpload, None if this is not a resumbale request.
    """
    self.uri = uri
    self.method = method
    self.body = body
    self.headers = headers or {}
    self.methodId = methodId
    self.http = http
    self.postproc = postproc
    self.resumable = resumable

    # Pull the multipart boundary out of the content-type header.
    # BUG FIX: read from self.headers (normalized above to a dict)
    # rather than the raw ``headers`` argument, which may be None and
    # would raise AttributeError on ``.get()``.
    major, minor, params = mimeparse.parse_mime_type(
        self.headers.get('content-type', 'application/json'))

    # Terminating multipart boundary get a trailing '--' appended.
    self.multipart_boundary = params.get('boundary', '').strip('"') + '--'

    # If this was a multipart resumable, the size of the non-media part.
    self.multipart_size = 0

    # The resumable URI to send chunks to.
    self.resumable_uri = None

    # The bytes that have been uploaded.
    self.resumable_progress = 0

    # Total size of the upload; computed below for resumable requests.
    self.total_size = 0

    if resumable is not None:
        # Size of the non-media portion (the serialized request body).
        if self.body is not None:
            self.multipart_size = len(self.body)
        else:
            self.multipart_size = 0
        self.total_size = (
            self.resumable.size() +
            self.multipart_size +
            len(self.multipart_boundary))
def for_type(cls, attachment):
    """Return the handler that is the best fit for the provided mimetype.

    Returns None implicitly when no handler matches or instantiation
    fails (the failure is logged with a traceback).
    """
    mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment.get_review_request(), attachment)
        # FIX: replaced the legacy ``except Exception, e`` syntax with
        # the ``as`` form, valid on both Python 2.6+ and Python 3.
        except Exception as e:
            logging.error('Unable to load review UI for %s: %s',
                          attachment, e,
                          exc_info=1)
def _http_get_json(self, url):
    """GET *url* and return its parsed JSON payload.

    Raises:
        MeetupsNotJson: if the response is not JSON/JavaScript.
        MeetupsBadJson: if the body fails to parse as JSON.
    """
    response = self._http_get(url)

    declared = response.headers['content-type']
    if mimeparse.parse_mime_type(declared)[1] not in ('json', 'javascript'):
        raise MeetupsNotJson(declared)

    try:
        return json.loads(response.content)
    except ValueError as err:
        raise MeetupsBadJson(err)
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """ Import a submission from flickr. Uses their oEmbed API.

    flickr.com was nice enough to provide us with an oEmbed API.
    Apparently these guys also support video, so we should also make
    sure to not try to parse that.

    This function will define the following values in its return data:
        - author: simply "a flickr.com user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

    :param submission: A reddit submission to parse.
    """
    try:
        # Only handle URLs whose host matches this importer's pattern.
        if not self.regex.match(urlsplit(submission.url).netloc):
            return None
        url = submission.url
        data = {'author': 'a flickr.com user',
                'source': url,
                'importer_display':
                    {'header': 'Imported flickr.com image:\n\n'}}
        # HEAD request first: learn the content type without a download.
        r = requests.head(url, headers=self.headers)
        if r.status_code == 301:
            # NOTE(review): permanent redirects are treated as dead
            # links — confirm this is intended.
            return None
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        # If we're already given an image...
        if mime[0] == 'image':
            # Use the already given URL
            image_url = submission.url
        else:
            # Otherwise, find the image in the html
            self.log.info("Getting submission.url: " + url)
            html = urllib.request.urlopen(url).read().decode('utf-8')
            # Scrape flickr's CDN ("farmN.") image paths from the page;
            # the last match is used.
            image_urls = re.findall(r'farm[\d]\.[a-z0-9/.\\/_]*', html)
            if image_urls:
                image_url = 'http://' + image_urls[-1].replace('\\', '')
                self.log.info("Got image url %s", image_url)
            else:
                self.log.error('Could not find any flickr URL %s',
                               submission.url)
                return None
        # NOTE(review): assert is stripped under -O and would be hidden
        # by the except below anyway; consider an explicit check.
        assert image_url
        data['import_urls'] = [image_url]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import flickr URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def parse_content_type(contenttype):
    """Parse a Content-Type header value.

    Returns a ``(mime_type, encoding)`` pair: ``mime_type`` is the
    mimeparse 3-tuple with any ``charset`` parameter removed, and
    ``encoding`` is that charset (or None when absent).  The legacy
    'x-ctext' charset is mapped to 'latin1'.
    """
    mime_type = mimeparse.parse_mime_type(contenttype)

    # pop() both reads and removes the charset from the params dict,
    # defaulting to None when no charset was given.
    encoding = mime_type[2].pop("charset", None)

    if encoding == 'x-ctext':
        encoding = 'latin1'

    return mime_type, encoding
def status(
    self,
    test_id=None,
    test_status=None,
    test_tags=None,
    runnable=True,
    file_name=None,
    file_bytes=None,
    eof=False,
    mime_type=None,
    route_code=None,
    timestamp=None,
):
    """Handle a subunit status event.

    Forwards the event to the parent, then either echoes global
    (non-test) file attachments to the output stream, or prints a
    one-line "[start]" marker the first time a test is reported as
    in progress.
    """
    super(Starts, self).status(
        test_id,
        test_status,
        test_tags=test_tags,
        runnable=runnable,
        file_name=file_name,
        file_bytes=file_bytes,
        eof=eof,
        mime_type=mime_type,
        route_code=route_code,
        timestamp=timestamp,
    )
    if not test_id:
        # Global (non-test) content: echo the attachment as text.
        if not file_bytes:
            return
        # NOTE(review): "test/plain;charset=utf8" looks like a
        # historically mis-emitted MIME value being normalized here —
        # confirm against the producer.
        if not mime_type or mime_type == "test/plain;charset=utf8":
            mime_type = "text/plain; charset=utf-8"
        primary, sub, parameters = mimeparse.parse_mime_type(mime_type)
        content_type = testtools.content_type.ContentType(primary, sub, parameters)
        content = testtools.content.Content(content_type, lambda: [file_bytes])
        text = content.as_text()
        # Remember when the echoed chunk did not end in a newline so a
        # later "[start]" line can insert one first.
        if text and text[-1] not in "\r\n":
            self._neednewline = True
        self._output.write(text)
    elif test_status == "inprogress" and test_id not in self._emitted:
        if self._neednewline:
            self._neednewline = False
            self._output.write("\n")
        worker = ""
        # Extract the worker id from any "worker-<id>" tag.
        for tag in test_tags or ():
            if tag.startswith("worker-"):
                worker = "(" + tag[7:] + ") "
        if timestamp:
            timestr = timestamp.isoformat()
        else:
            timestr = ""
        self._output.write("%s: %s%s [start]\n" % (timestr, worker, test_id))
        # Track emitted ids so each test's start is announced only once.
        self._emitted.add(test_id)
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """Import a submission from drawcrowd. Uses raw HTML scraping.

    As it turns out, drawcrowd likes to provide different data (all in
    <meta> tags) to non-web-browser requests. Since it provides enough
    information anyways, we don't bother getting around it and just
    parse that.

    This function will define the following values in its return data:
        - author: The author of the post
        - source: The url of the submission
        - importer_display/header
        - import_urls

    :param submission: A reddit submission to parse.
    """
    try:
        # Reddit URLs may arrive HTML-escaped; unescape before matching.
        url = html.unescape(submission.url)
        if not self.regex.match(urlsplit(url).netloc):
            return None
        data = {'source': url}
        # HEAD request first: learn the content type without a download.
        r = requests.head(url, headers=self.headers)
        if r.status_code == 301:  # Moved Permanently
            return None
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        if mime[0] == 'image':
            # Direct image link — no scraping needed.
            data['author'] = 'An unknown drawcrowd user'
            image_url = url
        else:
            # Note: Drawcrowd provides different content to non-web-browsers.
            r = requests.get(url, headers=self.headers)
            bs = bs4.BeautifulSoup(r.content.decode('utf-8'))
            # OpenGraph meta tags carry the image and title/author.
            matched = bs.find(property='og:image')
            if not matched:
                self.log.warning('Could not find locate drawcrowd image to scrape.')
                return None
            image_url = matched['content']
            matched = bs.find(property='og:title')
            if matched:
                data['author'] = matched['content']
            else:
                data['author'] = 'an unknown drawcrowd author'
        data['importer_display'] = {
            'header': 'Mirrored image from {}:\n\n'.format(data['author'])}
        # NOTE(review): assert is stripped under -O and would be hidden
        # by the except below anyway; consider an explicit check.
        assert image_url
        data['import_urls'] = [image_url]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import drawcrowd URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """ Import a submission from gyazo. Uses their oEmbed API.

    gyazo.com was nice enough to provide us with an oEmbed API.
    Apparently these guys also support video, so we should also make
    sure to not try to parse that.

    This function will define the following values in its return data:
        - author: simply "a gyazo.com user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

    :param submission: A reddit submission to parse.
    """
    try:
        # Only handle URLs whose host matches this importer's pattern.
        if not self.regex.match(urlsplit(submission.url).netloc):
            return None
        data = {'author': 'a gyazo.com user',
                'source': submission.url,
                'importer_display':
                    {'header': 'Imported gyazo.com image:\n\n'}}
        # HEAD request first: learn the content type without a download.
        r = requests.head(submission.url, headers=self.headers)
        if r.status_code == 301:
            # NOTE(review): permanent redirects are treated as dead
            # links — confirm this is intended.
            return None
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        # If we're already given an image...
        if mime[0] == 'image':
            # Use the already given URL
            image_url = submission.url
        else:
            # Otherwise, use the gyazo oEmbed API.
            response = requests.get(
                'https://api.gyazo.com/api/oembed/',
                {'url': submission.url},
                headers=self.headers).json()
            if response.get('type') == 'photo':
                image_url = response.get('url')
            else:
                # This is something that is not a photo. Do not scrape.
                return None
        # NOTE(review): assert is stripped under -O and would be hidden
        # by the except below anyway; consider an explicit check.
        assert image_url
        data['import_urls'] = [image_url]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import gyazo URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def encoding(self):
    """Character encoding for the body.

    A manually assigned encoding wins.  Otherwise the charset parameter
    is read from the Content-Type header, defaulting per primary type;
    with no Content-Type at all, None is returned.
    """
    if self._encoding is not None:
        # A manually configured encoding takes precedence.
        return self._encoding

    header = self.headers.get('Content-Type')
    if not header:
        return None

    primary, _, parameters = mimeparse.parse_mime_type(header)

    # application/* defaults to UTF-8; everything else to Latin-1.
    fallback = 'utf-8' if primary == 'application' else 'iso-8859-1'
    return parameters.get('charset', fallback)
def get_best_handler(cls, mimetype):
    """Return the ``(score, handler)`` pair that best fits *mimetype*."""
    top_score = 0
    top_handler = None

    for registered in _registered_mimetype_handlers:
        for pattern in registered.supported_mimetypes:
            try:
                value = score_match(
                    mimeparse.parse_mime_type(pattern), mimetype)
            except ValueError:
                # Ignore unparsable registered patterns.
                continue

            if value > top_score:
                top_score, top_handler = value, registered

    return (top_score, top_handler)
def build_content_type(format, encoding='utf-8', api=None):
    """
    Adds the vnd.api.<api_name> attribute to the content type (if using
    AcceptHeaderRouter) and appends the character encoding.
    """
    if api and api._accept_header_routing:
        # FIX: renamed locals that shadowed the ``type`` and ``vars``
        # builtins.
        major, subtype, params = mimeparse.parse_mime_type(format)
        subtype = '%s+%s' % (api.subtype, subtype)
        attributes = ''
        # Re-serialize any media-type parameters after the subtype.
        for key, value in params.iteritems():
            attributes += '; %s=%s' % (key, value)
        format = '%s/%s%s' % (major, subtype, attributes)

    # Do not append a charset when one is already present.
    if 'charset' in format:
        return format

    return "%s; charset=%s" % (format, encoding)
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """Import a submission from tinypic. Uses raw HTML scraping.

    Because this downloads the page and tries to scrape the HTML, we
    are at significant risk of the image ID on the DOM changing.
    Therefore, this plugin is liable to break.

    This function will define the following values in its return data:
        - author: simply "an anonymous Tinypic user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

    :param submission: A reddit submission to parse.
    """
    try:
        # It seems PRAW doesn't unescape the reddit URL.
        # Tinyurl is the only importer so far that depends on URL parameters.
        url = html.unescape(submission.url)
        if not self.regex.match(urlsplit(url).netloc):
            return None
        data = {'author': 'an anonymous Tinypic user',
                'source': url,
                'importer_display':
                    {'header': '~~Liberated~~Mirrored tinypic image:\n\n'}}
        # HEAD request first: learn the content type without a download.
        r = requests.head(url, headers=self.headers)
        if r.status_code == 301:  # Moved Permanently
            return None
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        if mime[0] == 'image':
            # Direct image link — no scraping needed.
            image_url = url
        else:
            # Scrape the page and pull the image out of the #imgFrame div.
            r = requests.get(url, headers=self.headers)
            bs = bs4.BeautifulSoup(r.content.decode('utf-8'))
            matched = bs.select('div#imgFrame img')
            if not matched:
                self.log.warning('Could not find locate Tinypic image to scrape.')
                return None
            image_url = matched[0]['src']
        # NOTE(review): assert is stripped under -O and would be hidden
        # by the except below anyway; consider an explicit check.
        assert image_url
        data['import_urls'] = [image_url]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import tinypic URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def get_best_handler(cls, mimetype):
    """Return the ``(score, review_ui)`` pair that best fits *mimetype*."""
    leader_score = 0
    leader = None

    for candidate_ui in _file_attachment_review_uis:
        for supported in candidate_ui.supported_mimetypes:
            try:
                current = score_match(
                    mimeparse.parse_mime_type(supported), mimetype)
            except ValueError:
                # Ignore unparsable supported-mimetype entries.
                continue

            if current > leader_score:
                leader_score = current
                leader = candidate_ui

    return leader_score, leader
def encoding(self):
    """
    The name of the encoding used to decode the stream’s bytes into
    strings, and to encode strings into bytes.

    Reads the charset value from the `Content-Type` header, if
    available; else, returns nothing.
    """
    header = self.headers.get('Content-Type')
    if not header:
        # No Content-Type header: no encoding can be determined.
        return None

    primary, _, parameters = mimeparse.parse_mime_type(header)

    # application/* defaults to UTF-8; everything else to Latin-1.
    return parameters.get(
        'charset',
        'utf-8' if primary == 'application' else 'iso-8859-1')
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """Import a submission from Derpibooru.

    This function will define the following values in its return data:
        - author: the artist name reported by Derpibooru's oEmbed API
        - source: the direct image URL
        - importer_display/header
        - import_urls

    The submission URL is resolved through Derpibooru's oEmbed endpoint
    to find the image to mirror.

    :param submission: A reddit submission to parse.
    """
    try:
        # Reddit URLs may arrive HTML-escaped; unescape before matching.
        url = html.unescape(submission.url)
        if not self.regex.match(urlsplit(url).netloc):
            return None

        # HEAD request: learn the content type without a download.
        r = requests.head(url, headers=self.headers)
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        # if mime[0] == 'image':
        self.log.debug('Initiating Derpibooru plugin')

        # The oEmbed API endpoint for this submission.
        json_url = 'http://derpiboo.ru/oembed.json?url=' + url
        api_response = requests.get(json_url)  # Fetch the API's JSON file.
        # FIX: the parsed payload was previously bound to a local named
        # ``json``, shadowing the json module.
        oembed = api_response.json()

        img = 'http:' + oembed['thumbnail_url']
        author = oembed['author_name']
        provider_url = oembed['provider_url']

        data = {
            'author': author,
            'source': img,
            'importer_display': {
                # FIX: the original built this via a backslash line
                # continuation *inside* the string literal, embedding a
                # run of indentation spaces in the user-visible markdown.
                'header': ('Mirrored [image](' + provider_url +
                           ') by Derpibooru artist [' + author +
                           '](https://derpiboo.ru/tags/artist-colon-' +
                           author + '):\n\n'),
            },
        }
        data['import_urls'] = [img]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import Derpibooru URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def __init__(self, http, postproc, uri,
             method='GET',
             body=None,
             headers=None,
             methodId=None,
             resumable=None):
    """Constructor for an HttpRequest.

    Args:
      http: httplib2.Http, the transport object to use to make a request
      postproc: callable, called on the HTTP response and content to transform
                it into a data object before returning, or raising an exception
                on an error.
      uri: string, the absolute URI to send the request to
      method: string, the HTTP method to use
      body: string, the request body of the HTTP request,
      headers: dict, the HTTP request headers
      methodId: string, a unique identifier for the API method being called.
      resumable: MediaUpload, None if this is not a resumbale request.
    """
    self.uri = uri
    self.method = method
    self.body = body
    self.headers = headers or {}
    self.methodId = methodId
    self.http = http
    self.postproc = postproc
    self.resumable = resumable
    # Callbacks to invoke when a response is received.
    self.response_callbacks = []
    # Whether the last (resumable) request ended in an error.
    self._in_error_state = False

    # Pull the multipart boundary out of the content-type header.
    # BUG FIX: read from self.headers (normalized above to a dict)
    # rather than the raw ``headers`` argument, which may be None and
    # would raise AttributeError on ``.get()``.
    major, minor, params = mimeparse.parse_mime_type(
        self.headers.get('content-type', 'application/json'))

    # The size of the non-media part of the request.
    self.body_size = len(self.body or '')

    # The resumable URI to send chunks to.
    self.resumable_uri = None

    # The bytes that have been uploaded.
    self.resumable_progress = 0
def for_type(cls, attachment):
    """Return the handler that is the best fit for the provided mimetype.

    Returns None implicitly when no handler matches or instantiation
    fails (the failure is logged with a traceback).
    """
    mimetype = mimeparse.parse_mime_type(attachment.mimetype)

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as `octet-stream`
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment, mimetype)
        # FIX: replaced the legacy ``except Exception, e`` syntax with
        # the ``as`` form, valid on both Python 2.6+ and Python 3.
        except Exception as e:
            logging.error('Unable to load Mimetype Handler for %s: %s',
                          attachment, e,
                          exc_info=1)
def analyze_resource_file(path, extension=None):
    """Inspect the file at *path* and return (extension, file_info, encoding).

    Uses libmagic for the MIME type/description, then refines the
    guessed extension and encoding with format-specific heuristics for
    plain text and office/spreadsheet documents.
    """
    def isnt_msdoc_text(content_type):
        # True when the registered extensions for this content type do
        # NOT include doc/docx.  Assumes SUPPORTED_CONTENT_TYPES rows
        # are (family, content_type, extensions) — TODO confirm.
        extensions = list(
            filter(lambda x: x[1] == content_type,
                   settings.SUPPORTED_CONTENT_TYPES))[0][2]
        return len({'doc', 'docx'} & set(extensions)) == 0

    logger.debug(f"analyze_resource_file({path}, {extension})")
    # libmagic configured to report both MIME type and encoding.
    m = magic.Magic(mime=True, mime_encoding=True)
    result = m.from_file(path)
    family, content_type, options = parse_mime_type(result)
    logger.debug(f" parsed mimetype: {family}/{content_type});{options}")
    # Human-readable description from libmagic.
    file_info = magic.from_file(path)
    logger.debug(f" file info: {file_info}")
    encoding = options.get('charset', 'unknown')
    logger.debug(f" encoding: {encoding}")
    extension = file_format_from_content_type(content_type,
                                              family=family,
                                              extension=extension)
    logger.debug(f" extension: {extension}")
    if family == 'text' and content_type == 'plain':
        # Plain text: fall back to heuristics when libmagic could not
        # determine the charset, then sniff the concrete text format.
        if encoding.startswith('unknown'):
            encoding = guess_file_encoding(path)
            logger.debug(f" encoding (guess-plain): {encoding}")
        extension = guess_text_file_format(path, encoding)
        logger.debug(f" extension (guess-plain): {extension}")
    if extension in ('doc', 'docx', 'xls', 'xlsx', 'ods', 'odt') or content_type == 'msword':
        # Office documents: libmagic often confuses spreadsheets and
        # word-processor files, so re-check with a dedicated sniffer.
        if encoding.startswith('unknown'):
            encoding = guess_file_encoding(path)
            logger.debug(f" encoding (guess-spreadsheet): {encoding}")
        spreadsheet_format = guess_spreadsheet_file_format(path, encoding)
        if any((extension in ('xls', 'xlsx', 'ods'),
                isnt_msdoc_text(content_type),
                spreadsheet_format)):
            extension = spreadsheet_format
        logger.debug(f" extension (guess-spreadsheet): {extension}")
    logger.debug(
        f' finally: extension = {extension}, file_info = {file_info}, encoding = {encoding}'
    )
    return extension, file_info, encoding
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """Import a submission from gifs.com.

    Because this downloads the page and tries to scrape the HTML, we
    are at significant risk of the image ID on the DOM changing.
    Therefore, this plugin is liable to break.

    This function will define the following values in its return data:
        - author: simply "an anonymous user on gifs.com"
        - source: The url of the submission
        - importer_display/header
        - import_urls

    :param submission: A reddit submission to parse.
    """
    try:
        # Reddit URLs may arrive HTML-escaped; unescape before matching.
        url = html.unescape(submission.url)
        if not self.regex.match(urlsplit(url).netloc):
            return None
        data = {
            'author': 'a gifscom user',
            'source': url,
            'importer_display': {
                'header': 'Mirrored gifscom image:\n\n'
            }
        }
        # HEAD request: learn the content type without a download.
        r = requests.head(url, headers=self.headers)
        mime_text = r.headers.get('Content-Type')
        mime = mimeparse.parse_mime_type(mime_text)
        if mime[0] == 'image':
            image_url = url
        else:
            # No HTML scraping fallback here: non-image URLs are
            # simply rejected.
            self.log.warning(
                'gifs.com URL posted that is not an image: %s',
                submission.url)
            return None
        data['import_urls'] = [image_url]
        return data
    except Exception:
        # Best-effort importer: log the traceback and signal failure.
        self.log.error('Could not import gifs.com URL %s (%s)',
                       submission.url, traceback.format_exc())
        return None
def supportsSingleExports(self, bug_ids):
    """Return True if the Trac instance provides CSV exports for single
    tickets, False otherwise.

    :bug_ids: A list of bug IDs that we can use for discovery purposes.
    """
    # The HTML variant of the ticket URL (CSV query string stripped) is
    # used to probe whether a ticket id is valid at all.
    html_ticket_url = '%s/%s' % (
        self.baseurl, self.ticket_url.replace('?format=csv', ''))

    for bug_id in bug_ids:
        try:
            # We try to retrieve the ticket in HTML form, since that
            # will tell us whether or not it is actually a valid ticket.
            ticket_id = int(bug_id)
            self._getPage(html_ticket_url % ticket_id)
        except BugTrackerConnectError as e:
            if isinstance(e.error, requests.HTTPError):
                # We can consider the ticket to be invalid.
                pass
            else:
                # Anything other than an HTTP error is a real
                # connectivity problem — propagate it.
                raise
        except ValueError:
            # The ticket_id couldn't be identified and it's of no use to
            # us anyway.
            pass
        else:
            # If we didn't get an error we can try to get the ticket in
            # CSV form. If this fails then we can consider single ticket
            # exports to be unsupported.
            try:
                response = self._getPage(
                    "%s/%s" % (self.baseurl, self.ticket_url % ticket_id))
                # Supported only when the export really comes back as
                # a text/csv (or similar */csv) response.
                subtype = parse_mime_type(
                    response.headers.get('Content-Type', ''))[1]
                return subtype == 'csv'
            except BugTrackerConnectError:
                return False
    else:
        # If we reach this point then we likely haven't had any valid
        # tickets or something else is wrong. Either way, we can only
        # assume that CSV exports of single tickets aren't supported.
        return False
def get_best_handler(cls, mimetype):
    """Return the ``(score, handler_class)`` pair best matching *mimetype*.

    Scores this class's own supported mimetypes first, then recurses
    into subclasses, keeping whichever candidate scores highest.  The
    class itself is the default winner at score 0.
    """
    best_score, best_fit = (0, cls)

    for pattern in cls.supported_mimetypes:
        try:
            candidate = score_match(
                mimeparse.parse_mime_type(pattern), mimetype)
        except ValueError:
            # Ignore unparsable supported-mimetype entries.
            continue

        if candidate > best_score:
            best_score, best_fit = (candidate, cls)

    # Let each subclass compete recursively.
    for subclass in cls.__subclasses__():
        sub_score, sub_handler = subclass.get_best_handler(mimetype)

        if sub_score > best_score:
            best_score, best_fit = (sub_score, sub_handler)

    return (best_score, best_fit)
def for_type(cls, attachment):
    """Return the Review UI that is the best fit for a file attachment.

    Args:
        attachment (reviewboard.attachments.models.FileAttachments):
            The file attachment to locate a Review UI for.

    Returns:
        FileAttachmentReviewUI:
        The Review UI for the attachment, or ``None`` if a suitable
        one could not be found.
    """
    if not attachment.mimetype:
        return None

    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    # BUG FIX: narrowed from a bare ``except:``, which also swallowed
    # SystemExit and KeyboardInterrupt.
    except Exception:
        logging.error('Unable to parse MIME type "%s" for %s',
                      attachment.mimetype, attachment)
        return None

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as 'octet-stream'
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment.get_review_request(), attachment)
        except ObjectDoesNotExist as e:
            logging.error('Unable to load review UI for %s: %s',
                          attachment, e)
        except Exception as e:
            logging.error('Error instantiating '
                          'FileAttachmentReviewUI %r: %s',
                          handler, e)

    return None
def mime_object_maker(url, mimetype, session=None):
    """
    return a data object suitable for the mimetype given.
    this will either return a astropy fits object or a pyvo DALResults object,
    a PIL object for conventional images or string for text content.

    Parameters
    ----------
    url : str
        the object download url
    mimetype : str
        the content mimetype
    session : object
        optional session to use for network requests
    """
    session = use_session(session)
    full_type, subtype, params = mimeparse.parse_mime_type(mimetype)

    # Plain text: return the decoded body directly.
    if full_type == 'text':
        return session.get(url).text

    # FITS data: hand the raw bytes to astropy.
    if subtype in ('fits', 'x-fits'):
        fits_response = session.get(url)
        return HDUList.fromstring(fits_response.content)

    # Conventional images: wrap the bytes in a PIL image.
    if full_type == 'image':
        from PIL import Image
        from io import BytesIO
        image_response = session.get(url)
        return Image.open(BytesIO(image_response.content))

    # VOTables: either a datalink document or a generic DAL result.
    if subtype in ('x-votable', 'x-votable+xml'):
        # As soon as there are some kind of recursive data structures,
        # things start to get messy
        if params.get('content', None) == 'datalink':
            from .adhoc import DatalinkResults
            return DatalinkResults.from_result_url(url)
        else:
            from .query import DALResults
            return DALResults.from_result_url(url)
def for_type(cls, attachment):
    """Return the Review UI that is the best fit for a file attachment.

    Args:
        attachment (reviewboard.attachments.models.FileAttachments):
            The file attachment to locate a Review UI for.

    Returns:
        FileAttachmentReviewUI:
        The Review UI for the attachment, or ``None`` if a suitable
        one could not be found.
    """
    if not attachment.mimetype:
        return None

    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    # BUG FIX: narrowed from a bare ``except:``, which also swallowed
    # SystemExit and KeyboardInterrupt.
    except Exception:
        logging.error('Unable to parse MIME type "%s" for %s',
                      attachment.mimetype, attachment)
        return None

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as 'octet-stream'
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment.get_review_request(), attachment)
        except ObjectDoesNotExist as e:
            logging.error('Unable to load review UI for %s: %s',
                          attachment, e)
        except Exception as e:
            logging.error(
                'Error instantiating '
                'FileAttachmentReviewUI %r: %s',
                handler, e)

    return None
def create_text(self, komtext):
    """Create a LysKOM text from *komtext* and return its text number.

    Builds the misc-info (recipients and comment-to links), encodes the
    subject/body, attaches creating-software and content-type aux
    items, and submits a ReqCreateText to the server.
    """
    misc_info = kom.CookedMiscInfo()

    # Copy over non-None recipients and comment-to references.
    if komtext.recipient_list is not None:
        for rec in komtext.recipient_list:
            if rec is not None:
                misc_info.recipient_list.append(rec)

    if komtext.comment_to_list is not None:
        for ct in komtext.comment_to_list:
            if ct is not None:
                misc_info.comment_to_list.append(ct)

    # NOTE(review): Python 2 debug print left in place; consider using
    # logging instead.
    print misc_info.to_string()

    mime_type = mimeparse.parse_mime_type(komtext.content_type)

    # Because a text consists of both a subject and body, and you
    # can have a text subject in combination with an image, a
    # charset is needed to specify the encoding of the subject.
    mime_type[2]['charset'] = 'utf-8'

    content_type = mime_type_tuple_to_str(mime_type)

    # TODO: how would this work with images?
    fulltext = str()
    fulltext += komtext.subject.encode('utf-8') + "\n"

    if (mime_type[0] == 'text'):
        fulltext += komtext.body.encode('utf-8')
    else:
        # Non-text bodies are appended unencoded — presumably already
        # raw bytes (e.g. image data); TODO confirm.
        fulltext += komtext.body

    aux_items = []
    aux_items.append(
        kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                    data="%s %s" % (self.client_name, self.client_version)))
    aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE, data=content_type))

    text_no = kom.ReqCreateText(self.conn, fulltext, misc_info,
                                aux_items).response()
    return text_no
def for_type(cls, attachment):
    """Return the handler that is the best fit for provided mimetype.

    Args:
        attachment (reviewboard.attachments.models.FileAttachment):
            The file attachment to find the best handler for.

    Returns:
        MimetypeHandler:
        The best mimetype handler for the attachment, or ``None`` if
        one could not be found.
    """
    if not attachment.mimetype:
        return None

    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    except Exception:
        # Fixed: was a bare `except:` (also swallowed KeyboardInterrupt),
        # and the log arguments were swapped relative to the format string
        # ('MIME type "%s" for %s' expects (mimetype, attachment)).
        logging.warning('Unable to parse MIME type "%s" for %s',
                        attachment.mimetype, attachment)
        mimetype = ('application', 'octet-stream', {})

    # Override the mimetype if mimeparse is known to misinterpret this
    # type of file as `octet-stream`
    extension = os.path.splitext(attachment.filename)[1]

    if extension in MIMETYPE_EXTENSIONS:
        mimetype = MIMETYPE_EXTENSIONS[extension]

    score, handler = cls.get_best_handler(mimetype)

    if handler:
        try:
            return handler(attachment, mimetype)
        except Exception as e:
            logging.error('Unable to load Mimetype Handler for %s: %s',
                          attachment, e)

    # Fall back to the generic handler when no specific one loads.
    return MimetypeHandler(attachment, mimetype)
def export_submission(self, import_urls: list, video: bool = False,
                      **import_info) -> dict:
    """Post direct links for imported URLs that are actually raw videos.

    Each URL is probed with a HEAD request; only responses whose
    Content-Type parses to a ``video/*`` MIME type are kept.

    This function will define the following values in the export data:
    - link_display

    :param import_urls: A set (of one?) of links to videos.
    :param video: Whether the imported data is a video or not.
    :param import_info: Other importing information passed. Ignored.
    :return: None if no export, an export info dictionary otherwise.
    """
    if not video:
        return None

    self.log.debug('Attempting to upload raw video URL.')

    direct_links = []
    for candidate_url in import_urls:
        head_response = requests.head(candidate_url, headers=self.headers)
        if not head_response.ok:
            self.log.debug('URL %s was not valid.', candidate_url)
            continue

        try:
            content_type = head_response.headers.get('Content-Type')
            parsed_mime = mimeparse.parse_mime_type(content_type)
        except Exception:
            self.log.debug('Error parsing MIME for URL %s', candidate_url)
            continue

        if parsed_mime[0] != 'video':
            self.log.debug('URL %s is not a video!', candidate_url)
            continue

        direct_links.append('[Direct video](%s)  \n' % candidate_url)

    if not direct_links:
        self.log.info('No direct video links found!')
        return None

    return {'link_display': ''.join(direct_links)}
def mime_object_maker(url, mimetype):
    """
    return a data object suitable for the mimetype given.
    this will either return a astropy fits object or a pyvo
    DALResults object, a PIL object for conventional images
    or string for text content.

    Parameters
    ----------
    url : str
        the object download url
    mimetype : str
        the content mimetype
    """
    main_type, sub_type, params = mimeparse.parse_mime_type(mimetype)

    if main_type == 'text':
        return s.get(url).text

    if sub_type in ('fits', 'x-fits'):
        response = s.get(url)
        return HDUList.fromstring(response.content)

    if main_type == 'image':
        from PIL import Image
        from io import BytesIO
        response = s.get(url)
        return Image.open(BytesIO(response.content))

    if sub_type in ('x-votable', 'x-votable+xml'):
        # Import lazily: datalink tables reference further tables, and
        # eagerly importing these modules creates circular dependencies.
        if params.get('content', None) == 'datalink':
            from .adhoc import DatalinkResults
            return DatalinkResults.from_result_url(url)
        else:
            from .query import DALResults
            return DALResults.from_result_url(url)
def create_text(self, komtext):
    """Create a new LysKOM text from *komtext* and return its text number.

    Builds the misc-info (recipients and comment-to links), encodes the
    subject and — for text/* content — the body as UTF-8, attaches
    creating-software and content-type aux items, and submits a
    ReqCreateText request.
    """
    misc_info = kom.CookedMiscInfo()

    # Copy over only the non-None recipients and comment-to references.
    if komtext.recipient_list is not None:
        for rec in komtext.recipient_list:
            if rec is not None:
                misc_info.recipient_list.append(rec)

    if komtext.comment_to_list is not None:
        for ct in komtext.comment_to_list:
            if ct is not None:
                misc_info.comment_to_list.append(ct)

    # NOTE(review): debug print left in place (Python 2 print statement).
    print misc_info.to_string()

    mime_type = mimeparse.parse_mime_type(komtext.content_type)

    # Because a text consists of both a subject and body, and you
    # can have a text subject in combination with an image, a
    # charset is needed to specify the encoding of the subject.
    mime_type[2]['charset'] = 'utf-8'
    content_type = mime_type_tuple_to_str(mime_type)

    # TODO: how would this work with images?
    # The wire format is: subject line, newline, then the body.
    fulltext = str()
    fulltext += komtext.subject.encode('utf-8') + "\n"
    if (mime_type[0] == 'text'):
        # Text bodies are encoded; binary bodies are passed through as-is.
        fulltext += komtext.body.encode('utf-8')
    else:
        fulltext += komtext.body

    aux_items = []
    # Record which client created the text.
    aux_items.append(kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                                 data="%s %s" % (self.client_name,
                                                 self.client_version)))
    # Record the (charset-annotated) content type of the text.
    aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE, data=content_type))

    text_no = kom.ReqCreateText(self.conn, fulltext, misc_info,
                                aux_items).response()
    return text_no
def parseType(fmt):
    """Parse a possibly-malformed MIME type string into (type, subtype, params).

    Falls back through progressively looser parsing so malformed input
    never raises:

    1. Normalize the special values ``"null"`` and ``"text"``.
    2. A value with no ``/`` is returned as ``(value.lower(), '', {})``.
    3. Full parse via ``mimeparse.parse_mime_type``.
    4. A loose ``type/subtype`` regex match (parameters dropped).
    5. Give up and return ``("application", 'x-malformed-mimetype', {})``.
    """
    if (fmt == "null"):
        fmt = "application/x-unknown"
    if (fmt == "text"):
        fmt = "text/plain"
    if fmt.find("/") == -1:
        # Bare token with no subtype at all.
        return (fmt.lower(), '', {})
    # Attempt to parse:
    try:
        (type, subtype, params) = mimeparse.parse_mime_type(fmt)
        return (type.lower(), subtype.lower(), params)
    except Exception:
        # Fixed: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Print form works on py2 and py3.
        print("ERROR: Could not fully parse: " + fmt)
    try:
        fmt_matcher = re.compile(r'([a-z0-9\+\.]+)\/([a-z0-9\+\.]+)')
        fmt_match = fmt_matcher.match(fmt.lower())
        (type, subtype) = fmt_match.groups()
        return (type, subtype, {})
    except Exception:
        print("ERROR: Could not partially parse: " + fmt)
        return ("application", 'x-malformed-mimetype', {})
def status(self, test_id=None, test_status=None, test_tags=None,
           runnable=True, file_name=None, file_bytes=None, eof=False,
           mime_type=None, route_code=None, timestamp=None):
    """Forward a subunit status event and echo test starts / global output.

    Global file attachments (no test_id) are decoded and written through
    to the output stream; the first 'inprogress' event for each test id
    emits a "[start]" line annotated with its worker tag and timestamp.
    """
    # Always forward the raw event to the wrapped stream first.
    super(Starts, self).status(
        test_id, test_status,
        test_tags=test_tags, runnable=runnable, file_name=file_name,
        file_bytes=file_bytes, eof=eof, mime_type=mime_type,
        route_code=route_code, timestamp=timestamp)
    if not test_id:
        # Global (non-test) output: only file payloads are interesting.
        if not file_bytes:
            return
        # NOTE(review): 'test/plain;charset=utf8' looks like a workaround
        # for a known malformed mime type emitted upstream — confirm.
        if not mime_type or mime_type == 'test/plain;charset=utf8':
            mime_type = 'text/plain; charset=utf-8'
        primary, sub, parameters = mimeparse.parse_mime_type(mime_type)
        content_type = testtools.content_type.ContentType(
            primary, sub, parameters)
        content = testtools.content.Content(
            content_type, lambda: [file_bytes])
        text = content.as_text()
        # Remember when the last chunk did not end with a newline, so the
        # next "[start]" line can be put on a fresh line.
        if text and text[-1] not in '\r\n':
            self._neednewline = True
        self._output.write(text)
    elif test_status == 'inprogress' and test_id not in self._emitted:
        # First 'inprogress' for this test id: emit one start line.
        if self._neednewline:
            self._neednewline = False
            self._output.write('\n')
        worker = ''
        for tag in test_tags or ():
            # Worker tags look like 'worker-<n>'; show just '(<n>) '.
            if tag.startswith('worker-'):
                worker = '(' + tag[7:] + ') '
        if timestamp:
            timestr = timestamp.isoformat()
        else:
            timestr = ''
        self._output.write('%s: %s%s [start]\n' % (timestr, worker, test_id))
        self._emitted.add(test_id)
def require_representation(self, req):
    """Require raw representation dictionary from falcon request object.

    This does not perform any field parsing or validation but only
    uses allowed content-encoding handler to decode content body.

    Note:
        Currently only JSON is allowed as content type.

    Args:
        req (falcon.Request): request object

    Returns:
        dict: raw dictionary of representation supplied in request body

    Raises:
        falcon.HTTPUnsupportedMediaType: when the Content-Type header is
            missing/unparseable or is not ``application/json``.
        falcon.HTTPBadRequest: when the body is not valid JSON.
    """
    try:
        type_, subtype, _ = parse_mime_type(req.content_type)
        content_type = '/'.join((type_, subtype))
    except (TypeError, ValueError, AttributeError):
        # Fixed: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. A missing header (None) raises
        # TypeError/AttributeError; a malformed one raises ValueError.
        raise falcon.HTTPUnsupportedMediaType(
            description="Invalid Content-Type header: {}".format(
                req.content_type))

    if content_type == 'application/json':
        body = req.stream.read()
        try:
            return json.loads(body.decode('utf-8'))
        except json.decoder.JSONDecodeError:
            raise falcon.HTTPBadRequest(
                title="Bad Request",
                description="Body is not a valid json.")
    else:
        raise falcon.HTTPUnsupportedMediaType(
            description="only JSON supported, got: {}".format(
                content_type))
def download_file(url, allowed_content_types=None):  # noqa: C901
    """Download *url* and classify it as a file, API, or website resource.

    Returns a ``(resource_type, details)`` tuple. For files, *details*
    contains ``filename``, ``format`` and ``content`` (BytesIO); for other
    resource types it contains only ``format``.

    Raises:
        InvalidUrl: when *url* fails Django URL validation.
        InvalidResponseCode: on a non-200 response, or when a file/API
            resource was permanently redirected (301).
        InvalidContentType: when the Content-Type is neither octet-stream
            nor in the supported list.
    """
    logger.debug(f"download_file({url}, {allowed_content_types})")
    try:
        URLValidator()(url)
    except ValidationError:
        raise InvalidUrl('Invalid url address: %s' % url)
    filename, format = None, None
    supported_content_types = allowed_content_types or [
        ct[1] for ct in settings.SUPPORTED_CONTENT_TYPES
    ]
    # NOTE(review): verify=False disables TLS certificate checking —
    # presumably intentional for crawling arbitrary hosts; confirm.
    r = requests.get(url, stream=True, allow_redirects=True, verify=False,
                     timeout=180)
    if r.status_code != 200:
        raise InvalidResponseCode('Invalid response code: %s' % r.status_code)
    family, content_type, options = parse_mime_type(
        r.headers.get('Content-Type'))
    logger.debug(f' Content-Type: {family}/{content_type};{options}')
    # Generic octet-stream types are allowed through; their real type is
    # resolved later from the file extension.
    if content_type not in ('octet-stream', 'octetstream'
                            ) and content_type not in supported_content_types:
        raise InvalidContentType('Unsupported type: %s' %
                                 r.headers.get('Content-Type'))
    resource_type = _get_resource_type(r)
    logger.debug(f' resource_type: {resource_type}')
    if resource_type == 'file':
        content_disposition = r.headers.get('Content-Disposition', None)
        logger.debug(f' content_disposition: {content_disposition}')
        if content_disposition:
            # Get filename from header
            res = re.findall("filename=(.+)", content_disposition)
            # Filename is capped at 100 characters.
            filename = res[0][:100] if res else None
            logger.debug(f' filename: (unknown)')
            if filename:
                filename = filename.replace('"', '')
                format = filename.split('.')[-1]
                logger.debug(f' filename: (unknown), format: {format}')
        if not filename:
            # Fall back to deriving a name and extension from the URL.
            name, format = filename_from_url(url, content_type)
            filename = '.'.join([name, format])
            logger.debug(
                f' filename: (unknown), format: {format} - from url')
        filename = filename.strip('.')
        if content_type in ('octet-stream', 'octetstream'):
            # Resolve the generic type from the file extension instead.
            family, content_type = content_type_from_file_format(format)
            logger.debug(f' {family}/{content_type} - from file format')
        format = file_format_from_content_type(content_type,
                                               family=family,
                                               extension=format)
        logger.debug(f' format:{format} - from content type')
        content = BytesIO(r.content)
        return resource_type, {
            'filename': filename,
            'format': format,
            'content': content
        }
    else:
        format = file_format_from_content_type(content_type, family)
        logger.debug(f' format: {format} - from content type')
        if resource_type == 'api':
            return resource_type, {'format': format}
        else:
            # A permanent redirect means the stored URL is stale.
            if r.url != url:
                if r.history and r.history[-1].status_code == 301:
                    raise InvalidResponseCode(
                        'Resource location has been moved!')
            return resource_type, {'format': format}
def import_submission(self, submission: praw.objects.Submission) -> dict:
    """Import a submission from deviantArt. Ignores flash content.

    Uses a combination of the DA backend and HTML scraping.

    This function will define the following values in its return data:
    - author: The author of the image.
    - source: The submission URL.
    - importer_display/header
    - import_urls

    :param submission: A reddit submission to parse.
    :return: None if no import, an import info dictionary otherwise.
    """
    try:
        # Direct-linked images need no API call: verify via HEAD request.
        if self.regex_direct.match(urlsplit(submission.url).netloc):
            r = requests.head(submission.url, headers=self.headers)
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            if mime[0] == 'image':
                self.log.debug('DA link is a direct image')
                data = {'author': 'An unknown DA author',
                        'source': submission.url,
                        'import_urls': [submission.url],
                        'importer_display':
                            {'header': 'Mirrored deviantArt image '
                                       'by an unknown author:\n\n'}}
                return data
        # Not a recognized deviantArt page URL either: nothing to import.
        if not self.regex.match(urlsplit(submission.url).netloc):
            return None
        query_url = 'http://backend.deviantart.com/oembed?{}'.format(
            urlencode({'format': 'json', 'url': submission.url}))
        self.log.debug('%s is valid DA url.', submission.url)
        self.log.debug('Querying DA API %s', query_url)
        response = json.loads(self.read_url(query_url))
        if response['type'] not in ('link', 'photo'):
            self.log.debug('Response is not link or photo')
            return None
        self.log.debug('Author name: %s', response['author_name'])
        # Using the official DA API
        data = {'author': response['author_name'],
                'source': submission.url,
                'importer_display':
                    {'header':
                         'Mirrored deviantArt image by the author "{}":\n\n'.format(
                             response['author_name'])}}
        if response['type'] == 'link':
            data['import_urls'] = [response['fullsize_url']]
            self.log.debug('Found DA API url %s', data['import_urls'])
        try:
            # Trying to scrape manually
            bs = BeautifulSoup(self.read_url(submission.url))
            # Checking for flash animation, because mirroring a preview
            # for a flash animation is stupid
            is_flash = bool(bs.select('iframe[class~=flashtime]'))
            is_madefire = bool(bs.select('iframe[class~=madefire-player]'))
            if is_flash or is_madefire:
                self.log.info('DA url is flash, no preview needed.')
                return None
            # Seems to alternate between the two
            full_view = (bs.select('img[class~=fullview]') or
                         bs.select('img[class~=dev-content-full]'))
            if full_view:
                full_url = full_view[0]['src']
                self.log.debug('Found full DA image url: %s', full_url)
                data['import_urls'] = [full_url]
        except Exception as e:
            # Scraping is best-effort: log and fall back to the API URL.
            self.log.error(traceback.format_exc())
        if 'import_urls' not in data:
            self.log.debug('No url found for DA image.')
            return None
        return data
    except Exception as e:
        # Catch-all boundary: an importer failure must never break the
        # caller's processing loop.
        self.log.error('Deviantart Error: %s', traceback.format_exc())
        return None
def test_parse_mime_type(args, expected):
    """Assert that parsing *args* yields the *expected* mime-type tuple."""
    want = tuple(expected)
    got = mimeparse.parse_mime_type(args)
    failure_note = "Expected: '%s' but got %s" % (want, got)
    assert want == got, failure_note
def export_submission(self, import_urls: list,
                      author: str = 'an Unknown Author',
                      source: str = 'an Unknown Source',
                      video: bool = False, **import_info) -> dict:
    """Upload one or multiple images to Imgur. Cannot support videos.

    Uses the imgurpython library.

    This function will define the following values in the export data:
    - exporter
    - link_display

    :param import_urls: A set of direct links to images to upload.
    :param author: The author to note in the description.
    :param source: The source to note in the description.
    :param video: Whether the imported data is a video or not.
    :param import_info: Other importing information passed. Ignored.
    :return: None if no export, an export info dictionary otherwise.
    """
    # imgur does not support videos.
    if not self.client:
        return None
    if video:
        return None
    description = ('This is a mirror uploaded by /u/%s, '
                   'originally made by %s, located at %s' %
                   (self.username, author, source))
    results = {'exporter': self.__class__.__name__}
    config = {}
    album = {}
    # Should we do an album?
    if len(import_urls) == 0:
        self.log.warning('An import gave no urls.')
        return None
    elif len(import_urls) == 1:
        self.log.debug('A single image will be uploaded.')
        is_album = False
        config['description'] = description
    else:
        self.log.debug('An album will be uploaded.')
        try:
            album = self.client.create_album({'description': description})
        except ImgurClientRateLimitError:
            self.log.error('Ran into imgur rate limit! %s',
                           self.client.credits)
            return None
        except Exception:
            self.log.error('Could not create album! %s',
                           traceback.format_exc())
            return None
        # Uploads join the album via its deletehash.
        config['album'] = album['deletehash']
        is_album = True
    try:
        # Try to upload each image given.
        images = []
        for import_url in import_urls:
            self.log.debug('Uploading URL "%s" to imgur', import_url)
            image = self.client.upload_from_url(import_url, config)
            self.log.debug('Uploaded image: %s', str(image))
            images.append(image)
        if is_album:
            results[
                'link_display'] = '[Imgur Album](https://imgur.com/a/%s)  \n' % album[
                'id']
        else:
            # NOTE(review): str.replace swaps the first 'http' occurrence,
            # which here is the scheme prefix of the link.
            picture_url = images[0]['link'].replace('http', 'https')
            r = requests.head(picture_url)
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            if mime[1] == 'gif':
                # Link gifs as .gifv so reddit embeds the HTML5 player.
                picture_url = re.sub(r'(\.\w+)?$', '.gifv', picture_url)
            results['link_display'] = '[Imgur](%s)  \n' % picture_url
    except ImgurClientRateLimitError:
        self.log.error('Ran into imgur rate limit! %s', self.client.credits)
        return None
    except Exception:
        self.log.error('Broken exception catch %s', traceback.format_exc())
        if is_album:
            # Clean up the partially-populated album on failure.
            self.log.error('Try to delete album!')
            self.delete_export(album['deletehash'])
    return results
def _get_doc_type(response):
    """Return the MIME subtype of the response's Content-Type header."""
    content_type = response.headers["Content-Type"]
    _main, subtype, _params = mimeparse.parse_mime_type(content_type)
    return subtype
def __search_documents(self, index_name):
    """Handle an HTTP search request against the index *index_name*.

    Query parameters: ``query``, ``search_field``, ``page_num``,
    ``page_len`` and ``output``. An optional YAML/JSON request body
    supplies a custom weighting model (default BM25F). Responds with a
    page of hits plus status/timing metadata.
    """
    start_time = time.time()

    @after_this_request
    def to_do_after_this_request(response):
        # NOTE(review): `resp` is the closure variable assigned near the
        # end of the enclosing function (the made response), while
        # `response` is the hook's own argument — confirm this asymmetry
        # is intentional.
        record_log(request, response, logger=self.__http_logger)
        self.__record_metrics(start_time, request, resp)
        return response

    data = {}
    status_code = None
    try:
        query = request.args.get('query', default='', type=str)
        search_field = request.args.get('search_field', default='', type=str)
        page_num = request.args.get('page_num', default=1, type=int)
        page_len = request.args.get('page_len', default=10, type=int)

        # Default ranking model; overridden by a YAML/JSON request body.
        weighting = BM25F
        if len(request.data) > 0:
            mime = mimeparse.parse_mime_type(request.headers.get('Content-Type'))
            # Honor the charset parameter, defaulting to UTF-8.
            charset = 'utf-8' if mime[2].get('charset') is None else mime[2].get('charset')
            if mime[1] == 'yaml':
                weighting = get_multi_weighting(yaml.safe_load(request.data.decode(charset)))
            elif mime[1] == 'json':
                weighting = get_multi_weighting(json.loads(request.data.decode(charset)))
            else:
                raise ValueError('unsupported format')

        results_page = self.__indexer.search_documents(index_name, query, search_field, page_num,
                                                       page_len=page_len, weighting=weighting)

        if results_page.pagecount >= page_num or results_page.total <= 0:
            results = {
                'is_last_page': results_page.is_last_page(),
                'page_count': results_page.pagecount,
                'page_len': results_page.pagelen,
                'page_num': results_page.pagenum,
                'total': results_page.total,
                'offset': results_page.offset
            }
            hits = []
            # Skip the hits that belong to earlier pages.
            for result in results_page.results[results_page.offset:]:
                fields = {}
                for item in result.iteritems():
                    fields[item[0]] = item[1]
                hit = {
                    'fields': fields,
                    'doc_num': result.docnum,
                    'score': result.score,
                    'rank': result.rank,
                    'pos': result.pos
                }
                hits.append(hit)
            results['hits'] = hits

            data['results'] = results
            status_code = HTTPStatus.OK
        else:
            # Requested page is past the last page of results.
            data['error'] = 'page_num must be <= {0}'.format(results_page.pagecount)
            status_code = HTTPStatus.BAD_REQUEST
    except (ConstructorError, JSONDecodeError, ValueError) as ex:
        # Malformed body or unsupported format: client error.
        data['error'] = '{0}'.format(ex.args[0])
        status_code = HTTPStatus.BAD_REQUEST
        self.__logger.error(ex)
    except Exception as ex:
        data['error'] = '{0}'.format(ex.args[0])
        status_code = HTTPStatus.INTERNAL_SERVER_ERROR
        self.__logger.error(ex)
    finally:
        # Always attach timing and status metadata to the payload.
        data['time'] = time.time() - start_time
        data['status'] = {'code': status_code.value, 'phrase': status_code.phrase,
                          'description': status_code.description}

    output = request.args.get('output', default='json', type=str).lower()

    # make response
    resp = make_response(data, output)
    resp.status_code = status_code

    return resp
def _parse_mime_type(path):
    """Detect the MIME type of the file at *path* and return it parsed."""
    detected = _magic.from_file(path)
    return parse_mime_type(detected)
def _handler_for(self, mimetype):
    """Return the best-scoring registered handler for *mimetype*."""
    parsed = mimeparse.parse_mime_type(mimetype)
    _best_score, best_handler = MimetypeHandler.get_best_handler(parsed)
    return best_handler
def request(self, method, url, query_params=None, headers=None,
            body=None, post_params=None, _preload_content=True,
            _request_timeout=None):
    """Perform requests.

    :param method: http request method
    :param url: http request url
    :param query_params: query parameters in the url
    :param headers: http request headers
    :param body: request json body, for `application/json`
    :param post_params: request post parameters,
                        `application/x-www-form-urlencoded`
                        and `multipart/form-data`
    :param _preload_content: if False, the urllib3.HTTPResponse object will
                             be returned without reading/decoding response
                             data. Default is True.
    :param _request_timeout: timeout setting for this request. If one
                             number provided, it will be total request
                             timeout. It can also be a pair (tuple) of
                             (connection, read) timeouts.
    """
    method = method.upper()
    assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT',
                      'PATCH', 'OPTIONS']

    # `body` and `post_params` are mutually exclusive encodings.
    if post_params and body:
        raise ValueError(
            "body parameter cannot be used with post_params parameter."
        )

    post_params = post_params or {}
    headers = headers or {}

    # Translate the caller's timeout spec into a urllib3 Timeout.
    timeout = None
    if _request_timeout:
        if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)):  # noqa: E501,F821
            timeout = urllib3.Timeout(total=_request_timeout)
        elif (isinstance(_request_timeout, tuple) and
              len(_request_timeout) == 2):
            timeout = urllib3.Timeout(
                connect=_request_timeout[0], read=_request_timeout[1])

    if 'Content-Type' not in headers:
        headers['Content-Type'] = 'application/json'

    try:
        # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE`
        if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']:
            if query_params:
                url += '?' + urlencode(query_params)
            # Dispatch on Content-Type to pick the right body encoding.
            if re.search('json', headers['Content-Type'], re.IGNORECASE):
                request_body = '{}'
                if body is not None:
                    request_body = json.dumps(body)
                r = self.pool_manager.request(
                    method, url,
                    body=request_body,
                    preload_content=_preload_content,
                    timeout=timeout,
                    headers=headers)
            elif headers['Content-Type'] == 'application/x-www-form-urlencoded':  # noqa: E501
                r = self.pool_manager.request(
                    method, url,
                    fields=post_params,
                    encode_multipart=False,
                    preload_content=_preload_content,
                    timeout=timeout,
                    headers=headers)
            elif headers['Content-Type'] == 'multipart/form-data':
                # must del headers['Content-Type'], or the correct
                # Content-Type which generated by urllib3 will be
                # overwritten.
                del headers['Content-Type']
                r = self.pool_manager.request(
                    method, url,
                    fields=post_params,
                    encode_multipart=True,
                    preload_content=_preload_content,
                    timeout=timeout,
                    headers=headers)
            # Pass a `string` parameter directly in the body to support
            # other content types than Json when `body` argument is
            # provided in serialized form
            elif isinstance(body, str):
                request_body = body
                r = self.pool_manager.request(
                    method, url,
                    body=request_body,
                    preload_content=_preload_content,
                    timeout=timeout,
                    headers=headers)
            else:
                # Cannot generate the request from given parameters
                msg = """Cannot prepare a request message for provided
                         arguments. Please check that your arguments match
                         declared content type."""
                raise ApiException(status=0, reason=msg)
        # For `GET`, `HEAD`
        else:
            r = self.pool_manager.request(method, url,
                                          fields=query_params,
                                          preload_content=_preload_content,
                                          timeout=timeout,
                                          headers=headers)
    except urllib3.exceptions.SSLError as e:
        msg = "{0}\n{1}".format(type(e).__name__, str(e))
        raise ApiException(status=0, reason=msg)

    if _preload_content:
        r = RESTResponse(r)

        # In the python 3, the response.data is bytes.
        # we need to decode it to string.
        if six.PY3:
            if 'Content-Type' not in r.getheaders() or mimeparse.parse_mime_type(r.getheader('Content-Type'))[1].upper() in ['PDF']:
                # when a file is returned, 'Content-Type' does not exist or is one of ['PDF',]
                pass
            else:
                # when a json is returned, 'Content-Type' exists
                r.data = r.data.decode('utf8')
            pass

    # log response body
    logger.debug("response body: %s", r.data)

    if not 200 <= r.status <= 299:
        raise ApiException(http_resp=r)

    return r
def negotiated_mime_type(self):
    """Return the negotiated content type as a bare 'type/subtype' string,
    stripping any parameters such as charset."""
    negotiated = self._negotiate_content_type()
    main_type, sub_type, _params = mimeparse.parse_mime_type(negotiated)
    return '%s/%s' % (main_type, sub_type)
def assert_score(pattern, test, score):
    """Assert that matching *test* against *pattern* scores *score*."""
    parsed_pattern = mimeparse.parse_mime_type(pattern)
    parsed_test = mimeparse.parse_mime_type(test)
    actual = score_match(parsed_pattern, parsed_test)
    self.assertAlmostEqual(actual, score)
def __put(self, key=''):
    """Handle an HTTP PUT storing the request body under *key*.

    The body is decoded per the Content-Type header (YAML, JSON, or raw
    string). With ``?sync=true`` the write is applied synchronously and
    201 Created is returned; otherwise 202 Accepted.
    """
    start_time = time.time()

    @after_this_request
    def to_do_after_this_request(response):
        record_log(request, response, logger=self.__http_logger)
        self.__record_metrics(start_time, request, response)
        return response

    data = {}
    status_code = None
    try:
        mime = mimeparse.parse_mime_type(
            request.headers.get('Content-Type'))
        # Honor the charset parameter, defaulting to UTF-8.
        charset = 'utf-8' if mime[2].get(
            'charset') is None else mime[2].get('charset')
        if mime[1] == 'yaml':
            value = yaml.safe_load(request.data.decode(charset))
        elif mime[1] == 'json':
            value = json.loads(request.data.decode(charset))
        else:
            # handle as a string
            value = request.data.decode(charset)

        sync = False
        if request.args.get('sync', default='', type=str).lower() in TRUE_STRINGS:
            sync = True

        # Keys are normalized to always start with '/'.
        self.__manager.put(key if key.startswith('/') else '/' + key,
                           value, sync=sync)
        if sync:
            status_code = HTTPStatus.CREATED
        else:
            status_code = HTTPStatus.ACCEPTED
    except (ConstructorError, JSONDecodeError, ValueError) as ex:
        # Malformed request body: client error.
        data['error'] = '{0}'.format(ex.args[0])
        status_code = HTTPStatus.BAD_REQUEST
        self.__logger.error(ex)
    except Exception as ex:
        data['error'] = '{0}'.format(ex.args[0])
        status_code = HTTPStatus.INTERNAL_SERVER_ERROR
        self.__logger.error(ex)
    finally:
        # Always attach timing and status metadata to the payload.
        data['time'] = time.time() - start_time
        data['status'] = {
            'code': status_code.value,
            'phrase': status_code.phrase,
            'description': status_code.description
        }

    output = request.args.get('output', default='json', type=str).lower()

    # make response
    resp = make_response(data, output)
    resp.status_code = status_code

    return resp
def for_type(cls, attachment):
    """Returns the handler that is the best fit for provided mimetype.

    Returns an instance of the best-scoring handler for the attachment's
    mimetype, or ``None`` when the mimetype is unparseable or no handler
    matches.
    """
    try:
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
    except Exception:
        # Fixed: an unparseable mimetype previously raised out of this
        # classmethod; treat it as "no handler available".
        return None

    score, handler = cls.get_best_handler(mimetype)

    if handler is None:
        # Fixed: get_best_handler() returns (0, None) when nothing
        # matches, and calling None crashed with TypeError.
        return None

    return handler(attachment, mimetype)
def get_file_info(path):
    """Return the parsed MIME type (with encoding) of the file at *path*."""
    detector = magic.Magic(mime=True, mime_encoding=True)
    detected = detector.from_file(path)
    return parse_mime_type(detected)