Example #1
def get_link_content(link):
    try:
        response = requests.get(link)
        if response.status_code == 404:
            logging.warn(u"404 {}".format(link))
            return None
        if response.status_code != 200:
            raise Exception(u"Unable to fetch release content: {0}".format(link))
    except requests.exceptions.InvalidURL as e:
        logging.warn(u"Invalid link {0}: {1}".format(link, unicode(e)))
        return None

    content_type = response.headers.get('content-type')
    if not content_type:
        logging.warn(u"Response did not contain a Content-Type header: {0}".format(link))
        return None

    (mime_type, mime_subtype, mt_params) = parse_mime_type(content_type)
    if mime_type != 'text' or mime_subtype not in ('html', 'xhtml'):
        logging.warn(u"Skipping non-HTML link: {0}".format(link))
        return None

    if len(response.content) == 0:
        logging.warn(u"Server returned an empty body: {0}".format(link))
        return None

    (title, body) = readability_extract(response.content)
    return kill_control_characters(body)
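
A minimal sketch (editorial, not part of Example #1) of the Content-Type gate used above: parse_mime_type splits the header into a (type, subtype, params) tuple, so only text/html or text/xhtml responses get past the check. The helper name is hypothetical.

import mimeparse

def looks_like_html(content_type):
    # Hypothetical helper: parse_mime_type('text/html; charset=utf-8')
    # returns ('text', 'html', {'charset': 'utf-8'}).
    mime_type, mime_subtype, _params = mimeparse.parse_mime_type(content_type)
    return mime_type == 'text' and mime_subtype in ('html', 'xhtml')

print(looks_like_html('text/html; charset=utf-8'))  # True
print(looks_like_html('application/json'))          # False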
Example #2
    def _http_get_json(self, url):
        """
        Make an HTTP GET request to the specified URL, check that it returned a
        JSON response, and return the data parsed from that response.

        Parameters
        ----------
        url
            The URL to GET.

        Returns
        -------
        Dictionary of data parsed from a JSON HTTP response.

        Exceptions
        ----------
        * PythonKCMeetupsBadJson
        * PythonKCMeetupsBadResponse
        * PythonKCMeetupsMeetupDown
        * PythonKCMeetupsNotJson
        * PythonKCMeetupsRateLimitExceeded

        """
        response = self._http_get(url)

        content_type = response.headers['content-type']
        parsed_mimetype = mimeparse.parse_mime_type(content_type)
        if parsed_mimetype[1] not in ('json', 'javascript'):
            raise PythonKCMeetupsNotJson(content_type)

        try:
            return json.loads(response.content)
        except ValueError as e:
            raise PythonKCMeetupsBadJson(e)
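
For reference, a small hedged sketch of the subtype check in _http_get_json: any */json or */javascript Content-Type passes, everything else would raise PythonKCMeetupsNotJson.

import mimeparse

for header in ('application/json',
               'text/javascript; charset=UTF-8',
               'text/html'):
    subtype = mimeparse.parse_mime_type(header)[1]
    print(header, '->', subtype in ('json', 'javascript'))
# application/json -> True
# text/javascript; charset=UTF-8 -> True
# text/html -> False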
Example #3
    def get_best_handler(cls, mimetype):
        """Return the handler and score that that best fit the mimetype.

        Args:
            mimetype (tuple):
                A parsed mimetype to find the best handler for. This is a
                3-tuple of the type, subtype, and parameters as returned by
                :py:func:`mimeparse.parse_mime_type`.

        Returns:
            tuple:
            A tuple of ``(best_score, mimetype_handler)``. If no handler
            was found, this will be ``(0, None)``.
        """
        best_score, best_fit = (0, None)

        for mimetype_handler in _registered_mimetype_handlers:
            for mt in mimetype_handler.supported_mimetypes:
                try:
                    score = score_match(mimeparse.parse_mime_type(mt),
                                        mimetype)

                    if score > best_score:
                        best_score, best_fit = (score, mimetype_handler)
                except ValueError:
                    continue

        return (best_score, best_fit)
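
score_match and _registered_mimetype_handlers are internals of the project this example comes from and are not shown here. As a rough stand-in, python-mimeparse ships its own scoring helpers (quality and best_match) that rank a concrete mimetype against supported ranges; a hedged sketch:

import mimeparse

# quality() returns the q-value of the best matching range (0 if nothing matches).
print(mimeparse.quality('text/html', 'text/*;q=0.3, text/html;q=0.7'))  # 0.7

# best_match() picks the supported type that best satisfies an Accept header.
print(mimeparse.best_match(['application/xbel+xml', 'text/xml'],
                           'text/*;q=0.5, */*;q=0.1'))  # text/xml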
Example #4
    def serialize(self, obj, accept=None, **opts):
        '''serialize(obj) -> content, content_type

        Serialize an object to text.
        '''
        accept = accept or self.default_content_type
        content_type, format = self.get_format(accept)
        method = getattr(self, 'to_%s' % format)

        # ugly hack: pull the part of the Accept header that matches the
        # negotiated content type so its parameters can be read below
        try:
            accept = [
                part
                for part in accept.split(',')
                if part.startswith(content_type)
            ][0]
        except IndexError:
            # '*/*' case
            accept = content_type

        params = mimeparse.parse_mime_type(accept)[2]
        for key, value in params.items():
            opts.setdefault(key, value)

        return self.SerializedContainer(method(obj, **opts), content_type)
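
The only thing serialize() needs from the matched Accept part, beyond the bare content type, is its parameter dictionary; parse_mime_type returns that as the third tuple element. A brief illustration:

import mimeparse

params = mimeparse.parse_mime_type('application/json; indent=4; charset=utf-8')[2]
print(params)  # {'indent': '4', 'charset': 'utf-8'}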
Example #5
    def require_representation(self, req):
        """Require raw representation dictionary from falcon request object.

        This does not perform any field parsing or validation but only uses
        allowed content-encoding handler to decode content body.

        Note:
            Currently only JSON is allowed as content type.

        Args:
            req (falcon.Request): request object

        Returns:
            dict: raw dictionary of representation supplied in request body

        """
        try:
            type_, subtype, _ = parse_mime_type(req.content_type)
            content_type = '/'.join((type_, subtype))
        except:
            raise falcon.HTTPUnsupportedMediaType(
                description="Invalid Content-Type header: {}".format(
                    req.content_type
                )
            )

        if content_type == 'application/json':
            body = req.stream.read()
            return json.loads(body.decode('utf-8'))
        else:
            raise falcon.HTTPUnsupportedMediaType(
                description="only JSON supported, got: {}".format(content_type)
            )
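
A short sketch of the type_/subtype join used in require_representation, assuming python-mimeparse; any parameters such as charset are deliberately dropped before the comparison against 'application/json'.

from mimeparse import parse_mime_type

type_, subtype, _ = parse_mime_type('application/json; charset=UTF-8')
print('/'.join((type_, subtype)))  # application/json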
Example #6
    def get_best_handler(cls, mimetype):
        """Return the handler and score that that best fit the mimetype.

        Args:
            mimetype (unicode):
                The mimetype to find the best handler for.

        Returns:
            tuple:
            A tuple of ``(best_score, mimetype_handler)``. If no handler
            was found, this will be ``(0, None)``.
        """
        best_score, best_fit = (0, None)

        for mimetype_handler in _registered_mimetype_handlers:
            for mt in mimetype_handler.supported_mimetypes:
                try:
                    score = score_match(mimeparse.parse_mime_type(mt),
                                        mimetype)

                    if score > best_score:
                        best_score, best_fit = (score, mimetype_handler)
                except ValueError:
                    continue

        return (best_score, best_fit)
Example #7
    def for_type(cls, attachment):
        """Returns the handler that is the best fit for provided mimetype."""
        if attachment.mimetype:
            try:
                mimetype = mimeparse.parse_mime_type(attachment.mimetype)
            except:
                logging.error('Unable to parse MIME type "%s" for %s',
                              attachment.mimetype, attachment)
                return None

            # Override the mimetype if mimeparse is known to misinterpret this
            # type of file as 'octet-stream'
            extension = os.path.splitext(attachment.filename)[1]

            if extension in MIMETYPE_EXTENSIONS:
                mimetype = MIMETYPE_EXTENSIONS[extension]

            score, handler = cls.get_best_handler(mimetype)

            if handler:
                try:
                    return handler(attachment.get_review_request(), attachment)
                except ObjectDoesNotExist as e:
                    logging.error('Unable to load review UI for %s: %s',
                                  attachment, e)
                except Exception as e:
                    logging.error('Error instantiating '
                                  'FileAttachmentReviewUI %r: %s',
                                  handler, e)

        return None
Example #8
    def get_best_handler(cls, mimetype):
        """Return the Review UI and score that that best fit the mimetype.

        Args:
            mimetype (unicode):
                The mimetype to find a Review UI for.

        Returns:
            tuple:
            A tuple of ``(best_score, review_ui)``, or ``(0, None)`` if one
            could not be found.
        """
        best_score = 0
        best_fit = None

        for review_ui in _file_attachment_review_uis:
            for mt in review_ui.supported_mimetypes:
                try:
                    score = score_match(mimeparse.parse_mime_type(mt),
                                        mimetype)

                    if score > best_score:
                        best_score = score
                        best_fit = review_ui
                except ValueError:
                    continue

        return best_score, best_fit
Example #9
    def for_type(cls, attachment):
        """Return the handler that is the best fit for provided mimetype."""
        if not attachment.mimetype:
            return None

        try:
            mimetype = mimeparse.parse_mime_type(attachment.mimetype)
        except:
            logging.warning('Unable to parse MIME type "%s" for %s',
                            attachment.mimetype, attachment)
            mimetype = ('application', 'octet-stream', {})

        # Override the mimetype if mimeparse is known to misinterpret this
        # type of file as `octet-stream`
        extension = os.path.splitext(attachment.filename)[1]

        if extension in MIMETYPE_EXTENSIONS:
            mimetype = MIMETYPE_EXTENSIONS[extension]

        score, handler = cls.get_best_handler(mimetype)

        if handler:
            try:
                return handler(attachment, mimetype)
            except Exception as e:
                logging.error('Unable to load Mimetype Handler for %s: %s',
                              attachment, e)

        return MimetypeHandler(attachment, mimetype)
Example #10
 def _test_parse_mime_type(self, args, expected):
     if expected is None:
         self.assertRaises(mimeparse.MimeTypeParseException, mimeparse.parse_mime_type, args)
     else:
         expected = tuple(expected)
         result = mimeparse.parse_mime_type(args)
         message = "Expected: '%s' but got %s" % (expected, result)
         self.assertEqual(expected, result, message)
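
The helper above mirrors python-mimeparse's own test fixtures. A hedged sketch of the two cases it covers, assuming the library raises MimeTypeParseException for a value with no subtype:

import mimeparse

print(mimeparse.parse_mime_type('application/xml; q=1'))
# ('application', 'xml', {'q': '1'})

try:
    mimeparse.parse_mime_type('application')  # no '/subtype' part
except mimeparse.MimeTypeParseException as exc:
    print('raised:', exc)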
Example #11
  def __init__(self, http, postproc, uri,
               method='GET',
               body=None,
               headers=None,
               methodId=None,
               resumable=None):
    """Constructor for an HttpRequest.

    Args:
      http: httplib2.Http, the transport object to use to make a request
      postproc: callable, called on the HTTP response and content to transform
                it into a data object before returning, or raising an exception
                on an error.
      uri: string, the absolute URI to send the request to
      method: string, the HTTP method to use
      body: string, the request body of the HTTP request,
      headers: dict, the HTTP request headers
      methodId: string, a unique identifier for the API method being called.
      resumable: MediaUpload, None if this is not a resumable request.
    """
    self.uri = uri
    self.method = method
    self.body = body
    self.headers = headers or {}
    self.methodId = methodId
    self.http = http
    self.postproc = postproc
    self.resumable = resumable

    # Pull the multipart boundary out of the content-type header.
    major, minor, params = mimeparse.parse_mime_type(
        headers.get('content-type', 'application/json'))

    # Terminating multipart boundary get a trailing '--' appended.
    self.multipart_boundary = params.get('boundary', '').strip('"') + '--'

    # If this was a multipart resumable, the size of the non-media part.
    self.multipart_size = 0

    # The resumable URI to send chunks to.
    self.resumable_uri = None

    # The bytes that have been uploaded.
    self.resumable_progress = 0

    self.total_size = 0

    if resumable is not None:
      if self.body is not None:
        self.multipart_size = len(self.body)
      else:
        self.multipart_size = 0
      self.total_size = (
          self.resumable.size() +
          self.multipart_size +
          len(self.multipart_boundary))
Example #12
    def for_type(cls, attachment):
        """Returns the handler that is the best fit for provided mimetype."""
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)
        score, handler = cls.get_best_handler(mimetype)

        if handler:
            try:
                return handler(attachment.get_review_request(), attachment)
            except Exception, e:
                logging.error('Unable to load review UI for %s: %s',
                              attachment, e, exc_info=1)
Example #13
    def _http_get_json(self, url):
        response = self._http_get(url)

        content_type = response.headers['content-type']
        parsed_mimetype = mimeparse.parse_mime_type(content_type)
        if parsed_mimetype[1] not in ('json', 'javascript'):
            raise MeetupsNotJson(content_type)

        try:
            return json.loads(response.content)
        except ValueError as e:
            raise MeetupsBadJson(e)
Example #14
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """ Import a submission from flickr. Uses their oEmbed API.

        flickr.com was nice enough to provide us with an oEmbed API.
        Apparently these guys also support video, so we should also make sure
        to not try to parse that.

        This function will define the following values in its return data:
        - author: simply "a flickr.com user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

        :param submission: A reddit submission to parse.
        """
        try:
            if not self.regex.match(urlsplit(submission.url).netloc):
                return None
            url = submission.url
            data = {'author': 'a flickr.com user',
                    'source': url,
                    'importer_display':
                        {'header': 'Imported flickr.com image:\n\n'}}
            r = requests.head(url, headers=self.headers)
            if r.status_code == 301:
                return None

            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            # If we're already given an image...
            if mime[0] == 'image':
                # Use the already given URL
                image_url = submission.url
            else:
                # Otherwise, find the image in the html
                 self.log.info("Getting submission.url: " + url)
                 html = urllib.request.urlopen(url).read().decode('utf-8')
                 image_urls = re.findall(r'farm[\d]\.[a-z0-9/.\\/_]*', html)
                 if image_urls:
                     image_url = 'http://' + image_urls[-1].replace('\\', '')
                     self.log.info("Got image url %s", image_url)
                 else:
                     self.log.error('Could not find any flickr URL %s', submission.url)
                     return None
                 
            assert image_url
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import flickr URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #15
def parse_content_type(contenttype):
    mime_type = mimeparse.parse_mime_type(contenttype)
    
    if "charset" in mime_type[2]:
        # Remove charset from mime_type, if we have it
        encoding = mime_type[2].pop("charset")
    else:
        encoding = None
    
    if encoding == 'x-ctext':
        encoding = 'latin1'
    
    return mime_type, encoding
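
Hedged usage of parse_content_type above: the charset parameter is popped out of the parsed tuple, and the legacy x-ctext value is mapped to latin1.

mime_type, encoding = parse_content_type('text/plain; charset=x-ctext')
print(mime_type)  # ('text', 'plain', {})
print(encoding)   # latin1

mime_type, encoding = parse_content_type('application/octet-stream')
print(encoding)   # None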
Example #16
 def status(
     self,
     test_id=None,
     test_status=None,
     test_tags=None,
     runnable=True,
     file_name=None,
     file_bytes=None,
     eof=False,
     mime_type=None,
     route_code=None,
     timestamp=None,
 ):
     super(Starts, self).status(
         test_id,
         test_status,
         test_tags=test_tags,
         runnable=runnable,
         file_name=file_name,
         file_bytes=file_bytes,
         eof=eof,
         mime_type=mime_type,
         route_code=route_code,
         timestamp=timestamp,
     )
     if not test_id:
         if not file_bytes:
             return
         if not mime_type or mime_type == "test/plain;charset=utf8":
             mime_type = "text/plain; charset=utf-8"
         primary, sub, parameters = mimeparse.parse_mime_type(mime_type)
         content_type = testtools.content_type.ContentType(primary, sub, parameters)
         content = testtools.content.Content(content_type, lambda: [file_bytes])
         text = content.as_text()
         if text and text[-1] not in "\r\n":
             self._neednewline = True
         self._output.write(text)
     elif test_status == "inprogress" and test_id not in self._emitted:
         if self._neednewline:
             self._neednewline = False
             self._output.write("\n")
         worker = ""
         for tag in test_tags or ():
             if tag.startswith("worker-"):
                 worker = "(" + tag[7:] + ") "
         if timestamp:
             timestr = timestamp.isoformat()
         else:
             timestr = ""
             self._output.write("%s: %s%s [start]\n" % (timestr, worker, test_id))
         self._emitted.add(test_id)
Example #17
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """Import a submission from drawcrowd. Uses raw HTML scraping.

        As it turns out, drawcrowd likes to provide different data
        (all in <meta> tags) to non-web-browser requests.
        Since it provides enough information anyways, we don't bother getting
        around it and just parse that.

        This function will define the following values in its return data:
        - author: The author of the post
        - source: The url of the submission
        - importer_display/header
        - import_urls

        :param submission: A reddit submission to parse.
        """
        try:
            url = html.unescape(submission.url)
            if not self.regex.match(urlsplit(url).netloc):
                return None
            data = {'source': url}
            r = requests.head(url, headers=self.headers)
            if r.status_code == 301:  # Moved Permanently
                return None
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            if mime[0] == 'image':
                data['author'] = 'An unknown drawcrowd user'
                image_url = url
            else:
                # Note: Drawcrowd provides different content to non-web-browsers.
                r = requests.get(url, headers=self.headers)
                bs = bs4.BeautifulSoup(r.content.decode('utf-8'))
                matched = bs.find(property='og:image')
                if not matched:
                    self.log.warning('Could not locate drawcrowd image to scrape.')
                    return None
                image_url = matched['content']
                matched = bs.find(property='og:title')
                if matched:
                    data['author'] = matched['content']
                else:
                    data['author'] = 'an unknown drawcrowd author'
                data['importer_display'] = {'header': 'Mirrored image from {}:\n\n'.format(data['author'])}
            assert image_url
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import drawcrowd URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #18
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """ Import a submission from gyazo. Uses their oEmbed API.

        gyazo.com was nice enough to provide us with an oEmbed API.
        Apparently these guys also support video, so we should also make sure
        to not try to parse that.

        This function will define the following values in its return data:
        - author: simply "a gyazo.com user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

        :param submission: A reddit submission to parse.
        """
        try:
            if not self.regex.match(urlsplit(submission.url).netloc):
                return None
            data = {'author': 'a gyazo.com user',
                    'source': submission.url,
                    'importer_display':
                        {'header': 'Imported gyazo.com image:\n\n'}}
            r = requests.head(submission.url, headers=self.headers)
            if r.status_code == 301:
                return None

            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            # If we're already given an image...
            if mime[0] == 'image':
                # Use the already given URL
                image_url = submission.url
            else:
                # Otherwise, use the gyazo oEmbed API.
                response = requests.get(
                    'https://api.gyazo.com/api/oembed/',
                    {'url': submission.url},
                    headers=self.headers).json()
                if response.get('type') == 'photo':
                    image_url = response.get('url')
                else:
                    # This is something that is not a photo. Do not scrape.
                    return None

            assert image_url
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import gyazo URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #19
    def encoding(self):
        if self._encoding is not None:
            # Encoding has been set manually.
            return self._encoding

        # Get the `Content-Type` header, if available.
        content_type = self.headers.get('Content-Type')
        if content_type:
            # Parse out the primary type and parameters from the media type.
            ptype, _, params = mimeparse.parse_mime_type(content_type)

            # Return the specified charset or the default depending on the
            # primary type.
            default = 'utf-8' if ptype == 'application' else 'iso-8859-1'
            return params.get('charset', default)
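
A sketch of the charset fallback rule in encoding(): application/* types default to utf-8, everything else to iso-8859-1, and an explicit charset parameter always wins.

import mimeparse

for header in ('application/json',
               'text/html',
               'text/html; charset=utf-16'):
    ptype, _, params = mimeparse.parse_mime_type(header)
    default = 'utf-8' if ptype == 'application' else 'iso-8859-1'
    print(header, '->', params.get('charset', default))
# application/json -> utf-8
# text/html -> iso-8859-1
# text/html; charset=utf-16 -> utf-16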
Example #20
    def get_best_handler(cls, mimetype):
        """Returns the handler and score that that best fit the mimetype."""
        best_score, best_fit = (0, None)

        for mimetype_handler in _registered_mimetype_handlers:
            for mt in mimetype_handler.supported_mimetypes:
                try:
                    score = score_match(mimeparse.parse_mime_type(mt), mimetype)

                    if score > best_score:
                        best_score, best_fit = (score, mimetype_handler)
                except ValueError:
                    continue

        return (best_score, best_fit)
Example #21
def build_content_type(format, encoding='utf-8', api=None):
    """
    Adds the vnd.api.<api_name> attribute to the content type
    (if using AcceptHeaderRouter) and appends the character encoding.
    """
    if api and api._accept_header_routing:
        type, subtype, vars = mimeparse.parse_mime_type(format)
        subtype = '%s+%s' % (api.subtype, subtype)
        attributes = ''
        for k, v in vars.iteritems():
            attributes += '; %s=%s' % (k, v)
        format = '%s/%s%s' % (type, subtype, attributes)
    if 'charset' in format:
        return format
    
    return "%s; charset=%s" % (format, encoding)
Example #22
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """Import a submission from tinypic. Uses raw HTML scraping.

        Because this downloads the page and tries to scrape the HTML,
        we are at significant risk of the image ID on the DOM changing.
        Therefore, this plugin is liable to break.

        This function will define the following values in its return data:
        - author: simply "an anonymous Tinypic user"
        - source: The url of the submission
        - importer_display/header
        - import_urls

        :param submission: A reddit submission to parse.
        """
        try:
            # It seems PRAW doesn't unescape the reddit URL.
            # Tinyurl is the only importer so far that depends on URL parameters.
            url = html.unescape(submission.url)
            if not self.regex.match(urlsplit(url).netloc):
                return None
            data = {'author': 'an anonymous Tinypic user',
                    'source': url,
                    'importer_display':
                        {'header': '~~Liberated~~Mirrored tinypic image:\n\n'}}
            r = requests.head(url, headers=self.headers)
            if r.status_code == 301:  # Moved Permanently
                return None
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            if mime[0] == 'image':
                image_url = url
            else:
                r = requests.get(url, headers=self.headers)
                bs = bs4.BeautifulSoup(r.content.decode('utf-8'))
                matched = bs.select('div#imgFrame img')
                if not matched:
                    self.log.warning('Could not locate Tinypic image to scrape.')
                    return None
                image_url = matched[0]['src']
            assert image_url
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import tinypic URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #23
    def get_best_handler(cls, mimetype):
        """Returns the handler and score that that best fit the mimetype."""
        best_score = 0
        best_fit = None

        for review_ui in _file_attachment_review_uis:
            for mt in review_ui.supported_mimetypes:
                try:
                    score = score_match(mimeparse.parse_mime_type(mt), mimetype)

                    if score > best_score:
                        best_score = score
                        best_fit = review_ui
                except ValueError:
                    continue

        return best_score, best_fit
Example #24
    def encoding(self):
        """
        The name of the encoding used to decode the stream’s bytes
        into strings, and to encode strings into bytes.

        Reads the charset value from the `Content-Type` header, if available;
        else, returns nothing.
        """
        # Get the `Content-Type` header, if available.
        content_type = self.headers.get('Content-Type')
        if content_type:
            # Parse out the primary type and parameters from the media type.
            ptype, _, params = mimeparse.parse_mime_type(content_type)

            # Return the specified charset or the default depending on the
            # primary type.
            default = 'utf-8' if ptype == 'application' else 'iso-8859-1'
            return params.get('charset', default)
Example #25
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """Import a submission from Derpibooru.

        This function will define the following values in its return data:
        - author: simply "an anonymous user on Derpibooru"
        - source: The url of the submission
        - importer_display/header
        - import_urls

        After we define that, we need to get the image. Since Derpibooru has an API,
        we use that to try to get the image if the image is a non-CDN URL. If it is
        a CDN, we take the image directly and upload *that* to Imgur.

        image_url is the variable of the image to upload.

        :param submission: A reddit submission to parse.
        """
        try:
            url = html.unescape(submission.url)
            if not self.regex.match(urlsplit(url).netloc):
                return None
            r = requests.head(url, headers=self.headers)
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            # if mime[0] == 'image':
            self.log.debug('Initiating Derpibooru plugin')
            jsonUrl = 'http://derpiboo.ru/oembed.json?url=' + url  # The API endpoint
            callapi = requests.get(jsonUrl)  # Fetch the API's JSON file.
            json = callapi.json()
            img = 'http:' + (json['thumbnail_url'])
            author = (json['author_name'])
            provider_url = (json['provider_url'])
            data = {'author': author,
                    'source': img,
                    'importer_display':
                        {'header': 'Mirrored [image](' + provider_url + ') by Derpibooru artist \
                        [' + author + '](https://derpiboo.ru/tags/artist-colon-' + author + '):\n\n'}}
            image_url = img
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import Derpibooru URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #26
  def __init__(self, http, postproc, uri,
               method='GET',
               body=None,
               headers=None,
               methodId=None,
               resumable=None):
    """Constructor for an HttpRequest.

    Args:
      http: httplib2.Http, the transport object to use to make a request
      postproc: callable, called on the HTTP response and content to transform
                it into a data object before returning, or raising an exception
                on an error.
      uri: string, the absolute URI to send the request to
      method: string, the HTTP method to use
      body: string, the request body of the HTTP request,
      headers: dict, the HTTP request headers
      methodId: string, a unique identifier for the API method being called.
      resumable: MediaUpload, None if this is not a resumable request.
    """
    self.uri = uri
    self.method = method
    self.body = body
    self.headers = headers or {}
    self.methodId = methodId
    self.http = http
    self.postproc = postproc
    self.resumable = resumable
    self.response_callbacks = []
    self._in_error_state = False

    # Pull the multipart boundary out of the content-type header.
    major, minor, params = mimeparse.parse_mime_type(
        headers.get('content-type', 'application/json'))

    # The size of the non-media part of the request.
    self.body_size = len(self.body or '')

    # The resumable URI to send chunks to.
    self.resumable_uri = None

    # The bytes that have been uploaded.
    self.resumable_progress = 0
Example #27
    def for_type(cls, attachment):
        """Returns the handler that is the best fit for provided mimetype."""
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)

        # Override the mimetype if mimeparse is known to misinterpret this
        # type of file as `octet-stream`
        extension = os.path.splitext(attachment.filename)[1]

        if extension in MIMETYPE_EXTENSIONS:
            mimetype = MIMETYPE_EXTENSIONS[extension]

        score, handler = cls.get_best_handler(mimetype)

        if handler:
            try:
                return handler(attachment, mimetype)
            except Exception, e:
                logging.error('Unable to load Mimetype Handler for %s: %s',
                              attachment, e, exc_info=1)
Example #28
def analyze_resource_file(path, extension=None):
    def isnt_msdoc_text(content_type):
        extensions = list(
            filter(lambda x: x[1] == content_type,
                   settings.SUPPORTED_CONTENT_TYPES))[0][2]
        return len({'doc', 'docx'} & set(extensions)) == 0

    logger.debug(f"analyze_resource_file({path}, {extension})")
    m = magic.Magic(mime=True, mime_encoding=True)
    result = m.from_file(path)
    family, content_type, options = parse_mime_type(result)
    logger.debug(f"  parsed mimetype: {family}/{content_type});{options}")
    file_info = magic.from_file(path)
    logger.debug(f"  file info: {file_info}")
    encoding = options.get('charset', 'unknown')
    logger.debug(f"  encoding: {encoding}")
    extension = file_format_from_content_type(content_type,
                                              family=family,
                                              extension=extension)
    logger.debug(f"  extension: {extension}")
    if family == 'text' and content_type == 'plain':
        if encoding.startswith('unknown'):
            encoding = guess_file_encoding(path)
            logger.debug(f" encoding (guess-plain): {encoding}")
        extension = guess_text_file_format(path, encoding)
        logger.debug(f"  extension (guess-plain): {extension}")

    if extension in ('doc', 'docx', 'xls', 'xlsx', 'ods',
                     'odt') or content_type == 'msword':
        if encoding.startswith('unknown'):
            encoding = guess_file_encoding(path)
            logger.debug(f"  encoding (guess-spreadsheet): {encoding}")
        spreadsheet_format = guess_spreadsheet_file_format(path, encoding)
        if any((extension in ('xls', 'xlsx', 'ods'),
                isnt_msdoc_text(content_type), spreadsheet_format)):
            extension = spreadsheet_format
            logger.debug(f"  extension (guess-spreadsheet): {extension}")

    logger.debug(
        f'  finally: extension = {extension}, file_info = {file_info}, encoding = {encoding}'
    )
    return extension, file_info, encoding
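
A hedged sketch of the libmagic-to-mimeparse handoff that analyze_resource_file relies on, assuming python-magic is installed; the file path is illustrative only.

import magic
from mimeparse import parse_mime_type

m = magic.Magic(mime=True, mime_encoding=True)
result = m.from_file('/etc/hosts')  # e.g. 'text/plain; charset=us-ascii'
family, content_type, options = parse_mime_type(result)
print(family, content_type, options.get('charset', 'unknown'))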
Example #29
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """Import a submission from gifs.com.

        Because this downloads the page and tries to scrape the HTML,
        we are at significant risk of the image ID on the DOM changing.
        Therefore, this plugin is liable to break.

        This function will define the following values in its return data:
        - author: simply "an anonymous user on gifs.com"
        - source: The url of the submission
        - importer_display/header
        - import_urls

        :param submission: A reddit submission to parse.
        """
        try:
            url = html.unescape(submission.url)
            if not self.regex.match(urlsplit(url).netloc):
                return None
            data = {
                'author': 'a gifscom user',
                'source': url,
                'importer_display': {
                    'header': 'Mirrored gifscom image:\n\n'
                }
            }
            r = requests.head(url, headers=self.headers)
            mime_text = r.headers.get('Content-Type')
            mime = mimeparse.parse_mime_type(mime_text)
            if mime[0] == 'image':
                image_url = url
            else:
                self.log.warning(
                    'gifs.com URL posted that is not an image: %s',
                    submission.url)
                return None
            data['import_urls'] = [image_url]
            return data
        except Exception:
            self.log.error('Could not import gifs.com URL %s (%s)',
                           submission.url, traceback.format_exc())
            return None
Example #30
    def supportsSingleExports(self, bug_ids):
        """Return True if the Trac instance provides CSV exports for single
        tickets, False otherwise.

        :bug_ids: A list of bug IDs that we can use for discovery purposes.
        """
        html_ticket_url = '%s/%s' % (
            self.baseurl, self.ticket_url.replace('?format=csv', ''))

        for bug_id in bug_ids:
            try:
                # We try to retrieve the ticket in HTML form, since that
                # will tell us whether or not it is actually a valid ticket.
                ticket_id = int(bug_id)
                self._getPage(html_ticket_url % ticket_id)
            except BugTrackerConnectError as e:
                if isinstance(e.error, requests.HTTPError):
                    # We can consider the ticket to be invalid.
                    pass
                else:
                    raise
            except ValueError:
                # The ticket_id couldn't be identified and it's of no use to
                # us anyway.
                pass
            else:
                # If we didn't get an error we can try to get the ticket in
                # CSV form. If this fails then we can consider single ticket
                # exports to be unsupported.
                try:
                    response = self._getPage(
                        "%s/%s" % (self.baseurl, self.ticket_url % ticket_id))
                    subtype = parse_mime_type(
                        response.headers.get('Content-Type', ''))[1]
                    return subtype == 'csv'
                except BugTrackerConnectError:
                    return False
        else:
            # If we reach this point then we likely haven't had any valid
            # tickets or something else is wrong. Either way, we can only
            # assume that CSV exports of single tickets aren't supported.
            return False
Example #31
    def get_best_handler(cls, mimetype):
        """Returns the handler and score that that best fit the mimetype."""
        best_score, best_fit = (0, cls)

        for mt in cls.supported_mimetypes:
            try:
                score = score_match(mimeparse.parse_mime_type(mt), mimetype)

                if score > best_score:
                    best_score, best_fit = (score, cls)
            except ValueError:
                continue

        for handler in cls.__subclasses__():
            score, best_handler = handler.get_best_handler(mimetype)

            if score > best_score:
                best_score, best_fit = (score, best_handler)

        return (best_score, best_fit)
Example #32
    def get_best_handler(cls, mimetype):
        """Returns the handler and score that that best fit the mimetype."""
        best_score, best_fit = (0, cls)

        for mt in cls.supported_mimetypes:
            try:
                score = score_match(mimeparse.parse_mime_type(mt), mimetype)

                if score > best_score:
                    best_score, best_fit = (score, cls)
            except ValueError:
                continue

        for handler in cls.__subclasses__():
            score, best_handler = handler.get_best_handler(mimetype)

            if score > best_score:
                best_score, best_fit = (score, best_handler)

        return (best_score, best_fit)
Example #33
    def for_type(cls, attachment):
        """Return the Review UI that is the best fit for a file attachment.

        Args:
            attachment (reviewboard.attachments.models.FileAttachments):
                The file attachment to locate a Review UI for.

        Returns:
            FileAttachmentReviewUI:
            The Review UI for the attachment, or ``None`` if a suitable one
            could not be found.
        """
        if attachment.mimetype:
            try:
                mimetype = mimeparse.parse_mime_type(attachment.mimetype)
            except:
                logging.error('Unable to parse MIME type "%s" for %s',
                              attachment.mimetype, attachment)
                return None

            # Override the mimetype if mimeparse is known to misinterpret this
            # type of file as 'octet-stream'
            extension = os.path.splitext(attachment.filename)[1]

            if extension in MIMETYPE_EXTENSIONS:
                mimetype = MIMETYPE_EXTENSIONS[extension]

            score, handler = cls.get_best_handler(mimetype)

            if handler:
                try:
                    return handler(attachment.get_review_request(), attachment)
                except ObjectDoesNotExist as e:
                    logging.error('Unable to load review UI for %s: %s',
                                  attachment, e)
                except Exception as e:
                    logging.error('Error instantiating '
                                  'FileAttachmentReviewUI %r: %s',
                                  handler, e)

        return None
Example #34
def mime_object_maker(url, mimetype, session=None):
    """
    Return a data object suitable for the given mimetype.
    This will either return an astropy FITS object, a pyvo DALResults object,
    a PIL object for conventional images, or a string for text content.

    Parameters
    ----------
    url : str
        the object download url
    mimetype : str
        the content mimetype
    session : object
        optional session to use for network requests
    """
    session = use_session(session)
    mimetype = mimeparse.parse_mime_type(mimetype)

    if mimetype[0] == 'text':
        return session.get(url).text

    if mimetype[1] == 'fits' or mimetype[1] == 'x-fits':
        response = session.get(url)
        return HDUList.fromstring(response.content)

    if mimetype[0] == 'image':
        from PIL import Image
        from io import BytesIO
        response = session.get(url)
        bio = BytesIO(response.content)
        return Image.open(bio)

    if mimetype[1] == 'x-votable' or mimetype[1] == 'x-votable+xml':
        # As soon as there are some kind of recursive data structures,
        # things start to get messy
        if mimetype[2].get('content', None) == 'datalink':
            from .adhoc import DatalinkResults
            return DatalinkResults.from_result_url(url)
        else:
            from .query import DALResults
            return DALResults.from_result_url(url)
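
A hedged usage sketch of mime_object_maker; the URL is a placeholder, and an 'application/fits' content type routes to the HDUList branch above.

# Placeholder URL; any endpoint returning FITS bytes would do.
hdulist = mime_object_maker('https://example.org/cutout.fits',
                            'application/fits',
                            session=None)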
Example #35
    def for_type(cls, attachment):
        """Return the Review UI that is the best fit for a file attachment.

        Args:
            attachment (reviewboard.attachments.models.FileAttachments):
                The file attachment to locate a Review UI for.

        Returns:
            FileAttachmentReviewUI:
            The Review UI for the attachment, or ``None`` if a suitable one
            could not be found.
        """
        if attachment.mimetype:
            try:
                mimetype = mimeparse.parse_mime_type(attachment.mimetype)
            except:
                logging.error('Unable to parse MIME type "%s" for %s',
                              attachment.mimetype, attachment)
                return None

            # Override the mimetype if mimeparse is known to misinterpret this
            # type of file as 'octet-stream'
            extension = os.path.splitext(attachment.filename)[1]

            if extension in MIMETYPE_EXTENSIONS:
                mimetype = MIMETYPE_EXTENSIONS[extension]

            score, handler = cls.get_best_handler(mimetype)

            if handler:
                try:
                    return handler(attachment.get_review_request(), attachment)
                except ObjectDoesNotExist as e:
                    logging.error('Unable to load review UI for %s: %s',
                                  attachment, e)
                except Exception as e:
                    logging.error(
                        'Error instantiating '
                        'FileAttachmentReviewUI %r: %s', handler, e)

        return None
Example #36
    def for_type(cls, attachment):
        """Returns the handler that is the best fit for provided mimetype."""
        mimetype = mimeparse.parse_mime_type(attachment.mimetype)

        # Override the mimetype if mimeparse is known to misinterpret this
        # type of file as `octet-stream`
        extension = os.path.splitext(attachment.filename)[1]

        if extension in MIMETYPE_EXTENSIONS:
            mimetype = MIMETYPE_EXTENSIONS[extension]

        score, handler = cls.get_best_handler(mimetype)

        if handler:
            try:
                return handler(attachment, mimetype)
            except Exception, e:
                logging.error('Unable to load Mimetype Handler for %s: %s',
                              attachment,
                              e,
                              exc_info=1)
Example #37
    def create_text(self, komtext):
        misc_info = kom.CookedMiscInfo()

        if komtext.recipient_list is not None:
            for rec in komtext.recipient_list:
                if rec is not None:
                    misc_info.recipient_list.append(rec)

        if komtext.comment_to_list is not None:
            for ct in komtext.comment_to_list:
                if ct is not None:
                    misc_info.comment_to_list.append(ct)

        print misc_info.to_string()

        mime_type = mimeparse.parse_mime_type(komtext.content_type)
        # Because a text consists of both a subject and body, and you
        # can have a text subject in combination with an image, a
        # charset is needed to specify the encoding of the subject.
        mime_type[2]['charset'] = 'utf-8'
        content_type = mime_type_tuple_to_str(mime_type)

        # TODO: how would this work with images?
        fulltext = str()
        fulltext += komtext.subject.encode('utf-8') + "\n"
        if (mime_type[0] == 'text'):
            fulltext += komtext.body.encode('utf-8')
        else:
            fulltext += komtext.body

        aux_items = []
        aux_items.append(
            kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                        data="%s %s" %
                        (self.client_name, self.client_version)))
        aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE, data=content_type))

        text_no = kom.ReqCreateText(self.conn, fulltext, misc_info,
                                    aux_items).response()
        return text_no
Example #38
    def for_type(cls, attachment):
        """Return the handler that is the best fit for provided mimetype.

        Args:
            attachment (reviewboard.attachments.models.FileAttachment):
                The file attachment to find the best handler for.

        Returns:
            MimetypeHandler:
            The best mimetype handler for the attachment, or ``None`` if
            one could not be found.
        """
        if not attachment.mimetype:
            return None

        try:
            mimetype = mimeparse.parse_mime_type(attachment.mimetype)
        except:
            logging.warning('Unable to parse MIME type "%s" for %s',
                            attachment.mimetype, attachment)
            mimetype = ('application', 'octet-stream', {})

        # Override the mimetype if mimeparse is known to misinterpret this
        # type of file as `octet-stream`
        extension = os.path.splitext(attachment.filename)[1]

        if extension in MIMETYPE_EXTENSIONS:
            mimetype = MIMETYPE_EXTENSIONS[extension]

        score, handler = cls.get_best_handler(mimetype)

        if handler:
            try:
                return handler(attachment, mimetype)
            except Exception as e:
                logging.error('Unable to load Mimetype Handler for %s: %s',
                              attachment, e)

        return MimetypeHandler(attachment, mimetype)
Example #39
    def export_submission(self,
                          import_urls: list,
                          video: bool = False,
                          **import_info) -> dict:
        """Check if something reported as a video is a raw video, then
        post the direct link if it is.

        This function will define the following values in the export data:
        - link_display

        :param import_urls: A set (of one?) of links to videos.
        :param video: Whether the imported data is a video or not.
        :param import_info: Other importing information passed. Ignored.
        :return: None if no export, an export info dictionary otherwise.
        """
        if not video:
            return None
        self.log.debug('Attempting to upload raw video URL.')
        links = []
        for url in import_urls:
            req = requests.head(url, headers=self.headers)
            if not req.ok:
                self.log.debug('URL %s was not valid.', url)
                continue
            try:
                mime_text = req.headers.get('Content-Type')
                mime = mimeparse.parse_mime_type(mime_text)
            except Exception:
                self.log.debug('Error parsing MIME for URL %s', url)
                continue
            if mime[0] != 'video':
                self.log.debug('URL %s is not a video!', url)
                continue
            links.append('[Direct video](%s)  \n' % url)
        if not links:
            self.log.info('No direct video links found!')
            return None
        return {'link_display': ''.join(links)}
Example #40
def mime_object_maker(url, mimetype):
    """
    Return a data object suitable for the given mimetype.
    This will either return an astropy FITS object, a pyvo DALResults object,
    a PIL object for conventional images, or a string for text content.

    Parameters
    ----------
    url : str
        the object download url
    mimetype : str
        the content mimetype
    """
    mimetype = mimeparse.parse_mime_type(mimetype)

    if mimetype[0] == 'text':
        return s.get(url).text

    if mimetype[1] == 'fits' or mimetype[1] == 'x-fits':
        r = s.get(url)
        return HDUList.fromstring(r.content)

    if mimetype[0] == 'image':
        from PIL import Image
        from io import BytesIO
        r = s.get(url)
        b = BytesIO(r.content)
        return Image.open(b)

    if mimetype[1] == 'x-votable' or mimetype[1] == 'x-votable+xml':
        # As soon as there are some kind of recursive data structures,
        # things start to get messy
        if mimetype[2].get('content', None) == 'datalink':
            from .adhoc import DatalinkResults
            return DatalinkResults.from_result_url(url)
        else:
            from .query import DALResults
            return DALResults.from_result_url(url)
Example #41
 def create_text(self, komtext):
     misc_info = kom.CookedMiscInfo()
     
     if komtext.recipient_list is not None:
         for rec in komtext.recipient_list:
             if rec is not None:
                 misc_info.recipient_list.append(rec)
     
     if komtext.comment_to_list is not None:
         for ct in komtext.comment_to_list:
             if ct is not None:
                 misc_info.comment_to_list.append(ct)
     
     print misc_info.to_string()
     
     mime_type = mimeparse.parse_mime_type(komtext.content_type)
     # Because a text consists of both a subject and body, and you
     # can have a text subject in combination with an image, a
     # charset is needed to specify the encoding of the subject.
     mime_type[2]['charset'] = 'utf-8'
     content_type = mime_type_tuple_to_str(mime_type)
     
     # TODO: how would this work with images?
     fulltext = str()
     fulltext += komtext.subject.encode('utf-8') + "\n"
     if (mime_type[0] == 'text'):
         fulltext += komtext.body.encode('utf-8')
     else:
         fulltext += komtext.body
     
     aux_items = []
     aux_items.append(kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                                  data="%s %s" % (self.client_name, self.client_version)))
     aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE,
                                  data=content_type))
     
     text_no = kom.ReqCreateText(self.conn, fulltext, misc_info, aux_items).response()
     return text_no
Example #42
def parseType(fmt):
    if (fmt == "null"):
        fmt = "application/x-unknown"
    if (fmt == "text"):
        fmt = "text/plain"
    if fmt.find("/") == -1:
        return (fmt.lower(), '', {})
    # Attempt to parse:
    try:
        (type, subtype, params) = mimeparse.parse_mime_type(fmt)
        return (type.lower(), subtype.lower(), params)
    except:
        print "ERROR: Could not fully parse: " + fmt

    try:
        fmt_matcher = re.compile(r'([a-z0-9\+\.]+)\/([a-z0-9\+\.]+)')
        fmt_match = fmt_matcher.match(fmt.lower())
        (type, subtype) = fmt_match.groups()
        return (type, subtype, {})
    except:
        print "ERROR: Could not partially parse: " + fmt

    return ("application", 'x-malformed-mimetype', {})
Example #43
 def status(self, test_id=None, test_status=None, test_tags=None,
            runnable=True, file_name=None, file_bytes=None, eof=False,
            mime_type=None, route_code=None, timestamp=None):
     super(Starts, self).status(
         test_id, test_status,
         test_tags=test_tags, runnable=runnable, file_name=file_name,
         file_bytes=file_bytes, eof=eof, mime_type=mime_type,
         route_code=route_code, timestamp=timestamp)
     if not test_id:
         if not file_bytes:
             return
         if not mime_type or mime_type == 'test/plain;charset=utf8':
             mime_type = 'text/plain; charset=utf-8'
         primary, sub, parameters = mimeparse.parse_mime_type(mime_type)
         content_type = testtools.content_type.ContentType(
             primary, sub, parameters)
         content = testtools.content.Content(
             content_type, lambda: [file_bytes])
         text = content.as_text()
         if text and text[-1] not in '\r\n':
             self._neednewline = True
         self._output.write(text)
     elif test_status == 'inprogress' and test_id not in self._emitted:
         if self._neednewline:
             self._neednewline = False
             self._output.write('\n')
         worker = ''
         for tag in test_tags or ():
             if tag.startswith('worker-'):
                 worker = '(' + tag[7:] + ') '
         if timestamp:
             timestr = timestamp.isoformat()
         else:
             timestr = ''
         self._output.write('%s: %s%s [start]\n' %
                            (timestr, worker, test_id))
         self._emitted.add(test_id)
Example #44
    def require_representation(self, req):
        """Require raw representation dictionary from falcon request object.

        This does not perform any field parsing or validation but only uses
        allowed content-encoding handler to decode content body.

        Note:
            Currently only JSON is allowed as content type.

        Args:
            req (falcon.Request): request object

        Returns:
            dict: raw dictionary of representation supplied in request body

        """
        try:
            type_, subtype, _ = parse_mime_type(req.content_type)
            content_type = '/'.join((type_, subtype))
        except:
            raise falcon.HTTPUnsupportedMediaType(
                description="Invalid Content-Type header: {}".format(
                    req.content_type))

        if content_type == 'application/json':
            body = req.stream.read()
            try:
                res = json.loads(body.decode('utf-8'))
            except json.decoder.JSONDecodeError:
                raise falcon.HTTPBadRequest(
                    title="Bad Request",
                    description="Body is not a valid json.")
            return res
        else:
            raise falcon.HTTPUnsupportedMediaType(
                description="only JSON supported, got: {}".format(
                    content_type))
Example #45
def download_file(url, allowed_content_types=None):  # noqa: C901
    logger.debug(f"download_file({url}, {allowed_content_types})")
    try:
        URLValidator()(url)
    except ValidationError:
        raise InvalidUrl('Invalid url address: %s' % url)

    filename, format = None, None

    supported_content_types = allowed_content_types or [
        ct[1] for ct in settings.SUPPORTED_CONTENT_TYPES
    ]

    r = requests.get(url,
                     stream=True,
                     allow_redirects=True,
                     verify=False,
                     timeout=180)

    if r.status_code != 200:
        raise InvalidResponseCode('Invalid response code: %s' % r.status_code)

    family, content_type, options = parse_mime_type(
        r.headers.get('Content-Type'))
    logger.debug(f'  Content-Type: {family}/{content_type};{options}')

    if content_type not in ('octet-stream', 'octetstream'
                            ) and content_type not in supported_content_types:
        raise InvalidContentType('Unsupported type: %s' %
                                 r.headers.get('Content-Type'))

    resource_type = _get_resource_type(r)
    logger.debug(f'  resource_type: {resource_type}')

    if resource_type == 'file':
        content_disposition = r.headers.get('Content-Disposition', None)
        logger.debug(f'  content_disposition: {content_disposition}')
        if content_disposition:
            # Get filename from header
            res = re.findall("filename=(.+)", content_disposition)
            filename = res[0][:100] if res else None
            logger.debug(f'  filename: {filename}')
            if filename:
                filename = filename.replace('"', '')
                format = filename.split('.')[-1]
                logger.debug(f'  filename: {filename}, format: {format}')

        if not filename:
            name, format = filename_from_url(url, content_type)
            filename = '.'.join([name, format])
            logger.debug(
                f'  filename: {filename}, format: {format} - from url')

        filename = filename.strip('.')

        if content_type in ('octet-stream', 'octetstream'):
            family, content_type = content_type_from_file_format(format)
            logger.debug(f'  {family}/{content_type} - from file format')

        format = file_format_from_content_type(content_type,
                                               family=family,
                                               extension=format)
        logger.debug(f'  format:{format} - from content type')

        content = BytesIO(r.content)
        return resource_type, {
            'filename': filename,
            'format': format,
            'content': content
        }
    else:
        format = file_format_from_content_type(content_type, family)
        logger.debug(f'  format: {format} - from content type')
        if resource_type == 'api':
            return resource_type, {'format': format}
        else:
            if r.url != url:
                if r.history and r.history[-1].status_code == 301:
                    raise InvalidResponseCode(
                        'Resource location has been moved!')
            return resource_type, {'format': format}
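A hypothetical call of the helper above; the URL is purely illustrative, and the allowed_content_types whitelist is matched against the subtype that parse_mime_type extracts from the response header:
# Hypothetical usage sketch; 'https://example.com/data.csv' is only an
# illustrative URL and 'csv' is compared against the parsed subtype.
resource_type, info = download_file('https://example.com/data.csv',
                                    allowed_content_types=['csv'])
print(resource_type, info.get('filename'), info.get('format'))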
Example #46
0
    def import_submission(self, submission: praw.objects.Submission) -> dict:
        """Import a submission from deviantArt. Ignores flash content.

        Uses a combination of the DA backend and HTML scraping.

        This function will define the following values in its return data:
        - author: The author of the image.
        - source: The submission URL.
        - importer_display/header
        - import_urls


        :param submission: A reddit submission to parse.
        :return: None if no import, an import info dictionary otherwise.
        """
        try:
            if self.regex_direct.match(urlsplit(submission.url).netloc):
                r = requests.head(submission.url, headers=self.headers)
                mime_text = r.headers.get('Content-Type')
                mime = mimeparse.parse_mime_type(mime_text)
                if mime[0] == 'image':
                    self.log.debug('DA link is a direct image')
                    data = {'author': 'An unknown DA author',
                            'source': submission.url,
                            'import_urls': [submission.url],
                            'importer_display':
                                {'header': 'Mirrored deviantArt image '
                                           'by an unknown author:\n\n'}}
                    return data
            if not self.regex.match(urlsplit(submission.url).netloc):
                return None
            query_url = 'http://backend.deviantart.com/oembed?{}'.format(
                urlencode({'format': 'json', 'url': submission.url}))
            self.log.debug('%s is valid DA url.', submission.url)
            self.log.debug('Querying DA API %s', query_url)

            response = json.loads(self.read_url(query_url))

            if response['type'] not in ('link', 'photo'):
                self.log.debug('Response is not link or photo')
                return None
            self.log.debug('Author name: %s', response['author_name'])

            # Using the official DA API
            data = {'author': response['author_name'],
                    'source': submission.url,
                    'importer_display':
                        {'header': 'Mirrored deviantArt image by the author "{}":\n\n'.format(
                            response['author_name'])}}
            if response['type'] == 'link':
                data['import_urls'] = [response['fullsize_url']]
                self.log.debug('Found DA API url %s', data['import_urls'])

            try:
                # Trying to scrape manually
                bs = BeautifulSoup(self.read_url(submission.url))

                # Checking for flash animation, because mirroring a preview
                # for a flash animation is stupid
                is_flash = bool(bs.select('iframe[class~=flashtime]'))
                is_madefire = bool(bs.select('iframe[class~=madefire-player]'))
                if is_flash or is_madefire:
                    self.log.info('DA url is flash, no preview needed.')
                    return None
                # Seems to alternate between the two
                full_view = (bs.select('img[class~=fullview]') or
                             bs.select('img[class~=dev-content-full]'))
                if full_view:
                    full_url = full_view[0]['src']
                    self.log.debug('Found full DA image url: %s', full_url)
                    data['import_urls'] = [full_url]
            except Exception as e:
                self.log.error(traceback.format_exc())

            if 'import_urls' not in data:
                self.log.debug('No url found for DA image.')
                return None

            return data

        except Exception as e:
            self.log.error('Deviantart Error: %s', traceback.format_exc())
            return None
Example #47
0
def test_parse_mime_type(args, expected):
    expected = tuple(expected)
    result = mimeparse.parse_mime_type(args)
    message = "Expected: '%s' but got %s" % (expected, result)
    assert expected == result, message
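The test above clearly expects to be driven by fixture data that is not shown here; a minimal pytest parametrization that would satisfy it might look like this (the two cases are only illustrative):
import pytest
import mimeparse

# Illustrative cases only; the real test data is not part of the example above.
@pytest.mark.parametrize("args, expected", [
    ("text/html;charset=utf-8", ("text", "html", {"charset": "utf-8"})),
    ("application/xml;q=0.5", ("application", "xml", {"q": "0.5"})),
])
def test_parse_mime_type(args, expected):
    result = mimeparse.parse_mime_type(args)
    assert tuple(expected) == result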
Example #48
0
    def export_submission(self,
                          import_urls: list,
                          author: str = 'an Unknown Author',
                          source: str = 'an Unknown Source',
                          video: bool = False,
                          **import_info) -> dict:
        """Upload one or multiple images to Imgur. Cannot support videos.

        Uses the imgurpython library.

        This function will define the following values in the export data:
        - exporter
        - link_display

        :param import_urls: A set of direct links to images to upload.
        :param author: The author to note in the description.
        :param source: The source to note in the description.
        :param video: Whether the imported data is a video or not.
        :param import_info: Other importing information passed. Ignored.
        :return: None if no export, an export info dictionary otherwise.
        """
        if not self.client:
            return None
        # imgur does not support videos.
        if video:
            return None
        description = ('This is a mirror uploaded by /u/%s, '
                       'originally made by %s, located at %s' %
                       (self.username, author, source))
        results = {'exporter': self.__class__.__name__}
        config = {}
        album = {}

        # Should we do an album?
        if len(import_urls) == 0:
            self.log.warning('An import gave no urls.')
            return None
        elif len(import_urls) == 1:
            self.log.debug('A single image will be uploaded.')
            is_album = False
            config['description'] = description
        else:
            self.log.debug('An album will be uploaded.')
            try:
                album = self.client.create_album({'description': description})
            except ImgurClientRateLimitError:
                self.log.error('Ran into imgur rate limit! %s',
                               self.client.credits)
                return None
            except Exception:
                self.log.error('Could not create album! %s',
                               traceback.format_exc())
                return None
            config['album'] = album['deletehash']
            is_album = True

        try:
            # Try to upload each image given.
            images = []
            for import_url in import_urls:
                self.log.debug('Uploading URL "%s" to imgur', import_url)
                image = self.client.upload_from_url(import_url, config)
                self.log.debug('Uploaded image: %s', str(image))
                images.append(image)
            if is_album:
                results['link_display'] = (
                    '[Imgur Album](https://imgur.com/a/%s)  \n' % album['id'])
            else:
                picture_url = images[0]['link'].replace('http', 'https')
                r = requests.head(picture_url)
                mime_text = r.headers.get('Content-Type')
                mime = mimeparse.parse_mime_type(mime_text)
                if mime[1] == 'gif':
                    picture_url = re.sub(r'(\.\w+)?$', '.gifv', picture_url)
                results['link_display'] = '[Imgur](%s)  \n' % picture_url

        except ImgurClientRateLimitError:
            self.log.error('Ran into imgur rate limit! %s',
                           self.client.credits)
            return None
        except Exception:
            self.log.error('Broken exception catch %s', traceback.format_exc())
            if is_album:
                self.log.error('Try to delete album!')
                self.delete_export(album['deletehash'])
        return results
Example #49
0
def _get_doc_type(response):
    return mimeparse.parse_mime_type(response.headers["Content-Type"])[1]
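A quick check of what this helper yields, using a stand-in object rather than a live HTTP response (the class below is not part of the original project):
import mimeparse

# Stand-in response object, not part of the original project.
class _FakeResponse:
    headers = {"Content-Type": "application/json; charset=utf-8"}

# Only the subtype is returned, e.g. 'json' for 'application/json'.
assert _get_doc_type(_FakeResponse()) == "json"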
Example #50
0
    def __search_documents(self, index_name):
        start_time = time.time()

        @after_this_request
        def to_do_after_this_request(response):
            record_log(request, response, logger=self.__http_logger)
            self.__record_metrics(start_time, request, resp)
            return response

        data = {}
        status_code = None

        try:
            query = request.args.get('query', default='', type=str)
            search_field = request.args.get('search_field', default='', type=str)
            page_num = request.args.get('page_num', default=1, type=int)
            page_len = request.args.get('page_len', default=10, type=int)
            weighting = BM25F
            if len(request.data) > 0:
                mime = mimeparse.parse_mime_type(request.headers.get('Content-Type'))
                charset = 'utf-8' if mime[2].get('charset') is None else mime[2].get('charset')
                if mime[1] == 'yaml':
                    weighting = get_multi_weighting(yaml.safe_load(request.data.decode(charset)))
                elif mime[1] == 'json':
                    weighting = get_multi_weighting(json.loads(request.data.decode(charset)))
                else:
                    raise ValueError('unsupported format')

            results_page = self.__indexer.search_documents(index_name, query, search_field, page_num,
                                                           page_len=page_len, weighting=weighting)

            if results_page.pagecount >= page_num or results_page.total <= 0:
                results = {
                    'is_last_page': results_page.is_last_page(),
                    'page_count': results_page.pagecount,
                    'page_len': results_page.pagelen,
                    'page_num': results_page.pagenum,
                    'total': results_page.total,
                    'offset': results_page.offset
                }
                hits = []
                for result in results_page.results[results_page.offset:]:
                    fields = {}
                    for item in result.iteritems():
                        fields[item[0]] = item[1]
                    hit = {
                        'fields': fields,
                        'doc_num': result.docnum,
                        'score': result.score,
                        'rank': result.rank,
                        'pos': result.pos
                    }
                    hits.append(hit)
                results['hits'] = hits

                data['results'] = results
                status_code = HTTPStatus.OK
            else:
                data['error'] = 'page_num must be <= {0}'.format(results_page.pagecount)
                status_code = HTTPStatus.BAD_REQUEST
        except (ConstructorError, JSONDecodeError, ValueError) as ex:
            data['error'] = '{0}'.format(ex.args[0])
            status_code = HTTPStatus.BAD_REQUEST
            self.__logger.error(ex)
        except Exception as ex:
            data['error'] = '{0}'.format(ex.args[0])
            status_code = HTTPStatus.INTERNAL_SERVER_ERROR
            self.__logger.error(ex)
        finally:
            data['time'] = time.time() - start_time
            data['status'] = {'code': status_code.value, 'phrase': status_code.phrase,
                              'description': status_code.description}

        output = request.args.get('output', default='json', type=str).lower()

        # make response
        resp = make_response(data, output)
        resp.status_code = status_code

        return resp
Example #51
0
 def _parse_mime_type(path):
     result = _magic.from_file(path)
     return parse_mime_type(result)
Example #52
0
 def _handler_for(self, mimetype):
     mt = mimeparse.parse_mime_type(mimetype)
     score, handler = MimetypeHandler.get_best_handler(mt)
     return handler
Example #53
0
    def request(self, method, url, query_params=None, headers=None,
                body=None, post_params=None, _preload_content=True,
                _request_timeout=None):
        """Perform requests.

        :param method: http request method
        :param url: http request url
        :param query_params: query parameters in the url
        :param headers: http request headers
        :param body: request json body, for `application/json`
        :param post_params: request post parameters,
                            `application/x-www-form-urlencoded`
                            and `multipart/form-data`
        :param _preload_content: if False, the urllib3.HTTPResponse object will
                                 be returned without reading/decoding response
                                 data. Default is True.
        :param _request_timeout: timeout setting for this request. If one
                                 number provided, it will be total request
                                 timeout. It can also be a pair (tuple) of
                                 (connection, read) timeouts.
        """
        method = method.upper()
        assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT',
                          'PATCH', 'OPTIONS']

        if post_params and body:
            raise ValueError(
                "body parameter cannot be used with post_params parameter."
            )

        post_params = post_params or {}
        headers = headers or {}

        timeout = None
        if _request_timeout:
            if isinstance(_request_timeout, (int, ) if six.PY3 else (int, long)):  # noqa: E501,F821
                timeout = urllib3.Timeout(total=_request_timeout)
            elif (isinstance(_request_timeout, tuple) and
                  len(_request_timeout) == 2):
                timeout = urllib3.Timeout(
                    connect=_request_timeout[0], read=_request_timeout[1])

        if 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/json'

        try:
            # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE`
            if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']:
                if query_params:
                    url += '?' + urlencode(query_params)
                if re.search('json', headers['Content-Type'], re.IGNORECASE):
                    request_body = '{}'
                    if body is not None:
                        request_body = json.dumps(body)
                    r = self.pool_manager.request(
                        method, url,
                        body=request_body,
                        preload_content=_preload_content,
                        timeout=timeout,
                        headers=headers)
                elif headers['Content-Type'] == 'application/x-www-form-urlencoded':  # noqa: E501
                    r = self.pool_manager.request(
                        method, url,
                        fields=post_params,
                        encode_multipart=False,
                        preload_content=_preload_content,
                        timeout=timeout,
                        headers=headers)
                elif headers['Content-Type'] == 'multipart/form-data':
                    # must del headers['Content-Type'], or the correct
                    # Content-Type which generated by urllib3 will be
                    # overwritten.
                    del headers['Content-Type']
                    r = self.pool_manager.request(
                        method, url,
                        fields=post_params,
                        encode_multipart=True,
                        preload_content=_preload_content,
                        timeout=timeout,
                        headers=headers)
                # Pass a `string` parameter directly in the body to support
                # other content types than Json when `body` argument is
                # provided in serialized form
                elif isinstance(body, str):
                    request_body = body
                    r = self.pool_manager.request(
                        method, url,
                        body=request_body,
                        preload_content=_preload_content,
                        timeout=timeout,
                        headers=headers)
                else:
                    # Cannot generate the request from given parameters
                    msg = """Cannot prepare a request message for provided
                             arguments. Please check that your arguments match
                             declared content type."""
                    raise ApiException(status=0, reason=msg)
            # For `GET`, `HEAD`
            else:
                r = self.pool_manager.request(method, url,
                                              fields=query_params,
                                              preload_content=_preload_content,
                                              timeout=timeout,
                                              headers=headers)
        except urllib3.exceptions.SSLError as e:
            msg = "{0}\n{1}".format(type(e).__name__, str(e))
            raise ApiException(status=0, reason=msg)

        if _preload_content:
            r = RESTResponse(r)

            # In the python 3, the response.data is bytes.
            # we need to decode it to string.
            if six.PY3:
                if ('Content-Type' not in r.getheaders() or
                        mimeparse.parse_mime_type(
                            r.getheader('Content-Type'))[1].upper() in ['PDF']):
                    # when a file is returned, 'Content-Type' does not exist or is one of ['PDF',]
                    pass
                else:
                    # when a json is returned, 'Content-Type' exists
                    r.data = r.data.decode('utf8')
                    pass
            # log response body
            logger.debug("response body: %s", r.data)

        if not 200 <= r.status <= 299:
            raise ApiException(http_resp=r)

        return r
Example #54
0
 def negotiated_mime_type(self):
     parsed_mime_type = mimeparse.parse_mime_type(
         self._negotiate_content_type())
     return '%s/%s' % (parsed_mime_type[0], parsed_mime_type[1])
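Any parameters in the negotiated value end up in the third tuple element and are dropped when the type and subtype are re-joined; a small standalone illustration:
import mimeparse

# 'charset' and 'q' style parameters are discarded by the join above.
parsed = mimeparse.parse_mime_type('text/html; charset=utf-8')
assert '%s/%s' % (parsed[0], parsed[1]) == 'text/html'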
Example #55
0
 def assert_score(pattern, test, score):
     self.assertAlmostEqual(
         score_match(mimeparse.parse_mime_type(pattern),
                     mimeparse.parse_mime_type(test)), score)
Example #56
0
    def __put(self, key=''):
        start_time = time.time()

        @after_this_request
        def to_do_after_this_request(response):
            record_log(request, response, logger=self.__http_logger)
            self.__record_metrics(start_time, request, response)
            return response

        data = {}
        status_code = None

        try:
            mime = mimeparse.parse_mime_type(
                request.headers.get('Content-Type'))
            charset = 'utf-8' if mime[2].get(
                'charset') is None else mime[2].get('charset')
            if mime[1] == 'yaml':
                value = yaml.safe_load(request.data.decode(charset))
            elif mime[1] == 'json':
                value = json.loads(request.data.decode(charset))
            else:
                # handle as a string
                value = request.data.decode(charset)

            sync = False
            if request.args.get('sync', default='',
                                type=str).lower() in TRUE_STRINGS:
                sync = True

            self.__manager.put(key if key.startswith('/') else '/' + key,
                               value,
                               sync=sync)

            if sync:
                status_code = HTTPStatus.CREATED
            else:
                status_code = HTTPStatus.ACCEPTED
        except (ConstructorError, JSONDecodeError, ValueError) as ex:
            data['error'] = '{0}'.format(ex.args[0])
            status_code = HTTPStatus.BAD_REQUEST
            self.__logger.error(ex)
        except Exception as ex:
            data['error'] = '{0}'.format(ex.args[0])
            status_code = HTTPStatus.INTERNAL_SERVER_ERROR
            self.__logger.error(ex)
        finally:
            data['time'] = time.time() - start_time
            data['status'] = {
                'code': status_code.value,
                'phrase': status_code.phrase,
                'description': status_code.description
            }

        output = request.args.get('output', default='json', type=str).lower()

        # make response
        resp = make_response(data, output)
        resp.status_code = status_code

        return resp
Example #57
0
 def for_type(cls, attachment):
     """Returns the handler that is the best fit for provided mimetype."""
     mimetype = mimeparse.parse_mime_type(attachment.mimetype)
     score, handler = cls.get_best_handler(mimetype)
     return handler(attachment, mimetype)
Example #58
0
def get_file_info(path):
    _magic = magic.Magic(mime=True, mime_encoding=True)
    result = _magic.from_file(path)
    return parse_mime_type(result)
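With mime=True and mime_encoding=True, libmagic reports values such as 'text/plain; charset=us-ascii', which parse_mime_type then splits into a (type, subtype, params) tuple. A usage sketch (the path is only an example, and the exact result depends on the file and the libmagic build):
# Example path and output only; results vary with the file and libmagic build.
mime_type, mime_subtype, params = get_file_info('/etc/hosts')
print(mime_type, mime_subtype, params)  # e.g. text plain {'charset': 'us-ascii'}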