def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Parses each sub-part's headers with a FeedParser and delegates the
    body to a nested FieldStorage instance, accumulating parts in
    ``self.list``.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        for (key, value) in query:
            self.list.append(MiniFieldStorage(key, value))

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError('%s should return bytes, got %s'
                         % (self.fp, type(first_line).__name__))
    # BUG FIX: account for consumed bytes. Previously self.bytes_read was
    # never advanced anywhere in this method, so the "bytes_read >= length"
    # checks below compared against a stale counter and could never
    # terminate the read on a length-limited stream.
    self.bytes_read += len(first_line)

    while True:
        parser = FeedParser()
        hdr_text = b''
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        self.bytes_read += len(hdr_text)
        # The parser takes strings, not bytes.
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, self.limit - self.bytes_read,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # BUG FIX: the original condition was
        #   (part.done or bytes_read >= length) and length > 0
        # which never breaks when the length is unknown (<= 0), even after
        # the terminating boundary (part.done) has been seen — an infinite
        # loop. Break on part.done unconditionally, and on the length limit
        # only when a positive length is known.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
def pkg_info(self):
    """Return this package's PKG-INFO metadata parsed into a Message."""
    metadata = self.egg_info_data("PKG-INFO")
    if not metadata:
        logger.warning("No PKG-INFO file found in %s",
                       display_path(self.egg_info_path("PKG-INFO")))
    parser = FeedParser()
    parser.feed(metadata or "")
    return parser.close()
def email_as_list(self, email_id):
    """Fetch mail *email_id* and return [id, sender, subject, body, date].

    The subject line is cleaned with a hand-rolled, partial RFC 2047
    decode (see the notes below); the body is taken UTF-8 decoded.
    """
    raw_email=self.raw_email(email_id)
    f = FeedParser()
    f.feed(raw_email)
    rootMessage = f.close()
    if(rootMessage.is_multipart()):
        # Use the first part's decoded payload as the body.
        # NOTE(review): assumes part 0 is text/plain encoded in UTF-8 —
        # confirm; a non-UTF-8 part raises UnicodeDecodeError here.
        corps=rootMessage.get_payload(0).get_payload(decode=True).decode('utf-8')
    else:
        corps=rootMessage.get_payload(decode=True).decode('utf-8')
    subject=rootMessage.get('Subject')
    # "Alex's method": strip the useless encoded-word envelope with regexps.
    subject=rootMessage.get('Subject')
    # Rewrite '=' -> '%' (quoted-printable escapes become URL escapes for
    # urllib.parse.unquote below) and '_' -> ' ' (RFC 2047 "Q" encoding).
    for i in range(len(subject)):
        if subject[i] == "=":
            subject = subject[:i] + "%" + subject[i+1:]
        elif subject[i] == "_":
            subject = subject[:i] + " " + subject[i+1:]
    # Drop the (now rewritten) =?UTF-8?Q?...?= markers.
    # NOTE(review): email.header.decode_header would handle this robustly,
    # including "B" (base64) subjects, which this code does not decode.
    subject = re.sub('(\n)*\%\?(UTF|utf)\-8\?(Q|B|q|b)\? *', '', subject)
    subject = re.sub('\?\%(\r\n)*', '', subject)
    subject=urllib.parse.unquote(subject)
    # End of Alex's method.
    date =rootMessage.get('Date')
    exp=rootMessage.get('From')
    email_liste=[]
    email_liste.extend((email_id, exp, subject, corps, date))
    return(email_liste)
def test_i18n_filenames(self):
    """An RFC 2231 encoded attachment filename must decode cleanly."""
    parser = FeedParser(_factory=Message)
    # Feed a complete multipart message whose attachment carries a
    # filename*=UTF-8''... (RFC 2231) encoded, accented filename.
    parser.feed("""\
Message-ID: <*****@*****.**>
Content-Type: multipart/mixed; boundary="------------050607040206050605060208"

This is a multi-part message in MIME format.
--------------050607040206050605060208
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: quoted-printable

Test message containing an attachment with an accented filename

--------------050607040206050605060208
Content-Disposition: attachment; filename*=UTF-8''d%C3%A9jeuner.txt

Test content
--------------050607040206050605060208--
""")
    msg = parser.close()
    attachment = msg.get_payload(1)
    # Older email libraries raised TypeError when decoding RFC 2231
    # parameters; treat that as a test failure rather than an error.
    try:
        filename = attachment.get_filename()
    except TypeError as error:
        self.fail(error)
    self.assertEqual(filename, u'd\xe9jeuner.txt')
def upload_mime(request):
    """
    This callback receives raw MIME messages.
    Why? Because the URL ends with 'mime'
    POST parameters are:
    'sender'
    'recipient'
    'body-mime'
    MIME is a raw message which can be saved into an .msg or .eml file,
    parsed with Python's MIME parser, etc
    Use callbacks like this when you want to have full control over
    messages, for example when you need access to all original MIME
    headers, etc
    """
    if request.method == 'POST':
        sender = request.POST.get('sender', None)
        recipient = request.POST.get('recipient', None)
        body_mime = request.POST.get('body-mime', None)
        # Simplistic MIME parsing:
        parser = FeedParser()
        # BUG FIX: feed(None) raised TypeError when the 'body-mime'
        # parameter was absent; fall back to an empty message.
        parser.feed(body_mime or '')
        message = parser.close()
        # Returned text is ignored but HTTP status code matters:
        # Mailgun wants to see 200, otherwise it will make another attempt
        return HttpResponse('OK')
    # NOTE(review): non-POST requests fall through and return None, which
    # Django treats as an error — confirm the URLconf only routes POSTs.
def _execute(self, http, order, requests):
    """Serialize batch request, send to server, process response.

    Args:
      http: httplib2.Http, an http object to be used to make the request with.
      order: list, list of request ids in the order they were added to the
        batch.
      request: list, list of request objects to send.

    Raises:
      httplib2.Error if a transport error has occured.
      apiclient.errors.BatchError if the response is the wrong format.
    """
    message = MIMEMultipart('mixed')
    # Message should not write out its own headers; they would corrupt the
    # HTTP body, which starts directly at the first boundary.
    setattr(message, '_write_headers', lambda self: None)

    # Add all the individual requests, one MIME part per request, keyed by
    # a Content-ID derived from the request id.
    for request_id in order:
        request = requests[request_id]
        msg = MIMENonMultipart('application', 'http')
        msg['Content-Transfer-Encoding'] = 'binary'
        msg['Content-ID'] = self._id_to_header(request_id)
        body = self._serialize_request(request)
        msg.set_payload(body)
        message.attach(msg)

    body = message.as_string()

    headers = {}
    headers['content-type'] = ('multipart/mixed; '
                               'boundary="%s"') % message.get_boundary()

    resp, content = http.request(self._batch_uri, 'POST', body=body,
                                 headers=headers)

    if resp.status >= 300:
        raise HttpError(resp, content, self._batch_uri)

    # Now break out the individual responses and store each one.
    boundary, _ = content.split(None, 1)

    # Prepend with a content-type header so FeedParser can handle it.
    header = 'content-type: %s\r\n\r\n' % resp['content-type']
    for_parser = header + content

    parser = FeedParser()
    parser.feed(for_parser)
    mime_response = parser.close()

    if not mime_response.is_multipart():
        raise BatchError("Response not in multipart/mixed format.",
                         resp, content)

    # Map each response part back to its originating request via the
    # Content-ID header and cache the deserialized (headers, content) pair.
    for part in mime_response.get_payload():
        request_id = self._header_to_id(part['Content-ID'])
        headers, content = self._deserialize_response(part.get_payload())
        self._responses[request_id] = (headers, content)
def request(self, query, headers):
    """Send *query* plus RFC 822 *headers* over the raw socket and return
    the parsed response body as a Message (Python 2 code).

    Returns False when the response cannot be split into a status line
    and payload.  Raises httplib.BadStatusLine on a non-200 status.
    """
    # httplib would really help but there isn't any trivial way
    # that I know of to use your own socket with it.
    request = Message()
    for k,v in headers.iteritems():
        request[k] = v
    self.sock.send(query + "\r\n" + request.as_string())
    # NOTE(review): a single 4096-byte recv may truncate larger
    # responses — confirm the protocol guarantees they fit.
    buffer = self.sock.recv(4096)
    try:
        [result, data] = buffer.split('\r\n', 1)
    except ValueError:
        traceback.print_exc()
        print >> sys.stderr, 'Buffer:', buffer
        print >> sys.stderr, 'Query:', query
        print >> sys.stderr, 'Headers:', headers
        return False
    # Status line looks like "<proto> <code> ..."; index 1 is the code.
    result = result.split(' ')
    if int(result[1]) != 200:
        self.log.info('Request failed: %s', ' '.join(result))
        raise httplib.BadStatusLine(' '.join(result))
    response = FeedParser()
    response.feed(data)
    return response.close()
def _deserialize_response(self, payload):
    """Convert string into httplib2 response and content.

    Args:
      payload: string, headers and body as a string.

    Returns:
      A pair (resp, content) like would be returned from httplib2.request.
    """
    # Strip off the status line, e.g. "HTTP/1.1 200 OK".
    status_line, payload = payload.split('\n', 1)
    protocol, status, reason = status_line.split(' ', 2)

    # Parse the rest of the response (headers) with an email parser.
    parser = FeedParser()
    parser.feed(payload)
    msg = parser.close()
    msg['status'] = status

    # Create httplib2.Response from the parsed headers.
    resp = httplib2.Response(msg)
    resp.reason = reason
    # "HTTP/1.1" -> 11
    resp.version = int(protocol.split('/', 1)[1].replace('.', ''))

    # BUG FIX: a response with headers but no body contains no
    # '\r\n\r\n' separator; the old unconditional [1] raised IndexError.
    parts = payload.split('\r\n\r\n', 1)
    content = parts[1] if len(parts) > 1 else ''

    return resp, content
def render_POST(self, request):
    """Handle an image-upload POST: store the image and redirect.

    Twisted web resource method; *request* is a twisted.web Request.
    """
    # XXX request.requestHeaders
    headers = request.getAllHeaders()
    form = FieldStorage(
        fp=request.content,
        headers=headers,
        environ={
            b'REQUEST_METHOD': request.method,
            b'CONTENT_TYPE': headers[b'content-type'],
        }
    )
    image = form[b"image"]
    share = b"share" in form
    # Re-parse the part's Content-Disposition header so that
    # Message.get_filename() can decode an RFC 2231/2047 encoded filename.
    # NOTE(review): the form is indexed with b"image" (bytes) above but
    # 'image' (str) here, and headers.getheader is a Python-2-era API —
    # confirm both work with this FieldStorage implementation.
    p = FeedParser()
    p.feed(
        "Content-Disposition: "
        + form['image'].headers.getheader('content-disposition'))
    m = p.close()
    filename = m.get_filename()
    value = image.value
    self.process_image(filename, value, share)
    return redirectTo(form[b"return-url"].value, request)
def parse_message(file_input, sender, recipient):
    """Feed every line of *file_input* into a FeedParser and return the
    resulting MailRequest built for *sender* and *recipient*.
    """
    from .request import MailRequest  # deferred to avoid circular imports

    factory = partial(MailRequest, sender, recipient)
    parser = FeedParser(factory)
    for chunk in file_input:
        parser.feed(chunk)
    return parser.close()
def _parse_pkg_info_file(self, filepath):
    # type: (str) -> Message
    """Read *filepath* and parse it as PKG-INFO metadata.

    The PKG-INFO generated by the egg-info command is in an email feed
    format, so an email feedparser extracts the metadata from the file.
    """
    raw = self._osutils.get_file_contents(filepath, binary=False)
    parser = FeedParser()
    parser.feed(raw)
    return parser.close()
def parse_email(self, email):
    """Parse the email file at path *email*.

    Returns a 6-tuple (headers, html, texts, images, videos, applications);
    for a non-multipart message everything but the text payload is 0.
    """
    parser = FeedParser()
    # BUG FIX: the file object was opened and never closed (resource
    # leak); a context manager releases the handle promptly.
    with open(email) as fh:
        parser.feed(fh.read())
    message = parser.close()
    if message.is_multipart():
        texts, html, images, videos, applications = self.extract_multipart(message)
    else:
        texts, html, images, videos, applications = (message.get_payload(), 0, 0, 0, 0)
    return (message.items(), html, texts, images, videos, applications)
def get_content_disposition(content_disposition):
    """
    Get content disposition filename from given header.

    Do not include "Content-Disposition:".

    Returns a unicode string!
    """
    feed_parser = FeedParser()
    feed_parser.feed(b"Content-Disposition: " + content_disposition)
    filename = feed_parser.close().get_filename()
    if isinstance(filename, unicode):
        return filename
    return filename.decode("latin1", "ignore")
def get_content_disposition(content_disposition):
    """
    Get content disposition filename from given header.

    Do not include "Content-Disposition:".

    Returns a unicode string!
    """
    feed_parser = FeedParser()
    feed_parser.feed(b'Content-Disposition: ' + content_disposition)
    filename = feed_parser.close().get_filename()
    if isinstance(filename, six.text_type):
        return filename
    return filename.decode('latin1', 'ignore')
def pkg_info(self):
    """Parse the package's PKG-INFO and return it as an email Message."""
    raw = self.egg_info_data('PKG-INFO')
    if not raw:
        logger.warning(
            'No PKG-INFO file found in %s',
            display_path(self.egg_info_path('PKG-INFO')),
        )
    feed_parser = FeedParser()
    feed_parser.feed(raw or '')
    return feed_parser.close()
def __init__(self, context):
    """Initialise the writer around *context* with a fresh parse state."""
    self.context = context
    # Lazily determined message properties.
    self._mimeType = None
    self._name = None
    self._message = None
    # Write/parse state.
    self._encoding = 'utf-8'
    self._closed = False
    self._written = 0
    self._parser = FeedParser()
def pkg_info(self):
    """Return the parsed PKG-INFO metadata for this package."""
    contents = self.egg_info_data("PKG-INFO")
    if not contents:
        logger.warning(
            "No PKG-INFO file found in %s",
            display_path(self.egg_info_path("PKG-INFO")),
        )
    feed = FeedParser()
    feed.feed(contents if contents else "")
    return feed.close()
def search_packages_info(query):
    """
    Gather details from installed distributions. Print distribution name,
    version, location, and installed files. Installed files requires a
    pip generated 'installed-files.txt' in the distributions '.egg-info'
    directory.
    """
    # Generator: yields one info dict per queried, installed distribution.
    installed = dict([(p.project_name.lower(), p)
                      for p in pkg_resources.working_set])
    query_names = [name.lower() for name in query]
    for dist in [installed[pkg] for pkg in query_names if pkg in installed]:
        package = {
            'name': dist.project_name,
            'version': dist.version,
            'location': dist.location,
            'requires': [dep.project_name for dep in dist.requires()],
        }
        file_list = None
        metadata = None
        if isinstance(dist, pkg_resources.DistInfoDistribution):
            # RECORDs should be part of .dist-info metadatas
            if dist.has_metadata('RECORD'):
                lines = dist.get_metadata_lines('RECORD')
                # First CSV column of each RECORD line is the file path.
                paths = [l.split(',')[0] for l in lines]
                paths = [os.path.join(dist.location, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('METADATA'):
                metadata = dist.get_metadata('METADATA')
        else:
            # Otherwise use pip's log for .egg-info's
            if dist.has_metadata('installed-files.txt'):
                paths = dist.get_metadata_lines('installed-files.txt')
                paths = [os.path.join(dist.egg_info, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('PKG-INFO'):
                metadata = dist.get_metadata('PKG-INFO')
        if dist.has_metadata('entry_points.txt'):
            entry_points = dist.get_metadata_lines('entry_points.txt')
            package['entry_points'] = entry_points
        # @todo: Should pkg_resources.Distribution have a
        # `get_pkg_info` method?
        # NOTE(review): if neither METADATA nor PKG-INFO exists, metadata
        # stays None and feed(None) raises TypeError — confirm upstream
        # guarantees one of them is present.
        feed_parser = FeedParser()
        feed_parser.feed(metadata)
        pkg_info_dict = feed_parser.close()
        for key in ('metadata-version', 'summary',
                    'home-page', 'author', 'author-email', 'license'):
            package[key] = pkg_info_dict.get(key)
        if file_list:
            package['files'] = sorted(file_list)
        yield package
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart."""
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib, ))
    self.list = []
    if self.qs_on_post:
        # Also surface any query string that accompanied the POST.
        query = urllib.parse.parse_qsl(self.qs_on_post,
                                       self.keep_blank_values,
                                       self.strict_parsing,
                                       encoding=self.encoding,
                                       errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))
    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)
    # Ensure that we consume the file until we've hit our inner boundary
    while (first_line.strip() != (b"--" + self.innerboundary) and
            first_line):
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)
    while True:
        parser = FeedParser()
        # Accumulate this part's header block up to the first blank line.
        hdr_text = b""
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']
        # Recurse: the part's body is read by a nested FieldStorage.
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, self.limit - self.bytes_read,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop after the final boundary, or once a known positive
        # content length has been fully consumed.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
def search_packages_info(query):
    """
    Gather details from installed distributions. Print distribution name,
    version, location, and installed files. Installed files requires a
    pip generated 'installed-files.txt' in the distributions '.egg-info'
    directory.
    """
    # Generator: yields one info dict per queried, installed distribution.
    installed = dict(
        [(p.project_name.lower(), p) for p in pkg_resources.working_set])
    query_names = [name.lower() for name in query]
    for dist in [installed[pkg] for pkg in query_names if pkg in installed]:
        package = {
            'name': dist.project_name,
            'version': dist.version,
            'location': dist.location,
            'requires': [dep.project_name for dep in dist.requires()],
        }
        file_list = None
        metadata = None
        if isinstance(dist, pkg_resources.DistInfoDistribution):
            # RECORDs should be part of .dist-info metadatas
            if dist.has_metadata('RECORD'):
                lines = dist.get_metadata_lines('RECORD')
                # First CSV column of each RECORD line is the file path.
                paths = [l.split(',')[0] for l in lines]
                paths = [os.path.join(dist.location, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('METADATA'):
                metadata = dist.get_metadata('METADATA')
        else:
            # Otherwise use pip's log for .egg-info's
            if dist.has_metadata('installed-files.txt'):
                paths = dist.get_metadata_lines('installed-files.txt')
                paths = [os.path.join(dist.egg_info, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('PKG-INFO'):
                metadata = dist.get_metadata('PKG-INFO')
        if dist.has_metadata('entry_points.txt'):
            entry_points = dist.get_metadata_lines('entry_points.txt')
            package['entry_points'] = entry_points
        # @todo: Should pkg_resources.Distribution have a
        # `get_pkg_info` method?
        # NOTE(review): if neither METADATA nor PKG-INFO exists, metadata
        # stays None and feed(None) raises TypeError — confirm upstream
        # guarantees one of them is present.
        feed_parser = FeedParser()
        feed_parser.feed(metadata)
        pkg_info_dict = feed_parser.close()
        for key in ('metadata-version', 'summary',
                    'home-page', 'author', 'author-email', 'license'):
            package[key] = pkg_info_dict.get(key)
        if file_list:
            package['files'] = sorted(file_list)
        yield package
def _get_metadata_from_entrypoint(cls, entrypoint, extension_id):
    """Return metadata information from an entrypoint.

    This is used internally to parse and validate package information from
    an entrypoint for use in ExtensionInfo.

    Args:
        entrypoint (pkg_resources.EntryPoint):
            The EntryPoint pointing to the extension class.

        extension_id (unicode):
            The extension's ID.

    Returns:
        dict:
        The resulting metadata dictionary.
    """
    dist = entrypoint.dist

    try:
        # Wheel, or other modern package.
        lines = dist.get_metadata_lines('METADATA')
    except IOError:
        try:
            # Egg, or other legacy package.
            lines = dist.get_metadata_lines('PKG-INFO')
        except IOError:
            lines = []
            logging.error(
                'No METADATA or PKG-INFO found for the package '
                'containing the %s extension. Information on '
                'the extension may be missing.',
                extension_id)

    data = '\n'.join(lines)

    # Try to decode the PKG-INFO content. If no decoding method is
    # successful then the PKG-INFO content will remain unchanged and
    # processing will continue with the parsing.
    # NOTE(review): on Python 3, ''.join() yields str, which has no
    # .decode() — the first iteration raises AttributeError, not
    # UnicodeDecodeError, so this loop only does real work on Python 2.
    # Confirm which interpreter versions are supported.
    for enc in cls.encodings:
        try:
            data = data.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        logging.warning(
            'Failed decoding PKG-INFO content for extension %s',
            entrypoint.name)

    p = FeedParser()
    p.feed(data)
    pkg_info = p.close()

    return dict(pkg_info.items())
def parse_metadata_file(contents: str) -> Message:
    """Parse :pep:`376` ``PKG-INFO``-style metadata files.

    ``METADATA`` and ``WHEEL`` files (as per :pep:`427`) use the same
    syntax and can also be parsed using this function.

    :param contents: The entire contents of the file
    """
    parser = FeedParser()
    parser.feed(contents)
    return parser.close()
def _get_metadata_from_entrypoint(cls, entrypoint, extension_id):
    """Return metadata information from an entrypoint.

    This is used internally to parse and validate package information from
    an entrypoint for use in ExtensionInfo.

    Args:
        entrypoint (pkg_resources.EntryPoint):
            The EntryPoint pointing to the extension class.

        extension_id (unicode):
            The extension's ID.

    Returns:
        dict:
        The resulting metadata dictionary.
    """
    dist = entrypoint.dist

    try:
        # Wheel, or other modern package.
        lines = dist.get_metadata_lines('METADATA')
    except IOError:
        try:
            # Egg, or other legacy package.
            lines = dist.get_metadata_lines('PKG-INFO')
        except IOError:
            lines = []
            logging.error('No METADATA or PKG-INFO found for the package '
                          'containing the %s extension. Information on '
                          'the extension may be missing.',
                          extension_id)

    data = '\n'.join(lines)

    # Try to decode the PKG-INFO content. If no decoding method is
    # successful then the PKG-INFO content will remain unchanged and
    # processing will continue with the parsing.
    # NOTE(review): on Python 3, ''.join() yields str, which has no
    # .decode() — the first iteration raises AttributeError, not
    # UnicodeDecodeError, so this loop only does real work on Python 2.
    # Confirm which interpreter versions are supported.
    for enc in cls.encodings:
        try:
            data = data.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        logging.warning(
            'Failed decoding PKG-INFO content for extension %s',
            entrypoint.name)

    p = FeedParser()
    p.feed(data)
    pkg_info = p.close()

    return dict(pkg_info.items())
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart."""
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        # Also surface any query string that accompanied the POST.
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)

    # Ensure that we consume the file until we've hit our inner boundary
    while (first_line.strip() != (b"--" + self.innerboundary) and
            first_line):
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)

    while True:
        parser = FeedParser()
        # Accumulate this part's header block up to the first blank line.
        hdr_text = b""
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()

        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']

        # Recurse: the part's body is read by a nested FieldStorage.
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing,self.limit-self.bytes_read,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop after the final boundary, or once a known positive
        # content length has been fully consumed.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
def scriptTask(user, data, callback):
    """Parse *data* into an email Message, pass it to *callback* along with
    *user*, and return an already-fired Deferred.
    """
    deferred = defer.Deferred()
    feed_parser = FeedParser()
    feed_parser.feed(data)
    callback(user, feed_parser.close())
    deferred.callback(None)
    return deferred
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart."""
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib, ))
    self.list = []
    if self.qs_on_post:
        # Also surface any query string that accompanied the POST.
        query = urllib.parse.parse_qsl(self.qs_on_post,
                                       self.keep_blank_values,
                                       self.strict_parsing,
                                       encoding=self.encoding,
                                       errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))
    # NOTE(review): this assignment is dead — klass below reads the
    # class attribute self.FieldStorageClass, not this local.
    FieldStorageClass = None
    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    # This variant tolerates text-mode streams: d converts each line to
    # bytes when the stream yields str, and is the identity otherwise.
    if isinstance(first_line, str):
        d = lambda x: x.encode("utf-8")
        first_line = d(first_line)
    elif not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    else:
        d = lambda x: x
    self.bytes_read += len(first_line)
    # first line holds boundary ; ignore it, or check that
    # b"--" + ib == first_line.strip() ?
    while True:
        parser = FeedParser()
        # Accumulate this part's header block up to the first blank line.
        hdr_text = b""
        while True:
            data = d(self.fp.readline())
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        # Recurse: the part's body is read by a nested FieldStorage.
        # A None limit is propagated as-is (unlimited).
        part = klass(
            self.fp, headers, ib, environ, keep_blank_values,
            strict_parsing,
            None if self.limit is None else self.limit - self.bytes_read,
            self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop after the final boundary, or once a known positive
        # content length has been fully consumed.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
def process(self):
    """Parse the source mail and rewrite it when a matching user exists."""
    parser = FeedParser()
    parser.feed(self.src_mail)
    # Default: pass the mail through unchanged.
    self.dst_mail = self.src_mail
    message = parser.close()
    do_rewrite, from_address = self._parse_from_to(message)
    if not do_rewrite:
        return
    user_xml = self._find_user(from_address)
    if user_xml is not None:
        self.do_rewrite(message, user_xml)
    else:
        self.log("no matching user found, no rewrite", logging_tools.LOG_LEVEL_WARN)
def get_metadata(dist):
    # type: (Distribution) -> Message
    """Return *dist*'s parsed METADATA (dist-info) or PKG-INFO (egg-info);
    an empty Message when neither is available."""
    if isinstance(dist, pkg_resources.DistInfoDistribution) and dist.has_metadata("METADATA"):
        raw = dist.get_metadata("METADATA")
    elif dist.has_metadata("PKG-INFO"):
        raw = dist.get_metadata("PKG-INFO")
    else:
        logger.warning("No metadata found in %s", display_path(dist.location))
        raw = ""

    parser = FeedParser()
    parser.feed(raw)
    return parser.close()
def get_metadata(dist):
    """Return *dist*'s parsed METADATA (dist-info) or PKG-INFO (egg-info);
    an empty Message when neither is available."""
    if (isinstance(dist, pkg_resources.DistInfoDistribution) and
            dist.has_metadata('METADATA')):
        raw = dist.get_metadata('METADATA')
    elif dist.has_metadata('PKG-INFO'):
        raw = dist.get_metadata('PKG-INFO')
    else:
        logger.warning("No metadata found in %s", display_path(dist.location))
        raw = ''

    parser = FeedParser()
    parser.feed(raw)
    return parser.close()
def get_message(mailbox, message):
    """Return the headers (HEAD request) or the raw message for *message*
    in *mailbox*."""
    s = siteconfig.storage(mailbox)
    rawmsg = s.get_message(str(message))
    if request.method == "HEAD":
        parser = FeedParser()
        parser.feed(rawmsg)
        try:
            msg = parser.close()
        # BUG FIX: a bare "except:" also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception instead.
        except Exception:
            return ("Unable to parse message", 500)
        # NOTE(review): the leading '%' in "%{0}" is emitted literally in
        # every header line — confirm it is intentional and not a typo.
        return "".join(["%{0}: {1}\n".format(k, v) for (k, v) in msg.items()])
    else:
        return (rawmsg, 200, {"Content-Type": "application/json"})
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart."""
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError("Invalid boundary in multipart form: %r" % (ib,))
    self.list = []
    if self.qs_on_post:
        # Also surface any query string that accompanied the POST.
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors
        )
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))

    # NOTE(review): this assignment is dead — klass below reads the
    # class attribute self.FieldStorageClass, not this local.
    FieldStorageClass = None
    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)
    # first line holds boundary ; ignore it, or check that
    # b"--" + ib == first_line.strip() ?
    while True:
        parser = FeedParser()
        # Accumulate this part's header block up to the first blank line.
        hdr_text = b""
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        # Recurse: the part's body is read by a nested FieldStorage.
        part = klass(
            self.fp,
            headers,
            ib,
            environ,
            keep_blank_values,
            strict_parsing,
            self.limit - self.bytes_read,
            self.encoding,
            self.errors,
        )
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop after the final boundary, or once a known positive
        # content length has been fully consumed.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
def get_pkg_info(pkg):
    """Collect license/notice files and metadata for *pkg* into a dict.

    Relies on the module-level ``args`` namespace (filter_strings,
    filter_code_page) and the METADATA_KEYS / LICENSE_UNKNOWN constants.
    """
    (license_file, license_text) = get_pkg_included_file(
        pkg, ('LICENSE*', 'LICENCE*', 'COPYING*'))
    (notice_file, notice_text) = get_pkg_included_file(pkg, ('NOTICE*', ))
    pkg_info = {
        'name': pkg.project_name,
        'version': pkg.version,
        'namever': str(pkg),
        'licensefile': license_file,
        'licensetext': license_text,
        'noticefile': notice_file,
        'noticetext': notice_text,
    }
    # Prefer dist-info METADATA; fall back to egg-info PKG-INFO.
    metadata = None
    if pkg.has_metadata('METADATA'):
        metadata = pkg.get_metadata('METADATA')
    if pkg.has_metadata('PKG-INFO') and metadata is None:
        metadata = pkg.get_metadata('PKG-INFO')
    if metadata is None:
        # No parsable metadata at all: mark every metadata key unknown.
        for key in METADATA_KEYS:
            pkg_info[key] = LICENSE_UNKNOWN
        return pkg_info
    feed_parser = FeedParser()
    feed_parser.feed(metadata)
    parsed_metadata = feed_parser.close()
    for key in METADATA_KEYS:
        pkg_info[key] = parsed_metadata.get(key, LICENSE_UNKNOWN)
    if metadata is not None:
        # Second parse via message_from_string to scan trove classifiers.
        message = message_from_string(metadata)
        pkg_info['license_classifier'] = \
            find_license_from_classifier(message)
    if args.filter_strings:
        # Strip characters not representable in the configured code page.
        for k in pkg_info:
            if isinstance(pkg_info[k], list):
                for i, item in enumerate(pkg_info[k]):
                    pkg_info[k][i] = item. \
                        encode(args.filter_code_page, errors="ignore"). \
                        decode(args.filter_code_page)
            else:
                pkg_info[k] = pkg_info[k]. \
                    encode(args.filter_code_page, errors="ignore"). \
                    decode(args.filter_code_page)
    return pkg_info
def get_pkg_info(pkg):
    """Collect license/notice files and metadata for *pkg* into a dict.

    License selection honours the module-level ``args.from`` option
    ('classifier' / 'mixed' vs. plain metadata).
    """
    (license_file, license_text) = get_pkg_included_file(
        pkg, ('LICENSE*', 'LICENCE*', 'COPYING*')
    )
    (notice_file, notice_text) = get_pkg_included_file(
        pkg, ('NOTICE*',)
    )
    pkg_info = {
        'name': pkg.project_name,
        'version': pkg.version,
        'namever': str(pkg),
        'licensefile': license_file,
        'licensetext': license_text,
        'noticefile': notice_file,
        'noticetext': notice_text,
    }
    # Prefer dist-info METADATA; fall back to egg-info PKG-INFO.
    metadata = None
    if pkg.has_metadata('METADATA'):
        metadata = pkg.get_metadata('METADATA')
    if pkg.has_metadata('PKG-INFO') and metadata is None:
        metadata = pkg.get_metadata('PKG-INFO')
    if metadata is None:
        # No parsable metadata at all: mark every metadata key unknown.
        for key in METADATA_KEYS:
            pkg_info[key] = LICENSE_UNKNOWN
        return pkg_info
    feed_parser = FeedParser()
    feed_parser.feed(metadata)
    parsed_metadata = feed_parser.close()
    for key in METADATA_KEYS:
        pkg_info[key] = parsed_metadata.get(key, LICENSE_UNKNOWN)
    # 'from' is a keyword, hence getattr instead of attribute access.
    from_source = getattr(args, 'from')
    need_classifier = from_source == 'classifier' or from_source == 'mixed'
    if need_classifier and metadata is not None:
        message = message_from_string(metadata)
        license_classifier = find_license_from_classifier(message)
        license_meta = pkg_info['license']
        # Overwrite license by condition
        pkg_info['license'] = select_license_by_source(
            from_source, license_classifier, license_meta)
    return pkg_info
def __init__(self, entrypoint, ext_class): data = '\n'.join(entrypoint.dist.get_metadata_lines('PKG-INFO')) # Try to decode the PKG-INFO content. If no decoding method is # successful then the PKG-INFO content will remain unchanged and # processing will continue with the parsing. for enc in self.encodings: try: data = data.decode(enc) break except UnicodeDecodeError: continue else: logging.warning( 'Failed decoding PKG-INFO content for extension %s', entrypoint.name) p = FeedParser() p.feed(data) pkg_info = p.close() # Extensions will often override "Name" to be something # user-presentable, but we sometimes need the package name self.package_name = pkg_info.get('Name') metadata = dict(pkg_info.items()) if ext_class.metadata is not None: metadata.update(ext_class.metadata) self.metadata = metadata self.name = metadata.get('Name') self.version = metadata.get('Version') self.summary = metadata.get('Summary') self.description = metadata.get('Description') self.author = metadata.get('Author') self.author_email = metadata.get('Author-email') self.license = metadata.get('License') self.url = metadata.get('Home-page') self.author_url = metadata.get('Author-home-page', self.url) self.app_name = '.'.join(ext_class.__module__.split('.')[:-1]) self.enabled = False self.installed = False self.is_configurable = ext_class.is_configurable self.has_admin_site = ext_class.has_admin_site self.installed_htdocs_path = \ os.path.join(settings.MEDIA_ROOT, 'ext', self.package_name) self.installed_static_path = \ os.path.join(settings.STATIC_ROOT, 'ext', ext_class.id)
def get_pkginfo(self):
    """Gets package info by reading PKG-INFO file"""
    egg_info_dir = self._get_package_egg_info_path()
    pkg_info_path = os.path.join(egg_info_dir or self.dist_dir, "PKG-INFO")
    if not os.path.exists(pkg_info_path):
        raise Exception("PKG-INFO not found %s" % self.name)
    with open(pkg_info_path, 'r') as pkg_info:
        raw = pkg_info.read()
    parser = FeedParser()
    parser.feed(raw.strip())
    return parser.close()
def get_dist_metadata(dist):
    """Parse and return *dist*'s METADATA (dist-info) or PKG-INFO
    (egg-info); an empty Message when neither exists."""
    import pkg_resources
    from email.parser import FeedParser

    if isinstance(dist, pkg_resources.DistInfoDistribution
                  ) and dist.has_metadata("METADATA"):
        raw = dist.get_metadata("METADATA")
    elif dist.has_metadata("PKG-INFO"):
        raw = dist.get_metadata("PKG-INFO")
    else:
        raw = ""

    parser = FeedParser()
    parser.feed(raw)
    return parser.close()
def parse(self):
    """Feed every valid header line of the input file into a FeedParser,
    then cache the derived message fields on self."""
    parser = FeedParser()
    while True:
        line = self.input_file.readline()
        # Stop at EOF (empty read); keep the legacy None guard too.
        if line is None or len(line) == 0:
            break
        if self.valid_header_line(line):
            parser.feed(line)
    self.msg = parser.close()
    self.from_email = self._get_from_email()
    self.from_name = self._get_from_name()
    self.to_email = self._get_to_email()
    self.subject = self._get_subject()
    self.date = self._get_date()
    self.id = self._get_id()
    self.id_hash = self._get_id_hash()
class PhotoMessage(object):
    """SMTP message consumer (twisted smtp.IMessage): accumulates an
    incoming mail and, at end-of-message, extracts and processes its
    first image attachment."""

    implements(smtp.IMessage)

    def __init__(self):
        # Incrementally parses the incoming message line by line.
        self.email = FeedParser()

    def lineReceived(self, line):
        """Feed one received line (sans newline) into the parser."""
        self.email.feed(line + '\n')

    @defer.inlineCallbacks
    def eomReceived(self):
        """End of message: resolve the photo id from the Subject header,
        extract the first image and hand it off for processing."""
        email = self.email.close()
        photo_id = yield photoIDFromKey(email['Subject'])
        fname = extract_first_image(email, photo_id)
        yield processImage(fname)
def search_packages_info(query):
    """Return details for the first installed distribution named in *query*.

    The dict contains name/version/location/requires plus, when available,
    parsed metadata fields, entry points and the installed file list.
    Returns None when nothing in *query* is installed.
    """
    from email.parser import FeedParser
    import pkg_resources
    installed = dict([(p.project_name.lower(), p)
                      for p in pkg_resources.working_set])
    query_names = [name.lower() for name in query]
    for dist in [installed[pkg] for pkg in query_names if pkg in installed]:
        package = {'name': dist.project_name,
                   'version': dist.version,
                   'location': dist.location,
                   'requires': [dep.project_name for dep in dist.requires()]}
        # BUG FIX: an early "return package" here made every statement
        # below unreachable, so metadata, entry points and file lists
        # were never collected.
        file_list = None
        metadata = None
        if isinstance(dist, pkg_resources.DistInfoDistribution):
            # RECORDs should be part of .dist-info metadata.
            if dist.has_metadata('RECORD'):
                lines = dist.get_metadata_lines('RECORD')
                paths = [l.split(',')[0] for l in lines]
                paths = [os.path.join(dist.location, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('METADATA'):
                metadata = dist.get_metadata('METADATA')
        else:
            # Otherwise use pip's log for .egg-info's.
            if dist.has_metadata('installed-files.txt'):
                paths = dist.get_metadata_lines('installed-files.txt')
                paths = [os.path.join(dist.egg_info, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location)
                             for p in paths]
            if dist.has_metadata('PKG-INFO'):
                metadata = dist.get_metadata('PKG-INFO')
        if dist.has_metadata('entry_points.txt'):
            entry_points = dist.get_metadata_lines('entry_points.txt')
            package['entry_points'] = entry_points
        feed_parser = FeedParser()
        # Guard distributions shipping neither METADATA nor PKG-INFO.
        feed_parser.feed(metadata or '')
        pkg_info_dict = feed_parser.close()
        for key in ('metadata-version', 'summary', 'home-page', 'author',
                    'author-email', 'license', 'keywords'):
            package[key] = pkg_info_dict.get(key)
        if file_list:
            package['files'] = sorted(file_list)
        return package
def check_dist_requires_python(dist):
    """Validate *dist*'s Requires-Python metadata against the running
    interpreter.

    Raises ``exceptions.UnsupportedPythonVersion`` when the requirement
    is not satisfied; an invalid specifier is merely logged.
    """
    parser = FeedParser()
    parser.feed(get_metadata(dist))
    info = parser.close()
    requires_python = info.get("Requires-Python")
    try:
        if not check_requires_python(requires_python):
            running = ".".join(map(str, sys.version_info[:3]))
            raise exceptions.UnsupportedPythonVersion(
                "%s requires Python '%s' but the running Python is %s"
                % (dist.project_name, requires_python, running)
            )
    except specifiers.InvalidSpecifier as e:
        # A malformed specifier is not fatal; just warn and continue.
        logger.warning(
            "Package %s has an invalid Requires-Python entry %s - %s"
            % (dist.project_name, requires_python, e)
        )
    return
def check_dist_requires_python(dist):
    """Validate *dist*'s Requires-Python metadata against the running
    interpreter.

    Raises ``exceptions.UnsupportedPythonVersion`` when the requirement
    is not satisfied; an invalid specifier is merely logged.
    """
    metadata = get_metadata(dist)
    feed_parser = FeedParser()
    feed_parser.feed(metadata)
    pkg_info_dict = feed_parser.close()
    requires_python = pkg_info_dict.get('Requires-Python')
    try:
        if not check_requires_python(requires_python):
            import sys
            # BUG FIX: indexing os.environ directly raised KeyError when
            # PIP_PYTHON_VERSION was unset, masking the intended
            # exception.  Fall back to the real interpreter version.
            running = os.environ.get(
                'PIP_PYTHON_VERSION',
                '.'.join(map(str, sys.version_info[:3])))
            raise exceptions.UnsupportedPythonVersion(
                "%s requires Python '%s' but the running Python is %s"
                % (dist.project_name, requires_python, running))
    except specifiers.InvalidSpecifier as e:
        logger.warning(
            "Package %s has an invalid Requires-Python entry %s - %s"
            % (dist.project_name, requires_python, e))
    return
def msg_to_dict(rawmsg):
    """Parse a raw RFC 2822 message string into a flat dict of common
    headers plus the serialized message size.

    Returns an empty dict if the parser cannot finalize the message.
    """
    parser = FeedParser()
    parser.feed(rawmsg)
    try:
        msg = parser.close()
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        return {}
    # Message.get() already handles the missing-header case, replacing
    # the repetitive ``msg[k] if k in msg else default`` pattern.
    return {
        "to": msg.get("to", ""),
        "from": msg.get("from", ""),
        "bcc": msg.get("bcc", ""),
        "subject": msg.get("subject", ""),
        "cc": msg.get("cc", ""),
        "size": len(str(msg)),
        "date": msg.get("date", "0"),
    }
def send_mail(self, sender, destinataries, subject, content):
    """Build a simple RFC 2822 message and send it via ``self.smtp``.

    :param sender: From address; must contain an ``@``.
    :param destinataries: list of recipient addresses.
    :raises ValueError: when *sender* does not look like an address.
    """
    if not re.search('^.+@.+$', sender):
        raise ValueError('Sender address is not valid: {0}'.format(sender))
    parser = FeedParser()
    parser.feed('From: {0}\n'.format(sender))
    # BUG FIX: the original used placeholders {1}/{2}/{3} with a single
    # positional argument, raising IndexError on every call.
    parser.feed('To: {0}\n'.format(', '.join(destinataries)))
    parser.feed('Subject: {0}\n'.format(subject))
    parser.feed('\n{0}\n'.format(content))
    mail = parser.close()
    # BUG FIX: use the validated *sender* as the envelope sender; the
    # original referenced self.sender, contradicting the From: header
    # built above.
    self.smtp.sendmail(sender, destinataries, mail.as_string())
def getLastMail(self):
    """Fetch the most recent message from the configured IMAP mailbox.

    Returns ``(email_id, body_text)``, or ``(0, "")`` when the mailbox
    is empty or any IMAP step fails.
    """
    try:
        mail = imaplib.IMAP4_SSL(config.imap['host'], config.imap['port'])
        mail.login(config.imap['user'], config.imap['passwd'])
        mail.select(config.imap['mailBox'], 1)
    except Exception:
        # Narrowed from a bare ``except:``.  Also bail out early: the
        # original fell through with ``mail`` unbound, crashing (and
        # mis-reporting) in the block below.
        print("!Error with mail connection")
        return (0, "")
    try:
        result, data = mail.search(None, "ALL")
        id_list = data[0].split()  # ids come as one space-separated string
        if len(id_list) == 0:
            return (0, "")
        latest_email_id = id_list[-1]  # get the latest
        # fetch the email body (RFC822) for the given ID
        result, data = mail.fetch(latest_email_id, "(RFC822)")
        rawMailBody = data[0][1].decode("utf-8")
        f = FeedParser()
        f.feed(rawMailBody)
        rootMessage = f.close()
        # Second MIME part is assumed to hold the text body — TODO confirm
        # this matches the mailbox's message layout.
        mailBody = rootMessage.get_payload(1).get_payload(decode=True)
        mail.close()
        mail.logout()
        del mail
        return (latest_email_id, mailBody.decode("utf-8"))
    except Exception:
        print("!Error with mail select")
        return (0, "")
def read_file(path):
    """Read *path* ('-' means stdin) and return a parsed email Message,
    or the raw bytes when the content is not UTF-8 text with headers.
    """
    if path == '-':
        # BUG FIX: sys.stdin is text-mode, so the later data.decode()
        # call would fail with AttributeError; read the binary buffer.
        file = sys.stdin.buffer
    else:
        file = open(path, mode='rb')
    parser = FeedParser()
    logger.debug('loading %r', file.name)
    with file as f:
        data = f.read()
    try:
        parser.feed(data.decode('utf-8'))
    except UnicodeDecodeError:
        # Not text at all: hand back the raw bytes untouched.
        return data
    message = parser.close()
    # No headers parsed: treat the input as opaque data, not an email.
    # (Uses the public keys() API instead of the private _headers list.)
    if not message.keys():
        return data
    return message
def check_dist_requires_python(dist, absorb=True):
    """Return the Requires-Python specifier of *dist* when it is either
    requested verbatim (``absorb=False``) or unsatisfied by the running
    interpreter; otherwise return None.

    An invalid specifier is logged rather than raised.
    """
    parser = FeedParser()
    parser.feed(get_metadata(dist))
    requires_python = parser.close().get('Requires-Python')
    if not absorb:
        return requires_python
    try:
        if not check_requires_python(requires_python):
            return requires_python
    except specifiers.InvalidSpecifier as e:
        logger.warning(
            "Package %s has an invalid Requires-Python entry %s - %s",
            dist.project_name, requires_python, e,
        )
    return
def get_metadata(dist):
    # type: (Distribution) -> Message
    """
    Return the parsed metadata (METADATA or PKG-INFO) of *dist*.

    :raises NoneMetadataError: if the distribution reports `has_metadata()`
        True but `get_metadata()` returns None.
    """
    metadata_name = 'METADATA'
    is_dist_info = isinstance(dist, pkg_resources.DistInfoDistribution)
    if is_dist_info and dist.has_metadata(metadata_name):
        metadata = dist.get_metadata(metadata_name)
    elif dist.has_metadata('PKG-INFO'):
        metadata_name = 'PKG-INFO'
        metadata = dist.get_metadata(metadata_name)
    else:
        logger.warning("No metadata found in %s", display_path(dist.location))
        metadata = ''
    if metadata is None:
        raise NoneMetadataError(dist, metadata_name)
    parser = FeedParser()
    # feed() would raise a TypeError on None, hence the guard above.
    parser.feed(metadata)
    return parser.close()
def check_dist_requires_python(dist):
    """Raise ``exceptions.UnsupportedPythonVersion`` if the running
    interpreter does not satisfy *dist*'s Requires-Python metadata.

    An invalid specifier is logged rather than raised.
    """
    metadata = get_metadata(dist)
    feed_parser = FeedParser()
    feed_parser.feed(metadata)
    pkg_info_dict = feed_parser.close()
    # BUG FIX: email.message.Message has no ``pop_from_serial`` method —
    # header lookup is done with ``get``.
    requires_python = pkg_info_dict.get('Requires-Python')
    try:
        if not check_requires_python(requires_python):
            raise exceptions.UnsupportedPythonVersion(
                "%s requires Python '%s' but the running Python is %s" % (
                    dist.project_name,
                    requires_python,
                    '.'.join(map(str, sys.version_info[:3])),
                ))
    except specifiers.InvalidSpecifier as e:
        logger.warning(
            "Package %s has an invalid Requires-Python entry %s - %s"
            % (dist.project_name, requires_python, e))
    return
def inject_email(filename=u'-'):
    """Read one email from stdin, parse it, forward it in a celery task
    to be persisted.

    Exits with status 1 on user abort or a parse error; logs (but does
    not forward) messages with MIME defects.
    """
    parser = FeedParser()
    if logger.level is logging.NOTSET:
        logger.setLevel(logging.INFO)
    try:
        # iterate over stdin (or the named file)
        for line in fileinput.input(filename):
            parser.feed(line)
    except KeyboardInterrupt:
        logger.info('Aborted by user, exiting.')
        sys.exit(1)
    except Exception:
        # BUG FIX: narrowed from a bare ``except:`` so SystemExit and
        # GeneratorExit are not swallowed and misreported as parse errors.
        logger.error('Error during email parsing', exc_info=True)
        sys.exit(1)
    finally:
        # close the parser to generate an email.message
        message = parser.close()
        fileinput.close()
    if message:
        # make sure no email.errors are present
        if not message.defects:
            process_email.delay(message)
        else:
            logger.error(
                'email has defects, message content:\n'
                '------ START -------\n'
                '%s'
                '\n------ END -------\n',
                message,
                extra={
                    'stack': True,
                })
    else:
        logger.error('no email was parsed from stdin', extra={
            'stack': True,
        })
def check_dist_requires_python(dist):
    """Check *dist*'s Requires-Python metadata.

    Unlike upstream pip, an unsatisfied requirement is deliberately
    absorbed (no exception is raised); only an invalid specifier is
    logged.
    """
    metadata = get_metadata(dist)
    feed_parser = FeedParser()
    feed_parser.feed(metadata)
    pkg_info_dict = feed_parser.close()
    requires_python = pkg_info_dict.get('Requires-Python')
    try:
        # NOTE: the commented-out UnsupportedPythonVersion raise from the
        # original has been removed; the intent (absorb the failure) is
        # documented here instead of as dead code.
        if not check_requires_python(requires_python):
            return
    except specifiers.InvalidSpecifier as e:
        logger.warning(
            "Package %s has an invalid Requires-Python entry %s - %s" % (
                dist.project_name, requires_python, e))
    return
def get_content_disposition(content_disposition):
    """
    Get the content disposition filename from given header.

    **Do not include "Content-Disposition:".**

    Parameters
    ----------
    content_disposition : `str`
        The content disposition header value.

    Returns
    -------
    `str` :
        The content disposition filename (None when absent).
    """
    feed = FeedParser()
    feed.feed('Content-Disposition: ' + content_disposition)
    filename = feed.close().get_filename()
    # Some senders yield a bytes filename; normalize it to str.
    if filename and not isinstance(filename, str):
        return filename.decode('latin1', 'ignore')
    return filename
def render_POST(self, request):
    """Handle an image-upload POST: extract the uploaded file and its
    client-side filename, process it, then redirect to the return URL."""
    # XXX request.requestHeaders
    headers = request.getAllHeaders()
    form = FieldStorage(fp=request.content, headers=headers, environ={
        b'REQUEST_METHOD': request.method,
        b'CONTENT_TYPE': headers[b'content-type'],
    })
    image = form[b"image"]
    share = b"share" in form
    # Recover the original filename from the part's own
    # Content-Disposition header by round-tripping it through FeedParser.
    p = FeedParser()
    # BUG FIX: the original re-indexed the form with a str key ('image')
    # while every other access uses bytes keys; reuse the field already
    # fetched with b"image".
    p.feed("Content-Disposition: "
           + image.headers.getheader('content-disposition'))
    m = p.close()
    filename = m.get_filename()
    value = image.value
    self.process_image(filename, value, share)
    return redirectTo(form[b"return-url"].value, request)
def parse(self, fp, headersonly=True):
    """Create a message structure from the data in a file.

    Memory-maps *fp*, feeds everything up to (and including) the first
    blank line — i.e. the header block — to a headers-only FeedParser,
    and returns the resulting message.
    """
    feedparser = FeedParser(self._class)
    feedparser._set_headersonly()
    mp = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
    # BUG FIX: mmap.readline() returns bytes, so the original str
    # accumulator raised TypeError (py3) and ``line == "\n"`` never
    # matched; it also looped forever when no blank line was present.
    chunks = []
    while True:
        line = mp.readline()
        chunks.append(line)
        if line in (b"\n", b""):  # blank separator line or EOF
            break
    # Decode like email's BytesFeedParser: ASCII with surrogateescape so
    # arbitrary 8-bit header bytes survive the round trip.
    feedparser.feed(b"".join(chunks).decode("ascii", "surrogateescape"))
    return feedparser.close()
def _collect(self):
    """Populate ``self.metadata`` and the requirement list from the first
    PKG-INFO found under ``self.path`` (ignoring anything in .eggs/)."""
    candidates = [p for p in self.path.glob("**/PKG-INFO")
                  if not str(p).startswith(".eggs/")]
    if not candidates:
        log.warning("%r has no PKG-INFO", self)
        return
    pkg_info = candidates[0]

    # parse metadata headers into self.metadata
    parser = FeedParser()
    parser.feed(pkg_info.open().read())
    for header, raw in parser.close().items():
        raw = raw.strip()
        if not raw or raw == "UNKNOWN":
            continue
        field = header.lower().replace("-", "_")
        if field in const.METADATA_MULTI:
            # multi-valued fields accumulate into a set
            self.metadata.setdefault(field, set()).add(raw)
        else:
            self.metadata[field] = raw

    # parse the sibling requires.txt, section by section
    requires_path = pkg_info.parent / "requires.txt"
    if requires_path.exists():
        sections = pkg_resources.split_sections(requires_path.open().read())
        for extra, reqs in sorted(sections, key=lambda s: s[0] or ""):
            for req in reqs:
                # the unnamed section maps to the "run" extra
                self.add_requirement(req, "run" if extra is None else extra)
def _encode_parts(self, header_data, msg_data, encoder):
    """Encodes any MIME part in the current message that is 8-bit."""
    self.headers = None
    self.message = None
    parser = FeedParser()
    parser.feed(header_data)
    parser.feed(msg_data)
    msg = parser.close()
    for part in msg.walk():
        if not part.is_multipart():
            payload = part.get_payload()
            try:
                # Probe: pure-ASCII payloads need no re-encoding.
                payload.encode('ascii')
            # BUG FIX: on Python 3, str.encode raises UnicodeEncodeError
            # (UnicodeDecodeError only occurred via Python 2's implicit
            # decode), so catch both to actually detect 8-bit payloads.
            except (UnicodeDecodeError, UnicodeEncodeError):
                del part['Content-Transfer-Encoding']
                encoder(part)
    self.parse(msg)
def parse_message_envelope(fp):
    """Read the queue-metadata header block from *fp* and return a MsgInfo.

    The stream starts with envelope pseudo-headers (Return-path,
    Envelope-to, queue bookkeeping) terminated by an "X-Queue-Meta-End"
    line; everything after that is the raw message payload itself.

    Raises ValueError when the terminator header is never seen, and
    asserts that no unexpected envelope headers are present.
    """
    # The only headers allowed in the envelope block.
    known_meta_headers = {
        'Return-path',
        'Envelope-to',
        'X-Queue-Date',
        'X-Last-Attempt',
        'X-Retries',
        'X-Queue-Meta-End',
    }
    parser = FeedParser()
    # Private FeedParser flag: parse headers only, no body handling.
    parser._headersonly = True
    while True:
        line = read_header_line(fp)
        if line == b'':
            # EOF before the terminator: the envelope is malformed.
            raise ValueError('Header "X-Queue-Meta-End" not found.')
        # similar to Python's BytesFeedParser (Python 3.3+)
        line_str = line.decode('ascii', 'surrogateescape')
        parser.feed(line_str)
        if 'X-Queue-Meta-End' in line_str:
            break
    meta_msg = parser.close()
    queue_meta = dict(meta_msg.items())
    # Fail loudly on any header we do not know how to interpret.
    unknown_headers = set(queue_meta).difference(known_meta_headers)
    assert len(unknown_headers) == 0, unknown_headers
    b_return_path = queue_meta.pop('Return-path')
    from_addr = decode_header_value(strip_brackets(b_return_path))
    b_envelope_to = queue_meta.pop('Envelope-to')
    to_addrs = parse_envelope_addrs(decode_header_value(b_envelope_to))
    queue_date = parse_datetime(queue_meta.pop('X-Queue-Date'))
    # Optional bookkeeping headers default to None.
    last = parse_datetime(queue_meta.pop('X-Last-Attempt', None))
    retries = parse_number(queue_meta.pop('X-Retries', None))
    # Remaining bytes are the actual message payload.
    msg_fp = BytesIO(fp.read())
    msg_fp.seek(0)
    msg_info = MsgInfo(from_addr, tuple(to_addrs), msg_fp, queue_date,
                       last=last, retries=retries)
    return msg_info
def search_packages_info(query):
    # type: (List[str]) -> Iterator[Dict[str, str]]
    """
    Gather details from installed distributions. Print distribution name,
    version, location, and installed files. Installed files requires a
    pip generated 'installed-files.txt' in the distributions '.egg-info'
    directory.
    """
    installed = {
        canonicalize_name(p.project_name): p
        for p in pkg_resources.working_set
    }
    query_names = [canonicalize_name(name) for name in query]
    missing = sorted(
        name for name, pkg in zip(query, query_names) if pkg not in installed
    )
    if missing:
        logger.warning('Package(s) not found: %s', ', '.join(missing))

    def get_requiring_packages(package_name):
        # type: (str) -> List[str]
        canonical_name = canonicalize_name(package_name)
        return [
            pkg.project_name for pkg in pkg_resources.working_set
            if canonical_name in [
                canonicalize_name(required.name)
                for required in pkg.requires()
            ]
        ]

    for dist in (installed[pkg] for pkg in query_names if pkg in installed):
        package = {
            'name': dist.project_name,
            'version': dist.version,
            'location': dist.location,
            'requires': [dep.project_name for dep in dist.requires()],
            'required_by': get_requiring_packages(dist.project_name),
        }
        file_list = None
        metadata = ''
        if isinstance(dist, pkg_resources.DistInfoDistribution):
            # RECORDs should be part of .dist-info metadatas
            if dist.has_metadata('RECORD'):
                lines = dist.get_metadata_lines('RECORD')
                paths = [line.split(',')[0] for line in lines]
                paths = [os.path.join(dist.location, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location) for p in paths]
            if dist.has_metadata('METADATA'):
                metadata = dist.get_metadata('METADATA')
        else:
            # Otherwise use pip's log for .egg-info's
            if dist.has_metadata('installed-files.txt'):
                paths = dist.get_metadata_lines('installed-files.txt')
                paths = [os.path.join(dist.egg_info, p) for p in paths]
                file_list = [os.path.relpath(p, dist.location) for p in paths]
            if dist.has_metadata('PKG-INFO'):
                metadata = dist.get_metadata('PKG-INFO')
        if dist.has_metadata('entry_points.txt'):
            entry_points = dist.get_metadata_lines('entry_points.txt')
            package['entry_points'] = entry_points
        if dist.has_metadata('INSTALLER'):
            for line in dist.get_metadata_lines('INSTALLER'):
                if line.strip():
                    package['installer'] = line.strip()
                    break
        # @todo: Should pkg_resources.Distribution have a
        # `get_pkg_info` method?
        feed_parser = FeedParser()
        feed_parser.feed(metadata)
        pkg_info_dict = feed_parser.close()
        for key in ('metadata-version', 'summary', 'home-page', 'author',
                    'author-email', 'license'):
            package[key] = pkg_info_dict.get(key)
        # It looks like FeedParser cannot deal with repeated headers
        package['classifiers'] = [
            line[len('Classifier: '):]
            for line in metadata.splitlines()
            if line.startswith('Classifier: ')
        ]
        if file_list:
            package['files'] = sorted(file_list)
        yield package