def read_headers(buffered):
    """Consume raw header lines from *buffered* until a blank line.

    idiokit coroutine: yields reads from buffered.read_line(), raises
    ConnectionLost if the stream ends before the header block does, and
    finishes via idiokit.stop() with a normalized header dictionary.
    """
    raw_lines = []
    while True:
        line = yield buffered.read_line()
        if not line:
            # EOF before the end-of-headers marker: the peer went away.
            raise ConnectionLost()
        if line in ("\r\n", "\n"):
            # Bare CRLF/LF terminates the header block.
            break
        raw_lines.append(line)
    parsed = HeaderParser().parsestr("".join(raw_lines))
    idiokit.stop(normalized_headers(parsed.items()))
def send_async_mdn(self):
    """Deliver this asynchronous MDN to the partner's return URL.

    Parses the stored MDN headers, POSTs the payload to ``self.return_url``
    and, only on a successful (2xx) response, marks the MDN as sent by
    setting ``self.status`` to ``"S"``. Delivery is best-effort: any
    request failure leaves the status untouched.
    """
    # Turn the stored raw MDN headers into a plain dict for requests.
    parsed = HeaderParser().parsestr(self.headers.read().decode())
    header_map = dict(parsed.items())
    try:
        reply = requests.post(
            self.return_url, headers=header_map, data=self.payload.read())
        reply.raise_for_status()
    except requests.exceptions.RequestException:
        # Best-effort: swallow transport/HTTP errors and bail without
        # flipping the status, so a retry can happen later.
        return
    self.status = "S"
def _rfc822_string_to_dict(
    cls, rfc822_string: str
) -> Dict[str, Union[List[str], str]]:
    """Extract metadata information from a metadata-version 2.1 object.

    https://www.python.org/dev/peps/pep-0566/#json-compatible-metadata

    - The original key-value format is read with email.parser.HeaderParser.
    - Fields marked "Multiple-use" are collected into a list holding every
      original value for that key.
    - Comma-separated fields (e.g. Keywords-like ones listed under
      TREAT_AS_MULTI) are split and stripped into a list.
    - The message body, if present, replaces the Description header value.
    - The result is handed to _canonicalize, which normalizes the keys.
    """
    message = HeaderParser().parsestr(rfc822_string)
    # Field categories depend on the declared metadata version.
    fields = VERSIONED_METADATA_FIELDS[message.get("Metadata-Version")]
    result: Dict[str, Union[List[str], str]] = {}
    for name, raw in message.items():
        if name in fields["MULTI"]:
            result.setdefault(name, []).append(raw)
        elif name in fields["TREAT_AS_MULTI"]:
            result[name] = [part.strip() for part in raw.split(",")]
        elif name == "Description":
            result[name] = inspect.cleandoc(raw)
        else:
            result[name] = raw
    # A non-empty message body always wins over a Description header.
    body = message.get_payload()
    if body:
        if "Description" in result:
            print("Both Description and payload given - ignoring Description")
        result["Description"] = body
    return _canonicalize(result)
def _get_pip_show_pkgs(self, pkg_name, pkg_version=None, install_env=None):
    """Return the lowercased names of packages *pkg_name* depends on.

    Runs ``pip show`` (pip2 or pip3, chosen from the package's Python
    version) and parses the RFC 822-style output with HeaderParser,
    reading the comma-separated "Requires" field.
    """
    py_version = self._get_py_version(
        pkg_name=pkg_name, pkg_version=pkg_version)
    pip_cmd = 'pip2' if py_version == 'python2' else 'pip3'
    output = exec_command(
        'pip show', [pip_cmd, 'show', pkg_name],
        ret_stdout=True, env=install_env)
    # pip show output is key: value pairs, exactly what HeaderParser eats.
    info = dict(HeaderParser().parsestr(output).items())
    requires = info.get("Requires")
    if not requires:
        return []
    return [name.strip().lower() for name in requires.split(',')]
def _submission_to_mbox(submission):
    """Get an mbox representation of a single Submission.

    Handles both Patch and CoverLetter objects.

    Arguments:
        submission: The Patch object to convert.

    Returns:
        A string for the mbox file.
    """
    is_patch = isinstance(submission, Patch)
    # A postscript is anything after a "--"/"---" separator line.
    postscript_re = re.compile('\n-{2,3} ?\n')
    body = ''
    if submission.content:
        body = submission.content.strip() + "\n"
    # Split off at most one postscript section from the body.
    parts = postscript_re.split(body, 1)
    if len(parts) == 2:
        (body, postscript) = parts
        body = body.strip() + "\n"
        postscript = postscript.rstrip()
    else:
        postscript = ''
    # TODO(stephenfin): Make this use the tags infrastructure
    for comment in Comment.objects.filter(submission=submission):
        body += comment.patch_responses
    if postscript:
        body += '---\n' + postscript + '\n'
    if is_patch and submission.diff:
        body += '\n' + submission.diff
    # Seconds since the Unix epoch, used as a fallback Date header below.
    delta = submission.date - datetime.datetime.utcfromtimestamp(0)
    utc_timestamp = delta.seconds + delta.days * 24 * 3600
    mail = PatchMbox(body)
    mail['X-Patchwork-Submitter'] = email.utils.formataddr(
        (str(Header(submission.submitter.name, mail.patch_charset)),
         submission.submitter.email))
    mail['X-Patchwork-Id'] = str(submission.id)
    if is_patch and submission.delegate:
        mail['X-Patchwork-Delegate'] = str(submission.delegate.email)
    mail.set_unixfrom('From patchwork ' + submission.date.ctime())
    # Copy the original message headers back onto the generated mail.
    orig_headers = HeaderParser().parsestr(str(submission.headers))
    for key, val in orig_headers.items():
        # we set this ourselves
        if key == 'Content-Transfer-Encoding':
            continue
        # we don't save GPG signatures described in RFC1847 [1] so this
        # Content-Type value is invalid
        # [1] https://tools.ietf.org/html/rfc1847
        if key == 'Content-Type' and val == 'multipart/signed':
            continue
        mail[key] = val
    if 'Date' not in mail:
        mail['Date'] = email.utils.formatdate(utc_timestamp)
    # NOTE(stephenfin) http://stackoverflow.com/a/28584090/613428
    if six.PY3:
        mail = mail.as_bytes(True).decode()
    else:
        mail = mail.as_string(True)
    return mail
# Initialize imaplib object with an SSL connection to our host
imap = imaplib.IMAP4_SSL(host)

# Login to the IMAP server
imap.login(username, password)

# Select the inbox folder; `messages` holds the message count as bytes
_, messages = imap.select("Inbox")

# Get the message IDs matching ALL (a single space-separated bytes string)
_, message_count = imap.search(None, 'ALL')

# FIX: message_count[0] is b'1 2 3 ...'; len() of the raw bytes counted
# characters, not messages. Split on whitespace to count the IDs.
print(f'Total number of messages in inbox: {len(message_count[0].split())}')

# Fetch the highest-numbered (most recent) message and decode to string.
# messages[0] is the mailbox's message count, which doubles as the
# sequence number of the last message.
_, msg = imap.fetch(messages[0].decode(), "(RFC822)")

# Iterate over the response
for response in msg:
    # Confirm that we have a tuple - 2nd row will be our content
    if isinstance(response, tuple):
        # Extract headers
        headers = HeaderParser().parsestr(
            email.message_from_bytes(response[1]).as_string())
        # Iterate over and print our headers
        for k, v in headers.items():
            print(f"\nName={k}\nValue={v}")

# Close and logout
imap.close()
imap.logout()
def _submission_to_mbox(submission):
    """Get an mbox representation of a single Submission.

    Handles both Patch and CoverLetter objects.

    NOTE(review): this is a near-duplicate of another _submission_to_mbox
    definition elsewhere in this file - confirm which one is live.

    Arguments:
        submission: The Patch object to convert.

    Returns:
        A string for the mbox file.
    """
    is_patch = isinstance(submission, Patch)
    # Anything after a "--"/"---" separator line is treated as postscript.
    postscript_re = re.compile('\n-{2,3} ?\n')
    body = ''
    if submission.content:
        body = submission.content.strip() + "\n"
    parts = postscript_re.split(body, 1)
    if len(parts) == 2:
        (body, postscript) = parts
        body = body.strip() + "\n"
        postscript = postscript.rstrip()
    else:
        postscript = ''
    # TODO(stephenfin): Make this use the tags infrastructure
    for comment in Comment.objects.filter(submission=submission):
        body += comment.patch_responses
    if postscript:
        body += '---\n' + postscript + '\n'
    if is_patch and submission.diff:
        body += '\n' + submission.diff
    # Seconds since the Unix epoch; used for a fallback Date header.
    delta = submission.date - datetime.datetime.utcfromtimestamp(0)
    utc_timestamp = delta.seconds + delta.days * 24 * 3600
    mail = PatchMbox(body)
    mail['X-Patchwork-Submitter'] = email.utils.formataddr((
        str(Header(submission.submitter.name, mail.patch_charset)),
        submission.submitter.email))
    mail['X-Patchwork-Id'] = str(submission.id)
    if is_patch and submission.delegate:
        mail['X-Patchwork-Delegate'] = str(submission.delegate.email)
    mail.set_unixfrom('From patchwork ' + submission.date.ctime())
    # Re-apply the stored original headers, filtering a few of them.
    orig_headers = HeaderParser().parsestr(str(submission.headers))
    for key, val in orig_headers.items():
        # we set this ourselves
        if key == 'Content-Transfer-Encoding':
            continue
        # we don't save GPG signatures described in RFC1847 [1] so this
        # Content-Type value is invalid
        # [1] https://tools.ietf.org/html/rfc1847
        if key == 'Content-Type' and val == 'multipart/signed':
            continue
        mail[key] = val
    if 'Date' not in mail:
        mail['Date'] = email.utils.formatdate(utc_timestamp)
    # NOTE(stephenfin) http://stackoverflow.com/a/28584090/613428
    if six.PY3:
        mail = mail.as_bytes(True).decode()
    else:
        mail = mail.as_string(True)
    return mail
def _submission_to_mbox(submission):
    """Get an mbox representation of a single submission.

    Handles both Patch and Cover objects.

    Arguments:
        submission: The Patch or Cover object to convert.

    Returns:
        A string for the mbox file.
    """
    is_patch = isinstance(submission, Patch)
    # Anything after a "--"/"---" separator line is treated as postscript.
    postscript_re = re.compile('\n-{2,3} ?\n')

    body = ''
    if submission.content:
        body = submission.content.strip() + "\n"

    parts = postscript_re.split(body, 1)
    if len(parts) == 2:
        (body, postscript) = parts
        body = body.strip() + "\n"
        postscript = postscript.rstrip()
    else:
        postscript = ''

    # TODO(stephenfin): Make this use the tags infrastructure
    if is_patch:
        for comment in PatchComment.objects.filter(patch=submission):
            body += comment.patch_responses
    else:
        for comment in CoverComment.objects.filter(cover=submission):
            body += comment.patch_responses

    if postscript:
        body += '---\n' + postscript + '\n'

    if is_patch and submission.diff:
        body += '\n' + submission.diff

    # Seconds since the Unix epoch; used for a fallback Date header below.
    delta = submission.date - datetime.datetime.utcfromtimestamp(0)
    utc_timestamp = delta.seconds + delta.days * 24 * 3600

    mail = PatchMbox(body)
    mail['X-Patchwork-Submitter'] = email.utils.formataddr(
        (str(Header(submission.submitter.name, mail.patch_charset)),
         submission.submitter.email))
    mail['X-Patchwork-Id'] = str(submission.id)
    if is_patch and submission.delegate:
        mail['X-Patchwork-Delegate'] = str(submission.delegate.email)
    mail.set_unixfrom('From patchwork ' + submission.date.ctime())

    orig_headers = HeaderParser().parsestr(str(submission.headers))
    for key, val in orig_headers.items():
        # we set these ourselves
        if key in ['Content-Type', 'Content-Transfer-Encoding']:
            continue
        # FIX: removed the old `key == 'Content-Type' and
        # val == 'multipart/signed'` check - it was unreachable, since
        # Content-Type is always skipped by the guard above.
        if key == 'From':
            name, addr = split_from_header(val)
            if addr == submission.project.listemail:
                # If From: is the list address (typically DMARC munging),
                # then use the submitter details (which are cleaned up in
                # the parser) in the From: field so that the patch author
                # details are correct when applied with git am.
                mail['X-Patchwork-Original-From'] = val
                val = mail['X-Patchwork-Submitter']
        mail[key] = val

    if 'Date' not in mail:
        mail['Date'] = email.utils.formatdate(utc_timestamp)

    # NOTE(stephenfin) http://stackoverflow.com/a/28584090/613428
    mail = mail.as_bytes(True).decode()

    return mail
class AboutFile(object):
    """
    Represent an ABOUT file and functions to parse and validate a file.

    Parsing uses email.parser.HeaderParser on a pre-processed copy of the
    file; problems are accumulated in self.warnings and self.errors rather
    than raised.

    NOTE(review): this code targets Python 2 (httplib, urlparse,
    str.decode, "rU" open mode) - confirm the interpreter before reuse.
    """
    def __init__(self, location=None):
        # Value of the about_resource field, resolved during validation.
        self.about_resource_path = None
        # Filesystem path of the ABOUT file itself.
        self.location = location
        # Raw HeaderParser Message, set by parse().
        self.parsed = None
        self.parsed_fields = None
        # Lowercased field name -> value, built by normalize().
        self.validated_fields = {}
        # map _file fields to a resolved OS file system absolute location
        # this is not used at all for now
        self.file_fields_locations = {}
        self.warnings = []
        self.errors = []
        # Parsing is eager: providing a location triggers the full
        # parse/normalize/validate pipeline from the constructor.
        if self.location:
            self.parse()

    def parse(self):
        """
        Parse and validates a file-like object in an ABOUT structure.

        Populates self.parsed, then runs normalize() and validate().
        I/O and parsing failures are recorded in self.errors, not raised.
        """
        try:
            with open(self.location, "rU") as file_in:
                #FIXME: we should open the file only once, it is always small enough to be kept in memory
                no_blank_lines, pre_proc_warnings = self.pre_process(file_in)
                self.warnings.extend(pre_proc_warnings)
                # HeaderParser.parse returns the parsed file as keys and
                # values (allows for multiple keys, and it doesn't validate)
                self.parsed = HeaderParser().parse(no_blank_lines)
        except IOError as e:
            err_msg = 'Cannot read ABOUT file:' + repr(e)
            self.errors.append(Error(FILE, None, self.location, err_msg))
        except Exception as e:
            err_msg = 'Unknown ABOUT processing error:' + repr(e)
            self.errors.append(Error(UNKNOWN, None, self.location, err_msg))
        if self.parsed:
            self.warnings.extend(self.normalize())
            self.validate()

    def pre_process(self, file_in):
        """
        Pre-process an ABOUT file before using the email header parser.
        Return a tuple with a file-like object and a list of warnings.
        In the file-like object we remove:
        - blank/empty lines
        - invalid lines that cannot be parsed
        - spaces around the colon separator
        This also checks for field names with incorrect characters that
        could not be otherwise parsed.
        """
        #TODO: add line endings normalization to LF
        about_string = ''
        warnings = []
        # Tracks whether the previous kept line can legally be continued
        # by an indented line.
        last_line_is_field_or_continuation = False
        for line in file_in.readlines():
            # continuation line
            if line.startswith(' '):
                warn = self.check_line_continuation(line, last_line_is_field_or_continuation)
                if last_line_is_field_or_continuation:
                    about_string += line
                if warn:
                    warnings.append(warn)
                continue
            # empty or blank line
            if not line.rstrip():
                last_line_is_field_or_continuation = False
                continue
            # From here, we should have a field line and consider not a field
            # line if there is no colon
            warn, has_colon = self.check_line_has_colon(line)
            if not has_colon:
                last_line_is_field_or_continuation = False
                warnings.append(warn)
                continue
            # invalid space characters
            splitted = line.split(':', 1)
            field_name = splitted[0].rstrip()
            warn = self.check_invalid_space_characters(field_name, line)
            if warn:
                last_line_is_field_or_continuation = False
                warnings.append(warn)
                continue
            else:
                # Re-join with trailing spaces stripped from the name.
                line = field_name + ":" + splitted[1]
            # invalid field characters
            invalid_chars, warn = self.check_invalid_chars_in_field_name(field_name, line)
            if warn:
                warnings.append(warn)
                last_line_is_field_or_continuation = False
                continue
            # finally add valid field lines
            last_line_is_field_or_continuation = True
            about_string += line
        # TODO: we should either yield and not return a stringIO or return a string
        return StringIO(about_string), warnings

    @staticmethod
    def check_line_continuation(line, continuation):
        """Return a Warn if an indented line has nothing to continue,
        else an empty string (falsy)."""
        warnings = ""
        if not continuation:
            msg = 'Line does not contain a field or continuation: ignored.'
            warnings = Warn(IGNORED, None, line, msg)
        return warnings

    @staticmethod
    def check_line_has_colon(line):
        """Return (warning-or-"", has_colon) for a candidate field line."""
        warnings = ""
        has_colon = True
        if ':' not in line:
            msg = 'Line does not contain a field: ignored.'
            warnings = Warn(IGNORED, None, line, msg)
            has_colon = False
        return warnings, has_colon

    @staticmethod
    def check_invalid_space_characters(field_name, line):
        """Return a Warn if the field name contains spaces, else ""."""
        warnings = ""
        if ' ' in field_name:
            msg = 'Field name contains spaces: line ignored.'
            warnings = Warn(IGNORED, field_name, line, msg)
        return warnings

    @staticmethod
    def check_invalid_chars_in_field_name(field_name, line):
        """
        Return a sequence of invalid characters in a field name.
        From spec 0.8.0:
            A field name can contain only these US-ASCII characters:
            <li> digits from 0 to 9 </li>
            <li> uppercase and lowercase letters from A to Z</li>
            <li> the _ underscore sign. </li>
        """
        supported = string.digits + string.ascii_letters + '_'
        warnings = ""
        invalid_chars = [char for char in field_name if char not in supported]
        if invalid_chars:
            msg = "Field name contains invalid characters: '%s': line ignored." % ''.join(invalid_chars)
            warnings = Warn(IGNORED, field_name, line, msg)
        return invalid_chars, warnings

    def normalize(self):
        """
        Converts field names to lower case. If a field name exist multiple
        times, keep only the last occurrence.

        Fills self.validated_fields and returns a list of warnings.
        """
        warnings = []
        for field_name, value in self.parsed.items():
            field_name = field_name.lower()
            if field_name in self.validated_fields.keys():
                field_value = self.validated_fields[field_name]
                msg = 'Duplicate field names found: ignored.'
                warnings.append(Warn(IGNORED, field_name, field_value, msg))
            # if this is a multi-line value, we want to strip the first
            # space of the continuation lines
            if '\n' in value:
                value = value.replace('\n ', '\n')
            self.validated_fields[field_name] = value
        return warnings

    def validate(self):
        """
        Validate a parsed about file.

        Runs file-name checks plus every per-field validator, appending
        to self.errors / self.warnings as it goes.
        """
        invalid_filename = self.invalid_chars_in_about_file_name(self.location)
        if invalid_filename:
            self.errors.append(Error(ASCII, None, invalid_filename, 'The filename contains invalid character.'))
        dup_filename = self.duplicate_file_names_when_lowercased(self.location)
        if dup_filename:
            self.errors.append(Error(FILE, None, dup_filename, 'Duplicated filename in the same directory detected.'))
        self.validate_field_values_are_not_empty()
        self.validate_about_resource_exist()
        self.validate_mandatory_fields_are_present()
        for field_name, value in self.validated_fields.items():
            self.check_is_ascii(self.validated_fields.get(field_name))
            self.validate_known_optional_fields(field_name)
            self.validate_file_field_exists(field_name, value)
            self.validate_url_field(field_name, network_check=False)
            self.validate_spdx_license(field_name, value)
            self.check_date_format(field_name)

    def validate_field_values_are_not_empty(self):
        """Record an error (mandatory) or warning (optional/unknown) for
        every field whose value is blank."""
        for field_name, value in self.validated_fields.items():
            if value.strip():
                continue
            if field_name in MANDATORY_FIELDS:
                self.errors.append(Error(VALUE, field_name, None, 'This mandatory field has no value.'))
            elif field_name in OPTIONAL_FIELDS:
                self.warnings.append(Warn(VALUE, field_name, None, 'This optional field has no value.'))
            else:
                self.warnings.append(Warn(VALUE, field_name, None, 'This field has no value.'))

    def _exists(self, file_path):
        """
        Return True if path exists.

        NOTE(review): returns None (not False) when file_path is falsy.
        """
        if file_path:
            return exists(self._location(file_path))

    def _location(self, file_path):
        """
        Return absolute location for a posix file_path, resolved
        relative to the directory of the ABOUT file.
        """
        if file_path:
            return abspath(join(dirname(self.location), file_path.strip()))
        return file_path

    def _save_location(self, field_name, file_path):
        # TODO: we likely should not inject this in the validated fields and maybe use something else for this
        self.file_fields_locations[field_name] = self._location(file_path)

    def validate_about_resource_exist(self):
        """
        Ensure that the file referenced by the about_resource field exists.
        """
        about_resource = 'about_resource'
        # Note: a missing 'about_resource' field error will be caught
        # in validate_mandatory_fields_are_present(self)
        if about_resource in self.validated_fields \
                and self.validated_fields[about_resource]:
            self.about_resource_path = self.validated_fields[about_resource]
            if not self._exists(self.about_resource_path):
                self.errors.append(Error(FILE, about_resource, self.about_resource_path, 'File does not exist.'))
        self._save_location(about_resource, self.about_resource_path)

    def validate_file_field_exists(self, field_name, file_path):
        """
        Ensure a _file field in the OPTIONAL_FIELDS points to an existing
        file that can be read as UTF-8.
        """
        if not field_name.endswith('_file'):
            return
        if not file_path:
            return
        if not field_name in OPTIONAL_FIELDS:
            return
        if not self._exists(file_path):
            self.warnings.append(Warn(FILE, field_name, file_path, 'File does not exist.'))
            return
        self._save_location(field_name, file_path)
        try:
            with codecs.open(self._location(file_path), 'r', 'utf8', errors='replace') as f:
                # attempt to read the file to catch codec errors
                f.readlines()
        except Exception as e:
            self.errors.append(Error(FILE, field_name, file_path, 'Cannot read file: %s' % repr(e)))
        return

    def validate_mandatory_fields_are_present(self):
        """Record an error for every MANDATORY_FIELDS entry not parsed."""
        for field_name in MANDATORY_FIELDS:
            if field_name not in self.validated_fields.keys():
                self.errors.append(Error(VALUE, field_name, None, 'Mandatory field missing'))

    def validate_known_optional_fields(self, field_name):
        """
        Validate which known optional fields are present; warn about
        fields that are neither mandatory, optional nor file-location.
        """
        if (field_name not in OPTIONAL_FIELDS
                and field_name not in MANDATORY_FIELDS
                and field_name not in FILE_LOCATIONS_FIELDS):
            msg = 'Not a mandatory or optional field'
            self.warnings.append(Warn(IGNORED, field_name, self.validated_fields[field_name], msg))

    def validate_spdx_license(self, field_name, field_value):
        """Check each whitespace-separated token of license_spdx against
        SPDX_LICENSE_IDS; warn on wrong case, error on unknown ids."""
        if not field_name == 'license_spdx':
            return
        spdx_ids = field_value.split()
        for sid in spdx_ids:
            # valid sid, matching the case
            if sid in SPDX_LICENSE_IDS.values():
                continue
            sidl = sid.lower()
            # conjunctions
            if sidl in ['or', 'and']:
                continue
            # lowercase check
            try:
                standard_id = SPDX_LICENSE_IDS[sidl]
                msg = "Non standard SPDX license id case. Should be '%s'." % (standard_id)
                self.warnings.append(Warn(SPDX, field_name, sid, msg))
            except KeyError:
                self.errors.append(Error(SPDX, field_name, sid, 'Invalid SPDX license id.'))

    def validate_url_field(self, field_name, network_check=False):
        """
        Ensure that URL field is a valid URL. If network_check is True,
        do a network check to verify if it points to a live URL.
        """
        if not field_name.endswith('_url') or field_name not in OPTIONAL_FIELDS:
            return
        # The "field is empty" warning will be thrown in the
        # "validate_field_values_are_not_empty"
        value = self.validated_fields[field_name]
        if not value:
            return
        try:
            is_url = self.check_url(value, network_check)
            if not is_url:
                msg = 'URL is either not in a valid format, or it is not reachable.'
                self.warnings.append(Warn(URL, field_name, value, msg))
        except KeyError:
            return

    def check_is_ascii(self, str):
        """
        Return True if string is composed only of US-ASCII characters.

        NOTE(review): the parameter shadows the builtin `str`, and
        str.decode only exists on Python 2 byte strings - on Python 3
        this would raise AttributeError. Confirm intended interpreter.
        """
        try:
            str.decode('ascii')
        except (UnicodeEncodeError, UnicodeDecodeError):
            msg = '%s is not valid US-ASCII.' % str
            self.errors.append(Error(ASCII, str, None, msg))
            return False
        return True

    def check_date_format(self, field_name):
        """
        Return True if date_string is a supported date format as:
        YYYY-MM-DD
        """
        if not field_name == 'date':
            return
        date_strings = self.validated_fields[field_name]
        if not date_strings:
            return
        supported_dateformat = '%Y-%m-%d'
        try:
            return bool(datetime.strptime(date_strings, supported_dateformat))
        except ValueError:
            msg = 'Unsupported date format, use YYYY-MM-DD.'
            self.warnings.append(Warn(DATE, field_name, date_strings, msg))
        return False

    def check_url(self, url, network_check=False):
        """
        Return True if a URL is valid. Optionally check that this is a
        live URL (using a HEAD request without downloading the whole
        file).
        """
        scheme, netloc, path, _p, _q, _frg = urlparse.urlparse(url)
        url_has_valid_format = scheme in ('http', 'https', 'ftp') and netloc
        if not url_has_valid_format:
            return False
        if network_check:
            # FIXME: we should only check network connection ONCE per run
            # and cache the results, not do this here
            if self.check_network_connection():
                # FIXME: HEAD request DO NOT WORK for ftp://
                return self.check_url_reachable(netloc, path)
            else:
                print('No network connection detected.')
        return url_has_valid_format

    def check_network_connection(self):
        """
        Return True if an HTTP connection to the live internet is
        possible.
        """
        try:
            http_connection = httplib.HTTPConnection('dejacode.org')
            http_connection.connect()
            return True
        except socket.error:
            return False

    def check_url_reachable(self, host, path):
        """Return the HTTP status of a HEAD request (truthy even for
        404), or False on connection failure."""
        # FIXME: we are only checking netloc and path ... NOT the whole url
        # FXIME: this will not work with FTP
        try:
            conn = httplib.HTTPConnection(host)
            conn.request('HEAD', path)
            # FIXME: we will consider a 404 as a valid status (this is a True value)
            # This is the list of all the HTTP status code
            # http://en.wikipedia.org/wiki/List_of_HTTP_status_codes
            return conn.getresponse().status
        except (httplib.HTTPException, socket.error):
            return False

    def get_about_info(self, update_path, about_object):
        """
        Creates a row of data for an ABOUT object: the path, one column
        per mandatory/optional field, then joined warnings and errors.
        """
        row = [update_path]
        for field in MANDATORY_FIELDS + OPTIONAL_FIELDS:
            if field in about_object.validated_fields.keys():
                row += [about_object.validated_fields[field]]
            else:
                row += ['']
        warnings = [repr(w) for w in about_object.warnings]
        errors = [repr(e) for e in about_object.errors]
        row += ['\n'.join(warnings), '\n'.join(errors)]
        return row

    def invalid_chars_in_about_file_name(self, file_path):
        """
        Return a sequence of invalid characters found in a file name.
        From spec 0.8.0:
            A file name can contain only these US-ASCII characters:
            <li> digits from 0 to 9 </li>
            <li> uppercase and lowercase letters from A to Z</li>
            <li> the _ underscore, - dash and . period signs. </li>
        """
        supported = string.digits + string.ascii_letters + '_-.'
        file_name = resource_name(file_path)
        return [char for char in file_name if char not in supported]

    def duplicate_file_names_when_lowercased(self, file_location):
        """
        Return a sequence of duplicate file names in the same directory
        as file_location when lower cased.
        From spec 0.8.0:
            The case of a file name is not significant. On case-sensitive
            file systems (such as Linux), a tool must raise an error if
            two ABOUT files stored in the same directory have the same
            lowercase file name.

        NOTE(review): this looks broken - `names` starts empty and nothing
        is ever appended unless a name is already in it, so it always
        returns []; `file_name_lower` is computed but never used. The
        lowercased names should be tracked as they are seen.
        """
        # TODO: Add a test
        file_name = resource_name(file_location)
        file_name_lower = file_name.lower()
        parent_dir = dirname(file_location)
        names = []
        for name in listdir(parent_dir):
            if name.lower() in names:
                names.append(name)
        return names

    def license_text(self):
        """Return the text of the license_text_file, or "" if it is
        missing or unreadable."""
        try:
            license_text_path = self.file_fields_locations["license_text_file"]
            with open(license_text_path, 'rU') as f:
                return f.read()
        except Exception as e:
            pass
            #return empty string if the license file does not exist
        return ""