def find_content(project, mail):
    """Extract patch, comment and series information from a mail.

    Every ``text/*`` MIME part of ``mail`` is decoded and inspected:
    ``x-patch``/``x-diff`` parts are taken verbatim as the patch, while
    ``plain`` parts are split by ``parse_patch`` into a patch and a comment
    and scanned for a pull request URL by ``find_pull_request``.

    Args:
        project: Project the mail belongs to. Its ``linkname``,
            ``get_subject_prefix_tags()`` and ``git_send_email_only`` are
            read here.
        mail: Message object providing ``walk()`` and ``get()``.

    Returns:
        A ``MailContent`` instance, or ``None`` when a text part could not
        be decoded with any of the candidate charsets.

    Raises:
        Exception: if a patch (that is not a pull request) ends up without
            a series or a revision.
    """
    patchbuf = None
    commentbuf = ''
    pullurl = None
    is_attachment = False

    for part in mail.walk():
        if part.get_content_maintype() != 'text':
            continue

        payload = part.get_payload(decode=True)
        subtype = part.get_content_subtype()

        if not isinstance(payload, six.text_type):
            charset = part.get_content_charset()

            # Check that we have a charset that we understand. Otherwise,
            # ignore it and fallback to our standard set.
            if charset is not None:
                try:
                    codecs.lookup(charset)
                except LookupError:
                    charset = None

            # If there is no charset or if it is unknown, then try some common
            # charsets before we fail.
            if charset is None:
                try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1']
            else:
                try_charsets = [charset]

            for cset in try_charsets:
                decoded_payload = try_decode(payload, cset)
                if decoded_payload is not None:
                    break
            payload = decoded_payload

            # Could not find a valid decoded payload. Fail.
            if payload is None:
                return None

        if subtype in ['x-patch', 'x-diff']:
            is_attachment = True
            patchbuf = payload
        elif subtype == 'plain':
            c = payload

            if not patchbuf:
                (patchbuf, c) = parse_patch(payload)

            if not pullurl:
                pullurl = find_pull_request(payload)

            if c is not None:
                commentbuf += c.strip() + '\n'

    ret = MailContent()

    drop_prefixes = [project.linkname] + project.get_subject_prefix_tags()
    (name, prefixes) = clean_subject(mail.get('Subject'), drop_prefixes)
    (x, n) = parse_series_marker(prefixes)
    refs = build_references_list(mail)
    is_root = refs == []
    is_cover_letter = is_root and x == 0

    # Only treat the mail as a patch when the subject carries a "[PATCH"
    # tag, optionally preceded by other bracketed tags. The pattern is a raw
    # string so that the backslashes reach the regex engine untouched.
    patch_prefix = re.match(r'(\s*\[[^]]*\]\s*)*\[\s*PATCH',
                            mail.get('Subject'))
    is_patch = patchbuf is not None and patch_prefix

    # Inline (non-attachment) patches are dropped for projects that only
    # accept git-send-email mails when this mail is not one of those.
    drop_patch = not is_attachment and \
        project.git_send_email_only and not is_git_send_email(mail)

    if pullurl or (is_patch and not drop_patch):
        if project.git_send_email_only or not is_cover_letter:
            ret.patch_order = x or 1
            ret.patch = Patch(name=name, pull_url=pullurl, content=patchbuf,
                              date=mail_date(mail),
                              headers=mail_headers(mail))

    if patchbuf:
        ret.filenames = patch_get_filenames(patchbuf)

    # Create/update the Series and SeriesRevision objects
    if is_cover_letter or is_patch:
        msgid = mail.get('Message-Id').strip()

        # check if msgid already exists in db
        ex_msgid = get_object_by_msgid(Patch, msgid)
        if not ex_msgid:
            ex_msgid = get_object_by_msgid(Comment, msgid)

        if ex_msgid and ex_msgid.content:
            # modify msgid to allow processing new message only if it
            # contains different content than existing Patch or Comment
            if patchbuf and patchbuf != ex_msgid.content:
                refs.append(msgid)
                msgid = datetime.datetime.now().isoformat() + '-' + msgid
                ret.msgid = msgid

        # Series get a generic name when they don't start by a cover letter or
        # when they haven't received the root message yet. Except when it's
        # only 1 patch, then the series takes the patch subject as name.
        series_name = None
        if is_cover_letter or n is None:
            series_name = strip_prefixes(name)

        (ret.series, ret.revision, ret.patch_order, n) = \
            find_series_for_mail(project, series_name, msgid, is_patch,
                                 ret.patch_order, n, refs)
        ret.revision.n_patches = n or 1

        # Keep the earliest date seen for the series.
        date = mail_date(mail)
        if not ret.series.submitted or date < ret.series.submitted:
            ret.series.submitted = date

    if is_cover_letter:
        ret.revision.cover_letter = clean_content(commentbuf)
        return ret

    if commentbuf:
        # If this is a new patch, we defer setting comment.patch until
        # patch has been saved by the caller
        if ret.patch:
            ret.comment = Comment(date=mail_date(mail),
                                  content=clean_content(commentbuf),
                                  headers=mail_headers(mail))
        else:
            cpatch = find_patch_for_comment(project, refs)
            if not cpatch:
                return ret
            ret.comment = Comment(patch=cpatch, date=mail_date(mail),
                                  content=clean_content(commentbuf),
                                  headers=mail_headers(mail))

    # make sure we always have a valid (series,revision) tuple if we have a
    # patch. We don't consider pull requests a series.
    if ret.patch and not pullurl and (not ret.series or not ret.revision):
        raise Exception("Could not find series for: %s" % name)

    return ret
def parse_mail(mail, list_id=None):
    """Parse a mail and add to the database.

    Args:
        mail (`mbox.Mail`): Mail to parse and add.
        list_id (str): Mailing list ID. When falsy, the project is looked
            up from the mail headers instead.

    Returns:
        The saved ``Patch``, ``CoverLetter`` or ``Comment``. ``None`` when
        the mail carries the ``ignore`` hint, no project matches, the mail
        has neither diff nor message, or it is a comment whose parent
        submission cannot be found.

    Raises:
        ValueError: if the ``From``, ``Subject`` or ``Message-Id`` header
            is missing.
    """
    # some basic sanity checks
    if 'From' not in mail:
        raise ValueError("Missing 'From' header")

    if 'Subject' not in mail:
        raise ValueError("Missing 'Subject' header")

    if 'Message-Id' not in mail:
        raise ValueError("Missing 'Message-Id' header")

    hint = mail.get('X-Patchwork-Hint', '').lower()
    if hint == 'ignore':
        LOGGER.debug("Ignoring email due to 'ignore' hint")
        return

    if list_id:
        project = find_project_by_id(list_id)
    else:
        project = find_project_by_header(mail)

    if project is None:
        LOGGER.error('Failed to find a project for email')
        return

    # parse content

    diff, message = find_content(project, mail)

    if not (diff or message):
        return  # nothing to work with

    msgid = mail.get('Message-Id').strip()
    author = find_author(mail)
    name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
    x, _ = parse_series_marker(prefixes)
    refs = find_references(mail)
    date = find_date(mail)
    headers = find_headers(mail)
    pull_url = find_pull_request(message)

    # build objects

    if diff or pull_url:  # patches or pull requests
        # we delay the saving until we know we have a patch.
        author.save()

        delegate = find_delegate(mail)
        if not delegate and diff:
            filenames = patch_get_filenames(diff)
            delegate = auto_delegate(project, filenames)

        patch = Patch(
            msgid=msgid,
            project=project,
            name=name,
            date=date,
            headers=headers,
            submitter=author,
            content=message,
            diff=diff,
            pull_url=pull_url,
            delegate=delegate,
            state=find_state(mail))
        patch.save()
        LOGGER.debug('Patch saved')

        return patch
    elif x == 0:  # (potential) cover letters
        # if refs are empty, it's implicitly a cover letter. If not,
        # however, we need to see if a match already exists and, if
        # not, assume that it is indeed a new cover letter
        is_cover_letter = False
        if not refs == []:
            try:
                CoverLetter.objects.all().get(name=name)
            except CoverLetter.DoesNotExist:  # no match => new cover
                is_cover_letter = True
            except CoverLetter.MultipleObjectsReturned:
                # Several cover letters already share this name, so a match
                # does exist: treat the mail as a reply instead of crashing.
                pass
        else:
            is_cover_letter = True

        if is_cover_letter:
            author.save()

            cover_letter = CoverLetter(
                msgid=msgid,
                project=project,
                name=name,
                date=date,
                headers=headers,
                submitter=author,
                content=message)
            cover_letter.save()
            LOGGER.debug('Cover letter saved')

            return cover_letter

    # comments

    # we only save comments if we have the parent email
    submission = find_submission_for_comment(project, refs)
    if not submission:
        return

    author.save()

    comment = Comment(
        submission=submission,
        msgid=msgid,
        date=date,
        headers=headers,
        submitter=author,
        content=message)
    comment.save()
    LOGGER.debug('Comment saved')

    return comment
def find_content(project, mail):
    """Collect the patch, comment and series data carried by a mail.

    Text parts of ``mail`` are decoded one by one. ``x-patch``/``x-diff``
    parts become the patch as-is; ``plain`` parts go through
    ``parse_patch`` and ``find_pull_request``, and whatever text remains is
    accumulated as the comment.

    Args:
        project: Project the mail belongs to. ``linkname``,
            ``get_listemail_tag()``, ``get_subject_prefix_tags()`` and
            ``git_send_email_only`` are read here.
        mail: Message object providing ``walk()`` and ``get()``.

    Returns:
        A ``MailContent`` instance, or ``None`` if a text part cannot be
        decoded with any candidate charset.

    Raises:
        Exception: if a patch (that is not a pull request) has no series or
            no revision once processing is done.
    """
    diff_text = None
    comment_chunks = []
    pull_request_url = None
    from_attachment = False

    for section in mail.walk():
        if section.get_content_maintype() != 'text':
            continue

        body = section.get_payload(decode=True)
        kind = section.get_content_subtype()

        if not isinstance(body, six.text_type):
            declared = section.get_content_charset()

            # A declared charset that Python has no codec for is treated
            # exactly like a missing one.
            if declared is not None:
                try:
                    codecs.lookup(declared)
                except LookupError:
                    declared = None

            # Without a usable declared charset, fall back to a few common
            # ones, tried in order.
            if declared is not None:
                candidates = [declared]
            else:
                candidates = ['utf-8', 'windows-1252', 'iso-8859-1']

            raw = body
            attempts = (try_decode(raw, encoding) for encoding in candidates)
            body = next((text for text in attempts if text is not None),
                        None)

            # Nothing could decode this part: give up on the whole mail.
            if body is None:
                return None

        if kind in ('x-patch', 'x-diff'):
            from_attachment = True
            diff_text = body
        elif kind == 'plain':
            remainder = body

            if not diff_text:
                diff_text, remainder = parse_patch(body)

            if not pull_request_url:
                pull_request_url = find_pull_request(body)

            if remainder is not None:
                comment_chunks.append(remainder.strip() + '\n')

    comment_text = ''.join(comment_chunks)
    content = MailContent()

    ignored_prefixes = [project.linkname, project.get_listemail_tag()]
    ignored_prefixes.extend(project.get_subject_prefix_tags())
    name, prefixes = clean_subject(mail.get('Subject'), ignored_prefixes)
    position, total = parse_series_marker(prefixes)
    refs = build_references_list(mail)

    # A mail with no references and a series marker of 0 is a cover letter.
    is_cover_letter = refs == [] and position == 0
    has_diff = diff_text is not None

    # Inline patches are discarded for projects restricted to
    # git-send-email when the mail was not produced by it.
    skip_patch = (not from_attachment
                  and project.git_send_email_only
                  and not is_git_send_email(mail))

    if pull_request_url or (has_diff and not skip_patch):
        content.patch_order = position or 1
        content.patch = Patch(name=name,
                              pull_url=pull_request_url,
                              content=diff_text,
                              date=mail_date(mail),
                              headers=mail_headers(mail))

    if diff_text:
        content.filenames = patch_get_filenames(diff_text)

    # Create or update the Series and SeriesRevision objects.
    if is_cover_letter or has_diff:
        msgid = mail.get('Message-Id').strip()

        # The series only gets a real name from a cover letter or from a
        # mail without a patch count; otherwise the name is left to
        # find_series_for_mail.
        if is_cover_letter or total is None:
            series_name = strip_prefixes(name)
        else:
            series_name = None

        located = find_series_for_mail(project, series_name, msgid,
                                       has_diff, content.patch_order,
                                       total, refs)
        (content.series, content.revision, content.patch_order,
         total) = located
        content.revision.n_patches = total or 1

        # Remember the earliest date seen for the series.
        sent = mail_date(mail)
        if not content.series.submitted or sent < content.series.submitted:
            content.series.submitted = sent

    if is_cover_letter:
        content.revision.cover_letter = clean_content(comment_text)
        return content

    if comment_text:
        if content.patch:
            # For a new patch the caller links the comment to it once the
            # patch has been saved.
            content.comment = Comment(date=mail_date(mail),
                                      content=clean_content(comment_text),
                                      headers=mail_headers(mail))
        else:
            parent = find_patch_for_comment(project, refs)
            if not parent:
                return content
            content.comment = Comment(patch=parent,
                                      date=mail_date(mail),
                                      content=clean_content(comment_text),
                                      headers=mail_headers(mail))

    # A patch must always come with a (series, revision) pair; pull
    # requests are not considered a series.
    lacks_series = not content.series or not content.revision
    if content.patch and not pull_request_url and lacks_series:
        raise Exception("Could not find series for: %s" % name)

    return content
def find_content(project, mail):
    """Build the patch or comment objects described by a mail.

    Text parts of ``mail`` are decoded one by one. ``x-patch``/``x-diff``
    parts become the diff as-is; ``plain`` parts go through
    ``parse_patch`` and ``find_pull_request``, and whatever text remains is
    accumulated as the comment.

    Args:
        project: Project the mail belongs to; ``linkname`` is read here.
        mail: Message object providing ``walk()`` and ``get()``.

    Returns:
        A ``(patch, comment, filenames)`` tuple. All three are ``None``
        when a text part cannot be decoded, or when the mail is a comment
        for which ``find_patch_for_comment`` finds no patch.
    """
    nothing = (None, None, None)

    diff_text = None
    comment_chunks = []
    pull_request_url = None

    for section in mail.walk():
        if section.get_content_maintype() != 'text':
            continue

        body = section.get_payload(decode=True)
        kind = section.get_content_subtype()

        if not isinstance(body, six.text_type):
            declared = section.get_content_charset()

            # A declared charset that Python has no codec for is treated
            # exactly like a missing one.
            if declared is not None:
                try:
                    codecs.lookup(declared)
                except LookupError:
                    declared = None

            # Without a usable declared charset, fall back to a few common
            # ones, tried in order.
            if declared is not None:
                candidates = [declared]
            else:
                candidates = ['utf-8', 'windows-1252', 'iso-8859-1']

            raw = body
            attempts = (try_decode(raw, encoding) for encoding in candidates)
            body = next((text for text in attempts if text is not None),
                        None)

            # Nothing could decode this part: give up on the whole mail.
            if body is None:
                return nothing

        if kind in ('x-patch', 'x-diff'):
            diff_text = body
        elif kind == 'plain':
            remainder = body

            if not diff_text:
                diff_text, remainder = parse_patch(body)

            if not pull_request_url:
                pull_request_url = find_pull_request(body)

            if remainder is not None:
                comment_chunks.append(remainder.strip() + '\n')

    comment_text = ''.join(comment_chunks)

    filenames = patch_get_filenames(diff_text) if diff_text else None

    patch = None
    if pull_request_url or diff_text:
        name = clean_subject(mail.get('Subject'), [project.linkname])
        patch = Patch(name=name,
                      pull_url=pull_request_url,
                      diff=diff_text,
                      content=clean_content(comment_text),
                      date=mail_date(mail),
                      headers=mail_headers(mail))

    comment = None
    if comment_text and not patch:
        # A standalone comment is only kept when its patch can be found.
        parent = find_patch_for_comment(project, mail)
        if not parent:
            return nothing
        comment = Comment(submission=parent,
                          date=mail_date(mail),
                          content=clean_content(comment_text),
                          headers=mail_headers(mail))

    return (patch, comment, filenames)