def find_series_for_mail(project, name, msgid, is_patch, order, refs):
    """Find, or prepare, the series and revision a mail belongs to.

    The thread root is ``msgid`` itself when ``refs`` is an empty list,
    otherwise it is the last entry of ``refs``.  The revisions recorded for
    that root in ``project`` are queried and the first item of the reversed
    queryset is used.

    When indexing that queryset raises ``IndexError`` (no revision found),
    new ``Series`` and ``SeriesRevision`` objects are constructed instead;
    they are not saved here.

    Returns:
        A ``(series, revision, order)`` tuple.  ``order`` is the value
        returned by ``find_patch_order`` when a previous patch was found,
        otherwise the value passed in.
    """
    root_msgid = msgid if refs == [] else refs[-1]

    try:
        # latest revision recorded for this mail thread
        thread_revisions = (
            SeriesRevision.objects
            .filter(series__project=project, root_msgid=root_msgid)
            .reverse()
        )
        revision = thread_revisions[0]
        series = revision.series
        if name:
            series.name = name
        if is_patch:
            earlier_patch = find_previous_patch(revision, order, refs)
            if earlier_patch:
                order = find_patch_order(thread_revisions, earlier_patch,
                                         order)
                revision = revision.duplicate(exclude_patches=(order, ))
                # series has been updated, grab the new instance
                series = revision.series
    except IndexError:
        # NOTE(review): the try body also covers the helper calls above, so
        # an IndexError raised inside them ends up here as well.
        series = Series(name=name if name else SERIES_DEFAULT_NAME)
        revision = SeriesRevision(root_msgid=root_msgid)

    return series, revision, order
def find_series_for_mail(project, name, msgid, is_patch, order, n_patches,
                         refs, is_cover_letter):
    """Find, or prepare, the series and revision a mail belongs to.

    The thread root is ``msgid`` itself when ``refs`` is an empty list,
    otherwise it is the last entry of ``refs``.  The revisions recorded for
    that root in ``project`` are queried and the first item of the reversed
    queryset is used.

    For a patch, the revision is duplicated (excluding the slot ``order``)
    when either a previous patch is found, or a different patch (different
    ``msgid``) is already stored at position ``order``.  For a cover letter
    that has references, the revision is duplicated excluding every patch
    position from 1 to ``revision.n_patches``.

    When indexing the queryset raises ``IndexError`` (no revision found),
    new ``Series`` and ``SeriesRevision`` objects are constructed instead;
    they are not saved here.

    Returns:
        A ``(series, revision, order, n_patches)`` tuple.  ``order`` and
        ``n_patches`` are the values returned by ``find_patch_order`` when
        a previous patch was found, otherwise the values passed in.
    """
    root_msgid = msgid if refs == [] else refs[-1]

    try:
        # latest revision recorded for this mail thread
        thread_revisions = (
            SeriesRevision.objects
            .filter(series__project=project, root_msgid=root_msgid)
            .reverse()
        )
        revision = thread_revisions[0]
        series = revision.series
        if name:
            series.name = name

        if is_patch:
            earlier_patch = find_previous_patch(project, revision, order,
                                                refs)
            if earlier_patch:
                order, n_patches = find_patch_order(
                    thread_revisions, earlier_patch, order, n_patches)
                revision = revision.duplicate(exclude_patches=(order, ))
                # series has been updated, grab the new instance
                series = revision.series
            else:
                try:
                    stored_patch = SeriesRevisionPatch.objects.get(
                        revision=revision, order=order).patch
                    if stored_patch and stored_patch.msgid != msgid:
                        # this is a new patch in the thread
                        revision = revision.duplicate(
                            exclude_patches=(order, ))
                        series = revision.series
                except SeriesRevisionPatch.DoesNotExist:
                    # nothing stored at this position yet
                    pass
        elif is_cover_letter and refs != []:
            all_positions = range(1, revision.n_patches + 1)
            revision = revision.duplicate(exclude_patches=all_positions)
            series = revision.series
    except IndexError:
        # NOTE(review): the try body also covers the helper calls above, so
        # an IndexError raised inside them ends up here as well.
        series = Series(name=name if name else SERIES_DEFAULT_NAME)
        revision = SeriesRevision(root_msgid=root_msgid)

    return series, revision, order, n_patches
def parse_mail(mail, list_id=None):
    """Parse a mail and store the resulting object in the database.

    Depending on its content, the mail is saved as a patch (or pull
    request), a cover letter, or a comment on an existing submission.

    Args:
        mail (`mbox.Mail`): Mail to parse and add.
        list_id (str): Mailing list ID.  When given, the project is looked
            up by this ID; otherwise it is looked up from the mail itself.

    Returns:
        The saved ``Patch``, ``CoverLetter`` or ``Comment``.  ``None`` when
        the mail carries an 'ignore' hint, no project is found, the mail
        has neither diff nor message content, or no parent submission is
        found for a comment.

    Raises:
        ValueError: If the 'From', 'Subject' or 'Message-Id' header is
            missing, or the cleaned 'Message-Id' is empty.
    """
    # mandatory headers
    if 'From' not in mail:
        raise ValueError("Missing 'From' header")
    if 'Subject' not in mail:
        raise ValueError("Missing 'Subject' header")
    if 'Message-Id' not in mail:
        raise ValueError("Missing 'Message-Id' header")

    hint = clean_header(mail.get('X-Patchwork-Hint', ''))
    if hint and hint.lower() == 'ignore':
        logger.debug("Ignoring email due to 'ignore' hint")
        return None

    project = (find_project_by_id(list_id) if list_id
               else find_project_by_header(mail))
    if project is None:
        logger.error('Failed to find a project for email')
        return None

    # ---- metadata ----
    msgid = clean_header(mail.get('Message-Id'))
    if not msgid:
        raise ValueError("Broken 'Message-Id' header")
    msgid = msgid[:255]

    author = find_author(mail)
    subject = mail.get('Subject')
    name, prefixes = clean_subject(subject, [project.linkname])
    is_comment = subject_check(subject)
    patch_number, series_total = parse_series_marker(prefixes)
    version = parse_version(name, prefixes)
    refs = find_references(mail)
    date = find_date(mail)
    headers = find_headers(mail)

    # ---- content ----
    if is_comment:
        diff, message = find_comment_content(mail)
    else:
        diff, message = find_patch_content(mail)

    if not diff and not message:
        return None  # nothing to work with

    pull_url = parse_pull_request(message)

    # ---- patches or pull requests ----
    if not is_comment and (diff or pull_url):
        # the author is only saved once we know we have a patch
        author.save()

        delegate = find_delegate_by_header(mail)
        if not delegate and diff:
            delegate = find_delegate_by_filename(project,
                                                 find_filenames(diff))

        # without a series marker there can never be an existing series
        # to match against
        series = None
        if series_total:
            series = find_series(project, mail)
        else:
            patch_number = series_total = 1

        # A new series is created if:
        # - there is no existing series to assign this patch to, or
        # - there is an existing series, but it already has a patch with
        #   this number in it
        if not series or SeriesPatch.objects.filter(
                series=series, number=patch_number).count():
            series = Series(project=project,
                            date=date,
                            submitter=author,
                            version=version,
                            total=series_total)
            series.save()

            # NOTE(stephenfin) We must save references for series. We
            # do this to handle the case where a later patch is
            # received first. Without storing references, it would not
            # be possible to identify the relationship between patches
            # as the earlier patch does not reference the later one.
            for ref in refs + [msgid]:
                ref = ref[:255]
                # we don't want duplicates
                try:
                    # we could have a ref to a previous series. (For
                    # example, a series sent in reply to another
                    # series.) That should not create a series ref
                    # for this series, so check for the msg-id only,
                    # not the msg-id/series pair.
                    SeriesReference.objects.get(msgid=ref,
                                                series__project=project)
                except SeriesReference.DoesNotExist:
                    SeriesReference.objects.create(series=series, msgid=ref)

        patch = Patch(
            msgid=msgid,
            project=project,
            name=name[:255],
            date=date,
            headers=headers,
            submitter=author,
            content=message,
            diff=diff,
            pull_url=pull_url,
            delegate=delegate,
            state=find_state(mail))
        patch.save()
        logger.debug('Patch saved')

        # add to a series if we have found one, and we have a numbered
        # patch. Don't add unnumbered patches (for example diffs sent
        # in reply, or just messages with random refs/in-reply-tos)
        if series and patch_number:
            series.add_patch(patch, patch_number)

        return patch

    # ---- (potential) cover letters ----
    if patch_number == 0:
        # if refs are empty, it's implicitly a cover letter. If not,
        # however, we need to see if a match already exists and, if
        # not, assume that it is indeed a new cover letter
        is_cover_letter = False
        if not is_comment:
            if refs == []:
                is_cover_letter = True
            else:
                try:
                    CoverLetter.objects.all().get(name=name)
                except CoverLetter.DoesNotExist:
                    # if no match, this is a new cover letter
                    is_cover_letter = True
                except CoverLetter.MultipleObjectsReturned:
                    # if multiple cover letters are found, just ignore
                    pass

        if is_cover_letter:
            author.save()

            # we don't use 'find_series' here as a cover letter will
            # always be the first item in a thread, thus the references
            # could only point to a different series or unrelated
            # message
            try:
                series = SeriesReference.objects.get(
                    msgid=msgid, series__project=project).series
            except SeriesReference.DoesNotExist:
                series = None

            if not series:
                series = Series(project=project,
                                date=date,
                                submitter=author,
                                version=version,
                                total=series_total)
                series.save()

                # we don't save the in-reply-to or references fields
                # for a cover letter, as they can't refer to the same
                # series
                SeriesReference.objects.get_or_create(series=series,
                                                      msgid=msgid)

            cover_letter = CoverLetter(
                msgid=msgid,
                project=project,
                name=name[:255],
                date=date,
                headers=headers,
                submitter=author,
                content=message)
            cover_letter.save()
            logger.debug('Cover letter saved')

            series.add_cover_letter(cover_letter)

            return cover_letter

    # ---- comments ----
    # we only save comments if we have the parent email
    submission = find_submission_for_comment(project, refs)
    if not submission:
        return None

    author.save()

    comment = Comment(
        submission=submission,
        msgid=msgid,
        date=date,
        headers=headers,
        submitter=author,
        content=message)
    comment.save()
    logger.debug('Comment saved')

    return comment
def parse_mail(mail, list_id=None):
    """Parse a mail and add to the database.

    Args:
        mail (`mbox.Mail`): Mail to parse and add.
        list_id (str): Mailing list ID

    Returns:
        patch/cover letter/comment
        Or None if nothing is found in the mail, the mail carries an
        ``X-Patchwork-Hint: ignore`` header, the project is not found,
        creating the patch raises ``IntegrityError`` (logged as a duplicate
        mail), or a comment has no parent submission.

    Raises:
        ValueError if the 'From', 'Subject' or 'Message-Id' header is
        missing, or the cleaned 'Message-Id' is empty.
        Other truly unexpected issues may bubble up from the DB.
    """
    # some basic sanity checks
    if 'From' not in mail:
        raise ValueError("Missing 'From' header")

    if 'Subject' not in mail:
        raise ValueError("Missing 'Subject' header")

    if 'Message-Id' not in mail:
        raise ValueError("Missing 'Message-Id' header")

    hint = clean_header(mail.get('X-Patchwork-Hint', ''))
    if hint and hint.lower() == 'ignore':
        logger.debug("Ignoring email due to 'ignore' hint")
        return

    project = find_project(mail, list_id)

    if project is None:
        logger.error('Failed to find a project for email')
        return

    # parse metadata

    msgid = clean_header(mail.get('Message-Id'))
    if not msgid:
        raise ValueError("Broken 'Message-Id' header")
    msgid = msgid[:255]

    subject = mail.get('Subject')
    name, prefixes = clean_subject(subject, [project.linkname])
    is_comment = subject_check(subject)
    x, n = parse_series_marker(prefixes)
    version = parse_version(name, prefixes)
    refs = find_references(mail)
    date = find_date(mail)
    headers = find_headers(mail)

    # parse content

    if not is_comment:
        diff, message = find_patch_content(mail)
    else:
        diff, message = find_comment_content(mail)

    if not (diff or message):
        return  # nothing to work with

    pull_url = parse_pull_request(message)

    # build objects

    if not is_comment and (diff or pull_url):  # patches or pull requests
        # we delay the saving until we know we have a patch.
        author = get_or_create_author(mail)

        delegate = find_delegate_by_header(mail)
        if not delegate and diff:
            filenames = find_filenames(diff)
            delegate = find_delegate_by_filename(project, filenames)

        try:
            patch = Patch.objects.create(
                msgid=msgid,
                project=project,
                patch_project=project,
                name=name[:255],
                date=date,
                headers=headers,
                submitter=author,
                content=message,
                diff=diff,
                pull_url=pull_url,
                delegate=delegate,
                state=find_state(mail))
            logger.debug('Patch saved')
        except IntegrityError:
            # the patch could not be inserted; treated as a duplicate mail
            logger.error("Duplicate mail for message ID %s", msgid)
            return None

        # if we don't have a series marker, we will never have an existing
        # series to match against.
        series = None
        if n:
            series = find_series(project, mail, author)
        else:
            x = n = 1

        # We will create a new series if:
        # - there is no existing series to assign this patch to, or
        # - there is an existing series, but it already has a patch with this
        #   number in it
        # (only existence matters here, so no full COUNT is needed)
        if not series or SeriesPatch.objects.filter(
                series=series, number=x).exists():
            series = Series(project=project,
                            date=date,
                            submitter=author,
                            version=version,
                            total=n)
            series.save()

            # NOTE(stephenfin) We must save references for series. We
            # do this to handle the case where a later patch is
            # received first. Without storing references, it would not
            # be possible to identify the relationship between patches
            # as the earlier patch does not reference the later one.
            for ref in refs + [msgid]:
                ref = ref[:255]
                # we don't want duplicates
                try:
                    # we could have a ref to a previous series. (For
                    # example, a series sent in reply to another
                    # series.) That should not create a series ref
                    # for this series, so check for the msg-id only,
                    # not the msg-id/series pair.
                    SeriesReference.objects.get(msgid=ref,
                                                series__project=project)
                except SeriesReference.DoesNotExist:
                    SeriesReference.objects.create(series=series, msgid=ref)
                except SeriesReference.MultipleObjectsReturned:
                    logger.error(
                        "Multiple SeriesReferences for %s in project %s!",
                        ref, project.name)

        # add to a series if we have found one, and we have a numbered
        # patch. Don't add unnumbered patches (for example diffs sent
        # in reply, or just messages with random refs/in-reply-tos)
        if series and x:
            series.add_patch(patch, x)

        return patch
    elif x == 0:  # (potential) cover letters
        # if refs are empty, it's implicitly a cover letter. If not,
        # however, we need to see if a match already exists and, if
        # not, assume that it is indeed a new cover letter
        is_cover_letter = False
        if not is_comment:
            if not refs == []:
                try:
                    CoverLetter.objects.all().get(name=name)
                except CoverLetter.DoesNotExist:
                    # if no match, this is a new cover letter
                    is_cover_letter = True
                except CoverLetter.MultipleObjectsReturned:
                    # if multiple cover letters are found, just ignore
                    pass
            else:
                is_cover_letter = True

        if is_cover_letter:
            author = get_or_create_author(mail)

            # we don't use 'find_series' here as a cover letter will
            # always be the first item in a thread, thus the references
            # could only point to a different series or unrelated
            # message
            try:
                series = SeriesReference.objects.get(
                    msgid=msgid, series__project=project).series
            except SeriesReference.DoesNotExist:
                series = None
            except SeriesReference.MultipleObjectsReturned:
                logger.error(
                    "Multiple SeriesReferences for %s in project %s!",
                    msgid, project.name)
                # fall back to the first matching reference
                series = SeriesReference.objects.filter(
                    msgid=msgid, series__project=project).first().series

            if not series:
                series = Series(project=project,
                                date=date,
                                submitter=author,
                                version=version,
                                total=n)
                series.save()

                # we don't save the in-reply-to or references fields
                # for a cover letter, as they can't refer to the same
                # series
                try:
                    SeriesReference.objects.get_or_create(series=series,
                                                          msgid=msgid)
                except SeriesReference.MultipleObjectsReturned:
                    logger.error(
                        "Multiple SeriesReferences for %s in project %s!",
                        msgid, project.name)

            cover_letter = CoverLetter(
                msgid=msgid,
                project=project,
                name=name[:255],
                date=date,
                headers=headers,
                submitter=author,
                content=message)
            cover_letter.save()
            logger.debug('Cover letter saved')

            series.add_cover_letter(cover_letter)

            return cover_letter

    # comments

    # we only save comments if we have the parent email
    submission = find_submission_for_comment(project, refs)
    if not submission:
        return

    author = get_or_create_author(mail)

    comment = Comment(
        submission=submission,
        msgid=msgid,
        date=date,
        headers=headers,
        submitter=author,
        content=message)
    comment.save()
    logger.debug('Comment saved')

    return comment
def parse_mail(mail, list_id=None):
    """Parse a mail and store the resulting object in the database.

    Depending on its content, the mail is saved as a patch (or pull
    request), a cover letter, or a comment on an existing submission.

    Args:
        mail (`mbox.Mail`): Mail to parse and add.
        list_id (str): Mailing list ID.  When given, the project is looked
            up by this ID; otherwise it is looked up from the mail itself.

    Returns:
        The saved ``Patch``, ``CoverLetter`` or ``Comment``.  ``None`` when
        the mail carries an 'ignore' hint, no project is found, the mail
        has neither diff nor message content, or no parent submission is
        found for a comment.

    Raises:
        ValueError: If the 'From', 'Subject' or 'Message-Id' header is
            missing.
    """
    # mandatory headers
    if 'From' not in mail:
        raise ValueError("Missing 'From' header")
    if 'Subject' not in mail:
        raise ValueError("Missing 'Subject' header")
    if 'Message-Id' not in mail:
        raise ValueError("Missing 'Message-Id' header")

    hint = mail.get('X-Patchwork-Hint', '').lower()
    if hint == 'ignore':
        logger.debug("Ignoring email due to 'ignore' hint")
        return None

    project = (find_project_by_id(list_id) if list_id
               else find_project_by_header(mail))
    if project is None:
        logger.error('Failed to find a project for email')
        return None

    # ---- content ----
    diff, message = find_content(project, mail)
    if not diff and not message:
        return None  # nothing to work with

    # ---- metadata ----
    msgid = mail.get('Message-Id').strip()
    author = find_author(mail)
    subject = mail.get('Subject')
    name, prefixes = clean_subject(subject, [project.linkname])
    is_comment = subject_check(subject)
    patch_number, series_total = parse_series_marker(prefixes)
    version = parse_version(name, prefixes)
    refs = find_references(mail)
    date = find_date(mail)
    headers = find_headers(mail)
    pull_url = parse_pull_request(message)

    # ---- patches or pull requests ----
    if not is_comment and (diff or pull_url):
        # the author is only saved once we know we have a patch
        author.save()

        delegate = find_delegate(mail)
        if not delegate and diff:
            delegate = auto_delegate(project, find_filenames(diff))

        series = find_series(mail)
        # A new series is created if:
        # - we have a patch number (x of n), and
        # - either:
        #   * there is no series, or
        #   * the version doesn't match
        #   * we have a patch with this number already
        needs_new_series = series_total and (
            not series
            or series.version != version
            or SeriesPatch.objects.filter(series=series,
                                          number=patch_number).count()
        )
        if needs_new_series:
            series = Series(date=date,
                            submitter=author,
                            version=version,
                            total=series_total)
            series.save()

            # NOTE(stephenfin) We must save references for series. We
            # do this to handle the case where a later patch is
            # received first. Without storing references, it would not
            # be possible to identify the relationship between patches
            # as the earlier patch does not reference the later one.
            for ref in refs + [msgid]:
                # we don't want duplicates
                try:
                    # we could have a ref to a previous series. (For
                    # example, a series sent in reply to another
                    # series.) That should not create a series ref
                    # for this series, so check for the msg-id only,
                    # not the msg-id/series pair.
                    SeriesReference.objects.get(msgid=ref)
                except SeriesReference.DoesNotExist:
                    SeriesReference.objects.create(series=series, msgid=ref)

        patch = Patch(
            msgid=msgid,
            project=project,
            name=name,
            date=date,
            headers=headers,
            submitter=author,
            content=message,
            diff=diff,
            pull_url=pull_url,
            delegate=delegate,
            state=find_state(mail))
        patch.save()
        logger.debug('Patch saved')

        # add to a series if we have found one, and we have a numbered
        # patch. Don't add unnumbered patches (for example diffs sent
        # in reply, or just messages with random refs/in-reply-tos)
        if series and patch_number:
            series.add_patch(patch, patch_number)

        return patch

    # ---- (potential) cover letters ----
    if patch_number == 0:
        # if refs are empty, it's implicitly a cover letter. If not,
        # however, we need to see if a match already exists and, if
        # not, assume that it is indeed a new cover letter
        is_cover_letter = False
        if not is_comment:
            if refs == []:
                is_cover_letter = True
            else:
                try:
                    CoverLetter.objects.all().get(name=name)
                except CoverLetter.DoesNotExist:
                    # if no match, this is a new cover letter
                    is_cover_letter = True
                except CoverLetter.MultipleObjectsReturned:
                    # if multiple cover letters are found, just ignore
                    pass

        if is_cover_letter:
            author.save()

            # we don't use 'find_series' here as a cover letter will
            # always be the first item in a thread, thus the references
            # could only point to a different series or unrelated
            # message
            try:
                series = SeriesReference.objects.get(msgid=msgid).series
            except SeriesReference.DoesNotExist:
                series = None

            if not series:
                series = Series(date=date,
                                submitter=author,
                                version=version,
                                total=series_total)
                series.save()

                # we don't save the in-reply-to or references fields
                # for a cover letter, as they can't refer to the same
                # series
                SeriesReference.objects.get_or_create(series=series,
                                                      msgid=msgid)

            cover_letter = CoverLetter(
                msgid=msgid,
                project=project,
                name=name,
                date=date,
                headers=headers,
                submitter=author,
                content=message)
            cover_letter.save()
            logger.debug('Cover letter saved')

            series.add_cover_letter(cover_letter)

            return cover_letter

    # ---- comments ----
    # we only save comments if we have the parent email
    submission = find_submission_for_comment(project, refs)
    if not submission:
        return None

    # a diff found in a comment mail is kept as part of the comment text
    if is_comment and diff:
        message += diff

    author.save()

    comment = Comment(
        submission=submission,
        msgid=msgid,
        date=date,
        headers=headers,
        submitter=author,
        content=message)
    comment.save()
    logger.debug('Comment saved')

    return comment