def _parse_log_entry(raw_log_entry):
    """
    Turn one raw git log entry into a LogEntry.

    Returns None (after a debug log message, where applicable) when the
    header cannot be split from the diff, the diff is blank, or the
    author / commit headers cannot be extracted.
    """
    # Encoding caveat: git offers no way to learn the encoding of the
    # files / diffs (other than .gitattributes, which only the repo's
    # original authors can set). So the entry is treated as UTF-8,
    # with anything that doesn't fit replaced, for the duration of the
    # parsing; the pieces are converted to unicode only when building
    # the LogEntry that is returned.
    entry_text = util.utf8(raw_log_entry)

    # Separate the header section from the diff section.
    header_and_diff = _split_entry_header(entry_text)
    if header_and_diff is None:
        return None
    header_lines, diff_lines = header_and_diff

    diff = '\n'.join(diff_lines)
    if not diff.strip():
        log.debug("Diff appeared to be empty.")
        return None

    author_header = _parse_header('Author: ', header_lines)
    if not author_header:
        log.debug("Could not parse author.")
        return None

    name_and_email = parse_name_and_email(author_header)
    if not name_and_email:
        log.debug("Could not parse author name / email.")
        return None
    author_name, author_email = name_and_email

    commit = _parse_header('commit ', header_lines)
    if not commit:
        log.debug("Could not parse commit.")
        return None

    log_msg = '\n'.join(_parse_log_msg(header_lines))

    # Everything except the raw log is handed back as unicode.
    fields = {
        'author_name': util.uc(author_name),
        'author_email': util.uc(author_email),
        'commit': util.uc(commit),
        'log_msg': util.uc(log_msg),
        'diff': util.uc(diff),
        'raw_log': raw_log_entry,
    }
    return LogEntry(**fields)
def _validate_parsed_entry(parsed_entry, commit_info):
    """
    Assert that a parsed log entry agrees with the commit it came from.

    Checks, in order: the entry is truthy, the commit message appears in
    the parsed log message, the file body appears in the parsed diff, and
    the author name and email match.
    """
    ok_(parsed_entry)

    # The commit message only has to be contained in the log message.
    expected_msg = util.uc(commit_info.commit_msg)
    ok_(expected_msg in parsed_entry.log_msg)

    # The file body is compared against the utf8() form of the diff.
    diff_utf8 = util.utf8(parsed_entry.diff)
    ok_(commit_info.file_body in diff_utf8)

    expected_name = util.uc(commit_info.author_name)
    eq_(expected_name, parsed_entry.author_name)

    expected_email = util.uc(commit_info.author_email)
    eq_(expected_email, parsed_entry.author_email)