def fetch_and_log_commits(self):
    success = self.repo.fetch(repo_url=self.remote_repo_url, remote_name=self.remote_branch)
    if not success:
        raise ValueError(
            f"Cannot fetch from remote branch: {self.remote_repo_url}/{self.remote_branch}"
        )
    log_result = self.repo.log(FETCH_HEAD, n=self.print_n_commits)
    LOG.info(
        "Printing %d topmost commits of %s:\n %s",
        self.print_n_commits,
        FETCH_HEAD,
        StringUtils.list_to_multiline_string(log_result),
    )

    base_vs_fetch_head = f"{self.base_branch}..{FETCH_HEAD}"
    log_result = self.repo.log(base_vs_fetch_head, oneline=True)
    LOG.info(
        "\n\nPrinting diff of %s:\n %s",
        base_vs_fetch_head,
        StringUtils.list_to_multiline_string(log_result),
    )

    num_commits = len(log_result)
    if num_commits > self.cherry_pick_n_commits:
        raise ValueError(
            f"Number of commits between {base_vs_fetch_head} is more than {self.cherry_pick_n_commits}! Exiting..."
        )
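# Note on the revision range above, assuming standard git semantics: "base..FETCH_HEAD"
# selects the commits reachable from FETCH_HEAD but not from base, i.e. exactly the
# commits the fetch brought in on top of the base branch. A minimal sketch:
def _example_commit_range(base_branch: str) -> str:
    # "branch-3.3" -> "branch-3.3..FETCH_HEAD"
    return f"{base_branch}..FETCH_HEAD"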
def find_upstream_commits_and_save_to_file(self):
    # It's quite complex to grep for multiple jira IDs with gitpython, so let's rather call an external command
    git_log_result = self.upstream_repo.log(ORIGIN_TRUNK, oneline_with_date=True)
    cmd, output = CommandRunner.egrep_with_cli(git_log_result, self.intermediate_results_file, self.data.piped_jira_ids)
    normal_commit_lines = output.split("\n")
    modified_log_lines = self._find_missing_upstream_commits_by_message(git_log_result, normal_commit_lines)
    self.data.matched_upstream_commit_list = normal_commit_lines + modified_log_lines
    if not self.data.matched_upstream_commit_list:
        raise ValueError(f"Cannot find any commits for jira: {self.config.jira_id}")

    LOG.info("Number of matched commits: %s", self.data.no_of_matched_commits)
    LOG.debug(
        "Matched commits: \n%s",
        StringUtils.list_to_multiline_string(self.data.matched_upstream_commit_list),
    )
    # Reverse the list so commits are in chronological order (oldest first)
    self.data.matched_upstream_commit_list.reverse()
    self.convert_to_commit_data_objects_upstream()
    FileUtils.save_to_file(
        self.commits_file,
        StringUtils.list_to_multiline_string(self.data.matched_upstream_commit_hashes),
    )
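# self.data.piped_jira_ids is assumed (from its name and its use as an egrep pattern)
# to be a pipe-joined alternation of jira IDs; a hypothetical sketch of building it:
def _example_piped_jira_ids(jira_ids):
    # ["YARN-10277", "YARN-10278"] -> "YARN-10277|YARN-10278", matching either ID
    return "|".join(jira_ids)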
def write_commit_list_to_file_or_console(
    self,
    output_type: str,
    commit_groups: List[Tuple[CommitData, CommitData]],
    add_sep_to_end=True,
    add_line_break_between_groups=False,
):
    if not add_line_break_between_groups:
        commits = [CommonUtils.convert_commit_to_str(commit) for tup in commit_groups for commit in tup]
        contents = StringUtils.list_to_multiline_string(commits)
    else:
        contents = ""
        for tup in commit_groups:
            commit_strs = [CommonUtils.convert_commit_to_str(commit) for commit in tup]
            contents += StringUtils.list_to_multiline_string(commit_strs)
            contents += "\n\n"
    self._write_to_file_or_console(contents, output_type, add_sep_to_end=add_sep_to_end)
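# A sketch of the two layouts above on plain strings, assuming
# StringUtils.list_to_multiline_string joins its items with newlines:
def _example_group_layout(groups, line_break_between_groups=False):
    if not line_break_between_groups:
        # All commits of all groups as one block, one commit per line
        return "\n".join(s for group in groups for s in group)
    # Each group's commits followed by a blank line
    return "".join("\n".join(group) + "\n\n" for group in groups)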
def generate_summary_string(self):
    a_normal_table = self.get_tables(list(self.table_order)[0], table_fmt=TabulateTableFormat.GRID)[0]
    # The header line should be as wide as the first line of the rendered table
    table_first_line = StringUtils.get_first_line_of_multiline_str(a_normal_table.table)
    summary_str = "\n\n" + (
        StringUtils.generate_header_line("SUMMARY", char="═", length=len(table_first_line)) + "\n"
    )
    summary_str += str(self.summary_data) + "\n\n"
    return summary_str
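# A hypothetical sketch of the header line semantics assumed from
# StringUtils.generate_header_line: the title centered and padded with the fill
# character to the width of the table's first line.
def _example_header_line(title: str, char: str, length: int) -> str:
    # ("SUMMARY", "═", 20) -> "═════ SUMMARY ══════"
    return f" {title} ".center(length, char)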
def convert_list_data(src_data, row_callback, conf: ConversionConfig):
    result = []
    for idx, src_row in enumerate(src_data):
        row = row_callback(src_row)
        converted_row = []
        if conf.add_row_numbers:
            converted_row.append(idx + 1)
        for cell in row:
            if conf.join_lists_by_comma and isinstance(cell, list):
                cell = ", ".join(cell)
            bcc = conf.bool_conversion_config
            if bcc and isinstance(cell, bool):
                cell = bcc.convert_true_to if cell else bcc.convert_false_to
            if conf.max_width and isinstance(cell, str):
                cell = StringUtils.convert_string_to_multiline(
                    cell, max_line_length=conf.max_width, separator=conf.max_width_separator
                )
            converted_row.append(cell)
        if conf.colorize_config:
            ResultPrinter._colorize_row(conf.colorize_config, converted_row, row)
        result.append(converted_row)
    return ConversionResult(src_data, result)
def get_next_review_branch_name(branches, sep=REVIEW_BRANCH_SEP):
    # Example review branch names, latest first:
    # review-YARN-10277-3
    # review-YARN-10277-2
    # review-YARN-10277
    sorted_branches = sorted(branches, reverse=True)
    if len(sorted_branches) == 0:
        raise ValueError(f"Expected a list of branches with at least one element. List: {sorted_branches}")
    latest_branch = sorted_branches[0]
    parts = latest_branch.split(sep)
    if len(parts) < 3:
        raise ValueError(
            f"Expected at least 3 components (separated by '-') of branch name: {latest_branch}, "
            f"encountered: {len(parts)}"
        )
    # No branch postfix, e.g. review-YARN-10277
    if len(parts) == 3:
        return sep.join(parts) + sep + "2"
    elif len(parts) == 4:
        return sep.join(parts[0:3]) + sep + StringUtils.increase_numerical_str(parts[3])
    else:
        raise ValueError(
            f"Unexpected number of components (separated by '-') of branch name: {latest_branch}, "
            f"encountered # of components: {len(parts)}"
        )
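# A sketch of the numeric-postfix increment assumed above (hypothetical reference
# implementation of StringUtils.increase_numerical_str, preserving zero-padding):
def _example_increase_numerical_str(num_str: str) -> str:
    # "2" -> "3", "0003" -> "0004"
    return str(int(num_str) + 1).zfill(len(num_str))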
def parse_subjiras_and_jira_titles_from_umbrella_html(html_doc, to_file, filter_ids, find_all_links=True):
    soup = BeautifulSoup(html_doc, "html.parser")
    result_dict = {}
    links = []
    if find_all_links:
        links = soup.find_all("a", attrs={"class": "issue-link"})
    else:
        table = soup.find("table", id="issuetable")
        if table is not None:
            links = table.find_all("a", attrs={"class": "issue-link"})
    for link in links:
        jira_id = link.attrs["data-issue-key"]
        jira_title = str(link.contents[0])
        # There are 2 anchors with class 'issue-link' per row. Only store the one with a valid title.
        if len(jira_title.strip()) > 0:
            result_dict[jira_id] = jira_title

    if filter_ids:
        LOG.info("Filtering ids from result list: %s", filter_ids)
        result_dict = {jira_id: title for jira_id, title in result_dict.items() if jira_id not in filter_ids}
    FileUtils.save_to_file(to_file, StringUtils.dict_to_multiline_string(result_dict))
    return result_dict
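# A minimal, self-contained sketch of the umbrella HTML shape these parsers expect
# (hypothetical markup; real jira pages carry more attributes per anchor):
_EXAMPLE_UMBRELLA_HTML = """
<table id="issuetable">
  <tr><td>
    <a class="issue-link" data-issue-key="YARN-10277"> </a>
    <a class="issue-link" data-issue-key="YARN-10277">Some subtask title</a>
  </td></tr>
</table>
"""
# parse_subjiras_and_jira_titles_from_umbrella_html(_EXAMPLE_UMBRELLA_HTML, to_file, None)
# would return {"YARN-10277": "Some subtask title"}: the first anchor has no usable
# title text, so only the second one is stored.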
def convert_commits_to_oneline_strings(commits: List[CommitData], incl_jira_id=False):
    result = []
    for c in commits:
        commit_str = CommonUtils.convert_commit_to_str(c)
        if incl_jira_id:
            commit_str = f"[{c.jira_id}]{commit_str}"
        result.append(commit_str)
    return StringUtils.list_to_multiline_string(result)
def __init__(
    self,
    header_title: str,
    header_list: List[str],
    source_data: Any,
    rendered_table: str,
    table_fmt: TabulateTableFormat,
    colorized: bool = False,
):
    self.header = (
        StringUtils.generate_header_line(
            header_title,
            char="═",
            length=len(StringUtils.get_first_line_of_multiline_str(rendered_table)),
        )
        + "\n"
    )
    self.header_list = header_list
    self.source_data = source_data
    self.table = rendered_table
    self.table_fmt: TabulateTableFormat = table_fmt
    self.colorized = colorized
def egrep_with_cli(
    git_log_result: List[str],
    file: str,
    grep_for: str,
    escape_single_quotes=True,
    escape_double_quotes=True,
    fail_on_empty_output=True,
    fail_on_error=True,
):
    FileUtils.save_to_file(file, StringUtils.list_to_multiline_string(git_log_result))
    if escape_single_quotes or escape_double_quotes:
        grep_for = StringUtils.escape_str(
            grep_for,
            escape_single_quotes=escape_single_quotes,
            escape_double_quotes=escape_double_quotes,
        )
    cli_command = f'cat {file} | egrep "{grep_for}"'
    return CommandRunner.run_cli_command(
        cli_command, fail_on_empty_output=fail_on_empty_output, fail_on_error=fail_on_error
    )
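# For a hypothetical pattern "YARN-10277|YARN-10278" and file "/tmp/gitlog.txt", the
# generated pipeline would be: cat /tmp/gitlog.txt | egrep "YARN-10277|YARN-10278".
# A sketch of the same filtering in pure Python (approximating egrep's matching):
import re

def _example_grep_lines(lines, pattern):
    # Keep only the log lines that match the pattern anywhere
    regex = re.compile(pattern)
    return [line for line in lines if regex.search(line)]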
def sanity_check(self):
    for br_type in self.branch_data.keys():
        # This will get commits_after_merge_base_filtered from BranchData
        num_commits_on_branch = len(GroupedCommitMatcherUtils.get_commits(self.branch_data, br_type))
        # Get the total number of commits across all groups
        sum_len_groups = self.sum_len_of_groups(br_type)

        # Compare the number of commits on the branch vs. the number of commits in all groups.
        # If they are the same, every commit was added to exactly one group.
        if num_commits_on_branch == sum_len_groups:
            LOG.info("Sanity check was successful")
            return

        hashes_on_branch = GroupedCommitMatcherUtils.get_commit_hashes(self.branch_data, br_type)
        hashes_of_groups = self.all_commit_hashes_in_groups(br_type)
        message = (
            f"Number of all commits on branch vs. number of all commits in all groups "
            f"for the branch is different!\n"
            f"Number of commits on branch is: {num_commits_on_branch}\n"
            f"Number of all items in all groups: {sum_len_groups}"
        )
        if len(hashes_on_branch) < len(hashes_of_groups):
            # TODO think about what could be a useful exception message here
            raise NotImplementedError(
                "len(commits of all groups) > len(commits on branch) sanity check is not yet implemented"
            )

        diffed_hashes = set(hashes_on_branch).difference(set(hashes_of_groups))
        commits_by_hashes = self.branch_data[br_type].get_commits_by_hashes(diffed_hashes)
        LOG.error("Commits that are not found among groups: %s", commits_by_hashes)

        # Two big numbers like 414 vs. 410 commits don't give much clarity, so print the commit details as well
        LOG.debug(f"Querying commits on branch {br_type} against {len(diffed_hashes)} commit hashes..")
        filtered_commits: List[CommitData] = GroupedCommitMatcherUtils.filter_commits_by_hashes(
            self.branch_data, br_type, diffed_hashes
        )
        commit_strs = StringUtils.list_to_multiline_string(
            [f"{c.hash} {c.message}" for c in filtered_commits]
        )
        LOG.error(message)
        raise ValueError(message + f"\nCommits missing from groups: \n{commit_strs}")
def _convert_list_data(src_data, row_callback, max_width=None, max_width_separator=" "):
    dest_data = []
    for idx, data_row in enumerate(src_data):
        tup = row_callback(data_row)
        converted_row = [idx + 1]
        for t in tup:
            if max_width and isinstance(t, str):
                # Use the max_width parameter instead of a hardcoded line length
                t = StringUtils.convert_string_to_multiline(t, max_line_length=max_width, separator=max_width_separator)
            converted_row.append(t)
        dest_data.append(converted_row)
    return dest_data
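# A hypothetical sketch of the wrapping assumed from
# StringUtils.convert_string_to_multiline: split on `separator` and re-join so that
# lines stay within max_line_length where possible.
def _example_wrap(text: str, max_line_length: int, separator: str = " ") -> str:
    lines, current = [], ""
    for word in text.split(separator):
        candidate = word if not current else current + separator + word
        if len(candidate) > max_line_length and current:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    return "\n".join(lines)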
def get_next_filename(patch_dir, list_of_prev_patches):
    list_of_prev_patches = sorted(list_of_prev_patches, reverse=True)
    LOG.info("Found patches: %s", list_of_prev_patches)
    if len(list_of_prev_patches) == 0:
        return FileUtils.join_path(patch_dir, FIRST_PATCH_NUMBER), FIRST_PATCH_NUMBER
    else:
        latest_patch = list_of_prev_patches[0]
        last_patch_num = PatchUtils.extract_patch_number_from_filename_as_str(latest_patch)
        next_patch_filename = PatchUtils.get_next_patch_filename(latest_patch)
        return (
            FileUtils.join_path(patch_dir, next_patch_filename),
            StringUtils.increase_numerical_str(last_patch_num),
        )
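# Usage sketch with hypothetical patch names, assuming FIRST_PATCH_NUMBER == "0001"
# and PATCH_FILE_SEPARATOR == ".":
#   get_next_filename("/patches", []) -> ("/patches/0001", "0001")
#   get_next_filename("/patches", ["YARN-10277.0001.patch", "YARN-10277.0002.patch"])
#   -> ("/patches/YARN-10277.0003.patch", "0003")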
def parse_subjiras_from_umbrella_html(html_doc, to_file, filter_ids):
    soup = BeautifulSoup(html_doc, "html.parser")
    issue_keys = []
    for link in soup.find_all("a", attrs={"class": "issue-link"}):
        issue_keys.append(link.attrs["data-issue-key"])

    if filter_ids:
        LOG.info("Filtering ids from result list: %s", filter_ids)
        issue_keys = [issue for issue in issue_keys if issue not in filter_ids]

    # Filter dupes
    issue_keys = list(set(issue_keys))
    FileUtils.save_to_file(to_file, StringUtils.list_to_multiline_string(issue_keys))
    return issue_keys
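# Note: list(set(...)) deduplicates but does not preserve document order. If stable
# order ever matters, dict.fromkeys keeps first-seen order:
def _example_dedupe_keep_order(items):
    # ["B", "A", "B"] -> ["B", "A"]
    return list(dict.fromkeys(items))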
def save_changed_files_to_file(self):
    list_of_changed_files = []
    for c_hash in self.data.matched_upstream_commit_hashes:
        changed_files = self.upstream_repo.diff_tree(c_hash, no_commit_id=True, name_only=True, recursive=True)
        list_of_changed_files.append(changed_files)
        LOG.debug("List of changed files for commit hash '%s': %s", c_hash, changed_files)

    # Flatten the list of lists, then filter dupes
    list_of_changed_files = [y for x in list_of_changed_files for y in x]
    self.data.list_of_changed_files = list(set(list_of_changed_files))
    LOG.info("Got %d unique changed files", len(self.data.list_of_changed_files))
    FileUtils.save_to_file(
        self.changed_files_file,
        StringUtils.list_to_multiline_string(self.data.list_of_changed_files),
    )
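# A minimal sketch of the flatten-then-dedupe step above on plain lists:
def _example_flatten_unique(list_of_lists):
    # [["a.py", "b.py"], ["b.py"]] -> ["a.py", "b.py"] (order unspecified)
    return list({name for sublist in list_of_lists for name in sublist})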
def get_next_patch_filename(filename, pos=-2):
    # Assuming filename like: '/somedir/YARN-10277-test.0003.patch'
    split = filename.split(PATCH_FILE_SEPARATOR)
    increased_str = StringUtils.increase_numerical_str(split[pos])
    split[pos] = increased_str
    return PATCH_FILE_SEPARATOR.join(split)
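# Usage sketch, assuming PATCH_FILE_SEPARATOR == ".":
#   get_next_patch_filename("/somedir/YARN-10277-test.0003.patch")
#   -> "/somedir/YARN-10277-test.0004.patch"   (split[-2] == "0003" is incremented)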
def render_summary_string(self, result_basedir, extended_backport_table=False, backport_remote_filter=ORIGIN):
    # Generate tables first, in order to know the length of the header rows
    commit_list_table = ResultPrinter.print_table(
        self.upstream_commit_data_list,
        lambda commit: (commit.jira_id, commit.message, commit.date),
        header=["Row", "Jira ID", "Commit message", "Commit date"],
        print_result=False,
        max_width=80,
        max_width_separator=" ",
        # tabulate_fmts=[TabulateTableFormat.GRID, TabulateTableFormat.HTML]
        tabulate_fmt=TabulateTableFormat.GRID,
    )
    files = FileUtils.find_files(result_basedir, regex=".*", full_path_result=True)
    file_list_table = ResultPrinter.print_table(
        files,
        lambda file: (file,),
        header=["Row", "File"],
        print_result=False,
        max_width=80,
        max_width_separator=os.sep,
    )

    if extended_backport_table:
        backports_list = []
        for bjira in self.backported_jiras.values():
            for commit in bjira.commits:
                backports_list.append(
                    [
                        bjira.jira_id,
                        commit.commit_obj.hash[:SHORT_SHA_LENGTH],
                        commit.commit_obj.message,
                        self.filter_branches(backport_remote_filter, commit.branches),
                        commit.commit_obj.date,
                    ]
                )
        backport_table = ResultPrinter.print_table(
            backports_list,
            lambda row: row,
            header=["Row", "Jira ID", "Hash", "Commit message", "Branches", "Date"],
            print_result=False,
            max_width=50,
            max_width_separator=" ",
        )
    else:
        if self.execution_mode == ExecutionMode.AUTO_BRANCH_MODE:
            backports_list = []
            for bjira in self.backported_jiras.values():
                all_branches = []
                for commit in bjira.commits:
                    if commit.commit_obj.reverted:
                        continue
                    branches = self.filter_branches(backport_remote_filter, commit.branches)
                    if branches:
                        all_branches.extend(branches)
                backports_list.append([bjira.jira_id, list(set(all_branches))])
            backport_table = ResultPrinter.print_table(
                backports_list,
                lambda row: row,
                header=["Row", "Jira ID", "Branches"],
                print_result=False,
                max_width=50,
                max_width_separator=" ",
            )
        elif self.execution_mode == ExecutionMode.MANUAL_BRANCH_MODE:
            backports_list = []
            for bjira in self.backported_jiras.values():
                all_branches = {br for c in bjira.commits for br in c.branches}
                for commit in bjira.commits:
                    if commit.commit_obj.reverted:
                        continue
                    backport_present_list = []
                    for branch in self.downstream_branches:
                        backport_present_list.append(branch in all_branches)
                    curr_row = [bjira.jira_id]
                    curr_row.extend(backport_present_list)
                    # TODO Temporarily disabled colorize in order to send the mail effortlessly with summary body.
                    # This method requires redesign, nevertheless.
                    # curr_row = ResultPrinter.colorize_row(curr_row, convert_bools=True)
                    backports_list.append(curr_row)
            header = ["Row", "Jira ID"]
            header.extend(self.downstream_branches)
            backport_table = ResultPrinter.print_table(
                backports_list,
                lambda row: row,
                header=header,
                print_result=False,
                max_width=50,
                max_width_separator=" ",
            )

    # Create headers
    commits_header_line = (
        StringUtils.generate_header_line(
            "COMMITS", char="═", length=len(StringUtils.get_first_line_of_multiline_str(commit_list_table))
        )
        + "\n"
    )
    result_files_header_line = (
        StringUtils.generate_header_line(
            "RESULT FILES", char="═", length=len(StringUtils.get_first_line_of_multiline_str(file_list_table))
        )
        + "\n"
    )
    backport_header_line = (
        StringUtils.generate_header_line(
            "BACKPORTED JIRAS", char="═", length=len(StringUtils.get_first_line_of_multiline_str(backport_table))
        )
        + "\n"
    )

    # Generate summary string
    summary_str = (
        StringUtils.generate_header_line(
            "SUMMARY", char="═", length=len(StringUtils.get_first_line_of_multiline_str(commit_list_table))
        )
        + "\n"
    )
    summary_str += f"Number of jiras: {self.no_of_jiras}\n"
    summary_str += f"Number of commits: {self.no_of_commits}\n"
    summary_str += f"Number of files changed: {self.no_of_files}\n"
    summary_str += commits_header_line
    summary_str += commit_list_table
    summary_str += "\n\n"
    summary_str += result_files_header_line
    summary_str += file_list_table
    summary_str += "\n\n"
    summary_str += backport_header_line
    summary_str += backport_table
    return summary_str
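# The rendered summary, sketched (header widths track the generated tables):
#
#   ═════════════ SUMMARY ═════════════
#   Number of jiras: ...
#   Number of commits: ...
#   Number of files changed: ...
#   ═════════════ COMMITS ═════════════
#   <commit list table>
#
#   ═══════════ RESULT FILES ══════════
#   <result file table>
#
#   ════════ BACKPORTED JIRAS ═════════
#   <backport table>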