def determine_new_patch_filename(self):
    patch_dir = FileUtils.join_path(self.patch_basedir, self.patch_branch)
    FileUtils.ensure_dir_created(patch_dir)
    found_patches = FileUtils.find_files(
        patch_dir, regex=self.patch_branch + PATCH_FILE_REGEX, single_level=True
    )
    new_patch_filename, new_patch_num = PatchUtils.get_next_filename(patch_dir, found_patches)

    # Double-check the generated filename against one assembled manually
    new_patch_filename_sanity = FileUtils.join_path(
        self.patch_basedir,
        self.patch_branch,
        f"{self.patch_branch}.{str(new_patch_num)}{PATCH_EXTENSION}",
    )

    # If this is the first patch, use the manually assembled name;
    # otherwise, use the generated filename
    if new_patch_num == FIRST_PATCH_NUMBER:
        new_patch_filename = new_patch_filename_sanity
    if new_patch_filename != new_patch_filename_sanity:
        raise ValueError(
            "File paths do not match. "
            f"Calculated: {new_patch_filename}, Concatenated: {new_patch_filename_sanity}"
        )
    self.new_patch_filename = new_patch_filename
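
# For context: PatchUtils.get_next_filename scans the existing patch files and
# returns the next free filename plus its sequence number. Below is a minimal
# stand-in sketch of that numbering scheme -- the regex, the unpadded counter
# and the FIRST_PATCH_NUMBER == 1 start value are assumptions, not the real
# PatchUtils API.
import re
from typing import List, Tuple

def next_patch_filename(branch: str, existing: List[str]) -> Tuple[str, int]:
    """Illustrative stand-in for PatchUtils.get_next_filename; not the real implementation."""
    pattern = re.compile(re.escape(branch) + r"\.(\d+)\.patch$")
    numbers = [int(m.group(1)) for m in map(pattern.search, existing) if m]
    next_num = max(numbers) + 1 if numbers else 1  # assumed FIRST_PATCH_NUMBER == 1
    return f"{branch}.{next_num}.patch", next_num

# Example:
# next_patch_filename("YARN-123", ["YARN-123.1.patch", "YARN-123.2.patch"])
# -> ("YARN-123.3.patch", 3)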
def add_result_files_table(self):
    result_files_data = sorted(
        FileUtils.find_files(self.summary_data.output_dir, regex=".*", full_path_result=True)
    )
    table_type = RenderedTableType.RESULT_FILES
    header = [HEADER_ROW, HEADER_FILE, HEADER_NO_OF_LINES]
    gen_tables = ResultPrinter.print_tables(
        result_files_data,
        lambda file: (file, len(FileUtils.read_file(file).splitlines())),
        header=header,
        print_result=False,
        max_width=200,
        max_width_separator=os.sep,
        tabulate_fmts=DEFAULT_TABLE_FORMATS,
    )
    for table_fmt, table in gen_tables.items():
        self.add_table(
            table_type,
            TableWithHeader(
                table_type.header,
                header,
                result_files_data,
                table,
                table_fmt=table_fmt,
                colorized=False,
                branch=None,
            ),
        )
def assert_files_not_empty(self, basedir, expected_files=None):
    found_files = FileUtils.find_files(basedir, regex=".*", single_level=True, full_path_result=True)
    for f in found_files:
        self.assert_file_not_empty(f)
    if expected_files:
        TESTCASE.assertEqual(expected_files, len(found_files))
def _setup_dirs(cls):
    found_dirs = FileUtils.find_files(
        cls.repo_root_dir,
        find_type=FindResultType.DIRS,
        regex=TEST_DIR_NAME,
        parent_dir=SOME_PARENT_DIR,
        single_level=False,
        full_path_result=True,
    )
    if len(found_dirs) != 1:
        raise ValueError(
            f"Expected to find 1 dir with name {TEST_DIR_NAME} "
            f"and parent dir '{SOME_PARENT_DIR}'. "
            f"Actual results: {found_dirs}"
        )
    cls.repo_root_dir = found_dirs[0]
    cls.some_other_dir = FileUtils.join_path(cls.repo_root_dir, "some-other-dir")
def setUpClass(cls):
    # The test expects the MAIL_ACC_PASSWORD env var to be set
    if CdswEnvVar.MAIL_ACC_PASSWORD.value not in os.environ:
        raise ValueError(f"Please set the '{CdswEnvVar.MAIL_ACC_PASSWORD.value}' env var and re-run the test!")
    cls._setup_logging()
    cls.repo_root_dir = FileUtils.find_repo_root_dir(__file__, REPO_ROOT_DIRNAME)
    found_cdsw_dirs = FileUtils.find_files(
        cls.repo_root_dir,
        find_type=FindResultType.DIRS,
        regex=CDSW_DIRNAME,
        parent_dir="yarndevtools",
        single_level=False,
        full_path_result=True,
    )
    if len(found_cdsw_dirs) != 1:
        raise ValueError(
            f"Expected to find 1 dir with name {CDSW_DIRNAME} "
            f"and parent dir 'yarndevtools'. "
            f"Actual results: {found_cdsw_dirs}"
        )
    cls.repo_cdsw_root_dir = found_cdsw_dirs[0]
    cls.yarn_dev_tools_results_dir = FileUtils.join_path(cls.repo_cdsw_root_dir, "yarndevtools-results")
    cls.branchdiff_cdsw_runner_script = YarnCdswBranchDiffTests.find_cdsw_runner_script(
        os.path.join(cls.repo_cdsw_root_dir, BRANCH_DIFF_REPORTER_DIR_NAME)
    )
    cls.docker_test_setup = DockerTestSetup(
        DOCKER_IMAGE, create_image=CREATE_IMAGE, dockerfile_location=cls.repo_cdsw_root_dir, logger=CMD_LOG
    )

    exec_mode_env: str = OsUtils.get_env_value(CdswEnvVar.TEST_EXECUTION_MODE.value, TestExecMode.CLOUDERA.value)
    cls.exec_mode: TestExecMode = TestExecMode[exec_mode_env.upper()]

    # !! WARNING: User-specific settings !!
    if cls.exec_mode == TestExecMode.CLOUDERA:
        # We need both the upstream and the downstream repo for Cloudera mode
        os.environ[CdswEnvVar.CLOUDERA_HADOOP_ROOT.value] = "/Users/snemeth/development/cloudera/hadoop/"
        os.environ[CdswEnvVar.HADOOP_DEV_DIR.value] = "/Users/snemeth/development/apache/hadoop"
    elif cls.exec_mode == TestExecMode.UPSTREAM:
        os.environ[CdswEnvVar.HADOOP_DEV_DIR.value] = "/Users/snemeth/development/apache/hadoop"
        os.environ[BranchComparatorEnvVar.REPO_TYPE.value] = RepoType.UPSTREAM.value
        os.environ[BranchComparatorEnvVar.FEATURE_BRANCH.value] = "origin/branch-3.3"
        os.environ[BranchComparatorEnvVar.MASTER_BRANCH.value] = "origin/trunk"
def test_base_and_other_refs_are_valid_more_commits(self):
    LOG.debug(
        "Found files in patches output dir: %s",
        FileUtils.find_files(self.patches_basedir, regex=".*", single_level=True, full_path_result=True),
    )
    parent_level = 5
    # base_ref is the 5th first-parent ancestor of the base branch, i.e. "<branch>^^^^^"
    format_patch_saver = FormatPatchSaver(
        self.setup_args(base_ref=DEFAULT_BASE_BRANCH + "^" * parent_level, other_ref=DEFAULT_BASE_BRANCH),
        self.repo.working_dir,
        self.current_datetime,
    )
    format_patch_saver.run()

    # Verify files
    self.utils.assert_files_not_empty(self.patches_basedir, expected_files=5)
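
# The '"^" * parent_level' expression builds a git first-parent ancestry ref
# (e.g. "trunk^^^^^"), so base_ref points 5 commits behind other_ref and exactly
# 5 patch files are expected. A minimal sketch of how such a ref resolves, using
# the plain git CLI -- the helper name below is hypothetical.
import subprocess

def nth_first_parent(repo_dir: str, ref: str, n: int) -> str:
    """Resolve the n-th first-parent ancestor of `ref`, e.g. 'trunk^^^^^' for n=5."""
    ancestor_ref = ref + "^" * n  # git parses each '^' as "first parent of"
    return subprocess.check_output(
        ["git", "rev-parse", ancestor_ref], cwd=repo_dir, text=True
    ).strip()

# With base_ref 5 commits behind other_ref, `git format-patch base..other`
# produces exactly 5 patch files -- which is what the assertion above checks.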
def run(self):
    LOG.info(
        "Starting zipping latest command data...\n"
        "PLEASE NOTE THAT THE ACTUAL OUTPUT DIR AND DESTINATION FILES CAN CHANGE, IF NOT SPECIFIED\n"
        f"Output dir: {self.config.output_dir}\n"
        f"Input files: {self.config.input_files}\n"
        f"Destination filename: {self.config.dest_filename}\n"
        f"Ignore file types: {self.config.ignore_filetypes}\n"
    )
    input_files: List[str] = self.config.input_files
    sum_len_all_files: int = 0
    all_ignored_files: int = 0
    if self.config.ignore_filetypes:
        input_files = []
        # TODO move this whole thing to pythoncommons
        for input_file in self.config.input_files:
            if FileUtils.is_dir(input_file):
                all_files = FileUtils.find_files(input_file, regex=".*", full_path_result=True)
                sum_len_all_files += len(all_files)
                files_to_ignore = set()
                for ext in self.config.ignore_filetypes:
                    new_files_to_ignore = FileUtils.find_files(input_file, extension=ext, full_path_result=True)
                    all_ignored_files += len(new_files_to_ignore)
                    LOG.debug(
                        f"Found {len(new_files_to_ignore)} files to ignore in directory '{input_file}': "
                        f"{StringUtils.list_to_multiline_string(new_files_to_ignore)}"
                    )
                    files_to_ignore.update(new_files_to_ignore)
                files_to_keep = list(set(all_files).difference(files_to_ignore))
                # Keep a reference to the TemporaryDirectory object so the directory
                # is not cleaned up before its contents are zipped
                tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()
                tmp_dir_path = tmp_dir.name
                FileUtils.copy_files_to_dir(files_to_keep, tmp_dir_path, cut_path=input_file)
                input_files.append(tmp_dir_path)
            else:
                input_files.append(input_file)
                sum_len_all_files += 1

    temp_dir_dest: bool = not self.config.output_dir or self.config.output_dir.startswith("/tmp")
    if self.config.output_dir:
        dest_filepath = FileUtils.join_path(self.config.output_dir, self.config.dest_filename)
        zip_file: BufferedWriter = ZipFileUtils.create_zip_file(input_files, dest_filepath, compress=True)
    else:
        zip_file: BufferedWriter = ZipFileUtils.create_zip_as_tmp_file(
            input_files, self.config.dest_filename, compress=True
        )

    zip_file_name = zip_file.name
    no_of_files_in_zip: int = ZipFileUtils.get_number_of_files_in_zip(zip_file_name)
    if self.config.ignore_filetypes and (sum_len_all_files - all_ignored_files) != no_of_files_in_zip:
        raise ValueError(
            "Unexpected number of files in zip. "
            f"All files: {sum_len_all_files}, "
            f"all ignored files: {all_ignored_files}, "
            f"number of files in zip: {no_of_files_in_zip}, "
            f"zip file: {zip_file_name}"
        )
    LOG.info(
        f"Finished writing command data to zip file: {zip_file_name}, "
        f"size: {FileUtils.get_file_size(zip_file_name)}"
    )
    FileUtils.create_symlink_path_dir(LATEST_DATA_ZIP_LINK_NAME, zip_file_name, self.config.project_out_root)
    # Create a latest link for the command as well
    FileUtils.create_symlink_path_dir(self.cmd_type.command_data_zip_name, zip_file_name, self.config.project_out_root)

    # Save the command data file to the project output root when temp dir mode is used
    if temp_dir_dest:
        zip_file_name_real: str = f"{self.cmd_type.command_data_name}-real.zip"
        target_file_path = FileUtils.join_path(self.config.project_out_root, FileUtils.basename(zip_file_name_real))
        FileUtils.copy_file(zip_file_name, target_file_path)
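
# The TODO above marks the ignore-filtering for extraction into pythoncommons.
# A minimal, self-contained sketch of what such a helper could look like -- the
# name filter_files_by_extensions and the pathlib-based body are assumptions,
# not existing pythoncommons API.
from pathlib import Path
from typing import Iterable, List

def filter_files_by_extensions(dir_path: str, ignored_exts: Iterable[str]) -> List[str]:
    """Return all files under dir_path, minus those whose extension is ignored.

    Hypothetical helper illustrating the filtering above; not part of pythoncommons.
    """
    ignored = {ext.lstrip(".").lower() for ext in ignored_exts}
    return [
        str(p)
        for p in Path(dir_path).rglob("*")
        if p.is_file() and p.suffix.lstrip(".").lower() not in ignored
    ]

# Example: keep everything except logs and zips
# files_to_keep = filter_files_by_extensions("/tmp/command-data", ["log", "zip"])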
def run(self):
    branch_results = {}
    for branch in self.branches:
        LOG.info("Processing branch: %s", branch)
        exists = self.upstream_repo.is_branch_exist(branch)
        commits = self.upstream_repo.log(branch, grep=self.jira_id, oneline=True)
        commit_hashes = GitWrapper.extract_commit_hash_from_gitlog_results(commits)
        branch_result = BranchResults(branch, exists, commits, commit_hashes)
        branch_results[branch] = branch_result

        # Only store the diff if exactly one commit matched for this branch
        if branch_result.number_of_commits == 1:
            commit_hash = branch_result.single_commit_hash
            # TODO create diff_with_parent helper method in GitWrapper
            diff = self.upstream_repo.diff_between_refs(commit_hash + "^", commit_hash)
            branch_result.git_diff = diff
            PatchUtils.save_diff_to_patch_file(
                diff, FileUtils.join_path(self.basedir, f"{self.jira_id}-{branch}.diff")
            )

    # Validate results
    branch_does_not_exist = [b_res.branch_name for b_res in branch_results.values() if not b_res.exists]
    zero_commit = [b_res.branch_name for b_res in branch_results.values() if b_res.number_of_commits == 0]
    multiple_commits = [b_res.branch_name for b_res in branch_results.values() if b_res.number_of_commits > 1]

    LOG.debug("Branch result objects: %s", branch_results)
    if branch_does_not_exist:
        raise ValueError(
            f"The following branches do not exist for Jira ID '{self.jira_id}': {branch_does_not_exist}"
        )
    if zero_commit:
        raise ValueError(
            f"The following branches do not contain a commit for Jira ID '{self.jira_id}': {zero_commit}"
        )
    if multiple_commits:
        raise ValueError(
            f"The following branches contain multiple commits for Jira ID '{self.jira_id}': {multiple_commits}"
        )

    LOG.info("Generated diff files: ")
    diff_files = FileUtils.find_files(
        self.basedir, regex=self.jira_id + "-.*", single_level=True, full_path_result=True
    )
    for f in diff_files:
        LOG.info("%s: %s", f, FileUtils.get_file_size(f))
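
# The TODO above asks for a diff_with_parent helper on GitWrapper. A minimal
# sketch of such a method, assuming it can delegate to the existing
# diff_between_refs -- the method name comes from the TODO; the body is an
# assumption, not the actual GitWrapper implementation.
def diff_with_parent(self, commit_hash: str) -> str:
    """Diff a commit against its first parent (equivalent to `git diff <hash>^ <hash>`).

    Hypothetical GitWrapper helper; callers no longer build the '<hash>^' ref themselves.
    """
    return self.diff_between_refs(commit_hash + "^", commit_hash)

# With this in place, the loop above would reduce to:
# diff = self.upstream_repo.diff_with_parent(commit_hash)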
def render_summary_string(self, result_basedir, extended_backport_table=False, backport_remote_filter=ORIGIN):
    # Generate tables first, in order to know the length of the header rows
    commit_list_table = ResultPrinter.print_table(
        self.upstream_commit_data_list,
        lambda commit: (commit.jira_id, commit.message, commit.date),
        header=["Row", "Jira ID", "Commit message", "Commit date"],
        print_result=False,
        max_width=80,
        max_width_separator=" ",
        # tabulate_fmts=[TabulateTableFormat.GRID, TabulateTableFormat.HTML]
        tabulate_fmt=TabulateTableFormat.GRID,
    )
    files = FileUtils.find_files(result_basedir, regex=".*", full_path_result=True)
    file_list_table = ResultPrinter.print_table(
        files,
        lambda file: (file,),
        header=["Row", "File"],
        print_result=False,
        max_width=80,
        max_width_separator=os.sep,
    )

    if extended_backport_table:
        backports_list = []
        for bjira in self.backported_jiras.values():
            for commit in bjira.commits:
                backports_list.append(
                    [
                        bjira.jira_id,
                        commit.commit_obj.hash[:SHORT_SHA_LENGTH],
                        commit.commit_obj.message,
                        self.filter_branches(backport_remote_filter, commit.branches),
                        commit.commit_obj.date,
                    ]
                )
        backport_table = ResultPrinter.print_table(
            backports_list,
            lambda row: row,
            header=["Row", "Jira ID", "Hash", "Commit message", "Branches", "Date"],
            print_result=False,
            max_width=50,
            max_width_separator=" ",
        )
    else:
        if self.execution_mode == ExecutionMode.AUTO_BRANCH_MODE:
            backports_list = []
            for bjira in self.backported_jiras.values():
                all_branches = []
                for commit in bjira.commits:
                    if commit.commit_obj.reverted:
                        continue
                    branches = self.filter_branches(backport_remote_filter, commit.branches)
                    if branches:
                        all_branches.extend(branches)
                backports_list.append([bjira.jira_id, list(set(all_branches))])
            backport_table = ResultPrinter.print_table(
                backports_list,
                lambda row: row,
                header=["Row", "Jira ID", "Branches"],
                print_result=False,
                max_width=50,
                max_width_separator=" ",
            )
        elif self.execution_mode == ExecutionMode.MANUAL_BRANCH_MODE:
            backports_list = []
            for bjira in self.backported_jiras.values():
                all_branches = set([br for c in bjira.commits for br in c.branches])
                for commit in bjira.commits:
                    if commit.commit_obj.reverted:
                        continue
                    backport_present_list = []
                    for branch in self.downstream_branches:
                        backport_present_list.append(branch in all_branches)
                    curr_row = [bjira.jira_id]
                    curr_row.extend(backport_present_list)
                    # TODO Temporarily disabled colorize in order to send the mail
                    #  effortlessly with summary body. This method requires redesign, nevertheless.
                    # curr_row = ResultPrinter.colorize_row(curr_row, convert_bools=True)
                    backports_list.append(curr_row)
            header = ["Row", "Jira ID"]
            header.extend(self.downstream_branches)
            backport_table = ResultPrinter.print_table(
                backports_list,
                lambda row: row,
                header=header,
                print_result=False,
                max_width=50,
                max_width_separator=" ",
            )

    # Create headers
    commits_header_line = (
        StringUtils.generate_header_line(
            "COMMITS", char="═", length=len(StringUtils.get_first_line_of_multiline_str(commit_list_table))
        )
        + "\n"
    )
    result_files_header_line = (
        StringUtils.generate_header_line(
            "RESULT FILES", char="═", length=len(StringUtils.get_first_line_of_multiline_str(file_list_table))
        )
        + "\n"
    )
    backport_header_line = (
        StringUtils.generate_header_line(
            "BACKPORTED JIRAS", char="═", length=len(StringUtils.get_first_line_of_multiline_str(backport_table))
        )
        + "\n"
    )

    # Generate summary string
    summary_str = (
        StringUtils.generate_header_line(
            "SUMMARY", char="═", length=len(StringUtils.get_first_line_of_multiline_str(commit_list_table))
        )
        + "\n"
    )
    summary_str += f"Number of jiras: {self.no_of_jiras}\n"
    summary_str += f"Number of commits: {self.no_of_commits}\n"
    summary_str += f"Number of files changed: {self.no_of_files}\n"
    summary_str += commits_header_line
    summary_str += commit_list_table
    summary_str += "\n\n"
    summary_str += result_files_header_line
    summary_str += file_list_table
    summary_str += "\n\n"
    summary_str += backport_header_line
    summary_str += backport_table
    return summary_str
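
# The header lines above are sized to the first rendered line of each table so
# the '═' rules line up with the table width. A minimal sketch of that idea --
# this generate_header_line is an illustrative stand-in, not the pythoncommons
# implementation.
def generate_header_line(title: str, char: str = "═", length: int = 80) -> str:
    """Center `title` within a rule of `char` characters, `length` wide (illustrative stand-in)."""
    return f" {title} ".center(length, char)

table = "╔═════╤═════════╗\n║ Row │ Jira ID ║"
width = len(table.splitlines()[0])  # width of the table's first rendered line
print(generate_header_line("SUMMARY", length=width))  # prints a rule exactly as wide as the table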