def _base_pre_run(self):
    if self.job_center.total_jobs < 1:
        log.info('No jobs to reproduce. Exiting.')
        return

    # Set up the required directories.
    os.makedirs(self.config.orig_logs_dir, exist_ok=True)
    os.makedirs(self.config.output_dir, exist_ok=True)
    self.utils.directories_setup()

    if os.path.isfile(self.utils.get_error_reason_file_path()):
        self.error_reasons = read_json(self.utils.get_error_reason_file_path())
    self.error_reasons = self.manager.dict(self.error_reasons)

    # Check if commands to Travis work.
    if not Utils.is_travis_installed():
        log.error(colored('Commands to Travis are failing unexpectedly. Try restarting your shell and ensure your '
                          'environment is provisioned correctly.', 'red'))
        raise Exception('Unexpected state: Commands to Travis are failing unexpectedly.')

    # Read travis_images.json.
    try:
        self.travis_images = read_json(self.config.travis_images_json)
    except FileNotFoundError:
        log.error(colored(self.config.travis_images_json + ' not found. Exiting.', 'red'))
        raise
def docker_run(image_tag, use_sandbox, use_pipe_stdin, use_rm):
    assert isinstance(image_tag, str) and not image_tag.isspace()
    assert isinstance(use_sandbox, bool)
    assert isinstance(use_pipe_stdin, bool)
    assert isinstance(use_rm, bool)

    # First, try to pull the image.
    ok = docker_pull(image_tag)
    if not ok:
        return False

    # Communicate progress to the user.
    host_sandbox = _default_host_sandbox()
    container_sandbox = CONTAINER_SANDBOX_DEFAULT
    if use_sandbox:
        if not os.path.exists(host_sandbox):
            log.info('Creating', host_sandbox, 'as the host sandbox.')
            os.makedirs(host_sandbox, exist_ok=True)
        log.info('Binding host sandbox', host_sandbox, 'to container directory', container_sandbox)

    # Communicate progress to the user.
    if use_pipe_stdin:
        log.info('Entering the container and executing the contents of stdin inside the container.')
    else:
        log.info('Entering the container.')
    if use_rm:
        log.info('The container will be cleaned up after use.')

    image_location = _image_location(image_tag)

    # Prepare the arguments for the docker run command.
    volume_args = ['-v', '{}:{}'.format(host_sandbox, container_sandbox)] if use_sandbox else []
    # The -t option must not be used in order to use a heredoc.
    input_args = ['-i'] if use_pipe_stdin else ['-i', '-t']
    subprocess_input = sys.stdin.read() if use_pipe_stdin else None
    subprocess_universal_newlines = use_pipe_stdin
    rm_args = ['--rm'] if use_rm else []

    # If we're using a shared directory, we need to modify the start script to change the permissions of the shared
    # directory on the container side. However, this will also change the permissions on the host side.
    script_args = [SCRIPT_DEFAULT]
    if use_sandbox:
        start_command = '"sudo chmod -R 777 {} && cd {} && umask 000 && cd .. && {}"'.format(
            container_sandbox, container_sandbox, SCRIPT_DEFAULT)
        # These arguments represent a command of the following form:
        #   /bin/bash -c "sudo chmod 777 <container_sandbox> && cd <container_sandbox> && umask 000 && /bin/bash"
        # So bash will execute chmod and umask and then start a new bash shell. From the user's perspective, the chmod
        # and umask commands happen transparently. That is, the user only sees the final new bash shell.
        script_args = [SCRIPT_DEFAULT, '-c', start_command]

    # Try to run the image.
    # The tail arguments must be at the end of the command.
    tail_args = [image_location] + script_args
    args = ['sudo', 'docker', 'run', '--privileged'] + rm_args + volume_args + input_args + tail_args
    command = ' '.join(args)
    _, _, returncode = ShellWrapper.run_commands(command,
                                                 input=subprocess_input,
                                                 universal_newlines=subprocess_universal_newlines,
                                                 shell=True)
    return returncode == 0
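# A minimal, hedged usage sketch for docker_run above (not part of the original module). The image tag
# 'example-artifact-tag' is a hypothetical placeholder. With these flags the function binds the host
# sandbox into the container, opens an interactive shell, and removes the container on exit.
if __name__ == '__main__':
    succeeded = docker_run('example-artifact-tag', use_sandbox=True, use_pipe_stdin=False, use_rm=True)
    print('docker run finished successfully:', succeeded)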
def query_current_metrics(repo: str) -> dict:
    log.info('Attempting to query metrics from database for {}'.format(repo))
    bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)
    results = bugswarmapi.find_mined_project(repo)
    if results.status_code != 200:
        log.info('Repository: {} has yet to be mined. Continuing.'.format(repo))
        return {
            'repo': '',
            'latest_mined_version': '',
            'last_build_mined': {'build_id': 0, 'build_number': 0},
            'progression_metrics': {
                'builds': 0,
                'jobs': 0,
                'failed_builds': 0,
                'failed_jobs': 0,
                'failed_pr_builds': 0,
                'failed_pr_jobs': 0,
                'mined_build_pairs': 0,
                'mined_job_pairs': 0,
                'mined_pr_build_pairs': 0,
                'mined_pr_job_pairs': 0,
            },
        }
    return results.json()
def get_commit_info_for_virtual_commit(self):
    start_time = time.time()
    virtual_commits_info = {}
    virtual_commits_info_json_file = self.utils.get_virtual_commits_info_json_file(self.repo)
    has_json_file = os.path.isfile(virtual_commits_info_json_file)
    if has_json_file:
        virtual_commits_info = read_json(virtual_commits_info_json_file)
    for _, branch_obj in self.branches.items():
        if not branch_obj.pairs:
            continue
        for pair in branch_obj.pairs:
            builds = [pair.failed_build, pair.passed_build]
            for b in builds:
                if has_json_file:
                    if b.commit in virtual_commits_info:
                        b.virtual_commit_info = virtual_commits_info[b.commit]
                else:
                    c = self.utils.github.get_commit_info(self.repo, b.commit)
                    if c:
                        virtual_commits_info[b.commit] = c
                        b.virtual_commit_info = c
    if not has_json_file:
        write_json(virtual_commits_info_json_file, virtual_commits_info)
    log.info('Got commit info for virtual commits in', time.time() - start_time, 'seconds.')
def build_and_run(self, job):
    log.info('Building and running job with ID {}.'.format(job.job_id))

    # Determine the image name.
    image_name = 'job_id:{}'.format(job.job_id)

    # Get paths required for building the image.
    abs_reproduce_tmp_dir = os.path.abspath(self.utils.get_reproduce_tmp_dir(job))
    abs_dockerfile_path = os.path.abspath(self.utils.get_dockerfile_path(job))
    reproduced_log_destination = self.utils.get_log_path(job)

    # Actually build the image now.
    image = self.build_image(path=abs_reproduce_tmp_dir, dockerfile=abs_dockerfile_path, full_image_name=image_name)

    # Spawn the container.
    container_name = str(job.job_id)
    retry_count = 0
    while True:
        try:
            self.spawn_container(image, container_name, reproduced_log_destination)
        except requests.exceptions.ReadTimeout as e:
            log.error('Error while attempting to spawn a container:', e)
            log.info('Retrying to spawn container.')
            retry_count += 1
        else:
            break
def build_and_run(self, job_id, gen_files_dir, repo_path, repo_name, base_image_name, repo):
    log.info('Building and running job with ID {}.'.format(job_id))
    dockerfile_path = os.path.join(gen_files_dir, job_id + '-dockerfile')

    # Determine the image name.
    image_name = 'binswarm/cbuilds:{}'.format(job_id + '-' + repo_name)
    image_name = image_name.lower()

    # Actually build the image now.
    image = self.build_image(path=gen_files_dir, dockerfile=dockerfile_path, full_image_name=image_name)

    with open('image.txt', 'w') as f:
        f.write(image_name)

    # Spawn the container.
    container_name = job_id
    retry_count = 0
    while True:
        try:
            reproduced_log_destination = os.path.join(gen_files_dir, 'docker-log.txt')
            self.spawn_container(image_name, container_name, reproduced_log_destination, repo_path,
                                 base_image_name, repo)
        except requests.exceptions.ReadTimeout as e:
            log.error('Error while attempting to spawn a container:', e)
            log.info('Retrying to spawn container.')
            retry_count += 1
        else:
            break
def download_repo(job, utils):
    # Make the workspace repository directory.
    os.makedirs(utils.get_stored_repo_path(job), exist_ok=True)

    # Download the repository.
    if job.is_pr:
        # The correct job sha is necessary for correct file path generation.
        job.sha = job.travis_merge_sha

    if not os.path.exists(utils.get_project_storage_repo_zip_path(job)):
        src = utils.construct_github_archive_repo_sha_url(job.repo, job.sha)
        log.info('Downloading the repository from the GitHub archive at {}.'.format(src))
        urllib.request.urlretrieve(src, utils.get_project_storage_repo_zip_path(job))

    # Needed below even when the archive is already cached locally.
    repo_unzip_name = job.repo.split('/')[1] + '-' + job.sha

    # Copy the repository from stored project repositories to the workspace repository directory by extracting the
    # stored archive into the workspace directory.
    repo_zip_obj = zipfile.ZipFile(utils.get_project_storage_repo_zip_path(job))
    repo_zip_obj.extractall(utils.get_stored_repo_path(job))

    distutils.dir_util.copy_tree(os.path.join(utils.get_stored_repo_path(job), repo_unzip_name),
                                 utils.get_reproducing_repo_dir(job))
    distutils.dir_util.copy_tree(os.path.join(utils.get_repo_storage_dir(job), '.git'),
                                 os.path.join(utils.get_reproducing_repo_dir(job), '.git'))
def main(args=dict()):
    log.config_logging(getattr(logging, 'INFO', None))

    # Log the current version of this BugSwarm component.
    log.info(get_current_component_version_message('Classifier'))

    repo_list, pipeline = _validate_input(args)
    filter_output_dir = os.path.join(os.path.dirname(__file__), '../pair-filter/output-json/')

    if pipeline and not os.path.exists(filter_output_dir):
        log.error('pipeline == true, but output_file_path ({}) does not exist. '
                  'Exiting PairClassifier.'.format(filter_output_dir))
        return

    for repo in repo_list:
        if pipeline:
            task_name = repo.replace('/', '-')
            json_path = os.path.join(filter_output_dir, task_name + '.json')
            if not os.path.exists(json_path):
                log.error(json_path, 'does not exist. Repo', repo, 'will be skipped.')
                continue
            # Get the input json from the file generated by pair-filter.
            dir_of_jsons = generate_build_pair_json(repo, json_path)
        else:
            # Get the input json from the DB.
            dir_of_jsons = generate_build_pair_json(repo)
        PairClassifier.run(repo, dir_of_jsons, args)
def process(self, repo, builds_json_file, builds_info_json_file) -> Optional[Any]:
    # repo = context['repo']
    travis = TravisWrapper()

    if os.path.isfile(builds_json_file):
        build_list = read_json(builds_json_file)
    else:
        log.info('Getting the list of builds...')
        start_time = time.time()
        try:
            builds = travis.get_builds_for_repo(repo)
        except RequestException:
            error_message = 'Encountered an error while downloading builds for repository {}.'.format(repo)
            # Surface the failure; otherwise `builds` would be undefined below.
            log.error(error_message)
            raise
        build_list = list(builds)
        write_json(builds_json_file, build_list)
        log.info('Got the list of builds in', time.time() - start_time, 'seconds.')

    if os.path.isfile(builds_info_json_file):
        build_list = read_json(builds_info_json_file)
    else:
        log.info('Downloading build info for', len(build_list),
                 'builds... This step may take several minutes for large repositories.')
        start_time = time.time()
        for idx, build in enumerate(build_list):
            build_id = build['id']
            try:
                build_info = travis.get_build_info(build_id)
            except RequestException:
                error_message = 'Encountered an error while downloading build info for build {}.'.format(build_id)
                # Surface the failure; otherwise `build_info` would be undefined below.
                log.error(error_message)
                raise
            build['build_info'] = build_info
            if (idx + 1) % 500 == 0:
                log.info('Downloaded build info for', idx + 1, 'builds so far...')
        write_json(builds_info_json_file, build_list)
        log.info('Downloaded build info in', time.time() - start_time, 'seconds.')
def main():
    if not path.exists(os.path.expanduser('~/.docker/config.json')):
        log.info('Docker login file not found. Run `docker login` before filtering pairs.')
        exit(0)
    generate_image_file()
def init_queues_for_threads(self, threads_num, package_mode=False):
    num_of_items_per_thread = int(self.get_num_remaining_items(package_mode) / threads_num)
    self.thread_workloads = []
    q = Queue()
    if package_mode:
        for r in self.repos:
            for bp in self.repos[r].buildpairs:
                for jp in bp.jobpairs:
                    if not jp.reproduced.value:
                        q.put(jp)
                    if q.qsize() >= num_of_items_per_thread:
                        self.thread_workloads.append(q)
                        q = Queue()
    else:
        for r in self.repos:
            for bp in self.repos[r].buildpairs:
                for jp in bp.jobpairs:
                    for j in jp.jobs:
                        if not j.reproduced.value and not j.skip.value:
                            q.put(j)
                        if q.qsize() >= num_of_items_per_thread:
                            self.thread_workloads.append(q)
                            q = Queue()

    log.info('Finished initializing queues for all threads.')
    for i in range(len(self.thread_workloads)):
        log.debug('tid =', i, ', qsize =', self.thread_workloads[i].qsize())
def load_buildpairs(dir_of_jsons: str, repo: str):
    """
    :param dir_of_jsons: A directory containing JSON files of build pairs.
    :param repo: The repo slug.
    :return: A list of build pairs, or None if the JSON file for `repo` is missing or contains invalid JSON.
    """
    all_buildpairs = []
    count = 0
    task_name = repo.replace('/', '-')
    filename = task_name + '.json'
    try:
        data = read_json(os.path.join(dir_of_jsons, filename))
    except json.decoder.JSONDecodeError:
        log.error('{} contains invalid JSON.'.format(filename))
        return None
    except FileNotFoundError:
        log.error('{} is not found.'.format(filename))
        return None

    all_buildpairs.extend(data)
    if not data:
        log.warning('{} does not contain any build pairs.'.format(filename))
    count += 1
    log.info('Read {} build pairs from {}.'.format(len(all_buildpairs), filename))
    return all_buildpairs
def check_package_outdated(package: str):
    """
    Checks if the installed version of a package is older than the latest non-prerelease version available on PyPI.
    If so, prints a message that asks the user to consider upgrading.

    The package must be available on PyPI and must have always used a version numbering scheme that can be parsed by
    distutils.version.StrictVersion.

    This function is meant to be used for packages in the 'bugswarm' namespace, which meet the above requirements,
    and therefore is not guaranteed to work for packages outside that namespace.

    :param package: The name of the package to check.
    """
    if not isinstance(package, str):
        raise TypeError
    try:
        installed = _get_installed_version(package)
        latest = _get_latest_version(package)
        if latest > installed:
            # A newer, non-prerelease version is available.
            log.info('You are using {} version {}, but version {} is available.'.format(package, installed, latest))
            log.info("You should consider upgrading via the 'pip3 install --upgrade {}' command.".format(package))
    except Exception as e:
        log.error('Encountered an error while checking if {} can be updated: {}'.format(package, e))
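# A minimal, hedged usage sketch (not part of the original module): a component would typically call
# check_package_outdated once at startup. 'bugswarm-common' is an illustrative choice of a package in
# the 'bugswarm' namespace.
if __name__ == '__main__':
    check_package_outdated('bugswarm-common')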
def get_pr_commits_by_parsing_html(self):
    start_time = time.time()
    html_commits_json_file = self.utils.get_html_commits_json_file(self.repo)
    html_commits = {}
    if os.path.isfile(html_commits_json_file):
        html_commits = read_json(html_commits_json_file)
        for _, branch_obj in self.branches.items():
            if branch_obj.pr_num != -1:  # if it's a PR branch
                branch_obj.html_commits = html_commits[str(branch_obj.pr_num)]
    else:
        threads = [threading.Thread(target=self.utils.github.get_pr_commits_by_html,
                                    args=(self.repo, str(branch_obj.pr_num), branch_obj))
                   for _, branch_obj in self.branches.items()]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        for _, branch_obj in self.branches.items():
            if branch_obj.pr_num != -1:  # if it's a PR branch
                html_commits[branch_obj.pr_num] = branch_obj.html_commits
        write_json(html_commits_json_file, html_commits)

    log.info('Got pull request commits (via HTML parsing) in', time.time() - start_time, 'seconds.')
def _visualize_match_history(self):
    log.info('Visualizing match history:')
    log.info('N means no reproduced log exists. (An error occurred in the reproducer while reproducing the job.)')
    all_jobpairs, all_task_names = self._get_all_jobpairs_and_task_names()
    for jp in all_jobpairs:
        log.info(jp.full_name)
        match_histories = [
            (jp.match_history, 'Job pair'),
            (jp.failed_job_match_history, 'Failed job'),
            (jp.passed_job_match_history, 'Passed job'),
        ]
        for match_history, history_name in match_histories:
            mh = [str(match_history.get(task_name, 'N')) for task_name in all_task_names]
            if mh:
                full_history_name = '{} match history'.format(history_name)
                log.info('{:>24}:'.format(full_history_name), ' -> '.join(mh))
            else:
                log.info('No match history. (This job pair was not reproduced.)')
def _write_csv(self, data):
    os.makedirs(self.config.csv_dir, exist_ok=True)
    filename = self.task + '.csv'
    filepath = os.path.join(self.config.csv_dir, filename)
    keys = Packager._flatten_keys()
    with open(filepath, 'w') as f:
        # Write the header.
        f.write(','.join(keys) + '\n')
        for d in data:
            line = []
            for key in keys:
                if key.startswith('failed_job_'):
                    k = key.split('failed_job_')[1]
                    # Replace ',' with '#' to disambiguate the delimiter, and drop newlines.
                    replaced = str(d['failed_job'][k]).replace(',', '#')
                    replaced = replaced.replace('\n', ' ')
                    line.append(replaced)
                elif key.startswith('passed_job_'):
                    k = key.split('passed_job_')[1]
                    # Replace ',' with '#' to disambiguate the delimiter, and drop newlines.
                    replaced = str(d['passed_job'][k]).replace(',', '#')
                    replaced = replaced.replace('\n', ' ')
                    line.append(replaced)
                else:
                    line.append(d[key])
            f.write(','.join(map(str, line)) + '\n')
    log.info('Done! Wrote', len(data), 'rows into the CSV file at', filepath + '.')
def _exceeded_api_quota(self) -> Tuple[bool, Optional[int]]:
    """
    :return: A 2-tuple. (True, number of seconds until the quota resets) if the API quota has been exceeded.
             (False, None) otherwise.
    :raises Exception: When an exception is raised by the request.
    """
    quota_url = 'https://api.github.com/rate_limit'
    log.info('Checking GitHub API quota.')
    response = self._session.get(quota_url)
    try:
        response.raise_for_status()
        result = response.json()
        if 'resources' in result:
            remaining = result['resources']['core']['remaining']
            if remaining <= 0:
                reset_at = result['resources']['core']['reset']  # Time when the quota resets, in UTC epoch seconds.
                log.warning('GitHub API quota exceeded and will reset at UTC {}.'.format(reset_at))
                now = int(time.time())
                # Add a few seconds to be sure that we sleep long enough.
                sleep_duration = (reset_at - now) + 10
                return True, sleep_duration
    except Exception as e:
        log.error('Exception while checking API quota:', e)
        raise
    return False, None
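# A minimal, hedged caller sketch for _exceeded_api_quota (a hypothetical helper, not part of the
# original class): it shows how the (exceeded, sleep_duration) contract could be used to wait out the
# GitHub quota before retrying a request.
def _sleep_until_quota_reset(self):
    exceeded, sleep_duration = self._exceeded_api_quota()
    if exceeded:
        log.info('Sleeping for', sleep_duration, 'seconds until the GitHub API quota resets.')
        time.sleep(sleep_duration)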
def __init__(self, input_file, task_name, threads=1, keep=False, package_mode=False, dependency_solver=False,
             skip_check_disk=False):
    """
    Initializes JobDispatcher with the user-specified input and starts work.

    If `threads` is specified, JobDispatcher will dispatch jobs to be reproduced in each thread. Otherwise, each job
    will be reproduced sequentially.
    """
    log.info('Initializing job dispatcher.')
    self.input_file = input_file
    self.thread_num = threads
    self.keep = keep
    self.package_mode = package_mode
    self.dependency_solver = dependency_solver
    # -----
    self.config = Config(task_name)
    self.config.skip_check_disk = skip_check_disk
    self.utils = Utils(self.config)
    self.items_processed = Value('i', 0)
    self.reproduce_err = Value('i', 0)
    self.job_time_acc = 0
    self.start_time = time.time()
    self.docker = DockerWrapper(self.utils)
    self.docker_storage_path = self.docker.setup_docker_storage_path()
    self.terminate = Value('i', 0)
    self.manager = Manager()
    self.lock = Lock()
    self.workspace_locks = self.manager.dict()
    self.cloned_repos = self.manager.dict()
    self.threads = {}
    self.error_reasons = {}
    self.alive_threads = 0
    self.travis_images = None
    self.job_center = PairCenter(self.input_file, self.utils, self.package_mode)
def docker_pull(image_tag):
    assert image_tag
    assert isinstance(image_tag, str)

    # Exit early if the image already exists locally.
    exists, image_location = _image_exists_locally(image_tag)
    if exists:
        return True, image_location

    image_location = _image_location(image_tag)
    command = 'sudo docker pull {}'.format(image_location)
    _, _, returncode = ShellWrapper.run_commands(command, shell=True)
    if returncode != 0:
        # The image is not cached. Attempt to pull it from bugswarm/images.
        image_location = '{}:{}'.format(DOCKER_HUB_REPO, image_tag)
        command = 'sudo docker pull {}'.format(image_location)
        _, _, returncode = ShellWrapper.run_commands(command, shell=True)
        if returncode != 0:
            # The image is not in bugswarm/images either.
            log.error('Could not download the image', image_location)
        else:
            log.info('Downloaded the image', image_location + '.')
    else:
        log.info('Downloaded the image', image_location + '.')
    return returncode == 0, image_location
def get_commits_from_github_api(self):
    start_time = time.time()
    github_commits = {}
    get_github_commits = True
    github_commits_json_file = self.utils.get_github_commits_json_file(self.repo)
    if os.path.isfile(github_commits_json_file):
        github_commits = read_json(github_commits_json_file)
        get_github_commits = False
    for _, branch_obj in self.branches.items():
        if branch_obj.pr_num != -1:  # Whether it is a PR branch.
            # Get commits from the GitHub API.
            if get_github_commits:
                github_commits[str(branch_obj.pr_num)] = self.utils.github.list_pr_commits(
                    self.repo, str(branch_obj.pr_num))
            branch_obj.github_commits = github_commits[str(branch_obj.pr_num)]
            # for commit in github_commits[str(branch.pr_num)]:
            #     commit['build_ids'] = self.utils.github.get_build_ids_for_commit(self.repo, commit['sha'])
    write_json(github_commits_json_file, github_commits)
    log.info('Got pull request commits (via GitHub API calls) in', time.time() - start_time, 'seconds.')
def process(self, data: Any, context: dict) -> Optional[Any]:
    log.info('Getting build system info.')
    branches = data
    repo = context['repo']
    for _, branch_obj in branches.items():
        if not branch_obj.pairs:
            continue
        for pair in branch_obj.pairs:
            failed_build_commit_sha = pair.failed_build.commit
            passed_build_commit_sha = pair.passed_build.commit
            failed_build_info = self.get_build_info_from_github_api(repo, failed_build_commit_sha)
            passed_build_info = self.get_build_info_from_github_api(repo, passed_build_commit_sha)
            if failed_build_info == -1 or passed_build_info == -1:
                continue
            if failed_build_info != passed_build_info:
                failed_build_info = 'NA'
            jobpairs = pair.jobpairs
            for jp in jobpairs:
                jp.build_system = failed_build_info
    return data
def run(self):
    for i in range(1, self.runs + 1):
        self._pre_analyze()
        self._analyze(i)
        self._post_analyze(i)
    self._show_reproducibility()
    self._write_output_json()
    log.info('Done!')
def _save_output(repo: str, output_pairs: list):
    task_name = repo.replace('/', '-')
    os.makedirs(os.path.dirname('output/'), exist_ok=True)
    output_path = 'output/{}.json'.format(task_name)
    log.info('Saving output to', output_path)
    with open(output_path, 'w+') as f:
        json.dump(output_pairs, f, indent=2)
    log.info('Done writing output file.')
def remove_all_images():
    log.info('Removing all containers and Docker images (except Travis images).')
    command = 'docker rm $(docker ps -a -q); docker rmi -f $(docker images -a | grep -v "travis")'
    ShellWrapper.run_commands(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
def generate_build_pair_json(repo):
    log.info('Getting build pairs from the database.')
    dir_of_jsons = 'input/'
    task_name = repo.replace('/', '-')
    bugswarmapi = DatabaseAPI(token=DATABASE_PIPELINE_TOKEN)
    buildpairs = bugswarmapi.filter_mined_build_pairs_for_repo(repo)
    os.makedirs(os.path.dirname(dir_of_jsons), exist_ok=True)
    write_json('{}{}.json'.format(dir_of_jsons, task_name), buildpairs)
    return dir_of_jsons
def _docker_image_inspect(image_tag):
    image_location = _image_location(image_tag)
    command = 'sudo docker image inspect {}'.format(image_location)
    _, _, returncode = ShellWrapper.run_commands(command,
                                                 stdout=subprocess.DEVNULL,
                                                 stderr=subprocess.DEVNULL,
                                                 shell=True)
    # For a non-existent image, docker image inspect has a non-zero exit status.
    if returncode == 0:
        log.info('The image', image_location, 'already exists locally and is up to date.')
    return returncode == 0
def main(argv):
    log.config_logging(getattr(logging, 'INFO', None))
    in_paths, out_path = _validate_input(argv)

    buildpairs = []
    tasks = []
    for path in in_paths:
        with open(path) as f:
            # Get task names to check for previous caching output CSVs.
            tasks.append(str(os.path.splitext(path)[0].split('/')[-1]))
            buildpairs += json.load(f)

    to_be_cached = []
    for bp in buildpairs:
        # Only accept reproducible build pairs.
        if 'match' not in bp or bp['match'] != 1:
            continue
        # Make sure the language is Java.
        java_jobs = []
        for job in bp['failed_build']['jobs']:
            if job['language'] == 'java':
                java_jobs.append(job['job_id'])
        for job in bp['passed_build']['jobs']:
            if job['language'] == 'java':
                java_jobs.append(job['job_id'])
        # Cache all reproducible & unfiltered job pairs that use Java & Maven.
        prefix = bp['repo'].replace('/', '-') + '-'
        for jp in bp['jobpairs']:
            should_be_cached = (not jp['is_filtered'] and
                                jp['build_system'] == 'Maven' and
                                jp['failed_job']['job_id'] in java_jobs and
                                jp['passed_job']['job_id'] in java_jobs)
            if should_be_cached:
                to_be_cached.append(prefix + str(jp['failed_job']['job_id']))

    try:
        os.mkdir('input')
    except FileExistsError:
        pass

    cached_image_tags = set()
    for task in tasks:
        # Check for the same CSV file that is opened below.
        if os.path.isfile('../cache-dependency/output/{}.csv'.format(task)):
            with open('../cache-dependency/output/{}.csv'.format(task)) as f:
                for row in f:
                    row_list = row.split(', ')
                    if row_list[1] == 'succeed':
                        cached_image_tags.add(row_list[0])

    with open(out_path, 'w') as f:
        for image_tag in to_be_cached:
            if image_tag not in cached_image_tags:
                f.write(image_tag + '\n')
    log.info('Wrote file to {}/{}'.format(os.getcwd(), out_path))
def main(argv=None):
    argv = argv or sys.argv

    # Configure logging.
    log.config_logging(getattr(logging, 'INFO', None))

    # Log the current version of this BugSwarm component.
    log.info(get_current_component_version_message('ReproducedResultsAnalyzer'))

    input_file, runs, task_name = _validate_input(argv)
    ReproducedResultsAnalyzer(input_file, runs, task_name).run()
def modify_deprecated_links(search_dir):
    file_path_result = []
    for deprecated_url in _LIST_OF_DEPRECATED_URLS:
        grep_for_pom_command = 'grep -rl {} {}'.format(deprecated_url, search_dir)
        _, stdout, stderr, ok = _run_command(grep_for_pom_command)
        if ok:
            file_path_result += stdout.splitlines()

    for file_path in file_path_result:
        file_modified = False
        if os.path.isfile(file_path):
            extension_type = file_path.split('.')[-1]
            if extension_type == 'xml' or extension_type == 'pom':
                try:
                    soup = BeautifulSoup(open(file_path), 'lxml-xml')
                    list_of_repo_urls = soup.find_all('url')
                    for url in list_of_repo_urls:
                        stripped_url = url.getText().strip()
                        if stripped_url in _LIST_OF_DEPRECATED_URLS:
                            url.string.replace_with(_REPLACEMENT_URL)
                            file_modified = True
                    # Overwrite the existing POM with the updated POM.
                    if file_modified:
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(soup.prettify())
                        log.info('Modified {} file.'.format(file_path))
                except IOError:
                    log.error('Error reading file: ', file_path)
            else:
                # square-retrofit-104397133 is an edge case example that contains a .js file that contains the
                # deprecated link and is executed at some point during the build, causing the HTTPS 501 error.
                with fileinput.input(file_path, inplace=True) as f:
                    for line in f:
                        match_obj_found = False
                        for url in _LIST_OF_DEPRECATED_URLS:
                            match_obj = re.search(url, line)
                            if match_obj:
                                print(line.replace(url, _REPLACEMENT_URL).strip('\n'))
                                file_modified = True
                                match_obj_found = True
                                continue
                        if match_obj_found:
                            continue
                        else:
                            print(line.strip('\n'))
                if file_modified:
                    log.info('Modified {} file.'.format(file_path))
        else:
            log.error('Error opening file: ', file_path)
def main(argv=None):
    argv = argv or sys.argv
    if len(argv) != 2:
        log.info('Usage: add_artifact_logs.py <task_name>')
        sys.exit()
    log.config_logging(getattr(logging, 'INFO', None))
    task_name = argv[1]
    ArtifactLogAdder(task_name=task_name).run()