def test_relative_symlink_bad_link(self) -> None:
    # The source is absolute but the link path is relative, so the call must be
    # rejected with a ValueError that mentions the link path.
    relative_link = os.path.join("foo", "bar")
    with temporary_dir() as workdir:
        absolute_source = os.path.join(workdir, "source")
        with self.assertRaisesRegex(ValueError, r"Path for link.*absolute"):
            relative_symlink(absolute_source, relative_link)
def test_relative_symlink_bad_link(self):
    # relative_symlink is expected to raise ValueError when the link path is not absolute.
    with temporary_dir() as tmpdir_1:
        source = os.path.join(tmpdir_1, 'source')
        # Deliberately relative: only the link argument violates the absolute-path requirement.
        link = os.path.join('foo', 'bar')
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which was
        # removed from unittest in Python 3.12.
        with self.assertRaisesRegex(ValueError, r'Path for link.*absolute'):
            relative_symlink(source, link)
def test_relative_symlink_bad_source(self) -> None:
    # The link is absolute but the source path is relative, so the call must be
    # rejected with a ValueError that mentions the source path.
    relative_source = os.path.join('foo', 'bar')
    with temporary_dir() as workdir:
        absolute_link = os.path.join(workdir, 'link')
        with self.assertRaisesRegex(ValueError, r'Path for source.*absolute'):
            relative_symlink(relative_source, absolute_link)
def test_relative_symlink_overwrite_existing_file(self):
    # Linking over a pre-existing regular file is expected to succeed, since os.unlink
    # can be safely called on files that aren't symlinks.
    with temporary_dir() as workdir:
        # The link location is occupied by a plain file in the same dir as the source.
        occupied_link = os.path.join(workdir, 'link')
        touch(occupied_link)
        target = os.path.join(workdir, 'source')
        relative_symlink(target, occupied_link)
def __init__(self,
             results_dir_root,
             cache_key_generator,
             build_invalidator,
             invalidate_dependents,
             fingerprint_strategy=None,
             invalidation_report=None,
             task_name=None,
             task_version=None,
             artifact_write_callback=lambda _: None):
    """Store the collaborators and prepare the task-versioned results dir prefix.

    A directory named after the first 12 hex characters of sha1(task_version) is created
    under `results_dir_root`, and the `_STABLE_DIR_NAME` entry next to it is deleted and
    recreated as a relative symlink to that directory.

    :API: public
    """
    self.invalidation_report = invalidation_report
    self._artifact_write_callback = artifact_write_callback
    self._fingerprint_strategy = fingerprint_strategy
    self._invalidator = build_invalidator
    self._invalidate_dependents = invalidate_dependents
    # Missing names/versions fall back to fixed placeholders.
    self._task_version = task_version or 'Unknown_0'
    self._task_name = task_name or 'UNKNOWN'
    self._cache_key_generator = cache_key_generator

    # Create the task-versioned prefix of the results dir.
    # NOTE(review): hashlib's sha1 needs bytes on Python 3 - confirm the type of task_version.
    version_digest = sha1(self._task_version).hexdigest()
    versioned_prefix = os.path.join(results_dir_root, version_digest[:12])
    self._results_dir_prefix = versioned_prefix
    safe_mkdir(versioned_prefix)

    # Re-point the stable symlink at the versioned prefix (useful when debugging).
    stable_link = os.path.join(results_dir_root, self._STABLE_DIR_NAME)
    safe_delete(stable_link)
    relative_symlink(versioned_prefix, stable_link)
def execute(self):
    """Symlink the fetched file(s) of each remote target into its results dir and register products.

    For every versioned target selected by `self._is_remote`, each fetched filename is linked
    into `vt.results_dir` with `relative_symlink` (only when the target is invalid or the link
    is not a file), appended to the 'remote_files' product, and the results dir is added to the
    'runtime_classpath' product under the 'default' conf.
    """
    targets = self.context.targets(self._is_remote)
    runtime_classpath_product = self.context.products.get_data(
        'runtime_classpath', init_func=ClasspathProducts.init_func(self.get_options().pants_workdir)
    )
    with self.invalidated(targets, invalidate_dependents=True, topological_order=True) as invalidation_check:
        # The fetches are idempotent operations from the subsystem, invalidation only controls recreating the symlinks.
        for vt in invalidation_check.all_vts:
            remote_source = RemoteSourceFetcher(vt.target)
            fetched = remote_source.path
            safe_mkdir(fetched)

            # Some unfortunate rigmarole to cover for the case where different targets want the same fetched file
            # but extracted/not. Both cases use the same base namespacing so we rely on the target to tell us.
            # Extracted: link every entry inside the fetched dir. Otherwise: link the single fetched path itself.
            fetch_dir = fetched if remote_source.extracted else os.path.dirname(fetched)
            filenames = os.listdir(fetched) if remote_source.extracted else [os.path.basename(fetched)]
            # NOTE(review): `stable_outpath` is never read in this block. Also, the conditional
            # expression binds looser than `+`, so the value is '' (not the bare namespace) when
            # `vt.target.extract` is falsy - confirm that is intended.
            stable_outpath = vt.target.namespace + '-{}'.format('extracted') if vt.target.extract else ''

            for filename in filenames:
                symlink_file = os.path.join(vt.results_dir, filename)
                # Recreate the link when the target was invalidated or the link does not resolve to a file.
                if not vt.valid or not os.path.isfile(symlink_file):
                    safe_rmtree(symlink_file)
                    relative_symlink(os.path.join(fetch_dir, filename), symlink_file)
                self.context.products.get('remote_files').add(vt.target, vt.results_dir).append(filename)

            # The runtime_classpath product is a constructed object that is rooted in the results_dir.
            runtime_classpath_product.add_for_target(vt.target, [('default', vt.results_dir)])
def test_relative_symlink_overwrite_existing_file(self) -> None:
    # Linking over a pre-existing regular file is expected to succeed, since os.unlink
    # can be safely called on files that aren't symlinks.
    with temporary_dir() as workdir:
        # The link location is occupied by a plain file in the same dir as the source.
        occupied_link = os.path.join(workdir, "link")
        touch(occupied_link)
        target = os.path.join(workdir, "source")
        relative_symlink(target, occupied_link)
def create_results_dir(self, root_dir, allow_incremental):
    """Ensure a results dir exists under `root_dir` for this versioned target.

    Records the unique (current) and stable results dir paths on the instance. For an invalid
    target the current dir is either cloned from the dir chosen by `_use_previous_dir` (which
    receives `allow_incremental`) or created empty, and the stable path is then symlinked to it.
    Nothing is created for a valid target.

    :API: public
    """
    # Generate unique and stable directory paths for this cache key.
    unique_path = self._results_dir_path(root_dir, self.cache_key, stable=False)
    self._current_results_dir = unique_path
    stable_path = self._results_dir_path(root_dir, self.cache_key, stable=True)
    self._results_dir = stable_path

    # If the target is valid, both directories can be assumed to exist.
    if self.valid:
        return

    # Initialize from scratch, or clone from the previous results_dir if incremental.
    seed_dir = self._use_previous_dir(allow_incremental, root_dir, unique_path)
    if seed_dir is None:
        safe_mkdir(unique_path)
    else:
        self.is_incremental = True
        self._previous_results_dir = seed_dir
        shutil.copytree(seed_dir, unique_path)

    # Finally, create the stable symlink.
    relative_symlink(unique_path, stable_path)
def execute(self):
    """Symlink the fetched file(s) of each remote target into its results dir and a stable root.

    For every versioned target selected by `self._is_remote`, each fetched filename is linked
    into `vt.results_dir`, then linked again from the directory returned by `self.stable_root`,
    and finally appended to the 'remote_files' product rooted at that stable directory.
    """
    targets = self.context.targets(self._is_remote)
    with self.invalidated(targets, invalidate_dependents=True, topological_order=True) as invalidation_check:
        # The fetches are idempotent operations from the subsystem, invalidation only controls recreating the symlinks.
        for vt in invalidation_check.all_vts:
            remote_source = RemoteSourceFetcher.Factory.scoped_instance(self).create(vt.target)
            fetched = remote_source.path
            self.context.log.info("Found fetched file at {}".format(fetched))
            safe_mkdir(fetched)

            # Some unfortunate rigmarole to cover for the case where different targets want the same fetched file
            # but extracted/not. Both cases use the same base namespacing so we rely on the target to tell us.
            # Extracted: link every entry inside the fetched dir. Otherwise: link the single fetched path itself.
            fetch_dir = fetched if remote_source.extracted else os.path.dirname(fetched)
            filenames = os.listdir(fetched) if remote_source.extracted else [os.path.basename(fetched)]
            # NOTE(review): the conditional expression binds looser than `+`, so this is '' (not the
            # bare namespace) when `vt.target.extract` is falsy - confirm that is intended.
            stable_outpath = vt.target.namespace + '-{}'.format('extracted') if vt.target.extract else ''
            stable_target_root = self.stable_root(stable_outpath)
            safe_mkdir(stable_target_root)

            for filename in filenames:
                symlink_file = os.path.join(vt.results_dir, filename)
                # This will be false if the symlink or the downloaded blob is missing.
                if not vt.valid or not os.path.isfile(symlink_file):
                    safe_rmtree(symlink_file)
                    relative_symlink(os.path.join(fetch_dir, filename), symlink_file)
                # The stable link points at the results-dir link and is only created when absent.
                stable_sym = os.path.join(stable_target_root, filename)
                if not os.path.exists(stable_sym):
                    relative_symlink(symlink_file, stable_sym)
                self.context.products.get('remote_files').add(vt.target, stable_target_root).append(filename)
def __init__(self,
             results_dir_root,
             cache_key_generator,
             build_invalidator_dir,
             invalidate_dependents,
             fingerprint_strategy=None,
             invalidation_report=None,
             task_name=None,
             task_version=None,
             artifact_write_callback=lambda _: None):
    """Store the collaborators and prepare the task-versioned results dir prefix.

    A BuildInvalidator is constructed from `build_invalidator_dir`. A directory named after the
    first 12 hex characters of sha1(task_version) is created under `results_dir_root`, and the
    `_STABLE_DIR_NAME` entry next to it is deleted and recreated as a relative symlink to it.

    :API: public
    """
    self._cache_key_generator = cache_key_generator
    # Missing names/versions fall back to fixed placeholders.
    self._task_version = task_version or 'Unknown_0'
    self._task_name = task_name or 'UNKNOWN'
    self._invalidate_dependents = invalidate_dependents
    self._invalidator = BuildInvalidator(build_invalidator_dir)
    self.invalidation_report = invalidation_report
    self._artifact_write_callback = artifact_write_callback
    self._fingerprint_strategy = fingerprint_strategy

    # Create the task-versioned prefix of the results dir.
    # NOTE(review): hashlib's sha1 needs bytes on Python 3 - confirm the type of task_version.
    version_digest = sha1(self._task_version).hexdigest()
    versioned_prefix = os.path.join(results_dir_root, version_digest[:12])
    self._results_dir_prefix = versioned_prefix
    safe_mkdir(versioned_prefix)

    # Re-point the stable symlink at the versioned prefix (useful when debugging).
    stable_link = os.path.join(results_dir_root, self._STABLE_DIR_NAME)
    safe_delete(stable_link)
    relative_symlink(versioned_prefix, stable_link)
def test_path_globs_symlink_dead(rule_runner: RuleRunner) -> None:
    """Globbing a symlink whose target does not exist yields no files and no dirs."""
    setup_fs_test_tar(rule_runner)
    dead_link = os.path.join(rule_runner.build_root, "subdir/dead")
    missing_target = os.path.join(rule_runner.build_root, "this_file_does_not_exist")
    relative_symlink(missing_target, dead_link)

    # Because the symlink does not escape, it should be ignored, rather than cause an error.
    assert_path_globs(
        rule_runner,
        ["subdir/dead"],
        expected_files=[],
        expected_dirs=[],
    )
def test_relative_symlink(self):
    # With source and link in the same dir, the created entry must be a symlink whose stored
    # target equals the source path expressed relative to the link's directory.
    with temporary_dir() as tmpdir_1:
        source = os.path.join(tmpdir_1, 'source')
        link = os.path.join(tmpdir_1, 'link')
        rel_path = os.path.relpath(source, os.path.dirname(link))
        relative_symlink(source, link)
        self.assertTrue(os.path.islink(link))
        # assertEqual replaces the deprecated assertEquals alias, which was removed from
        # unittest in Python 3.12.
        self.assertEqual(rel_path, os.readlink(link))
def test_relative_symlink(self) -> None:
    # With source and link in the same dir, the created entry must be a symlink whose stored
    # target equals the source path expressed relative to the link's directory.
    with temporary_dir() as workdir:
        target = os.path.join(workdir, "source")
        link_path = os.path.join(workdir, "link")
        link_parent = os.path.dirname(link_path)
        expected_target = os.path.relpath(target, link_parent)

        relative_symlink(target, link_path)

        self.assertTrue(os.path.islink(link_path))
        self.assertEqual(expected_target, os.readlink(link_path))
def __init__(self, *args, **kwargs):
    """Create the run id and run-info dir for this run and initialize tracking state.

    The run id combines the local start time, its millisecond part and a random uuid. Basic
    info and the command line are recorded in the run's RunInfo, a 'latest' symlink is pointed
    at the run-info dir, and the timing/cache-stat aggregators and worker settings are set up.
    """
    super(RunTracker, self).__init__(*args, **kwargs)
    started_at = time.time()
    command = ' '.join(['pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis_part = int((started_at * 1000) % 1000)
    stamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(started_at))
    run_id = 'pants_run_{}_{}_{}'.format(stamp, millis_part, uuid.uuid4().hex)

    scope_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, started_at)
    self.run_info.add_info('cmd_line', command)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, 'cumulative_timings')
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, 'self_timings')
    self.self_timings = AggregatedTimings(self_path)

    # Hit/miss stats for the artifact cache.
    cache_stats_path = os.path.join(self.run_info_dir, 'artifact_cache_stats')
    self.artifact_cache_stats = ArtifactCacheStats(cache_stats_path)

    # Number of threads for foreground work.
    self._num_foreground_workers = self.get_options().num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = self.get_options().num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For background work. Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
    SubprocPool.set_num_processes(self._num_foreground_workers)
    SubprocPool.foreground()

    self._aborted = False
def test_relative_symlink_link_parent(self) -> None:
    # The link lives in the parent dir of the source; its stored target must be the source
    # path relative to the link's directory.
    with temporary_dir() as workdir:
        link_path = os.path.join(workdir, "link")
        nested_dir = os.path.join(workdir, "child")
        target = os.path.join(nested_dir, "source")

        relative_symlink(target, link_path)

        expected_target = os.path.relpath(target, os.path.dirname(link_path))
        self.assertTrue(os.path.islink(link_path))
        self.assertEqual(expected_target, os.readlink(link_path))
def test_relative_symlink_exception_on_existing_dir(self):
    # This historically was an uncaught exception, the tested behavior is to begin catching the error.
    with temporary_dir() as tmpdir_1:
        source = os.path.join(tmpdir_1, 'source')
        link_path = os.path.join(tmpdir_1, 'link')
        # Occupy the link location with a real directory before attempting to link over it.
        safe_mkdir(link_path)
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which was
        # removed from unittest in Python 3.12.
        with self.assertRaisesRegex(ValueError, r'Path for link.*overwrite an existing directory*'):
            relative_symlink(source, link_path)
def test_relative_symlink_link_parent(self):
    # The link lives in the parent dir of the source; its stored target must be the source
    # path relative to the link's directory.
    with temporary_dir() as tmpdir_1:
        child = os.path.join(tmpdir_1, 'child')
        source = os.path.join(child, 'source')
        link = os.path.join(tmpdir_1, 'link')
        relative_symlink(source, link)
        rel_path = os.path.relpath(source, os.path.dirname(link))
        self.assertTrue(os.path.islink(link))
        # assertEqual replaces the deprecated assertEquals alias, which was removed from
        # unittest in Python 3.12.
        self.assertEqual(rel_path, os.readlink(link))
def test_path_globs_symlink_escaping_errors(rule_runner: RuleRunner) -> None:
    """Globbing a symlink that points above the build root must raise an error."""
    setup_fs_test_tar(rule_runner)
    escaping_link = os.path.join(rule_runner.build_root, "subdir/escaping")
    outside_target = os.path.join(rule_runner.build_root, "../../..")
    relative_symlink(outside_target, escaping_link)

    with pytest.raises(
        Exception,
        match=r".*While expanding link.*subdir/escaping.*may not traverse outside of the buildroot",
    ):
        assert_path_globs(
            rule_runner,
            ["subdir/escaping"],
            expected_files=[],
            expected_dirs=[],
        )
def initial_reporting(self, run_tracker):
    """Sets up the initial reporting configuration.

    Wipes and recreates the run's report dir under `reports_dir`, points the 'latest' link at
    it, and returns a Report with a buffering plain-text reporter and an HTML reporter.
    Will be changed after we parse cmd-line flags.

    :param run_tracker: Its run_info supplies the run id and receives the report metadata.
    :returns: The configured Report.
    :raises ReportingError: If the run_info has no 'id' entry.
    """
    latest_link = os.path.join(self.get_options().reports_dir, 'latest')

    run_id = run_tracker.run_info.get_info('id')
    if run_id is None:
        raise ReportingError('No run_id set')

    run_dir = os.path.join(self.get_options().reports_dir, run_id)
    safe_rmtree(run_dir)
    html_dir = os.path.join(run_dir, 'html')
    safe_mkdir(html_dir)
    relative_symlink(run_dir, latest_link)

    report = Report()

    # Capture initial console reporting into buffers. We'll do something with them once
    # we know what the cmd-line flag settings are.
    buffered_reporter = PlainTextReporter(
        run_tracker,
        PlainTextReporter.Settings(
            outfile=StringIO(),
            errfile=StringIO(),
            log_level=Report.INFO,
            color=False,
            indent=True,
            timing=False,
            cache_stats=False,
            label_format=self.get_options().console_label_format,
            tool_output_format=self.get_options().console_tool_output_format,
        ),
    )
    report.add_reporter('capturing', buffered_reporter)

    # Set up HTML reporting. We always want that.
    html_reporter = HtmlReporter(
        run_tracker,
        HtmlReporter.Settings(
            log_level=Report.INFO,
            html_dir=html_dir,
            template_dir=self.get_options().template_dir,
        ),
    )
    report.add_reporter('html', html_reporter)

    # Add some useful RunInfo.
    run_tracker.run_info.add_info('default_report', html_reporter.report_path())
    server_port = ReportingServerManager().socket
    if server_port:
        report_url = 'http://localhost:{}/run/{}'.format(server_port, run_id)
        run_tracker.run_info.add_info('report_url', report_url)

    return report
def test_relative_symlink_source_parent(self) -> None:
    # The source lives in the parent dir of the link; the link's stored target must be the
    # source path relative to the link's directory.
    with temporary_dir() as workdir:
        nested_dir = os.path.join(workdir, "child")
        os.mkdir(nested_dir)
        target = os.path.join(workdir, "source")
        link_path = os.path.join(nested_dir, "link")

        relative_symlink(target, link_path)

        expected_target = os.path.relpath(target, os.path.dirname(link_path))
        assert os.path.islink(link_path)
        assert expected_target == os.readlink(link_path)
def create_results_dir(self):
    """Ensure the results directory and its stable symlink exist for these versioned targets.

    The current and stable paths come from the cache manager. The current directory is
    recreated empty when the target is invalid, and the stable path is symlinked to it.
    """
    current_path = self._cache_manager.results_dir_path(self.cache_key, stable=False)
    self._current_results_dir = current_path
    stable_path = self._cache_manager.results_dir_path(self.cache_key, stable=True)
    self._results_dir = stable_path

    # Clean the workspace for invalid vts.
    if not self.valid:
        safe_mkdir(current_path, clean=True)

    relative_symlink(current_path, stable_path)
    self.ensure_legal()
def test_relative_symlink_source_parent(self):
    # The source lives in the parent dir of the link; the link's stored target must be the
    # source path relative to the link's directory.
    with temporary_dir() as tmpdir_1:
        child = os.path.join(tmpdir_1, "child")
        os.mkdir(child)
        source = os.path.join(tmpdir_1, "source")
        link = os.path.join(child, "link")
        relative_symlink(source, link)
        rel_path = os.path.relpath(source, os.path.dirname(link))
        self.assertTrue(os.path.islink(link))
        # assertEqual replaces the deprecated assertEquals alias, which was removed from
        # unittest in Python 3.12.
        self.assertEqual(rel_path, os.readlink(link))
def test_relative_symlink_source_parent(self) -> None:
    # The source lives in the parent dir of the link; the link's stored target must be the
    # source path relative to the link's directory.
    with temporary_dir() as workdir:
        nested_dir = os.path.join(workdir, 'child')
        os.mkdir(nested_dir)
        target = os.path.join(workdir, 'source')
        link_path = os.path.join(nested_dir, 'link')

        relative_symlink(target, link_path)

        expected_target = os.path.relpath(target, os.path.dirname(link_path))
        self.assertTrue(os.path.islink(link_path))
        self.assertEqual(expected_target, os.readlink(link_path))
def start(self, all_options, run_start_time=None):
    """Start tracking this pants run.

    Creates the RunInfo under the run-info dir, points the 'latest' symlink at that dir, sets
    up the timing aggregators and the Report (with a JsonReporter when the stats version is
    2), and starts the root workunit.

    :param all_options: Stored on the instance as `_all_options`.
    :param run_start_time: Passed to the root workunit's `start`.
    :raises AssertionError: If run_info is already set, i.e. on a repeated call.
    """
    if self.run_info:
        raise AssertionError("RunTracker.start must not be called multiple times.")

    # Initialize the run.
    scope_dir = os.path.join(self.options.pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info("cmd_line", self._cmd_line)
    if self.options.parent_build_id:
        self.run_info.add_info("parent_build_id", self.options.parent_build_id)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), "latest")
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, "cumulative_timings")
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, "self_timings")
    self.self_timings = AggregatedTimings(self_path)

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    self.report = Report()

    # Set up the JsonReporter for V2 stats.
    if self._stats_version == 2:
        self.json_reporter = JsonReporter(self, JsonReporter.Settings(log_level=Report.INFO))
        self.report.add_reporter("json", self.json_reporter)

    self.report.open()

    # And create the workunit.
    self._main_root_workunit = WorkUnit(
        run_info_dir=self.run_info_dir,
        parent=None,
        name=RunTracker.DEFAULT_ROOT_NAME,
        cmd=None,
    )
    self.register_thread(self._main_root_workunit)
    # Set the true start time in the case of e.g. the daemon.
    self._main_root_workunit.start(run_start_time)
    self.report.start_workunit(self._main_root_workunit)
def test_relative_symlink_exception_on_existing_dir(self) -> None:
    # This historically was an uncaught exception, the tested behavior is to begin catching the error.
    with temporary_dir() as workdir:
        # Occupy the link location with a real directory before attempting to link over it.
        occupied_link = os.path.join(workdir, "link")
        safe_mkdir(occupied_link)
        target = os.path.join(workdir, "source")

        expected_message = r"Path for link.*overwrite an existing directory*"
        with pytest.raises(ValueError, match=expected_message):
            relative_symlink(target, occupied_link)
def create_link(self, relsrc, reldst):
    """Creates a symlink within the buildroot.

    Both paths are joined onto `self.build_root` before linking, and `reldst` is then passed
    to `self._invalidate_for`.

    :API: public

    relsrc: A relative path for the source of the link.
    reldst: A relative path for the destination of the link.
    """
    relative_symlink(
        os.path.join(self.build_root, relsrc),
        os.path.join(self.build_root, reldst),
    )
    self._invalidate_for(reldst)
def initialize(self, run_tracker, start_time=None):
    """Initialize with the given RunTracker.

    Creates the run's html report dir, points the 'latest' link at the run's report dir,
    builds a Report with a buffering plain-text reporter and an HTML reporter, and starts the
    run tracker with that report and `start_time`.

    TODO: See `RunTracker.start`.
    """
    run_id = run_tracker.initialize()
    run_dir = os.path.join(self.get_options().reports_dir, run_id)

    html_dir = os.path.join(run_dir, 'html')
    safe_mkdir(html_dir)

    latest_link = os.path.join(self.get_options().reports_dir, 'latest')
    relative_symlink(run_dir, latest_link)

    report = Report()

    # Capture initial console reporting into buffers. We'll do something with them once
    # we know what the cmd-line flag settings are.
    buffered_reporter = PlainTextReporter(
        run_tracker,
        PlainTextReporter.Settings(
            outfile=StringIO(),
            errfile=StringIO(),
            log_level=Report.INFO,
            color=False,
            indent=True,
            timing=False,
            cache_stats=False,
            label_format=self.get_options().console_label_format,
            tool_output_format=self.get_options().console_tool_output_format,
        ),
    )
    report.add_reporter('capturing', buffered_reporter)

    # Set up HTML reporting. We always want that.
    html_reporter = HtmlReporter(
        run_tracker,
        HtmlReporter.Settings(
            log_level=Report.INFO,
            html_dir=html_dir,
            template_dir=self.get_options().template_dir,
        ),
    )
    report.add_reporter('html', html_reporter)

    # Add some useful RunInfo.
    run_tracker.run_info.add_info('default_report', html_reporter.report_path())
    server_port = ReportingServerManager().socket
    if server_port:
        report_url = 'http://localhost:{}/run/{}'.format(server_port, run_id)
        run_tracker.run_info.add_info('report_url', report_url)

    # And start tracking the run.
    run_tracker.start(report, start_time)
def initialize(self, all_options):
    """Create run_info and relevant directories for this run.

    Must be called before `start`.

    :param all_options: Stored on the instance as `_all_options`.
    :returns: A `(run_id, run_uuid)` tuple.
    :raises AssertionError: If run_info is already set, i.e. on a repeated call.
    """
    if self.run_info:
        raise AssertionError('RunTracker.initialize must not be called multiple times.')

    # Initialize the run.

    # Select a globally unique ID for the run, that sorts by time.
    millis_part = int((self._run_timestamp * 1000) % 1000)
    # run_uuid is used as a part of run_id and also as a trace_id for Zipkin tracing
    run_uuid = uuid.uuid4().hex
    stamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self._run_timestamp))
    run_id = 'pants_run_{}_{}_{}'.format(stamp, millis_part, run_uuid)

    scope_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, self._run_timestamp)
    self.run_info.add_info('cmd_line', self._cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, 'cumulative_timings')
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, 'self_timings')
    self.self_timings = AggregatedTimings(self_path)

    # Hit/miss stats for the artifact cache.
    cache_stats_path = os.path.join(self.run_info_dir, 'artifact_cache_stats')
    self.artifact_cache_stats = ArtifactCacheStats(cache_stats_path)

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    return (run_id, run_uuid)
def start(self, all_options: Options, run_start_time: float) -> None:
    """Start tracking this pants run.

    Creates the RunInfo under the run-info dir, points the 'latest' symlink at that dir, sets
    up the timing aggregators and the Report, starts the root workunit, records the goal
    names, and registers the per-run log file path with the native layer.

    :param all_options: Stored as `_all_options`; its `goals` become `_v2_goal_rule_names`.
    :param run_start_time: Passed to the root workunit's `start`.
    :raises AssertionError: If run_info is already set, i.e. on a repeated call.
    """
    if self.run_info:
        raise AssertionError("RunTracker.start must not be called multiple times.")

    # Initialize the run.
    scope_dir = os.path.join(self.options.pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info("cmd_line", self._cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), "latest")
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, "cumulative_timings")
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, "self_timings")
    self.self_timings = AggregatedTimings(self_path)

    # pantsd stats.
    self._pantsd_metrics: Dict[str, int] = {}

    self._all_options = all_options

    self.report = Report()
    self.report.open()

    # And create the workunit.
    self._main_root_workunit = WorkUnit(
        run_info_dir=self.run_info_dir,
        parent=None,
        name=RunTracker.DEFAULT_ROOT_NAME,
        cmd=None,
    )
    self.register_thread(self._main_root_workunit)
    # Set the true start time in the case of e.g. the daemon.
    self._main_root_workunit.start(run_start_time)
    self.report.start_workunit(self._main_root_workunit)

    requested_goals: Tuple[str, ...] = tuple(all_options.goals)
    self._v2_goal_rule_names = requested_goals

    self.run_logs_file = Path(self.run_info_dir, "logs")
    self.native.set_per_run_log_path(str(self.run_logs_file))
def check_archive_with_flags(archive_format, dereference):
    # Archive a dir holding file 'a' plus a relative symlink to it, extract the archive, and
    # check the extracted 'link_to_a': with dereference it must be neither a symlink nor the
    # same file as 'a'; without dereference it must be both.
    with temporary_dir() as source_dir:
        real_file = os.path.join(source_dir, 'a')
        link_to_file = os.path.join(source_dir, 'link_to_a')
        touch(real_file)
        relative_symlink(real_file, link_to_file)

        with temporary_dir() as archive_dir:
            packed = create_archiver(archive_format).create(
                source_dir, archive_dir, 'archive', dereference=dereference)

            with temporary_dir() as extract_dir:
                create_archiver(archive_format).extract(packed, extract_dir)
                extracted_link = os.path.join(extract_dir, 'link_to_a')

                if dereference:
                    check = self.assertFalse
                else:
                    check = self.assertTrue
                check(os.path.islink(extracted_link))
                check(os.path.samefile(extracted_link, os.path.join(extract_dir, 'a')))
def check_archive_with_flags(archive_format, dereference):
    # Archive a dir holding file 'a' plus a relative symlink to it, extract the archive, and
    # check the extracted 'link_to_a': with dereference it must be neither a symlink nor the
    # same file as 'a'; without dereference it must be both.
    with temporary_dir() as source_dir:
        real_file = os.path.join(source_dir, 'a')
        link_to_file = os.path.join(source_dir, 'link_to_a')
        touch(real_file)
        relative_symlink(real_file, link_to_file)

        with temporary_dir() as archive_dir:
            packed = archiver(archive_format).create(
                source_dir, archive_dir, 'archive', dereference=dereference)

            with temporary_dir() as extract_dir:
                archiver(archive_format).extract(packed, extract_dir)
                extracted_link = os.path.join(extract_dir, 'link_to_a')

                if dereference:
                    check = self.assertFalse
                else:
                    check = self.assertTrue
                check(os.path.islink(extracted_link))
                check(os.path.samefile(extracted_link, os.path.join(extract_dir, 'a')))
def initialize(self, all_options):
    """Create run_info and relevant directories for this run.

    Must be called before `start`.

    :param all_options: Stored on the instance as `_all_options`.
    :returns: A `(run_id, run_uuid)` tuple.
    :raises AssertionError: If run_info is already set, i.e. on a repeated call.
    """
    if self.run_info:
        raise AssertionError('RunTracker.initialize must not be called multiple times.')

    # Initialize the run.

    # Select a globally unique ID for the run, that sorts by time.
    millis_part = int((self._run_timestamp * 1000) % 1000)
    # run_uuid is used as a part of run_id and also as a trace_id for Zipkin tracing
    run_uuid = uuid.uuid4().hex
    stamp = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self._run_timestamp))
    run_id = 'pants_run_{}_{}_{}'.format(stamp, millis_part, run_uuid)

    scope_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, self._run_timestamp)
    self.run_info.add_info('cmd_line', self._cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, 'cumulative_timings')
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, 'self_timings')
    self.self_timings = AggregatedTimings(self_path)

    # Hit/miss stats for the artifact cache.
    cache_stats_path = os.path.join(self.run_info_dir, 'artifact_cache_stats')
    self.artifact_cache_stats = ArtifactCacheStats(cache_stats_path)

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    return (run_id, run_uuid)
def _link_source_dependencies(self, node_task, target, results_dir, node_paths, source_deps):
    """Symlink each source dependency, and its bin executables, under results_dir/node_modules.

    :param source_deps: Mapping of package name to file path; each entry is resolved to a
      target via `_get_target_from_package_name`.
    """
    for pkg_name, pkg_file in source_deps.items():
        # Package name should always be the same as the target name.
        dep_target = self._get_target_from_package_name(target, pkg_name, pkg_file)

        # Apply node-scoping rules if applicable: the dependency's own scope wins over the
        # scope of the task's node distribution.
        scope = dep_target.payload.node_scope or node_task.node_distribution.node_scope
        scoped_name = self._scoped_package_name(node_task, dep_target.package_name, scope)

        # Symlink each target.
        dep_root = node_paths.node_path(dep_target)
        modules_dir = os.path.join(results_dir, 'node_modules')
        relative_symlink(dep_root, os.path.join(modules_dir, scoped_name))

        # If there are any bin, we need to symlink those as well.
        bin_links_dir = os.path.join(modules_dir, '.bin')
        for exe_name, exe_rel_path in dep_target.bin_executables.items():
            exe_source = os.path.join(dep_root, exe_rel_path)
            relative_symlink(exe_source, os.path.join(bin_links_dir, exe_name))
def start(self, run_start_time: float) -> None:
    """Start tracking this pants run.

    Records the run id, start time and command line in run_info, then points the 'latest'
    symlink next to the run-info dir at it.

    :param run_start_time: Stored as `_run_start_time` and recorded in run_info.
    :raises AssertionError: If `start` was already called on this instance.
    """
    if self._has_started:
        raise AssertionError("RunTracker.start must not be called multiple times.")
    self._has_started = True

    # Initialize the run.
    self._run_start_time = run_start_time
    self.run_info.add_basic_info(self.run_id, run_start_time)

    invocation = " ".join(["pants", *sys.argv[1:]])
    self.run_info.add_info("cmd_line", invocation)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    info_parent = os.path.dirname(self.run_info_dir)
    relative_symlink(self.run_info_dir, os.path.join(info_parent, "latest"))
def initial_reporting(self, run_tracker):
    """Sets up the initial reporting configuration.

    Wipes and recreates the run's report dir under `reports_dir`, points the 'latest' link at
    it, and returns a Report with a buffering plain-text reporter and an HTML reporter.
    Will be changed after we parse cmd-line flags.

    :param run_tracker: Its run_info supplies the run id and receives the report metadata.
    :returns: The configured Report.
    :raises ReportingError: If the run_info has no 'id' entry.
    """
    latest_link = os.path.join(self.get_options().reports_dir, 'latest')

    run_id = run_tracker.run_info.get_info('id')
    if run_id is None:
        raise ReportingError('No run_id set')

    run_dir = os.path.join(self.get_options().reports_dir, run_id)
    safe_rmtree(run_dir)
    html_dir = os.path.join(run_dir, 'html')
    safe_mkdir(html_dir)
    relative_symlink(run_dir, latest_link)

    report = Report()

    # Capture initial console reporting into a buffer. We'll do something with it once
    # we know what the cmd-line flag settings are.
    buffered_reporter = PlainTextReporter(
        run_tracker,
        PlainTextReporter.Settings(
            outfile=StringIO(),
            log_level=Report.INFO,
            color=False,
            indent=True,
            timing=False,
            cache_stats=False,
            label_format=self.get_options().console_label_format,
            tool_output_format=self.get_options().console_tool_output_format,
        ),
    )
    report.add_reporter('capturing', buffered_reporter)

    # Set up HTML reporting. We always want that.
    html_reporter = HtmlReporter(
        run_tracker,
        HtmlReporter.Settings(
            log_level=Report.INFO,
            html_dir=html_dir,
            template_dir=self.get_options().template_dir,
        ),
    )
    report.add_reporter('html', html_reporter)

    # Add some useful RunInfo.
    run_tracker.run_info.add_info('default_report', html_reporter.report_path())
    server_port = ReportingServerManager().socket
    if server_port:
        report_url = 'http://localhost:{}/run/{}'.format(server_port, run_id)
        run_tracker.run_info.add_info('report_url', report_url)

    return report
def _link_source_dependencies(self, node_task, target, results_dir, node_paths, source_deps):
    """Symlink each source dependency, and its bin executables, under results_dir/node_modules.

    :param source_deps: Mapping of package name to file path; each entry is resolved to a
      target via `_get_target_from_package_name`.
    """
    for pkg_name, pkg_file in source_deps.items():
        # Package name should always be the same as the target name.
        dep_target = self._get_target_from_package_name(target, pkg_name, pkg_file)

        # Apply node-scoping rules if applicable: the dependency's own scope wins over the
        # scope of the task's node distribution.
        scope = dep_target.payload.node_scope or node_task.node_distribution.node_scope
        scoped_name = self._scoped_package_name(node_task, dep_target.package_name, scope)

        # Symlink each target.
        dep_root = node_paths.node_path(dep_target)
        modules_dir = os.path.join(results_dir, "node_modules")
        relative_symlink(dep_root, os.path.join(modules_dir, scoped_name))

        # If there are any bin, we need to symlink those as well.
        bin_links_dir = os.path.join(modules_dir, ".bin")
        for exe_name, exe_rel_path in dep_target.bin_executables.items():
            exe_source = os.path.join(dep_root, exe_rel_path)
            relative_symlink(exe_source, os.path.join(bin_links_dir, exe_name))
def initialize(self, all_options):
    """Create run_info and relevant directories for this run.

    Must be called before `start`.

    :param all_options: Stored on the instance as `_all_options`.
    :returns: A `(run_id, run_uuid)` tuple taken from the instance.
    :raises AssertionError: If run_info is already set, i.e. on a repeated call.
    """
    if self.run_info:
        raise AssertionError('RunTracker.initialize must not be called multiple times.')

    # Initialize the run.
    scope_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(scope_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info('cmd_line', self._cmd_line)
    if self.get_options().parent_build_id:
        self.run_info.add_info('parent_build_id', self.get_options().parent_build_id)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    latest_link = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, latest_link)

    # Time spent in a workunit, including its children.
    cumulative_path = os.path.join(self.run_info_dir, 'cumulative_timings')
    self.cumulative_timings = AggregatedTimings(cumulative_path)

    # Time spent in a workunit, not including its children.
    self_path = os.path.join(self.run_info_dir, 'self_timings')
    self.self_timings = AggregatedTimings(self_path)

    # Hit/miss stats for the artifact cache.
    cache_stats_path = os.path.join(self.run_info_dir, 'artifact_cache_stats')
    self.artifact_cache_stats = ArtifactCacheStats(cache_stats_path)

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    return (self.run_id, self.run_uuid)
def copy_previous_results(self, root_dir):
    """Seed the current results_dir with the contents of the latest valid results_dir.

    Should be called after the cache is checked, since previous results are not useful
    when a cached artifact exists.

    :param root_dir: Not used by this method (see the TODO below).
    :returns: None.
    """
    # TODO(mateo): An immediate followup removes the root_dir param, it is identical to the task.workdir.
    # TODO(mateo): This should probably be managed by the task, which manages the rest of the incremental support.
    prior_key = self.previous_cache_key
    if not prior_key:
        return None

    prior_dir = self._cache_manager.results_dir_path(prior_key, stable=False)
    current_dir = self._current_results_dir

    if os.path.isdir(prior_dir):
        self.is_incremental = True
        # Replace whatever is in the current dir with a copy of the previous results.
        safe_rmtree(current_dir)
        shutil.copytree(prior_dir, current_dir)

    safe_mkdir(current_dir)
    relative_symlink(current_dir, self.results_dir)

    # Recorded last so that it is only set once the copy has completed.
    self._previous_results_dir = prior_dir
def copy_previous_results(self):
    """Start the current results_dir from the latest valid results_dir, when there is one.

    Should be called after the cache is checked, since previous results are not useful
    when a cached artifact exists.

    :returns: None.
    """
    # TODO(mateo): This should probably be managed by the task, which manages the rest of the
    # incremental support.
    if not self.previous_cache_key:
        return None

    source_dir = self._cache_manager.results_dir_path(self.previous_cache_key, stable=False)
    target_dir = self._current_results_dir

    has_previous_results = os.path.isdir(source_dir)
    if has_previous_results:
        self.is_incremental = True
        # Clear the current dir, then copy the previous results into its place.
        safe_rmtree(target_dir)
        shutil.copytree(source_dir, target_dir)

    safe_mkdir(target_dir)
    relative_symlink(target_dir, self.results_dir)

    # Assigned at the very end so it is only set after the copy completed.
    self._previous_results_dir = source_dir
def initialize(self, run_tracker):
    """Initialize reporting with the given RunTracker, then start tracking the run.

    TODO: See `RunTracker.start`.

    :param run_tracker: Tracker that is initialized here, handed to each reporter, and
      started with the assembled report.
    """
    run_id = run_tracker.initialize()
    options = self.get_options()

    reports_dir = options.reports_dir
    run_dir = os.path.join(reports_dir, run_id)
    html_dir = os.path.join(run_dir, 'html')
    safe_mkdir(html_dir)
    relative_symlink(run_dir, os.path.join(reports_dir, 'latest'))

    report = Report()

    # Capture initial console reporting into a buffer. We'll do something with it once
    # we know what the cmd-line flag settings are.
    captured_out = StringIO()
    captured_err = StringIO()
    capture_settings = PlainTextReporter.Settings(
        outfile=captured_out,
        errfile=captured_err,
        log_level=Report.INFO,
        color=False,
        indent=True,
        timing=False,
        cache_stats=False,
        label_format=options.console_label_format,
        tool_output_format=options.console_tool_output_format)
    report.add_reporter('capturing', PlainTextReporter(run_tracker, capture_settings))

    # Set up HTML reporting. We always want that.
    html_settings = HtmlReporter.Settings(
        log_level=Report.INFO,
        html_dir=html_dir,
        template_dir=options.template_dir)
    html_reporter = HtmlReporter(run_tracker, html_settings)
    report.add_reporter('html', html_reporter)

    # Add some useful RunInfo.
    run_info = run_tracker.run_info
    run_info.add_info('default_report', html_reporter.report_path())
    port = ReportingServerManager().socket
    if port:
        report_url = 'http://localhost:{}/run/{}'.format(port, run_id)
        run_info.add_info('report_url', report_url)

    # And start tracking the run.
    run_tracker.start(report)
def execute(self):
    """Symlink fetched remote files into each target's results_dir and register products.

    For every versioned target, the fetched file(s) are linked into ``vt.results_dir``,
    recorded under the 'remote_files' product, and the results_dir is added to the
    'runtime_classpath' product under the 'default' conf.
    """
    targets = self.context.targets(self._is_remote)
    runtime_classpath_product = self.context.products.get_data(
        'runtime_classpath', init_func=ClasspathProducts.init_func(
            self.get_options().pants_workdir))
    with self.invalidated(targets, invalidate_dependents=True, topological_order=True) as invalidation_check:
        # The fetches are idempotent operations from the subsystem, invalidation only controls recreating the symlinks.
        for vt in invalidation_check.all_vts:
            remote_source = RemoteSourceFetcher.Factory.scoped_instance(
                self).create(vt.target)
            fetched = remote_source.path
            self.context.log.debug(
                "Found fetched file at {}".format(fetched))
            safe_mkdir(fetched)

            # Some unfortunate rigmarole to cover for the case where different targets want the same fetched file
            # but extracted/not. Both cases use the same base namespacing so we rely on the target to tell us.
            # Extracted: link every entry inside the fetched directory.
            # Not extracted: link the single fetched file from its parent directory.
            fetch_dir = fetched if remote_source.extracted else os.path.dirname(
                fetched)
            filenames = os.listdir(
                fetched) if remote_source.extracted else [
                    os.path.basename(fetched)
                ]
            # NOTE(review): stable_outpath is assigned but never read in this method; the
            # conditional expression also binds looser than `+`, so the whole concatenation
            # is the "if" branch. Confirm whether this line is still needed.
            stable_outpath = vt.target.namespace + '-{}'.format(
                'extracted') if vt.target.extract else ''

            for filename in filenames:
                symlink_file = os.path.join(vt.results_dir, filename)
                # Recreate the link when the target was invalidated or the link is not a file.
                if not vt.valid or not os.path.isfile(symlink_file):
                    safe_rmtree(symlink_file)
                    relative_symlink(os.path.join(fetch_dir, filename),
                                     symlink_file)
                # Products are registered on every run, including for still-valid targets.
                self.context.products.get('remote_files').add(
                    vt.target, vt.results_dir).append(filename)

            # The runtime_classpath product is a constructed object that is rooted in the results_dir.
            runtime_classpath_product.add_for_target(
                vt.target, [('default', vt.results_dir)])
def _symlink_tools_classpath(self, tools_classpath):
    """Create a relative symlink for each entry of ``tools_classpath``.

    :param tools_classpath: Iterable of ``(src, dst)`` pairs; each pair is passed to
      ``relative_symlink`` as (source, link).
    """
    for entry in tools_classpath:
        source_path, link_path = entry
        relative_symlink(source_path, link_path)
def prepare(project_tree):
    """Create a relative symlink inside the build root of ``project_tree``.

    ``link`` and ``dest`` are not parameters; they are resolved from a scope outside
    this function. Both are joined onto ``project_tree.build_root``, and the link path
    is made to refer to the dest path.

    :param project_tree: Object exposing a ``build_root`` path.
    """
    build_root = project_tree.build_root
    symlink_location = os.path.join(build_root, link)
    symlink_target = os.path.join(build_root, dest)
    relative_symlink(symlink_target, symlink_location)
def test_relative_symlink_bad_link(self):
    """Expect a ValueError from relative_symlink when the link path is not absolute."""
    with temporary_dir() as tmpdir_1:
        # link is not absolute
        source = os.path.join(tmpdir_1, "source")
        link = os.path.join("foo", "bar")
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
        # was removed from unittest in Python 3.12.
        with self.assertRaisesRegex(ValueError, r"Path for link.*absolute"):
            relative_symlink(source, link)
def _symlink_tools_jar(self, dest_jar_path):
    """Create a relative symlink at ``dest_jar_path`` that refers to ``self._tools_jar``.

    :param dest_jar_path: Path at which the link is created.
    """
    tools_jar = self._tools_jar
    relative_symlink(tools_jar, dest_jar_path)
def initialize(self, run_tracker, all_options, start_time=None):
    """Initialize reporting with the given RunTracker, then start tracking the run.

    TODO: See `RunTracker.start`.

    :param run_tracker: Tracker that is initialized here, handed to each reporter, and
      started with the assembled report.
    :param all_options: Forwarded to ``run_tracker.initialize``.
    :param start_time: Forwarded to ``run_tracker.start``.
    :raises ValueError: If the zipkin trace and parent ids are given without an endpoint,
      if only one of the two ids is given, or if either id fails the length/hex check.
    """
    run_id, run_uuid = run_tracker.initialize(all_options)
    options = self.get_options()

    reports_dir = options.reports_dir
    run_dir = os.path.join(reports_dir, run_id)
    html_dir = os.path.join(run_dir, 'html')
    safe_mkdir(html_dir)
    relative_symlink(run_dir, os.path.join(reports_dir, 'latest'))

    report = Report()

    # Capture initial console reporting into a buffer. We'll do something with it once
    # we know what the cmd-line flag settings are.
    captured_out = BytesIO()
    captured_err = BytesIO()
    capture_settings = PlainTextReporter.Settings(
        outfile=captured_out,
        errfile=captured_err,
        log_level=Report.INFO,
        color=False,
        indent=True,
        timing=False,
        cache_stats=False,
        label_format=options.console_label_format,
        tool_output_format=options.console_tool_output_format)
    report.add_reporter('capturing', PlainTextReporter(run_tracker, capture_settings))

    # Set up HTML reporting. We always want that.
    html_settings = HtmlReporter.Settings(
        log_level=Report.INFO,
        html_dir=html_dir,
        template_dir=options.template_dir)
    html_reporter = HtmlReporter(run_tracker, html_settings)
    report.add_reporter('html', html_reporter)

    # Set up Zipkin reporting.
    zipkin_endpoint = options.zipkin_endpoint
    trace_id = options.zipkin_trace_id
    parent_id = options.zipkin_parent_id
    sample_rate = options.zipkin_sample_rate

    both_ids_given = trace_id is not None and parent_id is not None
    if zipkin_endpoint is None and both_ids_given:
        raise ValueError(
            "The zipkin-endpoint flag must be set if zipkin-trace-id and zipkin-parent-id flags are given."
        )
    # The two ids must be given together or not at all.
    if (trace_id is None) != (parent_id is None):
        raise ValueError(
            "Flags zipkin-trace-id and zipkin-parent-id must both either be set or not set."
        )

    # If trace_id isn't set by a flag, use UUID from run_id
    if trace_id is None:
        trace_id = run_uuid

    if trace_id and (len(trace_id) not in (16, 32) or not is_hex_string(trace_id)):
        raise ValueError(
            "Value of the flag zipkin-trace-id must be a 16-character or 32-character hex string. "
            + "Got {}.".format(trace_id)
        )
    if parent_id and (len(parent_id) != 16 or not is_hex_string(parent_id)):
        raise ValueError(
            "Value of the flag zipkin-parent-id must be a 16-character hex string. "
            + "Got {}.".format(parent_id)
        )

    # A Zipkin reporter is only added when an endpoint was configured.
    if zipkin_endpoint is not None:
        zipkin_settings = ZipkinReporter.Settings(log_level=Report.INFO)
        zipkin_reporter = ZipkinReporter(
            run_tracker, zipkin_settings, zipkin_endpoint, trace_id, parent_id, sample_rate
        )
        report.add_reporter('zipkin', zipkin_reporter)

    # Add some useful RunInfo.
    run_info = run_tracker.run_info
    run_info.add_info('default_report', html_reporter.report_path())
    port = ReportingServerManager().socket
    if port:
        report_url = 'http://localhost:{}/run/{}'.format(port, run_id)
        run_info.add_info('report_url', report_url)

    # And start tracking the run.
    run_tracker.start(report, start_time)
def test_relative_symlink_same_paths(self):
    """Expect a ValueError from relative_symlink when source and link are the same path."""
    with temporary_dir() as tmpdir_1:
        # source is link
        source = os.path.join(tmpdir_1, 'source')
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
        # was removed from unittest in Python 3.12.
        with self.assertRaisesRegex(ValueError, r'Path for link is identical to source'):
            relative_symlink(source, source)