def _assert_pex(self, binary, expected_output=None, expected_shebang=None):
    # The easiest way to create products required by the PythonBinaryCreate task is to
    # execute the relevant tasks.
    si_task_type = self.synthesize_task_subtype(SelectInterpreter, "si_scope")
    gs_task_type = self.synthesize_task_subtype(GatherSources, "gs_scope")

    task_context = self.context(
        for_task_types=[si_task_type, gs_task_type], target_roots=[binary])

    run_info_dir = os.path.join(self.pants_workdir, self.options_scope, "test/info")
    task_context.run_tracker.run_info = RunInfo(run_info_dir)

    si_task_type(task_context, os.path.join(self.pants_workdir, "si")).execute()
    gs_task_type(task_context, os.path.join(self.pants_workdir, "gs")).execute()

    test_task = self.create_task(task_context)
    test_task.execute()

    self._check_products(task_context, binary,
                         expected_output=expected_output,
                         expected_shebang=expected_shebang)
def __init__(self, options: Options):
    """
    :API: public
    """
    self.native = Native()
    self._has_started: bool = False
    self._has_ended: bool = False

    # Select a globally unique ID for the run, that sorts by time.
    run_timestamp = time.time()
    run_uuid = uuid.uuid4().hex
    str_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(run_timestamp))
    millis = int((run_timestamp * 1000) % 1000)
    self.run_id = f"pants_run_{str_time}_{millis}_{run_uuid}"

    self._all_options = options
    info_dir = os.path.join(
        self._all_options.for_global_scope().pants_workdir, "run-tracker")
    self.run_info_dir = os.path.join(info_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))

    # pantsd stats.
    self._pantsd_metrics: Dict[str, int] = dict()

    self.run_logs_file = Path(self.run_info_dir, "logs")
    self.native.set_per_run_log_path(str(self.run_logs_file))

    # Initialized in `start()`.
    self._run_start_time: Optional[float] = None
    self._run_total_duration: Optional[float] = None
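# For reference, the run_id scheme above reduced to a standalone sketch (only
# the stdlib is involved; output values are illustrative):
import time
import uuid

run_timestamp = time.time()
str_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime(run_timestamp))
millis = int((run_timestamp * 1000) % 1000)
run_id = f"pants_run_{str_time}_{millis}_{uuid.uuid4().hex}"
# e.g. 'pants_run_2021_03_04_10_11_12_345_9f1c...': lexicographic order tracks
# wall-clock time at second granularity; millis is not zero-padded, so ordering
# within a second is not strict, and the uuid suffix supplies uniqueness.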
def _get_run_info_dict(self, run_id):
    """Get the RunInfo for a run, as a dict."""
    run_info_path = os.path.join(self._settings.info_dir, run_id, 'info')
    if os.path.exists(run_info_path):
        # We copy the RunInfo as a dict, so we can add stuff to it to pass to the template.
        return RunInfo(run_info_path).get_as_dict()
    else:
        return None
def __init__(self, *args, **kwargs):
    super(RunTracker, self).__init__(*args, **kwargs)
    run_timestamp = time.time()
    cmd_line = ' '.join(['pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis = int((run_timestamp * 1000) % 1000)
    run_id = 'pants_run_{}_{}_{}'.format(
        time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(run_timestamp)),
        millis,
        uuid.uuid4().hex)

    info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, run_timestamp)
    self.run_info.add_info('cmd_line', cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = ArtifactCacheStats(
        os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Number of threads for foreground work.
    self._num_foreground_workers = self.get_options().num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = self.get_options().num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For background work. Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring).
    SubprocPool.set_num_processes(self._num_foreground_workers)
    SubprocPool.foreground()

    self._aborted = False
def test_write_run_info(self):
    with temporary_file_path() as tmppath:
        ri = RunInfo(tmppath)
        ri.add_info('key1', 'val1')
        ri.add_infos(('key2', ' val2'), (' key3 ', 'val3 '))
        self.assertEqual({'key1': 'val1', 'key2': 'val2', 'key3': 'val3'},
                         ri.get_as_dict())
        with open(tmppath, 'r') as tmpfile:
            contents = tmpfile.read()
        self.assertEqual('key1: val1\nkey2: val2\nkey3: val3\n', contents)
def start(self, all_options, run_start_time=None):
    """Start tracking this pants run."""
    if self.run_info:
        raise AssertionError("RunTracker.start must not be called multiple times.")

    # Initialize the run.
    info_dir = os.path.join(self.options.pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info("cmd_line", self._cmd_line)
    if self.options.parent_build_id:
        self.run_info.add_info("parent_build_id", self.options.parent_build_id)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), "latest")
    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, "cumulative_timings"))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, "self_timings"))

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    self.report = Report()

    # Set up the JsonReporter for V2 stats.
    if self._stats_version == 2:
        json_reporter_settings = JsonReporter.Settings(log_level=Report.INFO)
        self.json_reporter = JsonReporter(self, json_reporter_settings)
        self.report.add_reporter("json", self.json_reporter)

    self.report.open()

    # And create the workunit.
    self._main_root_workunit = WorkUnit(run_info_dir=self.run_info_dir,
                                        parent=None,
                                        name=RunTracker.DEFAULT_ROOT_NAME,
                                        cmd=None)
    self.register_thread(self._main_root_workunit)
    # Set the true start time in the case of e.g. the daemon.
    self._main_root_workunit.start(run_start_time)
    self.report.start_workunit(self._main_root_workunit)
def _get_all_run_infos(self):
    """Find the RunInfos for all runs since the last clean-all."""
    info_dir = self._settings.info_dir
    if not os.path.isdir(info_dir):
        return []
    paths = [os.path.join(info_dir, x) for x in os.listdir(info_dir)]

    # We copy the RunInfo as a dict, so we can add stuff to it to pass to the template.
    # We filter only those that have a timestamp, to avoid a race condition with writing
    # that field.
    return filter(lambda d: 'timestamp' in d,
                  [RunInfo(os.path.join(p, 'info')).get_as_dict()
                   for p in paths if os.path.isdir(p) and not os.path.islink(p)])
def initialize(self, all_options):
    """Create run_info and relevant directories, and return the run id.

    Must be called before `start`.
    """
    if self.run_info:
        raise AssertionError('RunTracker.initialize must not be called multiple times.')

    # Initialize the run.

    # Select a globally unique ID for the run, that sorts by time.
    millis = int((self._run_timestamp * 1000) % 1000)
    # run_uuid is used as a part of run_id and also as a trace_id for Zipkin tracing
    run_uuid = uuid.uuid4().hex
    run_id = 'pants_run_{}_{}_{}'.format(
        time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self._run_timestamp)),
        millis,
        run_uuid)

    info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, self._run_timestamp)
    self.run_info.add_info('cmd_line', self._cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = ArtifactCacheStats(
        os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    return (run_id, run_uuid)
def start(self, all_options: Options, run_start_time: float) -> None:
    """Start tracking this pants run."""
    if self.run_info:
        raise AssertionError("RunTracker.start must not be called multiple times.")

    # Initialize the run.
    info_dir = os.path.join(self.options.pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, "info"))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info("cmd_line", self._cmd_line)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), "latest")
    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, "cumulative_timings"))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, "self_timings"))

    # pantsd stats.
    self._pantsd_metrics: Dict[str, int] = dict()

    self._all_options = all_options

    self.report = Report()
    self.report.open()

    # And create the workunit.
    self._main_root_workunit = WorkUnit(run_info_dir=self.run_info_dir,
                                        parent=None,
                                        name=RunTracker.DEFAULT_ROOT_NAME,
                                        cmd=None)
    self.register_thread(self._main_root_workunit)
    # Set the true start time in the case of e.g. the daemon.
    self._main_root_workunit.start(run_start_time)
    self.report.start_workunit(self._main_root_workunit)

    goal_names: Tuple[str, ...] = tuple(all_options.goals)
    self._v2_goal_rule_names = goal_names

    self.run_logs_file = Path(self.run_info_dir, "logs")
    self.native.set_per_run_log_path(str(self.run_logs_file))
def test_python_docker_image(self):
    expected_image_name = 'test-image-%s' % uuid.uuid4()
    expected_image = expected_image_name + ':foo-bar'

    self.create_python_library('src/python/lib', 'lib', {'lib.py': dedent("""
        import os

        def main():
            os.getcwd()
    """)})

    binary = self.create_python_binary('src/python/bin', 'bin', 'lib.lib:main',
                                       dependencies=['//src/python/lib'])

    docker_target = self.make_target(spec='//bar:bar-image',
                                     target_type=DockerPythonTarget,
                                     image_name=expected_image_name,
                                     image_tag='foo-bar',
                                     base_image='scratch',
                                     dependencies=[binary])

    binary_create_type = self.synthesize_task_subtype(PythonBinaryCreate, 'bc_scope')
    task_context = self.context(for_task_types=[binary_create_type],
                                target_roots=[docker_target])

    run_info_dir = os.path.join(self.pants_workdir, self.options_scope, 'test/info')
    task_context.run_tracker.run_info = RunInfo(run_info_dir)

    test_task = self.create_task(task_context)
    binary_create_type(task_context, os.path.join(self.pants_workdir, 'bc')).execute()
    try:
        test_task.execute()
        with self.parse_tar(task_context, docker_target, expected_image) as tar:
            self.assertIn('manifest.json', tar.getnames())
            # TODO test more properties if we can assure it's hermetic somehow
    finally:
        subprocess.call(['docker', 'rmi', expected_image])
def test_run_info_read(self):
    with temporary_file_path() as tmppath:
        with open(tmppath, 'w') as tmpfile:
            tmpfile.write('foo:bar\n baz :qux quux')
        ri = RunInfo(tmppath)
        self.assertEqual(ri.path(), tmppath)

        # Test get_info access.
        self.assertEqual(ri.get_info('foo'), 'bar')
        self.assertEqual(ri.get_info('baz'), 'qux quux')
        self.assertIsNone(ri.get_info('nonexistent'))

        # Test dict-like access.
        self.assertEqual(ri['foo'], 'bar')
        self.assertEqual(ri['baz'], 'qux quux')
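# The write test (test_write_run_info above) and this read test together pin
# down the on-disk format: one 'key: value' (or 'key:value') pair per line,
# with keys and values stripped of surrounding whitespace. A minimal parser
# consistent with those assertions might look like this (just a sketch; the
# real RunInfo class is the authority, and parse_run_info_file is a
# hypothetical name):
def parse_run_info_file(path):
    info = {}
    with open(path, 'r') as f:
        for line in f:
            if ':' in line:
                # Split on the first colon only, so values may contain colons.
                key, value = line.split(':', 1)
                info[key.strip()] = value.strip()
    return info

# For a file containing 'foo:bar\n baz :qux quux' this yields
# {'foo': 'bar', 'baz': 'qux quux'}, matching the assertions above.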
def initialize(self, all_options):
    """Create run_info and relevant directories, and return the run id.

    Must be called before `start`.
    """
    if self.run_info:
        raise AssertionError('RunTracker.initialize must not be called multiple times.')

    # Initialize the run.
    info_dir = os.path.join(self.get_options().pants_workdir, self.options_scope)
    self.run_info_dir = os.path.join(info_dir, self.run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(self.run_id, self._run_timestamp)
    self.run_info.add_info('cmd_line', self._cmd_line)
    if self.get_options().parent_build_id:
        self.run_info.add_info('parent_build_id', self.get_options().parent_build_id)

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    relative_symlink(self.run_info_dir, link_to_latest)

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = ArtifactCacheStats(
        os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Daemon stats.
    self.pantsd_stats = PantsDaemonStats()

    self._all_options = all_options

    return (self.run_id, self.run_uuid)
def setUp(self):
    super(PythonBinaryCreateTest, self).setUp()
    self.library = self.create_python_library('src/python/lib', 'lib', {'lib.py': dedent("""
        import os

        def main():
            os.getcwd()
    """)})

    self.binary = self.create_python_binary('src/python/bin', 'bin', 'lib.lib:main',
                                            dependencies=['//src/python/lib'])

    self.task_context = self.context(target_roots=[self.binary])

    self.run_info_dir = os.path.join(self.pants_workdir, self.options_scope, 'test/info')
    self.task_context.run_tracker.run_info = RunInfo(self.run_info_dir)

    self.test_task = self.create_task(self.task_context)
    self.dist_root = os.path.join(self.build_root, 'dist')
def setUp(self):
    super(PythonBinaryCreateTest, self).setUp()
    self.library = self.create_python_library('src/python/lib', 'lib', {'lib.py': dedent("""
        import os

        def main():
            os.getcwd()
    """)})

    self.binary = self.create_python_binary('src/python/bin', 'bin', 'lib.lib:main',
                                            dependencies=['//src/python/lib'])

    # The easiest way to create products required by the PythonBinaryCreate task is to
    # execute the relevant tasks.
    si_task_type = self.synthesize_task_subtype(SelectInterpreter, 'si_scope')
    gs_task_type = self.synthesize_task_subtype(GatherSources, 'gs_scope')

    self.task_context = self.context(
        for_task_types=[si_task_type, gs_task_type], target_roots=[self.binary])

    self.run_info_dir = os.path.join(self.pants_workdir, self.options_scope, 'test/info')
    self.task_context.run_tracker.run_info = RunInfo(self.run_info_dir)

    si_task_type(self.task_context, os.path.join(self.pants_workdir, 'si')).execute()
    gs_task_type(self.task_context, os.path.join(self.pants_workdir, 'gs')).execute()

    self.test_task = self.create_task(self.task_context)
    self.dist_root = os.path.join(self.build_root, 'dist')
def __init__(self, info_dir, stats_upload_url=None, stats_upload_timeout=2,
             num_foreground_workers=8, num_background_workers=8):
    self.run_timestamp = time.time()  # A double, so we get subsecond precision for ids.
    cmd_line = ' '.join(['./pants'] + sys.argv[1:])

    # run_id is safe for use in paths.
    millis = (self.run_timestamp * 1000) % 1000
    run_id = 'pants_run_%s_%d' % \
             (time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(self.run_timestamp)), millis)

    self.run_info_dir = os.path.join(info_dir, run_id)
    self.run_info = RunInfo(os.path.join(self.run_info_dir, 'info'))
    self.run_info.add_basic_info(run_id, self.run_timestamp)
    self.run_info.add_info('cmd_line', cmd_line)
    self.stats_url = stats_upload_url
    self.stats_timeout = stats_upload_timeout

    # Create a 'latest' symlink, after we add_infos, so we're guaranteed that the file exists.
    link_to_latest = os.path.join(os.path.dirname(self.run_info_dir), 'latest')
    try:
        if os.path.lexists(link_to_latest):
            os.unlink(link_to_latest)
        os.symlink(self.run_info_dir, link_to_latest)
    except OSError as e:
        # Another run may beat us to deletion or creation.
        if not (e.errno == errno.EEXIST or e.errno == errno.ENOENT):
            raise

    # Time spent in a workunit, including its children.
    self.cumulative_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'cumulative_timings'))

    # Time spent in a workunit, not including its children.
    self.self_timings = AggregatedTimings(
        os.path.join(self.run_info_dir, 'self_timings'))

    # Hit/miss stats for the artifact cache.
    self.artifact_cache_stats = ArtifactCacheStats(
        os.path.join(self.run_info_dir, 'artifact_cache_stats'))

    # Number of threads for foreground work.
    self._num_foreground_workers = num_foreground_workers

    # Number of threads for background work.
    self._num_background_workers = num_background_workers

    # We report to this Report.
    self.report = None

    # self._threadlocal.current_workunit contains the current workunit for the calling thread.
    # Note that multiple threads may share a name (e.g., all the threads in a pool).
    self._threadlocal = threading.local()

    # For main thread work. Created on start().
    self._main_root_workunit = None

    # For concurrent foreground work. Created lazily if needed.
    # Associated with the main thread's root workunit.
    self._foreground_worker_pool = None

    # For background work. Created lazily if needed.
    self._background_worker_pool = None
    self._background_root_workunit = None

    # Trigger subproc pool init while our memory image is still clean (see SubprocPool docstring)
    SubprocPool.foreground()

    self._aborted = False
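# The try/except around the 'latest' symlink above is the pattern the newer
# snippets delegate to relative_symlink. Pulled out as a standalone helper it
# would look roughly like this (replace_latest_symlink is a hypothetical name;
# the relative_symlink used in the newer snippets likely also relativizes the
# link target):
import errno
import os

def replace_latest_symlink(target, link_path):
    try:
        # Another run may beat us to deletion or creation, so tolerate both.
        if os.path.lexists(link_path):
            os.unlink(link_path)
        os.symlink(target, link_path)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOENT):
            raise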