def _write_to_artifact_cache(self, analysis_file, vts, sources_by_target):
  vt_by_target = dict([(vt.target, vt) for vt in vts.versioned_targets])

  vts_targets = [t for t in vts.targets if not t.has_label('no_cache')]

  split_analysis_files = [
    JvmCompile._analysis_for_target(self._analysis_tmpdir, t) for t in vts_targets]
  portable_split_analysis_files = [
    JvmCompile._portable_analysis_for_target(self._analysis_tmpdir, t) for t in vts_targets]

  # Set up args for splitting the analysis into per-target files.
  splits = zip([sources_by_target.get(t, []) for t in vts_targets], split_analysis_files)
  splits_args_tuples = [(analysis_file, splits)]

  # Set up args for rebasing the splits.
  relativize_args_tuples = zip(split_analysis_files, portable_split_analysis_files)

  # Set up args for artifact cache updating.
  vts_artifactfiles_pairs = []
  classes_by_source = self._compute_classes_by_source(analysis_file)
  resources_by_target = self.context.products.get_data('resources_by_target')
  for target, sources in sources_by_target.items():
    if target.has_label('no_cache'):
      continue
    artifacts = []
    if resources_by_target is not None:
      for _, paths in resources_by_target[target].abs_paths():
        artifacts.extend(paths)
    for source in sources:
      classes = classes_by_source.get(source, [])
      artifacts.extend(classes)
    vt = vt_by_target.get(target)
    if vt is not None:
      # NOTE: analysis_file doesn't exist yet.
      vts_artifactfiles_pairs.append(
        (vt, artifacts + [JvmCompile._portable_analysis_for_target(self._analysis_tmpdir, target)]))

  update_artifact_cache_work = self.get_update_artifact_cache_work(vts_artifactfiles_pairs)
  if update_artifact_cache_work:
    work_chain = [
      Work(self._analysis_tools.split_to_paths, splits_args_tuples, 'split'),
      Work(self._analysis_tools.relativize, relativize_args_tuples, 'relativize'),
      update_artifact_cache_work
    ]
    self.context.submit_background_work_chain(work_chain, parent_workunit_name='cache')
def _write_to_artifact_cache(self, analysis_file, vts, get_update_artifact_cache_work):
  vt_by_target = dict([(vt.target, vt) for vt in vts.versioned_targets])

  vts_targets = [t for t in vts.targets if not t.has_label('no_cache')]

  # Determine locations for analysis files that will be split in the background.
  split_analysis_files = [
    JvmCompileStrategy._analysis_for_target(self.analysis_tmpdir, t) for t in vts_targets]
  portable_split_analysis_files = [
    JvmCompileStrategy._portable_analysis_for_target(self.analysis_tmpdir, t) for t in vts_targets]

  # Set up args for splitting the analysis into per-target files.
  splits = zip([self._sources_for_target(t) for t in vts_targets], split_analysis_files)
  splits_args_tuples = [(analysis_file, splits)]

  # Set up args for rebasing the splits.
  relativize_args_tuples = zip(split_analysis_files, portable_split_analysis_files)

  # Compute the classes and resources for each vts.
  compile_contexts = [self.compile_context(t) for t in vts_targets]
  vts_artifactfiles_pairs = []
  classes_by_source_by_context = self.compute_classes_by_source(compile_contexts)
  resources_by_target = self.context.products.get_data('resources_by_target')
  for compile_context in compile_contexts:
    target = compile_context.target
    if target.has_label('no_cache'):
      continue
    artifacts = []
    if resources_by_target is not None:
      for _, paths in resources_by_target[target].abs_paths():
        artifacts.extend(paths)
    classes_by_source = classes_by_source_by_context[compile_context]
    for source in compile_context.sources:
      classes = classes_by_source.get(source, [])
      artifacts.extend(classes)
    vt = vt_by_target.get(target)
    if vt is not None:
      # NOTE: analysis_file doesn't exist yet.
      vts_artifactfiles_pairs.append(
        (vt, artifacts + [JvmCompileStrategy._portable_analysis_for_target(self.analysis_tmpdir, target)]))

  update_artifact_cache_work = get_update_artifact_cache_work(vts_artifactfiles_pairs)
  if update_artifact_cache_work:
    work_chain = [
      Work(self._analysis_tools.split_to_paths, splits_args_tuples, 'split'),
      Work(self._analysis_tools.relativize, relativize_args_tuples, 'relativize'),
      update_artifact_cache_work
    ]
    self.context.submit_background_work_chain(work_chain, parent_workunit_name='cache')
def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):
  """Create a Work instance to update an artifact cache, if we're configured to.

  vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
    - vts is a single VersionedTargetSet.
    - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
  """
  cache = self._cache_factory.get_write_cache()
  if cache:
    if len(vts_artifactfiles_pairs) == 0:
      return None
    # Do some reporting.
    targets = set()
    for vts, _ in vts_artifactfiles_pairs:
      targets.update(vts.targets)
    self._report_targets('Caching artifacts for ', list(targets), '.')

    always_overwrite = self._cache_factory.overwrite()

    # Cache the artifacts.
    args_tuples = []
    for vts, artifactfiles in vts_artifactfiles_pairs:
      overwrite = always_overwrite or vts.cache_key in self._cache_key_errors
      args_tuples.append((cache, vts.cache_key, artifactfiles, overwrite))

    return Work(lambda x: self.context.subproc_map(call_insert, x), [(args_tuples,)], 'insert')
  else:
    return None
def execute(self):
  thrift_targets = self.get_targets(self._is_thrift)
  with self.invalidated(thrift_targets) as invalidation_check:
    if not invalidation_check.invalid_vts:
      return

    with self.context.new_workunit('parallel-thrift-linter') as workunit:
      worker_pool = WorkerPool(workunit.parent,
                               self.context.run_tracker,
                               self.get_options().worker_count)
      scrooge_linter_classpath = self.tool_classpath('scrooge-linter')
      results = []
      errors = []
      for vt in invalidation_check.invalid_vts:
        r = worker_pool.submit_async_work(Work(self._lint, [(vt.target, scrooge_linter_classpath)]))
        results.append((r, vt))
      for r, vt in results:
        r.wait()
        # MapResult will raise _value in `get` if the run is not successful.
        try:
          r.get()
        except ThriftLintError as e:
          errors.append(str(e))
        else:
          vt.update()
      if errors:
        raise TaskError('\n'.join(errors))
def do_check_artifact_cache(self, vts, post_process_cached_vts=None):
  """Checks the artifact cache for the specified list of VersionedTargetSets.

  Returns a pair (cached, uncached) of VersionedTargets that were satisfied/unsatisfied
  from the cache.
  """
  if not vts:
    return [], []

  cached_vts = []
  uncached_vts = OrderedSet(vts)

  with self.context.new_workunit(name='check', labels=[WorkUnit.MULTITOOL]) as parent:
    res = self.context.submit_foreground_work_and_wait(
      Work(lambda vt: bool(self.get_artifact_cache().use_cached_files(vt.cache_key)),
           [(vt,) for vt in vts], 'fetch'),
      workunit_parent=parent)
  for vt, was_in_cache in zip(vts, res):
    if was_in_cache:
      cached_vts.append(vt)
      uncached_vts.discard(vt)

  # Note that while the input vts may represent multiple targets (for tasks that override
  # check_artifact_cache_for), the ones we return must represent single targets.
  def flatten(vts):
    return list(itertools.chain.from_iterable([vt.versioned_targets for vt in vts]))

  all_cached_vts, all_uncached_vts = flatten(cached_vts), flatten(uncached_vts)
  if post_process_cached_vts:
    post_process_cached_vts(all_cached_vts)
  for vt in all_cached_vts:
    vt.update()
  return all_cached_vts, all_uncached_vts
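# A minimal, self-contained sketch (hypothetical names, not the Pants API) of the
# partitioning step in do_check_artifact_cache above: each item is probed with a
# "was it in the cache" predicate, standing in for use_cached_files(vt.cache_key),
# and split into (cached, uncached) while preserving input order.
def partition_by_cache_hit(vts, was_in_cache):
  cached, uncached = [], []
  for vt in vts:
    (cached if was_in_cache(vt) else uncached).append(vt)
  return cached, uncached

# Example: only 'a' and 'c' hit the cache.
hits = {'a', 'c'}
assert partition_by_cache_hit(['a', 'b', 'c'], lambda vt: vt in hits) == (['a', 'c'], ['b'])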
def get_update_artifact_cache_work(self, vts_artifactfiles_pairs, cache=None):
  """Create a Work instance to update the artifact cache, if we're configured to.

  vts_artifactfiles_pairs - a list of pairs (vts, artifactfiles) where
    - vts is a single VersionedTargetSet.
    - artifactfiles is a list of paths to artifacts for the VersionedTargetSet.
  """
  cache = cache or self.get_artifact_cache()
  if cache:
    if len(vts_artifactfiles_pairs) == 0:
      return None
    # Do some reporting.
    targets = set()
    for vts, _ in vts_artifactfiles_pairs:
      targets.update(vts.targets)
    self._report_targets('Caching artifacts for ', list(targets), '.')
    # Cache the artifacts.
    args_tuples = []
    for vts, artifactfiles in vts_artifactfiles_pairs:
      args_tuples.append((vts.cache_key, artifactfiles))
    return Work(lambda *args: cache.insert(*args), args_tuples, 'insert')
  else:
    return None
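# A self-contained sketch of the Work contract assumed by the functions above
# (illustrative only, not the real class from pants.base.worker_pool): a Work
# bundles a callable, a list of argument tuples, and a workunit name, and an
# executor applies the callable once per tuple.
from collections import namedtuple

Work = namedtuple('Work', ['func', 'args_tuples', 'workunit_name'])

def run_work_serially(work):
  # Hypothetical executor: a real pool would run these calls on worker threads.
  for args in work.args_tuples:
    work.func(*args)

inserted = []
insert_work = Work(lambda key, files: inserted.append((key, files)),
                   [('cache-key-1', ['a.jar']), ('cache-key-2', ['b.jar'])],
                   'insert')
run_work_serially(insert_work)
assert inserted == [('cache-key-1', ['a.jar']), ('cache-key-2', ['b.jar'])]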
def _write_to_artifact_cache(self, vts, compile_context, get_update_artifact_cache_work):
  assert len(vts.targets) == 1
  assert vts.targets[0] == compile_context.target

  # Noop if the target is uncacheable.
  if compile_context.target.has_label('no_cache'):
    return
  vt = vts.versioned_targets[0]

  # Set up args to relativize analysis in the background.
  portable_analysis_file = self._portable_analysis_for_target(
    self._analysis_dir, compile_context.target)
  relativize_args_tuple = (compile_context.analysis_file, portable_analysis_file)

  # Collect the artifacts for this target.
  artifacts = []

  # Intransitive classpath entries.
  target_classpath = ClasspathUtil.classpath_entries(
    (compile_context.target,),
    self.context.products.get_data('runtime_classpath'),
    ('default',),
    transitive=False)
  for entry in target_classpath:
    if ClasspathUtil.is_jar(entry):
      artifacts.append(entry)
    elif ClasspathUtil.is_dir(entry):
      for rel_file in ClasspathUtil.classpath_entries_contents([entry]):
        artifacts.append(os.path.join(entry, rel_file))
    else:
      # Non-jar and non-directory classpath entries are ignored.
      pass

  # Log file.
  log_file = self._capture_log_file(compile_context.target)
  if log_file and os.path.exists(log_file):
    artifacts.append(log_file)

  # Jar.
  artifacts.append(compile_context.jar_file)

  # Get the 'work' that will publish these artifacts to the cache.
  # NB: the portable analysis_file won't exist until we finish.
  vts_artifactfiles_pair = (vt, artifacts + [portable_analysis_file])
  update_artifact_cache_work = get_update_artifact_cache_work([vts_artifactfiles_pair])

  # And execute it.
  if update_artifact_cache_work:
    work_chain = [
      Work(self._analysis_tools.relativize, [relativize_args_tuple], 'relativize'),
      update_artifact_cache_work
    ]
    self.context.submit_background_work_chain(work_chain, parent_workunit_name='cache')
def test_keyboard_interrupts_propagated(self):
  condition = threading.Condition()
  condition.acquire()
  with self.assertRaises(KeyboardInterrupt):
    with temporary_dir() as rundir:
      pool = WorkerPool(WorkUnit(rundir, None, "work"), FakeRunTracker(), 1)
      try:
        pool.submit_async_work(Work(keyboard_interrupt_raiser, [()]))
        # Block the test thread briefly so the interrupt raised on the worker
        # thread has a chance to propagate to the main thread.
        condition.wait(2)
      finally:
        pool.abort()
def _write_to_artifact_cache(self, vts, compile_context, get_update_artifact_cache_work):
  assert len(vts.targets) == 1
  assert vts.targets[0] == compile_context.target

  # Noop if the target is uncacheable.
  if compile_context.target.has_label('no_cache'):
    return
  vt = vts.versioned_targets[0]

  # Set up args to relativize analysis in the background.
  portable_analysis_file = JvmCompileStrategy._portable_analysis_for_target(
    self._analysis_dir, compile_context.target)
  relativize_args_tuple = (compile_context.analysis_file, portable_analysis_file)

  # Collect the artifacts for this target.
  artifacts = []

  def add_abs_products(p):
    if p:
      for _, paths in p.abs_paths():
        artifacts.extend(paths)

  # Resources.
  resources_by_target = self.context.products.get_data('resources_by_target')
  add_abs_products(resources_by_target.get(compile_context.target))
  # Classes.
  classes_by_target = self.context.products.get_data('classes_by_target')
  add_abs_products(classes_by_target.get(compile_context.target))
  # Log file.
  log_file = self._capture_log_file(compile_context.target)
  if log_file and os.path.exists(log_file):
    artifacts.append(log_file)
  # Jar.
  if self._jar:
    artifacts.append(compile_context.jar_file)

  # Get the 'work' that will publish these artifacts to the cache.
  # NB: the portable analysis_file won't exist until we finish.
  vts_artifactfiles_pair = (vt, artifacts + [portable_analysis_file])
  update_artifact_cache_work = get_update_artifact_cache_work([vts_artifactfiles_pair])

  # And execute it.
  if update_artifact_cache_work:
    work_chain = [
      Work(self._analysis_tools.relativize, [relativize_args_tuple], 'relativize'),
      update_artifact_cache_work
    ]
    self.context.submit_background_work_chain(work_chain, parent_workunit_name='cache')
def submit_jobs(job_keys):
  def worker(worker_key, work):
    try:
      work()
      result = (worker_key, SUCCESSFUL, None)
    except Exception as e:
      result = (worker_key, FAILED, e)
    finished_queue.put(result)

  for job_key in job_keys:
    status_table.mark_as(QUEUED, job_key)
    pool.submit_async_work(Work(worker, [(job_key, self._jobs[job_key])]))
def try_to_submit_jobs_from_heap():
  def worker(worker_key, work):
    try:
      work()
      result = (worker_key, SUCCESSFUL, None)
    except Exception as e:
      result = (worker_key, FAILED, e)
    finished_queue.put(result)
    jobs_in_flight.decrement()

  # Submit jobs in priority order while the pool has spare workers.
  while len(heap) > 0 and jobs_in_flight.get() < pool.num_workers:
    priority, job_key = heappop(heap)
    jobs_in_flight.increment()
    status_table.mark_queued(job_key)
    pool.submit_async_work(Work(worker, [(job_key, self._jobs[job_key])]))
def try_to_submit_jobs_from_heap():
  def worker(worker_key, work):
    status_table.mark_as(RUNNING, worker_key)
    try:
      with Timer() as timer:
        work()
      result = (worker_key, SUCCESSFUL, None, timer.elapsed)
    except BaseException:
      _, exc_value, exc_traceback = sys.exc_info()
      result = (worker_key, FAILED, (exc_value, traceback.format_tb(exc_traceback)), timer.elapsed)
    finished_queue.put(result)
    jobs_in_flight.decrement()

  # Submit jobs in priority order while the pool has spare workers.
  while len(heap) > 0 and jobs_in_flight.get() < pool.num_workers:
    priority, job_key = heappop(heap)
    jobs_in_flight.increment()
    pool.submit_async_work(Work(worker, [(job_key, self._jobs[job_key])]))
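# A minimal, single-threaded sketch of the scheduling pattern used by
# try_to_submit_jobs_from_heap above: jobs wait in a priority heap and are
# submitted only while the number of in-flight jobs stays below the pool's
# worker count; each completion frees a slot and triggers another drain.
# All names here are illustrative, not the Pants ExecutionGraph API.
from heapq import heappush, heappop

class ToyScheduler(object):
  def __init__(self, num_workers):
    self.num_workers = num_workers
    self.heap = []
    self.in_flight = 0
    self.submitted = []

  def add_job(self, priority, job_key):
    heappush(self.heap, (priority, job_key))

  def submit_from_heap(self):
    # Mirrors the while-loop above: drain the heap while capacity remains.
    while self.heap and self.in_flight < self.num_workers:
      _, job_key = heappop(self.heap)
      self.in_flight += 1
      self.submitted.append(job_key)

  def job_finished(self):
    # Mirrors jobs_in_flight.decrement() in the worker, then tries to resubmit.
    self.in_flight -= 1
    self.submit_from_heap()

s = ToyScheduler(num_workers=2)
for prio, key in [(0, 'compile-a'), (1, 'compile-b'), (2, 'test-a')]:
  s.add_job(prio, key)
s.submit_from_heap()
assert s.submitted == ['compile-a', 'compile-b']  # capacity of 2 reached
s.job_finished()
assert s.submitted == ['compile-a', 'compile-b', 'test-a']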
def execute(self):
  thrift_targets = self.get_targets(self._is_thrift)

  task_worker_count_configured = not self.get_options().is_default("worker_count")
  subsystem_worker_count_configured = not ScroogeLinter.global_instance().options.is_default("worker_count")
  if task_worker_count_configured and subsystem_worker_count_configured:
    self.raise_conflicting_option("worker_count")
  worker_count = (self.get_options().worker_count if task_worker_count_configured
                  else ScroogeLinter.global_instance().options.worker_count)

  with self.invalidated(thrift_targets) as invalidation_check:
    if not invalidation_check.invalid_vts:
      return

    with self.context.new_workunit('parallel-thrift-linter') as workunit:
      worker_pool = WorkerPool(workunit.parent,
                               self.context.run_tracker,
                               worker_count,
                               workunit.name)
      scrooge_linter_classpath = self.tool_classpath('scrooge-linter')
      results = []
      errors = []
      for vt in invalidation_check.invalid_vts:
        r = worker_pool.submit_async_work(Work(self._lint, [(vt.target, scrooge_linter_classpath)]))
        results.append((r, vt))
      for r, vt in results:
        r.wait()
        # MapResult will raise _value in `get` if the run is not successful.
        try:
          r.get()
        except ThriftLintError as e:
          errors.append(str(e))
        else:
          vt.update()
      if errors:
        raise TaskError('\n'.join(errors))
def _launch_background_workdir_cleanup(self, vts):
  workdir_build_cleanup_job = Work(self._cleanup_workdir_stale_builds,
                                   [(vts,)],
                                   'workdir_build_cleanup')
  self.context.submit_background_work_chain([workdir_build_cleanup_job])