import hashlib
import os

import cachetools

from WebMirror.OutputFilters.rss.FeedDataParser import getCreateRssSource


def hours(num):
    return 60 * 60 * num


def getHash(fCont):
    m = hashlib.md5()
    m.update(fCont)
    return m.hexdigest()


NETLOC_BADWORDS_LOOKUP_CACHE = cachetools.LRUCache(maxsize=1000)


def saveCoverFile(filecont, fHash, filename):
    # Use the first 3 chars of the hash for the folder name.
    # Since it's hex-encoded, that gives us 2^12 possible
    # directories, or 4096 dirs.
    fHash = fHash.upper()
    dirName = fHash[:3]
    dirPath = os.path.join(C_RESOURCE_DIR, dirName)
    if not os.path.exists(dirPath):
        os.makedirs(dirPath)

    ext = os.path.splitext(filename)[-1]
    ext = ext.lower()
from cudf.core.udf.typing import MaskedType
from cudf.utils import cudautils
from cudf.utils.dtypes import (
    BOOL_TYPES,
    DATETIME_TYPES,
    NUMERIC_TYPES,
    TIMEDELTA_TYPES,
)

JIT_SUPPORTED_TYPES = (
    NUMERIC_TYPES | BOOL_TYPES | DATETIME_TYPES | TIMEDELTA_TYPES
)

libcudf_bitmask_type = numpy_support.from_dtype(np.dtype("int32"))
MASK_BITSIZE = np.dtype("int32").itemsize * 8
precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)


@annotate("NUMBA JIT", color="green", domain="cudf_python")
def _get_udf_return_type(argty, func: Callable, args=()):
    """
    Get the return type of a masked UDF for a given set of argument dtypes.

    It is assumed that the function consumes a dictionary whose keys are
    strings and whose values are of MaskedType. Initially assume that the
    UDF may be written to utilize any field in the row - including those
    containing an unsupported dtype. If an unsupported dtype is actually
    used in the function the compilation should fail at `compile_udf`. If
    compilation succeeds, one can infer that the function does not use any
    of the columns of unsupported dtype - meaning we can drop them going
    forward and the UDF will still end up getting fed rows containing all
    the fields it actually needs to use to compute the answer for that row.
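# A minimal sketch (not cudf's actual code) of how a module-level
# `precompiled` LRUCache like the one above is typically consulted:
# compile once per (function, signature) key and reuse the result.
# `compile_fn` is a hypothetical stand-in for the real compiler call.
import cachetools

precompiled_sketch = cachetools.LRUCache(maxsize=32)


def compile_or_reuse(func, signature, compile_fn):
    key = (func, signature)
    if key not in precompiled_sketch:
        precompiled_sketch[key] = compile_fn(func, signature)
    return precompiled_sketch[key]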
class CxSASTSecurityWarnings(CxSASTBase):
    """Collector class to measure the number of security warnings in a Checkmarx CxSAST scan."""

    CXSAST_SCAN_REPORTS = cachetools.LRUCache(256)  # Mapping of scan ids to scan report ids
    STATS_RESPONSE, XML_REPORT_RESPONSE = range(3, 5)

    def __init__(self, source) -> None:
        super().__init__(source)
        self.report_status = "In Process"

    def get_source_responses(self, api_url: URL) -> List[requests.Response]:
        responses = super().get_source_responses(api_url)
        token = responses[self.TOKEN_RESPONSE].json()["access_token"]
        scan_id = responses[self.SCAN_RESPONSE].json()[0]["id"]
        # Get the statistics of the last scan; this is a single API call:
        responses.append(self.api_get(f"sast/scans/{scan_id}/resultsStatistics", token))
        # We want to get the security warning details. For that, we need to have Checkmarx
        # create an XML report. First, check if we've requested a report in a previous run.
        # If so, we have a report id. If not, request it.
        report_id = self.CXSAST_SCAN_REPORTS.get(scan_id)
        if not report_id:
            response = self.api_post("reports/sastScan",
                                     dict(reportType="XML", scanId=scan_id), token)
            report_id = self.CXSAST_SCAN_REPORTS[scan_id] = response.json()["reportId"]
        # Next, get the report status
        response = self.api_get(f"reports/sastScan/{report_id}/status", token)
        self.report_status = response.json()["status"]["value"]
        # Finally, if the report is ready, get it.
        if self.report_status == "Created":
            responses.append(self.api_get(f"reports/sastScan/{report_id}", token))
        return responses

    def parse_source_responses_value(self, responses: List[requests.Response]) -> Value:
        stats = responses[self.STATS_RESPONSE].json()
        severities = self.parameters.get("severities") or ["info", "low", "medium", "high"]
        return str(sum([stats.get(f"{severity.lower()}Severity", 0)
                        for severity in severities]))

    def parse_source_responses_entities(self, responses: List[requests.Response]) -> Entities:
        return self.parse_xml_report(responses[self.XML_REPORT_RESPONSE].text) \
            if len(responses) > self.XML_REPORT_RESPONSE else []

    def next_collection(self) -> datetime:
        """If the CxSAST report is in process, try again as soon as possible,
        otherwise return the regular next collection datetime."""
        return datetime.min if self.report_status == "In Process" else super().next_collection()

    def parse_xml_report(self, xml_string: str) -> Entities:
        """Get the entities from the CxSAST XML report."""
        root = xml.etree.cElementTree.fromstring(xml_string)
        severities = self.parameters.get("severities") or ["info", "low", "medium", "high"]
        entities: Entities = []
        for query in root.findall(".//Query"):
            for result in query.findall("Result"):
                severity = result.attrib["Severity"]
                if result.attrib["FalsePositive"] == "False" and severity.lower() in severities:
                    location = f"{result.attrib['FileName']}:{result.attrib['Line']}:{result.attrib['Column']}"
                    entities.append(
                        dict(key=result.attrib["NodeId"],
                             name=query.attrib["name"],
                             location=location,
                             severity=severity,
                             url=result.attrib["DeepLink"]))
        return entities
def __init__(self):
    self.cache = cachetools.LRUCache(maxsize=50)
def __init__(self, maxsize):
    self.lock = threading.Lock()
    self.data = cachetools.LRUCache(maxsize)
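# A minimal sketch, assuming the lock above is meant to guard all access to
# the LRUCache; method and parameter names here are illustrative, not from
# the original class. Reads and writes are wrapped so concurrent threads
# don't race on the LRU bookkeeping.
def cached_get(self, key, compute):
    with self.lock:
        if key in self.data:
            return self.data[key]
    value = compute(key)  # compute outside the lock to avoid blocking other readers
    with self.lock:
        self.data[key] = value
    return value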
import asyncio
import os
import sys
import traceback

import aiohttp
import cachetools
from aiohttp import web
from gidgethub import aiohttp as gh_aiohttp
from gidgethub import routing, sansio

from . import check_runs, installations

router = routing.Router(installations.router, check_runs.router)
cache = cachetools.LRUCache(maxsize=500)  # type: cachetools.LRUCache


async def main(request: web.Request) -> web.Response:
    try:
        body = await request.read()
        secret = os.environ.get("GITHUB_SECRET")
        event = sansio.Event.from_http(request.headers, body, secret=secret)
        if event.event == "ping":
            return web.Response(status=200)
        async with aiohttp.ClientSession() as session:
            gh = gh_aiohttp.GitHubAPI(session, "algorithms-bot", cache=cache)
            # Give GitHub some time to reach internal consistency.
            await asyncio.sleep(1)
            await router.dispatch(event, gh)
        try:
def __init__(self, catalog):
    self.catalog = catalog
    self.__cache = cachetools.LRUCache(1024)
training_data = learning_data.LearningData()
training_data.from_file(args.training)
identifier = lib.get_identifier(training_data.name, args.experiment)
pset = experiment.get_pset(training_data.num_variables,
                           training_data.variable_type_indices,
                           training_data.variable_names,
                           training_data.variable_dict)
transformed_predictors, transformed_response, predictor_transformer, response_transformer = \
    experiment.transform_features(training_data.predictors, training_data.response)
creator.create("ErrorSizeComplexity", base.Fitness, weights=(-1.0, -1.0, -1.0))
validate_toolbox = experiment.get_validation_toolbox(
    transformed_predictors, transformed_response, pset,
    size_measure=afpo.evaluate_fitness_size_complexity,
    expression_dict=cachetools.LRUCache(maxsize=100),
    fitness_class=creator.ErrorSizeComplexity)
logging.info("Validating models on: " + args.training)
fronts = get_fronts(args.results, training_data.name, validate_toolbox, pset)

# Test
testing_data = learning_data.LearningData()
testing_data.from_file(args.testing)
transformed_testing_predictors, transformed_testing_response = \
    experiment.transform_features(testing_data.predictors, testing_data.response,
                                  predictor_transformer, response_transformer)[0:2]
testing_toolbox = experiment.get_validation_toolbox(
    transformed_testing_predictors, transformed_testing_response, pset,
from enum import auto, Enum
from functools import wraps
from typing import Iterable, List, Tuple, Optional, Union

from gs_quant.target.assets import Asset as __Asset, AssetClass, AssetType, AssetToInstrumentResponse, TemporalXRef,\
    Position, EntityQuery, PositionSet
from gs_quant.target.common import FieldFilterMap
from gs_quant.errors import MqValueError
from gs_quant.instrument import Instrument, Security
from gs_quant.session import GsSession
from gs_quant.common import PositionType

_logger = logging.getLogger(__name__)

IdList = Union[Tuple[str, ...], List]

metalock = threading.Lock()
invocation_locks = cachetools.LRUCache(1024)  # prevent collection from growing without bound


def _cached(fn):
    _fn_cache_lock = threading.Lock()
    # short-term cache to avoid retrieving the same data several times in succession
    cache = cachetools.TTLCache(1024, 30) if os.environ.get('GSQ_SEC_MASTER_CACHE') else None

    @wraps(fn)
    def wrapper(*args, **kwargs):
        if cache is not None:
            args = [tuple(x) if isinstance(x, list) else x for x in args]  # tuples are hashable
            k = cachetools.keys.hashkey(GsSession.current, *args, **kwargs)
            with metalock:
                invocation_lock = invocation_locks.setdefault(f'{fn.__name__}:{k}', threading.Lock())
            with invocation_lock:
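# A minimal sketch of the per-invocation-lock idea above, with hypothetical
# names: an LRUCache of locks keyed by call signature lets concurrent callers
# with the *same* arguments serialize, while calls with distinct arguments
# proceed in parallel, and stale locks are evicted instead of accumulating.
import threading
import cachetools

_locks = cachetools.LRUCache(1024)
_meta = threading.Lock()


def _lock_for(key):
    with _meta:  # guard the lock cache itself
        return _locks.setdefault(key, threading.Lock())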
def get_toolbox(predictors, response):
    creator.create("ErrorAgeSize", base.Fitness, weights=(-1.0, -1.0, -1.0))
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.ErrorAgeSize)
    toolbox = base.Toolbox()
    pset = symbreg.get_numpy_polynomial_explog_trig_pset(len(predictors[0]))
    pset.addEphemeralConstant("gaussian", lambda: random.gauss(0.0, 1.0))
    toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=6)
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("select", tools.selRandom)

    # Crossover
    toolbox.register("mate", gp.cxOnePoint)
    toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
    toolbox.decorate("mate", gp.staticLimit(key=len, max_value=300))

    # Mutation
    toolbox.register("expr_mutation", gp.genFull, min_=0, max_=2)
    toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mutation, pset=pset)
    toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
    toolbox.decorate("mutate", gp.staticLimit(key=len, max_value=300))

    # Fast evaluation configuration
    numpy_response = numpy.array(response)
    numpy_predictors = numpy.array(predictors)
    expression_dict = cachetools.LRUCache(maxsize=2000)
    # toolbox.register("error_func", fast_evaluate.mean_absolute_percentage_error, response=numpy_response)
    toolbox.register("error_func", fast_evaluate.anti_correlation, response=numpy_response)
    toolbox.register("evaluate_error", fast_evaluate.fast_numpy_evaluate,
                     context=pset.context, predictors=numpy_predictors,
                     error_function=toolbox.error_func, expression_dict=expression_dict)
    toolbox.register("evaluate", afpo.evaluate_age_fitness_size, error_func=toolbox.evaluate_error)

    random_data_points = numpy.random.choice(len(predictors), 1000, replace=False)
    subset_predictors = numpy_predictors[random_data_points, :]
    toolbox.register("calc_semantics", semantics.calculate_semantics,
                     context=pset.context, predictors=subset_predictors)
    toolbox.register("simplify_front", simplify.simplify_all, toolbox=toolbox,
                     size_threshold=0, semantics_threshold=10e-5,
                     precompute_semantics=True)

    pop = toolbox.population(n=1000)
    mstats = reports.configure_inf_protected_stats()
    pareto_archive = archive.ParetoFrontSavingArchive(
        frequency=1,
        criteria_chooser=archive.pick_fitness_size_from_fitness_age_size,
        simplifier=toolbox.simplify_front)
    toolbox.register("run", afpo.afpo, population=pop, toolbox=toolbox,
                     xover_prob=0.75, mut_prob=0.01, ngen=1000,
                     tournament_size=2, num_randoms=1, stats=mstats,
                     hall_of_fame=pareto_archive)
    toolbox.register("save", reports.save_log_to_csv)
    toolbox.decorate("save", reports.save_archive(pareto_archive))
    return toolbox
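# The expression_dict above is handed to fast_numpy_evaluate as a memo.
# A minimal sketch of that idea with hypothetical names (not the
# fast_evaluate internals): keying the cache on the expression's string
# form lets identical trees across individuals be evaluated only once.
def memoized_evaluate(individual, evaluate, expression_dict):
    key = str(individual)
    result = expression_dict.get(key)
    if result is None:
        result = evaluate(individual)
        expression_dict[key] = result
    return result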
import module
import logging

import cachetools
import psutil


class PropWrapper(object):
    def __init__(self, interface, props):
        self.props = props
        self.interface = interface

    def __getattr__(self, name):
        return self.props.Get(self.interface, name)


@cachetools.cached(cache=cachetools.LRUCache(5000))
def _get_process(pid):
    return psutil.Process(pid=pid)


class CGroupWrapper(object):
    """
    * {{...cpu_time_pct}} - percent share of CPU core time by the given cgroup
    * {{...rss}} - total RSS memory in bytes by the given cgroup
    """

    def __init__(self, cgname):
        self.cgname = cgname

    def _get_procs(self):
        result = []
        with open("/sys/fs/cgroup/systemd%s/cgroup.procs" % self.cgname) as fp:
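# Usage sketch for the decorator pattern above (hypothetical pid): repeated
# lookups of the same pid return the cached psutil.Process object instead of
# re-reading /proc. Note psutil.NoSuchProcess can still be raised on a miss.
p1 = _get_process(1234)
p2 = _get_process(1234)
assert p1 is p2  # the second call is served from the LRUCache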
@dataclasses.dataclass
class Context(object):
    client: http.Client
    pull: dict
    subscription: dict
    sources: List = dataclasses.field(default_factory=list)
    _write_permission_cache: cachetools.LRUCache = dataclasses.field(
        default_factory=lambda: cachetools.LRUCache(4096))

    @property
    def pull_request(self):
        return PullRequest(self)

    @property
    def log(self):
        return utils.get_pull_logger(self.pull)

    def __post_init__(self):
        self._ensure_complete()

    @cachetools.cachedmethod(
        cache=operator.attrgetter("_write_permission_cache"),
        key=functools.partial(cachetools.keys.hashkey, "has_write_permissions"),
    )
    def has_write_permissions(self, login):
        return self.client.item(
            f"collaborators/{login}/permission")["permission"] in [
                "admin",
                "write",
            ]

    def _get_valid_users(self):
        bots = list(
            set([
                r["user"]["login"] for r in self.reviews
                if r["user"] and r["user"]["type"] == "Bot"
            ]))
        collabs = set([
            r["user"]["login"] for r in self.reviews
            if r["user"] and r["user"]["type"] != "Bot"
        ])
        valid_collabs = [
            login for login in collabs if self.has_write_permissions(login)
        ]
        return bots + valid_collabs

    @functools.cached_property
    def consolidated_reviews(self):
        # Ignore reviews that are not from someone with admin/write permissions
        # and only keep the last review for each user.
        comments = dict()
        approvals = dict()
        valid_users = self._get_valid_users()
        for review in self.reviews:
            if not review["user"] or review["user"]["login"] not in valid_users:
                continue
            # Only keep the latest review of a user
            if review["state"] == "COMMENTED":
                comments[review["user"]["login"]] = review
            else:
                approvals[review["user"]["login"]] = review
        return list(comments.values()), list(approvals.values())

    def _get_consolidated_data(self, name):
        if name == "assignee":
            return [a["login"] for a in self.pull["assignees"]]
        elif name == "label":
            return [label["name"] for label in self.pull["labels"]]
        elif name == "review-requested":
            return [u["login"] for u in self.pull["requested_reviewers"]] + [
                "@" + t["slug"] for t in self.pull["requested_teams"]
            ]
        elif name == "draft":
            return self.pull["draft"]
        elif name == "author":
            return self.pull["user"]["login"]
        elif name == "merged-by":
            return self.pull["merged_by"]["login"] if self.pull["merged_by"] else ""
        elif name == "merged":
            return self.pull["merged"]
        elif name == "closed":
            return self.pull["state"] == "closed"
        elif name == "milestone":
            return self.pull["milestone"]["title"] if self.pull["milestone"] else ""
        elif name == "number":
            return self.pull["number"]
        elif name == "conflict":
            return self.pull["mergeable_state"] == "dirty"
        elif name == "base":
            return self.pull["base"]["ref"]
        elif name == "head":
            return self.pull["head"]["ref"]
        elif name == "locked":
            return self.pull["locked"]
        elif name == "title":
            return self.pull["title"]
        elif name == "body":
            return self.pull["body"]
        elif name == "files":
            return [f["filename"] for f in self.files]
        elif name == "approved-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "APPROVED"
            ]
        elif name == "dismissed-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "DISMISSED"
            ]
        elif name == "changes-requested-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "CHANGES_REQUESTED"
            ]
        elif name == "commented-reviews-by":
            comments, _ = self.consolidated_reviews
            return [
                r["user"]["login"] for r in comments
                if r["state"] == "COMMENTED"
            ]
        # NOTE(jd) The Check API sets conclusion to None for pending.
        # NOTE(sileht): "pending" statuses are not really trackable; we
        # voluntarily drop this event because CIs just send their status every
        # minute until the CI passes (at least Travis and Circle CI do
        # that). This was causing a big load on Mergify for nothing useful
        # tracked, and on big projects it can reach the rate limit very
        # quickly.
        # NOTE(sileht): Not handled for now: cancelled, timed_out, or action_required
        elif name == "status-success":
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "success"
            ]
        elif name == "status-failure":
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "failure"
            ]
        elif name == "status-neutral":
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "neutral"
            ]
        else:
            raise PullRequestAttributeError(name)

    def update_pull_check_runs(self, check):
        self.pull_check_runs = [
            c for c in self.pull_check_runs if c["name"] != check["name"]
        ]
        self.pull_check_runs.append(check)

    @functools.cached_property
    def pull_check_runs(self):
        return check_api.get_checks_for_ref(self, self.pull["head"]["sha"])

    @property
    def pull_engine_check_runs(self):
        return [
            c for c in self.pull_check_runs
            if c["app"]["id"] == config.INTEGRATION_ID
        ]

    @functools.cached_property
    def checks(self):
        # NOTE(sileht): conclusion can be one of success, failure, neutral,
        # cancelled, timed_out, or action_required, and None for "pending"
        checks = dict(
            (c["name"], c["conclusion"]) for c in self.pull_check_runs)
        # NOTE(sileht): state can be one of error, failure, pending,
        # or success.
        checks.update((s["context"], s["state"]) for s in self.client.items(
            f"commits/{self.pull['head']['sha']}/status",
            list_items="statuses"))
        return checks

    def _resolve_login(self, name):
        if not name:
            return []
        elif not isinstance(name, str):
            return [name]
        elif name[0] != "@":
            return [name]

        if "/" in name:
            organization, _, team_slug = name.partition("/")
            if not team_slug or "/" in team_slug:
                # Not a team slug
                return [name]
            organization = organization[1:]
        else:
            organization = self.pull["base"]["repo"]["owner"]["login"]
            team_slug = name[1:]

        try:
            return [
                member["login"] for member in self.client.items(
                    f"/orgs/{organization}/teams/{team_slug}/members")
            ]
        except http.HTTPClientSideError as e:
            self.log.warning(
                "fail to get the organization, team or members",
                team=name,
                status=e.status_code,
                detail=e.message,
            )
        return [name]

    def resolve_teams(self, values):
        if not values:
            return []
        if not isinstance(values, (list, tuple)):
            values = [values]
        values = list(
            itertools.chain.from_iterable((map(self._resolve_login, values))))
        return values

    UNUSABLE_STATES = ["unknown", None]

    # NOTE(sileht): quickly retry; if we don't get the status on time,
    # the exception is recaught in worker.py, so the worker will retry it later
    @tenacity.retry(
        wait=tenacity.wait_exponential(multiplier=0.2),
        stop=tenacity.stop_after_attempt(5),
        retry=tenacity.retry_if_exception_type(exceptions.MergeableStateUnknown),
        reraise=True,
    )
    def _ensure_complete(self):
        if not (self._is_data_complete()
                and self._is_background_github_processing_completed()):
            self.pull = self.client.item(f"pulls/{self.pull['number']}")

        if not self._is_data_complete():
            self.log.error(
                "/pulls/%s has returned an incomplete payload...",
                self.pull["number"],
                data=self.pull,
            )

        if self._is_background_github_processing_completed():
            return

        raise exceptions.MergeableStateUnknown(self)

    def _is_data_complete(self):
        # NOTE(sileht): If a pull request comes from the /pulls listing or
        # check-runs, it is sometimes incomplete. This ensures we have the
        # complete view.
        fields_to_control = (
            "state",
            "mergeable_state",
            "merged_by",
            "merged",
            "merged_at",
        )
        for field in fields_to_control:
            if field not in self.pull:
                return False
        return True

    def _is_background_github_processing_completed(self):
        return (self.pull["state"] == "closed"
                or self.pull["mergeable_state"] not in self.UNUSABLE_STATES)

    def update(self):
        # TODO(sileht): Remove me.
        # Don't use it, because consolidated data are not updated after that.
        # Only used by merge action for posting an update report after rebase.
        self.pull = self.client.item(f"pulls/{self.pull['number']}")
        try:
            del self.__dict__["pull_check_runs"]
        except KeyError:
            pass

    @functools.cached_property
    def is_behind(self):
        branch_name_escaped = parse.quote(self.pull["base"]["ref"], safe="")
        branch = self.client.item(f"branches/{branch_name_escaped}")
        for commit in self.commits:
            for parent in commit["parents"]:
                if parent["sha"] == branch["commit"]["sha"]:
                    return False
        return True

    def __str__(self):
        return "%(login)s/%(repo)s/pull/%(number)d@%(branch)s " "s:%(pr_state)s" % {
            "login": self.pull["base"]["user"]["login"],
            "repo": self.pull["base"]["repo"]["name"],
            "number": self.pull["number"],
            "branch": self.pull["base"]["ref"],
            "pr_state": ("merged" if self.pull["merged"] else
                         (self.pull["mergeable_state"] or "none")),
        }

    @functools.cached_property
    def reviews(self):
        return list(self.client.items(f"pulls/{self.pull['number']}/reviews"))

    @functools.cached_property
    def commits(self):
        return list(self.client.items(f"pulls/{self.pull['number']}/commits"))

    @functools.cached_property
    def files(self):
        return list(self.client.items(f"pulls/{self.pull['number']}/files"))

    @property
    def pull_from_fork(self):
        return self.pull["head"]["repo"]["id"] != self.pull["base"]["repo"]["id"]
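# Sketch of the cachedmethod pattern used above, with hypothetical names:
# operator.attrgetter points the decorator at a per-instance cache, and
# prefixing the key with the method name keeps several cachedmethods from
# colliding when they share one cache.
import dataclasses
import functools
import operator
import cachetools


@dataclasses.dataclass
class Example:
    _cache: cachetools.LRUCache = dataclasses.field(
        default_factory=lambda: cachetools.LRUCache(128))

    @cachetools.cachedmethod(
        cache=operator.attrgetter("_cache"),
        key=functools.partial(cachetools.keys.hashkey, "expensive"),
    )
    def expensive(self, arg):
        return arg * 2  # stand-in for a slow lookup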
import cachetools
from defaultlist import defaultlist

data = None
data_t = None
labels = None
outdir = None
pairwise_distances = None
ordered_neighbours = None
neighbours = None
all_orderings = None
identity_ordering = None

nobj = 2

fitnessCache = defaultlist(lambda: cachetools.LRUCache(maxsize=1e6))
accesses = 0
stores = 0

max_depth = 8  # 7#12#8
max_height = 14  # 10#17#14
pop_size = 100  # 1024#100
cxpb = 0.7
mutpb = 0.15
mutarpb = 0.15
num_trees = 34
gens = 1000
num_instances = 0
num_features = 0
class LockedCache:
    def __init__(self, cache: cachetools.Cache):
        self.lock = threading.Lock()
        self.cache = cache

    def __call__(self, **kwargs):
        return cachetools.cached(cache=self.cache, lock=self.lock, **kwargs)


# global cache instances
_repo = LockedCache(cachetools.TTLCache(maxsize=1, ttl=CACHE_TTL))
_release = LockedCache(cachetools.TTLCache(maxsize=CACHE_SIZE, ttl=CACHE_TTL))
_asset = LockedCache(cachetools.LRUCache(maxsize=CACHE_SIZE))
_metadata = LockedCache(cachetools.TTLCache(maxsize=CACHE_SIZE, ttl=METADATA_TTL))
_ticket = LockedCache(cachetools.TTLCache(maxsize=1, ttl=TICKETS_TTL))

_log = logging.getLogger(__name__)


def _cache_key(organization, *args, **kwargs):
    """Skip the first argument."""
    return cachetools.keys.hashkey(*args, **kwargs)


@_repo(key=_cache_key)
def _repository_list(org: Organization):
    return sorted([repo.name for repo in org().get_repos()
                   if inventory.is_wrap_project_name(repo.name)])
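# A small usage sketch of LockedCache, with hypothetical names: each
# instance bundles a cache with its own lock, so the decorated function is
# safe to call from multiple threads without sharing lock contention
# across unrelated caches.
_squares = LockedCache(cachetools.LRUCache(maxsize=256))


@_squares()
def square(n: int) -> int:
    return n * n  # stand-in for an expensive computation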
def __init__(self, *args, remove_domains=None, **kwargs):
    super().__init__(*args, **kwargs)
    self.remove_domains = remove_domains or []
    self.interfaces_cache = cachetools.LRUCache(128)
    self._lock = threading.Lock()
def grouped_window_sizes_from_offset(arr, group_starts, offset):
    window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32")
    if arr.size > 0:
        gpu_grouped_window_sizes_from_offset.forall(arr.size)(
            arr, window_sizes, group_starts, offset)
    return window_sizes


# This cache is keyed on the (signature, code, closure variables) of UDFs, so
# it can hit for distinct functions that are similar. The lru_cache wrapping
# compile_udf misses for these similar functions, but doesn't need to serialize
# closure variables to check for a hit.
_udf_code_cache = cachetools.LRUCache(maxsize=32)


def compile_udf(udf, type_signature):
    """Compile ``udf`` with `numba`

    Compile a python callable function ``udf`` with
    `numba.cuda.compile_ptx_for_current_device(device=True)` using
    ``type_signature`` into CUDA PTX together with the generated output type.

    The output is expected to be passed to the PTX parser in `libcudf`
    to generate a CUDA device function to be inlined into CUDA kernels,
    compiled at runtime and launched.

    Parameters
    ----------
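# Sketch of the cache key the comment above describes (not cudf's exact
# code): combining the bytecode and closure cell contents with the type
# signature lets two distinct but identical lambdas share one compiled
# result.
def _udf_cache_key(udf, type_signature):
    codebytes = udf.__code__.co_code
    if udf.__closure__ is not None:
        cvarbytes = b"".join(repr(cell.cell_contents).encode()
                             for cell in udf.__closure__)
    else:
        cvarbytes = b""
    return (type_signature, codebytes, cvarbytes)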
@dataclasses.dataclass
class Context(object):
    client: github.GithubInstallationClient
    pull: dict
    subscription: subscription.Subscription
    sources: typing.List = dataclasses.field(default_factory=list)
    _write_permission_cache: cachetools.LRUCache = dataclasses.field(
        default_factory=lambda: cachetools.LRUCache(4096))
    log: logging.LoggerAdapter = dataclasses.field(init=False)

    SUMMARY_NAME = "Summary"

    def __post_init__(self):
        self._ensure_complete()

        self.log = daiquiri.getLogger(
            self.__class__.__qualname__,
            gh_owner=self.pull["base"]["user"]["login"]
            if "base" in self.pull else "<unknown-yet>",
            gh_repo=(self.pull["base"]["repo"]["name"]
                     if "base" in self.pull else "<unknown-yet>"),
            gh_private=(self.pull["base"]["repo"]["private"]
                        if "base" in self.pull else "<unknown-yet>"),
            gh_branch=self.pull["base"]["ref"]
            if "base" in self.pull else "<unknown-yet>",
            gh_pull=self.pull["number"],
            gh_pull_base_sha=self.pull["base"]["sha"]
            if "base" in self.pull else "<unknown-yet>",
            gh_pull_head_sha=self.pull["head"]["sha"]
            if "head" in self.pull else "<unknown-yet>",
            gh_pull_url=self.pull.get("html_url", "<unknown-yet>"),
            gh_pull_state=("merged" if self.pull.get("merged") else
                           (self.pull.get("mergeable_state", "unknown") or "none")),
        )

    @property
    def base_url(self):
        """The URL prefix to make GitHub requests."""
        return f"/repos/{self.pull['base']['user']['login']}/{self.pull['base']['repo']['name']}"

    @property
    def pull_request(self):
        return PullRequest(self)

    @cachetools.cachedmethod(
        # Ignore type until https://github.com/python/typeshed/issues/4652 is fixed
        cache=operator.attrgetter("_write_permission_cache"),  # type: ignore
        key=functools.partial(cachetools.keys.hashkey, "has_write_permissions"),
    )
    def has_write_permissions(self, login):
        return self.client.item(
            f"{self.base_url}/collaborators/{login}/permission"
        )["permission"] in [
            "admin",
            "write",
        ]

    def set_summary_check(self, result):
        """Set the Mergify Summary check result."""

        previous_sha = self.get_cached_last_summary_head_sha()
        # NOTE(sileht): we first commit the future sha in redis,
        # so engine.create_initial_summary() cannot create a second SUMMARY
        self._save_cached_last_summary_head_sha(self.pull["head"]["sha"])

        try:
            ret = check_api.set_check_run(self, self.SUMMARY_NAME, result)
        except Exception:
            if previous_sha:
                # Restore previous sha in redis
                self._save_cached_last_summary_head_sha(previous_sha)
            raise
        return ret

    @staticmethod
    def redis_last_summary_head_sha_key(pull: dict) -> str:
        owner = pull["base"]["repo"]["owner"]["id"]
        repo = pull["base"]["repo"]["id"]
        pull_number = pull["number"]
        return f"summary-sha~{owner}~{repo}~{pull_number}"

    @classmethod
    def get_cached_last_summary_head_sha_from_pull(
        cls,
        pull: dict,
    ) -> str:
        with utils.get_redis_for_cache() as redis:  # type: ignore
            # FIXME(jd): remove in January 2021
            # Look for old format
            ################
            owner = pull["base"]["repo"]["owner"]["id"]
            repo = pull["base"]["repo"]["id"]
            pull_number = pull["number"]
            for k in redis.keys(f"summary-sha~*~{owner}~{repo}~{pull_number}"):
                return redis.get(k)
            # ENDOF FIXME(jd)
            return redis.get(cls.redis_last_summary_head_sha_key(pull))

    def get_cached_last_summary_head_sha(self) -> str:
        return self.get_cached_last_summary_head_sha_from_pull(self.pull)

    def clear_cached_last_summary_head_sha(self):
        with utils.get_redis_for_cache() as redis:
            redis.delete(self.redis_last_summary_head_sha_key(self.pull))

    def _save_cached_last_summary_head_sha(self, sha):
        # NOTE(sileht): We store it only for 1 month; if we lose it it's not
        # a big deal, as it's just to avoid race conditions when too many
        # synchronize events occur in a short period of time
        with utils.get_redis_for_cache() as redis:
            redis.set(
                self.redis_last_summary_head_sha_key(self.pull),
                sha,
                ex=SUMMARY_SHA_EXPIRATION,
            )

    def _get_valid_users(self):
        bots = list(
            set([
                r["user"]["login"] for r in self.reviews
                if r["user"] and r["user"]["type"] == "Bot"
            ]))
        collabs = set([
            r["user"]["login"] for r in self.reviews
            if r["user"] and r["user"]["type"] != "Bot"
        ])
        valid_collabs = [
            login for login in collabs if self.has_write_permissions(login)
        ]
        return bots + valid_collabs

    @functools.cached_property
    def consolidated_reviews(self):
        # Ignore reviews that are not from someone with admin/write permissions
        # and only keep the last review for each user.
        comments = dict()
        approvals = dict()
        valid_users = self._get_valid_users()
        for review in self.reviews:
            if not review["user"] or review["user"]["login"] not in valid_users:
                continue
            # Only keep the latest review of a user
            if review["state"] == "COMMENTED":
                comments[review["user"]["login"]] = review
            else:
                approvals[review["user"]["login"]] = review
        return list(comments.values()), list(approvals.values())

    def _get_consolidated_data(self, name):
        if name == "assignee":
            return [a["login"] for a in self.pull["assignees"]]
        elif name == "label":
            return [label["name"] for label in self.pull["labels"]]
        elif name == "review-requested":
            return [u["login"] for u in self.pull["requested_reviewers"]] + [
                "@" + t["slug"] for t in self.pull["requested_teams"]
            ]
        elif name == "draft":
            return self.pull["draft"]
        elif name == "author":
            return self.pull["user"]["login"]
        elif name == "merged-by":
            return self.pull["merged_by"]["login"] if self.pull["merged_by"] else ""
        elif name == "merged":
            return self.pull["merged"]
        elif name == "closed":
            return self.pull["state"] == "closed"
        elif name == "milestone":
            return self.pull["milestone"]["title"] if self.pull["milestone"] else ""
        elif name == "number":
            return self.pull["number"]
        elif name == "conflict":
            return self.pull["mergeable_state"] == "dirty"
        elif name == "base":
            return self.pull["base"]["ref"]
        elif name == "head":
            return self.pull["head"]["ref"]
        elif name == "locked":
            return self.pull["locked"]
        elif name == "title":
            return self.pull["title"]
        elif name == "body":
            return self.pull["body"]
        elif name == "files":
            return [f["filename"] for f in self.files]
        elif name == "approved-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "APPROVED"
            ]
        elif name == "dismissed-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "DISMISSED"
            ]
        elif name == "changes-requested-reviews-by":
            _, approvals = self.consolidated_reviews
            return [
                r["user"]["login"] for r in approvals
                if r["state"] == "CHANGES_REQUESTED"
            ]
        elif name == "commented-reviews-by":
            comments, _ = self.consolidated_reviews
            return [
                r["user"]["login"] for r in comments
                if r["state"] == "COMMENTED"
            ]
        # NOTE(jd) The Check API sets conclusion to None for pending.
        # NOTE(sileht): "pending" statuses are not really trackable; we
        # voluntarily drop this event because CIs just send their status every
        # minute until the CI passes (at least Travis and Circle CI do
        # that). This was causing a big load on Mergify for nothing useful
        # tracked, and on big projects it can reach the rate limit very
        # quickly.
        # NOTE(sileht): Not handled for now: cancelled, timed_out, or action_required
        elif name in ("status-success", "check-success"):
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "success"
            ]
        elif name in ("status-failure", "check-failure"):
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "failure"
            ]
        elif name in ("status-neutral", "check-neutral"):
            return [
                ctxt for ctxt, state in self.checks.items()
                if state == "neutral"
            ]
        else:
            raise PullRequestAttributeError(name)

    def update_pull_check_runs(self, check):
        self.pull_check_runs = [
            c for c in self.pull_check_runs if c["name"] != check["name"]
        ]
        self.pull_check_runs.append(check)

    @functools.cached_property
    def pull_check_runs(self):
        return check_api.get_checks_for_ref(self, self.pull["head"]["sha"])

    @property
    def pull_engine_check_runs(self):
        return [
            c for c in self.pull_check_runs
            if c["app"]["id"] == config.INTEGRATION_ID
        ]

    @functools.cached_property
    def checks(self):
        # NOTE(sileht): conclusion can be one of success, failure, neutral,
        # cancelled, timed_out, or action_required, and None for "pending"
        checks = dict(
            (c["name"], c["conclusion"]) for c in self.pull_check_runs)
        # NOTE(sileht): state can be one of error, failure, pending,
        # or success.
        checks.update((s["context"], s["state"]) for s in self.client.items(
            f"{self.base_url}/commits/{self.pull['head']['sha']}/status",
            list_items="statuses",
        ))
        return checks

    def _resolve_login(self, name):
        if not name:
            return []
        elif not isinstance(name, str):
            return [name]
        elif name[0] != "@":
            return [name]

        if "/" in name:
            organization, _, team_slug = name.partition("/")
            if not team_slug or "/" in team_slug:
                # Not a team slug
                return [name]
            organization = organization[1:]
        else:
            organization = self.pull["base"]["repo"]["owner"]["login"]
            team_slug = name[1:]

        try:
            return [
                member["login"] for member in self.client.items(
                    f"/orgs/{organization}/teams/{team_slug}/members")
            ]
        except http.HTTPClientSideError as e:
            self.log.warning(
                "fail to get the organization, team or members",
                team=name,
                status_code=e.status_code,
                detail=e.message,
            )
        return [name]

    def resolve_teams(self, values):
        if not values:
            return []
        if not isinstance(values, (list, tuple)):
            values = [values]
        values = list(
            itertools.chain.from_iterable((map(self._resolve_login, values))))
        return values

    UNUSABLE_STATES = ["unknown", None]

    # NOTE(sileht): quickly retry; if we don't get the status on time,
    # the exception is recaught in worker.py, so the worker will retry it later
    @tenacity.retry(
        wait=tenacity.wait_exponential(multiplier=0.2),
        stop=tenacity.stop_after_attempt(5),
        retry=tenacity.retry_if_exception_type(exceptions.MergeableStateUnknown),
        reraise=True,
    )
    def _ensure_complete(self):
        if not (self._is_data_complete()
                and self._is_background_github_processing_completed()):
            self.pull = self.client.item(
                f"{self.base_url}/pulls/{self.pull['number']}")

        if not self._is_data_complete():
            self.log.error(
                "/pulls/%s has returned an incomplete payload...",
                self.pull["number"],
                data=self.pull,
            )

        if self._is_background_github_processing_completed():
            return

        raise exceptions.MergeableStateUnknown(self)

    def _is_data_complete(self):
        # NOTE(sileht): If a pull request comes from the /pulls listing or
        # check-runs, it is sometimes incomplete. This ensures we have the
        # complete view.
        fields_to_control = (
            "state",
            "mergeable_state",
            "merged_by",
            "merged",
            "merged_at",
        )
        for field in fields_to_control:
            if field not in self.pull:
                return False
        return True

    def _is_background_github_processing_completed(self):
        return (self.pull["state"] == "closed"
                or self.pull["mergeable_state"] not in self.UNUSABLE_STATES)

    def update(self):
        # TODO(sileht): Remove me.
        # Don't use it, because consolidated data are not updated after that.
        # Only used by merge action for posting an update report after rebase.
        self.pull = self.client.item(
            f"{self.base_url}/pulls/{self.pull['number']}")
        try:
            del self.__dict__["pull_check_runs"]
        except KeyError:
            pass

    @functools.cached_property
    def is_behind(self):
        branch_name_escaped = parse.quote(self.pull["base"]["ref"], safe="")
        branch = self.client.item(
            f"{self.base_url}/branches/{branch_name_escaped}")
        for commit in self.commits:
            for parent in commit["parents"]:
                if parent["sha"] == branch["commit"]["sha"]:
                    return False
        return True

    def have_been_synchronized(self):
        for source in self.sources:
            if (source["event_type"] == "pull_request"
                    and source["data"]["action"] == "synchronize"
                    and source["data"]["sender"]["id"] != config.BOT_USER_ID):
                return True
        return False

    def __str__(self):
        return "%(login)s/%(repo)s/pull/%(number)d@%(branch)s " "s:%(pr_state)s" % {
            "login": self.pull["base"]["user"]["login"],
            "repo": self.pull["base"]["repo"]["name"],
            "number": self.pull["number"],
            "branch": self.pull["base"]["ref"],
            "pr_state": ("merged" if self.pull["merged"] else
                         (self.pull["mergeable_state"] or "none")),
        }

    @functools.cached_property
    def reviews(self):
        return list(
            self.client.items(
                f"{self.base_url}/pulls/{self.pull['number']}/reviews"))

    @functools.cached_property
    def commits(self):
        return list(
            self.client.items(
                f"{self.base_url}/pulls/{self.pull['number']}/commits"))

    @functools.cached_property
    def files(self):
        return list(
            self.client.items(
                f"{self.base_url}/pulls/{self.pull['number']}/files"))

    @property
    def pull_from_fork(self):
        return self.pull["head"]["repo"]["id"] != self.pull["base"]["repo"]["id"]

    def github_workflow_changed(self):
        for f in self.files:
            if f["filename"].startswith(".github/workflows"):
                return True
        return False
from mistral.workflow import states

CONF = cfg.CONF


def _create_lru_cache_for_workflow_execution(wf_ex_id):
    return cachetools.LRUCache(maxsize=500)


# This is a two-level caching structure.
# First level: [<workflow execution id> -> <task execution cache>]
# Second level (task execution cache): [<task_name> -> <task executions>]
# The first level (by workflow execution id) allows invalidating the
# needed cache entry when the workflow gets completed.
_TASK_EX_CACHE = cachetools.LRUCache(
    maxsize=100,
    missing=_create_lru_cache_for_workflow_execution
)

_ACTION_DEF_CACHE = cachetools.TTLCache(
    maxsize=1000,
    ttl=CONF.engine.action_definition_cache_time  # 60 seconds by default
)

_TASK_EX_CACHE_LOCK = threading.RLock()
_ACTION_DEF_CACHE_LOCK = threading.RLock()


def find_action_definition_by_name(action_name):
    """Find action definition name.

    :param action_name: Action name.
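# Usage sketch for the two-level structure above, with hypothetical ids.
# The `missing` factory -- a keyword available in older cachetools releases;
# newer ones would subclass Cache and override __missing__ instead -- builds
# the inner per-workflow LRUCache on first access, so lookups read naturally:
with _TASK_EX_CACHE_LOCK:
    _TASK_EX_CACHE["wf-ex-1"]["task1"] = ["task-ex-a", "task-ex-b"]
    cached = _TASK_EX_CACHE["wf-ex-1"].get("task1")

# Completing the workflow invalidates the whole inner cache at once:
with _TASK_EX_CACHE_LOCK:
    if "wf-ex-1" in _TASK_EX_CACHE:
        del _TASK_EX_CACHE["wf-ex-1"]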
def get_data(self):
    def get_segment_path(segment):
        if segment.parent:
            return segment_path[segment.parent] + [segment]
        else:
            return [segment]

    def update_summary(segment):
        """
        Calculate summary for segment and all nested segments
        """
        services = {}
        subscribers = {}
        # Calculate direct segments' coverage
        for o in segment["objects"].values():
            update_dict(services, o["services"])
            update_dict(subscribers, o["subscribers"])
        # Flatten objects
        segment["objects"] = sorted(segment["objects"].values(),
                                    key=lambda x: -x["weight"])
        # Calculate children's coverage
        for s in segment["segments"].values():
            update_summary(s)
            update_dict(services, s["services"])
            update_dict(subscribers, s["subscribers"])
        segment["segments"] = sorted(segment["segments"].values(),
                                     key=lambda x: -x["weight"])
        segment["services"] = services
        segment["subscribers"] = subscribers
        segment["summary"] = {
            "service": services,
            "subscriber": subscribers
        }
        segment["weight"] = ServiceSummary.get_weight(segment["summary"])

    def update_dict(d1, d2):
        for k in d2:
            if k in d1:
                d1[k] += d2[k]
            else:
                d1[k] = d2[k]

    segment_path = cachetools.LRUCache(maxsize=10000, missing=get_segment_path)
    # Build tree
    tree = {
        "segment": None,
        "segments": {},
        "objects": {},
        "subscribers": {},
        "services": {}
    }
    if self.current_user.is_superuser:
        qs = ActiveAlarm.objects.filter(root__exists=False)
    else:
        qs = ActiveAlarm.objects.filter(
            adm_path__in=self.get_user_domains(),
            root__exists=False)
    now = datetime.datetime.now()
    for alarm in qs:
        if not alarm.total_services and not alarm.total_subscribers:
            continue
        ct = tree
        for sp in segment_path[alarm.managed_object.segment]:
            if sp.id not in ct["segments"]:
                ct["segments"][sp.id] = {
                    "segment": sp,
                    "segments": {},
                    "objects": {},
                    "subscribers": {},
                    "services": {}
                }
            ct = ct["segments"][sp.id]
        subscribers = SummaryItem.items_to_dict(alarm.total_subscribers)
        services = SummaryItem.items_to_dict(alarm.total_services)
        ct["objects"][alarm.id] = {
            "object": alarm.managed_object,
            "alarm": alarm,
            "severity": alarm.severity,
            "timestamp": alarm.timestamp,
            "duration": now - alarm.timestamp,
            "escalation_tt": alarm.escalation_tt,
            "subscribers": subscribers,
            "services": services,
            "summary": {
                "subscriber": subscribers,
                "service": services
            }
        }
        ct["objects"][alarm.id]["weight"] = ServiceSummary.get_weight(
            ct["objects"][alarm.id]["summary"])
    # Calculate segment summaries
    update_summary(tree)
    # Calculate total summaries
    services = {}
    subscribers = {}
    for s in tree["segments"]:
        update_dict(services, s["summary"]["service"])
        update_dict(subscribers, s["summary"]["subscriber"])
    for o in tree["objects"]:
        update_dict(services, o["summary"]["service"])
        update_dict(subscribers, o["summary"]["subscriber"])
    tree["summary"] = {"subscriber": subscribers, "service": services}
    return tree
def _create_lru_cache_for_workflow_execution(wf_ex_id):
    return cachetools.LRUCache(maxsize=500)
class Combat:
    message_cache = cachetools.LRUCache(100)

    def __init__(self, channelId, summaryMsgId, dmId, options, ctx,
                 combatants=None, roundNum=0, turnNum=0, currentIndex=None):
        if combatants is None:
            combatants = []
        self._channel = channelId  # readonly
        self._summary = summaryMsgId  # readonly
        self._dm = dmId
        self._options = options  # readonly (?)
        self._combatants = combatants
        self._round = roundNum
        self._turn = turnNum
        self._current_index = currentIndex
        self.ctx = ctx

    @classmethod
    def new(cls, channelId, summaryMsgId, dmId, options, ctx):
        return cls(channelId, summaryMsgId, dmId, options, ctx)

    @classmethod
    async def from_ctx(cls, ctx):
        raw = await ctx.bot.mdb.combats.find_one({"channel": str(ctx.channel.id)})
        if raw is None:
            raise CombatNotFound
        return await cls.from_dict(raw, ctx)

    @classmethod
    async def from_dict(cls, raw, ctx):
        inst = cls(raw['channel'], raw['summary'], raw['dm'], raw['options'],
                   ctx, [], raw['round'], raw['turn'], raw['current'])
        for c in raw['combatants']:
            if c['type'] == 'common':
                inst._combatants.append(Combatant.from_dict(c, ctx, inst))
            elif c['type'] == 'monster':
                inst._combatants.append(MonsterCombatant.from_dict(c, ctx, inst))
            elif c['type'] == 'player':
                inst._combatants.append(await PlayerCombatant.from_dict(c, ctx, inst))
            elif c['type'] == 'group':
                inst._combatants.append(await CombatantGroup.from_dict(c, ctx, inst))
            else:
                raise CombatException("Unknown combatant type")
        return inst

    @classmethod
    async def from_id(cls, _id, ctx):
        raw = await ctx.bot.mdb.combats.find_one({"channel": _id})
        if raw is None:
            raise CombatNotFound
        return await cls.from_dict(raw, ctx)

    def to_dict(self):
        return {
            'channel': self.channel,
            'summary': self.summary,
            'dm': self.dm,
            'options': self.options,
            'combatants': [c.to_dict() for c in self._combatants],
            'turn': self.turn_num,
            'round': self.round_num,
            'current': self._current_index
        }

    @property
    def channel(self):
        return self._channel

    @property
    def summary(self):
        return self._summary

    @property
    def dm(self):
        return self._dm

    @property
    def options(self):
        return self._options

    @options.setter
    def options(self, value):
        self._options = value

    @property  # private write
    def round_num(self):
        return self._round

    @property  # private write
    def turn_num(self):
        return self._turn

    @property  # private write
    def index(self):
        return self._current_index

    @property
    def current_combatant(self):
        """The combatant whose turn it currently is."""
        return next((c for c in self._combatants if c.index == self.index),
                    None) if self.index is not None else None

    @property
    def next_combatant(self):
        """The combatant whose turn it will be when advance_turn() is called."""
        if len(self._combatants) == 0:
            return None
        if self.index is None:
            index = 0
        elif self.index + 1 >= len(self._combatants):
            index = 0
        else:
            index = self.index + 1
        return next(c for c in self._combatants if c.index == index) if index is not None else None

    def get_combatants(self, groups=False):
        """
        Returns a list of all Combatants in a combat.
        :param groups: Whether to return CombatantGroup objects in the list.
        :return: A list of all combatants (and optionally groups).
        """
        combatants = []
        for c in self._combatants:
            if isinstance(c, Combatant):
                combatants.append(c)
            else:
                combatants.extend(c.get_combatants())
                if groups:
                    combatants.append(c)
        return combatants

    def add_combatant(self, combatant):
        self._combatants.append(combatant)
        self.sort_combatants()

    def remove_combatant(self, combatant, ignore_remove_hook=False):
        if not ignore_remove_hook:
            combatant.on_remove()
        if not combatant.group:
            self._combatants.remove(combatant)
            self.sort_combatants()
        else:
            self.get_group(combatant.group).remove_combatant(combatant)
            self.check_empty_groups()
        return self

    def sort_combatants(self):
        current = self.current_combatant
        self._combatants = sorted(self._combatants,
                                  key=lambda k: (k.init, k.initMod),
                                  reverse=True)
        for n, c in enumerate(self._combatants):
            c.index = n
        self._current_index = current.index if current is not None else None

    def get_combatant(self, name, strict=True):
        if strict:
            return next((c for c in self.get_combatants()
                         if c.name.lower() == name.lower()), None)
        else:
            return next((c for c in self.get_combatants()
                         if name.lower() in c.name.lower()), None)

    def get_group(self, name, create=None, strict=True):
        """
        Gets a combatant group.
        :rtype: CombatantGroup
        :param name: The name of the combatant group.
        :param create: The initiative to create a group at if a group is not found.
        :param strict: Whether group name must be a full case insensitive match.
        :return: The combatant group.
        """
        if strict:
            grp = next((g for g in self.get_groups()
                        if g.name.lower() == name.lower()), None)
        else:
            grp = next((g for g in self.get_groups()
                        if name.lower() in g.name.lower()), None)

        if grp is None and create is not None:
            grp = CombatantGroup.new(name, create, self.ctx)
            self.add_combatant(grp)

        return grp

    def get_groups(self):
        return [c for c in self._combatants if isinstance(c, CombatantGroup)]

    def check_empty_groups(self):
        removed = False
        for c in self._combatants:
            if isinstance(c, CombatantGroup) and len(c.get_combatants()) == 0:
                self.remove_combatant(c)
                removed = True
        if removed:
            self.sort_combatants()

    def reroll_dynamic(self):
        """
        Rerolls all combatant initiatives.
        """
        for c in self._combatants:
            c.init = roll(f"1d20+{c.initMod}").total
        self.sort_combatants()

    async def select_combatant(self, name, choice_message=None, select_group=False):
        """
        Opens a prompt for a user to select the combatant they were searching for.
        :param choice_message: The message to pass to the selector.
        :param select_group: Whether to allow groups to be selected.
        :rtype: Combatant
        :param name: The name of the combatant to search for.
        :return: The selected Combatant, or None if the search failed.
        """
        matching = [(c.name, c) for c in self.get_combatants(select_group)
                    if name.lower() == c.name.lower()]
        if not matching:
            matching = [(c.name, c) for c in self.get_combatants(select_group)
                        if name.lower() in c.name.lower()]
        return await get_selection(self.ctx, matching, message=choice_message)

    def advance_turn(self):
        if len(self._combatants) == 0:
            raise NoCombatants

        if self.current_combatant:
            self.current_combatant.on_turn_end()

        changed_round = False
        if self.index is None:  # new round, no dynamic reroll
            self._current_index = 0
            self._round += 1
        elif self.index + 1 >= len(self._combatants):  # new round
            if self.options.get('dynamic'):
                self.reroll_dynamic()
            self._current_index = 0
            self._round += 1
            changed_round = True
        else:
            self._current_index += 1

        self._turn = self.current_combatant.init
        self.current_combatant.on_turn()
        return changed_round

    def rewind_turn(self):
        if len(self._combatants) == 0:
            raise NoCombatants

        if self.current_combatant:
            self.current_combatant.on_turn_end()

        if self.index is None:  # start of combat
            self._current_index = len(self._combatants) - 1
        elif self.index == 0:  # new round
            self._current_index = len(self._combatants) - 1
            self._round -= 1
        else:
            self._current_index -= 1

        self._turn = self.current_combatant.init

    def goto_turn(self, init_num, is_combatant=False):
        if len(self._combatants) == 0:
            raise NoCombatants

        if self.current_combatant:
            self.current_combatant.on_turn_end()

        if is_combatant:
            if init_num.group:
                init_num = self.get_group(init_num.group)
            self._current_index = init_num.index
        else:
            target = next((c for c in self._combatants if c.init <= init_num), None)
            if target:
                self._current_index = target.index
            else:
                self._current_index = 0

        self._turn = self.current_combatant.init

    def skip_rounds(self, num_rounds):
        self._round += num_rounds
        for com in self.get_combatants():
            com.on_turn(num_rounds)
            com.on_turn_end(num_rounds)
        if self.options.get('dynamic'):
            self.reroll_dynamic()

    def get_turn_str(self):
        nextCombatant = self.current_combatant

        if isinstance(nextCombatant, CombatantGroup):
            thisTurn = nextCombatant.get_combatants()
            outStr = "**Initiative {} (round {})**: {} ({})\n{}"
            outStr = outStr.format(
                self.turn_num, self.round_num, nextCombatant.name,
                ", ".join({co.controller_mention() for co in thisTurn}),
                '```markdown\n' + "\n".join([co.get_status() for co in thisTurn]) + '```')
        else:
            outStr = "**Initiative {} (round {})**: {}\n{}"
            outStr = outStr.format(
                self.turn_num, self.round_num,
                "{} ({})".format(nextCombatant.name, nextCombatant.controller_mention()),
                '```markdown\n' + nextCombatant.get_status() + '```')

        if self.options.get('turnnotif'):
            nextTurn = self.next_combatant
            outStr += f"**Next up**: {nextTurn.name} ({nextTurn.controller_mention()})\n"
        return outStr

    @staticmethod
    async def ensure_unique_chan(ctx):
        if await ctx.bot.mdb.combats.find_one({"channel": str(ctx.channel.id)}):
            raise ChannelInCombat

    async def commit(self):
        """Commits the combat to db."""
        if not self.ctx:
            raise RequiresContext
        for pc in self.get_combatants():
            if isinstance(pc, PlayerCombatant):
                await pc.character.manual_commit(self.ctx.bot, pc.character_owner)
        await self.ctx.bot.mdb.combats.update_one(
            {"channel": self.channel},
            {
                "$set": self.to_dict(),
                "$currentDate": {"lastchanged": True}
            },
            upsert=True)

    def get_summary(self, private=False):
        """Returns the generated summary message content."""
        combatants = sorted(self._combatants,
                            key=lambda k: (k.init, k.initMod),
                            reverse=True)
        outStr = "```markdown\n{}: {} (round {})\n".format(
            self.options.get('name') if self.options.get('name') else "Current initiative",
            self.turn_num, self.round_num)
        outStr += f"{'=' * (len(outStr) - 13)}\n"
        combatantStr = ""
        for c in combatants:
            combatantStr += ("# " if self.index == c.index else " ") + c.get_summary(private) + "\n"
        outStr += "{}```"  # format placeholder for combatantStr
        if len(outStr.format(combatantStr)) > 2000:
            combatantStr = ""
            for c in combatants:
                combatantStr += ("# " if self.index == c.index else " ") + c.get_summary(private, no_notes=True) + "\n"
        return outStr.format(combatantStr)

    async def update_summary(self):
        """Edits the summary message with the latest summary."""
        await (await self.get_summary_msg()).edit(content=self.get_summary())

    def get_channel(self):
        """Gets the Channel object of the combat."""
        if self.ctx:
            return self.ctx.message.channel
        else:
            chan = self.ctx.bot.get_channel(int(self.channel))
            if chan:
                return chan
            else:
                raise CombatChannelNotFound

    async def get_summary_msg(self):
        """Gets the Message object of the combat summary."""
        if self.summary in Combat.message_cache:
            return Combat.message_cache[self.summary]
        else:
            msg = await self.get_channel().get_message(self.summary)
            Combat.message_cache[msg.id] = msg
            return msg

    async def final(self):
        """Final commit/update."""
        await self.commit()
        await self.update_summary()

    async def end(self):
        """Ends combat in a channel."""
        for c in self._combatants:
            c.on_remove()
        await self.ctx.bot.mdb.combats.delete_one({"channel": self.channel})

    def __str__(self):
        return f"Initiative in <#{self.channel}>"
from mistral.db.v2 import api as db_api
from mistral import exceptions as exc
from mistral.lang import base
from mistral.lang.v2 import actions as actions_v2
from mistral.lang.v2 import tasks as tasks_v2
from mistral.lang.v2 import workbook as wb_v2
from mistral.lang.v2 import workflows as wf_v2

V2_0 = '2.0'

ALL_VERSIONS = [V2_0]

# {workflow execution id => workflow specification}.
_WF_EX_CACHE = cachetools.LRUCache(maxsize=100)
_WF_EX_CACHE_LOCK = threading.RLock()

# {(workflow def id, workflow def updated at) => workflow specification}.
_WF_DEF_CACHE = cachetools.LRUCache(maxsize=100)
_WF_DEF_CACHE_LOCK = threading.RLock()


def parse_yaml(text):
    """Loads a text in YAML format as dictionary object.

    :param text: YAML text.
    :return: Parsed YAML document as dictionary.
    """
    try:
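# A minimal sketch (hypothetical helper, not Mistral's actual code) of how a
# module-level cache plus RLock pair like the one above is typically consulted:
def get_wf_spec_by_execution_id(wf_ex_id, build_spec):
    with _WF_EX_CACHE_LOCK:
        spec = _WF_EX_CACHE.get(wf_ex_id)
    if spec is None:
        spec = build_spec(wf_ex_id)  # stand-in for the real spec parser
        with _WF_EX_CACHE_LOCK:
            _WF_EX_CACHE[wf_ex_id] = spec
    return spec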
    :param config: bigquery.LoadJobConfig
    :return: bigquery.TableReference
    """
    print(f"Retrieving table for LoadJobConfig: {config.to_api_repr()}")
    if config.to_api_repr().get('load'):
        config = config.to_api_repr().get('load')
        if config.get('destinationTable'):
            project_id = config.get('destinationTable').get('projectId')
            dataset_id = config.get('destinationTable').get('datasetId')
            table_id = config.get('destinationTable').get('tableId')
            return bigquery.TableReference.from_string(
                f"{project_id}.{dataset_id}.{table_id}")
    return None


@cachetools.cached(cachetools.LRUCache(maxsize=1024))
def get_table_prefix(gcs_client: storage.Client, blob: storage.Blob) -> str:
    """Find the table prefix for an object_id based on the destination regex.

    Args:
        gcs_client: storage.Client
        blob: storage.Blob to parse
    Returns:
        str: table prefix
    """
    basename = os.path.basename(blob.name)
    if basename in {
            constants.BACKFILL_FILENAME,
            constants.START_BACKFILL_FILENAME,
            "_bqlock",
    }:
        # These files will not match the regex and always should appear at the
import cachetools
import gramex
import markdown
import yaml

md = markdown.Markdown(extensions=[
    'markdown.extensions.extra',
    'markdown.extensions.meta',
    'markdown.extensions.codehilite',
    'markdown.extensions.smarty',
    'markdown.extensions.sane_lists',
    'markdown.extensions.fenced_code',
    'markdown.extensions.toc',
], output_format='html5')

# Create a cache for guide markdown content
md_cache = cachetools.LRUCache(maxsize=5000000, getsizeof=len)


def markdown_template(content, handler):
    if content not in md_cache:
        md_cache[content] = md.convert(content)
    kwargs = {
        'classes': '',
        # GUIDE_ROOT has the absolute URL of the Gramex guide
        'GUIDE_ROOT': gramex.config.variables.GUIDE_ROOT,
        'body': md_cache[content],
        'title': ''
    }
    for key, val in md.Meta.items():
        kwargs[key] = val[0]
    if 'xsrf' in content:
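# Note on the pattern above: passing getsizeof=len makes the LRUCache bound
# the *sum* of len(value) over cached entries (here roughly 5 MB of rendered
# HTML) rather than the entry count. A tiny standalone illustration:
sized = cachetools.LRUCache(maxsize=10, getsizeof=len)
sized['a'] = 'x' * 6  # uses 6 of 10
sized['b'] = 'y' * 4  # uses the remaining 4; the next insert evicts the LRU entry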
from tfx.orchestration.experimental.core import task_gen
from tfx.orchestration.experimental.core import task_gen_utils
from tfx.orchestration.portable import cache_utils
from tfx.orchestration.portable import execution_publish_utils
from tfx.orchestration.portable import outputs_utils
from tfx.orchestration.portable.mlmd import execution_lib
from tfx.proto.orchestration import pipeline_pb2
from tfx.utils import status as status_lib
from tfx.utils import topsort
from google.protobuf import any_pb2
from ml_metadata.proto import metadata_store_pb2

# Caches nodes that completed successfully so that we don't have to query MLMD
# for the status of those nodes repeatedly.
_successful_nodes_cache = cachetools.LRUCache(maxsize=1024)


class SyncPipelineTaskGenerator(task_gen.TaskGenerator):
    """Task generator for executing a sync pipeline.

    Calling `generate` is not thread-safe. Concurrent calls to `generate`
    should be explicitly serialized. Since MLMD may be updated upon call to
    `generate`, it's also not safe to call `generate` on different instances
    of this class where the instances refer to the same MLMD db and the same
    pipeline IR.
    """

    def __init__(self,
                 mlmd_handle: metadata.Metadata,
                 pipeline_state: pstate.PipelineState,
                 is_task_id_tracked_fn: Callable[[task_lib.TaskId], bool],
                 service_job_manager: service_jobs.ServiceJobManager):
def get_toolbox_base(predictors, response, toolbox, param_mut_prob):
    metadata_dict = dict()
    latitude_longitude = np.load('../data/SweData/metadata/latlon.npy')
    elevation = np.load('../data/SweData/metadata/elevation.npy')
    aspect = np.load('../data/SweData/metadata/aspect.npy')
    metadata_dict["LatLon"] = latitude_longitude
    metadata_dict["Elevation"] = np.repeat(elevation, 3)
    metadata_dict["Aspect"] = np.repeat(aspect, 3)
    metadata_dict["Response"] = response
    predictors_dict = [None, None, None]
    predictors_indices = np.arange(predictors.shape[1])
    predictors_dict[0] = predictors[:, predictors_indices % 3 == 0]
    predictors_dict[1] = predictors[:, predictors_indices % 3 == 1]
    predictors_dict[2] = predictors[:, predictors_indices % 3 == 2]
    metadata_dict["Predictors"] = predictors_dict

    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("select", tools.selRandom)

    # Crossover
    toolbox.register("mate", gp.cxOnePoint)
    toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
    toolbox.decorate("mate", gp.staticLimit(key=len, max_value=300))

    # Mutation
    toolbox.register("expr_mutation", gp.genFull, min_=0, max_=2)
    toolbox.register("subtree_mutate", gp.mutUniform,
                     expr=toolbox.expr_mutation, pset=toolbox.pset)
    toolbox.decorate("subtree_mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
    toolbox.decorate("subtree_mutate", gp.staticLimit(key=len, max_value=300))
    toolbox.register("parameter_mutation", mutation.one_point_parameter_mutation,
                     toolbox=toolbox, metadata=metadata_dict,
                     two_point_scale=0.005, radius_scale=0.25, iterations=20)
    toolbox.register("mutate", mutation.multi_mutation,
                     mutations=[toolbox.subtree_mutate, toolbox.parameter_mutation],
                     probs=[0.05, param_mut_prob])

    # Fast evaluation configuration
    numpy_response = np.array(response)
    numpy_predictors = np.array(predictors)
    expression_dict = cachetools.LRUCache(maxsize=2000)
    toolbox.register("error_func", fast_evaluate.anti_correlation, response=numpy_response)
    toolbox.register("evaluate_error", fast_numpy_evaluate_metadata,
                     context=toolbox.pset.context, predictors=numpy_predictors,
                     metadata=metadata_dict, error_function=toolbox.error_func,
                     expression_dict=expression_dict, arg_prefix="ARG")
    toolbox.register("evaluate", afpo.evaluate_age_fitness_size, error_func=toolbox.evaluate_error)

    random_data_points = np.random.choice(len(predictors), 1000, replace=False)
    subset_predictors = numpy_predictors[random_data_points, :]
    toolbox.register("calc_semantics", calculate_semantics,
                     context=toolbox.pset.context, predictors=subset_predictors,
                     metadata=metadata_dict)
    toolbox.register("simplify_front", simplify.simplify_all, toolbox=toolbox,
                     size_threshold=0, semantics_threshold=10e-5,
                     precompute_semantics=True)

    pop = toolbox.population(n=1000)
    mstats = reports.configure_inf_protected_stats()
    pareto_archive = archive.ParetoFrontSavingArchive(
        frequency=1,
        criteria_chooser=archive.pick_fitness_size_from_fitness_age_size,
        simplifier=toolbox.simplify_front)
    toolbox.register("run", afpo.afpo, population=pop, toolbox=toolbox,
                     xover_prob=0.75, mut_prob=0.20, ngen=1000,
                     tournament_size=2, num_randoms=1, stats=mstats,
                     mut_archive=None, hall_of_fame=pareto_archive)
    toolbox.register("save", reports.save_log_to_csv)
    toolbox.decorate("save", reports.save_archive(pareto_archive))
    return toolbox
def __init__(self, system):
    self.system = system
    self.loaders = {}  # name -> loader
    self.lseq = []  # loaders in registration order
    self.cache = cachetools.LRUCache(maxsize=1000)
    # cachetools' Cache.__getitem__ falls back to `self.__missing__(key)`
    # via an ordinary attribute lookup, so assigning an instance attribute
    # here installs `get_cached` as the load-on-miss hook.
    self.cache.__missing__ = self.get_cached
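# The instance-level `__missing__` assignment above works because cachetools'
# Cache.__getitem__ calls `self.__missing__(key)` through a normal attribute
# lookup rather than the implicit dunder protocol. A subclass expresses the
# same load-on-miss behaviour more explicitly (a sketch; `loader` is any
# callable mapping key -> value):
import cachetools


class LoadingLRUCache(cachetools.LRUCache):
    def __init__(self, maxsize, loader):
        super().__init__(maxsize=maxsize)
        self._loader = loader

    def __missing__(self, key):
        # Called by Cache.__getitem__ on a miss: load, store, and return.
        value = self[key] = self._loader(key)
        return value

# Usage: `cache = LoadingLRUCache(1000, self.get_cached)` gives the same
# behaviour as patching `cache.__missing__` on the instance.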
def __init__(self, path, **kwargs): """ Initialize the tile class. See the base class for other available parameters. :param path: a filesystem path for the tile source. """ super(ND2FileTileSource, self).__init__(path, **kwargs) self._largeImagePath = self._getLargeImagePath() self._pixelInfo = {} try: self._nd2 = nd2reader.ND2Reader(self._largeImagePath) except (UnicodeDecodeError, nd2reader.exceptions.InvalidVersionError, nd2reader.exceptions.EmptyFileError): raise TileSourceException('File cannot be opened via nd2reader.') self._logger = config.getConfig('logger') self._tileLock = threading.RLock() self._recentFrames = cachetools.LRUCache(maxsize=6) self.sizeX = self._nd2.metadata['width'] self.sizeY = self._nd2.metadata['height'] self.tileWidth = self.tileHeight = self._tileSize if self.sizeX <= self._singleTileThreshold and self.sizeY <= self._singleTileThreshold: self.tileWidth = self.sizeX self.tileHeight = self.sizeY self.levels = int( max( 1, math.ceil( math.log( float(max(self.sizeX, self.sizeY)) / self.tileWidth) / math.log(2)) + 1)) # There is one file that throws a warning 'Z-levels details missing in # metadata'. In this instance, there should be no z-levels. try: if (self._nd2.sizes.get('z') and self._nd2.sizes.get('z') == self._nd2.sizes.get('v') and not len( self._nd2._parser._raw_metadata._parse_dimension( r""".*?Z\((\d+)\).*?""")) and self._nd2.sizes['v'] * self._nd2.sizes.get('t', 1) == self._nd2.metadata.get('total_images_per_channel')): self._nd2._parser._raw_metadata._metadata_parsed[ 'z_levels'] = [] self._nd2.sizes['z'] = 1 except Exception: pass frames = self._nd2.sizes.get('c', 1) * self._nd2.metadata.get( 'total_images_per_channel', 0) self._framecount = frames if frames else None self._nd2.iter_axes = sorted( [a for a in self._nd2.axes if a not in {'x', 'y', 'v'}], reverse=True) if frames and len(self._nd2) != frames and 'v' in self._nd2.axes: self._nd2.iter_axes = ['v'] + self._nd2.iter_axes if 'c' in self._nd2.iter_axes and len( self._nd2.metadata.get('channels', [])): self._bandnames = { name.lower(): idx for idx, name in enumerate(self._nd2.metadata['channels']) } # self._nd2.metadata # {'channels': ['CY3', 'A594', 'CY5', 'DAPI'], # 'date': datetime.datetime(2019, 7, 21, 15, 13, 45), # 'events': [], # 'experiment': {'description': '', # 'loops': [{'duration': 0, # 'sampling_interval': 0.0, # 'start': 0, # 'stimulation': False}]}, # 'fields_of_view': range(0, 2500), # v # 'frames': [0], # 'height': 1022, # 'num_frames': 1, # 'pixel_microns': 0.219080212825376, # 'total_images_per_channel': 2500, # 'width': 1024, # 'z_coordinates': [1890.8000000000002, # 1891.025, # 1891.1750000000002, # ... # 1905.2250000000001, # 1905.125, # 1905.1000000000001], # 'z_levels': range(0, 2500)} # self._nd2.axes ['x', 'y', 'c', 't', 'z', 'v'] # self._nd2.ndim 6 # self._nd2.pixel_type numpy.float64 # self._nd2.sizes {'x': 1024, 'y': 1022, 'c': 4, 't': 1, 'z': 2500, 'v': 2500} self._getND2Metadata()
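# A hedged sketch of the recent-frame cache this tile source relies on
# (`decode_frame` is a stand-in for the actual nd2reader frame access):
# decoding a frame is expensive, so a small LRU of recently decoded frames
# is kept, guarded by the same lock that serializes access to the reader.
import threading

import cachetools

_tile_lock = threading.RLock()
_recent_frames = cachetools.LRUCache(maxsize=6)


def get_frame(frame_index, decode_frame):
    with _tile_lock:
        if frame_index not in _recent_frames:
            _recent_frames[frame_index] = decode_frame(frame_index)
        return _recent_frames[frame_index]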
__tablename__ = 'rss_parser_feed_name_lut'
name = 'rss_parser_feed_name_lut'

id = Column(BigInteger, primary_key=True, index=True)

feed_netloc = Column(Text, nullable=False, index=True)
# Most feeds are identified only by netloc, so feed_url has to be nullable,
# at least until the first feed scrape fills it in.
feed_url = Column(Text, nullable=True, index=True)

feed_id = Column(BigInteger, ForeignKey('rss_parser_funcs.id'),
                 nullable=False, index=True)

__table_args__ = (UniqueConstraint('feed_netloc', 'feed_id'), )


# LRU Cache of function text -> function objects.
PARSED_FUNCTION_CACHE = cachetools.LRUCache(maxsize=5000)


def str_to_ast(instr, name):
    print("Compiling function from DB (str_to_ast) for '%s'" % name)
    # compile()/ast.parse() need a trailing newline to cleanly terminate the
    # final statement, so append a couple of extra ones to be safe.
    func_str = instr + "\n\n"
    func_container = ast.parse(func_str, "<db_for_<{}>>".format(name), "exec")
    return func_container


def str_to_function(instr, name):
    instr = instr.strip()

    # Use the loaded function when possible.
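    # The original function is truncated here; the following is a hedged
    # reconstruction from the surrounding comments ("LRU Cache of function
    # text -> function objects", "Use the loaded function when possible"):
    # consult PARSED_FUNCTION_CACHE first, and only parse/compile/exec the
    # source on a miss. Looking up `name` in the exec scope assumes the
    # stored text defines a function by that name.
    if instr in PARSED_FUNCTION_CACHE:
        return PARSED_FUNCTION_CACHE[instr]
    func_container = str_to_ast(instr, name)
    code = compile(func_container, "<db_for_<{}>>".format(name), "exec")
    scope = {}
    exec(code, scope)
    func = PARSED_FUNCTION_CACHE[instr] = scope[name]
    return func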
def from_directory(
    cls,
    directory,
    *,
    ignore_re_dirs=None,
    ignore_re_files=None,
    readers_by_mimetype=None,
    mimetypes_by_file_ext=None,
    subdirectory_handler=None,
    key_from_filename=strip_suffixes,
    metadata=None,
    sorting=None,
    access_policy=None,
    principal=None,
    error_if_missing=True,
    greedy=False,
    poll_interval=DEFAULT_POLL_INTERVAL,
    entries_stale_after=None,
    metadata_stale_after=None,
    **kwargs,
):
    """
    Construct an Adapter from a directory of files.

    Parameters
    ----------
    ignore_re_dirs : str, optional
        Regular expression. Matched directories will be ignored.
    ignore_re_files : str, optional
        Regular expression. Matched files will be ignored.
    readers_by_mimetype : dict, optional
        Map a mimetype to a Reader suitable for that mimetype
    mimetypes_by_file_ext : dict, optional
        Map a file extension (e.g. '.tif') to a mimetype (e.g. 'image/tiff')
    subdirectory_handler : callable, optional
        Given a (relative) filepath to a directory, return an Adapter for
        it, or None to fall back to the default handling.
    key_from_filename : callable[str] -> str
        Given a filename, return the key for the item that will represent it.
        By default, this strips off the suffixes, so "a.tif" -> "a".
    metadata : dict, optional
        Metadata for the top-level node of this tree.
    access_policy : AccessPolicy, optional
    principal : str, optional
    error_if_missing : boolean, optional
        If True (default) raise an error if the directory does not exist.
        If False, wait and poll for the directory to be created later.
    greedy : boolean, optional
        If False (default) instantiate nodes in the tree lazily, when first
        accessed. If True, instantiate them greedily when the underlying
        files are first found.
    poll_interval : float or False, optional
        Time in seconds between scans of the directory for removed or
        changed files. If False or 0, do not poll for changes. Default
        value is 0.2 seconds, subject to change without notice.
    entries_stale_after : timedelta
        The server uses this to communicate to the client how long it
        should rely on a local cache before checking back for changes.
    metadata_stale_after : timedelta
        The server uses this to communicate to the client how long it
        should rely on a local cache before checking back for changes.
    """
    if error_if_missing:
        if not os.path.isdir(directory):
            raise ValueError(
                f"{directory} is not a directory. "
                "To run anyway, in anticipation of the directory "
                "appearing later, use error_if_missing=False.")
    readers_by_mimetype = readers_by_mimetype or {}
    # If readers_by_mimetype comes from a configuration file,
    # objects are given as importable strings, like "package.module:Reader".
    for key, value in list(readers_by_mimetype.items()):
        if isinstance(value, str):
            readers_by_mimetype[key] = import_object(value)
    if isinstance(key_from_filename, str):
        key_from_filename = import_object(key_from_filename)
    if isinstance(subdirectory_handler, str):
        subdirectory_handler = import_object(subdirectory_handler)
    # User-provided readers take precedence over defaults.
    merged_readers_by_mimetype = collections.ChainMap(
        readers_by_mimetype, cls.DEFAULT_READERS_BY_MIMETYPE)
    mimetypes_by_file_ext = mimetypes_by_file_ext or {}
    merged_mimetypes_by_file_ext = collections.ChainMap(
        mimetypes_by_file_ext, cls.DEFAULT_MIMETYPES_BY_FILE_EXT)
    # Map subdirectory path parts, as in ('a', 'b', 'c'), to the mapping of
    # partials for that subdirectory.
    # This single index represents the entire nested directory structure.
    # (We could have done this recursively, with each sub-Adapter watching
    # its own subdirectory, but there are efficiencies to be gained by doing
    # a single walk of the nested directory structure and having a single
    # thread watching for changes within that structure.)
    mapping = CachingMap(
        {}, cache=cachetools.LRUCache(maxsize=MAX_ADAPTER_CACHE_SIZE))
    index = {(): mapping}
    # Map key to set of filepaths that map to that key.
    collision_tracker = collections.defaultdict(set)
    # This is a trie for efficiently checking whether a given subdirectory
    # is claimed by a subdirectory_handler.
    subdirectory_trie = {}
    # 1. Start watching directory for changes and accumulating a queue of them.
    # 2. Do an initial scan of the files in the directory.
    # 3. When the initial scan completes, start processing changes. This
    #    will cover changes that occurred during or after the initial scan
    #    and avoid a possible race condition.
    initial_scan_complete = []
    watcher_thread_kill_switch = []
    manual_trigger = queue.Queue()
    watcher_thread = threading.Thread(
        target=_watch,
        args=(
            directory,
            ignore_re_files,
            ignore_re_dirs,
            index,
            subdirectory_trie,
            subdirectory_handler,
            merged_readers_by_mimetype,
            merged_mimetypes_by_file_ext,
            key_from_filename,
            initial_scan_complete,
            watcher_thread_kill_switch,
            manual_trigger,
            greedy,
            collision_tracker,
            poll_interval,
        ),
        daemon=True,
        name="tiled-watch-filesystem-changes",
    )
    if poll_interval:
        watcher_thread.start()
    compiled_ignore_re_dirs = (re.compile(ignore_re_dirs)
                               if ignore_re_dirs is not None else None)
    compiled_ignore_re_files = (re.compile(ignore_re_files)
                                if ignore_re_files is not None else None)
    for root, subdirectories, files in os.walk(directory, topdown=True):
        parts = Path(root).relative_to(directory).parts
        # Skip this root if it corresponds to a directory managed by a handler.
        # TODO Let the top-level directory be managed by a handler?
        if parts:
            d = subdirectory_trie
            for part in parts:
                if part not in d:
                    this_root_is_separately_managed = False
                    break
                if not isinstance(d[part], dict):
                    this_root_is_separately_managed = True
                    break
                d = d[part]
            else:
                this_root_is_separately_managed = True
            if this_root_is_separately_managed:
                continue
        # Account for ignore_re_dirs and update which subdirectories we will
        # traverse. Per the docstring, *matched* directories are ignored, so
        # a subdirectory is kept only when the pattern does not match.
        valid_subdirectories = []
        for d in subdirectories:
            if (ignore_re_dirs is None) or not compiled_ignore_re_dirs.match(
                    str(Path(*(parts + (d, ))))):
                valid_subdirectories.append(d)
        subdirectories[:] = valid_subdirectories
        for subdirectory in subdirectories:
            _new_subdir(
                index,
                subdirectory_trie,
                subdirectory_handler,
                root,
                parts,
                subdirectory,
                greedy,
            )
        # Account for ignore_re_files and update which files we will
        # traverse; as above, matched files are skipped.
        valid_files = []
        for f in files:
            if (ignore_re_files is None) or not compiled_ignore_re_files.match(
                    str(Path(*(parts + (f, ))))):
                valid_files.append(f)
        files[:] = valid_files
        for filename in files:
            # Add items to the mapping for this root directory.
            key = key_from_filename(filename)
            filepath = Path(*parts, filename)
            if (*parts, key) in collision_tracker:
                # There is already a filepath that maps to this key!
warnings.warn( COLLISION_WARNING.format( filepath=filepath, existing=[ str(p) for p in collision_tracker[(*parts, key)] ], key=key, )) index[parts].remove(key) else: try: reader_factory = _reader_factory_for_file( merged_readers_by_mimetype, merged_mimetypes_by_file_ext, Path(root, filename), ) except NoReaderAvailable: pass else: index[parts].set(key, reader_factory) if greedy: index[parts][key] collision_tracker[(*parts, key)].add(filepath) # Appending any object will cause bool(initial_scan_complete) to # evaluate to True. initial_scan_complete.append(object()) return cls( index[()], directory=directory, index=index, subdirectory_trie=subdirectory_trie, subdirectory_handler=subdirectory_handler, watcher_thread_kill_switch=watcher_thread_kill_switch, manual_trigger=manual_trigger, metadata=metadata, sorting=sorting, principal=principal, access_policy=access_policy, entries_stale_after=entries_stale_after, metadata_stale_after=metadata_stale_after, # The __init__ of this class does not accept any other # kwargs, but subclasses can use this to set up additional # instance state. **kwargs, )
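# A hedged usage sketch of the classmethod above (the owning class name and
# the regex are illustrative; in tiled this lives on the directory-backed
# adapter class):
#
#     tree = Adapter.from_directory(
#         "/data/scans",
#         ignore_re_files=r".*\.tmp$",    # matched files are skipped
#         poll_interval=0.2,              # rescan the tree for changes
#         error_if_missing=False,         # wait for the directory to appear
#     )
#
# Internally, each directory level is a CachingMap backed by an LRUCache, so
# at most MAX_ADAPTER_CACHE_SIZE instantiated adapters are kept alive at once
# while the full key index remains available.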