def test_executing_container():
    out_dir = tempfile.mkdtemp()
    cnt = exe.create_container(IMAGE,
                               biobox_cfg(hlp.short_read_fastq()),
                               {"output": out_dir},
                               "default",
                               {"detach": False})
    id_ = cnt['Id']
    util.client().start(id_)
    util.client().wait(id_)
    assert funcy.get_in(util.client().inspect_container(id_),
                        ['State', 'ExitCode']) == 0
    assert os.path.isfile(os.path.join(out_dir, 'contigs.fa'))
    hlp.clean_up_container(id_)
def test_show_running_executor(tmp_dir, scm, dvc, exp_stage):
    baseline_rev = scm.get_rev()
    dvc.experiments.run(exp_stage.addressing, params=["foo=2"], queue=True)
    exp_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    pid_dir = os.path.join(dvc.tmp_dir, dvc.experiments.EXEC_PID_DIR)
    makedirs(pid_dir, True)
    info = ExecutorInfo(None, None, None, BaseExecutor.DEFAULT_LOCATION)
    pidfile = os.path.join(pid_dir, f"{exp_rev}{BaseExecutor.PIDFILE_EXT}")
    (tmp_dir / pidfile).dump(info.to_dict())

    results = dvc.experiments.show()
    exp_data = get_in(results, [baseline_rev, exp_rev, "data"])
    assert not exp_data["queued"]
    assert exp_data["running"]
    assert exp_data["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
def test_api_missing_local_cache_exists_on_remote(
    tmp_dir,
    scm,
    dvc,
    as_external,
    remote,
    files,
    to_read,
):
    tmp_dir.dvc_gen(files, commit="DVC track files")
    dvc.push()

    # Remove cache to make foo missing
    remove(dvc.cache.local.cache_dir)
    remove(first(files))

    repo_url = f"file://{tmp_dir}" if as_external else None
    file_content = get_in(files, to_read.split(os.sep))
    assert api.read(to_read, repo=repo_url) == file_content
def test_show_running_checkpoint(
    tmp_dir, scm, dvc, checkpoint_stage, workspace, mocker
):
    from dvc.repo.experiments.base import EXEC_BASELINE, EXEC_BRANCH
    from dvc.repo.experiments.executor.local import TempDirExecutor

    baseline_rev = scm.get_rev()
    dvc.experiments.run(
        checkpoint_stage.addressing, params=["foo=2"], queue=True
    )
    stash_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    run_results = dvc.experiments.run(run_all=True)
    checkpoint_rev = first(run_results)
    exp_ref = first(exp_refs_by_rev(scm, checkpoint_rev))

    pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)
    executor = (
        BaseExecutor.DEFAULT_LOCATION
        if workspace
        else TempDirExecutor.DEFAULT_LOCATION
    )
    info = make_executor_info(
        git_url="foo.git",
        baseline_rev=baseline_rev,
        location=executor,
    )
    rev = "workspace" if workspace else stash_rev
    pidfile = os.path.join(pid_dir, f"{rev}{BaseExecutor.INFOFILE_EXT}")
    makedirs(os.path.dirname(pidfile), True)
    (tmp_dir / pidfile).dump_json(info.asdict())

    mocker.patch.object(
        BaseExecutor, "fetch_exps", return_value=[str(exp_ref)]
    )
    if workspace:
        scm.set_ref(EXEC_BRANCH, str(exp_ref), symbolic=True)
        scm.set_ref(EXEC_BASELINE, str(baseline_rev))
        scm.checkout(str(exp_ref))

    results = dvc.experiments.show()
    checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"])
    assert checkpoint_res["running"]
    assert checkpoint_res["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
def test_show_running_checkpoint(
    tmp_dir, scm, dvc, checkpoint_stage, workspace, mocker
):
    from dvc.repo.experiments.base import EXEC_BRANCH
    from dvc.repo.experiments.executor.local import TempDirExecutor
    from dvc.repo.experiments.utils import exp_refs_by_rev

    baseline_rev = scm.get_rev()
    dvc.experiments.run(
        checkpoint_stage.addressing, params=["foo=2"], queue=True
    )
    stash_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    run_results = dvc.experiments.run(run_all=True)
    checkpoint_rev = first(run_results)
    exp_ref = first(exp_refs_by_rev(scm, checkpoint_rev))

    pid_dir = os.path.join(dvc.tmp_dir, dvc.experiments.EXEC_PID_DIR)
    makedirs(pid_dir, True)
    executor = (
        BaseExecutor.DEFAULT_LOCATION
        if workspace
        else TempDirExecutor.DEFAULT_LOCATION
    )
    info = ExecutorInfo(123, "foo.git", baseline_rev, executor)
    rev = "workspace" if workspace else stash_rev
    pidfile = os.path.join(pid_dir, f"{rev}{BaseExecutor.PIDFILE_EXT}")
    dump_yaml(pidfile, info.to_dict())

    mocker.patch.object(
        BaseExecutor, "fetch_exps", return_value=[str(exp_ref)]
    )
    if workspace:
        scm.set_ref(EXEC_BRANCH, str(exp_ref), symbolic=True)

    results = dvc.experiments.show()
    checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"])
    assert checkpoint_res["running"]
    assert checkpoint_res["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
def test_log_errors(
    tmp_dir, scm, dvc, run_copy_metrics, file, error_path, capsys
):
    metric = [{"val": 2}, {"val": 3}]
    (tmp_dir / "metric_t.yaml").dump(metric)
    run_copy_metrics(
        "metric_t.yaml",
        "plot.yaml",
        plots=["plot.yaml"],
        single_stage=False,
        name="train",
    )
    scm.tag("v1")

    with open(file, "a", encoding="utf-8") as fd:
        fd.write("\nMALFORMED!")

    result = dvc.plots.show(onerror=onerror_collect)

    _, error = capsys.readouterr()
    assert isinstance(get_in(result, error_path), YAMLFileCorruptedError)
    assert (
        "DVC failed to load some plots for following revisions: 'workspace'."
        in error
    )
def test_log_errors(
    tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_path
):
    tmp_dir.gen("metrics_t.yaml", "m: 1.1")
    run_copy_metrics(
        "metrics_t.yaml",
        "metrics.yaml",
        metrics=["metrics.yaml"],
        single_stage=False,
        name="train",
    )
    scm.tag("v1")

    with open(file, "a") as fd:
        fd.write("\nMALFORMED!")

    result = dvc.metrics.show(revs=["v1"])

    _, error = capsys.readouterr()
    assert isinstance(get_in(result, error_path), YAMLFileCorruptedError)
    assert (
        "DVC failed to load some metrics for following revisions: 'workspace'."
        in error
    )
def parse_event_description(event: dict) -> Tuple[str, str]:
    """
    Parses the Gcal event's description for the `airtable_record_id` and `source`

    Examples:
        >>> event['description'] = "38xfjrf30jxojr33pd201jf s3"
        >>> parse_event_description(event)
        ('38xfjrf30jxojr33pd201jf', 's3')

        >>> event['description'] = "38xfjrf30jxojr33pd201jf"
        >>> parse_event_description(event)
        ('38xfjrf30jxojr33pd201jf', None)

    Args:
        event: Dictionary that stores the event's information

    Returns:
        Tuple of `airtable_record_id` and `source`
    """
    string_to_parse = get_in(event, ['description'], "").split(" ")
    airtable_record_id = string_to_parse[0]
    source = string_to_parse[1:2] or None
    if source:
        source = source[0]
    return airtable_record_id, source
def process_deadline_change(update_fields: dict, record: dict,
                            calendar: Calendar) -> Dict:
    """
    Detects records where the `Deadline` changed and updates the
    `update_fields` payload accordingly

    A deadline change is detected when the `deadline` does not equal the
    `lastDeadline` field.

    Actions:
        1. Updates the Gcal event, if the detected deadline change did not
           originate from the webhook
        2. Updates the `Deadline Group`, `Day`, and `lastDeadline` fields in
           Airtable

    Args:
        update_fields: The payload dictionary that will be sent in a
            patch/post request to the Airtable API
        record: The individual record being processed
        calendar: The :obj:`calendar_request.Calendar` instance corresponding
            to the calendar out of which we're working

    Returns:
        An updated version of `update_fields` to be sent to Airtable in a
        patch/post request
    """
    deadline = get_in(record, ["fields", "Deadline"])
    last_deadline = get_in(record, ["fields", "lastDeadline"], "")
    if deadline != last_deadline:
        calendar_event_id = get_in(record, ["fields", "calendarEventId"])
        duration = get_in(record, ["fields", "duration"])
        last_calendar_deadline = get_in(
            record, ["fields", "lastCalendarDeadline"], "")[0:10]
        airtable_record_id = get_in(record, ["id"])

        deadline_date = (datetime.strptime(deadline, "%Y-%m-%d")
                         + timedelta(hours=16))
        days_to_sunday = 6 - deadline_date.weekday()
        next_sunday = (deadline_date
                       + timedelta(days=days_to_sunday)).strftime("%m/%d")
        if not duration:
            duration = 1

        # valid calendar_event_id; and wasn't recently updated by gcal webhook
        if calendar_event_id and last_calendar_deadline != deadline:
            calendar.patch_event(calendar_event_id, airtable_record_id,
                                 start=deadline_date, duration=duration)
        update_fields.update({
            "Deadline Group": next_sunday,
            "Day": DAY_OF_WEEK[deadline_date.weekday()],
            "lastDeadline": deadline
        })
    return update_fields
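# A minimal usage sketch for process_deadline_change, assuming a simplified
# record shape and a DAY_OF_WEEK mapping; neither is taken from the real
# module, so treat both as illustrative stand-ins.
DAY_OF_WEEK = ["Monday", "Tuesday", "Wednesday", "Thursday",
               "Friday", "Saturday", "Sunday"]  # assumed weekday() -> name map

sample_record = {
    "id": "recXXXXXXXXXXXXXX",  # hypothetical Airtable record id
    "fields": {
        "Deadline": "2021-06-02",      # a Wednesday
        "lastDeadline": "2021-05-26",  # differs, so a change is detected
    },
}
# No calendarEventId is set, so only the Airtable payload changes:
# process_deadline_change({}, sample_record, calendar=None) returns
# {"Deadline Group": "06/06", "Day": "Wednesday", "lastDeadline": "2021-06-02"}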
def metadata_lookup(path, container_id):
    """
    Look up metadata about the docker container for the given path
    """
    return funcy.get_in(util.client().inspect_container(container_id), path)
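# Usage sketch (hypothetical container id): `docker inspect` output is a
# nested dict, so any path works as a list of keys, e.g.
# metadata_lookup(['State', 'ExitCode'], 'abc123')  # -> 0 for a clean exit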
def profile(self):
    with suppress(TypeError):
        return get_in(self, ["json_metadata", "profile"], default={})
def image_name(app):
    return funcy.get_in(app, ["task", "image", "name"])
def fail_code_error_message(node):
    msg = "The QC entry '{}' is missing a failure code"
    return msg.format(funcy.get_in(node, [0, 'name']))
def flatten_dict(source_dict, name_delimiter='_', inner_name=False):
    """
    Flatten a nested dict

    Parameters
    ----------
    source_dict : nested dict
    name_delimiter : delimiter joining flattened names (ignored when
        inner_name is True)
    inner_name : bool, default False
        whether to use the innermost name as the returned dict key

    Returns
    -------
    flattened dict

    Examples
    --------
    >>> from tidyframe import flatten_dict
    >>> nest_dict = {
    ...     'a': 1,
    ...     'b': [1, 2],
    ...     'c': {
    ...         'cc1': 3,
    ...         'cc2': 4
    ...     },
    ...     'd': {
    ...         'd1': 5,
    ...         'd2': {
    ...             'dd1': 6,
    ...             'dd2': 7
    ...         }
    ...     }
    ... }
    >>> flatten_dict(nest_dict)
    {'a': 1, 'b': [1, 2], 'c_cc1': 3, 'c_cc2': 4, 'd_d1': 5, 'd_d2_dd1': 6, 'd_d2_dd2': 7}
    >>> flatten_dict(nest_dict, inner_name=True)
    {'a': 1, 'b': [1, 2], 'cc1': 3, 'cc2': 4, 'd1': 5, 'dd1': 6, 'dd2': 7}
    """
    assert isinstance(source_dict, dict), "source_dict is not a dict"
    # Map each flattened name to [is_dict, path]; dict-valued entries are
    # expanded one level per pass until none remain.
    json_name = {}
    for key in source_dict.keys():
        is_dict = isinstance(get_in(source_dict, [key]), dict)
        json_name[key] = [is_dict, [key]]
    while True:
        key_inner = list(filter(lambda x: json_name.get(x)[0], json_name))
        if not key_inner:
            break
        for x in key_inner:
            dict_to_update_json_name = {}
            val = json_name.get(x)[1]
            for key in get_in(source_dict, val).keys():
                val_in = copy(val)
                val_in.append(key)
                flat_key = reduce(lambda a, b: a + name_delimiter + b, val_in)
                is_dict = isinstance(get_in(source_dict, val_in), dict)
                dict_to_update_json_name[flat_key] = [is_dict, val_in]
            json_name.update(dict_to_update_json_name)
            json_name.pop(x)
    if inner_name:
        return {
            json_name.get(x)[1][-1]: get_in(source_dict, json_name.get(x)[1])
            for x in json_name.keys()
        }
    return {
        x: get_in(source_dict, json_name.get(x)[1])
        for x in json_name.keys()
    }
def hackuity_search_vulnerabilities_command(
        client: Client, args: Dict[str, Any],
        hy_global_only: bool) -> CommandResults:
    asset_name = args.get("asset_name")
    asset_type = args.get("asset_type")
    attribute = args.get("attribute")
    limit = int(args["limit"])
    cvss_min = none_or_apply(args.get("cvss_min"), float)
    cvss_max = none_or_apply(args.get("cvss_max"), float)
    vuln_type = args.get("vuln_type")
    trs_min = none_or_apply(args.get("trs_min"), int)
    trs_max = none_or_apply(args.get("trs_max"), int)
    raw_response = client.fetch_aggfindings(
        asset_name=asset_name,
        asset_type=asset_type,
        attribute=attribute,
        cvss_min=cvss_min,
        cvss_max=cvss_max,
        hy_global_only=hy_global_only,
        limit=limit,
        trs_min=trs_min,
        trs_max=trs_max,
        vuln_type=vuln_type,
    )
    outputs = []
    for aggfinding in raw_response:
        # Each remap spec: (source path, destination path[, default[, formatter]])
        output = remap_item(
            aggfinding,
            [
                (
                    ["aggExtAttribute", "sharedDetailsLocal",
                     "aggFindingAttributes"],
                    ["Attributes"],
                    [],
                ),
                (
                    ["nodeStatus", "unIgnoredOpen", "total", "environmental",
                     "max"],
                    ["Score", "CVSS"],
                    0.0,
                ),
                (
                    ["aggExtAttribute", "sharedDetailsLocal", "title"],
                    ["Description"],
                ),
                (
                    ["id"],
                    ["ID"],
                ),
                (
                    ["aggExtAttribute", "firstDeliveredAt"],
                    ["Seen", "First"],
                    None,
                    format_date,
                ),
                (
                    ["nodeStatus", "unIgnoredOpen", "total", "hyScore", "max"],
                    ["Score", "TRS"],
                    0,
                ),
                (
                    ["nodeStatus", "total", "nb"],
                    ["Findings", "Total"],
                    0,
                ),
                (
                    ["nodeStatus", "unIgnoredOpen", "total", "nb"],
                    ["Findings", "Open"],
                    0,
                ),
                (
                    ["nodeStatus", "unIgnoredClosed", "total", "nb"],
                    ["Findings", "Closed"],
                    0,
                ),
                (
                    ["nodeStatus", "ignored", "nb"],
                    ["Findings", "Ignored"],
                    0,
                ),
            ],
        )
        output["VulnTypes"] = [{
            "ID": vuln_type,
            "Name": client.get_vulnerability_name(vuln_type),
        } for vuln_type in get_in(
            aggfinding, ["aggExtAttribute", "vulnTypeIds"], [])]
        outputs.append(output)
    return CommandResults(
        outputs_prefix="Hackuity.Vulnerabilities",
        outputs=outputs,
        readable_output=tableToMarkdown(
            f"{'VulnDB' if hy_global_only else 'Provider'} vulnerabilities",
            outputs,
            headers=[
                "VulnTypes",
                "Description",
                *(["Attributes"] if hy_global_only else []),
                "Score",
                "Findings",
                "Seen",
            ],
        ),
    )
def get(path_str):
    path = path_str.split('.')
    return lambda x: get_in(x, path)
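# Minimal usage sketch: get() curries a dotted path into a lookup callable,
# which composes with map() over lists of nested dicts. The data below is
# made up for illustration.
# get('a.b')({'a': {'b': 1}})   # -> 1
# list(map(get('user.name'), [{'user': {'name': 'ada'}}]))  # -> ['ada']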
def create_qc_message(is_pass, input_node, variables):
    key = 'pass_msg' if is_pass else 'fail_msg'
    return funcy.get_in(input_node, [0, key]).format(**variables)
def get_biobox_yaml_value(app, yaml_path):
    """
    Given an xpath-style lookup, returns the value in the biobox.yaml file
    """
    biobox_file = get_output_biobox_file_contents(app)
    return funcy.get_in(biobox_file, yaml_path + ['value'])
def image_task(app):
    return funcy.get_in(app, ["task", "image", "task"])
def image_version(app):
    return funcy.get_in(app, ["task", "image", "name"]) + \
        "@sha256:" + \
        funcy.get_in(app, ["task", "image", "sha256"])
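# Example of the pinned reference this builds, with a made-up app dict:
# image_version({"task": {"image": {"name": "bioboxes/velvet",
#                                   "sha256": "deadbeef"}}})
# -> "bioboxes/velvet@sha256:deadbeef"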
def multi_char_from(_perm_index, _index_in_perm) -> Optional[MultiChar]:
    return get_in(multi_char_indices, [_perm_index, _index_in_perm])
def get_collection(db, collection):
    return get_in(db, [settings.MONGO_DB_NAME, collection])
def getmaster(rs_uuid):
    return get_in(
        cfg,
        ["sharding", rs_uuid, "replicas", uuid(replica_name), "master"],
    )
def refresh(self):
    post_author, post_permlink = resolve_identifier(self.identifier)
    post = self.steemd.get_content(post_author, post_permlink,
                                   vote_limit=self.vote_limit)
    if not post["permlink"]:
        raise PostDoesNotExist("Post does not exist: %s" % self.identifier)

    # If this 'post' comes from an operation, it might carry a patch
    if "body" in post and re.match("^@@", post["body"]):
        self.patched = True

    # TODO: Check
    # This field is returned from blockchain, but it's empty. Fill it
    try:
        reblogged_by = [
            i for i in self.steemd.get_reblogged_by(post_author,
                                                    post_permlink)
            if i != post_author
        ]
    except RPCError:
        reblogged_by = []
    post['reblogged_by'] = reblogged_by

    # Parse Times
    parse_times = [
        "active", "cashout_time", "created", "last_payout", "last_update",
        "max_cashout_time"
    ]
    for p in parse_times:
        post[p] = parse_time(post.get(p, "1970-01-01T00:00:00"))

    # Parse Amounts
    sbd_amounts = [
        'total_payout_value',
        'max_accepted_payout',
        'pending_payout_value',
        'curator_payout_value',
        'total_pending_payout_value',
        'promoted',
    ]
    for p in sbd_amounts:
        post[p] = Amount(post.get(p, "0.000 GBG"))

    # calculate trending and hot scores for sorting
    post['score_trending'] = calculate_trending(post.get('net_rshares', 0),
                                                post['created'])
    post['score_hot'] = calculate_hot(post.get('net_rshares', 0),
                                      post['created'])

    # turn json_metadata into python dict
    meta_str = post.get("json_metadata", "{}")
    post['json_metadata'] = silent(json.loads)(meta_str) or {}

    post["tags"] = []
    post['community'] = ''
    if isinstance(post['json_metadata'], dict):
        if post["depth"] == 0:
            tags = [post["parent_permlink"]]
            tags += get_in(post, ['json_metadata', 'tags'], default=[])
            # deduplicate tags while preserving their order
            tags_added = set()
            post['tags'] = [
                tag for tag in tags
                if not (tag in tags_added or tags_added.add(tag))
            ]
        post['community'] = get_in(post, ['json_metadata', 'community'],
                                   default='')

    # If this post is a comment, retrieve the root comment
    self.root_identifier, self.category = self._get_root_identifier(post)

    self._store_post(post)
def scan_etl_events(etl_id,
                    selected_columns: Optional[Iterable[str]] = None) -> None:
    """
    Scan for all events belonging to a specific ETL.

    If a list of columns is provided, then the output is limited to those
    columns. But note that the target (schema.table) and the event are always
    present.
    """
    ddb = DynamoDBStorage.factory()
    table = ddb.get_table(create_if_not_exists=False)
    available_columns = [
        "target", "step", "event", "timestamp", "elapsed", "rowcount"
    ]
    if selected_columns is None:
        selected_columns = available_columns
    # We will always select "target" and "event" to have a meaningful output.
    columns = list(
        fy.filter(
            frozenset(selected_columns).union(["target", "event"]),
            available_columns))
    keys = [
        "extra.rowcount" if column == "rowcount" else column
        for column in columns
    ]

    # We need to scan here since the events are stored by "target" and not by
    # "etl_id".
    # TODO Try to find all the "known" relations and query on them with a
    # filter on the etl_id.
    client = boto3.client("dynamodb")
    paginator = client.get_paginator("scan")
    response_iterator = paginator.paginate(
        TableName=table.name,
        ConsistentRead=False,
        ExpressionAttributeNames={"#timestamp": "timestamp"},
        ExpressionAttributeValues={
            ":etl_id": {"S": etl_id},
            ":marker": {"S": _DUMMY_TARGET},
            ":start_event": {"S": STEP_START},
        },
        FilterExpression=(
            "etl_id = :etl_id and target <> :marker"
            " and event <> :start_event"),
        ProjectionExpression=(
            "target, step, event, #timestamp, elapsed, extra.rowcount"),
        ReturnConsumedCapacity="TOTAL",
        # PaginationConfig={
        #     "PageSize": 100
        # }
    )
    logger.info("Scanning events table '%s' for elapsed times", table.name)
    consumed_capacity = 0.0
    scanned_count = 0
    rows: List[List[str]] = []
    deserialize = TypeDeserializer().deserialize

    for response in response_iterator:
        consumed_capacity += response["ConsumedCapacity"]["CapacityUnits"]
        scanned_count += response["ScannedCount"]
        # We need to turn something like "'event': {'S': 'finish'}" into
        # "'event': 'finish'".
        deserialized = [{
            key: deserialize(value)
            for key, value in item.items()
        } for item in response["Items"]]
        # Lookup "elapsed" or "extra.rowcount" (the latter as
        # ["extra", "rowcount"]).
        items = [{key: fy.get_in(item, key.split("."))
                  for key in keys} for item in deserialized]
        # Scope down to selected keys and format the columns.
        rows.extend([_format_output_column(key, item[key]) for key in keys]
                    for item in items)

    logger.info("Scan result: scanned count = %d, consumed capacity = %f",
                scanned_count, consumed_capacity)
    if "timestamp" in keys:
        rows.sort(key=itemgetter(keys.index("timestamp")))
    else:
        rows.sort(key=itemgetter(keys.index("target")))
    print(etl.text.format_lines(rows, header_row=columns))
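# Hedged usage sketch: the etl_id below is invented for illustration; real
# ids come from whatever this ETL framework assigns at runtime.
# scan_etl_events("etl-2021-06-01",
#                 selected_columns=["target", "event", "elapsed"])
# prints a table of (target, event, elapsed) rows; without a "timestamp"
# column the rows are sorted by target.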
def get_picture_url(fcb_user):
    if get_in(fcb_user, ["picture", "data", "is_silhouette"]):
        return ""
    return get_in(fcb_user, ["picture", "data", "url"], "")
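# Shape of the Facebook Graph API payload this expects (abridged sketch):
# {"picture": {"data": {"is_silhouette": False, "url": "https://..."}}}
# Silhouette (default) avatars are treated as having no picture.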
def image_type(app):
    return interface.select_task(
        funcy.get_in(app, ["task", "image", "type"]))()