def test_executing_container():
    out_dir = tempfile.mkdtemp()
    cnt = exe.create_container(
            IMAGE,
            biobox_cfg(hlp.short_read_fastq()),
            {"output" : out_dir},
            "default",
            {"detach" : False})
    id_ = cnt['Id']
    util.client().start(id_)
    util.client().wait(id_)
    assert funcy.get_in(util.client().inspect_container(id_), ['State', 'ExitCode']) == 0
    assert os.path.isfile(os.path.join(out_dir, 'contigs.fa'))
    hlp.clean_up_container(id_)
Example #2
def test_show_running_executor(tmp_dir, scm, dvc, exp_stage):
    baseline_rev = scm.get_rev()
    dvc.experiments.run(exp_stage.addressing, params=["foo=2"], queue=True)
    exp_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    pid_dir = os.path.join(dvc.tmp_dir, dvc.experiments.EXEC_PID_DIR)
    makedirs(pid_dir, True)
    info = ExecutorInfo(None, None, None, BaseExecutor.DEFAULT_LOCATION)
    pidfile = os.path.join(pid_dir, f"{exp_rev}{BaseExecutor.PIDFILE_EXT}")
    (tmp_dir / pidfile).dump(info.to_dict())

    results = dvc.experiments.show()
    exp_data = get_in(results, [baseline_rev, exp_rev, "data"])
    assert not exp_data["queued"]
    assert exp_data["running"]
    assert exp_data["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
Example #3
def test_api_missing_local_cache_exists_on_remote(
    tmp_dir,
    scm,
    dvc,
    as_external,
    remote,
    files,
    to_read,
):
    tmp_dir.dvc_gen(files, commit="DVC track files")
    dvc.push()

    # Remove cache to make foo missing
    remove(dvc.cache.local.cache_dir)
    remove(first(files))

    repo_url = f"file://{tmp_dir}" if as_external else None
    file_content = get_in(files, to_read.split(os.sep))
    assert api.read(to_read, repo=repo_url) == file_content
Example #4
def test_show_running_checkpoint(tmp_dir, scm, dvc, checkpoint_stage,
                                 workspace, mocker):
    from dvc.repo.experiments.base import EXEC_BASELINE, EXEC_BRANCH
    from dvc.repo.experiments.executor.local import TempDirExecutor

    baseline_rev = scm.get_rev()
    dvc.experiments.run(checkpoint_stage.addressing,
                        params=["foo=2"],
                        queue=True)
    stash_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    run_results = dvc.experiments.run(run_all=True)
    checkpoint_rev = first(run_results)
    exp_ref = first(exp_refs_by_rev(scm, checkpoint_rev))

    pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)
    executor = (BaseExecutor.DEFAULT_LOCATION
                if workspace else TempDirExecutor.DEFAULT_LOCATION)
    info = make_executor_info(
        git_url="foo.git",
        baseline_rev=baseline_rev,
        location=executor,
    )
    rev = "workspace" if workspace else stash_rev
    pidfile = os.path.join(pid_dir, f"{rev}{BaseExecutor.INFOFILE_EXT}")
    makedirs(os.path.dirname(pidfile), True)
    (tmp_dir / pidfile).dump_json(info.asdict())

    mocker.patch.object(BaseExecutor,
                        "fetch_exps",
                        return_value=[str(exp_ref)])
    if workspace:
        scm.set_ref(EXEC_BRANCH, str(exp_ref), symbolic=True)
        scm.set_ref(EXEC_BASELINE, str(baseline_rev))
        scm.checkout(str(exp_ref))

    results = dvc.experiments.show()

    checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"])
    assert checkpoint_res["running"]
    assert checkpoint_res["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
Example #5
def test_show_running_checkpoint(tmp_dir, scm, dvc, checkpoint_stage,
                                 workspace, mocker):
    from dvc.repo.experiments.base import EXEC_BRANCH
    from dvc.repo.experiments.executor.local import TempDirExecutor
    from dvc.repo.experiments.utils import exp_refs_by_rev

    baseline_rev = scm.get_rev()
    dvc.experiments.run(checkpoint_stage.addressing,
                        params=["foo=2"],
                        queue=True)
    stash_rev = dvc.experiments.scm.resolve_rev(f"{EXPS_STASH}@{{0}}")

    run_results = dvc.experiments.run(run_all=True)
    checkpoint_rev = first(run_results)
    exp_ref = first(exp_refs_by_rev(scm, checkpoint_rev))

    pid_dir = os.path.join(dvc.tmp_dir, dvc.experiments.EXEC_PID_DIR)
    makedirs(pid_dir, True)
    executor = (BaseExecutor.DEFAULT_LOCATION
                if workspace else TempDirExecutor.DEFAULT_LOCATION)
    info = ExecutorInfo(123, "foo.git", baseline_rev, executor)
    rev = "workspace" if workspace else stash_rev
    pidfile = os.path.join(pid_dir, f"{rev}{BaseExecutor.PIDFILE_EXT}")
    dump_yaml(pidfile, info.to_dict())

    mocker.patch.object(BaseExecutor,
                        "fetch_exps",
                        return_value=[str(exp_ref)])
    if workspace:
        scm.set_ref(EXEC_BRANCH, str(exp_ref), symbolic=True)

    results = dvc.experiments.show()

    checkpoint_res = get_in(results, [baseline_rev, checkpoint_rev, "data"])
    assert checkpoint_res["running"]
    assert checkpoint_res["executor"] == info.location

    assert not results["workspace"]["baseline"]["data"]["running"]
Example #6
def test_log_errors(tmp_dir, scm, dvc, run_copy_metrics, file, error_path,
                    capsys):
    metric = [{"val": 2}, {"val": 3}]
    (tmp_dir / "metric_t.yaml").dump(metric)
    run_copy_metrics(
        "metric_t.yaml",
        "plot.yaml",
        plots=["plot.yaml"],
        single_stage=False,
        name="train",
    )
    scm.tag("v1")

    with open(file, "a", encoding="utf-8") as fd:
        fd.write("\nMALFORMED!")

    result = dvc.plots.show(onerror=onerror_collect)
    _, error = capsys.readouterr()

    assert isinstance(get_in(result, error_path), YAMLFileCorruptedError)
    assert (
        "DVC failed to load some plots for following revisions: 'workspace'."
        in error)
Example #7
def test_log_errors(tmp_dir, scm, dvc, capsys, run_copy_metrics, file,
                    error_path):
    tmp_dir.gen("metrics_t.yaml", "m: 1.1")
    run_copy_metrics(
        "metrics_t.yaml",
        "metrics.yaml",
        metrics=["metrics.yaml"],
        single_stage=False,
        name="train",
    )
    scm.tag("v1")

    with open(file, "a") as fd:
        fd.write("\nMALFORMED!")

    result = dvc.metrics.show(revs=["v1"])

    _, error = capsys.readouterr()

    assert isinstance(get_in(result, error_path), YAMLFileCorruptedError)
    assert (
        "DVC failed to load some metrics for following revisions: 'workspace'."
        in error)
Example #8
def parse_event_description(event: dict) -> Tuple[str, str]:
    """ Parses the Gcal event's description for the `airtable_record_id` and `source`

    Examples:
        >>> event['description'] = "38xfjrf30jxojr33pd201jf s3"
        >>> parse_event_description(event)
        ("38xfjrf30jxojr33pd201jf", "s3")

        >>> event['description'] = "38xfjrf30jxojr33pd201jf"
        >>> parse_event_description(event)
        ("38xfjrf30jxojr33pd201jf", None)

    Args:
        event: Dictionary that stores the event's information
    
    Returns:
        Tuple of `airtable_record_id` and `source`
    """
    string_to_parse = get_in(event, ['description'], "").split(" ")
    airtable_record_id = string_to_parse[0]
    source = string_to_parse[1:2] or None
    if source:
        source = source[0]
    return airtable_record_id, source
def process_deadline_change(update_fields: dict, record: dict, calendar: Calendar) -> Dict:
    """ Detects records where the `Deadline` changed and updates the `update_fields` payload accordingly

    A deadline change is detected when the `deadline` does not equal the `lastDeadline` field.

    Actions:
        1. Updates the Gcal event, if the detected deadline change did not originate from the webhook
        2. Updates the `Deadline Group`, `Day`, and `lastDeadline` fields in Airtable

    Args:
        update_fields: The payload dictionary that will be sent in a patch/post request to the Airtable API
        record: The individual record being processed
        calendar: The :obj:`calendar_request.Calendar` instance corresponding to the calendar out of which we're working

    Returns:
        An updated version of `update_fields` to be sent to Airtable in a patch/post request
    """
    deadline = get_in(record, ["fields", "Deadline"])
    last_deadline = get_in(record, ["fields", "lastDeadline"], "")

    if deadline != last_deadline:
        calendar_event_id = get_in(record, ["fields", "calendarEventId"])
        duration = get_in(record, ["fields", "duration"])
        lastCalendarDeadline = get_in(record, ["fields", "lastCalendarDeadline"], "")[0:10]
        airtable_record_id = get_in(record, ["id"])
        deadline_date = datetime.strptime(deadline, "%Y-%m-%d") + timedelta(hours=16)
        days_to_sunday = 6 - deadline_date.weekday()
        next_sunday = (deadline_date + timedelta(days=days_to_sunday)).strftime("%m/%d")

        if not duration:
            duration = 1

        # valid calendar_event_id; and wasn't recently updated by gcal webhook
        if calendar_event_id and lastCalendarDeadline != deadline:
            calendar.patch_event(calendar_event_id, airtable_record_id, start=deadline_date, duration=duration)
        
        update_fields.update({
            "Deadline Group": next_sunday, 
            "Day": DAY_OF_WEEK[deadline_date.weekday()],
            "lastDeadline": deadline
        })
    return update_fields
Example #10
def metadata_lookup(path, container_id):
    """
    Look up metadata about the docker container for the given path
    """
    return funcy.get_in(util.client().inspect_container(container_id), path)
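A hedged usage sketch (the container id is assumed to come from a run like the one in the first example); the path mirrors the nested structure returned by Docker's inspect call:

# Hypothetical usage: read the exit code of a finished container.
exit_code = metadata_lookup(["State", "ExitCode"], container_id)
assert exit_code == 0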
Example #11
 def profile(self):
     with suppress(TypeError):
         return get_in(self, ["json_metadata", "profile"], default={})
Example #12
def image_name(app):
    return funcy.get_in(app, ["task", "image", "name"])
Example #13
def fail_code_error_message(node):
    msg = "The QC entry '{}' is missing a failure code"
    return msg.format(funcy.get_in(node, [0, 'name']))
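The `[0, 'name']` path works because `funcy.get_in` indexes sequences as well as mappings and falls back to a default on a missing key; a minimal self-contained sketch (the sample data is invented):

import funcy

node = [{"name": "GC content", "fail_msg": "value outside the expected range"}]
assert funcy.get_in(node, [0, "name"]) == "GC content"
# A missing key returns the default instead of raising (None unless given).
assert funcy.get_in(node, [0, "failure_code"], "n/a") == "n/a"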
Example #14
def flatten_dict(source_dict, name_delimiter='_', inner_name=False):
    """
    Flatten a nested dict.

    Parameters
    ----------
    source_dict : nested dict
    name_delimiter : str, delimiter used to join nested key names (ignored when inner_name is True)
    inner_name : bool, if True use only the innermost key name as the returned dict key

    Returns
    -------
    flattened dict

    Examples
    --------
    >>> from tidyframe import flatten_dict
    >>> nest_dict = {
    ...     'a': 1,
    ...     'b': [1, 2],
    ...     'c': {
    ...         'cc1': 3,
    ...         'cc2': 4
    ...     },
    ...     'd': {
    ...         'd1': 5,
    ...         'd2': {
    ...             'dd1': 6,
    ...             'dd2': 7
    ...         }
    ...     }
    ... }
    >>> flatten_dict(nest_dict)
    {'a': 1, 'b': [1, 2], 'c_cc1': 3, 'c_cc2': 4, 'd_d1': 5, 'd_d2_dd1': 6, 'd_d2_dd2': 7}
    >>> flatten_dict(nest_dict, inner_name=True)
    {'a': 1, 'b': [1, 2], 'cc1': 3, 'cc2': 4, 'd1': 5, 'dd1': 6, 'dd2': 7}
    """
    assert isinstance(source_dict, dict), "source_dict is not a dict"
    json_name = {}
    for key in source_dict.keys():
        if isinstance(get_in(source_dict, [key]), dict):
            val = [True, [key]]
            json_name.update({key: val})
        else:
            val = [False, [key]]
            json_name.update({key: val})
    while True:
        key_inner = list(filter(lambda x: json_name.get(x)[0], json_name))
        if key_inner:
            for x in key_inner:
                dict_to_update_json_name = {}
                val = json_name.get(x)[1]
                for key in get_in(source_dict, val).keys():
                    val_in = copy(val)
                    val_in.append(key)
                    if isinstance(get_in(source_dict, val_in), dict):
                        dict_to_update = {
                            reduce(lambda x, y: x + name_delimiter + y, val_in):
                            [True, val_in]
                        }
                    else:
                        dict_to_update = {
                            reduce(lambda x, y: x + name_delimiter + y, val_in):
                            [False, val_in]
                        }
                    dict_to_update_json_name.update(dict_to_update)
                json_name.update(dict_to_update_json_name)
                json_name.pop(x)
        else:
            break
    if inner_name:
        return {
            json_name.get(x)[1][-1]: get_in(source_dict,
                                            json_name.get(x)[1])
            for x in json_name.keys()
        }
    else:
        return {
            x: get_in(source_dict,
                      json_name.get(x)[1])
            for x in json_name.keys()
        }
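The loop above expands one nesting level per pass through a worklist; a compact recursive sketch of the same delimiter-joined flattening (covering only the default `inner_name=False` behaviour, not the library's actual implementation) could look like this:

def flatten_dict_recursive(source_dict, name_delimiter='_', _prefix=''):
    # Join nested keys with the delimiter; non-dict values are kept as leaves.
    flat = {}
    for key, value in source_dict.items():
        name = key if not _prefix else _prefix + name_delimiter + key
        if isinstance(value, dict):
            flat.update(flatten_dict_recursive(value, name_delimiter, name))
        else:
            flat[name] = value
    return flat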
Example #15
def hackuity_search_vulnerabilities_command(
        client: Client, args: Dict[str, Any],
        hy_global_only: bool) -> CommandResults:
    asset_name = args.get("asset_name")
    asset_type = args.get("asset_type")
    attribute = args.get("attribute")
    limit = int(args["limit"])
    cvss_min = none_or_apply(args.get("cvss_min"), float)
    cvss_max = none_or_apply(args.get("cvss_max"), float)
    vuln_type = args.get("vuln_type")
    trs_min = none_or_apply(args.get("trs_min"), int)
    trs_max = none_or_apply(args.get("trs_max"), int)
    raw_response = client.fetch_aggfindings(
        asset_name=asset_name,
        asset_type=asset_type,
        attribute=attribute,
        cvss_min=cvss_min,
        cvss_max=cvss_max,
        hy_global_only=hy_global_only,
        limit=limit,
        trs_min=trs_min,
        trs_max=trs_max,
        vuln_type=vuln_type,
    )
    outputs = []
    for aggfinding in raw_response:
        output = remap_item(
            aggfinding,
            [
                (
                    [
                        "aggExtAttribute",
                        "sharedDetailsLocal",
                        "aggFindingAttributes",
                    ],
                    ["Attributes"],
                    [],
                ),
                (
                    [
                        "nodeStatus",
                        "unIgnoredOpen",
                        "total",
                        "environmental",
                        "max",
                    ],
                    ["Score", "CVSS"],
                    0.0,
                ),
                (
                    ["aggExtAttribute", "sharedDetailsLocal", "title"],
                    ["Description"],
                ),
                (
                    ["id"],
                    ["ID"],
                ),
                (
                    ["aggExtAttribute", "firstDeliveredAt"],
                    ["Seen", "First"],
                    None,
                    format_date,
                ),
                (
                    ["nodeStatus", "unIgnoredOpen", "total", "hyScore", "max"],
                    ["Score", "TRS"],
                    0,
                ),
                (
                    ["nodeStatus", "total", "nb"],
                    ["Findings", "Total"],
                    0,
                ),
                (
                    ["nodeStatus", "unIgnoredOpen", "total", "nb"],
                    ["Findings", "Open"],
                    0,
                ),
                (
                    ["nodeStatus", "unIgnoredClosed", "total", "nb"],
                    ["Findings", "Closed"],
                    0,
                ),
                (
                    ["nodeStatus", "ignored", "nb"],
                    ["Findings", "Ignored"],
                    0,
                ),
            ],
        )
        output["VulnTypes"] = [
            {
                "ID": vuln_type,
                "Name": client.get_vulnerability_name(vuln_type),
            }
            for vuln_type in get_in(
                aggfinding, ["aggExtAttribute", "vulnTypeIds"], [])
        ]
        outputs.append(output)
    return CommandResults(
        outputs_prefix="Hackuity.Vulnerabilities",
        outputs=outputs,
        readable_output=tableToMarkdown(
            f"{'VulnDB' if hy_global_only else 'Provider'} vulnerabilities",
            outputs,
            headers=[
                "VulnTypes",
                "Description",
                *(["Attributes"] if hy_global_only else []),
                "Score",
                "Findings",
                "Seen",
            ],
        ),
    )
Example #16
def get(str):
    path = str.split('.')
    return lambda x: get_in(x, path)
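A hedged usage note (the data below is made up): the returned callable turns a dotted string into a nested `get_in` lookup.

app = {"task": {"image": {"name": "bioboxes/velvet"}}}
name_of = get("task.image.name")
assert name_of(app) == "bioboxes/velvet"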
Example #17
def create_qc_message(is_pass, input_node, variables):
    x = 'pass_msg' if is_pass else 'fail_msg'
    return funcy.get_in(input_node, [0, x]).format(**variables)
def get_biobox_yaml_value(app, yaml_path):
    """
    Given an xpath-type look up, returns the value in the biobox.yaml file
    """
    biobox_file = get_output_biobox_file_contents(app)
    return funcy.get_in(biobox_file, yaml_path + ['value'])
Example #19
def image_task(app):
    return funcy.get_in(app, ["task", "image", "task"])
Example #20
def image_version(app):
    return (funcy.get_in(app, ["task", "image", "name"])
            + "@sha256:"
            + funcy.get_in(app, ["task", "image", "sha256"]))
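For illustration (the field values are invented), the result is a digest-pinned Docker reference of the form name@sha256:<digest>:

app = {"task": {"image": {"name": "bioboxes/velvet", "sha256": "0123abcd" * 8}}}
assert image_version(app) == "bioboxes/velvet@sha256:" + "0123abcd" * 8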
Example #21
 def multi_char_from(_perm_index, _index_in_perm) -> Optional[MultiChar]:
     return get_in(multi_char_indices, [_perm_index, _index_in_perm])
Example #22
 def profile(self):
     with suppress(TypeError):
         return get_in(self, ['json_metadata', 'profile'], default={})
Example #23
def get_collection(db, collection):
    return get_in(db, [settings.MONGO_DB_NAME, collection])
Example #24
 def getmaster(rs_uuid):
     return get_in(
         cfg,
         ["sharding", rs_uuid, "replicas",
          uuid(replica_name), "master"],
     )
Example #25
    def refresh(self):
        post_author, post_permlink = resolve_identifier(self.identifier)
        post = self.steemd.get_content(post_author,
                                       post_permlink,
                                       vote_limit=self.vote_limit)
        if not post["permlink"]:
            raise PostDoesNotExist("Post does not exist: %s" % self.identifier)

        # If this 'post' comes from an operation, it might carry a patch
        if "body" in post and re.match("^@@", post["body"]):
            self.patched = True

        # TODO: Check
        # This field is returned from blockchain, but it's empty. Fill it
        try:
            reblogged_by = [
                i for i in self.steemd.get_reblogged_by(
                    post_author, post_permlink) if i != post_author
            ]
        except RPCError:
            reblogged_by = []

        post['reblogged_by'] = reblogged_by

        # Parse Times
        parse_times = [
            "active", "cashout_time", "created", "last_payout", "last_update",
            "max_cashout_time"
        ]
        for p in parse_times:
            post[p] = parse_time(post.get(p, "1970-01-01T00:00:00"))

        # Parse Amounts
        sbd_amounts = [
            'total_payout_value',
            'max_accepted_payout',
            'pending_payout_value',
            'curator_payout_value',
            'total_pending_payout_value',
            'promoted',
        ]
        for p in sbd_amounts:
            post[p] = Amount(post.get(p, "0.000 GBG"))

        # calculate trending and hot scores for sorting
        post['score_trending'] = calculate_trending(post.get('net_rshares', 0),
                                                    post['created'])
        post['score_hot'] = calculate_hot(post.get('net_rshares', 0),
                                          post['created'])

        # turn json_metadata into python dict
        meta_str = post.get("json_metadata", "{}")
        post['json_metadata'] = silent(json.loads)(meta_str) or {}

        post["tags"] = []
        post['community'] = ''
        if isinstance(post['json_metadata'], dict):
            if post["depth"] == 0:
                tags = [post["parent_permlink"]]
                tags += get_in(post, ['json_metadata', 'tags'], default=[])
                tags_added = set()
                post['tags'] = [
                    tag for tag in tags
                    if not (tag in tags_added or tags_added.add(tag))
                ]

            post['community'] = get_in(post, ['json_metadata', 'community'],
                                       default='')

        # If this post is a comment, retrieve the root comment
        self.root_identifier, self.category = self._get_root_identifier(post)

        self._store_post(post)
Example #26
def scan_etl_events(etl_id,
                    selected_columns: Optional[Iterable[str]] = None) -> None:
    """
    Scan for all events belonging to a specific ETL.

    If a list of columns is provided, then the output is limited to those columns.
    But note that the target (schema.table) and the event are always present.
    """
    ddb = DynamoDBStorage.factory()
    table = ddb.get_table(create_if_not_exists=False)
    available_columns = [
        "target", "step", "event", "timestamp", "elapsed", "rowcount"
    ]
    if selected_columns is None:
        selected_columns = available_columns
    # We will always select "target" and "event" to have a meaningful output.
    columns = list(
        fy.filter(
            frozenset(selected_columns).union(["target", "event"]),
            available_columns))
    keys = [
        "extra.rowcount" if column == "rowcount" else column
        for column in columns
    ]

    # We need to scan here since the events are stored by "target" and not by "etl_id".
    # TODO Try to find all the "known" relations and query on them with a filter on the etl_id.
    client = boto3.client("dynamodb")
    paginator = client.get_paginator("scan")
    response_iterator = paginator.paginate(
        TableName=table.name,
        ConsistentRead=False,
        ExpressionAttributeNames={"#timestamp": "timestamp"},
        ExpressionAttributeValues={
            ":etl_id": {
                "S": etl_id
            },
            ":marker": {
                "S": _DUMMY_TARGET
            },
            ":start_event": {
                "S": STEP_START
            },
        },
        FilterExpression=(
            "etl_id = :etl_id and target <> :marker and event <> :start_event"
        ),
        ProjectionExpression=(
            "target, step, event, #timestamp, elapsed, extra.rowcount"
        ),
        ReturnConsumedCapacity="TOTAL",
        # PaginationConfig={
        #     "PageSize": 100
        # }
    )
    logger.info("Scanning events table '%s' for elapsed times", table.name)
    consumed_capacity = 0.0
    scanned_count = 0
    rows: List[List[str]] = []
    deserialize = TypeDeserializer().deserialize

    for response in response_iterator:
        consumed_capacity += response["ConsumedCapacity"]["CapacityUnits"]
        scanned_count += response["ScannedCount"]
        # We need to turn something like "'event': {'S': 'finish'}" into "'event': 'finish'".
        deserialized = [{
            key: deserialize(value)
            for key, value in item.items()
        } for item in response["Items"]]
        # Lookup "elapsed" or "extra.rowcount" (the latter as ["extra", "rowcount"]).
        items = [{key: fy.get_in(item, key.split("."))
                  for key in keys} for item in deserialized]
        # Scope down to selected keys and format the columns.
        rows.extend([_format_output_column(key, item[key]) for key in keys]
                    for item in items)

    logger.info("Scan result: scanned count = %d, consumed capacity = %f",
                scanned_count, consumed_capacity)
    if "timestamp" in keys:
        rows.sort(key=itemgetter(keys.index("timestamp")))
    else:
        rows.sort(key=itemgetter(keys.index("target")))
    print(etl.text.format_lines(rows, header_row=columns))
Example #27
def get_picture_url(fcb_user):
    if get_in(fcb_user, ["picture", "data", "is_silhouette"]):
        return ""

    return get_in(fcb_user, ["picture", "data", "url"], "")
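A short sketch of both branches (the payload shape follows the nested `picture.data` structure used above; the values are invented):

silhouette = {"picture": {"data": {"is_silhouette": True, "url": "https://example.com/default.png"}}}
real_photo = {"picture": {"data": {"is_silhouette": False, "url": "https://example.com/me.png"}}}
assert get_picture_url(silhouette) == ""
assert get_picture_url(real_photo) == "https://example.com/me.png"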
Example #28
def image_type(app):
    return interface.select_task(funcy.get_in(app,
                                              ["task", "image", "type"]))()