Esempio n. 1
0
def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Accumulates reproducibility fields for a single drop at the partition stage.

    The result is the unroll-level data (from ``accumulate_pgt_unroll_drop_data``)
    updated with the fields selected by ``pgt_partition_block_fields``.
    A missing "reprodata" entry or "rmode" value is filled in on the drop with the
    default mode, and an unsupported mode is replaced by the default (with a warning).
    :param drop: The drop description; its "reprodata" entry may be created or updated.
    :return: In ALL mode, a dictionary keyed by rmode name holding the per-mode data;
             otherwise a flat dictionary of the accumulated fields.
    """
    if drop.get("reprodata") is None:
        drop["reprodata"] = {
            "rmode": str(REPRO_DEFAULT.value),
            "lg_blockhash": None
        }
    if drop["reprodata"].get("rmode") is None:
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    else:
        level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    if level == ReproducibilityFlags.ALL:
        unroll_data = accumulate_pgt_unroll_drop_data(drop)
        for rmode in ALL_RMODES:
            partition_data = extract_fields(drop,
                                            pgt_partition_block_fields(rmode))
            # The unroll step may not provide an entry for every mode (for
            # example when it returns an empty dict), so create it on demand
            # instead of failing with a KeyError.
            unroll_data.setdefault(rmode.name, {}).update(partition_data)
        return unroll_data
    partition_data = extract_fields(drop, pgt_partition_block_fields(level))
    return_data = accumulate_pgt_unroll_drop_data(drop)
    return_data.update(partition_data)
    return return_data
Esempio n. 2
0
def init_runtime_repro_data(runtime_graph: dict, reprodata: dict):
    """
    Adds reproducibility data at the runtime level to graph-wide values.

    Every drop in the graph is initialised with ``init_rg_repro_drop_data``, a
    signature is computed from the leaves of the runtime blockdag, and the
    reprodata dictionary is stored in the graph under the "reprodata" key.
    :param runtime_graph: The runtime graph; its values are the drops.
    :param reprodata: Graph-wide reproducibility data, or None to leave the graph untouched.
                      Updated in place (signature(s), and "rmode" when falling back).
    :return: The same runtime graph object.
    """
    if reprodata is None:
        return runtime_graph
    level = rflag_caster(reprodata["rmode"])
    if not rmode_supported(level):
        # TODO: Logging needs sessionID at this stage
        # logger.warning("Requested reproducibility mode %s not yet implemented", str(rmode))
        level = REPRO_DEFAULT
        reprodata["rmode"] = str(level.value)
    for drop in runtime_graph.values():
        init_rg_repro_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(list(runtime_graph.values()), "rg",
                                       rmode)
            # Create the per-mode entry on demand so a reprodata dictionary
            # without it does not fail with a KeyError.
            reprodata.setdefault(
                rmode.name, {})["signature"] = agglomerate_leaves(leaves)
    else:
        leaves, _ = build_blockdag(list(runtime_graph.values()), "rg")
        reprodata["signature"] = agglomerate_leaves(leaves)
    runtime_graph["reprodata"] = reprodata
    # logger.info("Reproducibility data finished at runtime level")
    return runtime_graph
Esempio n. 3
0
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical template level.

    An unsupported mode is replaced by the default one (with a warning) and the
    replacement is written back to the drop's reprodata.
    :param drop: The drop description, including its 'reprodata' entry.
    :return: A dictionary with the collected fields; empty when nothing is requested.
    """
    level = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(level):
        logger.warning('Requested reproducibility mode %s not yet implemented',
                       str(level))
        level = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(level.value)

    if level == ReproducibilityFlags.NOTHING:
        return {}

    collected = {}
    reproduce_only = level == ReproducibilityFlags.REPRODUCE
    if reproduce_only or level.value >= ReproducibilityFlags.RERUN.value:
        drop_type = drop['type']
        collected['type'] = drop_type
        if drop_type == 'plain':
            collected['storage'] = drop['storage']
        elif not reproduce_only:
            # WARNING: Added to differentiate between subtle component differences.
            collected['dt'] = drop['dt']
        if reproduce_only:
            # REPRODUCE never records anything beyond type/storage.
            return collected

    if level in (ReproducibilityFlags.RECOMPUTE,
                 ReproducibilityFlags.REPLICATE_COMP):
        collected['rank'] = drop['rank']
    return collected
Esempio n. 4
0
def accumulate_lg_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of a single drop for the given level.

    The drop's "fields" and "applicationArgs" entries are flattened into one
    name -> value mapping (application arguments win on a name clash) and the
    fields chosen by ``lg_block_fields`` are extracted from it.
    :param drop: The drop description.
    :param level: The reproducibility level to collect data for.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    :raises NotImplementedError: if the level is not supported.
    """
    if not rmode_supported(level):
        raise NotImplementedError(
            f"Reproducibility level {level.name} not yet supported")

    def flatten(key):
        # Each entry is copied first so popping "name" leaves the drop untouched.
        flat = {}
        for entry in map(dict, drop.get(key, {})):
            entry_name = entry.pop("name")
            flat[entry_name] = entry["value"]
        return flat

    category = drop.get("category", "")
    merged = flatten("fields")
    app_args = flatten("applicationArgs")
    merged.update(app_args)
    wanted = lg_block_fields(category, level, app_args.keys())
    return extract_fields(merged, wanted)
Esempio n. 5
0
def init_pg_repro_data(physical_graph: list):
    """
    Handles adding reproducibility data at the physical graph level.

    The last element of the list is treated as the graph-wide reprodata
    dictionary: it is removed, updated with the blockdag signature(s) and
    appended again. When it carries no "rmode", or the mode is NOTHING, the
    graph is returned as it was.
    :param physical_graph: The physical graph data structure (a list of drops + reprodata dictionary)
    :return: The same pg object with new information appended
    """
    if not physical_graph:
        # Nothing to pop: an empty graph carries no reprodata to update.
        return physical_graph
    reprodata = physical_graph.pop()
    if "rmode" not in reprodata:
        physical_graph.append(reprodata)
        return physical_graph
    level = rflag_caster(reprodata["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        physical_graph.append(reprodata)
        return physical_graph
    for drop in physical_graph:
        init_pg_repro_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(physical_graph, "pg", rmode)
            # Create the per-mode entry on demand so a reprodata dictionary
            # without it does not fail with a KeyError.
            reprodata.setdefault(
                rmode.name, {})["signature"] = agglomerate_leaves(leaves)
    else:
        leaves, _ = build_blockdag(physical_graph, "pg")
        reprodata["signature"] = agglomerate_leaves(leaves)
    physical_graph.append(reprodata)
    logger.info("Reproducibility data finished at PG level")
    return physical_graph
Esempio n. 6
0
def init_lgt_repro_data(logical_graph_template: dict, rmode: str):
    """
    Builds the graph-wide reproducibility data for a logical graph template.

    The data holds the effective mode, the accumulated meta data and a merkle
    root over both. Each drop in "nodeDataArray" is initialised as well.
    Nothing is added when the effective mode is NOTHING.
    :param logical_graph_template: The logical graph data structure (a JSON object (a dict))
    :param rmode: The requested reproducibility mode, in the form accepted by rflag_caster
    :return: The same lgt object with new information appended
    """
    level = rflag_caster(rmode)
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        return logical_graph_template

    graph_repro = {}
    graph_repro["rmode"] = str(level.value)
    graph_repro["meta_data"] = accumulate_meta_data()
    # The root is computed before it is stored, so it only covers the two entries above.
    root = MerkleTree(graph_repro.items(), common_hash).merkle_root
    graph_repro["merkleroot"] = root

    for node in logical_graph_template.get("nodeDataArray", []):
        init_lgt_repro_drop_data(node, level)
    logical_graph_template["reprodata"] = graph_repro
    logger.info("Reproducibility data finished at LGT level")
    return logical_graph_template
Esempio n. 7
0
def init_lg_repro_data(logical_graph: dict):
    """
    Adds reproducibility data to a logical graph and signs its blockdag.

    Graphs without a "reprodata" entry, or whose effective mode is NOTHING,
    are returned untouched. Otherwise every drop in "nodeDataArray" is
    initialised and the signature(s) derived from the blockdag leaves are
    stored in the graph's reprodata (one per mode in ALL mode).
    :param logical_graph: The logical graph data structure (a JSON object (a dict))
    :return: The same logical graph object with new information appended
    """
    if "reprodata" not in logical_graph:
        return logical_graph

    level = rflag_caster(logical_graph["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        return logical_graph

    for node in logical_graph.get("nodeDataArray", []):
        init_lg_repro_drop_data(node)

    if level != ReproducibilityFlags.ALL:
        leaves, _ = lg_build_blockdag(logical_graph)
        logical_graph["reprodata"]["signature"] = agglomerate_leaves(leaves)
    else:
        for mode in ALL_RMODES:
            # Make sure the per-mode container exists before it is filled in.
            logical_graph["reprodata"].setdefault(mode.name, {})
            leaves, _ = lg_build_blockdag(logical_graph, mode)
            signature = agglomerate_leaves(leaves)
            logical_graph["reprodata"][mode.name]["signature"] = signature
    logger.info("Reproducibility data finished at LG level")
    return logical_graph
Esempio n. 8
0
def init_lg_repro_drop_data(drop: dict):
    """
    Attaches logical-graph-level reproducibility information to a single drop.

    The accumulated data gets a merkle root and is stored, together with an
    empty parent-hash table, in the drop's reprodata. In ALL mode this is done
    once per mode, inside the reprodata entry named after that mode.
    :param drop: The drop description, including its "reprodata" entry.
    :return: The same drop with appended reproducibility information
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)

    def hashed_data(mode):
        # The root is computed before being stored, so it does not cover itself.
        collected = accumulate_lg_drop_data(drop, mode)
        collected["merkleroot"] = MerkleTree(collected.items(),
                                             common_hash).merkle_root
        return collected

    if level != ReproducibilityFlags.ALL:
        lg_data = hashed_data(level)
        drop["reprodata"]["lg_data"] = lg_data
        drop["reprodata"]["lg_parenthashes"] = {}
        return drop

    for mode in ALL_RMODES:
        lg_data = hashed_data(mode)
        per_mode = drop["reprodata"][mode.name]
        per_mode["lg_data"] = lg_data
        per_mode["lg_parenthashes"] = {}
    return drop
Esempio n. 9
0
def accumulate_lgt_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of a single drop at the template stage.

    REPRODUCE records only the category information; levels whose value is at
    least RERUN's additionally record the port counts and the streaming flag.
    :param drop: The drop description.
    :param level: The reproducibility level to collect data for.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    :raises NotImplementedError: if the level is not supported.
    """
    if level == ReproducibilityFlags.NOTHING:
        return {}

    category_type = drop['categoryType']
    category = drop['category']

    if not rmode_supported(level):
        raise NotImplementedError(
            "Reproducibility level %s not yet supported" % level.name)

    collected = {'category_type': category_type, 'category': category}
    if level == ReproducibilityFlags.REPRODUCE:
        return collected

    if level.value >= ReproducibilityFlags.RERUN.value:
        collected['numInputPorts'] = len(drop['inputPorts'])
        collected['numOutputPorts'] = len(drop['outputPorts'])
        collected['streaming'] = drop['streaming']
        return collected

    # Any other level contributes nothing.
    return {}
Esempio n. 10
0
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical template level.

    A missing "reprodata" entry or "rmode" value is filled in on the drop with
    the default mode; an unsupported mode is replaced by the default (with a
    warning). Drops without a "type" yield an empty dictionary.
    :param drop: The drop description; its "reprodata" entry may be created or updated.
    :return: In ALL mode, a dictionary keyed by rmode name holding the per-mode data;
             otherwise a flat dictionary of the extracted fields.
    """
    repro = drop.get("reprodata")
    if repro is None:
        repro = {"rmode": str(REPRO_DEFAULT.value), "lg_blockhash": None}
        drop["reprodata"] = repro

    requested = repro.get("rmode")
    if requested is None:
        level = REPRO_DEFAULT
        repro["rmode"] = str(level.value)
    else:
        level = rflag_caster(requested)

    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        repro["rmode"] = str(level.value)

    drop_type = drop.get("type")
    if drop_type is None:
        return {}

    if level != ReproducibilityFlags.ALL:
        return extract_fields(drop, pgt_unroll_block_fields(drop_type, level))

    per_mode = {}
    for mode in ALL_RMODES:
        wanted = pgt_unroll_block_fields(drop_type, mode)
        per_mode[mode.name] = extract_fields(drop, wanted)
    return per_mode
Esempio n. 11
0
def accumulate_lgt_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of a single drop at the template stage.

    The fields to keep are chosen by ``lgt_block_fields`` for the given level.
    :param drop: The drop description.
    :param level: The reproducibility level to collect data for.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    :raises NotImplementedError: if the level is not supported.
    """
    if rmode_supported(level):
        return extract_fields(drop, lgt_block_fields(level))
    raise NotImplementedError(
        f"Reproducibility level {level.name} not yet supported")
Esempio n. 12
0
def accumulate_pg_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph level.

    Only the REPLICATE_COMP and RECOMPUTE modes record anything here: the
    drop's 'node' and 'island'. An unsupported mode is replaced by the default
    (with a warning) and written back to the drop.
    :param drop: The drop description, including its 'reprodata' entry.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    level = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(level.value)

    placement_levels = (ReproducibilityFlags.REPLICATE_COMP,
                        ReproducibilityFlags.RECOMPUTE)
    if level not in placement_levels:
        return {}
    return {'node': drop['node'], 'island': drop['island']}
Esempio n. 13
0
def init_lg_repro_drop_data(drop: dict):
    """
    Attaches logical-graph-level reproducibility information to a single drop.

    The accumulated data gets a merkle root and is stored in the drop's
    reprodata together with an empty parent-hash table.
    :param drop: The drop description, including its 'reprodata' entry.
    :return: The same drop with appended reproducibility information
    """
    level = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(level.value)

    lg_data = accumulate_lg_drop_data(drop, level)
    # The root is computed before being stored, so it does not cover itself.
    root = MerkleTree(lg_data.items(), common_hash).merkle_root
    lg_data['merkleroot'] = root

    repro = drop['reprodata']
    repro['lg_data'] = lg_data
    repro['lg_parenthashes'] = {}
    return drop
Esempio n. 14
0
def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the partition stage.

    Starts from the unroll-level data and, for the REPLICATE_COMP and RECOMPUTE
    modes only, adds the drop's 'node' and 'island' with their first element
    removed.
    :param drop: The drop description, including its 'reprodata' entry.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    level = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(level.value)

    collected = accumulate_pgt_unroll_drop_data(drop)

    # Placement is the only new information at the partition level, and it
    # only matters for repetition and computational replication.
    placement_levels = (ReproducibilityFlags.REPLICATE_COMP,
                        ReproducibilityFlags.RECOMPUTE)
    if level in placement_levels:
        # NOTE(review): the [1:] slice presumably strips a leading marker - confirm.
        collected['node'] = drop['node'][1:]
        collected['island'] = drop['island'][1:]
    return collected
Esempio n. 15
0
def init_runtime_repro_data(rg: dict, reprodata: dict):
    """
    Adds graph-wide reproducibility data to a runtime graph.

    Every drop is initialised, a signature is derived from the leaves of the
    runtime blockdag, and the reprodata dictionary is stored in the graph
    under the 'reprodata' key.
    :param rg: The runtime graph; its values are the drops.
    :param reprodata: Graph-wide reproducibility data, updated in place.
    :return: The same runtime graph object.
    """
    level = rflag_caster(reprodata['rmode'])
    if not rmode_supported(level):
        # TODO: Logging needs sessionID at this stage, so no warning is emitted here.
        level = REPRO_DEFAULT
        reprodata['rmode'] = str(level.value)

    for drop in rg.values():
        init_rg_repro_drop_data(drop)

    leaves, _ = build_blockdag(list(rg.values()), 'rg')
    signature = agglomerate_leaves(leaves)
    reprodata['signature'] = signature
    rg['reprodata'] = reprodata
    return rg
Esempio n. 16
0
def init_pg_repro_data(pg: list):
    """
    Adds graph-wide reproducibility data to a physical graph.

    The last element of the list is taken as the reprodata dictionary; it is
    removed, given the blockdag signature and appended again once every drop
    has been initialised.
    :param pg: The physical graph data structure (a list of drops + reprodata dictionary)
    :return: The same pg object with new information appended
    """
    graph_repro = pg.pop()
    level = rflag_caster(graph_repro['rmode'])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        graph_repro['rmode'] = str(level.value)

    for drop in pg:
        init_pg_repro_drop_data(drop)

    leaves, _ = build_blockdag(pg, 'pg')
    signature = agglomerate_leaves(leaves)
    graph_repro['signature'] = signature
    pg.append(graph_repro)
    logger.info("Reproducibility data finished at PG level")
    return pg
Esempio n. 17
0
def accumulate_pg_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph level.

    The fields to keep are chosen by ``pg_block_fields``. An unsupported mode
    is replaced by the default (with a warning) and written back to the drop.
    :param drop: The drop description, including its "reprodata" entry.
    :return: In ALL mode, a dictionary keyed by rmode name holding the per-mode data;
             otherwise a flat dictionary of the extracted fields.
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)

    if level != ReproducibilityFlags.ALL:
        return extract_fields(drop, pg_block_fields(level))

    per_mode = {}
    for mode in ALL_RMODES:
        wanted = pg_block_fields(mode)
        per_mode[mode.name] = extract_fields(drop, wanted)
    return per_mode
Esempio n. 18
0
def accumulate_lg_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of a single logical graph drop.

    Which fields are copied depends on the requested level, the drop's
    'categoryType' and its 'category'. NOTHING, RERUN and REPRODUCE record
    nothing at this stage.
    :param drop: The drop description; must provide 'categoryType', 'category' and 'fields'.
    :param level: The reproducibility level to collect data for.
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    :raises NotImplementedError: if the level is not supported.
    """
    collected = {}
    if level == ReproducibilityFlags.NOTHING:
        return collected

    category_type = drop['categoryType']
    category = drop['category']

    # Each field entry is copied first so popping 'name' leaves the drop untouched.
    fields = {}
    for entry in map(dict, drop['fields']):
        field_name = entry.pop('name')
        fields[field_name] = entry['value']

    if not rmode_supported(level):
        raise NotImplementedError(
            "Reproducibility level %s not yet supported" % level.name)

    def copy_fields(*names):
        # Copies the named fields, in order, under the same key.
        for name in names:
            collected[name] = fields[name]

    detailed_levels = (
        ReproducibilityFlags.REPEAT,
        ReproducibilityFlags.REPLICATE_COMP,
        ReproducibilityFlags.RECOMPUTE,
        ReproducibilityFlags.REPLICATE_TOTAL,
    )
    if level != ReproducibilityFlags.RERUN and level in detailed_levels:
        if category_type == 'Application':
            copy_fields('execution_time', 'num_cpus')
            if category == Categories.BASH_SHELL_APP:
                collected['command'] = fields['Arg01']
            elif category == Categories.DYNLIB_APP:  # TODO: Deal with DYNLIB_PROC
                copy_fields('libpath')
            elif category == Categories.MPI:
                copy_fields('num_of_procs')
            elif category == Categories.DOCKER:
                copy_fields('image', 'command', 'user', 'ensureUserAndSwitch',
                            'removeContainer', 'additionalBindings')
            elif category == Categories.COMPONENT:
                copy_fields('appclass')
        elif category_type == Categories.DATA:
            copy_fields('data_volume')
            # Of the data categories only files add a field of their own.
            if category == Categories.FILE:
                copy_fields('check_filepath_exists')
        elif category_type == 'Group':
            # Unlike the rest, this value lives on the drop itself, not in its fields.
            collected['exitAppName'] = drop['exitAppName']
            if category == Categories.GROUP_BY:
                copy_fields('group_key', 'group_axis')
            elif category == Categories.GATHER:
                copy_fields('num_of_inputs', 'gather_axis')
            elif category == Categories.SCATTER:
                copy_fields('num_of_copies', 'scatter_axis')
            elif category == Categories.LOOP:
                copy_fields('num_of_iter')
        # 'Control' and 'Other' drops contribute nothing.

    records_paths = level in (ReproducibilityFlags.RECOMPUTE,
                              ReproducibilityFlags.REPLICATE_COMP)
    if (records_paths and category_type == Categories.DATA
            and category == Categories.FILE):
        copy_fields('filepath', 'dirname')

    return collected