Example #1
from typing import Any

def format_entry(e: Any, no_quote_fields: list,
                 double_quote_fields: list) -> Any:
    # Quote string values for YAML output; handles at most two layers of
    # nesting. `ordereddict`, `sq`, and `dq` come from the surrounding project.
    if isinstance(e, str):
        return sq(e)
    res = ordereddict()
    for k1, v1 in e.items():
        if isinstance(v1, ordereddict):
            res[k1] = v1
        elif isinstance(v1, dict):
            # break up v1
            temp1 = ordereddict()
            for k2, v2 in v1.items():
                if k2 in no_quote_fields:
                    temp1[sq(k2)] = v2
                elif k2 in double_quote_fields:
                    temp1[sq(k2)] = dq(v2)
                else:
                    temp1[sq(k2)] = sq(v2)
            temp1.fa.set_flow_style()  # one line
            if isinstance(v1, ordereddict):
                # NOTE: effectively unreachable - an ordereddict is returned
                # unchanged by the first branch above, so v1 is a plain dict here.
                temp1.ca.comment = v1.ca.comment
            res[k1] = temp1
        elif v1:
            if k1 in no_quote_fields:
                res[k1] = v1
            elif k1 in double_quote_fields:
                res[k1] = dq(v1)
            else:
                res[k1] = sq(v1)
    # print(e)
    # res.ca.comment = e.ca.comment  # preserve comments
    return res
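The `sq` and `dq` helpers used above are not shown in this snippet; a minimal sketch, assuming they wrap ruamel.yaml's quoted scalar string types:

from ruamel.yaml.scalarstring import (
    SingleQuotedScalarString,
    DoubleQuotedScalarString,
)

def sq(value):
    # Hypothetical helper: force single quotes around the scalar in the output.
    return SingleQuotedScalarString(value)

def dq(value):
    # Hypothetical helper: force double quotes around the scalar in the output.
    return DoubleQuotedScalarString(value)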
Example #2
    @classmethod
    def to_yaml(cls, dumper, self):
        # Representer hook used by ruamel.yaml (e.g. via register_class);
        # `self` is the instance being dumped.
        # logging.debug(cls.yaml_flow_style)
        new_data = common.remove_empty(
            ordereddict({
                'name': self.name,
                'description': self.description,
            }))
        for key in ['init', 'actions', 'end']:
            if len(self.__dict__[key].steps) > 0:
                new_data[key] = self.__dict__[key]

        return dumper.represent_data(new_data)
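For context, a `to_yaml` classmethod like this is typically wired up through ruamel.yaml's `register_class`. A self-contained sketch with an illustrative `Step` class (not the project's actual class), using `CommentedMap`, ruamel.yaml's round-trip mapping type, in place of the project's `ordereddict`:

import sys
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

class Step:  # illustrative stand-in for the project's class
    def __init__(self, name, description=None):
        self.name = name
        self.description = description

    @classmethod
    def to_yaml(cls, dumper, self):
        # Build a plain mapping and let ruamel.yaml represent it; empty
        # fields are skipped, mirroring the example above.
        data = CommentedMap({'name': self.name})
        if self.description:
            data['description'] = self.description
        return dumper.represent_data(data)

yaml = YAML()
yaml.register_class(Step)               # ruamel.yaml now calls Step.to_yaml
yaml.dump([Step('init')], sys.stdout)   # -> - name: init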
Example #3
    def provide_inner_data(self):
        # Return the ramp-up load section, with empty fields stripped.
        ret = {
            'rampup_load':
            common.remove_empty(
                ordereddict({
                    'min_users': 1,
                    'max_users': self.to,
                    'increment_users': self.by,
                    'increment_every': self.per,
                    'duration': self.duration
                }))
        }
        return ret
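`common.remove_empty` is a project helper that does not appear on this page; a hypothetical reconstruction consistent with how these examples call it:

from ruamel.yaml.compat import ordereddict

def remove_empty(mapping):
    # Hypothetical sketch: keep only keys whose values are neither None nor
    # empty strings/containers.
    return ordereddict(
        (key, value) for key, value in mapping.items()
        if value not in (None, '', [], {})
    )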
Example #4
def create_feed_attr_data_object_attrs(data):
    res = []
    for row in data["attribute_mappings"]:
        if (not row.get("FEED_ATTRIBUTE_ID")
                or not row.get("DATA_OBJECT_ATTRIBUTE_ID")):
            continue

        new_attr = ordereddict()
        new_attr["FEED_ATTRIBUTE_ID"] = row["FEED_ATTRIBUTE_ID"]
        new_attr["DATA_OBJECT_ATTRIBUTE_ID"] = row["DATA_OBJECT_ATTRIBUTE_ID"]
        new_attr["TRANSFORM_FN"] = row.get("TRANSFORM_FN")
        res.append(new_attr)
    return res
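A quick usage sketch with made-up input: rows missing either id are skipped, and `TRANSFORM_FN` falls back to `None` via `row.get`:

# Illustrative input; the second row lacks FEED_ATTRIBUTE_ID and is skipped.
data = {"attribute_mappings": [
    {"FEED_ATTRIBUTE_ID": 10, "DATA_OBJECT_ATTRIBUTE_ID": 20,
     "TRANSFORM_FN": "upper"},
    {"DATA_OBJECT_ATTRIBUTE_ID": 30},
]}
attrs = create_feed_attr_data_object_attrs(data)
# attrs == [{'FEED_ATTRIBUTE_ID': 10, 'DATA_OBJECT_ATTRIBUTE_ID': 20,
#            'TRANSFORM_FN': 'upper'}]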
Example #5
def get_options(args):
    """実験の設定を取得する

    Arguments:
        args {Object} -- コマンドライン引数

    Raises:
        TypeError: args.sourceが不正(weka, uci以外)の場合

    Returns:
        orderddict -- 実験の設定
    """

    config = get_configs(args, CONFIG_DIR_UCI)

    options = ordereddict()
    options['datasets'] = {}

    if args.source == 'weka':
        yaml = YAML()
        with open(CONFIG_DIR_WEKA, encoding='utf-8') as f:
            config_weka = yaml.load(f)
        # also record the source
        config_weka['datasets'][args.dataset]['source'] = 'weka'
        options['datasets'] = config_weka['datasets'][args.dataset]
    elif args.source == 'uci':
        # also record the source
        config['datasets'][args.dataset]['source'] = 'uci'
        options['datasets'] = config['datasets'][args.dataset]
    else:
        raise TypeError('args.source: "{}" is invalid.'.format(args.source))
    options['datasets'].move_to_end('source', False)
    options['experiments'] = config['experiments']
    options['model_params'] = config['model_params']
    options["experiments"]["metrics"] = config["datasets"][
        args.dataset]["metric"]
    # # add the metrics info
    # metrics = args.metrics
    # if metrics is None:
    #     task = options["datasets"]["task"]
    #     if task=="regression":
    #         metrics = "RMSE"
    #     elif task=="classification":
    #         metrics = "error rate"
    # options["experiments"]["metrics"] = metrics
    options['experiments'].move_to_end('methods', True)

    return options
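For reference, a hypothetical minimal shape of the config this function reads, inferred from the key accesses above (dataset and method names are made up):

# Hypothetical config shape; real files live under CONFIG_DIR_UCI/_WEKA.
config = {
    'datasets': {
        'iris': {'task': 'classification', 'metric': 'error rate'},
    },
    'experiments': {'methods': ['svm', 'random_forest']},
    'model_params': {'max_depth': 3},
}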
Example #6
def create_feed_data_objects(feed_id, data_object_id, data):
    field_map = {
        "src_filter_sql": ["SRC_FILTER_SQL", str],
        "transform_sql_query": ["TRANSFORM_SQL_QUERY", str],
    }

    feed_data_obj = ordereddict()
    feed_data_obj["FEED_ID"] = util.build_dict_value_from_keys(
        feed_id, ["FEED_NAME", "DB_NAME"])
    feed_data_obj["DATA_OBJECT_ID"] = util.build_dict_value_from_keys(
        data_object_id, ["DATA_OBJECT_NAME", "TGT_DB_NAME"])
    for incoming, existing in field_map.items():
        if data.get(incoming):
            feed_data_obj[existing[0]] = existing[1](data[incoming])
    print(f"created feed data obj: {feed_data_obj}")
    return [feed_data_obj]
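`util.build_dict_value_from_keys`, used here and in several of the examples below, is another project helper; a hypothetical reconstruction consistent with its call sites:

from ruamel.yaml.compat import ordereddict

def build_dict_value_from_keys(source: dict, keys: list) -> ordereddict:
    # Hypothetical sketch: project the named keys out of `source`,
    # preserving the order in which they are listed.
    return ordereddict((key, source[key]) for key in keys)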
Example #7
    @classmethod
    def to_yaml(cls, dumper, self):
        # Representer hook used by ruamel.yaml; `self` is the scenario instance.
        # logging.debug(cls.yaml_flow_style)
        scn_data = common.remove_empty(
            ordereddict({
                'name': self.name,
                'description': self.description,
                'populations': []
            }))
        for pop in self.populations:
            var_pol = pop['variation_policy']
            pop_data = {
                'name': pop['population'].name,
            }
            pop_data.update(var_pol.provide_inner_data())
            scn_data['populations'].append(pop_data)

        return dumper.represent_data(scn_data)
Example #8
def read_one_dag(rd, dag_id):
    # read loads of a dag - load.yaml
    # read data objects for all loads - data_object_data_object.yaml
    loads = rd.loads.filter_entries("DAG_ID", dag_id)

    res = []
    for load in loads:
        temp = ordereddict()
        temp.update(load)
        temp["LOAD_WAREHOUSE_CONFIG_NAME"] = \
            load["LOAD_WAREHOUSE_CONFIG_ID"]["LOAD_WAREHOUSE_CONFIG_NAME"]
        dodo = rd.data_object_data_objects.filter_entries(
            "LOAD_ID", load["ZZ_LOAD_ID"])
        if dodo:
            temp.update(dodo[0])
            temp.update(dodo[0]["LOAD_SOURCE_DATA_OBJECT_ID"])
        # TODO validate schema
        res.append(temp)
    return res
Example #9
def create_data_object_data_objects(data):
    dodos = []
    for row in data["dag_details"]:
        new_dodo = ordereddict()
        new_dodo["LOAD_ID"] = util.build_dict_value_from_keys(
            row, ["LOAD_NAME"])
        src_data_object_name = row["DATA_OBJECT_NAME"]
        src_data_object_tgt_db_name = row["TGT_DB_NAME"]

        new_dodo["LOAD_TARGET_DATA_OBJECT_ID"] = {
            "DATA_OBJECT_NAME": src_data_object_name,
            "TGT_DB_NAME": src_data_object_tgt_db_name.replace("cds", "fds")
        }
        new_dodo["LOAD_SOURCE_DATA_OBJECT_ID"] = {
            "DATA_OBJECT_NAME": src_data_object_name,
            "TGT_DB_NAME": src_data_object_tgt_db_name,
        }
        new_dodo["LOAD_DEPENDENCY_TYPE"] = "Hard"
        dodos.append(new_dodo)
    return dodos
Example #10
def create_data_object_attributes(values: dict, data):
    field_map = {
        "ATTRIBUTE_NAME": ["ATTRIBUTE_NAME", str],
        "ATTRIBUTE_NO": ["ATTRIBUTE_NO", int],
        "ATTRIBUTE_TYPE": ["ATTRIBUTE_TYPE", str],
        "PRIMARY_KEY_IND": ["PRIMARY_KEY_IND", str],
    }
    data_object_attrs = []

    for row in data["data_object_attributes"]:
        new_data_object_attr = ordereddict()
        new_data_object_attr["DATA_OBJECT_ID"] = \
            util.build_dict_value_from_keys(
                values, ["DATA_OBJECT_NAME", "TGT_DB_NAME"]
            )
        for incoming, existing in field_map.items():
            if row.get(incoming):
                new_data_object_attr[existing[0]] = existing[1](row[incoming])

        data_object_attrs.append(new_data_object_attr)
    return data_object_attrs
Example #11
def create_loads(values, data):
    field_map = {
        "LOAD_NAME": ["LOAD_NAME", str],
        "LOAD_DESC": ["LOAD_DESC", str],
        "LOAD_EXECUTE_TYPE": ["LOAD_EXECUTE_TYPE", str],
        "LOAD_EXECUTE_LOGIC_NAME": ["LOAD_EXECUTE_LOGIC_NAME", str],
    }
    loads = []

    for row in data["dag_details"]:
        new_load = ordereddict()
        new_load["DAG_ID"] = util.build_dict_value_from_keys(
            values, ["DAG_NAME"])
        for incoming, existing in field_map.items():
            if row.get(incoming):
                new_load[existing[0]] = existing[1](row[incoming])
        new_load["LOAD_WAREHOUSE_CONFIG_ID"] = {
            "LOAD_WAREHOUSE_CONFIG_NAME": row["LOAD_WAREHOUSE_CONFIG_NAME"]
        }
        loads.append(new_load)

    return loads
Example #12
def create_feed_attributes(values: dict, data):
    field_map = {
        "ATTRIBUTE_NAME": ["ATTRIBUTE_NAME", str],
        "ATTRIBUTE_NO": ["ATTRIBUTE_NO", int],
        "ATTRIBUTE_TYPE": ["ATTRIBUTE_TYPE", str],
        "PRIMARY_KEY_IND": ["PRIMARY_KEY_IND", str],
        "NULLABLE_IND": ["NULLABLE_IND", str],
        "ATTRIBUTE_LENGTH": ["ATTRIBUTE_LENGTH", int],
        "NESTED_ATTRIBUTE_TYPE": ["NESTED_ATTRIBUTE_TYPE", str],
        "NESTED_ATTRIBUTE_PATH": ["NESTED_ATTRIBUTE_PATH", str],
    }
    feed_attrs = []

    for row in data["feed_attributes"]:
        new_feed_attribute = ordereddict()
        new_feed_attribute["FEED_ID"] = util.build_dict_value_from_keys(
            values, ["SOURCE_SYSTEM", "FEED_NAME"])
        for incoming, existing in field_map.items():
            if row.get(incoming):
                new_feed_attribute[existing[0]] = existing[1](row[incoming])
        feed_attrs.append(new_feed_attribute)
    return feed_attrs
Example #13
def save_dag(rd, dag_id, data):
    dodos = create_data_object_data_objects(data)

    if dag_id == "NEW_DAG":
        dag = ordereddict({
            "DAG_NAME": data["new_dag_name"],
            "DAG_DESCRIPTION": data["new_dag_description"],
        })
        rd.dags.add_entry(dag)

        loads = create_loads(dag, data)
        # these load names should not already exist
        existing_load_names, _ = rd.loads.filter_entries_any(
            "LOAD_NAME", [e["LOAD_NAME"] for e in loads])
        if existing_load_names:
            raise ValueError(
                f"Cannot create loads - these exists: {existing_load_names}")
        rd.loads.add_entries(loads)

        rd.data_object_data_objects.add_entries(dodos)
    else:
        # dag_id arrives as a serialized literal; ast.literal_eval is the safe
        # way to parse it (avoids executing arbitrary code; needs `import ast`)
        dag_id = ast.literal_eval(dag_id)
        incoming_loads = create_loads(dag_id, data)
        deleted_loads = rd.loads.delete_entries([["DAG_ID", dag_id]])
        rd.loads.add_entries(incoming_loads)

        # delete relevant data_object_data_objects
        rd.data_object_data_objects.delete_entries_any(
            "LOAD_ID", [x["ZZ_LOAD_ID"] for x in deleted_loads])

        for to_add in dodos:
            rd.data_object_data_objects.delete_entries(
                [["LOAD_ID", to_add["LOAD_ID"]]])
        rd.data_object_data_objects.add_entries(dodos)

    rd.dags.dump()
    rd.loads.dump()
    rd.data_object_data_objects.dump()
Example #14
    @classmethod
    def to_yaml(cls, dumper, self):
        # Generic representer hook: dump all non-empty instance attributes.
        # logging.debug(cls.yaml_flow_style)
        data = common.remove_empty(ordereddict(self.__dict__))

        return dumper.represent_data(data)
Example #15
    def edit_config(
        self,
        name,
        step,
        function,
        **kwargs
    ):
        """
        [new/experimental] Add or edit functions in all configuration files
        of a project.

        Parameters
        ----------

        name: str
            name of the config file; this gets appended to all files and
            serves as an identifier of a specific analysis pipeline
        step: str
            name of the step the function belongs to
        function: str
            name of the function
        """

        ## ask the user for confirmation only once across directories
        flag_checked = False
        
        ## go through project directories
        for directory in self.dirpaths:
            dirname = os.path.basename(directory)

            ## load this directory's config
            preset_path = os.path.join(
                self.root_dir, directory, "pype_config_" + name + ".yaml"
            )

            if not os.path.isfile(preset_path):
                continue
            config = _load_yaml(preset_path)

            ordered_steps = ["preprocessing",
                             "segmentation",
                             "measurement",
                             "visualization",
                             "export"]

            if step not in config:
                new_config = ordereddict([("image", ordereddict(config["image"]))])
                new_config.update(ordereddict([("pype", ordereddict(config["pype"]))]))
                for ordered_step in ordered_steps:
                    if ordered_step in config:
                        new_config.update(ordereddict([(ordered_step, config[ordered_step])]))
                    elif ordered_step == step:
                        new_config.update(ordereddict([(ordered_step, [function] )]))
            else:
                new_config = copy.deepcopy(config)
                if not function in new_config[step]:
                    new_config[step].append(function)
                    
            
            if not flag_checked:
                _show_yaml(new_config)
                check = input(
                    "This is what the new config may look like "
                    "(can differ between files) - proceed? "
                )

            if check in ["True", "true", "y", "yes"]:
                flag_checked = True
                _save_yaml(new_config, preset_path)
                print("New config saved for " + dirname)
            else:
                print("User check failed - aborting.")
                return