Ejemplo n.º 1
0
    def modify(self, mods, inputs=None, outputs=None):
        """
        Rebuild the model from an edited version of its processed configuration.

        The current weights are read first, the modifications are applied to a
        Config derived from the processed configuration, a new model is built
        from the result and every saved weight whose key matches a layer name
        of the new model is loaded into that layer. The new model replaces
        self.core_model.model.

        mods:    a list of dictionaries; only the first key of each one is used.
                 - 'delete': the value is a path that is removed from the config.
                 - a key containing '*': an fnmatch pattern; every matching
                   path is set to the value.
                 - any other key: a config path ('.' is accepted as a
                   separator and converted to '/') that is set to the value.
                   When its first component is not an existing key, it is
                   registered as a new layer with that name.
                 String values are parsed as YAML before being stored.
        inputs:  a list with the names of the inputs
        outputs: a list with the names of the outputs
        """
        saved_weights = self.get_weights()
        parser = YAML()
        flat_conf = Config(self.core_model.processed_config)
        layer_keys = list(flat_conf.keys())
        nested_conf = Config(shallow_to_deep(flat_conf))

        def assign(path, raw_value):
            # Strings go through the YAML parser; other values are stored as given.
            if isinstance(raw_value, str):
                raw_value = parser.load(raw_value)
            nested_conf[path] = raw_value

        for mod in mods:
            key = list(mod.keys())[0]
            value = mod[key]

            if key == 'delete':
                nested_conf.pop(value)
                if value in layer_keys:
                    layer_keys.remove(value)
                continue

            if '*' in key:
                pattern = key.lstrip('/')
                matching = [
                    path for path in nested_conf.to_shallow().keys()
                    if fnmatch.fnmatch(path, pattern)
                ]
                for path in matching:
                    assign(path.replace('.', '/'), value)
                continue

            target = key.replace('.', '/')
            root = target.split('/')[0]
            if root not in nested_conf.keys():
                # Unknown first component: this mod is adding a new layer.
                layer_keys.append(root)
                nested_conf['{}/name'.format(root)] = root
            assign(target, value)

        architecture = shallow_to_original_keys(nested_conf.to_shallow(),
                                                layer_keys)
        rebuilt = self.build(processed_config=architecture,
                             input_names=inputs,
                             output_names=outputs)

        # Only layers that still exist under the same name get their weights back.
        known_layers = [layer.name for layer in rebuilt.layers]
        for weight_key, weight_values in saved_weights.items():
            if weight_key not in known_layers:
                continue
            rebuilt.get_layer(weight_key).set_weights(weight_values)
        self.core_model.model = rebuilt
Ejemplo n.º 2
0
def include_config(config, special_tags, global_config, default_config,
                   missing_paths):
    """
    Expand every 'include' entry found in config.

    For each path returned by config.find_keys('include'), the enabled entries
    that name a 'config' file are loaded, processed with process_config and
    merged either into the parent of the 'include' key or, when the key is at
    the top level, into config itself. The 'include' key is removed afterwards.

    When the include value is a dictionary with a 'switch' entry, only the
    items of its 'configs' list whose 'name' is listed in the switch are used.

    Every remaining key/value of an include entry is passed to
    replace_in_config as a '(key)' -> value replacement on the imported config.
    A 'defaults' section of an imported config is moved into default_config,
    and the entry's 'mods', if any, are applied after processing.

    Returns the resulting config (a new object when a top-level merge happened).
    """
    for include_path in config.find_keys('include'):
        entries = config[include_path]

        selected = None
        if isinstance(entries, dict):
            selected = entries.get('switch', None)
        if selected is not None:
            if not isinstance(selected, list):
                selected = [selected]
            entries = [
                candidate for candidate in entries['configs']
                if candidate.get('name', None) in selected
            ]

        # Path of the node that contains the 'include' key (None at top level).
        parent_path = None
        if '/' in include_path:
            parent_path = include_path.rsplit('/', 1)[0]

        for entry in entries:
            if not (entry.get('enable', True) and entry.get('config', None)):
                continue

            # Included files are resolved relative to the including YAML.
            yaml_to_include = Path(config.yaml_path.parent,
                                   entry.pop('config'))
            imported = Config(yaml_to_include, yaml_tags=special_tags)
            if 'defaults' in imported:
                default_config.update(imported.pop('defaults'))

            entry_mods = entry.get('mods', None)
            for placeholder, replacement in entry.items():
                imported = replace_in_config(imported,
                                             '({})'.format(placeholder),
                                             replacement)
            imported = process_config(imported, special_tags, global_config,
                                      default_config, missing_paths)
            if entry_mods:
                apply_mods(entry_mods, imported)

            if parent_path:
                parent = Config(config[parent_path])
                parent.yaml_path = config.yaml_path
                config[parent_path] = merge_configs([parent, imported])
            else:
                # merge_configs returns a new object; keep the yaml_path of
                # the original so later relative includes still resolve.
                saved_yaml_path = config.yaml_path
                config = merge_configs([Config(config), imported])
                config.yaml_path = saved_yaml_path
        config.pop(include_path)
    return config
Ejemplo n.º 3
0
def replace_yamls(main_config, special_tags):
    """
    Replace every value starting with the 'insert_config' symbol by a Config
    built from the text that follows the symbol, and return main_config.
    """

    def load_referenced_config(tagged_value):
        # Whatever follows the last occurrence of the symbol is handed to Config.
        # NOTE(review): other loaders in this file pass yaml_tags=, this one
        # passes special_tags= -- confirm which keyword Config expects.
        reference = tagged_value.split(symbols['insert_config'])[-1]
        return Config(reference, special_tags=special_tags)

    main_config.find_path(symbols['insert_config'],
                          mode='startswith',
                          action=load_referenced_config)
    return main_config
Ejemplo n.º 4
0
Archivo: core.py Proyecto: mrpep/paips
    def __make_hash_dict(self):
        """
        Build self._hash_dict, the dictionary used to hash the task.

        It is a deep copy of the parameters in which values starting with the
        'nocache' symbol are removed and TaskIO values are replaced by their
        hash. As a side effect self.parameters becomes a Config and the
        'nocache' symbol is stripped from its values.
        """
        self._hash_dict = copy.deepcopy(self.parameters)
        if not isinstance(self._hash_dict, Config):
            self._hash_dict = Config(self._hash_dict)
        if not isinstance(self.parameters, Config):
            self.parameters = Config(self.parameters)

        hash_dict = self._hash_dict
        nocache_symbol = symbols['nocache']

        # Non-cacheable parameters are dropped from the hash ...
        hash_dict.find_path(nocache_symbol,
                            mode='startswith',
                            action='remove_value')
        # ... but kept, without the symbol, in the actual parameters.
        self.parameters.find_path(nocache_symbol,
                                  mode='startswith',
                                  action='remove_substring')

        taskio_paths = [
            path for path, value in hash_dict.to_shallow().items()
            if isinstance(value, TaskIO)
        ]
        for path in taskio_paths:
            hash_dict[path] = hash_dict[path].get_hash()
Ejemplo n.º 5
0
def insert_yaml_value(config, special_tags, global_config, default_config,
                      missing_paths):
    """
    Replace, in place, every value of config that starts with the
    'insert_config' symbol by the processed content of the YAML it points to.

    The 'global' section of each inserted config is merged into global_config
    and its 'defaults' section, if present, is moved into default_config
    before the inserted config is run through process_config.
    Nothing is returned.
    """
    marker = symbols['insert_config']
    for tagged_path in config.find_path(marker, mode='startswith'):
        # Whatever follows the last occurrence of the symbol is the YAML path.
        yaml_file = config[tagged_path].split(marker)[-1]
        loaded = Config(yaml_file, yaml_tags=special_tags)
        global_config.update(loaded.get('global', {}))
        if 'defaults' in loaded:
            default_config.update(loaded.pop('defaults'))
        config[tagged_path] = process_config(loaded, special_tags,
                                             global_config, default_config,
                                             missing_paths)
Ejemplo n.º 6
0
    def _serialize_model(self, save_optimizer=False, extra_data=None):
        """
        Collect the model state into a plain dictionary.

        The dictionary always holds 'weights', 'original_config',
        'input_shapes' and 'output_shapes'. 'hierarchy' is added when an
        architecture config exists. When a core model exists,
        'unfolded_config' is added, plus 'optimizer_state' if save_optimizer
        is set and the entries of extra_data if given.

        Values of the original config whose type is named 'BatchGenerator'
        are replaced by their data_processor_config.
        """
        serialized = {'weights': self.get_weights()}

        config_snapshot = Config(self.original_config)
        for path in config_snapshot.all_paths():
            node = config_snapshot[path]
            # Compared by type name, so the BatchGenerator class is not needed here.
            if type(node).__name__ == 'BatchGenerator':
                config_snapshot[path] = node.data_processor_config
        serialized['original_config'] = config_snapshot

        if self.architecture_config:
            serialized['hierarchy'] = self.architecture_config.hierarchy

        if self.core_model:
            serialized['unfolded_config'] = self.core_model.processed_config
            if save_optimizer:
                serialized['optimizer_state'] = self.get_optimizer()
            if extra_data:
                serialized.update(extra_data)

        serialized['input_shapes'] = self.input_shapes
        serialized['output_shapes'] = self.output_shapes
        return serialized
Ejemplo n.º 7
0
def apply_mods(modstr, config):
    """
    Apply modifications to config in place.

    modstr: None (nothing is done), a string of 'path=value' assignments
            separated by '&', or a list of dictionaries.
            In the string form the text after the last '=' is the value: it is
            stored verbatim when it contains '!' and parsed as YAML otherwise.
            Assignments without '=' are ignored.
            In the list form each dictionary is flattened and merged into
            config with update().
    config: the configuration to modify.
    """
    parser = YAML()
    if modstr is None:
        return

    if isinstance(modstr, str):
        for assignment in modstr.split('&'):
            if '=' not in assignment:
                continue
            # Split on the LAST '=' so the path itself may contain '='.
            target, _, raw_value = assignment.rpartition('=')
            if '!' in raw_value:
                # Tagged values are kept as strings for later processing.
                config[target] = raw_value
            else:
                config[target] = parser.load(raw_value)
    elif isinstance(modstr, list):
        for mod in modstr:
            config.update(Config(mod).to_shallow())
Ejemplo n.º 8
0
def get_config(filename):
    """Build and return a Config from filename, created with safe=False."""
    return Config(filename, safe=False)
Ejemplo n.º 9
0
def _apply_external_mods(architecture, mods):
    """
    Apply a list of modifications to an external model architecture.

    Only the first key of each mod dictionary is used: 'delete' removes the
    path given as value, a key containing '*' is an fnmatch pattern whose
    matching paths are all set to the value, and any other key is a path that
    is set to the value (registering a new layer when its first component is
    unknown). String values are parsed as YAML.
    Returns the result of shallow_to_original_keys on the modified config.
    """
    import fnmatch

    yaml_loader = YAML()
    m_conf = Config(architecture)
    original_keys = list(m_conf.keys())
    deep_conf = Config(shallow_to_deep(m_conf))

    def _assign(path, value):
        if isinstance(value, str):
            value = yaml_loader.load(value)
        deep_conf[path] = value

    for mod in mods:
        mod_key = list(mod.keys())[0]
        mod_value = mod[mod_key]
        if mod_key == 'delete':
            deep_conf.pop(mod_value)
            if mod_value in original_keys:
                original_keys.remove(mod_value)
        elif '*' in mod_key:
            pattern = mod_key.lstrip('/')
            found_paths = [
                k for k in deep_conf.to_shallow().keys()
                if fnmatch.fnmatch(k, pattern)
            ]
            for k in found_paths:
                _assign(k.replace('.', '/'), mod_value)
        else:
            mod_key = mod_key.replace('.', '/')
            layer_name = mod_key.split('/')[0]
            if layer_name not in deep_conf.keys():
                # This means we are adding a new layer
                original_keys.append(layer_name)
                deep_conf['{}/name'.format(layer_name)] = layer_name
            _assign(mod_key, mod_value)

    return shallow_to_original_keys(deep_conf.to_shallow(), original_keys)


def _resolve_hierarchy_name(layer_name, graph, hierarchy, port):
    """
    Translate a hierarchy entry name into the first layer listed under its
    ``port`` ('input' or 'output'). Names that are empty, already present in
    the graph or unknown to the hierarchy are returned unchanged.
    """
    if layer_name and layer_name not in graph.nodes(
    ) and layer_name in hierarchy:
        return hierarchy[layer_name][port][0]
    return layer_name


def external_unfold(name, config, metadata=None, logger=None):
    """
    Unfold an external-model layer into the layers of a previously built model.

    name:     name passed to new_nodes_to_config for the unfolded layers.
    config:   configuration of the external layer. Keys read here:
              'model' (required, name of the external model), 'layer',
              'up_to', 'from', 'exclude_input' (default True),
              'reset_weights' (default False), 'mods', 'time_distributed',
              'trainable_from', 'trainable_layers', 'trainable' (default True),
              'training' (default False) and 'input'.
    metadata: must provide metadata['externals']['Models'], mapping model
              names to a path loadable with joblib (holding a dictionary or a
              dienen Model) or to a dienen Model instance.
    logger:   not used.

    Layer selection: 'layer' picks a single layer; 'up_to' picks a layer and
    all its ancestors; 'from' picks a layer and all its descendants; both
    together pick what lies between them; with none of them every layer is
    used. Input layers are dropped unless 'exclude_input' is false.

    Returns the (new_config, hierarchy) pair produced by new_nodes_to_config,
    with 'trainable', 'training', 'time_distributed', 'from_model' and
    'from_layer' set on the layers as requested.

    Raises TypeError when the external model is of an unsupported type.
    """
    external_models = metadata['externals']['Models']
    external_model_name = config['model']
    external_layer_name = config.get('layer', None)
    external_last_layer = config.get('up_to', None)
    external_first_layer = config.get('from', None)
    external_exclude_inputs = config.get('exclude_input', True)
    external_reset_weights = config.get('reset_weights', False)
    external_mods = config.get('mods', None)
    external_time_distributed = config.get('time_distributed', False)

    trainable_from = config.get('trainable_from', None)
    trainable_layers = config.get('trainable_layers', None)
    trainable = config.get('trainable', True)
    training_flag = config.get('training', False)

    import dienen

    external_source = external_models[external_model_name]
    loaded_from_file = isinstance(external_source, str)
    if loaded_from_file:
        # NOTE: joblib.load unpickles the file; only load trusted model files.
        external_model = joblib.load(external_source)
    else:
        external_model = external_source

    if loaded_from_file and isinstance(external_model, dict):
        external_model_architecture = external_model['unfolded_config']
        external_hierarchy = external_model['hierarchy']
    elif isinstance(external_model, dienen.core.model.Model):
        external_model_architecture = external_model.core_model.processed_config
        external_hierarchy = external_model.architecture_config.hierarchy
    else:
        # Previously this fell through and failed later with an unbound name.
        raise TypeError(
            'External model {} has unsupported type {}'.format(
                external_model_name,
                type(external_model).__name__))

    if external_mods:
        external_model_architecture = _apply_external_mods(
            external_model_architecture, external_mods)

    # Directed graph of the external model: one edge per (input -> layer).
    g = nx.DiGraph()
    for layer_name, layer_config in external_model_architecture.items():
        if layer_config['class'] != 'Input':
            if isinstance(layer_config['input'], list):
                for k in layer_config['input']:
                    g.add_edge(k, layer_name)
            else:
                g.add_edge(layer_config['input'], layer_name)

    external_layer_name = _resolve_hierarchy_name(external_layer_name, g,
                                                  external_hierarchy, 'output')
    external_last_layer = _resolve_hierarchy_name(external_last_layer, g,
                                                  external_hierarchy, 'output')
    external_first_layer = _resolve_hierarchy_name(external_first_layer, g,
                                                   external_hierarchy, 'input')

    if external_last_layer and not external_first_layer and not external_layer_name:
        layers_subset = list(nx.ancestors(
            g, external_last_layer)) + [external_last_layer]
    elif external_first_layer and not external_last_layer and not external_layer_name:
        # nx.dfs_successors only maps nodes to their DFS-tree children, so
        # indexing it by the first layer returned just its direct children.
        # nx.descendants yields every layer reachable from it.
        layers_subset = list(nx.descendants(
            g, external_first_layer)) + [external_first_layer]
    elif external_first_layer and external_last_layer and not external_layer_name:
        # The keys of nx.dfs_successors skip nodes without DFS-tree children
        # (e.g. a branch merging into an already visited layer).
        after_from = nx.descendants(
            g, external_first_layer) | {external_first_layer}
        before_to = nx.ancestors(g, external_last_layer)
        layers_subset = list(after_from & before_to) + [external_last_layer]
    elif external_layer_name:
        layers_subset = [external_layer_name]
    else:
        layers_subset = list(external_model_architecture.keys())

    if external_exclude_inputs:
        layers_subset = [
            layer for layer in layers_subset
            if external_model_architecture[layer]['class'] != 'Input'
        ]

    unfolded_layers = [external_model_architecture[l] for l in layers_subset]

    # Entry layers: those with at least one input outside the selected subset.
    in_layers = []
    for l in unfolded_layers:
        ins = l['input']
        if not isinstance(ins, list):
            ins = [ins]
        if any(x not in layers_subset for x in ins):
            in_layers.append(l['name'])

    new_nodes = [{
        layer['name']: pop_dictreturn(layer, 'name')
    } for layer in unfolded_layers]
    new_config, hierarchy = new_nodes_to_config(new_nodes, name)

    # Entry layers are rewired to the input given to the external layer.
    if 'input' in config:
        for layer in in_layers:
            new_config[layer]['input'] = config['input']

    if trainable and not trainable_from and not trainable_layers:
        trainable_layers = [
            layer_name for layer_name, layer in new_config.items()
            if layer['class'] != 'Input'
        ]
    elif trainable_from:
        if trainable_from not in external_model_architecture and trainable_from in external_hierarchy:
            # 'input' is the key used for every other hierarchy lookup here.
            trainable_from = external_hierarchy[trainable_from]['input'][0]
        # Include the starting layer and everything downstream of it; the
        # keys of nx.dfs_successors would leave out the last layers.
        trainable_layers = nx.descendants(g,
                                          trainable_from) | {trainable_from}
    elif not trainable:
        trainable_layers = []

    # Set trainable false in non-trainable layers
    for layer_name, layer_config in new_config.items():
        if layer_name not in trainable_layers:
            layer_config['trainable'] = False
            # This is to avoid problems with BN accumulated statistics
            layer_config['training'] = training_flag
        else:
            layer_config['trainable'] = True

    if external_time_distributed:
        for layer_name, layer_config in new_config.items():
            layer_config['time_distributed'] = True

    # Make each layer search for the weights from external model.
    # Any truthy 'reset_weights' (True or a non-empty list) links no layer.
    if not external_reset_weights:
        if isinstance(external_reset_weights, list):
            # An empty list keeps its previous meaning: no layer is linked.
            external_weight_layers = []
        else:
            # False and other falsy values (e.g. None, which used to hit an
            # unbound variable) link every non-Input layer.
            external_weight_layers = [
                layer_name for layer_name, layer in new_config.items()
                if layer['class'] != 'Input'
            ]
        for layer in external_weight_layers:
            new_config[layer]['from_model'] = external_model_name
            new_config[layer]['from_layer'] = layer

    return new_config, hierarchy
Ejemplo n.º 10
0
def _task_relative_paths(paths):
    """
    Drop the paths that start with 'global' and split each remaining one into
    (task parameters level, path relative to that level).
    """
    split_paths = []
    for p in paths:
        if p.startswith('global'):
            continue
        task_level = task_parameters_level_from_path(p)
        split_paths.append((task_level, p.split(task_level + '/')[-1]))
    return split_paths


def load_experiment(configs, mods=None, global_config=None, logger=None):
    """
    Load and fully process the main experiment configuration.

    configs:       list of config sources; each one is loaded, but only the
                   first is used as the main config.
    mods:          modifications passed to apply_mods before processing.
    global_config: optional dictionary of global values; it is updated in
                   place with the 'global' section of the main config.
    logger:        not used.

    Returns the processed main config, with 'cluster_config' and
    'global_config' entries set and, for every task holding parameters tagged
    with the 'distributed-pool' or 'serial-map' symbols, a 'parallel' or
    'map_vars' entry listing those parameters (prefixed with the 'nocache'
    symbol).
    """
    # By default, yaml uses custom tags marked as !, however, we want to use
    # them in a more general way even in dictionaries. To avoid raising
    # exceptions, an ignorable tag is created which will return the string
    # unchanged for later processing.
    ignorable_tags = [v.strip() for v in symbols.values() if v.startswith('!')]
    special_tags = [IgnorableTag(tag) for tag in ignorable_tags]

    configs = [Config(path_i, yaml_tags=special_tags) for path_i in configs]
    main_config = configs[0]
    apply_mods(mods, main_config)

    if global_config is None:
        global_config = {}
    global_config.update(main_config.get('global', {}))
    default_config = main_config.get('defaults', {})

    if 'global' in main_config:
        main_config['global'].update(global_config)
    else:
        main_config['global'] = global_config

    # Config processing/merging/expanding
    missing_paths = []
    main_config = process_config(main_config, special_tags, global_config,
                                 default_config, missing_paths)

    # Values resolved in one pass can unlock tags that were missing in the
    # previous one, so processing is repeated a bounded number of times.
    n_tries = 20
    while n_tries > 0 and len(missing_paths) > 0:
        n_tries -= 1
        global_config.update(main_config['global'])
        # The section is named 'defaults' everywhere else; reading 'default'
        # here made this refresh a no-op.
        default_config.update(main_config.get('defaults', {}))
        missing_paths = []
        main_config = process_config(main_config, special_tags, global_config,
                                     default_config, missing_paths)

    if len(missing_paths) > 0:
        print('Warning: Cannot resolve tags {}'.format(missing_paths))
        # Unresolved tags are set to None so a final pass can complete.
        for k in missing_paths:
            global_config[k] = None
        missing_paths = []
        main_config = process_config(main_config, special_tags, global_config,
                                     default_config, missing_paths)

    default_cluster_config = {'manager': None, 'n_cores': 1, 'niceness': 20}

    cluster_config = main_config.get('cluster_config', default_cluster_config)
    main_config['cluster_config'] = cluster_config
    main_config['global_config'] = global_config

    # For every task with a variable that we want to loop,
    # we find the tag and create a parameter 'parallel' which holds the names
    # of the loopable params, and adds a '!nocache' so that it is not cached
    parallel_paths = _task_relative_paths(
        main_config.find_path(symbols['distributed-pool'],
                              mode='startswith',
                              action='remove_substring'))

    async_flags = [
        k for k in list(main_config.all_paths())
        if k.endswith('async') and main_config[k] == True
    ]
    parallel_paths_async = _task_relative_paths(async_flags)

    default_niceness = cluster_config.get('niceness', 20)
    # A user supplied cluster_config may omit 'n_cores'; fall back to the
    # same value default_cluster_config uses instead of raising KeyError.
    default_n_cores = cluster_config.get('n_cores',
                                         default_cluster_config['n_cores'])

    for task_level, _ in parallel_paths_async:
        main_config[task_level + '/niceness'] = default_niceness

    parallel_paths_ = {}
    for task_level, param in parallel_paths:
        parallel_paths_.setdefault(task_level + '/parallel', []).append(param)
        if 'n_cores' not in main_config[task_level]:
            main_config[task_level + '/n_cores'] = default_n_cores
        if 'niceness' not in main_config[task_level]:
            main_config[task_level + '/niceness'] = default_niceness

    map_paths = _task_relative_paths(
        main_config.find_path(symbols['serial-map'],
                              mode='startswith',
                              action='remove_substring'))
    map_paths_ = {}
    for task_level, param in map_paths:
        map_paths_.setdefault(task_level + '/map_vars', []).append(param)

    # The lists are stored as their str() form behind the nocache symbol.
    # The YAML dump that used to be written to a throwaway stream was never
    # read, so it has been dropped.
    nocache_prefix = symbols['nocache'] + ' '
    main_config.update(
        {k: nocache_prefix + str(v) for k, v in parallel_paths_.items()})
    main_config.update(
        {k: nocache_prefix + str(v) for k, v in map_paths_.items()})

    return main_config
Ejemplo n.º 11
0
Archivo: core.py Proyecto: mrpep/paips
    def __init__(self,
                 parameters,
                 global_parameters=None,
                 name=None,
                 logger=None,
                 simulate=False):
        """
        parameters: dictionary with all parameters given to a task. The keys
                    'output_names' and 'mods' are removed from it here; 'mods'
                    are applied to the remaining parameters.
        global_parameters: dictionary with parameters common to all tasks;
                    merged over the built-in defaults.
        name: task name
        logger: task logger
        simulate: if True, the task won't get executed

        The output directory is created if missing and the final parameters
        are saved under '<output_path>/configs/<name>.yaml'.
        """
        self.global_parameters = {
            'cache': True,
            'cache_path': 'cache',
            'cache_compression': 0,
            'output_path': 'experiments',
            'overwrite_export': True
        }

        if global_parameters:
            self.global_parameters.update(global_parameters)

        if not GenericFile(self.global_parameters['output_path']).exists():
            GenericFile(
                self.global_parameters['output_path']).mkdir(parents=True)

        self.name = name
        self.valid_args = []
        # The original line was an annotation ("self.default_no_cache: []"),
        # which assigns nothing. hasattr keeps any value a subclass defines.
        if not hasattr(self, 'default_no_cache'):
            self.default_no_cache = []

        self.parameters = parameters

        self.simulate = simulate

        self.output_names = self.parameters.pop('output_names', ['out'])
        self.cache = get_delete_param(self.parameters, 'cache',
                                      self.global_parameters['cache'])
        # 'in_memory' is not among the defaults above, so indexing raised
        # KeyError unless the caller supplied it globally.
        self.in_memory = get_delete_param(
            self.parameters, 'in_memory',
            self.global_parameters.get('in_memory', False))

        self.dependencies = []
        self.logger = logger

        if 'mods' in self.parameters:
            task_mods = self.parameters.pop('mods')
            # Apply the mods to the parameters themselves; applying them to a
            # temporary Config(self.parameters) could discard the changes.
            if not isinstance(self.parameters, Config):
                self.parameters = Config(self.parameters)
            apply_mods(task_mods, self.parameters)

        self.__make_hash_dict()
        self.initial_parameters = copy.deepcopy(self.parameters)

        self.export_path = Path(self.global_parameters.get('output_path'),
                                self.name)
        self.export = self.parameters.get('export', False)
        self.symlinkdb_path = Path(self.global_parameters.get('output_path'),
                                   'links.txt')

        fname = GenericFile(self.global_parameters['output_path'], 'configs',
                            '{}.yaml'.format(self.name))
        self.parameters.save(Path(fname.local_filename), mode='unsafe')
        # For remote storage the locally written file still has to be uploaded.
        if fname.filesystem == 's3':
            fname.upload_from(fname.local_filename)
Ejemplo n.º 12
0
    def __init__(self, config, logger=None):
        """
        Main class of the dienen library. It represents the model, which is built from a configuration file.
        config: can be a string or pathlib.Path pointing to a .yaml file, a dictionary or a kahnfigh Config.
        logger: optionally, a logger can be supplied to log all information related to dienen model.

        Raises ValueError when Model/DistributedStrategy is neither missing
        nor 'Mirrored'.
        """

        config = Config(config, safe=False)

        self.original_config = config
        self.config = copy.deepcopy(config)
        self.core_model = None
        self.architecture_config = None
        self.model_path = None
        # Without an explicit name the creation timestamp is used.
        self.name = self.config['Model'].get(
            'name',
            datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
        if not self.model_path:
            self.model_path = self.config['Model'].get('path',
                                                       '{}'.format(self.name))
        self.weights = None
        self.optimizer_weights = None
        self.extra_data = None
        self.modules = self.config.get('Module', [])
        self.gpu_config = self.config.get('gpu_config', {
            'device': 'auto',
            'allow_growth': True
        })
        self.cache = True
        self.logger = logger
        self.input_shapes = None
        self.output_shapes = None

        training_strategy = self.config['Model'].get('DistributedStrategy',
                                                     None)
        if training_strategy is None:
            self.training_strategy = tf.distribute.get_strategy()
        elif training_strategy == 'Mirrored':
            self.training_strategy = tf.distribute.MirroredStrategy()
        else:
            # Previously self.training_strategy was silently left undefined.
            raise ValueError(
                'Unknown DistributedStrategy: {}'.format(training_strategy))

        if training_strategy is None:
            # Single-device setup: pick one GPU and configure memory growth.
            self.gpu_device = self.gpu_config.get('device', 'auto')
            if self.gpu_device == 'auto':
                gpu, mem = get_available_gpus()
                self.gpu_device = int(gpu)
                if self.logger:
                    self.logger.info(
                        "Automatically selected device {} with {} available memory"
                        .format(self.gpu_device, mem))
            gpu_growth = self.gpu_config.get('allow_growth', True)
            gpus = tf.config.experimental.list_physical_devices('GPU')
            if gpus:
                try:
                    # gpu_device is used as an index into gpus, so it must be
                    # strictly smaller than the number of GPUs.
                    if len(gpus) <= self.gpu_device:
                        raise Exception(
                            'There are only {} available GPUs and the {} was requested'
                            .format(len(gpus), self.gpu_device))
                    tf.config.experimental.set_visible_devices(
                        gpus[self.gpu_device], 'GPU')
                except RuntimeError as e:
                    warnings.warn('Failed setting GPUs. {}'.format(e))
            if gpu_growth:
                for gpu in gpus:
                    try:
                        tf.config.experimental.set_memory_growth(
                            gpu, gpu_growth)
                    except RuntimeError as e:
                        warnings.warn(
                            'Failed setting GPU dynamic memory allocation. {}'.
                            format(e))
        else:
            # The device lists are still needed for the debug message below.
            gpus = tf.config.experimental.list_physical_devices('GPU')

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')

        if self.logger:
            self.logger.debug("Physical GPUs: {}. Logical GPUs: {}".format(
                len(gpus), len(logical_gpus)))

        self.externals = self.config['Model'].get('External', None)
        self.validation_data = None