Example #1
def download_config(conf_file):
    src_file = src_dir / conf_file
    if not src_file.is_file():
        src_file = test_src_dir / conf_file

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(conf_file))

    with src_file.open() as fin:
        config = json.load(fin)

    if config.get("train"):
        config["train"]["epochs"] = 1
        for pytest_key in [
                k for k in config["train"] if k.startswith('pytest_')
        ]:
            config["train"][pytest_key[len('pytest_'):]] = config["train"].pop(
                pytest_key)

    config["deeppavlov_root"] = str(download_path)

    with (test_configs_path / conf_file).open("w") as fout:
        json.dump(config, fout)

    # Download referenced config files
    config_references = get_all_elems_from_json(config, 'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        conf_file = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(conf_file).exists():
            download_config(conf_file)
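
All of the snippets on this page rely on get_all_elems_from_json to gather every value stored under a given key anywhere inside a nested config. The helper below is only a rough sketch of that behaviour (a plain recursive walk over dicts and lists), not DeepPavlov's actual implementation:

def get_all_elems_from_json(search_json, search_key):
    # Sketch: recursively collect every value stored under `search_key`,
    # at any nesting depth, from a structure of dicts and lists.
    result = []
    if isinstance(search_json, dict):
        for key, value in search_json.items():
            if key == search_key:
                result.append(value)
            else:
                result.extend(get_all_elems_from_json(value, search_key))
    elif isinstance(search_json, list):
        for item in search_json:
            result.extend(get_all_elems_from_json(item, search_key))
    return result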
Example #2
def download_config(config_path):
    src_file = src_dir / config_path
    if not src_file.is_file():
        src_file = test_src_dir / config_path

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(config_path))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config),
                                                'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    config.setdefault('train', {}).setdefault('pytest_epochs', 1)
    config['train'].setdefault('pytest_max_batches', 2)
    config['train'].setdefault('pytest_max_test_batches', 2)
    _override_with_test_values(config)

    config_path = test_configs_path / config_path
    config_path.parent.mkdir(exist_ok=True, parents=True)
    with config_path.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
Example #3
def get_config_downloads(
        config: Union[str, Path, dict]) -> Set[Tuple[str, Path]]:
    config = parse_config(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {'url': resource}

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [
        expand_path(config_ref)
        for config_ref in get_all_elems_from_json(config, 'config_path')
    ]

    downloads |= {(url, dest)
                  for config in config_references
                  for url, dest in get_config_downloads(config)}

    return downloads
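
The returned set of (url, dest) pairs is meant to be iterated by a downloader. A minimal, hedged usage sketch; download_resource is a hypothetical stand-in for the real fetch-and-extract routine:

def fetch_config_resources(config):
    for url, dest in get_config_downloads(config):
        dest.mkdir(parents=True, exist_ok=True)
        download_resource(url, dest)  # hypothetical helper, not a real DeepPavlov call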
Example #4
def get_config_downloads(config_path, config_downloads=None):
    config = read_json(config_path)

    if config_downloads is None:
        config_downloads = {}

    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                url = resource
                sub_dir = ''
            elif isinstance(resource, dict):
                url = resource['url']
                sub_dir = resource['subdir'] if 'subdir' in resource else ''

            if url in config_downloads:
                config_downloads[url]['subdir'] = list(
                    set(config_downloads[url]['subdir'] + [sub_dir]))
            else:
                config_downloads[url] = {'url': url, 'subdir': [sub_dir]}

    config_references = get_all_elems_from_json(config, 'config_path')
    config_references = [
        root_path.joinpath(config_ref.split('../', 1)[1])
        for config_ref in config_references
    ]

    for config_ref in config_references:
        config_downloads = get_config_downloads(config_ref, config_downloads)

    return config_downloads
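
Unlike the set-based variants, this version keys the result by URL and accumulates all destination subdirectories, so each archive needs to be fetched only once. A hedged usage sketch (download_once and extract_to are hypothetical helpers; root_path comes from the surrounding module):

def fetch_all(config_path):
    for url, info in get_config_downloads(config_path).items():
        # One entry per unique URL; info['subdir'] lists every target directory.
        archive = download_once(url)                  # hypothetical cached download
        for sub_dir in info['subdir']:
            extract_to(archive, root_path / sub_dir)  # hypothetical extraction helper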
Example #5
def download_config(config_path):
    src_file = src_dir / config_path
    if not src_file.is_file():
        src_file = test_src_dir / config_path

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(config_path))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config), 'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    config.setdefault('train', {}).setdefault('pytest_epochs', 1)
    _override_with_test_values(config)

    config_path = test_configs_path / config_path
    config_path.parent.mkdir(exist_ok=True, parents=True)
    with config_path.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
Example #6
def get_config_downloads(config_path):
    dp_root_back = get_deeppavlov_root()
    config = read_json(config_path)
    set_deeppavlov_root(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {
                    'url': resource
                }

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]

    downloads |= {(url, dest) for config in config_references for url, dest in get_config_downloads(config)}

    set_deeppavlov_root({'deeppavlov_root': dp_root_back})

    return downloads
Example #7
def _update_requirements(config: dict) -> dict:
    """
    Generates requirements for DeepPavlov model and adds them as ``metadata.requirements`` field to the returned dict.

    Searches for the ``class_name`` keys in the passed config at all nesting levels. For each found component,
    the function looks for dependencies in the requirements registry. Found dependencies are added to the returned copy of
    the config as ``metadata.requirements``. If the config already has ``metadata.requirements``, the existing one
    is complemented by the found requirements.

    Args:
        config: DeepPavlov model config
    Returns:
        config copy with updated ``metadata.requirements`` field according to the config components.
    """
    components = get_all_elems_from_json(config, 'class_name')
    components = {
        inverted_registry.get(component, component)
        for component in components
    }
    requirements_registry_path = Path(
        __file__).parents[1] / 'common' / 'requirements_registry.json'
    requirements_registry = read_json(requirements_registry_path)
    requirements = []
    for component in components:
        requirements.extend(requirements_registry.get(component, []))
    requirements.extend(config.get('metadata', {}).get('requirements', []))
    response = deepcopy(config)
    response['metadata'] = response.get('metadata', {})
    response['metadata']['requirements'] = list(set(requirements))
    return response
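
As a concrete illustration, here is a hedged example of calling _update_requirements on a toy config; the component name and requirement entries are made up and only show the merge behaviour:

toy_config = {
    'chainer': {'pipe': [{'class_name': 'some_tokenizer'}]},  # made-up component name
    'metadata': {'requirements': ['numpy']}
}
updated = _update_requirements(toy_config)
# 'numpy' is kept, and whatever requirements_registry.json maps to
# 'some_tokenizer' is merged in; duplicates are removed by the set() call.
print(updated['metadata']['requirements'])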
Example #8
def download_config(conf_file):
    src_file = src_dir / conf_file
    if not src_file.is_file():
        src_file = test_src_dir / conf_file

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(conf_file))

    with src_file.open(encoding='utf8') as fin:
        config = json.load(fin)

    if config.get("train"):
        config["train"]["epochs"] = 1
        for pytest_key in [k for k in config["train"] if k.startswith('pytest_')]:
            config["train"][pytest_key[len('pytest_'):]] = config["train"].pop(pytest_key)

    config["deeppavlov_root"] = str(download_path)

    conf_file = test_configs_path / conf_file
    conf_file.parent.mkdir(exist_ok=True, parents=True)
    with conf_file.open("w", encoding='utf8') as fout:
        json.dump(config, fout)

    # Download referenced config files
    config_references = get_all_elems_from_json(config, 'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        conf_file = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(conf_file).exists():
            download_config(conf_file)
Example #9
def get_config_requirements(config: Union[str, Path, dict]):
    config = parse_config(config)

    requirements = set()
    for req in config.get('metadata', {}).get('requirements', []):
        requirements.add(req)

    config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]
    requirements |= {req for config in config_references for req in get_config_requirements(config)}

    return requirements
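
The returned set can be written straight to a pip-style requirements file. A small sketch (the output file name is arbitrary):

from pathlib import Path

def write_config_requirements(config, out_file='config_requirements.txt'):
    # Collect requirements from the config and all referenced sub-configs,
    # then dump them as one package spec per line.
    requirements = sorted(get_config_requirements(config))
    Path(out_file).write_text('\n'.join(requirements) + '\n', encoding='utf8')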
Example #10
def get_config_downloads(config: Union[str, Path, dict]) -> Set[Tuple[str, Path]]:
    config = parse_config(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {
                    'url': resource
                }

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]

    downloads |= {(url, dest) for config in config_references for url, dest in get_config_downloads(config)}

    return downloads
Example #11
def download_config(conf_file):
    src_file = src_dir / conf_file
    if not src_file.is_file():
        src_file = test_src_dir / conf_file

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(conf_file))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config),
                                                'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    if config.get("train"):
        config["train"]["epochs"] = 1
        for pytest_key in [
                k for k in config["train"] if k.startswith('pytest_')
        ]:
            config["train"][pytest_key[len('pytest_'):]] = config["train"].pop(
                pytest_key)

    config_vars = config.setdefault('metadata', {}).setdefault('variables', {})
    config_vars['ROOT_PATH'] = str(download_path)
    config_vars['CONFIGS_PATH'] = str(test_configs_path)

    conf_file = test_configs_path / conf_file
    conf_file.parent.mkdir(exist_ok=True, parents=True)
    with conf_file.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
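
The pytest_ renaming used here (and in Examples #1 and #8) lets a config keep production and test values side by side: during test preparation each pytest_-prefixed key simply overwrites its unprefixed counterpart. A self-contained illustration with made-up values:

train = {'epochs': 100, 'batch_size': 64, 'pytest_batch_size': 2}

train['epochs'] = 1
for pytest_key in [k for k in train if k.startswith('pytest_')]:
    train[pytest_key[len('pytest_'):]] = train.pop(pytest_key)

assert train == {'epochs': 1, 'batch_size': 2}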
Example #12
def train_evaluate_model_from_config(config: Union[str, Path, dict], iterator=None, *,
                                     to_train=True, to_validate=True, download=False,
                                     start_epoch_num=0, recursive=False) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    config = parse_config(config)

    if download:
        deep_download(config)

    if to_train and recursive:
        for subconfig in get_all_elems_from_json(config['chainer'], 'config_path'):
            log.info(f'Training "{subconfig}"')
            train_evaluate_model_from_config(subconfig, download=False, recursive=True)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        try:
            data = read_data_by_config(config)
        except ConfigError as e:
            to_train = False
            log.warning(f'Skipping training. {e.message}')
        else:
            iterator = get_iterator_from_config(config, data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    in_y = config['chainer'].get('in_y', ['y'])
    if isinstance(in_y, str):
        in_y = [in_y]
    if isinstance(config['chainer']['out'], str):
        config['chainer']['out'] = [config['chainer']['out']]
    metrics_functions = _parse_metrics(train_config['metrics'], in_y, config['chainer']['out'])

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions, start_epoch_num=start_epoch_num)

        model.destroy()

    res = {}

    if iterator is not None and (train_config['validate_best'] or train_config['test_best']):
        model = build_model(config, load_trained=to_train)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }

            res['valid'] = report['valid']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }

            res['test'] = report['test']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        model.destroy()

    return res
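
A minimal invocation of the function above might look like this; the config path is illustrative, and the returned dict maps each evaluated split to its metric values:

results = train_evaluate_model_from_config('path/to/config.json',
                                           to_train=True,
                                           to_validate=True,
                                           download=False)
# e.g. {'valid': {'accuracy': ...}, 'test': {'accuracy': ...}}
print(results)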
Example #13
def train_evaluate_model_from_config(
        config: Union[str, Path, dict],
        iterator: Union[DataLearningIterator, DataFittingIterator] = None,
        *,
        to_train: bool = True,
        evaluation_targets: Optional[Iterable[str]] = None,
        to_validate: Optional[bool] = None,
        download: bool = False,
        start_epoch_num: Optional[int] = None,
        recursive: bool = False) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    config = parse_config(config)

    if download:
        deep_download(config)

    if to_train and recursive:
        for subconfig in get_all_elems_from_json(config['chainer'],
                                                 'config_path'):
            log.info(f'Training "{subconfig}"')
            train_evaluate_model_from_config(subconfig,
                                             download=False,
                                             recursive=True)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        try:
            data = read_data_by_config(config)
        except ConfigError as e:
            to_train = False
            log.warning(f'Skipping training. {e.message}')
        else:
            iterator = get_iterator_from_config(config, data)

    if 'train' not in config:
        log.warning('Train config is missing. Populating with default values')
    train_config = config.get('train', {})

    if start_epoch_num is not None:
        train_config['start_epoch_num'] = start_epoch_num

    if 'evaluation_targets' not in train_config and (
            'validate_best' in train_config or 'test_best' in train_config):
        log.warning(
            '"validate_best" and "test_best" parameters are deprecated.'
            ' Please, use "evaluation_targets" list instead')

        train_config['evaluation_targets'] = []
        if train_config.pop('validate_best', True):
            train_config['evaluation_targets'].append('valid')
        if train_config.pop('test_best', True):
            train_config['evaluation_targets'].append('test')

    trainer_class = get_model(train_config.pop('class_name', 'nn_trainer'))
    trainer = trainer_class(config['chainer'], **train_config)

    if to_train:
        trainer.train(iterator)

    res = {}

    if iterator is not None:
        if to_validate is not None:
            if evaluation_targets is None:
                log.warning(
                    '"to_validate" parameter is deprecated and will be removed in future versions.'
                    ' Please, use "evaluation_targets" list instead')
                evaluation_targets = ['test']
                if to_validate:
                    evaluation_targets.append('valid')
            else:
                log.warning(
                    'Both "evaluation_targets" and "to_validate" parameters are specified.'
                    ' "to_validate" is deprecated and will be ignored')

        res = trainer.evaluate(iterator,
                               evaluation_targets,
                               print_reports=True)
        trainer.get_chainer().destroy()

    res = {k: v['metrics'] for k, v in res.items()}

    return res
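
Because of the deprecation shim above, the following two train sections end up being handled identically (values are illustrative):

old_style_train = {'validate_best': True, 'test_best': True}
new_style_train = {'evaluation_targets': ['valid', 'test']}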