Example #1
def run_population(population, evolution, gpus):
    """
    Change save and load paths for the obtained population, save config.json with the model config,
    and run the population with the current Python executable (the one evolve.py was run with)
    on the given devices (-1 means CPU; other integers are GPU ids visible to evolve.py).
    Args:
        population: list of dictionaries - configs of current population
        evolution: ParamsEvolution
        gpus: list of given devices (list of integers)

    Returns:
        None
    """
    population_size = len(population)
    for k in range(population_size // len(gpus) + 1):
        procs = []
        for j in range(len(gpus)):
            i = k * len(gpus) + j
            if i < population_size:
                save_path = expand_path(
                    evolution.get_value_from_config(
                        parse_config(population[i]),
                        evolution.path_to_models_save_path))

                save_path.mkdir(parents=True, exist_ok=True)
                f_name = save_path / "config.json"
                save_json(population[i], f_name)

                with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\
                        save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog:
                    env = dict(os.environ)
                    if len(gpus) > 1 or gpus[0] != -1:
                        env['CUDA_VISIBLE_DEVICES'] = str(gpus[j])

                    procs.append(
                        Popen("{} -m deeppavlov train {}".format(
                            sys.executable, str(f_name)),
                              shell=True,
                              stdout=outlog,
                              stderr=errlog,
                              env=env))
        for j, proc in enumerate(procs):
            i = k * len(gpus) + j
            log.info(f'Waiting on {i}th proc')
            if proc.wait() != 0:
                save_path = expand_path(
                    evolution.get_value_from_config(
                        parse_config(population[i]),
                        evolution.path_to_models_save_path))
                with save_path.joinpath('err.txt').open(
                        encoding='utf8') as errlog:
                    log.warning(
                        f'Population {i} returned an error code {proc.returncode} and an error log:\n'
                        + errlog.read())
    return None
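
For context, here is a minimal driver sketch for the function above. The evolution config path, population size, ParamsEvolution import path and its constructor/first-generation API are assumptions (they vary between DeepPavlov versions), so treat this as an illustration rather than the project's actual evolve.py.

from deeppavlov.core.commands.utils import parse_config
# assumed import path; releases may place ParamsEvolution elsewhere
from deeppavlov.models.evolution.evolution_param_generator import ParamsEvolution

basic_config = parse_config('my_evolve_config.json')             # hypothetical evolution config
evolution = ParamsEvolution(population_size=4, **basic_config)   # assumed constructor
population = evolution.first_generation()                        # assumed: list of config dicts
run_population(population, evolution, gpus=[-1])                 # [-1] keeps every run on CPU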
Example #2
def get_server_params(model_config: Union[str, Path]) -> Dict:
    server_config = read_json(SERVER_CONFIG_PATH)
    model_config = parse_config(model_config)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config, ['metadata', 'server_utils']):
        model_tag = model_config['metadata']['server_utils']
        if check_nested_dict_keys(server_config,
                                  ['model_defaults', model_tag]):
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    server_params['model_endpoint'] = server_params.get(
        'model_endpoint', '/model')

    arg_names = server_params['model_args_names'] or model_config['chainer'][
        'in']
    if isinstance(arg_names, str):
        arg_names = [arg_names]
    server_params['model_args_names'] = arg_names

    return server_params
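
A small usage sketch for the helper above; it assumes the surrounding module already defines SERVER_CONFIG_PATH, and the config reference is only an illustration.

from deeppavlov import configs

server_params = get_server_params(configs.ner.ner_ontonotes_bert_mult)
print(server_params['model_endpoint'])    # defaults to '/model' when not configured
print(server_params['model_args_names'])  # falls back to the chainer 'in' names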
Example #3
    def save(self, path, copy_model=False):
        if not os.path.isdir(path):
            os.makedirs(path)

        settings = {
            'self_class': type(self).__name__,
            'dist_class': type(self.dist).__name__,
            'linearization_settings': self.dist.get_linearization_settings()
        }

        with open(os.path.join(path, 'settings.json'), 'w') as f:
            json.dump(settings, f, indent=True)

        model_settings = self.model_settings.copy()
        if copy_model:
            model_settings = parse_config(model_settings)
            model_settings['metadata']['download'] = []

        with open(os.path.join(path, 'model_settings.json'), 'w') as f:
            json.dump(model_settings, f, indent=True)

        Doc2Vec.save(self.doc2vec, os.path.join(path, 'doc2vec.model'))

        with open(os.path.join(path, 'tree.model'), 'wb') as f:
            pickle.dump(self.tree, f)
Example #4
def get_config_downloads(
        config: Union[str, Path, dict]) -> Set[Tuple[str, Path]]:
    config = parse_config(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {'url': resource}

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [
        expand_path(config_ref)
        for config_ref in get_all_elems_from_json(config, 'config_path')
    ]

    downloads |= {(url, dest)
                  for config in config_references
                  for url, dest in get_config_downloads(config)}

    return downloads
Example #5
def download_config(config_path):
    src_file = src_dir / config_path
    if not src_file.is_file():
        src_file = test_src_dir / config_path

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(config_path))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config), 'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    config.setdefault('train', {}).setdefault('pytest_epochs', 1)
    _override_with_test_values(config)

    config_path = test_configs_path / config_path
    config_path.parent.mkdir(exist_ok=True, parents=True)
    with config_path.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
Example #6
def download_config(config_path):
    src_file = src_dir / config_path
    if not src_file.is_file():
        src_file = test_src_dir / config_path

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(config_path))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config),
                                                'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    config.setdefault('train', {}).setdefault('pytest_epochs', 1)
    config['train'].setdefault('pytest_max_batches', 2)
    config['train'].setdefault('pytest_max_test_batches', 2)
    _override_with_test_values(config)

    config_path = test_configs_path / config_path
    config_path.parent.mkdir(exist_ok=True, parents=True)
    with config_path.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
Example #7
def run_population(population, evolution, gpus):
    """
    Change save and load paths for the obtained population, save config.json with the model config,
    and run the population with the current Python executable (the one evolve.py was run with)
    on the given devices (-1 means CPU; other integers are GPU ids visible to evolve.py).
    Args:
        population: list of dictionaries - configs of current population
        evolution: ParamsEvolution
        gpus: list of given devices (list of integers)

    Returns:
        None
    """
    population_size = len(population)
    for k in range(population_size // len(gpus) + 1):
        procs = []
        for j in range(len(gpus)):
            i = k * len(gpus) + j
            if i < population_size:
                save_path = expand_path(
                    evolution.get_value_from_config(parse_config(population[i]),
                                                    evolution.path_to_models_save_path))

                save_path.mkdir(parents=True, exist_ok=True)
                f_name = save_path / "config.json"
                save_json(population[i], f_name)

                with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\
                        save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog:
                    env = dict(os.environ)
                    if len(gpus) > 1 or gpus[0] != -1:
                        env['CUDA_VISIBLE_DEVICES'] = str(gpus[j])

                    procs.append(Popen("{} -m deeppavlov train {}".format(sys.executable, str(f_name)),
                                       shell=True, stdout=outlog, stderr=errlog, env=env))
        for j, proc in enumerate(procs):
            i = k * len(gpus) + j
            log.info(f'Waiting on {i}th proc')
            if proc.wait() != 0:
                save_path = expand_path(
                    evolution.get_value_from_config(parse_config(population[i]),
                                                    evolution.path_to_models_save_path))
                with save_path.joinpath('err.txt').open(encoding='utf8') as errlog:
                    log.warning(f'Population {i} returned an error code {proc.returncode} and an error log:\n' +
                                errlog.read())
    return None
Example #8
def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            component = _refs[config_params['ref']]
            if serialized is not None:
                component.deserialize(serialized)
            return component
        except KeyError:
            e = ConfigError('Component with id "{id}" was referenced but not initialized'
                            .format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model
        refs = _refs.copy()
        _refs.clear()
        config = parse_config(expand_path(config_params['config_path']))
        model = build_model(config, serialized=serialized)
        _refs.clear()
        _refs.update(refs)
        return model

    cls_name = config_params.pop('class_name', None)
    if not cls_name:
        e = ConfigError('Component config has no `class_name` nor `ref` fields')
        log.exception(e)
        raise e
    cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args+spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    if serialized is not None:
        component.deserialize(serialized)
    return component
Example #9
def build_model(config: Union[str, Path, dict],
                mode: str = 'infer',
                load_trained: bool = False,
                download: bool = False,
                serialized: Optional[bytes] = None) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    config = parse_config(config)

    if serialized:
        serialized: list = pickle.loads(serialized)

    if download:
        deep_download(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'],
                    model_config.get('in_y'))

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config
                             or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning(
                    'No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                    .format(
                        component_config.get(
                            'class_name',
                            component_config.get('ref', 'UNKNOWN'))))

        if serialized and 'in' in component_config:
            component_serialized = serialized.pop(0)
        else:
            component_serialized = None

        component = from_params(component_config,
                                mode=mode,
                                serialized=component_serialized)

        if 'id' in component_config:
            model._components_dict[component_config['id']] = component

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
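
A typical call matching the signature above, reusing the config reference from Example #29; download=True fetches the pretrained files on first use.

from deeppavlov import configs

ner_model = build_model(configs.ner.ner_ontonotes_bert_mult, download=True)
predictions = ner_model(['DeepPavlov is developed by MIPT in Moscow'])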
Example #10
def get_config_requirements(config: [str, Path, dict]):
    config = parse_config(config)

    requirements = set()
    for req in config.get('metadata', {}).get('requirements', []):
        requirements.add(req)

    config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]
    requirements |= {req for config in config_references for req in get_config_requirements(config)}

    return requirements
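
A short sketch of calling the helper above; the config reference is only an illustration, and the returned set contains the requirement file paths listed in the config's metadata (and in any referenced sub-configs).

from deeppavlov import configs

for requirement in sorted(get_config_requirements(configs.ner.ner_ontonotes_bert_mult)):
    print(requirement)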
Example #11
def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Component:
    """Builds and returns the Component from corresponding dictionary of parameters."""
    # what is passed in json:
    config_params = {k: _resolve(v) for k, v in params.items()}

    # get component by reference (if any)
    if 'ref' in config_params:
        try:
            return _refs[config_params['ref']]
        except KeyError:
            e = ConfigError(
                'Component with id "{id}" was referenced but not initialized'.
                format(id=config_params['ref']))
            log.exception(e)
            raise e

    elif 'config_path' in config_params:
        from deeppavlov.core.commands.infer import build_model
        refs = _refs.copy()
        _refs.clear()
        config = parse_config(expand_path(config_params['config_path']))
        model = build_model(config)
        _refs.clear()
        _refs.update(refs)
        return model

    cls_name = config_params.pop('class_name', None)
    if not cls_name:
        e = ConfigError(
            'Component config has no `class_name` nor `ref` fields')
        log.exception(e)
        raise e
    cls = get_model(cls_name)

    # find the submodels params recursively
    config_params = {k: _init_param(v, mode) for k, v in config_params.items()}

    try:
        spec = inspect.getfullargspec(cls)
        if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
            kwargs['mode'] = mode

        component = cls(**dict(config_params, **kwargs))
        try:
            _refs[config_params['id']] = component
        except KeyError:
            pass
    except Exception:
        log.exception("Exception in {}".format(cls))
        raise

    return component
Example #12
    def save(self, path, copy_model: bool = False):
        BaseSearchEngine.save(self, path)

        model_settings = self.model_settings.copy()
        if copy_model:
            model_settings = parse_config(model_settings)
            self._copy_model_files(model_settings, path)
            model_settings['metadata']['download'] = []

        with open(os.path.join(path, 'model_settings.json'), 'w') as f:
            json.dump(model_settings, f, indent=True)

        Doc2Vec.save(self.doc2vec, os.path.join(path, 'doc2vec.model'))
Example #13
def get_server_params(server_config_path, model_config):
    server_config = read_json(server_config_path)
    model_config = parse_config(model_config)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config, ['metadata', 'labels', 'server_utils']):
        model_tag = model_config['metadata']['labels']['server_utils']
        if model_tag in server_config['model_defaults']:
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    return server_params
Example #14
def get_server_params(server_config_path, model_config):
    server_config = read_json(server_config_path)
    model_config = parse_config(model_config)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config,
                              ['metadata', 'labels', 'server_utils']):
        model_tag = model_config['metadata']['labels']['server_utils']
        if model_tag in server_config['model_defaults']:
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    return server_params
Example #15
def predict_with_model(config_path: [Path, str]) -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.

    """
    config = parse_config(config_path)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['class_name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {
        k: v
        for k, v in reader_config.items()
        if k not in ['class_name', 'data_path']
    }
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config,
                                                        data=data)

    model = build_model(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(batch_size=batch_size,
                                                data_type="test",
                                                shuffle=False,
                                                return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
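
A hypothetical invocation of the function above; the config path is an illustration and must contain the 'dataset_reader', 'dataset_iterator' and 'predict' sections that the code reads.

answers = predict_with_model('my_morpho_tagger_config.json')  # hypothetical config path
print(answers[0])   # tags (or CONLL-U lines) for the first test sentence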
Example #16
def get_server_params(server_config_path, model_config):
    server_config = read_json(server_config_path)
    model_config = parse_config(model_config)

    server_params = server_config['common_defaults']

    if check_nested_dict_keys(model_config, ['metadata', 'labels', 'server_utils']):
        model_tag = model_config['metadata']['labels']['server_utils']
        if model_tag in server_config['model_defaults']:
            model_defaults = server_config['model_defaults'][model_tag]
            for param_name in model_defaults.keys():
                if model_defaults[param_name]:
                    server_params[param_name] = model_defaults[param_name]

    server_params['model_endpoint'] = server_params.get('model_endpoint', '/model')
    server_params['model_args_names'] = server_params['model_args_names'] or model_config['chainer']['in']

    return server_params
Example #17
def install_from_config(config: [str, Path, dict]):
    config = parse_config(config)
    requirements_files = config.get('metadata', {}).get('requirements', [])

    if not requirements_files:
        log.warn('No requirements found in config')
        return

    requirements = []
    for rf in requirements_files:
        with expand_path(rf).open(encoding='utf8') as f:
            for line in f:
                line = re.sub(r'\s', '', line.strip())
                if line and not line.startswith(
                        '#') and line not in requirements:
                    requirements.append(line)

    for r in requirements:
        install(r)
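
Usage sketch: install the pip requirements referenced in a config's metadata; the config reference is only an illustration.

from deeppavlov import configs

install_from_config(configs.ner.ner_ontonotes_bert_mult)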
Example #18
def build_model(config: Union[str, Path, dict], mode: str = 'infer',
                load_trained: bool = False, download: bool = False,
                serialized: Optional[bytes] = None) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    config = parse_config(config)

    if serialized:
        serialized: list = pickle.loads(serialized)

    if download:
        deep_download(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'))

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                            .format(component_config.get('class_name', component_config.get('ref', 'UNKNOWN'))))

        if serialized and 'in' in component_config:
            component_serialized = serialized.pop(0)
        else:
            component_serialized = None

        component = from_params(component_config, mode=mode, serialized=component_serialized)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
Example #19
def predict_with_model(
        config_path: [Path, str],
        infile: Optional[Union[Path, str]] = None,
        input_format: str = "ud",
        batch_size: [int] = 16,
        output_format: str = "basic") -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.

    """
    config = parse_config(config_path)
    if infile is None:
        if sys.stdin.isatty():
            raise RuntimeError(
                'To process data from terminal please use interact mode')
        infile = sys.stdin
    else:
        infile = expand_path(infile)
    if input_format in ["ud", "conllu", "vertical"]:
        from_words = (input_format == "vertical")
        data: List[tuple] = read_infile(infile, from_words=from_words)
        # keeping only sentences
        data = [elem[0] for elem in data]
    else:
        if infile is not sys.stdin:
            with open(infile, "r", encoding="utf8") as fin:
                data = fin.readlines()
        else:
            data = sys.stdin.readlines()
    model = build_model(config, load_trained=True)
    model.pipe[-1][-1].set_format_mode(output_format)
    answers = model.batched_call(data, batch_size=batch_size)
    for elem in answers:
        print(elem)
    return answers
Example #20
def get_config_downloads(config: Union[str, Path, dict]) -> Set[Tuple[str, Path]]:
    config = parse_config(config)

    downloads = set()
    if 'metadata' in config and 'download' in config['metadata']:
        for resource in config['metadata']['download']:
            if isinstance(resource, str):
                resource = {
                    'url': resource
                }

            url = resource['url']
            dest = expand_path(resource.get('subdir', ''))

            downloads.add((url, dest))

    config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]

    downloads |= {(url, dest) for config in config_references for url, dest in get_config_downloads(config)}

    return downloads
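
A sketch of iterating over the (url, destination) pairs collected above; the config reference is only an illustration.

from deeppavlov import configs

for url, dest in get_config_downloads(configs.ner.ner_ontonotes_bert_mult):
    print(f'{url} -> {dest}')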
Example #21
def predict_with_model(config_path: [Path, str]) -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to config

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CONLL-U descriptions.

    """
    config = parse_config(config_path)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['class_name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {k: v for k, v in reader_config.items() if k not in ['class_name', 'data_path']}
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config, data=data)

    model = build_model(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(
            batch_size=batch_size, data_type="test", shuffle=False, return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
Example #22
def upload(config_in_file):
    config_in = parse_config(config_in_file)
    config_in_file = find_config(config_in_file)

    model_path = Path(
        config_in['metadata']['variables']['MODEL_PATH']).expanduser()

    model_name, class_name = config_in_file.stem, config_in_file.parent.name

    tmp_dir = f'/tmp/{class_name}'
    tmp_tar = f'/tmp/{class_name}/{model_name}.tar.gz'
    shutil.rmtree(tmp_dir, ignore_errors=True)
    os.mkdir(tmp_dir)

    with tarfile.open(tmp_tar, "w:gz") as tar:
        tar.add(model_path, arcname=model_name)

    main(tmp_tar)

    command = f'scp -r {tmp_dir} share.ipavlov.mipt.ru:/home/export/v1/'
    download_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
    print(command, download_url, sep='\n')
Example #23
def download_config(conf_file):
    src_file = src_dir / conf_file
    if not src_file.is_file():
        src_file = test_src_dir / conf_file

    if not src_file.is_file():
        raise RuntimeError('No config file {}'.format(conf_file))

    with src_file.open(encoding='utf8') as fin:
        config: dict = json.load(fin)

    # Download referenced config files
    config_references = get_all_elems_from_json(parse_config(config),
                                                'config_path')
    for config_ref in config_references:
        m_name = config_ref.split('/')[-2]
        config_ref = '/'.join(config_ref.split('/')[-2:])

        test_configs_path.joinpath(m_name).mkdir(exist_ok=True)
        if not test_configs_path.joinpath(config_ref).exists():
            download_config(config_ref)

    # Update config for testing
    if config.get("train"):
        config["train"]["epochs"] = 1
        for pytest_key in [
                k for k in config["train"] if k.startswith('pytest_')
        ]:
            config["train"][pytest_key[len('pytest_'):]] = config["train"].pop(
                pytest_key)

    config_vars = config.setdefault('metadata', {}).setdefault('variables', {})
    config_vars['ROOT_PATH'] = str(download_path)
    config_vars['CONFIGS_PATH'] = str(test_configs_path)

    conf_file = test_configs_path / conf_file
    conf_file.parent.mkdir(exist_ok=True, parents=True)
    with conf_file.open("w", encoding='utf8') as fout:
        json.dump(config, fout)
Example #24
def upload(config_in_file):

    print(config_in_file)
    config_in = parse_config(config_in_file)
    config_in_file = find_config(config_in_file)

    model_path = Path(
        config_in['metadata']['variables']['MODEL_PATH']).expanduser()
    models_path = Path(
        config_in['metadata']['variables']['MODELS_PATH']).expanduser()
    model_name, class_name = config_in_file.stem, config_in_file.parent.name

    if str(model_name) not in str(model_path):
        raise ValueError(f'{model_name} is not a part of the model path {model_path}')

    arcname = str(model_path).split("models/")[1]
    tar_path = models_path / model_name
    tmp_folder = f'/tmp/'
    tmp_tar = tmp_folder + f'{model_name}.tar.gz'

    print("model_path", model_path)
    print("class_name", class_name)
    print("model_name", model_name)

    print("Start tarring")
    archive = tarfile.open(tmp_tar, "w|gz")
    archive.add(model_path, arcname=arcname)
    archive.close()
    print("Stop tarring")

    print("Calculating hash")
    main(tmp_tar)

    print("tmp_tar", tmp_tar)
    command = f'scp -r {tmp_folder}{model_name}* share.ipavlov.mipt.ru:/home/export/v1/{class_name}'
    download_url = f'http://files.deeppavlov.ai/v1/{class_name}/{model_name}.tar.gz'
    print(command, download_url, sep='\n')
Example #25
def calc_cv_score(config, data=None, n_folds=5, is_loo=False):
    config = parse_config(config)

    if data is None:
        data = read_data_by_config(config)

    config, dirs_for_saved_models = change_savepath_for_model(config)

    cv_score = OrderedDict()
    for data_i in generate_train_valid(data, n_folds=n_folds, is_loo=is_loo):
        iterator = get_iterator_from_config(config, data_i)
        create_dirs_to_save_models(dirs_for_saved_models)
        score = train_evaluate_model_from_config(config, iterator=iterator)
        delete_dir_for_saved_models(dirs_for_saved_models)
        for key, value in score['valid'].items():
            if key not in cv_score:
                cv_score[key] = []
            cv_score[key].append(value)

    for key, value in cv_score.items():
        cv_score[key] = np.mean(value)
        log.info('Cross-Validation \"{}\" is: {}'.format(key, cv_score[key]))

    return cv_score
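
A sketch of a 5-fold cross-validation run with the function above; the config path is hypothetical, and the returned OrderedDict maps each metric name to its mean validation score.

from deeppavlov.core.commands.utils import parse_config

config = parse_config('my_classifier_config.json')   # hypothetical config
cv_scores = calc_cv_score(config, n_folds=5)
for metric, value in cv_scores.items():
    print(metric, value)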
Example #27
def load_elmo(elmo_output_names=("word_emb",)):
    config = parse_config(getattr(configs.elmo_embedder, "elmo_ru-news"))
    elmo_config = config["chainer"]["pipe"][-1]
    elmo_config['elmo_output_names'] = elmo_output_names
    embedder = from_params(elmo_config)
    return embedder
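
Usage sketch for the helper above: the returned embedder is a DeepPavlov component that is called on a batch of tokenized sentences; the exact output shape depends on elmo_output_names.

embedder = load_elmo(elmo_output_names=("word_emb",))
batch = [["всем", "привет"], ["это", "пример"]]
word_embeddings = embedder(batch)   # one embedding matrix per sentence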
Example #28
def train_evaluate_model_from_config(config: [str, Path, dict], iterator=None, *,
                                     to_train=True, to_validate=True, download=False,
                                     start_epoch_num=0, recursive=False) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    config = parse_config(config)

    if download:
        deep_download(config)

    if to_train and recursive:
        for subconfig in get_all_elems_from_json(config['chainer'], 'config_path'):
            log.info(f'Training "{subconfig}"')
            train_evaluate_model_from_config(subconfig, download=False, recursive=True)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        try:
            data = read_data_by_config(config)
        except ConfigError as e:
            to_train = False
            log.warning(f'Skipping training. {e.message}')
        else:
            iterator = get_iterator_from_config(config, data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    in_y = config['chainer'].get('in_y', ['y'])
    if isinstance(in_y, str):
        in_y = [in_y]
    if isinstance(config['chainer']['out'], str):
        config['chainer']['out'] = [config['chainer']['out']]
    metrics_functions = _parse_metrics(train_config['metrics'], in_y, config['chainer']['out'])

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions, start_epoch_num=start_epoch_num)

        model.destroy()

    res = {}

    if iterator is not None and (train_config['validate_best'] or train_config['test_best']):
        model = build_model(config, load_trained=to_train)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }

            res['valid'] = report['valid']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }

            res['test'] = report['test']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        model.destroy()

    return res
Example #29
from deeppavlov import configs, build_model, train_model
from deeppavlov.core.commands.utils import parse_config

config_dict = parse_config(configs.ner.ner_ontonotes_bert_mult)
reader = config_dict['dataset_reader']
print(config_dict['dataset_reader']['data_path'])
ner_model = train_model(configs.ner.ner_ontonotes_bert_mult, download=False)
Example #30
def create_settings(paraphrases: Optional[List[Tuple[Tuple[str, str], int]]],
                    name: str,
                    train_size=0.8,
                    fasttext_embed_path: str = None,
                    root_path: str = '~/.deeppavlov',
                    model_settings: Optional[Dict] = None,
                    max_sequence_length=30,
                    nn_class_name='mpm_nn',
                    hidden_dim=200,
                    aggregation_dim=200):

    if model_settings is None:
        model_settings = {
            'max_sequence_length': max_sequence_length,
            'class_name': 'mpm_nn',
            'hidden_dim': 200,
            'aggregation_dim': 200
        }

    if nn_class_name != 'mpm_nn':
        warnings.warn(
            'Parameter nn_class_name deprecated: in 0.6.12 will be removed in 1.0'
        )
        model_settings['class_name'] = nn_class_name

    if hidden_dim != 200:
        warnings.warn(
            'Parameter hidden_dim deprecated: in 0.6.12 will be removed in 1.0'
        )
        model_settings['hidden_dim'] = hidden_dim

    if aggregation_dim != 200:
        warnings.warn(
            'Parameter aggregation_dim deprecated: in 0.6.12 will be removed in 1.0'
        )
        model_settings['aggregation_dim'] = aggregation_dim

    if max_sequence_length != 30:
        warnings.warn(
            'Parameter max_sequence_length deprecated: in 0.6.12 will be removed in 1.0'
        )
        model_settings['max_sequence_length'] = max_sequence_length

    downloads = []
    if fasttext_embed_path is None:
        fasttext_embed_path = '{DOWNLOADS_PATH}/embeddings/lenta_lower_100.bin'
        downloads.append({
            'url': 'http://files.deeppavlov.ai/embeddings/lenta_lower_100.bin',
            'subdir': '{DOWNLOADS_PATH}/embeddings'
        })

    preproc = {
        'id': 'preproc',
        'class_name': 'siamese_preprocessor',
        'use_matrix': False,
        'max_sequence_length': max_sequence_length,
        'fit_on': ['x'],
        'in': ['x'],
        'out': ['x_proc'],
        'sent_vocab': {
            'id': 'siam_sent_vocab',
            'class_name': 'simple_vocab',
            'save_path': '{MODELS_PATH}/%s/sent.dict' % name,
            'load_path': '{MODELS_PATH}/%s/sent.dict' % name
        },
        'tokenizer': {
            'class_name': 'nltk_tokenizer'
        },
        'vocab': {
            'id': 'siam_vocab',
            'class_name': 'simple_vocab',
            'save_path': '{MODELS_PATH}/%s/tok.dict' % name,
            'load_path': '{MODELS_PATH}/%s/tok.dict' % name
        }
    }

    embedding = {
        'id': 'embeddings',
        'class_name': 'emb_mat_assembler',
        'embedder': '#siam_embedder',
        'vocab': '#siam_vocab'
    }

    nn = {
        'id': 'model',
        'in': ['x_proc'],
        'in_y': ['y'],
        'out': ['y_predicted'],
        'len_vocab': '#siam_vocab.len',
        'use_matrix': False,
        'attention': True,
        'emb_matrix': '#embeddings.emb_mat',
        'embedding_dim': '#siam_embedder.dim',
        'max_sequence_length': '#preproc.max_sequence_length',
        'seed': 243,
        'learning_rate': 1e-3,
        'triplet_loss': False,
        'batch_size': 256,
        'save_path': '{MODELS_PATH}/%s/model_weights.h5' % name,
        'load_path': '{MODELS_PATH}/%s/model_weights.h5' % name,
        'preprocess': '#preproc.__call__'
    }

    nn.update(model_settings)

    preproc['embedder'] = {
        'id': 'siam_embedder',
        'class_name': 'fasttext',
        'load_path': fasttext_embed_path
    }

    pipe = [preproc, embedding, nn]

    res = {
        'dataset_reader': {
            'class_name': 'simple_reader',
            'data_path': '{MODELS_PATH}/%s/dataset.json' % name
        },
        'dataset_iterator': {
            'class_name': 'siamese_iterator',
            'seed': 243
        },
        'chainer': {
            'in': ['x'],
            'in_y': ['y'],
            'pipe': pipe,
            'out': ['y_predicted']
        },
        'train': {
            'epochs': 10,
            'batch_size': 256,
            'pytest_max_batches': 2,
            'train_metrics': ['f1', 'acc', 'log_loss'],
            'metrics': ['f1', 'acc', 'log_loss'],
            'validation_patience': 10,
            'val_every_n_epochs': 1,
            'log_every_n_batches': 1,
            'class_name': 'nn_trainer',
            'evaluation_targets': ['test']
        },
        'metadata': {
            'variables': {
                'ROOT_PATH': root_path,
                'DOWNLOADS_PATH': '{ROOT_PATH}/downloads',
                'MODELS_PATH': '{ROOT_PATH}/models'
            },
            'requirements': [],
            'download': downloads
        }
    }

    if paraphrases:
        SimpleDataReader().save(
            paraphrases,
            parse_config(res)['dataset_reader']['data_path'], train_size)

    return res
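
A sketch of turning the generated settings into a trained model; the toy paraphrase pairs are purely illustrative, and train_model is assumed to accept a config dict (it passes it through parse_config just like the snippets above).

from deeppavlov import train_model

pairs = [(('я иду домой', 'домой я иду'), 1),
         (('я иду домой', 'на улице дождь'), 0)]
settings = create_settings(pairs, name='my_paraphraser')
model = train_model(settings)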
Example #31
def results_to_table(population, evolution, considered_metrics, result_file,
                     result_table_columns):
    population_size = len(population)
    train_config = evolution.basic_config.get('train', {})

    if 'evaluation_targets' in train_config:
        evaluation_targets = train_config['evaluation_targets']
    else:
        evaluation_targets = []
        if train_config.get('validate_best', True):
            evaluation_targets.append('valid')
        elif train_config.get('test_best', True):
            evaluation_targets.append('test')

    if 'valid' in evaluation_targets:
        target = 'valid'
    elif 'test' in evaluation_targets:
        target = 'test'
    elif 'train' in evaluation_targets:
        target = 'train'
    else:
        raise ConfigError('evaluation_targets are empty. Can not evolve')

    if target != 'valid':
        log.info(f"Tuning parameters on {target}")

    population_metrics = {}
    for m in considered_metrics:
        population_metrics[m] = []
    for i in range(population_size):
        log_path = expand_path(
            evolution.get_value_from_config(
                parse_config(population[i]),
                evolution.path_to_models_save_path)) / "out.txt"

        report = {}
        with log_path.open(encoding='utf8') as f:
            for line in f:
                try:
                    report.update(json.loads(line))
                except:
                    pass

        result_table_dict = defaultdict(list)

        for m in considered_metrics:
            for data_type in evaluation_targets:
                result_table_dict[f'{m}_{data_type}'].append(
                    report[data_type]['metrics'][m])
                if data_type == target:
                    population_metrics[m].append(
                        report[data_type]['metrics'][m])

        result_table_dict[result_table_columns[-1]] = [
            json.dumps(population[i])
        ]
        result_table = pd.DataFrame(result_table_dict)
        result_table.loc[:, result_table_columns].to_csv(result_file,
                                                         index=False,
                                                         sep='\t',
                                                         mode='a',
                                                         header=None)

    return population_metrics
Example #32
def results_to_table(population, evolution, considered_metrics, result_file,
                     result_table_columns):
    population_size = len(population)
    validate_best = evolution.get_value_from_config(
        evolution.basic_config,
        list(evolution.find_model_path(
            evolution.basic_config, "validate_best"))[0] + ["validate_best"])
    test_best = evolution.get_value_from_config(
        evolution.basic_config,
        list(evolution.find_model_path(evolution.basic_config, "test_best"))[0]
        + ["test_best"])
    if (not validate_best) and test_best:
        log.info("Validate_best is set to False. Tuning parameters on test")
    elif (not validate_best) and (not test_best):
        raise ConfigError(
            "Validate_best and test_best are set to False. Can not evolve.")

    population_metrics = {}
    for m in considered_metrics:
        population_metrics[m] = []
    for i in range(population_size):
        logpath = expand_path(
            evolution.get_value_from_config(
                parse_config(population[i]),
                evolution.path_to_models_save_path)) / "out.txt"
        reports_data = logpath.read_text(encoding='utf8').splitlines()[-2:]
        reports = []
        for j in range(2):
            try:
                reports.append(json.loads(reports_data[j]))
            except:
                pass

        val_results = {}
        test_results = {}
        for m in considered_metrics:
            val_results[m] = None
            test_results[m] = None
        if len(reports) == 2 and "valid" in reports[0].keys(
        ) and "test" in reports[1].keys():
            val_results = reports[0]["valid"]["metrics"]
            test_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "valid" in reports[0].keys(
        ) and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "test" in reports[0].keys(
        ) and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys(
        ) and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys(
        ) and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys(
        ) and "train" in reports[1].keys():
            val_results = reports[1]["train"]["metrics"]
        elif len(reports) == 1 and "valid" in reports[0].keys():
            val_results = reports[0]["valid"]["metrics"]
        elif len(reports) == 1 and "test" in reports[0].keys():
            test_results = reports[0]["test"]["metrics"]
        else:
            raise ConfigError(
                "Cannot process output files: valid and/or test results were not found"
            )

        result_table_dict = {}
        for el in result_table_columns:
            result_table_dict[el] = []

        for m in considered_metrics:
            result_table_dict[m + "_valid"].append(val_results[m])
            result_table_dict[m + "_test"].append(test_results[m])
            if validate_best:
                population_metrics[m].append(val_results[m])
            elif test_best:
                population_metrics[m].append(test_results[m])

        result_table_dict[result_table_columns[-1]] = [
            json.dumps(population[i])
        ]
        result_table = pd.DataFrame(result_table_dict)
        result_table.loc[:, result_table_columns].to_csv(result_file,
                                                         index=False,
                                                         sep='\t',
                                                         mode='a',
                                                         header=None)

    return population_metrics
Example #33
def results_to_table(population, evolution, considered_metrics, result_file, result_table_columns):
    population_size = len(population)
    validate_best = evolution.get_value_from_config(evolution.basic_config,
                                                    list(evolution.find_model_path(
                                                        evolution.basic_config, "validate_best"))[0]
                                                    + ["validate_best"])
    test_best = evolution.get_value_from_config(evolution.basic_config,
                                                list(evolution.find_model_path(
                                                    evolution.basic_config, "test_best"))[0]
                                                + ["test_best"])
    if (not validate_best) and test_best:
        log.info("Validate_best is set to False. Tuning parameters on test")
    elif (not validate_best) and (not test_best):
        raise ConfigError("Validate_best and test_best are set to False. Can not evolve.")

    population_metrics = {}
    for m in considered_metrics:
        population_metrics[m] = []
    for i in range(population_size):
        logpath = expand_path(evolution.get_value_from_config(parse_config(population[i]),
                                                              evolution.path_to_models_save_path)
                              ) / "out.txt"
        reports_data = logpath.read_text(encoding='utf8').splitlines()[-2:]
        reports = []
        for j in range(2):
            try:
                reports.append(json.loads(reports_data[j]))
            except:
                pass

        val_results = {}
        test_results = {}
        for m in considered_metrics:
            val_results[m] = None
            test_results[m] = None
        if len(reports) == 2 and "valid" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[0]["valid"]["metrics"]
            test_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "valid" in reports[0].keys() and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "test" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "train" in reports[1].keys():
            val_results = reports[1]["train"]["metrics"]
        elif len(reports) == 1 and "valid" in reports[0].keys():
            val_results = reports[0]["valid"]["metrics"]
        elif len(reports) == 1 and "test" in reports[0].keys():
            test_results = reports[0]["test"]["metrics"]
        else:
            raise ConfigError("Cannot process output files: valid and/or test results were not found")

        result_table_dict = {}
        for el in result_table_columns:
            result_table_dict[el] = []

        for m in considered_metrics:
            result_table_dict[m + "_valid"].append(val_results[m])
            result_table_dict[m + "_test"].append(test_results[m])
            if validate_best:
                population_metrics[m].append(val_results[m])
            elif test_best:
                population_metrics[m].append(test_results[m])

        result_table_dict[result_table_columns[-1]] = [json.dumps(population[i])]
        result_table = pd.DataFrame(result_table_dict)
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t', mode='a', header=None)

    return population_metrics
Example #34
def train_evaluate_model_from_config(
        config: Union[str, Path, dict],
        iterator: Union[DataLearningIterator, DataFittingIterator] = None,
        *,
        to_train: bool = True,
        evaluation_targets: Optional[Iterable[str]] = None,
        to_validate: Optional[bool] = None,
        download: bool = False,
        start_epoch_num: Optional[int] = None,
        recursive: bool = False) -> Dict[str, Dict[str, float]]:
    """Make training and evaluation of the model described in corresponding configuration file."""
    config = parse_config(config)

    if download:
        deep_download(config)

    if to_train and recursive:
        for subconfig in get_all_elems_from_json(config['chainer'],
                                                 'config_path'):
            log.info(f'Training "{subconfig}"')
            train_evaluate_model_from_config(subconfig,
                                             download=False,
                                             recursive=True)

    import_packages(config.get('metadata', {}).get('imports', []))

    if iterator is None:
        try:
            data = read_data_by_config(config)
        except ConfigError as e:
            to_train = False
            log.warning(f'Skipping training. {e.message}')
        else:
            iterator = get_iterator_from_config(config, data)

    if 'train' not in config:
        log.warning('Train config is missing. Populating with default values')
    train_config = config.get('train')

    if start_epoch_num is not None:
        train_config['start_epoch_num'] = start_epoch_num

    if 'evaluation_targets' not in train_config and (
            'validate_best' in train_config or 'test_best' in train_config):
        log.warning(
            '"validate_best" and "test_best" parameters are deprecated.'
            ' Please, use "evaluation_targets" list instead')

        train_config['evaluation_targets'] = []
        if train_config.pop('validate_best', True):
            train_config['evaluation_targets'].append('valid')
        if train_config.pop('test_best', True):
            train_config['evaluation_targets'].append('test')

    trainer_class = get_model(train_config.pop('class_name', 'nn_trainer'))
    trainer = trainer_class(config['chainer'], **train_config)

    if to_train:
        trainer.train(iterator)

    res = {}

    if iterator is not None:
        if to_validate is not None:
            if evaluation_targets is None:
                log.warning(
                    '"to_validate" parameter is deprecated and will be removed in future versions.'
                    ' Please, use "evaluation_targets" list instead')
                evaluation_targets = ['test']
                if to_validate:
                    evaluation_targets.append('valid')
            else:
                log.warn(
                    'Both "evaluation_targets" and "to_validate" parameters are specified.'
                    ' "to_validate" is deprecated and will be ignored')

        res = trainer.evaluate(iterator,
                               evaluation_targets,
                               print_reports=True)
        trainer.get_chainer().destroy()

    res = {k: v['metrics'] for k, v in res.items()}

    return res
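
A typical call matching the signature above; the config reference follows Example #29, and the evaluation_targets value is only an illustration.

from deeppavlov import configs

scores = train_evaluate_model_from_config(configs.ner.ner_ontonotes_bert_mult,
                                           download=True,
                                           evaluation_targets=['valid', 'test'])
print(scores)   # {'valid': {...}, 'test': {...}} with the configured metrics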
Example #35
INTENT_PHRASES_PATH = os.environ.get("INTENT_PHRASES_PATH", "intent_phrases.json")
CONFIG_NAME = os.environ.get("CONFIG_NAME", None)
if CONFIG_NAME is None:
    raise NotImplementedError("No config file name is given.")

try:
    intents_model = build_model(CONFIG_NAME, download=True)
    logger.info("Model loaded")
    regexp = get_regexp(INTENT_PHRASES_PATH)
    logger.info("Regexp model loaded")
except Exception as e:
    sentry_sdk.capture_exception(e)
    logger.exception(e)
    raise e

parsed = parse_config(CONFIG_NAME)
with open(expand_path(parsed["metadata"]["variables"]["MODEL_PATH"]).joinpath("classes.dict"), "r") as f:
    intents = f.read().strip().splitlines()
intents = [el.strip().split("\t")[0] for el in intents]
logger.info(f"Considered intents: {intents}")


def get_classifier_predictions(batch_texts: List[List[str]], intents, intents_model, thresholds):
    if thresholds is None:
        # if thresholds are not given, use 0.5 as default
        thresholds = [0.5] * len(intents)
    thresholds = np.array(thresholds)
    # make a 1d-list of texts for classifier
    sentences = list(chain.from_iterable(batch_texts))
    sentences_text_ids = []
    for text_id, text in enumerate(batch_texts):
Example #36
def main():
    params_helper = ParamsSearch()

    args = parser.parse_args()
    is_loo = False
    n_folds = None
    if args.folds == 'loo':
        is_loo = True
    elif args.folds is None:
        n_folds = None
    elif args.folds.isdigit():
        n_folds = int(args.folds)
    else:
        raise NotImplementedError('Not implemented this type of CV')

    # read config
    pipeline_config_path = find_config(args.config_path)
    config_init = read_json(pipeline_config_path)
    config = parse_config(config_init)
    data = read_data_by_config(config)
    target_metric = parse_config(config_init)['train']['metrics'][0]
    if isinstance(target_metric, dict):
        target_metric = target_metric['name']

    # get all params for search
    param_paths = list(params_helper.find_model_path(config, 'search_choice'))
    param_values = []
    param_names = []
    for path in param_paths:
        value = params_helper.get_value_from_config(config, path)
        param_name = path[-1]
        param_value_search = value['search_choice']
        param_names.append(param_name)
        param_values.append(param_value_search)

    # find optimal params
    if args.search_type == 'grid':
        # generate parameter combinations for grid search
        combinations = list(product(*param_values))

        # calculate cv scores
        scores = []
        for comb in combinations:
            config = deepcopy(config_init)
            for param_path, param_value in zip(param_paths, comb):
                params_helper.insert_value_or_dict_into_config(
                    config, param_path, param_value)
            config = parse_config(config)

            if (n_folds is not None) | is_loo:
                # CV for model evaluation
                score_dict = calc_cv_score(config,
                                           data=data,
                                           n_folds=n_folds,
                                           is_loo=is_loo)
                score = score_dict[next(iter(score_dict))]
            else:
                # train/valid for model evaluation
                data_to_evaluate = data.copy()
                if len(data_to_evaluate['valid']) == 0:
                    data_to_evaluate['train'], data_to_evaluate[
                        'valid'] = train_test_split(data_to_evaluate['train'],
                                                    test_size=0.2)
                iterator = get_iterator_from_config(config, data_to_evaluate)
                score = train_evaluate_model_from_config(
                    config, iterator=iterator)['valid'][target_metric]

            scores.append(score)

        # get model with best score
        best_params_dict = get_best_params(combinations, scores, param_names,
                                           target_metric)
        log.info('Best model params: {}'.format(best_params_dict))
    else:
        raise NotImplementedError('Not implemented this type of search')

    # save config
    best_config = config_init
    for i, param_name in enumerate(best_params_dict.keys()):
        if param_name != target_metric:
            params_helper.insert_value_or_dict_into_config(
                best_config, param_paths[i], best_params_dict[param_name])

    best_model_filename = pipeline_config_path.with_suffix('.cvbest.json')
    save_json(best_config, best_model_filename)
    log.info('Best model saved in json-file: {}'.format(best_model_filename))