Example no. 1
def train_agent_models(config_path: str):
    usr_dir = paths.USR_PATH
    a = build_agent_from_config(config_path)

    for skill_config in a.skill_configs:
        model_config = skill_config['model']
        model_name = model_config['name']

        if issubclass(REGISTRY[model_name], Trainable):
            reader_config = skill_config['dataset_reader']
            reader = from_params(REGISTRY[reader_config['name']], {})
            data = reader.read(reader_config.get('data_path', usr_dir))

            dataset_config = skill_config['dataset']
            dataset_name = dataset_config['name']
            dataset = from_params(REGISTRY[dataset_name],
                                  dataset_config,
                                  data=data)

            model = from_params(REGISTRY[model_name], model_config)
            model.train(dataset)
        else:
            print('Model {} is not a subclass of Trainable, skipping training.'.format(model_name),
                  file=sys.stderr)
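
Every snippet in this list leans on a registry of named classes and a `from_params` factory. As a rough orientation, here is a minimal sketch of that pattern; `REGISTRY`, `register` and this `from_params` body are illustrative assumptions, not the real DeepPavlov implementation.

# Minimal sketch of the registry/factory pattern (illustrative only).
REGISTRY = {}

def register(name):
    """Store a class in the registry under a string name."""
    def wrap(cls):
        REGISTRY[name] = cls
        return cls
    return wrap

def from_params(cls, config, **kwargs):
    """Instantiate `cls` from the config params (minus the 'name' key),
    letting explicit keyword arguments override the config."""
    params = {k: v for k, v in config.items() if k != 'name'}
    params.update(kwargs)
    return cls(**params)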
Example no. 2
def train_model_from_config(config_path: str, mode='train'):
    usr_dir = paths.USR_PATH
    config = read_json(config_path)

    reader_config = config['dataset_reader']
    # NOTE: No params are passed to the dataset reader because it does not define __init__()
    reader = from_params(REGISTRY[reader_config['name']], {})
    data = reader.read(reader_config.get('data_path', usr_dir))

    dataset_config = config['dataset']
    dataset_name = dataset_config['name']
    dataset = from_params(REGISTRY[dataset_name], dataset_config, data=data)

    vocabs = {}
    if 'vocabs' in config:
        for vocab_param_name, vocab_config in config['vocabs'].items():
            vocab_name = vocab_config['name']
            v = from_params(REGISTRY[vocab_name], vocab_config, mode=mode)
            v.train(dataset.iter_all('train'))
            vocabs[vocab_param_name] = v

    model_config = config['model']
    model_name = model_config['name']
    model = from_params(REGISTRY[model_name],
                        model_config,
                        vocabs=vocabs,
                        mode=mode)

    model.train(dataset)
Example no. 3
def train_model_from_config(config_path: str, mode='train'):
    usr_dir = paths.USR_PATH
    config = read_json(config_path)

    reader_config = config['dataset_reader']
    # NOTE: No params are passed to the dataset reader because it does not define __init__()
    reader = from_params(REGISTRY[reader_config['name']], {})
    data = reader.read(reader_config.get('data_path', usr_dir))

    dataset_config = config['dataset']
    dataset_name = dataset_config['name']
    dataset = from_params(REGISTRY[dataset_name], dataset_config, data=data)

    vocabs = {}
    if 'vocabs' in config:
        for vocab_param_name, vocab_config in config['vocabs'].items():
            vocab_name = vocab_config['name']
            v = from_params(REGISTRY[vocab_name], vocab_config, mode=mode)
            v.train(dataset.iter_all('train'))
            vocabs[vocab_param_name] = v

    model_config = config['model']
    model_name = model_config['name']
    model = from_params(REGISTRY[model_name], model_config, vocabs=vocabs, mode=mode)

    model.train(dataset)
Example no. 4
def build_model_from_config(config,
                            mode='infer',
                            load_trained=False,
                            as_component=False):
    set_deeppavlov_root(config)
    if 'chainer' in config:
        model_config = config['chainer']

        model = Chainer(model_config['in'],
                        model_config['out'],
                        model_config.get('in_y'),
                        as_component=as_component)

        for component_config in model_config['pipe']:
            if load_trained and ('fit_on' in component_config
                                 or 'in_y' in component_config):
                try:
                    component_config['load_path'] = component_config[
                        'save_path']
                except KeyError:
                    log.warning(
                        'No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                        .format(
                            component_config.get(
                                'name', component_config.get('ref',
                                                             'UNKNOWN'))))
            component = from_params(component_config, vocabs=[], mode=mode)

            if 'in' in component_config:
                c_in = component_config['in']
                c_out = component_config['out']
                in_y = component_config.get('in_y', None)
                main = component_config.get('main', False)
                model.append(component, c_in, c_out, in_y, main)

        return model

    model_config = config['model']
    if load_trained:
        try:
            model_config['load_path'] = model_config['save_path']
        except KeyError:
            log.warning(
                'No "save_path" parameter for the model, so "load_path" will not be renewed'
            )

    vocabs = {}
    if 'vocabs' in config:
        for vocab_param_name, vocab_config in config['vocabs'].items():
            v = from_params(vocab_config, mode=mode)
            vocabs[vocab_param_name] = v
    model = from_params(model_config, vocabs=vocabs, mode=mode)
    model.reset()
    return model
Example no. 5
def build_model_from_config(config, mode='infer'):
    model_config = config['model']
    model_name = model_config['name']

    vocabs = {}
    if 'vocabs' in config:
        for vocab_param_name, vocab_config in config['vocabs'].items():
            vocab_name = vocab_config['name']
            v = from_params(REGISTRY[vocab_name], vocab_config, mode=mode)
            vocabs[vocab_param_name] = v
    model = from_params(REGISTRY[model_name], model_config, vocabs=vocabs, mode=mode)
    model.reset()
    return model
Example no. 6
def build_model_from_config(config: Union[str, Path, dict], mode: str = 'infer', load_trained: bool = False,
                            as_component: bool = False) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'), as_component=as_component)

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                            .format(component_config.get('name', component_config.get('ref', 'UNKNOWN'))))
        component = from_params(component_config, mode=mode)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
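
For orientation, a hypothetical call site for the function above; the config path is a placeholder, and the returned Chainer is assumed, as elsewhere in these snippets, to be callable on a batch of inputs.

# Hedged usage sketch (placeholder path, assumed call convention).
model = build_model_from_config('path/to/config.json')
predictions = model(['What is the weather today?'])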
Example no. 7
def fit_chainer(config: dict, iterator: BasicDatasetIterator) -> Chainer:

    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'],
                      chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, vocabs=[], mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.iter_all('train'),
                                   to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(c_in, c_out, component, in_y, main)
    return chainer
Example no. 8
def fit_chainer(config: dict, iterator: Union[DataLearningIterator, DataFittingIterator]) -> Chainer:
    """Fit and return the chainer described in corresponding configuration dictionary."""
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            targets = component_config['fit_on']
            if isinstance(targets, str):
                targets = [targets]

            preprocessed = chainer.compute(*iterator.get_instances('train'), targets=targets)
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]

            component.fit(*preprocessed)
            component.save()

        if 'fit_on_batch' in component_config:
            component: Estimator
            component.fit_batches(iterator, config['train']['batch_size'])
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
Example no. 9
def fit_chainer(config: dict, iterator: Union[DataLearningIterator,
                                              DataFittingIterator]):

    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'],
                      chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.get_instances('train'),
                                   to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'fit_on_batch' in component_config:
            component: Estimator
            component.fit_batches(iterator, config['train']['batch_size'])
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
Example no. 10
def interact_agent(config_path: str) -> None:
    """Start interaction with the agent described in corresponding configuration file."""
    a = build_agent_from_config(config_path)
    commutator = from_params(a.commutator_config)

    models = [build_model_from_config(sk) for sk in a.skill_configs]
    while True:
        # get input from user
        context = input(':: ')

        # check for exit command
        if context == 'exit' or context == 'stop' or context == 'quit' or context == 'q':
            return

        predictions = []
        for model in models:
            predictions.append({model.__class__.__name__: model.infer(context)})
        idx, name, pred = commutator.infer(predictions)
        print('>>', pred)

        a.history.append({
            'context': context,
            "predictions": predictions,
            "winner": {
                "idx": idx,
                "model": name,
                "prediction": pred
            }
        })
        log.debug("Current history: {}".format(a.history))
Example no. 11
def fit_chainer(config: dict, iterator: Union[DataLearningIterator, DataFittingIterator]) -> Chainer:
    """Fit and return the chainer described in corresponding configuration dictionary."""
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.get_instances('train'), to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'fit_on_batch' in component_config:
            component: Estimator
            component.fit_batches(iterator, config['train']['batch_size'])
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
Example no. 12
def get_iterator_from_config(config: dict, data: dict):
    """Create iterator (from config) for specified data."""
    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator,
                    DataFittingIterator] = from_params(iterator_config,
                                                       data=data)
    return iterator
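
For illustration, a config fragment that would satisfy the lookup above; the iterator name is borrowed from other snippets in this list, while 'seed' and 'shuffle' are assumed parameters.

# Illustrative only: a minimal 'dataset_iterator' section.
config = {
    'dataset_iterator': {
        'name': 'basic_classification_iterator',
        'seed': 42,
        'shuffle': True,
    }
}
iterator = get_iterator_from_config(config, data={'train': [], 'valid': [], 'test': []})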
Example no. 13
    def read(self, data_path, tasks: Dict[str, Dict[str, str]]):
        """Creates dataset readers for tasks and returns what task dataset readers `read()` methods return.

        Args:
            data_path: can be anything since it is not used. `data_path` is present because it is
                required by the train.py script.
            tasks: dictionary whose keys are task names and whose values are dictionaries with `DatasetReader`
                subclasses specs. `DatasetReader` specs are provided in the same format as "dataset_reader"
                in the model config except for "class_name" field which has to be named "reader_class_name".
                ```json
                "tasks": {
                  "query_prediction": {
                    "reader_class_name": "basic_classification_reader",
                    "x": "Question",
                    "y": "Class",
                    "data_path": "{DOWNLOADS_PATH}/query_prediction"
                  }
                }
                ```

        Returns:
            dictionary whose keys are task names and whose values are whatever the task readers' `read()` methods returned.
        """
        data = {}
        for task_name, reader_params in tasks.items():
            reader_params = copy.deepcopy(reader_params)
            tasks[task_name] = from_params(
                {"class_name": reader_params['reader_class_name']})
            del reader_params['reader_class_name']
            reader_params['data_path'] = Path(
                reader_params['data_path']).expanduser()
            data[task_name] = tasks[task_name].read(**reader_params)
        return data
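
A hedged call sketch matching the docstring above; `MultiTaskReader` is a hypothetical name for the class that defines this `read()`, and the paths are placeholders.

reader = MultiTaskReader()  # hypothetical class name
data = reader.read(
    data_path=None,  # unused, see the docstring
    tasks={
        'query_prediction': {
            'reader_class_name': 'basic_classification_reader',
            'x': 'Question',
            'y': 'Class',
            'data_path': '~/downloads/query_prediction',
        }
    })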
Example no. 14
def build_model_from_config(config: Union[str, Path, dict], mode: str = 'infer', load_trained: bool = False) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'))

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                            .format(component_config.get('name', component_config.get('ref', 'UNKNOWN'))))
        component = from_params(component_config, mode=mode)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
Example no. 15
def predict_with_model(config_path):
    config = read_json(config_path)
    set_deeppavlov_root(config)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator =\
        from_params(iterator_config, data=data)

    model = build_model_from_config(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(
            batch_size=batch_size, data_type="test", shuffle=False, return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
Example no. 16
    def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator]) -> None:
        """
        Build the pipeline :class:`~deeppavlov.core.common.chainer.Chainer` and successively fit
        :class:`Estimator <deeppavlov.core.models.estimator.Estimator>` components using the provided data iterator.
        """
        if self._built:
            raise RuntimeError('Cannot fit already built chainer')
        for component_index, component_config in enumerate(self.chainer_config['pipe'], 1):
            component = from_params(component_config, mode='train')
            if 'fit_on' in component_config:
                component: Estimator

                targets = component_config['fit_on']
                if isinstance(targets, str):
                    targets = [targets]

                if self.batch_size > 0 and callable(getattr(component, 'partial_fit', None)):
                    writer = None

                    for i, (x, y) in enumerate(iterator.gen_batches(self.batch_size, shuffle=False)):
                        preprocessed = self._chainer.compute(x, y, targets=targets)
                        # noinspection PyUnresolvedReferences
                        result = component.partial_fit(*preprocessed)

                        if result is not None and self.tensorboard_log_dir is not None:
                            if writer is None:
                                writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
                                                                         f'partial_fit_{component_index}_log'))
                            for name, score in result.items():
                                summary = self._tf.Summary()
                                summary.value.add(tag='partial_fit/' + name, simple_value=score)
                                writer.add_summary(summary, i)
                            writer.flush()
                else:
                    preprocessed = self._chainer.compute(*iterator.get_instances(), targets=targets)
                    if len(targets) == 1:
                        preprocessed = [preprocessed]
                    result: Optional[Dict[str, Iterable[float]]] = component.fit(*preprocessed)

                    if result is not None and self.tensorboard_log_dir is not None:
                        writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
                                                                 f'fit_log_{component_index}'))
                        for name, scores in result.items():
                            for i, score in enumerate(scores):
                                summary = self._tf.Summary()
                                summary.value.add(tag='fit/' + name, simple_value=score)
                                writer.add_summary(summary, i)
                        writer.flush()

                component.save()

            if 'in' in component_config:
                c_in = component_config['in']
                c_out = component_config['out']
                in_y = component_config.get('in_y', None)
                main = component_config.get('main', False)
                self._chainer.append(component, c_in, c_out, in_y, main)
        self._built = True
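
The `partial_fit` branch above logs whatever dict of scores a component returns. A toy Estimator compatible with that contract might look like the sketch below; it is purely illustrative and not part of DeepPavlov.

class CountingEstimator:
    """Toy estimator: counts seen examples and reports the running total,
    matching the Optional[Dict[str, float]] contract of partial_fit above."""

    def __init__(self):
        self.seen = 0

    def partial_fit(self, x, y):
        self.seen += len(x)
        return {'examples_seen': float(self.seen)}

    def fit(self, x, y):
        self.seen = len(x)

    def save(self):
        pass  # a real Estimator would persist its state here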
Example no. 17
def build_model(config: Union[str, Path, dict],
                mode: str = 'infer',
                load_trained: bool = False,
                download: bool = False,
                serialized: Optional[bytes] = None) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    config = parse_config(config)

    if serialized:
        serialized: list = pickle.loads(serialized)

    if download:
        deep_download(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'],
                    model_config.get('in_y'))

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config
                             or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning(
                    'No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                    .format(
                        component_config.get(
                            'class_name',
                            component_config.get('ref', 'UNKNOWN'))))

        if serialized and 'in' in component_config:
            component_serialized = serialized.pop(0)
        else:
            component_serialized = None

        component = from_params(component_config,
                                mode=mode,
                                serialized=component_serialized)

        if 'id' in component_config:
            model._components_dict[component_config['id']] = component

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
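
Per the code above, `serialized` must unpickle into a list with one entry per pipeline component that declares 'in', in pipe order. A hedged sketch of producing such a payload; the None values stand in for real component states, and `config` is assumed to be a parsed model config as in the snippets above.

import pickle

payload = pickle.dumps([None, None])  # placeholder states, one per 'in'-component
model = build_model(config, serialized=payload)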
Example no. 18
def train_agent_models(config_path: str):
    usr_dir = paths.USR_PATH
    a = build_agent_from_config(config_path)

    for skill_config in a.skill_configs:
        model_config = skill_config['model']
        model_name = model_config['name']

        if issubclass(REGISTRY[model_name], Trainable):
            reader_config = skill_config['dataset_reader']
            reader = from_params(REGISTRY[reader_config['name']], {})
            data = reader.read(reader_config.get('data_path', usr_dir))

            dataset_config = skill_config['dataset']
            dataset_name = dataset_config['name']
            dataset = from_params(REGISTRY[dataset_name], dataset_config, data=data)

            model = from_params(REGISTRY[model_name], model_config)
            model.train(dataset)
        else:
            print('Model {} is not a subclass of Trainable, skipping training.'.format(model_name),
                  file=sys.stderr)
Example no. 19
def predict_with_model(config_path: Union[Path, str]) -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to the config file

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CoNLL-U descriptions.

    """
    config = parse_config(config_path)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['class_name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {
        k: v
        for k, v in reader_config.items()
        if k not in ['class_name', 'data_path']
    }
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config,
                                                        data=data)

    model = build_model(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(batch_size=batch_size,
                                                data_type="test",
                                                shuffle=False,
                                                return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
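
The function above reads only two keys from the config's 'predict' section. A minimal fragment exercising both, shown as a Python dict so the values can be annotated; the values themselves are examples.

predict_section = {
    'batch_size': 32,             # -1 is the default used above
    'outfile': 'predictions.txt'  # omit to skip writing the answers to disk
}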
Example no. 20
    def __init__(self, data: dict, tasks: dict):
        self.task_iterators = {}
        for task_name, task_iterator_params in tasks.items():
            task_iterator_params = copy.deepcopy(task_iterator_params)
            task_iterator_params['class_name'] = task_iterator_params['iterator_class_name']
            del task_iterator_params['iterator_class_name']
            self.task_iterators[task_name] = from_params(task_iterator_params, data=data[task_name])

        self.train = self._extract_data_type('train')
        self.valid = self._extract_data_type('valid')
        self.test = self._extract_data_type('test')
        self.data = {
            'train': self.train,
            'valid': self.valid,
            'test': self.test,
            'all': self._unite_dataset_parts(self.train, self.valid, self.test)
        }
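
Matching the constructor above, every entry of `tasks` must carry an 'iterator_class_name' plus that iterator's own parameters. A hedged example; `MultiTaskIterator` is a hypothetical name for the class defining this `__init__`, and the values are illustrative.

iterator = MultiTaskIterator(
    data={'query_prediction': {'train': [], 'valid': [], 'test': []}},
    tasks={
        'query_prediction': {
            'iterator_class_name': 'basic_classification_iterator',
            'seed': 42,
        }
    })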
Example no. 21
def build_model(config: Union[str, Path, dict], mode: str = 'infer',
                load_trained: bool = False, download: bool = False,
                serialized: Optional[bytes] = None) -> Chainer:
    """Build and return the model described in corresponding configuration file."""
    config = parse_config(config)

    if serialized:
        serialized: list = pickle.loads(serialized)

    if download:
        deep_download(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'))

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                            .format(component_config.get('class_name', component_config.get('ref', 'UNKNOWN'))))

        if serialized and 'in' in component_config:
            component_serialized = serialized.pop(0)
        else:
            component_serialized = None

        component = from_params(component_config, mode=mode, serialized=component_serialized)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
Example no. 22
def predict_with_model(config_path: Union[Path, str]) -> List[Optional[List[str]]]:
    """Returns predictions of morphotagging model given in config :config_path:.

    Args:
        config_path: a path to the config file

    Returns:
        a list of morphological analyses for each sentence. Each analysis is either a list of tags
        or a list of full CoNLL-U descriptions.

    """
    config = parse_config(config_path)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['class_name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    read_params = {k: v for k, v in reader_config.items() if k not in ['class_name', 'data_path']}
    data: Dict = reader.read(data_path, **read_params)

    iterator_config = config['dataset_iterator']
    iterator: MorphoTaggerDatasetIterator = from_params(iterator_config, data=data)

    model = build_model(config, load_trained=True)
    answers = [None] * len(iterator.test)
    batch_size = config['predict'].get("batch_size", -1)
    for indexes, (x, _) in iterator.gen_batches(
            batch_size=batch_size, data_type="test", shuffle=False, return_indexes=True):
        y = model(x)
        for i, elem in zip(indexes, y):
            answers[i] = elem
    outfile = config['predict'].get("outfile")
    if outfile is not None:
        outfile = Path(outfile)
        if not outfile.exists():
            outfile.parent.mkdir(parents=True, exist_ok=True)
        with open(outfile, "w", encoding="utf8") as fout:
            for elem in answers:
                fout.write(elem + "\n")
    return answers
Example no. 23
def interact_agent(config_path: str) -> None:
    """Start interaction with the agent described in corresponding configuration file."""
    a = build_agent_from_config(config_path)
    commutator = from_params(a.commutator_config)

    models = [build_model_from_config(sk) for sk in a.skill_configs]
    while True:
        # get input from user
        context = input(':: ')

        # check for exit command
        if context == 'exit' or context == 'stop' or context == 'quit' or context == 'q':
            return

        predictions = []
        for model in models:
            predictions.append({model.__class__.__name__: model.infer(context)})
        idx, name, pred = commutator.infer(predictions)
        print('>>', pred)

        a.history.append({'context': context, "predictions": predictions,
                          "winner": {"idx": idx, "model": name, "prediction": pred}})
        log.debug("Current history: {}".format(a.history))
Example no. 24
def interact_agent(config_path):
    a = build_agent_from_config(config_path)
    commutator_name = a.commutator_config['name']
    commutator = from_params(REGISTRY[commutator_name], a.commutator_config)

    models = [build_model_from_config(sk) for sk in a.skill_configs]
    while True:
        # get input from user
        context = input(':: ')

        # check for exit command
        if context == 'exit' or context == 'stop' or context == 'quit' or context == 'q':
            return

        predictions = []
        for model in models:
            predictions.append({model.__class__.__name__: model.infer(context)})
        idx, name, pred = commutator.infer(predictions, a.history)
        print('>>', pred)

        a.history.append({'context': context, "predictions": predictions,
                          "winner": {"idx": idx, "model": name, "prediction": pred}})
        print("Current history: {}".format(a.history))
Example no. 25
def build_model_from_config(config,
                            mode='infer',
                            load_trained=False,
                            as_component=False):
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    model_config = config['chainer']

    model = Chainer(model_config['in'],
                    model_config['out'],
                    model_config.get('in_y'),
                    as_component=as_component)

    for component_config in model_config['pipe']:
        if load_trained and ('fit_on' in component_config
                             or 'in_y' in component_config):
            try:
                component_config['load_path'] = component_config['save_path']
            except KeyError:
                log.warning(
                    'No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                    .format(
                        component_config.get(
                            'name', component_config.get('ref', 'UNKNOWN'))))
        component = from_params(component_config, mode=mode)

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            model.append(component, c_in, c_out, in_y, main)

    return model
Example no. 26
def train_model_from_config(config_path: str) -> None:
    config = read_json(config_path)
    set_deeppavlov_root(config)

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    kwargs = {
        k: v
        for k, v in reader_config.items() if k not in ['name', 'data_path']
    }
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if callable(getattr(model, 'train_on_batch', None)):
        _train_batches(model, iterator, train_config, metrics_functions)
    elif callable(getattr(model, 'fit', None)):
        _fit(model, iterator, train_config)
    elif not isinstance(model, Chainer):
        log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))
Example no. 27
},
    "tokenizer": {
        "name": "stream_spacy_tokenizer",
        "lowercase": False
    },
    "tracker": {
        "name": "featurized_tracker",
        "slot_names": ["pricerange", "this", "area", "food", "name"]
    },
    "main": True,
    "debug": False
}

mode_train = {"mode": "train"}

tracker = from_params(bot_dict["tracker"], "train")

tokenizer = from_params(bot_dict["tokenizer"], "train")

network_parameters = bot_dict["network_parameters"]

template_path = bot_dict["template_path"]
save_path = bot_dict["save_path"]
load_path = bot_dict["load_path"]
template_type = bot_dict["template_type"]

# 'str' object has no attribute 'items'
word_vocab = from_params(bot_dict["word_vocab"], "train")

bow_embedder = from_params(bot_dict["bow_embedder"], "train")
Example no. 28
def fit_chainer(config: dict, iterator: Union[DataLearningIterator, DataFittingIterator]) -> Chainer:
    """Fit and return the chainer described in corresponding configuration dictionary."""
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')
        if ('fit_on' in component_config) and \
                (not callable(getattr(component, 'partial_fit', None))):
            component: Estimator

            targets = component_config['fit_on']
            if isinstance(targets, str):
                targets = [targets]

            preprocessed = chainer.compute(*iterator.get_instances('train'), targets=targets)
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]

            result = component.fit(*preprocessed)
            if result is not None and config['train'].get('tensorboard_log_dir') is not None:
                import tensorflow as tf
                tb_log_dir = expand_path(config['train']['tensorboard_log_dir'])
                writer = tf.summary.FileWriter(str(tb_log_dir / 'fit_log'))

                for name, scores in result.items():
                    for i, score in enumerate(scores):
                        summ = tf.Summary()
                        summ.value.add(tag='fit/' + name, simple_value=score)
                        writer.add_summary(summ, i)
                writer.flush()

            component.save()

        if 'fit_on_batch' in component_config:
            log.warning('`fit_on_batch` is deprecated and will be removed in future versions.'
                        ' Please use `fit_on` instead.')
        if ('fit_on_batch' in component_config) or \
                (('fit_on' in component_config) and
                 callable(getattr(component, 'partial_fit', None))):
            component: Estimator
            targets = component_config.get('fit_on', component_config.get('fit_on_batch'))  # avoid a KeyError when only 'fit_on' is present
            if isinstance(targets, str):
                targets = [targets]

            writer = None
            for i, data in enumerate(iterator.gen_batches(config['train']['batch_size'], shuffle=False)):
                preprocessed = chainer.compute(*data, targets=targets)
                if len(targets) == 1:
                    preprocessed = [preprocessed]
                result = component.partial_fit(*preprocessed)

                if result is not None and config['train'].get('tensorboard_log_dir') is not None:
                    if writer is None:  # create the writer lazily, on the first logged batch
                        import tensorflow as tf
                        tb_log_dir = expand_path(config['train']['tensorboard_log_dir'])
                        writer = tf.summary.FileWriter(str(tb_log_dir / 'fit_batches_log'))

                    for name, score in result.items():
                        summ = tf.Summary()
                        summ.value.add(tag='fit_batches/' + name, simple_value=score)
                        writer.add_summary(summ, i)
                    writer.flush()

            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
Example no. 29
def train_evaluate_model_from_config(config: Union[str, Path, dict], to_train: bool = True, to_validate: bool = True) -> None:
    """Train and evaluate the model described in the corresponding configuration file."""
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    data = []
    reader_config = config.get('dataset_reader', None)

    if reader_config:
        reader_config = config['dataset_reader']
        if 'class' in reader_config:
            c = reader_config.pop('class')
            try:
                module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name), cls_name)()
            except ValueError:
                e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                                .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()
        data_path = reader_config.pop('data_path', '')
        if isinstance(data_path, list):
            data_path = [expand_path(x) for x in data_path]
        else:
            data_path = expand_path(data_path)
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config,
                                                                             data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True,
        'show_examples': False
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,
                                    train_config.get('batch_size', -1), 'test',
                                    show_examples=train_config['show_examples'])
            }

            print(json.dumps(report, ensure_ascii=False))
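
The 'dataset' shorthand handled above supports only the 'classification' type and is expanded by merging the same fields into a derived reader config and iterator config. An illustrative fragment that takes that branch; 'data_path' and 'seed' are example fields.

config['dataset'] = {
    'type': 'classification',
    'data_path': 'my_data/',
    'seed': 42,
}
# The expansion above then produces:
#   config['dataset_reader']   = {...same fields..., 'name': 'basic_classification_reader'}
#   config['dataset_iterator'] = {...same fields..., 'name': 'basic_classification_iterator'}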
Example no. 30
def train_evaluate_model_from_config(config: Union[str, Path, dict],
                                     to_train=True,
                                     to_validate=True) -> None:
    if isinstance(config, (str, Path)):
        config = read_json(config)
    set_deeppavlov_root(config)

    import_packages(config.get('metadata', {}).get('imports', []))

    dataset_config = config.get('dataset', None)

    if dataset_config:
        config.pop('dataset')
        ds_type = dataset_config['type']
        if ds_type == 'classification':
            reader = {'name': 'basic_classification_reader'}
            iterator = {'name': 'basic_classification_iterator'}
            config['dataset_reader'] = {**dataset_config, **reader}
            config['dataset_iterator'] = {**dataset_config, **iterator}
        else:
            raise Exception("Unsupported dataset type: {}".format(ds_type))

    data = []
    reader_config = config.get('dataset_reader', None)

    if reader_config:
        reader_config = config['dataset_reader']
        if 'class' in reader_config:
            c = reader_config.pop('class')
            try:
                module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name),
                                 cls_name)()
            except ValueError:
                e = ConfigError(
                    'Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                    .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()
        data_path = expand_path(reader_config.pop('data_path', ''))
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator,
                    DataFittingIterator] = from_params(iterator_config,
                                                       data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if to_train:
        model = fit_chainer(config, iterator)

        if callable(getattr(model, 'train_on_batch', None)):
            _train_batches(model, iterator, train_config, metrics_functions)
        elif callable(getattr(model, 'fit_batches', None)):
            _fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, iterator,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))
Example no. 31
def load_elmo(elmo_output_names=("word_emb",)):
    config = parse_config(getattr(configs.elmo_embedder, "elmo_ru-news"))
    elmo_config = config["chainer"]["pipe"][-1]
    elmo_config['elmo_output_names'] = elmo_output_names
    embedder = from_params(elmo_config)
    return embedder
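
A hedged usage sketch for the helper above; the embedder, like other pipeline components in these snippets, is assumed to be callable on a batch of pre-tokenized sentences.

embedder = load_elmo(elmo_output_names=("word_emb",))
vectors = embedder([["добрый", "день"]])  # one illustrative Russian sentence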
Example no. 32
def train_model_from_config(config_path: str):
    config = read_json(config_path)
    set_deeppavlov_root(config)

    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    data = reader.read(data_path)

    dataset_config = config['dataset']
    dataset: Dataset = from_params(dataset_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, dataset)
    else:
        vocabs = {}
        for vocab_param_name, vocab_config in config.get('vocabs', {}).items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, dataset)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(
        zip(train_config['metrics'],
            get_metrics_by_names(train_config['metrics'])))

    if callable(getattr(model, 'train_on_batch', None)):
        _train_batches(model, dataset, train_config, metrics_functions)
    elif callable(getattr(model, 'fit', None)):
        _fit(model, dataset, train_config)
    elif not isinstance(model, Chainer):
        log.warning('Nothing to train')

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid':
                _test_model(model, metrics_functions, dataset,
                            train_config.get('batch_size', -1), 'valid')
            }

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test':
                _test_model(model, metrics_functions, dataset,
                            train_config.get('batch_size', -1), 'test')
            }

            print(json.dumps(report, ensure_ascii=False))