Example #1
    def append(self,
               component: Union[Component, FunctionType],
               in_x: Union[str, list, dict] = None,
               out_params: Union[str, list] = None,
               in_y: Union[str, list, dict] = None,
               main: bool = False):
        if isinstance(in_x, str):
            in_x = [in_x]
        if isinstance(in_y, str):
            in_y = [in_y]
        if isinstance(out_params, str):
            out_params = [out_params]
        in_x = in_x or self.in_x

        if isinstance(in_x, dict):
            x_keys, in_x = zip(*in_x.items())
        else:
            x_keys = []
        out_params = out_params or in_x
        if in_y is not None:
            if isinstance(in_y, dict):
                y_keys, in_y = zip(*in_y.items())
            else:
                y_keys = []
            if bool(x_keys) != bool(y_keys):
                raise ConfigError(
                    '`in` and `in_y` for a component have to both be lists or dicts'
                )
            keys = list(x_keys) + list(y_keys)

            component: NNModel
            main = True
            assert self.train_map.issuperset(in_x + in_y), (
                'Arguments {} are expected but only {} are set'.format(
                    in_x + in_y, self.train_map))
            preprocessor = Chainer(self.in_x, in_x + in_y, self.in_y)
            for (t_in_x_keys, t_in_x), t_out, t_component in self.train_pipe:
                if t_in_x_keys:
                    t_in_x = dict(zip(t_in_x_keys, t_in_x))
                preprocessor.append(t_component, t_in_x, t_out)

            def train_on_batch(*args, **kwargs):
                preprocessed = preprocessor.compute(*args, **kwargs)
                if len(in_x + in_y) == 1:
                    preprocessed = [preprocessed]
                if keys:
                    return component.train_on_batch(
                        **dict(zip(keys, preprocessed)))
                else:
                    return component.train_on_batch(*preprocessed)

            self.train_on_batch = train_on_batch
            self.process_event = component.process_event
        if main:
            self.main = component
        if self.forward_map.issuperset(in_x):
            self.pipe.append(((x_keys, in_x), out_params, component))
            self.forward_map = self.forward_map.union(out_params)

        if self.train_map.issuperset(in_x):
            self.train_pipe.append(((x_keys, in_x), out_params, component))
            self.train_map = self.train_map.union(out_params)
        else:
            raise ConfigError(
                'Arguments {} are expected but only {} are set'.format(
                    in_x, self.train_map))
Example #2
    def load(self, fname=None):
        if fname is not None:
            self.load_path = fname

        if self.pretrained_bert:
            log.info(f"From pretrained {self.pretrained_bert}.")
            config = AutoConfig.from_pretrained(
                self.pretrained_bert,
                # num_labels=self.n_classes,
                output_attentions=False,
                output_hidden_states=False)

            if self.is_binary:
                config.add_pooling_layer = False
                self.model = AutoModelForBinaryClassification(
                    self.pretrained_bert, config)
            else:
                self.model = AutoModelForSequenceClassification.from_pretrained(
                    self.pretrained_bert, config=config)

                # TODO need a better solution here
                try:
                    hidden_size = self.model.classifier.out_proj.in_features

                    if self.n_classes != self.model.num_labels:
                        self.model.classifier.out_proj.weight = torch.nn.Parameter(
                            torch.randn(self.n_classes, hidden_size))
                        self.model.classifier.out_proj.bias = torch.nn.Parameter(
                            torch.randn(self.n_classes))
                        self.model.classifier.out_proj.out_features = self.n_classes
                        self.model.num_labels = self.n_classes

                except torch.nn.modules.module.ModuleAttributeError:
                    hidden_size = self.model.classifier.in_features

                    if self.n_classes != self.model.num_labels:
                        self.model.classifier.weight = torch.nn.Parameter(
                            torch.randn(self.n_classes, hidden_size))
                        self.model.classifier.bias = torch.nn.Parameter(
                            torch.randn(self.n_classes))
                        self.model.classifier.out_features = self.n_classes
                        self.model.num_labels = self.n_classes

        elif self.bert_config_file and Path(self.bert_config_file).is_file():
            self.bert_config = AutoConfig.from_pretrained(
                str(expand_path(self.bert_config_file)))
            if self.attention_probs_keep_prob is not None:
                self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
            if self.hidden_keep_prob is not None:
                self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
            self.model = AutoModelForSequenceClassification.from_config(
                config=self.bert_config)
        else:
            raise ConfigError("No pre-trained BERT model is given.")

        # TODO that should probably be parametrized in config
        if self.device.type == "cuda" and torch.cuda.device_count() > 1:
            self.model = torch.nn.DataParallel(self.model)

        self.model.to(self.device)

        self.optimizer = getattr(torch.optim, self.optimizer_name)(
            self.model.parameters(), **self.optimizer_parameters)
        if self.lr_scheduler_name is not None:
            self.lr_scheduler = getattr(torch.optim.lr_scheduler,
                                        self.lr_scheduler_name)(
                                            self.optimizer,
                                            **self.lr_scheduler_parameters)

        if self.load_path:
            log.info(f"Load path {self.load_path} is given.")
            if isinstance(self.load_path,
                          Path) and not self.load_path.parent.is_dir():
                raise ConfigError("Provided load path is incorrect!")

            weights_path = Path(self.load_path.resolve())
            weights_path = weights_path.with_suffix(".pth.tar")
            if weights_path.exists():
                log.info(f"Load path {weights_path} exists.")
                log.info(
                    f"Initializing `{self.__class__.__name__}` from saved.")

                # now load the weights, optimizer from saved
                log.info(f"Loading weights from {weights_path}.")
                checkpoint = torch.load(weights_path, map_location=self.device)
                model_state = checkpoint["model_state_dict"]
                optimizer_state = checkpoint["optimizer_state_dict"]

                # load a multi-gpu model on a single device
                if not self.is_data_parallel and "module." in list(
                        model_state.keys())[0]:
                    tmp_model_state = {}
                    for key, value in model_state.items():
                        tmp_model_state[re.sub(r"^module\.", "", key)] = value
                    model_state = tmp_model_state

                # set strict flag to False if position_ids are missing
                # this is needed to load models trained on older versions
                # of transformers library
                strict_load_flag = bool([
                    key for key in checkpoint["model_state_dict"].keys()
                    if key.endswith("embeddings.position_ids")
                ])
                self.model.load_state_dict(model_state,
                                           strict=strict_load_flag)
                self.optimizer.load_state_dict(optimizer_state)
                self.epochs_done = checkpoint.get("epochs_done", 0)
            else:
                log.info(
                    f"Init from scratch. Load path {weights_path} does not exist."
                )
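A hedged sketch of the state-dict cleanup performed in load() above: weights saved from a torch.nn.DataParallel model carry a "module." prefix on every key, which a single-device model does not expect. The dictionary below is a made-up stand-in for a real checkpoint.

# Hypothetical state dict; real checkpoints hold tensors, not ints.
state = {"module.encoder.weight": 1, "module.encoder.bias": 2}
cleaned = {key[len("module."):] if key.startswith("module.") else key: value
           for key, value in state.items()}
print(list(cleaned))  # ['encoder.weight', 'encoder.bias']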
Example #3
def get_model(name: str) -> type:
    if name not in _REGISTRY:
        raise ConfigError("Model {} is not registered.".format(name))
    return cls_from_str(_REGISTRY[name])
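get_model() relies on a module-level _REGISTRY and a cls_from_str helper that are not shown here. The following is a minimal sketch of that pattern under assumed names: it resolves a "module:ClassName" string into a class with importlib and is not the actual DeepPavlov implementation.

import importlib

_REGISTRY = {"ordered_dict": "collections:OrderedDict"}  # hypothetical entry

def cls_from_str(path: str) -> type:
    module_name, _, cls_name = path.partition(":")
    return getattr(importlib.import_module(module_name), cls_name)

print(cls_from_str(_REGISTRY["ordered_dict"]))  # <class 'collections.OrderedDict'>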
Example #4
def _train_batches(model: Chainer,
                   iterator: DataLearningIterator,
                   train_config: dict,
                   metrics_functions: List[Metric],
                   *,
                   start_epoch_num: Optional[int] = None) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'start_epoch_num': 0,
        'max_batches': 0,
        'batch_size': 1,
        'metric_optimization': 'maximize',
        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'val_every_n_batches': 0,
        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,
        'show_examples': False,
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = _parse_metrics(train_config['train_metrics'],
                                                 model.in_y, model.out_params)
    else:
        train_metrics_functions = metrics_functions
    expected_outputs = list(set().union(
        model.out_params, *[m.inputs for m in train_metrics_functions]))

    if train_config['metric_optimization'] == 'maximize':

        def improved(score, best):
            return score > best

        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':

        def improved(score, best):
            return score < best

        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(
            ['maximize', 'minimize']))

    i = 0
    epochs = start_epoch_num if start_epoch_num is not None else train_config[
        'start_epoch_num']
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config[
        'log_every_n_epochs'] > 0
    outputs = {key: [] for key in expected_outputs}
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    # validate first (important if model is pre-trained)
    if train_config['val_every_n_epochs'] > 0 or train_config[
            'val_every_n_batches'] > 0:
        report = _test_model(model, metrics_functions, iterator,
                             train_config['batch_size'], 'valid', start_time,
                             train_config['show_examples'])
        report['epochs_done'] = epochs
        report['batches_seen'] = i
        report['train_examples_seen'] = examples

        metrics = list(report['metrics'].items())

        m_name, score = metrics[0]
        if improved(score, best):
            patience = 0
            log.info('New best {} of {}'.format(m_name, score))
            best = score
            log.info('Saving model')
            model.save()
            saved = True
        else:
            patience += 1
            log.info('Did not improve on the {} of {}'.format(m_name, best))

        report['impatience'] = patience
        if train_config['validation_patience'] > 0:
            report['patience_limit'] = train_config['validation_patience']

        model.process_event(event_name='after_validation', data=report)
        report = {'valid': report}
        print(json.dumps(report, ensure_ascii=False))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on and len(train_metrics_functions) > 0:
                    y_predicted = list(
                        model.compute(list(x),
                                      list(y_true),
                                      targets=expected_outputs))
                    if len(expected_outputs) == 1:
                        y_predicted = [y_predicted]
                    for out, val in zip(outputs.values(), y_predicted):
                        out += list(val)
                loss = model.train_on_batch(x, y_true)
                if loss is not None:
                    losses.append(loss)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config[
                        'log_every_n_batches'] == 0:
                    metrics = [(m.name, m.fn(*[outputs[inp] for inp in m.inputs]))
                               for m in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(
                            seconds=round(time.time() - start_time + 0.5)))
                    }

                    if train_config['show_examples']:
                        try:
                            y_predicted = zip(*[
                                y_predicted_group
                                for out_name, y_predicted_group in zip(
                                    expected_outputs, y_predicted)
                                if out_name in model.out_params
                            ])
                            if len(model.out_params) == 1:
                                y_predicted = [
                                    y_predicted_item[0]
                                    for y_predicted_item in y_predicted
                                ]
                            report['examples'] = [{
                                'x': x_item,
                                'y_predicted': y_predicted_item,
                                'y_true': y_true_item
                            } for x_item, y_predicted_item, y_true_item in zip(
                                x, y_predicted, y_true)]
                        except NameError:
                            log.warning(
                                'Could not log examples as y_predicted is not defined'
                            )

                    if losses:
                        report['loss'] = sum(losses) / len(losses)
                        losses = []

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' + name,
                                                 simple_value=score),
                            ])
                            tb_train_writer.add_summary(metric_sum, i)

                        if 'loss' in report:
                            loss_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' +
                                                 'loss',
                                                 simple_value=report['loss']),
                            ])
                            tb_train_writer.add_summary(loss_sum, i)

                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))
                    for out in outputs.values():
                        out.clear()

                if train_config['val_every_n_batches'] > 0 and i % train_config[
                        'val_every_n_batches'] == 0:
                    report = _test_model(model, metrics_functions, iterator,
                                         train_config['batch_size'], 'valid',
                                         start_time,
                                         train_config['show_examples'])
                    report['epochs_done'] = epochs
                    report['batches_seen'] = i
                    report['train_examples_seen'] = examples

                    metrics = list(report['metrics'].items())

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' + name,
                                                 simple_value=score),
                            ])
                            tb_valid_writer.add_summary(metric_sum, i)

                    m_name, score = metrics[0]
                    if improved(score, best):
                        patience = 0
                        log.info('New best {} of {}'.format(m_name, score))
                        best = score
                        log.info('Saving model')
                        model.save()
                        saved = True
                    else:
                        patience += 1
                        log.info('Did not improve on the {} of {}'.format(
                            m_name, best))

                    report['impatience'] = patience
                    if train_config['validation_patience'] > 0:
                        report['patience_limit'] = train_config[
                            'validation_patience']

                    model.process_event(event_name='after_validation',
                                        data=report)
                    report = {'valid': report}
                    print(json.dumps(report, ensure_ascii=False))

                    if patience >= train_config['validation_patience'] > 0:
                        log.info('Ran out of patience')
                        break_flag = True
                        break

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(
                        seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(
                    seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and outputs:
                metrics = [(m.name, m.fn(*[outputs[inp] for inp in m.inputs]))
                           for m in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(
                        seconds=round(time.time() - start_time + 0.5)))
                }

                if train_config['show_examples']:
                    try:
                        y_predicted = zip(*[
                            y_predicted_group
                            for out_name, y_predicted_group in zip(
                                expected_outputs, y_predicted)
                            if out_name in model.out_params
                        ])
                        if len(model.out_params) == 1:
                            y_predicted = [
                                y_predicted_item[0]
                                for y_predicted_item in y_predicted
                            ]
                        report['examples'] = [{
                            'x': x_item,
                            'y_predicted': y_predicted_item,
                            'y_true': y_true_item
                        } for x_item, y_predicted_item, y_true_item in zip(
                            x, y_predicted, y_true)]
                    except NameError:
                        log.warning('Could not log examples')

                if losses:
                    report['loss'] = sum(losses) / len(losses)
                    losses = []

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_train_writer.add_summary(metric_sum, epochs)

                    if 'loss' in report:
                        loss_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + 'loss',
                                             simple_value=report['loss']),
                        ])
                        tb_train_writer.add_summary(loss_sum, epochs)

                model.process_event(event_name='after_train_log', data=report)
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                for out in outputs.values():
                    out.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config[
                    'val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid',
                                     start_time, train_config['show_examples'])
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_valid_writer.add_summary(metric_sum, epochs)

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(
                        m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config[
                        'validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
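A small, self-contained sketch of the metric_optimization switch used near the top of _train_batches() above: 'maximize' tracks the highest score and 'minimize' the lowest. The scores are invented.

def make_comparator(metric_optimization: str):
    # returns an improvement test plus the matching starting "best" value
    if metric_optimization == 'maximize':
        return (lambda score, best: score > best), float('-inf')
    if metric_optimization == 'minimize':
        return (lambda score, best: score < best), float('inf')
    raise ValueError("metric_optimization has to be 'maximize' or 'minimize'")

improved, best = make_comparator('maximize')
for score in (0.40, 0.55, 0.52):   # hypothetical validation scores
    if improved(score, best):
        best = score
print(best)  # 0.55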
Example #5
    def __init__(self,
                 max_word_length: int = 50,
                 pad_special_char_use: bool = False,
                 word_boundary_special_char_use: bool = False,
                 sentence_boundary_special_char_use: bool = False,
                 reversed_sentense_tokens: bool = False,
                 bos: str = '<S>',
                 eos: str = '</S>',
                 **kwargs) -> None:
        super().__init__(**kwargs)

        if word_boundary_special_char_use and max_word_length < 3:
            raise ConfigError("`max_word_length` should be at least 3!")
        if max_word_length < 1:
            raise ConfigError("`max_word_length` should be at least 1!")

        self._max_word_length = max_word_length
        self._reverse = reversed_sentense_tokens

        self._pad_special_char_use = pad_special_char_use
        self._word_boundary_special_char_use = word_boundary_special_char_use
        self._sentence_boundary_special_char_use = sentence_boundary_special_char_use

        # char ids 0-255 come from utf-8 encoding bytes
        # assign 256-300 to special chars
        self.bos_char = 256  # <begin sentence>
        self.eos_char = 257  # <end sentence>
        self.bow_char = 258  # <begin word>
        self.eow_char = 259  # <end word>
        self.pad_char = 260  # <padding>

        self._len = 261  # an upper bound of all indexes

        # the character representation of the begin/end of sentence characters
        def _make_bos_eos(indx):
            indx = np.array([indx], dtype=np.int32)
            if self._word_boundary_special_char_use:
                code = np.pad(indx, (1, 1),
                              'constant',
                              constant_values=(self.bow_char, self.eow_char))
            else:
                code = indx
            if self._pad_special_char_use:
                code = np.pad(code, (0, self._max_word_length - code.shape[0]),
                              'constant',
                              constant_values=self.pad_char)
            return code

        self.bos_chars = _make_bos_eos(self.bos_char)
        self.eos_chars = _make_bos_eos(self.eos_char)

        if self._sentence_boundary_special_char_use:
            self._eos_chars = [self.eos_chars]
            self._bos_chars = [self.bos_chars]
        else:
            self._eos_chars = []
            self._bos_chars = []

        if self.load_path:
            self.load()
        else:
            self.tokens = []
        self._word_char_ids = OrderedDict()

        for token in self.tokens:
            self._word_char_ids[token] = self._convert_word_to_char_ids(token)
        self._word_char_ids[bos] = self.bos_chars
        self._word_char_ids[eos] = self.eos_chars
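A hedged sketch of what _make_bos_eos() above produces when both special-char options are enabled: the index is wrapped with begin/end-of-word markers and then right-padded with the padding id up to max_word_length (the length 8 is an arbitrary choice for the demo).

import numpy as np

bos_char, bow_char, eow_char, pad_char = 256, 258, 259, 260
max_word_length = 8  # hypothetical value

indx = np.array([bos_char], dtype=np.int32)
code = np.pad(indx, (1, 1), 'constant', constant_values=(bow_char, eow_char))
code = np.pad(code, (0, max_word_length - code.shape[0]), 'constant',
              constant_values=pad_char)
print(code)  # [258 256 259 260 260 260 260 260]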
Example #6
def get_metric_by_name(name: str) -> Callable[..., Any]:
    """Returns a metric callable with a corresponding name."""
    if name not in _REGISTRY:
        raise ConfigError(f'"{name}" is not registered as a metric')
    return fn_from_str(_REGISTRY[name])


def get_metrics_by_names(names: list):
    not_found = [name for name in names if name not in _REGISTRY]
    if not_found:
        raise ConfigError(
            'Names {} are not registered as metrics'.format(not_found))
    return [fn_from_str(_REGISTRY[name]) for name in names]
Example #8
    def train(self, data):
        
        if self.network.train_now is False:
            raise ConfigError("It looks like 'train_now' of mother model is True, while "
                              "`train_now` of submodel is False. Set `train_now` of submodel "
                              "to True.")

        print('\n:: training started')

        curr_patience = self.val_patience
        best_valid_accuracy = 0.
        # TODO: in case val_patience is off, save model {val_patience} steps before
        for j in range(self.num_epochs):

            tr_data = data.iter_all('train')
            eval_data = data.iter_all('valid')

            self.reset_metrics()

            for context, response, other in tr_data:
                if other.get('episode_done'):
                    self.reset()
                    self.metrics.n_dialogs += 1

                if other.get('db_result') is not None:
                    self.db_result = other['db_result']
                action_id = self._encode_response(response, other['act'])

                loss, pred_id = self.network.train(
                    self._encode_context(context, other.get('db_result')),
                    action_id,
                    self._action_mask()
                )

                self.prev_action *= 0.
                self.prev_action[pred_id] = 1.

                pred = self._decode_response(pred_id).lower()
                true = self.tokenizer.infer(response.lower().split())

                # update metrics
                self.metrics.n_examples += 1
                self.metrics.train_loss += loss
                self.metrics.conf_matrix[pred_id, action_id] += 1
                self.metrics.n_corr_examples += int(pred == true)
                if self.debug and ((pred == true) != (pred_id == action_id)):
                    print("Slot filling problem: ")
                    print("Pred = {}: {}".format(pred_id, pred))
                    print("True = {}: {}".format(action_id, true))
                    print("State =", self.tracker.get_state())
                    print("db_result =", self.db_result)
                    # TODO: update dialog metrics
            print('\n\n:: {}.train {}'.format(j + 1, self.metrics.report()))

            valid_metrics = self.evaluate(eval_data)
            print(':: {}.valid {}'.format(j + 1, valid_metrics.report()))

            if valid_metrics.action_accuracy < best_valid_accuracy:
                curr_patience -= 1
                print(":: patience decreased by 1, is equal to {}".format(curr_patience))
            else:
                if curr_patience != self.val_patience:
                    curr_patience = self.val_patience
                    print(":: patience is equal to {}".format(curr_patience))
                best_valid_accuracy = valid_metrics.action_accuracy
            if curr_patience < 1:
                print("\n:: patience is over, stopped training\n")
                break
        else:
            print("\n:: stopping because max number of epochs encountered\n")
        self.save()
Example #9
    def load(self,
             model_name: str,
             optimizer_name: str,
             loss_name: str,
             lear_rate: float = 0.01,
             lear_rate_decay: float = 0.):
        """
        Initialize model from saved params and weights
        Args:
            model_name: name of model function described as a method of this class
            optimizer_name: name of optimizer from keras.optimizers
            loss_name: loss function name (from keras.losses)
            lear_rate: learning rate.
            lear_rate_decay: learning rate decay.

        Returns:
            model with loaded weights and network parameters from files
            but compiled with given learning parameters
        """
        if self.load_path:
            if isinstance(self.load_path,
                          Path) and not self.load_path.parent.is_dir():
                raise ConfigError("Provided load path is incorrect!")

            opt_path = Path("{}_opt.json".format(str(
                self.load_path.resolve())))
            weights_path = Path("{}.h5".format(str(self.load_path.resolve())))

            if opt_path.exists() and weights_path.exists():

                log.info("[initializing `{}` from saved]".format(
                    self.__class__.__name__))

                self.opt = read_json(opt_path)

                model_func = getattr(self, model_name, None)
                if callable(model_func):
                    model = model_func(**self.opt)
                else:
                    raise AttributeError(
                        "Model {} is not defined".format(model_name))

                log.info("[loading weights from {}]".format(weights_path.name))
                model.load_weights(str(weights_path))

                optimizer_func = getattr(keras.optimizers, optimizer_name,
                                         None)
                if callable(optimizer_func):
                    if lear_rate is not None:
                        if lear_rate_decay is not None:
                            self.optimizer = optimizer_func(
                                lr=lear_rate, decay=lear_rate_decay)
                        else:
                            self.optimizer = optimizer_func(lr=lear_rate)
                    elif lear_rate_decay is not None:
                        self.optimizer = optimizer_func(decay=lear_rate_decay)
                    else:
                        self.optimizer = optimizer_func()
                else:
                    raise AttributeError(
                        "Optimizer {} is not defined in `keras.optimizers`".
                        format(optimizer_name))

                loss_func = getattr(keras.losses, loss_name, None)
                if callable(loss_func):
                    loss = loss_func
                else:
                    raise AttributeError(
                        "Loss {} is not defined".format(loss_name))

                model.compile(optimizer=self.optimizer, loss=loss)
                return model
            else:
                return self.init_model_from_scratch(model_name, optimizer_name,
                                                    loss_name, lear_rate,
                                                    lear_rate_decay)
        else:
            log.warning("No `load_path` is provided for {}".format(
                self.__class__.__name__))
            return self.init_model_from_scratch(model_name, optimizer_name,
                                                loss_name, lear_rate,
                                                lear_rate_decay)
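The optimizer and loss lookups in load() above use getattr on a module with a None default and a callable() check. A tiny sketch of the same pattern against the standard library, so it runs without Keras installed; the configured name is hypothetical.

import math

func_name = "sqrt"                        # hypothetical configured name
func = getattr(math, func_name, None)
if callable(func):
    print(func(16))                       # 4.0
else:
    raise AttributeError("{} is not defined in `math`".format(func_name))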
Example #10
    def __init__(self,
                 chainer_config: dict,
                 *,
                 batch_size: int = 1,
                 epochs: int = -1,
                 start_epoch_num: int = 0,
                 max_batches: int = -1,
                 metrics: Iterable[Union[str, dict]] = ('accuracy', ),
                 train_metrics: Optional[Iterable[Union[str, dict]]] = None,
                 metric_optimization: str = 'maximize',
                 evaluation_targets: Iterable[str] = ('valid', 'test'),
                 show_examples: bool = False,
                 tensorboard_log_dir: Optional[Union[str, Path]] = None,
                 max_test_batches: int = -1,
                 validate_first: bool = True,
                 validation_patience: int = 5,
                 val_every_n_epochs: int = -1,
                 val_every_n_batches: int = -1,
                 log_every_n_batches: int = -1,
                 log_every_n_epochs: int = -1,
                 log_on_k_batches: int = 1,
                 **kwargs) -> None:
        super().__init__(chainer_config,
                         batch_size=batch_size,
                         metrics=metrics,
                         evaluation_targets=evaluation_targets,
                         show_examples=show_examples,
                         tensorboard_log_dir=tensorboard_log_dir,
                         max_test_batches=max_test_batches,
                         **kwargs)
        if train_metrics is None:
            self.train_metrics = self.metrics
        else:
            self.train_metrics = parse_metrics(train_metrics,
                                               self._chainer.in_y,
                                               self._chainer.out_params)

        metric_optimization = metric_optimization.strip().lower()
        if metric_optimization == 'maximize':
            self.best = float('-inf')
            self.improved = lambda score: score > self.best
        elif metric_optimization == 'minimize':
            self.best = float('inf')
            self.improved = lambda score: score < self.best
        else:
            raise ConfigError('metric_optimization has to be one of {}'.format(
                ['maximize', 'minimize']))

        self.validate_first = validate_first
        self.validation_patience = validation_patience
        self.val_every_n_epochs = val_every_n_epochs
        self.val_every_n_batches = val_every_n_batches
        self.log_every_n_epochs = log_every_n_epochs
        self.log_every_n_batches = log_every_n_batches
        self.log_on_k_batches = log_on_k_batches if log_on_k_batches >= 0 else None

        self.max_epochs = epochs
        self.epoch = start_epoch_num
        self.max_batches = max_batches

        self.train_batches_seen = 0
        self.examples = 0
        self.patience = 0
        self.last_result = {}
        self.losses = []
        self.start_time = None

        if self.tensorboard_log_dir is not None:
            self.tb_train_writer = self._tf.summary.FileWriter(
                str(self.tensorboard_log_dir / 'train_log'))
            self.tb_valid_writer = self._tf.summary.FileWriter(
                str(self.tensorboard_log_dir / 'valid_log'))
Example #11
def main():
    args = parser.parse_args()

    pipeline_config_path = find_config(args.config_path)
    key_main_model = args.key_main_model
    population_size = args.p_size
    gpus = [int(gpu) for gpu in args.gpus.split(",")]
    train_partition = int(args.train_partition)
    start_from_population = int(args.start_from_population)
    path_to_population = args.path_to_population
    elitism_with_weights = args.elitism_with_weights
    iterations = int(args.iterations)

    p_crossover = args.p_cross
    pow_crossover = args.pow_cross
    p_mutation = args.p_mut
    pow_mutation = args.pow_mut

    cvd_env = os.environ.get("CUDA_VISIBLE_DEVICES")
    if cvd_env is not None:
        cvd = [int(gpu) for gpu in cvd_env.split(",")]
        if gpus == [-1]:
            gpus = cvd
        else:
            try:
                gpus = [cvd[gpu] for gpu in gpus]
            except IndexError:
                raise ConfigError("Can not use gpus `{}` with CUDA_VISIBLE_DEVICES='{}'".format(
                    ",".join(map(str, gpus)), ",".join(map(str, cvd))
                ))

    basic_params = read_json(pipeline_config_path)
    log.info("Given basic params: {}\n".format(json.dumps(basic_params, indent=2)))

    # Initialize evolution
    evolution = ParamsEvolution(population_size=population_size,
                                p_crossover=p_crossover, crossover_power=pow_crossover,
                                p_mutation=p_mutation, mutation_power=pow_mutation,
                                key_main_model=key_main_model,
                                seed=42,
                                train_partition=train_partition,
                                elitism_with_weights=elitism_with_weights,
                                **basic_params)

    considered_metrics = evolution.get_value_from_config(evolution.basic_config,
                                                         list(evolution.find_model_path(
                                                             evolution.basic_config, "metrics"))[0] + ["metrics"])

    log.info(considered_metrics)
    evolve_metric = considered_metrics[0]

    # Create table variable for gathering results
    set_deeppavlov_root(evolution.basic_config)

    expand_path(Path(evolution.get_value_from_config(
        evolution.basic_config, evolution.main_model_path + ["save_path"]))).mkdir(parents=True, exist_ok=True)

    result_file = expand_path(Path(evolution.get_value_from_config(evolution.basic_config,
                                                                   evolution.main_model_path + ["save_path"])
                                   ).joinpath("result_table.csv"))

    result_table_columns = []
    result_table_dict = {}
    for el in considered_metrics:
        result_table_dict[el + "_valid"] = []
        result_table_dict[el + "_test"] = []
        result_table_columns.extend([el + "_valid", el + "_test"])

    result_table_dict["params"] = []
    result_table_columns.append("params")

    if start_from_population == 0:
        # if starting evolution from scratch
        iters = 0
        result_table = pd.DataFrame(result_table_dict)
        # write down result table file
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t')

        log.info("Iteration #{} starts".format(iters))
        # randomly generate the first population
        population = evolution.first_generation()
    else:
        # if starting evolution from already existing population
        iters = start_from_population
        log.info("Iteration #{} starts".format(iters))

        population = []
        for i in range(population_size):
            population.append(read_json(expand_path(Path(path_to_population).joinpath(
                "model_" + str(i)).joinpath("config.json"))))
            population[i] = evolution.insert_value_or_dict_into_config(
                population[i], evolution.main_model_path + ["save_path"],
                str(Path(
                    evolution.get_value_from_config(evolution.basic_config, evolution.main_model_path + ["save_path"])
                    ).joinpath(
                    "population_" + str(start_from_population)).joinpath(
                    "model_" + str(i)).joinpath(
                    "model")))

            population[i] = evolution.insert_value_or_dict_into_config(
                population[i], evolution.main_model_path + ["load_path"],
                str(Path(
                    evolution.get_value_from_config(population[i], evolution.main_model_path + ["load_path"]))))

            for path_id, path_ in enumerate(evolution.paths_to_fiton_dicts):
                population[i] = evolution.insert_value_or_dict_into_config(
                    population[i], path_ + ["save_path"],
                    str(Path(evolution.get_value_from_config(evolution.basic_config,
                                                             evolution.main_model_path + ["save_path"])
                             ).joinpath("population_" + str(iters)).joinpath("model_" + str(i)).joinpath(
                        "fitted_model_" + str(path_id))))

            for path_id, path_ in enumerate(evolution.paths_to_fiton_dicts):
                population[i] = evolution.insert_value_or_dict_into_config(
                    population[i], path_ + ["load_path"],
                    str(Path(evolution.get_value_from_config(
                        population[i], path_ + ["load_path"]))))

    run_population(population, evolution, gpus)
    population_scores = results_to_table(population, evolution, considered_metrics,
                                         result_file, result_table_columns)[evolve_metric]
    log.info("Population scores: {}".format(population_scores))
    log.info("Iteration #{} was done".format(iters))
    iters += 1

    while True:
        if iterations != -1 and start_from_population + iterations == iters:
            log.info("End of evolution on iteration #{}".format(iters))
            break
        log.info("Iteration #{} starts".format(iters))
        population = evolution.next_generation(population, population_scores, iters)
        run_population(population, evolution, gpus)
        population_scores = results_to_table(population, evolution, considered_metrics,
                                             result_file, result_table_columns)[evolve_metric]
        log.info("Population scores: {}".format(population_scores))
        log.info("Iteration #{} was done".format(iters))
        iters += 1
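A hedged sketch of the GPU remapping in main() above: the indices passed on the command line are treated as positions into CUDA_VISIBLE_DEVICES, so the process ends up with the physical device ids. The environment value and the requested indices below are made up.

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2,5,7"   # hypothetical value
cvd = [int(gpu) for gpu in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
requested = [0, 2]                             # hypothetical --gpus value
print([cvd[gpu] for gpu in requested])         # [2, 7]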
Example #12
def results_to_table(population, evolution, considered_metrics, result_file, result_table_columns):
    population_size = len(population)
    validate_best = evolution.get_value_from_config(evolution.basic_config,
                                                    list(evolution.find_model_path(
                                                        evolution.basic_config, "validate_best"))[0]
                                                    + ["validate_best"])
    test_best = evolution.get_value_from_config(evolution.basic_config,
                                                list(evolution.find_model_path(
                                                    evolution.basic_config, "test_best"))[0]
                                                + ["test_best"])
    if (not validate_best) and test_best:
        log.info("Validate_best is set to False. Tuning parameters on test")
    elif (not validate_best) and (not test_best):
        raise ConfigError("Validate_best and test_best are set to False. Can not evolve.")

    population_metrics = {}
    for m in considered_metrics:
        population_metrics[m] = []
    for i in range(population_size):
        with open(str(expand_path(Path(evolution.get_value_from_config(
                population[i],
                evolution.main_model_path + ["save_path"])).parent.joinpath("out.txt"))), "r", encoding='utf8') as fout:
            reports_data = fout.read().splitlines()[-2:]
        reports = []
        for j in range(2):
            try:
                reports.append(json.loads(reports_data[j]))
            except (IndexError, ValueError):
                pass

        val_results = {}
        test_results = {}
        for m in considered_metrics:
            val_results[m] = None
            test_results[m] = None
        if len(reports) == 2 and "valid" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[0]["valid"]["metrics"]
            test_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "valid" in reports[0].keys() and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "test" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "valid" in reports[1].keys():
            val_results = reports[1]["valid"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "test" in reports[1].keys():
            val_results = reports[1]["test"]["metrics"]
        elif len(reports) == 2 and "train" in reports[0].keys() and "train" in reports[1].keys():
            val_results = reports[1]["train"]["metrics"]
        elif len(reports) == 1 and "valid" in reports[0].keys():
            val_results = reports[0]["valid"]["metrics"]
        elif len(reports) == 1 and "test" in reports[0].keys():
            test_results = reports[0]["test"]["metrics"]
        else:
            raise ConfigError("Can not proceed output files: didn't find valid and/or test results")

        result_table_dict = {}
        for el in result_table_columns:
            result_table_dict[el] = []

        for m in considered_metrics:
            result_table_dict[m + "_valid"].append(val_results[m])
            result_table_dict[m + "_test"].append(test_results[m])
            if validate_best:
                population_metrics[m].append(val_results[m])
            elif test_best:
                population_metrics[m].append(test_results[m])

        result_table_dict[result_table_columns[-1]] = [population[i]]
        result_table = pd.DataFrame(result_table_dict)
        result_table.loc[:, result_table_columns].to_csv(result_file, index=False, sep='\t', mode='a', header=None)

    return population_metrics
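A minimal sketch of the append-to-CSV pattern used at the end of results_to_table() above: each candidate's metrics become a one-row DataFrame that is appended to the shared table without a header. The file name, columns, and values are hypothetical.

import pandas as pd

result_table_columns = ["accuracy_valid", "accuracy_test", "params"]
result_table_dict = {
    "accuracy_valid": [0.91],
    "accuracy_test": [0.89],
    "params": ['{"lr": 0.001}'],
}
result_table = pd.DataFrame(result_table_dict)
result_table.loc[:, result_table_columns].to_csv("result_table.csv", index=False,
                                                 sep='\t', mode='a', header=None)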
Example #13
    def fit(self, *args):
        """
        Find the best learning rate schedule, and set obtained values of learning rate
        and momentum for further model training. Best learning rate will be divided
        by `fit_learning_rate_div` for further training model.

        Args:
            *args: arguments

        Returns:

        """
        data = list(zip(*args))
        self.save()
        if self._fit_batch_size is None:
            raise ConfigError("in order to use fit() method"
                              " set `fit_batch_size` parameter")
        bs = int(self._fit_batch_size)
        data_len = len(data)
        num_batches = self._fit_max_batches or ((data_len - 1) // bs + 1)

        avg_loss = 0.
        best_loss = float('inf')
        lrs, losses = [], []
        _lr_find_schedule = DecayScheduler(
            start_val=self._fit_learning_rate[0],
            end_val=self._fit_learning_rate[1],
            dec_type="exponential",
            num_it=num_batches)
        self._lr = _lr_find_schedule.start_val
        self._mom = 0.
        self._update_graph_variables(learning_rate=self._lr,
                                     momentum=self._mom)
        best_lr = _lr_find_schedule.start_val
        for i in range(num_batches):
            batch_start = (i * bs) % data_len
            batch_end = batch_start + bs
            report = self.train_on_batch(*zip(*data[batch_start:batch_end]))
            if not isinstance(report, dict):
                report = {'loss': report}
            # Calculating smoothed loss
            avg_loss = self._fit_beta * avg_loss + (
                1 - self._fit_beta) * report['loss']
            smoothed_loss = avg_loss / (1 - self._fit_beta**(i + 1))
            lrs.append(self._lr)
            losses.append(smoothed_loss)
            log.info(f"Batch {i}/{num_batches}: smooth_loss = {smoothed_loss}"
                     f", lr = {self._lr}, best_lr = {best_lr}")
            if math.isnan(smoothed_loss) or (smoothed_loss > 4 * best_loss):
                break
            if (smoothed_loss < best_loss) and (i >= self._fit_min_batches):
                best_loss = smoothed_loss
                best_lr = self._lr
            self._lr = _lr_find_schedule.next_val()
            self._update_graph_variables(learning_rate=self._lr)

            if i >= num_batches:
                break
        # best_lr /= 10
        end_val = self._get_best(lrs, losses)

        start_val = end_val
        if self._lr_schedule.dec_type in (DecayType.ONECYCLE,
                                          DecayType.TRAPEZOID):
            start_val = end_val / self._fit_learning_rate_div
        elif self._lr_schedule.dec_type in (DecayType.POLYNOMIAL,
                                            DecayType.EXPONENTIAL,
                                            DecayType.LINEAR,
                                            DecayType.COSINE):
            start_val = end_val
            end_val = end_val / self._fit_learning_rate_div
        self._lr_schedule = DecayScheduler(start_val=start_val,
                                           end_val=end_val,
                                           num_it=self._lr_schedule.nb,
                                           dec_type=self._lr_schedule.dec_type,
                                           extra=self._lr_schedule.extra)
        log.info(
            f"Found best learning rate value = {best_lr}"
            f", setting new learning rate schedule with {self._lr_schedule}.")

        self.load()
        self._lr = self._lr_schedule.start_val
        self._mom = self._mom_schedule.start_val
        self._update_graph_variables(learning_rate=self._lr,
                                     momentum=self._mom)
        return {'smoothed_loss': losses, 'learning_rate': lrs}
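A short sketch of the loss smoothing used inside fit() above: an exponential moving average with bias correction, controlled by a beta close to 1. The batch losses below are invented.

beta = 0.98                                        # plays the role of self._fit_beta
avg_loss, smoothed = 0.0, []
for i, loss in enumerate([2.0, 1.5, 1.2, 1.1]):    # hypothetical batch losses
    avg_loss = beta * avg_loss + (1 - beta) * loss
    smoothed.append(avg_loss / (1 - beta ** (i + 1)))
print(smoothed)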
Example #14
    def __init__(self,
                 learning_rate: Union[None, float, Tuple[float, float]] = None,
                 learning_rate_decay: Union[DType,
                                            Tuple[DType,
                                                  float]] = DecayType.NO,
                 learning_rate_decay_epochs: int = 0,
                 learning_rate_decay_batches: int = 0,
                 learning_rate_drop_div: float = 2.0,
                 learning_rate_drop_patience: Optional[int] = None,
                 momentum: Union[None, float, Tuple[float, float]] = None,
                 momentum_decay: Union[DType, Tuple[DType,
                                                    float]] = DecayType.NO,
                 momentum_decay_epochs: int = 0,
                 momentum_decay_batches: int = 0,
                 fit_batch_size: Union[None, int, str] = None,
                 fit_learning_rate: Tuple[float, float] = (1e-7, 100),
                 fit_learning_rate_div: float = 10.,
                 fit_beta: float = 0.98,
                 fit_min_batches: int = 10,
                 fit_max_batches: Optional[int] = None,
                 *args,
                 **kwargs) -> None:
        """
        Initialize learning rate scheduler
        """
        if learning_rate_decay_epochs and learning_rate_decay_batches:
            raise ConfigError("isn't able to update learning rate every batch"
                              " and every epoch simultaneously")
        if momentum_decay_epochs and momentum_decay_batches:
            raise ConfigError("isn't able to update momentum every batch"
                              " and every epoch simultaneously")

        start_val, end_val = learning_rate, None
        if isinstance(learning_rate, (tuple, list)):
            start_val, end_val = learning_rate
        dec_type, extra = learning_rate_decay, None
        if isinstance(learning_rate_decay, (tuple, list)):
            dec_type, extra = learning_rate_decay

        self._lr = start_val
        num_it, self._lr_update_on_batch = learning_rate_decay_epochs, False
        if learning_rate_decay_batches > 0:
            num_it, self._lr_update_on_batch = learning_rate_decay_batches, True

        self._lr_schedule = DecayScheduler(start_val=start_val,
                                           end_val=end_val,
                                           num_it=num_it,
                                           dec_type=dec_type,
                                           extra=extra)
        self._lr_var = self._init_learning_rate_variable()

        start_val, end_val = momentum, None
        if isinstance(momentum, (tuple, list)):
            start_val, end_val = momentum
        dec_type, extra = momentum_decay, None
        if isinstance(momentum_decay, (tuple, list)):
            dec_type, extra = momentum_decay

        self._mom = start_val
        num_it, self._mom_update_on_batch = momentum_decay_epochs, False
        if momentum_decay_batches > 0:
            num_it, self._mom_update_on_batch = momentum_decay_batches, True

        self._mom_schedule = DecayScheduler(start_val=start_val,
                                            end_val=end_val,
                                            num_it=num_it,
                                            dec_type=dec_type,
                                            extra=extra)
        self._mom_var = self._init_momentum_variable()

        self._learning_rate_drop_patience = learning_rate_drop_patience
        self._learning_rate_drop_div = learning_rate_drop_div
        self._learning_rate_cur_impatience = 0.
        self._learning_rate_last_impatience = 0.
        self._learning_rate_cur_div = 1.
        self._fit_batch_size = fit_batch_size
        self._fit_learning_rate = fit_learning_rate
        self._fit_learning_rate_div = fit_learning_rate_div
        self._fit_beta = fit_beta
        self._fit_min_batches = fit_min_batches
        self._fit_max_batches = fit_max_batches


def results_to_table(population, evolution, considered_metrics, result_file,
                     result_table_columns):
    population_size = len(population)
    train_config = evolution.basic_config.get('train', {})

    if 'evaluation_targets' in train_config:
        evaluation_targets = train_config['evaluation_targets']
    else:
        evaluation_targets = []
        if train_config.get('validate_best', True):
            evaluation_targets.append('valid')
        elif train_config.get('test_best', True):
            evaluation_targets.append('test')

    if 'valid' in evaluation_targets:
        target = 'valid'
    elif 'test' in evaluation_targets:
        target = 'test'
    elif 'train' in evaluation_targets:
        target = 'train'
    else:
        raise ConfigError('evaluation_targets are empty. Can not evolve')

    if target != 'valid':
        log.info(f"Tuning parameters on {target}")

    population_metrics = {}
    for m in considered_metrics:
        population_metrics[m] = []
    for i in range(population_size):
        log_path = expand_path(
            evolution.get_value_from_config(
                parse_config(population[i]),
                evolution.path_to_models_save_path)) / "out.txt"

        report = {}
        with log_path.open(encoding='utf8') as f:
            for line in f:
                try:
                    report.update(json.loads(line))
                except json.JSONDecodeError:
                    pass

        result_table_dict = defaultdict(list)

        for m in considered_metrics:
            for data_type in evaluation_targets:
                result_table_dict[f'{m}_{data_type}'].append(
                    report[data_type]['metrics'][m])
                if data_type == target:
                    population_metrics[m].append(
                        report[data_type]['metrics'][m])

        result_table_dict[result_table_columns[-1]] = [
            json.dumps(population[i])
        ]
        result_table = pd.DataFrame(result_table_dict)
        result_table.loc[:, result_table_columns].to_csv(result_file,
                                                         index=False,
                                                         sep='\t',
                                                         mode='a',
                                                         header=None)

    return population_metrics