Example #1
def _test_model(
        model: Component,
        metrics_functions: List[Tuple[str, Callable]],
        iterator: DataLearningIterator,
        batch_size=-1,
        data_type='valid',
        start_time: float = None) -> Dict[str, Union[int, OrderedDict, str]]:
    if start_time is None:
        start_time = time.time()

    val_y_true = []
    val_y_predicted = []
    for x, y_true in iterator.gen_batches(batch_size, data_type,
                                          shuffle=False):
        y_predicted = list(model(list(x)))
        val_y_true += y_true
        val_y_predicted += y_predicted

    metrics = [(s, f(val_y_true, val_y_predicted))
               for s, f in metrics_functions]

    report = {
        'eval_examples_count': len(val_y_true),
        'metrics': prettify_metrics(metrics),
        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
    }
    return report
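A minimal usage sketch, assuming model and iterator are an already-built pipeline Component and DataLearningIterator; exact_match is a hypothetical metric with the (y_true, y_predicted) signature this helper expects.

def exact_match(y_true, y_predicted):
    # fraction of predictions that equal the gold label (placeholder metric)
    return sum(t == p for t, p in zip(y_true, y_predicted)) / max(len(y_true), 1)

# model and iterator are assumed to exist already
report = _test_model(model,
                     metrics_functions=[('exact_match', exact_match)],
                     iterator=iterator,
                     batch_size=64,
                     data_type='valid')
print(report['metrics'], report['time_spent'])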
Example #2
def _test_model(model: Component, metrics_functions: List[Tuple[str, Callable]],
                iterator: DataLearningIterator, batch_size=-1, data_type='valid',
                start_time: float=None, show_examples=False) -> Dict[str, Union[int, OrderedDict, str]]:
    if start_time is None:
        start_time = time.time()

    val_y_true = []
    val_y_predicted = []
    for x, y_true in iterator.gen_batches(batch_size, data_type, shuffle=False):
        y_predicted = list(model(list(x)))
        val_y_true += y_true
        val_y_predicted += y_predicted

    metrics = [(s, f(val_y_true, val_y_predicted)) for s, f in metrics_functions]

    report = {
        'eval_examples_count': len(val_y_true),
        'metrics': prettify_metrics(metrics),
        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
    }

    if show_examples:
        try:
            report['examples'] = [{
                'x': x_item,
                'y_predicted': y_predicted_item,
                'y_true': y_true_item
            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
        except NameError:
            log.warning(f'Could not log examples for {data_type}, assuming it\'s empty')

    return report
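A short follow-up sketch under the same assumptions as above: with show_examples=True the returned report additionally carries an 'examples' list built from the final evaluated batch.

report = _test_model(model, [('exact_match', exact_match)], iterator,
                     batch_size=64, data_type='valid', show_examples=True)
for example in report.get('examples', [])[:3]:
    print(example['x'], example['y_predicted'], example['y_true'])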
Example #3
    def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[Iterable[str]] = None, *,
                 print_reports: bool = True) -> Dict[str, dict]:
        """
        Run :meth:`test` on multiple data types using the provided data iterator

        Args:
            iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation
            evaluation_targets: iterable of data types to evaluate on
            print_reports: whether to print evaluation reports as JSON lines

        Returns:
            a dictionary with data types as keys and evaluation reports as values
        """
        self._load()
        if evaluation_targets is None:
            evaluation_targets = self.evaluation_targets

        res = {}

        for data_type in evaluation_targets:
            data_gen = iterator.gen_batches(self.batch_size, data_type=data_type, shuffle=False)
            report = self.test(data_gen)
            res[data_type] = report
            if print_reports:
                print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder))

        return res
    def train_on_batches(self, iterator: DataLearningIterator) -> None:
        """Train pipeline on batches using provided data iterator and initialization parameters"""
        self.start_time = time.time()
        if self.validate_first:
            self._validate(iterator)

        while True:
            impatient = False
            self._send_event(event_name='before_train')
            for x, y_true in iterator.gen_batches(self.batch_size, data_type='train'):
                self.last_result = self._chainer.train_on_batch(x, y_true)
                if self.last_result is None:
                    self.last_result = {}
                elif not isinstance(self.last_result, dict):
                    self.last_result = {'loss': self.last_result}
                if 'loss' in self.last_result:
                    self.losses.append(self.last_result.pop('loss'))

                self.train_batches_seen += 1
                self.examples += len(x)

                if self.log_every_n_batches > 0 and self.train_batches_seen % self.log_every_n_batches == 0:
                    self._log(iterator, tensorboard_tag='every_n_batches', tensorboard_index=self.train_batches_seen)

                if self.val_every_n_batches > 0 and self.train_batches_seen % self.val_every_n_batches == 0:
                    self._validate(iterator,
                                   tensorboard_tag='every_n_batches', tensorboard_index=self.train_batches_seen)

                self._send_event(event_name='after_batch')

                if 0 < self.max_batches <= self.train_batches_seen:
                    impatient = True
                    break

                if 0 < self.validation_patience <= self.patience:
                    log.info('Ran out of patience')
                    impatient = True
                    break

            if impatient:
                break

            self.epoch += 1

            if self.log_every_n_epochs > 0 and self.epoch % self.log_every_n_epochs == 0:
                self._log(iterator, tensorboard_tag='every_n_epochs', tensorboard_index=self.epoch)

            if self.val_every_n_epochs > 0 and self.epoch % self.val_every_n_epochs == 0:
                self._validate(iterator, tensorboard_tag='every_n_epochs', tensorboard_index=self.epoch)

            self._send_event(event_name='after_epoch')

            if 0 < self.max_epochs <= self.epoch:
                break

            if 0 < self.validation_patience <= self.patience:
                log.info('Ran out of patience')
                break
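A hedged usage sketch for the two trainer methods above: trainer stands in for an instance of the trainer class they belong to, and iterator for an already-built DataLearningIterator.

trainer.train_on_batches(iterator)        # run the training loop with the configured limits
reports = trainer.evaluate(iterator,
                           evaluation_targets=('valid', 'test'),
                           print_reports=False)
for data_type, report in reports.items():
    print(data_type, report.get('metrics'))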
Example #5
def _test_model(
        model: Chainer,
        metrics_functions: List[Metric],
        iterator: DataLearningIterator,
        batch_size=-1,
        data_type='valid',
        start_time: float = None,
        show_examples=False) -> Dict[str, Union[int, OrderedDict, str]]:
    if start_time is None:
        start_time = time.time()

    expected_outputs = list(set().union(model.out_params, *[m.inputs for m in metrics_functions]))

    outputs = {out: [] for out in expected_outputs}
    examples = 0
    for x, y_true in iterator.gen_batches(batch_size, data_type,
                                          shuffle=False):
        examples += len(x)
        y_predicted = list(
            model.compute(list(x), list(y_true), targets=expected_outputs))
        if len(expected_outputs) == 1:
            y_predicted = [y_predicted]
        for out, val in zip(outputs.values(), y_predicted):
            out += list(val)

    metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs]))
               for m in metrics_functions]

    report = {
        'eval_examples_count': examples,
        'metrics': prettify_metrics(metrics),
        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
    }

    if show_examples:
        try:
            # keep only the model's own outputs from the last computed batch
            # (iterating the outputs dict directly would yield parameter names, not values)
            y_predicted = zip(*[y_predicted_group
                                for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                if out_name in model.out_params])
            if len(model.out_params) == 1:
                y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
            report['examples'] = [{
                'x': x_item,
                'y_predicted': y_predicted_item,
                'y_true': y_true_item
            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
        except NameError:
            log.warning(f'Could not log examples for {data_type}, assuming it\'s empty')

    return report
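This variant only relies on each metric exposing name, fn and inputs attributes. The namedtuple below is a minimal stand-in for illustration, not the library's actual Metric definition; model, iterator and the variable names 'y_true' and 'y_predicted' are likewise assumptions.

from collections import namedtuple

# minimal stand-in: a metric names the chainer variables it consumes (inputs)
# and the callable that scores them (fn)
Metric = namedtuple('Metric', ['name', 'fn', 'inputs'])

def exact_match(y_true, y_predicted):
    return sum(t == p for t, p in zip(y_true, y_predicted)) / max(len(y_true), 1)

metrics = [Metric('exact_match', exact_match, ['y_true', 'y_predicted'])]
report = _test_model(model, metrics, iterator, batch_size=64, data_type='valid')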
Example #6
    def _log(self,
             iterator: DataLearningIterator,
             tensorboard_tag: Optional[str] = None,
             tensorboard_index: Optional[int] = None) -> None:
        self._send_event(event_name='before_log')
        if self.log_on_k_batches == 0:
            report = {
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - self.start_time + 0.5)))
            }
        else:
            data = islice(
                iterator.gen_batches(self.batch_size,
                                     data_type='train',
                                     shuffle=True), self.log_on_k_batches)
            report = self.test(data,
                               self.train_metrics,
                               start_time=self.start_time)

        report.update({
            'epochs_done': self.epoch,
            'batches_seen': self.train_batches_seen,
            'train_examples_seen': self.examples
        })

        metrics: List[Tuple[str, float]] = list(
            report.get('metrics', {}).items()) + list(self.last_result.items())

        report.update(self.last_result)
        if self.losses:
            report['loss'] = sum(self.losses) / len(self.losses)
            self.losses.clear()
            metrics.append(('loss', report['loss']))

        if metrics and self.tensorboard_log_dir is not None:
            summary = self._tf.Summary()

            for name, score in metrics:
                summary.value.add(tag=f'{tensorboard_tag}/{name}',
                                  simple_value=score)
            self.tb_train_writer.add_summary(summary, tensorboard_index)
            self.tb_train_writer.flush()

        self._send_event(event_name='after_train_log', data=report)

        report = {'train': report}
        print(json.dumps(report, ensure_ascii=False))
Example #7
def _test_model(
        model: Component,
        metrics_functions: List[Tuple[str, Callable]],
        iterator: DataLearningIterator,
        batch_size=-1,
        data_type='valid',
        start_time: float = None,
        show_examples=False) -> Dict[str, Union[int, OrderedDict, str]]:
    if start_time is None:
        start_time = time.time()

    val_y_true = []
    val_y_predicted = []
    for x, y_true in iterator.gen_batches(batch_size, data_type,
                                          shuffle=False):
        y_predicted = list(model(list(x)))
        val_y_true += y_true
        val_y_predicted += y_predicted

    metrics = [(s, f(val_y_true, val_y_predicted))
               for s, f in metrics_functions]

    report = {
        'eval_examples_count': len(val_y_true),
        'metrics': prettify_metrics(metrics),
        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
    }

    if show_examples:
        try:
            report['examples'] = [{
                'x': x_item,
                'y_predicted': y_predicted_item,
                'y_true': y_true_item
            } for x_item, y_predicted_item, y_true_item in zip(
                x, y_predicted, y_true)]
        except NameError:
            log.warning(
                f'Could not log examples for {data_type}, assuming it\'s empty'
            )

    return report
Example #8
    def _validate(self,
                  iterator: DataLearningIterator,
                  tensorboard_tag: Optional[str] = None,
                  tensorboard_index: Optional[int] = None) -> None:
        self._send_event(event_name='before_validation')
        report = self.test(iterator.gen_batches(self.batch_size,
                                                data_type='valid',
                                                shuffle=False),
                           start_time=self.start_time)

        report['epochs_done'] = self.epoch
        report['batches_seen'] = self.train_batches_seen
        report['train_examples_seen'] = self.examples

        metrics = list(report['metrics'].items())

        if tensorboard_tag is not None and self.tensorboard_log_dir is not None:
            summary = self._tf.Summary()
            for name, score in metrics:
                summary.value.add(tag=f'{tensorboard_tag}/{name}',
                                  simple_value=score)
            if tensorboard_index is None:
                tensorboard_index = self.train_batches_seen
            self.tb_valid_writer.add_summary(summary, tensorboard_index)
            self.tb_valid_writer.flush()

        m_name, score = metrics[0]
        if self.improved(score):
            self.patience = 0
            log.info('New best {} of {}'.format(m_name, score))
            self.best = score
            log.info('Saving model')
            self.save()
        else:
            self.patience += 1
            log.info('Did not improve on the {} of {}'.format(
                m_name, self.best))

        report['impatience'] = self.patience
        if self.validation_patience > 0:
            report['patience_limit'] = self.validation_patience

        self._send_event(event_name='after_validation', data=report)
        report = {'valid': report}
        print(json.dumps(report, ensure_ascii=False))
Example #9
def _train_batches(model: NNModel, iterator: DataLearningIterator,
                   train_config: dict,
                   metrics_functions: List[Tuple[str, Callable]]) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'max_batches': 0,
        'batch_size': 1,
        'metric_optimization': 'maximize',
        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,
        # 'show_examples': False,
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = list(
            zip(train_config['train_metrics'],
                get_metrics_by_names(train_config['train_metrics'])))
    else:
        train_metrics_functions = metrics_functions

    if train_config['metric_optimization'] == 'maximize':

        def improved(score, best):
            return score > best

        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':

        def improved(score, best):
            return score < best

        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(
            ['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
                    train_y_true += y_true
                    train_y_predicted += y_predicted
                loss = model.train_on_batch(x, y_true)
                if loss is not None:
                    losses.append(loss)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(s, f(train_y_true, train_y_predicted))
                               for s, f in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }

                    if losses:
                        report['loss'] = sum(losses) / len(losses)
                        losses = []

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' + name,
                                                 simple_value=score),
                            ])
                            tb_train_writer.add_summary(metric_sum, i)

                        # losses has already been cleared above, so check the report instead
                        if 'loss' in report:
                            loss_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/loss',
                                                 simple_value=report['loss']),
                            ])
                            tb_train_writer.add_summary(loss_sum, i)

                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))
                    train_y_true.clear()
                    train_y_predicted.clear()

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and train_y_true:
                metrics = [(s, f(train_y_true, train_y_predicted))
                           for s, f in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                if losses:
                    report['loss'] = sum(losses) / len(losses)
                    losses = []

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_train_writer.add_summary(metric_sum, epochs)

                    if 'loss' in report:
                        loss_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + 'loss',
                                             simple_value=report['loss']),
                        ])
                        tb_train_writer.add_summary(loss_sum, epochs)

                model.process_event(event_name='after_train_log', data=report)
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                train_y_true.clear()
                train_y_predicted.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid',
                                     start_time)
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_valid_writer.add_summary(metric_sum, epochs)

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(
                        m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config['validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
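A hedged configuration sketch for the trainer above: the keys mirror default_train_config, every value is illustrative only, and model, iterator and metrics_functions are assumed to be already built.

train_config = {
    'epochs': 10,
    'batch_size': 32,
    'metric_optimization': 'maximize',   # the first metric drives early stopping
    'validation_patience': 5,
    'val_every_n_epochs': 1,
    'log_every_n_batches': 100,
    'tensorboard_log_dir': None,         # set a path here to enable TensorBoard summaries
}

trained_model = _train_batches(model, iterator, train_config, metrics_functions)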
Example #10
def _train_batches(model: Chainer, iterator: DataLearningIterator, train_config: dict,
                   metrics_functions: List[Metric], *, start_epoch_num: Optional[int] = None) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'start_epoch_num': 0,
        'max_batches': 0,
        'batch_size': 1,

        'metric_optimization': 'maximize',

        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'val_every_n_batches': 0,

        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,

        'show_examples': False,  # read unconditionally below, so keep a default
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = _parse_metrics(train_config['train_metrics'], model.in_y, model.out_params)
    else:
        train_metrics_functions = metrics_functions
    expected_outputs = list(set().union(model.out_params, *[m.inputs for m in train_metrics_functions]))

    if train_config['metric_optimization'] == 'maximize':
        def improved(score, best):
            return score > best
        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':
        def improved(score, best):
            return score < best
        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))

    i = 0
    epochs = start_epoch_num if start_epoch_num is not None else train_config['start_epoch_num']
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    outputs = {key: [] for key in expected_outputs}
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    # validate first (important if model is pre-trained)
    if train_config['val_every_n_epochs'] > 0 or train_config['val_every_n_batches'] > 0:
        report = _test_model(model, metrics_functions, iterator,
                             train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
        report['epochs_done'] = epochs
        report['batches_seen'] = i
        report['train_examples_seen'] = examples

        metrics = list(report['metrics'].items())

        m_name, score = metrics[0]
        if improved(score, best):
            patience = 0
            log.info('New best {} of {}'.format(m_name, score))
            best = score
            log.info('Saving model')
            model.save()
            saved = True
        else:
            patience += 1
            log.info('Did not improve on the {} of {}'.format(m_name, best))

        report['impatience'] = patience
        if train_config['validation_patience'] > 0:
            report['patience_limit'] = train_config['validation_patience']

        model.process_event(event_name='after_validation', data=report)
        report = {'valid': report}
        print(json.dumps(report, ensure_ascii=False))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on and len(train_metrics_functions) > 0:
                    y_predicted = list(model.compute(list(x), list(y_true), targets=expected_outputs))
                    if len(expected_outputs) == 1:
                        y_predicted = [y_predicted]
                    for out, val in zip(outputs.values(), y_predicted):
                        out += list(val)
                result = model.train_on_batch(x, y_true)
                if not isinstance(result, dict):
                    result = {'loss': result} if result is not None else {}
                if 'loss' in result:
                    losses.append(result['loss'])
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs])) for m in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }
                    default_report_keys = list(report.keys())
                    report.update(result)

                    if train_config['show_examples']:
                        try:
                            y_predicted = zip(*[y_predicted_group
                                                for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                                if out_name in model.out_params])
                            if len(model.out_params) == 1:
                                y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
                            report['examples'] = [{
                                'x': x_item,
                                'y_predicted': y_predicted_item,
                                'y_true': y_true_item
                            } for x_item, y_predicted_item, y_true_item
                                in zip(x, y_predicted, y_true)]
                        except NameError:
                            log.warning('Could not log examples as y_predicted is not defined')

                    if losses:
                        report['loss'] = sum(losses)/len(losses)
                        losses = []

                    model.process_event(event_name='after_train_log', data=report)

                    if train_config['tensorboard_log_dir'] is not None:
                        summ = tf.Summary()

                        for name, score in metrics:
                            summ.value.add(tag='every_n_batches/' + name, simple_value=score)
                        for name, score in report.items():
                            if name not in default_report_keys:
                                summ.value.add(tag='every_n_batches/' + name, simple_value=score)

                        tb_train_writer.add_summary(summ, i)
                        tb_train_writer.flush()

                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))
                    for out in outputs.values():
                        out.clear()

                if train_config['val_every_n_batches'] > 0 and i % train_config['val_every_n_batches'] == 0:
                    report = _test_model(model, metrics_functions, iterator,
                                         train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
                    report['epochs_done'] = epochs
                    report['batches_seen'] = i
                    report['train_examples_seen'] = examples

                    metrics = list(report['metrics'].items())

                    if train_config['tensorboard_log_dir'] is not None:
                        summ = tf.Summary()
                        for name, score in metrics:
                            summ.value.add(tag='every_n_batches/' + name, simple_value=score)
                        tb_valid_writer.add_summary(summ, i)
                        tb_valid_writer.flush()

                    m_name, score = metrics[0]
                    if improved(score, best):
                        patience = 0
                        log.info('New best {} of {}'.format(m_name, score))
                        best = score
                        log.info('Saving model')
                        model.save()
                        saved = True
                    else:
                        patience += 1
                        log.info('Did not improve on the {} of {}'.format(m_name, best))

                    report['impatience'] = patience
                    if train_config['validation_patience'] > 0:
                        report['patience_limit'] = train_config['validation_patience']

                    model.process_event(event_name='after_validation', data=report)
                    report = {'valid': report}
                    print(json.dumps(report, ensure_ascii=False))

                    if patience >= train_config['validation_patience'] > 0:
                        log.info('Ran out of patience')
                        break_flag = True
                        break

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and outputs:
                metrics = [(m.name, m.fn(*[outputs[i] for i in m.inputs])) for m in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                default_report_keys = list(report.keys())
                report.update(result)

                if train_config['show_examples']:
                    try:
                        y_predicted = zip(*[y_predicted_group
                                            for out_name, y_predicted_group in zip(expected_outputs, y_predicted)
                                            if out_name in model.out_params])
                        if len(model.out_params) == 1:
                            y_predicted = [y_predicted_item[0] for y_predicted_item in y_predicted]
                        report['examples'] = [{
                            'x': x_item,
                            'y_predicted': y_predicted_item,
                            'y_true': y_true_item
                        } for x_item, y_predicted_item, y_true_item
                            in zip(x, y_predicted, y_true)]
                    except NameError:
                        log.warning('Could not log examples')

                if losses:
                    report['loss'] = sum(losses)/len(losses)
                    losses = []

                model.process_event(event_name='after_train_log', data=report)

                if train_config['tensorboard_log_dir'] is not None:
                    summ = tf.Summary()

                    for name, score in metrics:
                        summ.value.add(tag='every_n_epochs/' + name, simple_value=score)
                    for name, score in report.items():
                        if name not in default_report_keys:
                            summ.value.add(tag='every_n_epochs/' + name, simple_value=score)

                    tb_train_writer.add_summary(summ, epochs)
                    tb_train_writer.flush()

                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                for out in outputs.values():
                    out.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    summ = tf.Summary()
                    for name, score in metrics:
                        summ.value.add(tag='every_n_epochs/' + name, simple_value=score)
                    tb_valid_writer.add_summary(summ, epochs)
                    tb_valid_writer.flush()

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config['validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
Example #11
def _train_batches(model: NNModel, iterator: DataLearningIterator, train_config: dict,
                   metrics_functions: List[Tuple[str, Callable]]) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'max_batches': 0,
        'batch_size': 1,

        'metric_optimization': 'maximize',

        'validation_patience': 5,
        'val_every_n_epochs': 0,

        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,

        'show_examples': False,  # read unconditionally below, so keep a default
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = list(zip(train_config['train_metrics'],
                                           get_metrics_by_names(train_config['train_metrics'])))
    else:
        train_metrics_functions = metrics_functions

    if train_config['metric_optimization'] == 'maximize':
        def improved(score, best):
            return score > best
        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':
        def improved(score, best):
            return score < best
        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
                    train_y_true += y_true
                    train_y_predicted += y_predicted
                loss = model.train_on_batch(x, y_true)
                if loss is not None:
                    losses.append(loss)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(s, f(train_y_true, train_y_predicted)) for s, f in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }

                    if train_config['show_examples']:
                        try:
                            report['examples'] = [{
                                'x': x_item,
                                'y_predicted': y_predicted_item,
                                'y_true': y_true_item
                            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                        except NameError:
                            log.warning('Could not log examples as y_predicted is not defined')

                    if losses:
                        report['loss'] = sum(losses) / len(losses)
                        losses = []

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_batches/' + name,
                                                                            simple_value=score), ])
                            tb_train_writer.add_summary(metric_sum, i)

                        # losses has already been cleared above, so check the report instead
                        if 'loss' in report:
                            loss_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_batches/loss',
                                                                          simple_value=report['loss']), ])
                            tb_train_writer.add_summary(loss_sum, i)

                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))
                    train_y_true.clear()
                    train_y_predicted.clear()

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and train_y_true:
                metrics = [(s, f(train_y_true, train_y_predicted)) for s, f in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }

                if train_config['show_examples']:
                    try:
                        report['examples'] = [{
                            'x': x_item,
                            'y_predicted': y_predicted_item,
                            'y_true': y_true_item
                        } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                    except NameError:
                        log.warning('Could not log examples')

                if losses:
                    report['loss'] = sum(losses)/len(losses)
                    losses = []

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + name,
                                                                        simple_value=score), ])
                        tb_train_writer.add_summary(metric_sum, epochs)

                    if 'loss' in report:
                        loss_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + 'loss',
                                                                        simple_value=report['loss']), ])
                        tb_train_writer.add_summary(loss_sum, epochs)

                model.process_event(event_name='after_train_log', data=report)
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                train_y_true.clear()
                train_y_predicted.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + name,
                                                                        simple_value=score), ])
                        tb_valid_writer.add_summary(metric_sum, epochs)

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config['validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model