Example #1
def _train_batches(model: NNModel, iterator: DataLearningIterator, train_config: dict,
                   metrics_functions: List[Tuple[str, Callable]]) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'max_batches': 0,
        'batch_size': 1,

        'metric_optimization': 'maximize',

        'validation_patience': 5,
        'val_every_n_epochs': 0,

        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,

        'show_examples': False,
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = list(zip(train_config['train_metrics'],
                                           get_metrics_by_names(train_config['train_metrics'])))
    else:
        train_metrics_functions = metrics_functions

    if train_config['metric_optimization'] == 'maximize':
        def improved(score, best):
            return score > best
        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':
        def improved(score, best):
            return score < best
        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    try:
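        # Outer loop: one pass per epoch; training ends on an epoch/batch limit,
        # early stopping, or a KeyboardInterrupt.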
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
                    train_y_true += y_true
                    train_y_predicted += y_predicted
                loss = model.train_on_batch(x, y_true)
                if loss is not None:
                    losses.append(loss)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(s, f(train_y_true, train_y_predicted)) for s, f in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }

                    if train_config['show_examples']:
                        try:
                            report['examples'] = [{
                                'x': x_item,
                                'y_predicted': y_predicted_item,
                                'y_true': y_true_item
                            } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                        except NameError:
                            log.warning('Could not log examples as y_predicted is not defined')

                    if losses:
                        report['loss'] = sum(losses)/len(losses)
                        losses = []

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_batches/' + name,
                                                                            simple_value=score), ])
                            tb_train_writer.add_summary(metric_sum, i)

                        # check the report, not `losses`, which was already reset above
                        if 'loss' in report:
                            loss_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_batches/' + 'loss',
                                                                          simple_value=report['loss']), ])
                            tb_train_writer.add_summary(loss_sum, i)

                    report = {'train': report}

                    print(json.dumps(report, ensure_ascii=False))
                    train_y_true.clear()
                    train_y_predicted.clear()

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and train_y_true:
                metrics = [(s, f(train_y_true, train_y_predicted)) for s, f in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }

                if train_config['show_examples']:
                    try:
                        report['examples'] = [{
                            'x': x_item,
                            'y_predicted': y_predicted_item,
                            'y_true': y_true_item
                        } for x_item, y_predicted_item, y_true_item in zip(x, y_predicted, y_true)]
                    except NameError:
                        log.warning('Could not log examples')

                if losses:
                    report['loss'] = sum(losses)/len(losses)
                    losses = []

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + name,
                                                                        simple_value=score), ])
                        tb_train_writer.add_summary(metric_sum, epochs)

                    # check the report, not `losses`, which was already reset above
                    if 'loss' in report:
                        loss_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + 'loss',
                                                                      simple_value=report['loss']), ])
                        tb_train_writer.add_summary(loss_sum, epochs)

                model.process_event(event_name='after_train_log', data=report)
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                train_y_true.clear()
                train_y_predicted.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config['val_every_n_epochs'] == 0:
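                # Periodic validation: the first metric drives model selection
                # and patience-based early stopping.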
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid', start_time, train_config['show_examples'])
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[tf.Summary.Value(tag='every_n_epochs/' + name,
                                                                        simple_value=score), ])
                        tb_valid_writer.add_summary(metric_sum, epochs)

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config['validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
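
The call below is a hypothetical usage sketch for the function above. MyModel, MyIterator, and accuracy are stand-ins invented here for illustration, not real library classes; they only implement the interface the training loop actually calls (__call__, train_on_batch, save and process_event on the model, gen_batches on the iterator).

import random

class MyModel:
    """Stand-in model exposing the methods the training loop relies on."""
    def __call__(self, x_batch):
        return [random.choice([0, 1]) for _ in x_batch]   # fake predictions

    def train_on_batch(self, x_batch, y_batch):
        return random.random()                            # fake loss value

    def save(self):
        pass                                              # a real model would persist weights here

    def process_event(self, event_name, data):
        pass                                              # hook for after_batch / after_epoch reports

class MyIterator:
    """Stand-in data iterator exposing gen_batches(batch_size)."""
    def __init__(self, data):
        self.data = data                                  # list of (x, y) pairs

    def gen_batches(self, batch_size):
        for start in range(0, len(self.data), batch_size):
            xs, ys = zip(*self.data[start:start + batch_size])
            yield list(xs), list(ys)

def accuracy(y_true, y_predicted):
    return sum(t == p for t, p in zip(y_true, y_predicted)) / max(len(y_true), 1)

data = [([random.random()], random.choice([0, 1])) for _ in range(64)]
train_config = {'epochs': 2, 'batch_size': 16, 'log_every_n_epochs': 1}
model = _train_batches(MyModel(), MyIterator(data), train_config,
                       metrics_functions=[('accuracy', accuracy)])
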
Example #2
def _train_batches(model: NNModel, iterator: DataLearningIterator,
                   train_config: dict,
                   metrics_functions: List[Tuple[str, Callable]]) -> NNModel:

    default_train_config = {
        'epochs': 0,
        'max_batches': 0,
        'batch_size': 1,
        'metric_optimization': 'maximize',
        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,
        # 'show_examples': False,
        'validate_best': True,
        'test_best': True,
        'tensorboard_log_dir': None,
    }

    train_config = dict(default_train_config, **train_config)

    if 'train_metrics' in train_config:
        train_metrics_functions = list(
            zip(train_config['train_metrics'],
                get_metrics_by_names(train_config['train_metrics'])))
    else:
        train_metrics_functions = metrics_functions

    if train_config['metric_optimization'] == 'maximize':

        def improved(score, best):
            return score > best

        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':

        def improved(score, best):
            return score < best

        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(
            ['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config[
        'log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    losses = []
    start_time = time.time()
    break_flag = False

    if train_config['tensorboard_log_dir'] is not None:
        import tensorflow as tf
        tb_log_dir = expand_path(train_config['tensorboard_log_dir'])

        tb_train_writer = tf.summary.FileWriter(str(tb_log_dir / 'train_log'))
        tb_valid_writer = tf.summary.FileWriter(str(tb_log_dir / 'valid_log'))

    try:
        while True:
            for x, y_true in iterator.gen_batches(train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
                    train_y_true += y_true
                    train_y_predicted += y_predicted
                loss = model.train_on_batch(x, y_true)
                if loss is not None:
                    losses.append(loss)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config['log_every_n_batches'] == 0:
                    metrics = [(s, f(train_y_true, train_y_predicted))
                               for s, f in train_metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': prettify_metrics(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                    }

                    if losses:
                        report['loss'] = sum(losses) / len(losses)
                        losses = []

                    if train_config['tensorboard_log_dir'] is not None:
                        for name, score in metrics:
                            metric_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' + name,
                                                 simple_value=score),
                            ])
                            tb_train_writer.add_summary(metric_sum, i)

                        # check the report, not `losses`, which was already reset above
                        if 'loss' in report:
                            loss_sum = tf.Summary(value=[
                                tf.Summary.Value(tag='every_n_batches/' + 'loss',
                                                 simple_value=report['loss']),
                            ])
                            tb_train_writer.add_summary(loss_sum, i)

                    report = {'train': report}

                    print(json.dumps(report, ensure_ascii=False))
                    train_y_true.clear()
                    train_y_predicted.clear()

                if i >= train_config['max_batches'] > 0:
                    break_flag = True
                    break

                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                model.process_event(event_name='after_batch', data=report)
            if break_flag:
                break

            epochs += 1

            report = {
                'epochs_done': epochs,
                'batches_seen': i,
                'train_examples_seen': examples,
                'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
            }
            model.process_event(event_name='after_epoch', data=report)

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and train_y_true:
                metrics = [(s, f(train_y_true, train_y_predicted))
                           for s, f in train_metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'train_examples_seen': examples,
                    'metrics': prettify_metrics(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time + 0.5)))
                }
                if losses:
                    report['loss'] = sum(losses) / len(losses)
                    losses = []

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_train_writer.add_summary(metric_sum, epochs)

                    # check the report, not `losses`, which was already reset above
                    if 'loss' in report:
                        loss_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + 'loss',
                                             simple_value=report['loss']),
                        ])
                        tb_train_writer.add_summary(loss_sum, epochs)

                model.process_event(event_name='after_train_log', data=report)
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                train_y_true.clear()
                train_y_predicted.clear()

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config[
                    'val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, iterator,
                                     train_config['batch_size'], 'valid',
                                     start_time)
                report['epochs_done'] = epochs
                report['batches_seen'] = i
                report['train_examples_seen'] = examples

                metrics = list(report['metrics'].items())

                if train_config['tensorboard_log_dir'] is not None:
                    for name, score in metrics:
                        metric_sum = tf.Summary(value=[
                            tf.Summary.Value(tag='every_n_epochs/' + name,
                                             simple_value=score),
                        ])
                        tb_valid_writer.add_summary(metric_sum, epochs)

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(
                        m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config[
                        'validation_patience']

                model.process_event(event_name='after_validation', data=report)
                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
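
Examples #1 and #2 write scalar summaries through the TensorFlow 1.x tf.summary.FileWriter / tf.Summary protobuf API. The stand-alone sketch below isolates that logging pattern; the log directory, tag, and values are arbitrary, and it assumes TensorFlow 1.x is installed (in TF 2.x the rough equivalent is tf.summary.create_file_writer plus tf.summary.scalar).

import tensorflow as tf

writer = tf.summary.FileWriter('logs/train_log')      # arbitrary directory

for step, value in enumerate([0.9, 0.7, 0.5]):        # pretend per-step losses
    summary = tf.Summary(value=[tf.Summary.Value(tag='every_n_batches/loss',
                                                 simple_value=value)])
    writer.add_summary(summary, step)                  # step becomes the x-axis in TensorBoard

writer.flush()
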
Example #3
def _train_batches(model: NNModel, dataset: Dataset, train_config: dict,
                   metrics_functions: List[Tuple[str, Callable]]):

    default_train_config = {
        'epochs': 0,
        'batch_size': 1,
        'metric_optimization': 'maximize',
        'validation_patience': 5,
        'val_every_n_epochs': 0,
        'log_every_n_batches': 0,
        'log_every_n_epochs': 0,
        # 'show_examples': False,
        'validate_best': True,
        'test_best': True
    }

    train_config = dict(default_train_config, **train_config)

    if train_config['metric_optimization'] == 'maximize':

        def improved(score, best):
            return score > best

        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':

        def improved(score, best):
            return score < best

        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(
            ['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config[
        'log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    start_time = time.time()
    try:
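        # Same epoch loop as Examples #1 and #2, but without max_batches,
        # loss averaging, or TensorBoard logging.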
        while True:
            for x, y_true in dataset.batch_generator(
                    train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
                    train_y_true += y_true
                    train_y_predicted += y_predicted
                model.train_on_batch(x, y_true)
                i += 1
                examples += len(x)

                if train_config['log_every_n_batches'] > 0 and i % train_config[
                        'log_every_n_batches'] == 0:
                    metrics = [(s, f(train_y_true, train_y_predicted))
                               for s, f in metrics_functions]
                    report = {
                        'epochs_done': epochs,
                        'batches_seen': i,
                        'examples_seen': examples,
                        'metrics': dict(metrics),
                        'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time)))
                    }
                    report = {'train': report}
                    print(json.dumps(report, ensure_ascii=False))
                    train_y_true = []
                    train_y_predicted = []

            epochs += 1

            if train_config['log_every_n_epochs'] > 0 and epochs % train_config['log_every_n_epochs'] == 0\
                    and train_y_true:
                metrics = [(s, f(train_y_true, train_y_predicted))
                           for s, f in metrics_functions]
                report = {
                    'epochs_done': epochs,
                    'batches_seen': i,
                    'examples_seen': examples,
                    'metrics': dict(metrics),
                    'time_spent': str(datetime.timedelta(seconds=round(time.time() - start_time)))
                }
                report = {'train': report}
                print(json.dumps(report, ensure_ascii=False))
                train_y_true = []
                train_y_predicted = []

            if train_config['val_every_n_epochs'] > 0 and epochs % train_config[
                    'val_every_n_epochs'] == 0:
                report = _test_model(model, metrics_functions, dataset,
                                     train_config['batch_size'], 'valid',
                                     start_time)

                metrics = list(report['metrics'].items())

                m_name, score = metrics[0]
                if improved(score, best):
                    patience = 0
                    log.info('New best {} of {}'.format(m_name, score))
                    best = score
                    log.info('Saving model')
                    model.save()
                    saved = True
                else:
                    patience += 1
                    log.info('Did not improve on the {} of {}'.format(
                        m_name, best))

                report['impatience'] = patience
                if train_config['validation_patience'] > 0:
                    report['patience_limit'] = train_config[
                        'validation_patience']

                report = {'valid': report}
                print(json.dumps(report, ensure_ascii=False))

                if patience >= train_config['validation_patience'] > 0:
                    log.info('Ran out of patience')
                    break

            if epochs >= train_config['epochs'] > 0:
                break
    except KeyboardInterrupt:
        log.info('Stopped training')

    if not saved:
        log.info('Saving model')
        model.save()

    return model
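
All three variants share the same model-selection core: a comparator chosen by metric_optimization and a patience counter that stops training once the first validation metric stops improving. The self-contained sketch below distills just that logic; the score list and config values are made up for illustration, and ValueError stands in for the framework's ConfigError.

def run_early_stopping(validation_scores, metric_optimization='maximize', validation_patience=5):
    """Distilled version of the improved()/patience logic used in the examples above."""
    if metric_optimization == 'maximize':
        def improved(score, best):
            return score > best
        best = float('-inf')
    elif metric_optimization == 'minimize':
        def improved(score, best):
            return score < best
        best = float('inf')
    else:
        raise ValueError("metric_optimization has to be one of ['maximize', 'minimize']")

    patience = 0
    for epoch, score in enumerate(validation_scores, start=1):
        if improved(score, best):
            best, patience = score, 0   # new best: reset patience (a real loop also saves the model)
        else:
            patience += 1               # no improvement: grow impatient
        if patience >= validation_patience > 0:
            print('Ran out of patience at epoch {}'.format(epoch))
            break
    return best

# Accuracy stalls after epoch 3, so with patience 3 training stops at epoch 6.
print(run_early_stopping([0.70, 0.74, 0.75, 0.75, 0.74, 0.75, 0.73], validation_patience=3))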