Esempio n. 1
0
 def __init__(self, sentences, extra=None, **kwargs):
     """Store the sentences and resolve the configuration for this object.

     Args:
         sentences: Kept as-is on ``self.sentences``.
         extra: Optional object kept on ``self.extra``. When omitted, a new
             empty dict is created for this instance.
         **kwargs: Passed to ``get_config`` together with the result of
             ``get_default_config()``.
     """
     self.sentences = sentences
     self.config = config = get_config(get_default_config(), **kwargs)
     # A literal ``{}`` default is created once at definition time and would
     # be shared by every instance; build a fresh dict per call instead.
     self.extra = {} if extra is None else extra
     self.loader = None
     self.cuda = config.get('cuda')
     self.logger = get_logger()
Esempio n. 2
0
def run(options):
    """Build datasets, iterators and the model, then evaluate once or train.

    When ``options.eval_only_mode`` is set, a single evaluation is run with
    step 0 and the process exits through ``sys.exit()``. Otherwise the
    initial model is optionally saved and ``run_train`` is called.
    """
    log = get_logger()
    # Constructed in the same position as before; not used further here.
    experiment_logger = ExperimentLogger()

    train_data, validation_data = get_train_and_validation(options)
    train_iterator = get_train_iterator(options, train_data)
    validation_iterator = get_validation_iterator(options, validation_data)
    embeddings = train_data['embeddings']
    word2idx = train_data['word2idx']

    log.info('Initializing model.')
    trainer = build_net(options, embeddings, train_iterator, word2idx=word2idx)
    log.info('Model:')
    for param_name, param in trainer.net.named_parameters():
        log.info('{} {} {}'.format(param_name, param.shape, param.requires_grad))

    # Evaluation components are built from the validation data and iterator.
    eval_context = {
        'dataset': validation_data,
        'batch_iterator': validation_iterator,
    }
    eval_components = get_eval_components(
        options, eval_context, config_lst=options.eval_config)
    model_evaluation = ModelEvaluation(eval_components)

    if options.eval_only_mode:
        info = {
            'experiment_path': options.experiment_path,
            'step': 0,
        }
        run_evaluation(options, trainer, model_evaluation, info, metadata={'step': 0})
        sys.exit()

    if options.save_init:
        log.info('Saving model (init).')
        init_model_path = os.path.join(options.experiment_path, 'model_init.pt')
        trainer.save_model(init_model_path)

    run_train(options, train_iterator, trainer, model_evaluation)
Esempio n. 3
0
 def __init__(self, data_source, batch_size, include_partial=False, rng=None):
     """Record the data source and batching settings.

     Args:
         data_source: Kept on ``self.data_source``.
         batch_size: Kept on ``self.batch_size`` and used to seed the
             batch-size cache under key ``0``.
         include_partial: Kept on ``self.include_partial``.
         rng: Random state to use. When ``None``, a
             ``np.random.RandomState`` seeded with 11 is created.
     """
     self.data_source = data_source
     self.active = False
     # Fall back to a fixed-seed generator when the caller supplies none.
     self.rng = np.random.RandomState(seed=11) if rng is None else rng
     self.batch_size = batch_size
     self.include_partial = include_partial
     self._batch_size_cache = {0: self.batch_size}
     self.logger = get_logger()
Esempio n. 4
0
def configure(options):
    """Prepare the experiment directory, then log and save the flags."""
    experiment_path = options.experiment_path

    # Output paths must be configured before anything else happens.
    configure_experiment(experiment_path)

    log = get_logger()
    flags_text = stringify_flags(options)
    log.info(flags_text)

    save_flags(options, experiment_path)
Esempio n. 5
0
def run_evaluation(options, trainer, model_evaluation, info, metadata):
    """Run the evaluation, log each result's metadata, and return the results.

    Returns:
        A list with every item produced by
        ``model_evaluation.run(trainer, info, metadata)``, in order.
    """
    log = get_logger()
    collected = []

    for entry in model_evaluation.run(trainer, info, metadata):
        result = entry['result']
        name = result['name']
        # One log line per metadata field of this evaluation.
        for meta_key, meta_value in result['meta'].items():
            log.info('eval[{}] {}={}'.format(name, meta_key, meta_value))
        collected.append(entry)

    return collected
Esempio n. 6
0
    def __init__(self, **kwargs):
        """Set default attributes, then apply keyword overrides.

        Args:
            **kwargs: Each key/value pair is set as an attribute on the
                instance after the defaults and ``init_defaults()`` have run.
        """
        super().__init__()

        # Baseline state, assigned before ``init_defaults`` is called.
        self.enabled, self.is_initialized, self.write = True, True, False
        self.logger = get_logger()
        self.init_defaults()

        # Caller-supplied values replace whatever was set above.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)
Esempio n. 7
0
def build_net(options, context=None, net_components=None):
    """Assemble a ``Net`` from prepared components and wrap it in a ``Trainer``.

    Args:
        options: Object providing ``hidden_dim``, ``load_model_path``,
            ``mlp_reg``, ``clip_threshold`` and ``experiment_name``; it is
            also passed to ``trainer.init_optimizer``.
        context: Mapping with the keys ``'cuda'`` and ``'word2idx'``.
        net_components: Mapping with the keys ``'projection_layer'``,
            ``'diora'`` and ``'loss_funcs'``.

    Returns:
        The configured ``Trainer`` instance.

    Raises:
        KeyError: If a required key is missing from ``context`` or
            ``net_components`` (including when they are omitted).
    """
    # ``None`` sentinels replace the former mutable ``dict()`` defaults, which
    # were single objects shared across all calls.
    context = {} if context is None else context
    net_components = {} if net_components is None else net_components

    logger = get_logger()

    logger.info('build net')

    # Components.
    projection_layer = net_components['projection_layer']
    diora = net_components['diora']
    loss_funcs = net_components['loss_funcs']

    # Context.
    cuda = context['cuda']
    word2idx = context['word2idx']

    # Net
    net = Net(projection_layer,
              diora,
              loss_funcs=loss_funcs,
              size=options.hidden_dim)

    # Load model.
    if options.load_model_path is not None:
        logger.info('Loading model: {}'.format(options.load_model_path))
        Trainer.load_model(net, options.load_model_path)

    # CUDA-support
    if cuda:
        net.cuda()
        diora.cuda()

    # Trainer
    trainer = Trainer(
        net=net,
        cuda=cuda,
        mlp_reg=options.mlp_reg,
        clip_threshold=options.clip_threshold,
        word2idx=word2idx,
    )
    trainer.experiment_name = options.experiment_name

    trainer.init_optimizer(options)

    logger.info('Architecture =\n {}'.format(str(trainer.net)))

    return trainer
Esempio n. 8
0
    def get_iterator(self):
        """Yield training batches over all epochs, with per-step action flags.

        Yields:
            Tuples ``(epoch, step, batch_idx, batch_map, should)``. ``step``
            is a counter that keeps increasing across epochs. ``should`` is a
            dict of booleans with the keys ``'end_of_epoch'``, ``'log'``,
            ``'periodic'``, ``'distinct'`` and ``'eval'``.

        Note:
            Calls ``sys.exit()`` once ``options.max_step`` is non-negative and
            the step counter has reached it.
        """
        options = self.options
        logger = get_logger()
        logger.info('Generating seeds for {} epochs using initial seed {}.'.format(options.max_epoch, options.seed))
        seeds = generate_seeds(options.max_epoch, options.seed)

        # Global step counter; it is not reset between epochs.
        step = 0
        for epoch, seed in zip(range(options.max_epoch), seeds):
            logger.info('epoch={} seed={}'.format(epoch, seed))
            it = self.batch_iterator.get_iterator(random_seed=seed, epoch=epoch)
            wrap_it = self.get_iterator_standard(it)

            def wrap_again(it):
                """Re-yield items one step late so the last one can be flagged.

                Each item is held back until the next one arrives; the final
                item is yielded with ``end_of_epoch`` set to True.
                """
                last_batch_idx, last_batch_map, end_of_epoch = None, None, False
                for batch_idx, batch_map in it:
                    if last_batch_idx is not None:
                        yield last_batch_idx, last_batch_map, end_of_epoch
                    last_batch_idx, last_batch_map = batch_idx, batch_map
                end_of_epoch = True
                # NOTE(review): if `it` produced nothing, this yields
                # (None, None, True) - confirm consumers handle a None batch.
                yield last_batch_idx, last_batch_map, end_of_epoch

            for epoch_step, (batch_idx, batch_map, end_of_epoch) in enumerate(wrap_again(wrap_it)):
                should = {}
                should['end_of_epoch'] = end_of_epoch
                should['log'] = step % options.log_every_batch == 0
                # Both save flags additionally require step >= save_after.
                should['periodic'] = step % options.save_latest == 0 and step >= options.save_after
                should['distinct'] = step % options.save_distinct == 0 and step >= options.save_after

                # Step-based evaluation; skipped entirely when eval_every_batch <= 0.
                should_eval = (
                    options.eval_every_batch > 0 and
                    step % options.eval_every_batch == 0 and
                    step >= options.eval_after
                    )
                # Epoch-based evaluation, checked on the last batch of an epoch.
                should_eval = should_eval or (
                    end_of_epoch and
                    epoch % options.eval_every_epoch == 0 and
                    step >= options.eval_after
                    )
                should['eval'] = should_eval

                yield epoch, step, batch_idx, batch_map, should
                step += 1

                # The limit is checked after the consumer has handled the batch
                # and the counter has been advanced.
                if options.max_step >= 0 and step >= options.max_step:
                    logger.info('Max-Step={} Quitting.'.format(options.max_step))
                    sys.exit()
Esempio n. 9
0
def build_net(options, embeddings, batch_iterator=None, word2idx=None):
    """Create the net components and build a trainer from them.

    Args:
        options: Provides ``cuda``; also passed on to the component and
            trainer builders.
        embeddings: Stored in the build context under ``'embeddings'``.
        batch_iterator: Stored in the build context under ``'batch_iterator'``.
        word2idx: Stored in the build context under ``'word2idx'``.

    Returns:
        The trainer returned by ``trainer_build_net``.
    """
    # Shared context handed to both the component factory and the trainer builder.
    context = {
        'cuda': options.cuda,
        'embeddings': embeddings,
        'batch_iterator': batch_iterator,
        'word2idx': word2idx,
    }

    components = get_net_components(options, context)
    trainer = trainer_build_net(options, context=context, net_components=components)

    log = get_logger()
    param_count = count_params(trainer.net)
    log.info('# of params = {}'.format(param_count))

    return trainer
Esempio n. 10
0
    def __init__(self, **kwargs):
        """Set defaults, apply keyword overrides, then set up parameters.

        Args:
            **kwargs: A key already present in ``self.unregistered`` updates
                that dict; any other key is set as an instance attribute.
        """
        super().__init__()

        # Baseline state, assigned before ``init_defaults`` is called.
        self.enabled = True
        self.unregistered = {}
        self.logger = get_logger()
        self.init_defaults()

        # Route each override to the unregistered dict or to an attribute.
        for key, value in kwargs.items():
            if key not in self.unregistered:
                setattr(self, key, value)
            else:
                self.unregistered[key] = value

        self.init_parameters()
        self.reset_parameters()
Esempio n. 11
0
    def __init__(self,
                 net=None,
                 cuda=None,
                 word2idx=None,
                 mlp_reg=0,
                 clip_threshold=5.0):
        """Store the network and training settings; the optimizer starts unset.

        Args:
            net: Kept on ``self.net``.
            cuda: Kept on ``self.cuda``.
            word2idx: Kept on ``self.word2idx``.
            mlp_reg: Kept on ``self.mlp_reg``.
            clip_threshold: Kept on ``self.clip_threshold``.
        """
        super(Trainer, self).__init__()

        # Values supplied by the caller.
        self.net = net
        self.cuda = cuda
        self.word2idx = word2idx
        self.mlp_reg = mlp_reg
        self.clip_threshold = clip_threshold

        # Placeholders that start out empty.
        self.optimizer = None
        self.optimizer_cls = None
        self.optimizer_kwargs = None
        self._embedding_keys = None

        self.logger = get_logger()
Esempio n. 12
0
 def __init__(self, name, kwargs_dict):
     """Keep the given name and keyword dict, and attach a logger."""
     self.name, self.kwargs_dict = name, kwargs_dict
     self.logger = get_logger()
Esempio n. 13
0
def run_train(options, train_iterator, trainer, model_evaluation):
    """Run the training loop with logging, checkpointing and evaluation.

    For every batch produced by ``MyTrainIterator`` this performs one
    ``trainer.step`` and then acts on the ``should`` flags: batch logging,
    periodic and distinct checkpoints, evaluation with best-result tracking,
    and end-of-epoch handling.

    Args:
        options: Provides ``experiment_path`` and ``save_after``; also passed
            to ``MyTrainIterator`` and ``run_evaluation``.
        train_iterator: Wrapped by ``MyTrainIterator``.
        trainer: Object providing ``step``, ``save_model`` and ``end_of_epoch``.
        model_evaluation: Passed through to ``run_evaluation``.
    """
    logger = get_logger()
    experiment_logger = ExperimentLogger()

    logger.info('Running train.')

    # NOTE(review): `step` is rebound by the loop below, and `best_step` /
    # `best_metric` are never updated in this function, so the checkpoints
    # always record these initial values for them.
    step = 0
    best_step = 0
    best_metric = math.inf
    # Maps 'best__<eval name>__<metric>' to the record of the best result so far.
    best_dict = {}

    my_iterator = MyTrainIterator(options, train_iterator)

    for epoch, step, batch_idx, batch_map, should in my_iterator.get_iterator():
        # HACK: Weird fix that counteracts other libraries (i.e. allennlp) modifying
        # the global logger. Needed after adding tensorboard.
        while len(logger.parent.handlers) > 0:
            logger.parent.handlers.pop()

        batch_map['step'] = step
        result = trainer.step(batch_map)
        experiment_logger.record(result)
        del result

        if should['log']:
            # NOTE(review): `metrics` is not used afterwards in this function.
            metrics = experiment_logger.log_batch(epoch, step, batch_idx)

        # -- Periodic Checkpoints -- #

        # Written to fixed file names on every periodic save.
        if should['periodic']:
            logger.info('Saving model (periodic).')
            trainer.save_model(os.path.join(options.experiment_path, 'model_periodic.pt'))
            save_experiment(os.path.join(options.experiment_path, 'experiment_periodic.json'),
                            dict(step=step, epoch=epoch, best_step=best_step, best_metric=best_metric))

        # Written to file names that include the step number.
        if should['distinct']:
            logger.info('Saving model (distinct).')
            trainer.save_model(os.path.join(options.experiment_path, 'model.step_{}.pt'.format(step)))
            save_experiment(os.path.join(options.experiment_path, 'experiment.step_{}.json'.format(step)),
                            dict(step=step, epoch=epoch, best_step=best_step, best_metric=best_metric))

        # -- Validation -- #

        if should['eval']:
            logger.info('Evaluation.')

            info = dict()
            info['experiment_path'] = options.experiment_path
            info['step'] = step
            info['epoch'] = epoch

            for eval_result_dict in run_evaluation(options, trainer, model_evaluation, info, metadata=dict(step=step)):
                result = eval_result_dict['result']
                func = eval_result_dict['component']
                name = func.name

                # `compare` yields (metric key, value, is_best) triples for this result.
                for key, val, is_best in func.compare(best_dict, result):
                    best_dict_key = 'best__{}__{}'.format(name, key)

                    # Early stopping.
                    if is_best:
                        # Remember the previous best value only for the log message below.
                        if best_dict_key in best_dict:
                            prev_val = best_dict[best_dict_key]['value']
                        else:
                            prev_val = None
                        # Update running result.
                        best_dict[best_dict_key] = {}
                        best_dict[best_dict_key]['eval'] = name
                        best_dict[best_dict_key]['metric'] = key
                        best_dict[best_dict_key]['value'] = val
                        best_dict[best_dict_key]['step'] = step
                        best_dict[best_dict_key]['epoch'] = epoch

                        logger.info('Recording, best eval, key = {}, val = {} -> {}, json = {}'.format(
                            best_dict_key, prev_val, val, json.dumps(best_dict[best_dict_key])))

                        # The best result is always recorded above, but files are
                        # only written once step has reached save_after.
                        if step >= options.save_after:
                            logger.info('Saving model, best eval, key = {}, json = {}'.format(
                                best_dict_key, json.dumps(best_dict[best_dict_key])))
                            logger.info('checkpoint_dir = {}'.format(options.experiment_path))

                            # Save result and model.
                            trainer.save_model(
                                os.path.join(options.experiment_path, 'model.{}.pt'.format(best_dict_key)))
                            save_experiment(os.path.join(options.experiment_path, 'experiment.{}.json'.format(best_dict_key)),
                                best_dict[best_dict_key])

        # END OF EPOCH

        if should['end_of_epoch']:
            experiment_logger.log_epoch(epoch, step)
            trainer.end_of_epoch(best_dict)
Esempio n. 14
0
 def __init__(self, datasets):
     """Keep the given datasets and attach a logger."""
     self.logger = get_logger()
     self.datasets = datasets
Esempio n. 15
0
 def __init__(self, reader):
     """Initialise the base class, then keep the reader and attach a logger."""
     super(ReaderManager, self).__init__()
     self.logger = get_logger()
     self.reader = reader
Esempio n. 16
0
 def __init__(self, lowercase=True, filter_length=0, for_nopunct=False):
     """Record the reader settings and attach a logger.

     Args:
         lowercase: Kept on ``self.lowercase``.
         filter_length: Kept on ``self.filter_length``.
         for_nopunct: Kept on ``self.for_nopunct``.
     """
     self.lowercase, self.filter_length, self.for_nopunct = (
         lowercase, filter_length, for_nopunct)
     self.logger = get_logger()