Example #1
    def start(self):

        args = self.args

        # Default configuration values are defined in args.py
        # args = get_args()
        logger = self.get_logger()

        # Set random seed for reproducibility
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        np.random.seed(args.seed)
        random.seed(args.seed)

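        # Select the training device: fall back to CPU when CUDA is disabled or unavailable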
        if not args.cuda:
            args.gpu = -1
        if torch.cuda.is_available() and args.cuda:
            print('Note: You are using GPU for training')
            torch.cuda.set_device(args.gpu)
            torch.cuda.manual_seed(args.seed)
        if torch.cuda.is_available() and not args.cuda:
            print('Warning: Using CPU for training')

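        # Look up the dataset class and build the train/dev/test iterators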
        dataset_map = {
            'Reuters': Reuters,
            'AAPD': AAPD,
            'IMDB': IMDB,
            'Yelp2014': Yelp2014
        }

        if args.dataset not in dataset_map:
            raise ValueError('Unrecognized dataset')

        dataset_class = dataset_map[args.dataset]
        train_iter, dev_iter, test_iter = dataset_class.iters(
            args.data_dir,
            args.word_vectors_file,
            args.word_vectors_dir,
            batch_size=args.batch_size,
            device=args.gpu,
            unk_init=UnknownWordVecCache.unk)

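        # Clone the arguments into a model config and record the number of target classes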
        config = deepcopy(args)
        config.dataset = train_iter.dataset
        config.target_class = train_iter.dataset.NUM_CLASSES

        print('Dataset:', args.dataset)
        print('No. of target classes:', train_iter.dataset.NUM_CLASSES)
        print('No. of train instances:', len(train_iter.dataset))
        print('No. of dev instances:', len(dev_iter.dataset))
        print('No. of test instances:', len(test_iter.dataset))

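        # Resume from a saved snapshot if given, otherwise build a fresh CharCNN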
        if args.resume_snapshot:
            if args.cuda:
                model = torch.load(
                    args.resume_snapshot,
                    map_location=lambda storage, location: storage.cuda(args.gpu))
            else:
                model = torch.load(
                    args.resume_snapshot,
                    map_location=lambda storage, location: storage)
        else:
            model = CharCNN(config)
            if args.cuda:
                model.cuda()

        if not args.trained_model:
            save_path = os.path.join(args.save_path,
                                     dataset_map[args.dataset].NAME)
            os.makedirs(save_path, exist_ok=True)

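        # Optimize only the parameters that require gradients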
        parameter = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(parameter,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

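        # Build evaluators for the train, dev, and test splits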
        train_evaluator = EvaluatorFactory.get_evaluator(
            dataset_class, model, None, train_iter, args.batch_size, args.gpu)
        test_evaluator = EvaluatorFactory.get_evaluator(
            dataset_class, model, None, test_iter, args.batch_size, args.gpu)
        dev_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model,
                                                       None, dev_iter,
                                                       args.batch_size,
                                                       args.gpu)

        if hasattr(train_evaluator, 'is_multilabel'):
            train_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(dev_evaluator, 'is_multilabel'):
            dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(dev_evaluator, 'ignore_lengths'):
            dev_evaluator.ignore_lengths = True
        if hasattr(test_evaluator, 'is_multilabel'):
            test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(test_evaluator, 'ignore_lengths'):
            test_evaluator.ignore_lengths = True

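        # Trainer settings: optimizer, logging interval, early-stopping patience, and checkpoint directory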
        trainer_config = {
            'optimizer': optimizer,
            'batch_size': args.batch_size,
            'log_interval': args.log_every,
            'patience': args.patience,
            'model_outfile': args.save_path,
            'logger': logger,
            'is_multilabel': dataset_class.IS_MULTILABEL,
            'ignore_lengths': True
        }

        trainer = TrainerFactory.get_trainer(args.dataset, model, None,
                                             train_iter, trainer_config,
                                             train_evaluator, test_evaluator,
                                             dev_evaluator)

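        # Train from scratch unless a pre-trained model was supplied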
        if not args.trained_model:
            trainer.train(args.epochs)
        else:
            if args.cuda:
                model = torch.load(
                    args.trained_model,
                    map_location=lambda storage, location: storage.cuda(args.gpu))
            else:
                model = torch.load(
                    args.trained_model,
                    map_location=lambda storage, location: storage)

        # Calculate dev and test metrics
        if hasattr(trainer, 'snapshot_path'):
            model = torch.load(trainer.snapshot_path)

        # evaluate_dataset(split_name, dataset_cls, model, embedding, loader, batch_size, device, is_multilabel)
        self.evaluate_dataset('dev', dataset_map[args.dataset], model, None,
                              dev_iter, args.batch_size, args.gpu,
                              dataset_class.IS_MULTILABEL)
        self.evaluate_dataset('test', dataset_map[args.dataset], model, None,
                              test_iter, args.batch_size, args.gpu,
                              dataset_class.IS_MULTILABEL)
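
# A minimal, self-contained sketch of the argument namespace that the snippet
# above assumes. The attribute names are taken from the args.<name> accesses in
# the example; the flag spellings and default values below are illustrative
# guesses, not the project's actual args.py.
import argparse


def get_example_args(argv=None):
    parser = argparse.ArgumentParser(description='CharCNN training (sketch)')
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--dataset', default='Reuters',
                        choices=['Reuters', 'AAPD', 'IMDB', 'Yelp2014'])
    parser.add_argument('--data-dir', default='data')
    parser.add_argument('--word-vectors-file', default='glove.840B.300d.txt')
    parser.add_argument('--word-vectors-dir', default='data/embeddings')
    parser.add_argument('--batch-size', type=int, default=128)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--log-every', type=int, default=10)
    parser.add_argument('--save-path', default='model_checkpoints')
    parser.add_argument('--resume-snapshot', default=None)
    parser.add_argument('--trained-model', default=None)
    # Parse no CLI arguments by default so calling get_example_args() returns defaults
    return parser.parse_args(argv if argv is not None else [])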
Example #2
    test_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, test_loader, args.batch_size,
                                                    args.device)
    dev_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, dev_loader, args.batch_size,
                                                   args.device)

    trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_interval,
        'model_outfile': args.model_outfile,
        'lr_reduce_factor': args.lr_reduce_factor,
        'patience': args.patience,
        'tensorboard': args.tensorboard,
        'run_label': args.run_label,
        'logger': logger
    }
    trainer = TrainerFactory.get_trainer(args.dataset, model, embedding, train_loader, trainer_config, train_evaluator, test_evaluator, dev_evaluator)

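    # Count model parameters and log the total, then train unless training is skipped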
    if not args.skip_training:
        total_params = 0
        for param in model.parameters():
            size = [s for s in param.size()]
            total_params += np.prod(size)
        logger.info('Total number of parameters: %s', total_params)
        trainer.train(args.epochs)

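    # Load the saved checkpoint and move its tensors onto the target device before restoring the model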
    _, _, state_dict, _, _ = load_checkpoint(args.model_outfile)

    for k, tensor in state_dict.items():
        state_dict[k] = tensor.to(device)

    model.load_state_dict(state_dict)
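
# Aside: the per-tensor size loop above can be written more compactly with the
# standard torch.Tensor.numel API. count_parameters is a hypothetical helper
# shown only as a sketch of the equivalent computation:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())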
Example #3
    if hasattr(test_evaluator, 'is_multilabel'):
        test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
    if hasattr(dev_evaluator, 'is_multilabel'):
        dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL

    trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_every,
        'patience': args.patience,
        'model_outfile': args.save_path,
        'logger': logger,
        'is_multilabel': dataset_class.IS_MULTILABEL
    }

    trainer = TrainerFactory.get_trainer(args.dataset, model, None, train_iter,
                                         trainer_config, train_evaluator,
                                         test_evaluator, dev_evaluator)

    if not args.trained_model:
        trainer.train(args.epochs)
    else:
        if args.cuda:
            model = torch.load(
                args.trained_model,
                map_location=lambda storage, location: storage.cuda(args.gpu))
        else:
            model = torch.load(args.trained_model,
                               map_location=lambda storage, location: storage)

    # Calculate dev and test metrics
    if hasattr(trainer, 'snapshot_path'):
        model = torch.load(trainer.snapshot_path)
    dev_evaluator = EvaluatorFactory.get_evaluator(dataset_cls, model, embedding, dev_loader, args.batch_size, args.device)

    trainer_config = {
        'optimizer': optimizer,
        'batch_size': args.batch_size,
        'log_interval': args.log_interval,
        'model_outfile': args.model_outfile,
        'lr_reduce_factor': args.lr_reduce_factor,
        'patience': args.patience,
        'tensorboard': args.tensorboard,
        'run_label': args.run_label,
        'logger': logger,
        'clip_norm': args.clip_norm
    }

    trainer = TrainerFactory.get_trainer('sick', model, embedding, train_loader, trainer_config, train_evaluator, test_evaluator, dev_evaluator)

    if not args.skip_training:
        total_params = 0
        #print(model.parameters)
        for param in model.parameters():
            size = [s for s in param.size()]
            total_params += np.prod(size)
            #print(param.size())
            #print('\n')
        logger.info('Total number of parameters: %s', total_params)
        trainer.train(args.epochs)

    _, _, state_dict, _, _ = load_checkpoint(args.model_outfile)

    for k, tensor in state_dict.items():
        state_dict[k] = tensor.to(device)

    model.load_state_dict(state_dict)
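
# Aside: instead of the map_location lambdas used in these examples, torch.load
# also accepts a torch.device (or device string) directly. load_model below is
# a hypothetical helper illustrating that, not part of the original example:
import torch


def load_model(path, gpu=-1):
    # Remap storages to the requested GPU, or to CPU when gpu < 0
    device = torch.device('cuda:%d' % gpu) if gpu >= 0 else torch.device('cpu')
    return torch.load(path, map_location=device)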