Example #1
    def start(self):
        args = self.args

        # Default configuration is defined in args.py (see get_args())
        logger = self.get_logger()

        # Set random seed for reproducibility
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        np.random.seed(args.seed)
        random.seed(args.seed)
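        # Note: fully deterministic cuDNN behaviour may also require torch.backends.cudnn.benchmark = False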

        if not args.cuda:
            args.gpu = -1
        if torch.cuda.is_available() and args.cuda:
            print('Note: You are using GPU for training')
            torch.cuda.set_device(args.gpu)
            torch.cuda.manual_seed(args.seed)
        if torch.cuda.is_available() and not args.cuda:
            print('Warning: Using CPU for training')
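        # gpu == -1 is the legacy torchtext convention for "run on CPU", used by the iterators below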

        dataset_map = {
            'Reuters': Reuters,
            'AAPD': AAPD,
            'IMDB': IMDB,
            'Yelp2014': Yelp2014
        }

        if args.dataset not in dataset_map:
            raise ValueError('Unrecognized dataset')

        dataset_class = dataset_map[args.dataset]
        train_iter, dev_iter, test_iter = dataset_class.iters(args.data_dir,
                                                              args.word_vectors_file,
                                                              args.word_vectors_dir,
                                                              batch_size=args.batch_size,
                                                              device=args.gpu,
                                                              unk_init=UnknownWordVecCache.unk)

        config = deepcopy(args)
        config.dataset = train_iter.dataset
        config.target_class = train_iter.dataset.NUM_CLASSES
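        # The copied config carries the dataset object and class count into the model constructor below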

        print('Dataset:', args.dataset)
        print('No. of target classes:', train_iter.dataset.NUM_CLASSES)
        print('No. of train instances:', len(train_iter.dataset))
        print('No. of dev instances:', len(dev_iter.dataset))
        print('No. of test instances:', len(test_iter.dataset))

        if args.resume_snapshot:
            if args.cuda:
                model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
            else:
                model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage)
        else:
            model = CharCNN(config)
            if args.cuda:
                model.cuda()

        if not args.trained_model:
            save_path = os.path.join(args.save_path, dataset_map[args.dataset].NAME)
            os.makedirs(save_path, exist_ok=True)

        parameter = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = torch.optim.Adam(parameter, lr=args.lr, weight_decay=args.weight_decay)
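        # Filtering on requires_grad keeps frozen weights (e.g. fixed embeddings) out of the optimizer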

        train_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, train_iter, args.batch_size, args.gpu)
        test_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, test_iter, args.batch_size, args.gpu)
        dev_evaluator = EvaluatorFactory.get_evaluator(dataset_class, model, None, dev_iter, args.batch_size, args.gpu)

        if hasattr(train_evaluator, 'is_multilabel'):
            train_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(dev_evaluator, 'is_multilabel'):
            dev_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(dev_evaluator, 'ignore_lengths'):
            dev_evaluator.ignore_lengths = True
        if hasattr(test_evaluator, 'is_multilabel'):
            test_evaluator.is_multilabel = dataset_class.IS_MULTILABEL
        if hasattr(test_evaluator, 'ignore_lengths'):
            test_evaluator.ignore_lengths = True
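        # The hasattr guards keep this block generic across evaluators that may not expose these flags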

        trainer_config = {
            'optimizer': optimizer,
            'batch_size': args.batch_size,
            'log_interval': args.log_every,
            'patience': args.patience,
            'model_outfile': args.save_path,
            'logger': logger,
            'is_multilabel': dataset_class.IS_MULTILABEL,
            'ignore_lengths': True
        }
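        # 'patience' presumably sets the early-stopping budget; 'model_outfile' is where snapshots are written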

        trainer = TrainerFactory.get_trainer(args.dataset, model, None, train_iter, trainer_config, train_evaluator, test_evaluator, dev_evaluator)

        if not args.trained_model:
            trainer.train(args.epochs)
        else:
            if args.cuda:
                model = torch.load(args.trained_model, map_location=lambda storage, location: storage.cuda(args.gpu))
            else:
                model = torch.load(args.trained_model, map_location=lambda storage, location: storage)

        # Calculate dev and test metrics
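        # Reload the best snapshot the trainer saved, if it exposes one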
        if hasattr(trainer, 'snapshot_path'):
            model = torch.load(trainer.snapshot_path)

        self.evaluate_dataset('dev', dataset_map[args.dataset], model, None, dev_iter, args.batch_size,
                              is_multilabel=dataset_class.IS_MULTILABEL,
                              device=args.gpu)
        self.evaluate_dataset('test', dataset_map[args.dataset], model, None, test_iter, args.batch_size,
                              is_multilabel=dataset_class.IS_MULTILABEL,
                              device=args.gpu)
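
Both checkpoint branches above lean on the map_location hook of torch.load to remap tensor storages between devices. Below is a minimal standalone sketch of that pattern, assuming an old-style whole-model pickle as in the example; load_checkpoint and the 'model.pt' path are illustrative names, not part of the code above.

import torch

def load_checkpoint(path, gpu=-1):
    """Load a pickled model, remapping its tensors to the requested device.

    With gpu >= 0, every storage is moved onto that CUDA device; with
    gpu == -1, storages stay on (or are remapped to) the CPU, so a
    GPU-trained checkpoint still opens on a CPU-only machine.
    """
    if gpu >= 0:
        map_location = lambda storage, location: storage.cuda(gpu)
    else:
        map_location = lambda storage, location: storage
    return torch.load(path, map_location=map_location)

# e.g. model = load_checkpoint('model.pt', gpu=0 if torch.cuda.is_available() else -1)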