Example #1
 def __init__(self, channels, classes, imagesize, **kwargs):
     super(ModelCnn, self).__init__()
     self.layers = Cnn.get_layers(channels, classes, imagesize)
     self.distills = torch.nn.ModuleList([
         models.GlobalSumPool(
             h=models.DenseNet(headsize=32, layers=1, dropout=0.2),
             c=models.Classifier(32,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64, layers=1, dropout=0.2),
             c=models.Classifier(64,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64, layers=1, dropout=0.2),
             c=models.Classifier(64,
                                 classes + 1,
                                 useprototype=1,
                                 usenorm=0,
                                 p=2),
         )
     ])
Example #2
 def set_model(self):
     if self.args.wide:
         self.g = models.WideResNet().cuda()
         self.c1 = models.Classifier(self.g.nChannels,
                                     self.args.num_classes).cuda()
         self.c2 = models.Classifier(self.g.nChannels,
                                     self.args.num_classes).cuda()
     else:
         self.g = models.DenseNet().cuda()
         self.c1 = models.Classifier(self.g.in_planes,
                                     self.args.num_classes).cuda()
         self.c2 = models.Classifier(self.g.in_planes,
                                     self.args.num_classes).cuda()
Example #3
def train(args):
    if args.debug:
        train_dataloader = torchtext.data.BucketIterator(test_data,
                                                         batch_size=64,
                                                         train=True)
    else:
        train_dataloader = torchtext.data.BucketIterator(train_data,
                                                         batch_size=64,
                                                         train=True)
    valid_dataloader = torchtext.data.BucketIterator(valid_data,
                                                     batch_size=64,
                                                     train=False)

    classifier = models.Classifier(args)

    logger = pl.loggers.TensorBoardLogger('logs', args.encoder)
    lr_monitor = pl.callbacks.LearningRateMonitor('epoch')
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        monitor='valid_loss',
        dirpath=f"checkpoints/{args.encoder}",
        filename=f"version_{logger.version}")

    trainer = pl.Trainer(max_epochs=args.max_epochs,
                         logger=[logger],
                         callbacks=[lr_monitor, checkpoint_callback],
                         gpus=torch.cuda.device_count(),
                         progress_bar_refresh_rate=args.progress_bar,
                         weights_summary=None)

    print("Starting training...")
    trainer.fit(classifier, train_dataloader, valid_dataloader)
    print("Done training!")
Example #4
def eval_bert_plus_lstm():
    data_loader_test = prepare_byarticle_data(aug_count=constant.aug_count, batch_size=constant.batch_size, test_whole=True)
    # load model
    # bert model
    from pytorch_pretrained_bert import BertTokenizer, BertModel
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    state = torch.load("bert_model/pytorch_model.bin")
    bert_model.load_state_dict(state)
    article_model = bert_model
    title_model = bert_model
    # lstm model and classifier
    lstm_article = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim, 
                           num_layers=constant.n_layers, bidirectional=False, batch_first=True)
    lstm_title = nn.LSTM(input_size=768, hidden_size=constant.hidden_dim_tit,
                         num_layers=constant.n_layers, bidirectional=False, batch_first=True)
    classifier = models.Classifier(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)
    lstm_article.load_state_dict(torch.load("bert_model/9folds_large/fold_3_lstm_article_0.9709711056544115.bin"))
    lstm_title.load_state_dict(torch.load("bert_model/9folds_large/fold_3_lstm_title_0.9709711056544115.bin"))
    classifier.load_state_dict(torch.load("bert_model/9folds_large/fold_3_classifier_0.9709711056544115.bin"))

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        lstm_article.cuda()
        lstm_title.cuda()
        classifier.cuda()

    article_model.eval()
    title_model.eval()
    lstm_article.eval()
    lstm_title.eval()
    classifier.eval()
    accuracy, pred, id_ = eval_bert(article_model, title_model, classifier, data_loader_test, tokenizer, lstm_article, lstm_title, False, None, 1, True)
Example #5
    def __init__(self, opt, num_classes, source_train_ds, source_test_ds):
        self.source_train_ds = source_train_ds
        self.source_test_ds = source_test_ds
        self.opt = opt
        self.best_val = 0
        self.num_classes = num_classes

        # networks and optimizers
        self.mixer = models.Mixer(opt)
        self.classifier = models.Classifier(opt, num_classes)

        # initialize weights
        self.mixer.apply(utils.weights_init)
        self.classifier.apply(utils.weights_init)

        # Define the loss criterion
        self.criterion = nn.CrossEntropyLoss()

        if opt.gpu >= 0:
            self.mixer.cuda()
            self.classifier.cuda()
            self.criterion.cuda()

        # Defining optimizers
        self.optimizer_mixer = optim.Adam(self.mixer.parameters(),
                                          lr=opt.lr,
                                          betas=(opt.beta1, 0.999))
        self.optimizer_classifier = optim.Adam(self.classifier.parameters(),
                                               lr=opt.lr,
                                               betas=(opt.beta1, 0.999))
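utils.weights_init is not shown in this example; Module.apply simply calls the given function on every submodule, so it is usually the classic DCGAN-style initializer sketched below (an assumption, not necessarily this repository's exact implementation):

import torch.nn as nn

def weights_init(m):
    # Called once per submodule by Module.apply(); dispatch on the layer type.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)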
Example #6
    def load_models(self):
        """
        Load models.
        """

        self.N_class = numpy.max(self.test_codes) + 1
        network_units = list(map(int, self.args.network_units.split(',')))
        log('[Testing] using %d input channels' % self.test_images.shape[3])
        self.model = models.Classifier(
            self.N_class,
            resolution=(self.test_images.shape[3], self.test_images.shape[1],
                        self.test_images.shape[2]),
            architecture=self.args.network_architecture,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            start_channels=self.args.network_channels,
            dropout=self.args.network_dropout,
            units=network_units)
        assert os.path.exists(
            self.args.classifier_file
        ), 'state file %s not found' % self.args.classifier_file
        state = State.load(self.args.classifier_file)
        log('[Testing] read %s' % self.args.classifier_file)

        self.model.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(self.model):
            log('[Testing] classifier is not CUDA')
            self.model = self.model.cuda()
        log('[Testing] loaded classifier')

        # !
        self.model.eval()
        log('[Testing] set classifier to eval')
Example #7
 def make_distillpools(self, classes):
     return [
         models.GlobalSumPool(
             h=models.DenseNet(headsize=64,
                               bodysize=256,
                               tailsize=self.squash[3],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[3],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=128,
                               bodysize=256,
                               tailsize=self.squash[5],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[5],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         ),
         models.GlobalSumPool(
             h=models.DenseNet(headsize=256,
                               bodysize=1024,
                               tailsize=self.squash[7],
                               layers=self.layers,
                               dropout=0.2,
                               activation=self.act,
                               bias=self.usebias),
             c=models.Classifier(self.squash[7],
                                 classes + self.optout,
                                 useprototype=self.useprototype,
                                 usenorm=self.usenorm,
                                 p=self.p),
         )
     ]
Example #8
    def __init__(self, config):
        self.config = config

        # Create dataloader
        source_loader, target_loader, nclasses = datasets.form_visda_datasets(
            config=config, ignore_anomaly=False)
        self.source_loader = source_loader
        self.target_loader = target_loader
        self.nclasses = nclasses

        # Create model
        self.netF, self.nemb = models.form_models(config)
        print(self.netF)
        self.netC = models.Classifier(self.nemb, self.nclasses, nlayers=1)
        utils.weights_init(self.netC)
        print(self.netC)

        if self.config.exp == 'openset':
            self.ano_class_id = self.source_loader.dataset.class_to_idx[
                self.config.anomaly_class]

        self.netF = torch.nn.DataParallel(self.netF).cuda()
        self.netC = torch.nn.DataParallel(self.netC).cuda()

        # Create optimizer
        self.optimizerF = optim.SGD(self.netF.parameters(),
                                    lr=self.config.lr,
                                    momentum=config.momentum,
                                    weight_decay=0.0005)
        self.optimizerC = optim.SGD(self.netC.parameters(),
                                    lr=self.config.lrC,
                                    momentum=config.momentum,
                                    weight_decay=0.0005)
        self.lr_scheduler_F = optim.lr_scheduler.StepLR(self.optimizerF,
                                                        step_size=7000,
                                                        gamma=0.1)
        self.lr_scheduler_C = optim.lr_scheduler.StepLR(self.optimizerC,
                                                        step_size=7000,
                                                        gamma=0.1)

        # restoring checkpoint
        print('Restoring checkpoint ...')
        try:
            ckpt_data = torch.load(
                os.path.join(config.logdir, 'checkpoint.pth'))
            self.start_iter = ckpt_data['iter']
            self.netF.load_state_dict(ckpt_data['F_dict'])
            self.netC.load_state_dict(ckpt_data['C_dict'])
        except:
            # If loading failed, begin from scratch
            print('Checkpoint not found. Training from scratch ...')
            self.start_iter = 0

        # Other vars
        self.criterion = nn.CrossEntropyLoss().cuda()
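The checkpoint restore above expects a dict with the keys 'iter', 'F_dict' and 'C_dict'; the matching save call is not part of this example, but under that assumption it would look roughly like this (solver stands for a hypothetical instance of the class above):

import os
import torch

ckpt_data = {
    'iter': current_iter,                   # hypothetical iteration counter
    'F_dict': solver.netF.state_dict(),
    'C_dict': solver.netC.state_dict(),
}
torch.save(ckpt_data, os.path.join(solver.config.logdir, 'checkpoint.pth'))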
Example #9
def build_classifier(args, tasker):
  if 'node_cls' == args.task or 'static_node_cls' == args.task:
    mult = 1
  else:
    mult = 2
  if 'gru' in args.model or 'lstm' in args.model:
    in_feats = args.gcn_parameters['lstm_l2_feats'] * mult
  elif args.model == 'skipfeatsgcn' or args.model == 'skipfeatsegcn_h':
    in_feats = (args.gcn_parameters['layer_2_feats'] + args.gcn_parameters['feats_per_node']) * mult
  else:
    in_feats = args.gcn_parameters['layer_2_feats'] * mult
  return mls.Classifier(args, in_features=in_feats, out_features=tasker.num_classes).to(args.device)
Example #10
    def main(self):
        """
        Main which should be overwritten.
        """

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        log('[Testing] read %s' % self.args.test_images_file)

        # For handling both color and gray images.
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)
            log('[Testing] no color images, adjusted size')
        self.resolution = self.test_images.shape[2]
        log('[Testing] resolution %d' % self.resolution)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            numpy.int)
        self.test_codes = self.test_codes[:, self.args.label_index]
        log('[Testing] read %s' % self.args.test_codes_file)

        N_class = numpy.max(self.test_codes) + 1
        network_units = list(map(int, self.args.network_units.split(',')))
        log('[Testing] using %d input channels' % self.test_images.shape[3])
        self.model = models.Classifier(
            N_class,
            resolution=(self.test_images.shape[3], self.test_images.shape[1],
                        self.test_images.shape[2]),
            architecture=self.args.network_architecture,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            start_channels=self.args.network_channels,
            dropout=self.args.network_dropout,
            units=network_units)

        assert os.path.exists(
            self.args.state_file
        ), 'state file %s not found' % self.args.state_file
        state = State.load(self.args.state_file)
        log('[Testing] read %s' % self.args.state_file)

        self.model.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(self.model):
            log('[Testing] model is not CUDA')
            self.model = self.model.cuda()
        log('[Testing] loaded model')

        self.model.eval()
        log('[Testing] set classifier to eval')

        self.test()
Example #11
    def load_model(self):
        """
        Load model.
        """

        database = utils.read_hdf5(self.args.database_file).astype(numpy.float32)
    log('[Attack] read %s' % self.args.database_file)

        self.N_font = database.shape[0]
        self.N_class = database.shape[1]
        resolution = database.shape[2]

        database = database.reshape((database.shape[0] * database.shape[1], database.shape[2], database.shape[3]))
        database = torch.from_numpy(database)
        if self.args.use_gpu:
            database = database.cuda()
        database = torch.autograd.Variable(database, False)

        N_theta = self.test_theta.shape[1]
        log('[Attack] using %d N_theta' % N_theta)
        decoder = models.AlternativeOneHotDecoder(database, self.N_font, self.N_class, N_theta)
        decoder.eval()

        image_channels = 1 if N_theta <= 7 else 3
        network_units = list(map(int, self.args.network_units.split(',')))
        log('[Attack] using %d input channels' % image_channels)
        classifier = models.Classifier(self.N_class, resolution=(image_channels, resolution, resolution),
                                       architecture=self.args.network_architecture,
                                       activation=self.args.network_activation,
                                       batch_normalization=not self.args.network_no_batch_normalization,
                                       start_channels=self.args.network_channels,
                                       dropout=self.args.network_dropout,
                                       units=network_units)

        assert os.path.exists(self.args.classifier_file), 'state file %s not found' % self.args.classifier_file
        state = State.load(self.args.classifier_file)
        log('[Attack] read %s' % self.args.classifier_file)

        classifier.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(classifier):
            log('[Attack] classifier is not CUDA')
            classifier = classifier.cuda()
        log('[Attack] loaded classifier')

        # !
        classifier.eval()
        log('[Attack] set classifier to eval')

        self.model = models.DecoderClassifier(decoder, classifier)
Example #12
def predict():
    # prepare data_loader and vocab
    use_by_article = False
    if use_by_article:
        _, data_loader_test, vocab = prepare_byarticle_data()
    else:
        _, _, data_loader_test, vocab = prepare_data('./data_new/preprocessed_new_{}', constant.batch_size)
    
    if constant.use_bert:
        from pytorch_pretrained_bert import BertTokenizer, BertModel
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        state = torch.load("bert_model/pytorch_model.bin")
        bert_model.load_state_dict(state)
        article_model = bert_model
        title_model = bert_model
        # print("finish bert model loading")
        LR = models.Classifier(hidden_dim1=768, hidden_dim2=768)
        classifier_state = torch.load("bert_model/classifier.bin")
        LR.load_state_dict(classifier_state)
        # 
    else:
        # for basic LSTM model
        article_model = models.LSTM(vocab=vocab, 
                        embedding_size=constant.emb_dim, 
                        hidden_size=constant.hidden_dim, 
                        num_layers=constant.n_layers,
                        pretrain_emb=constant.pretrain_emb
                        )
        title_model = models.LSTM(vocab=vocab,
                        embedding_size=constant.emb_dim,
                        hidden_size=constant.hidden_dim_tit,
                        num_layers=constant.n_layers,
                        pretrain_emb=constant.pretrain_emb
                        )
        LR = models.LR(hidden_dim1=constant.hidden_dim, hidden_dim2=constant.hidden_dim_tit)

        # load parameters
        article_model = load_model(article_model, model_name="article_model")
        title_model = load_model(title_model, model_name="title_model")
        LR = load_model(LR, model_name="LR")

    if constant.USE_CUDA:
        article_model.cuda()
        title_model.cuda()
        LR.cuda()

    # predict and save result in result folder
    predict(article_model, title_model, LR, data_loader_test, name="bypublisher", print_pred=True)
Example #13
def load_model(args):
    classifier = models.Classifier()

    model_dir_class = os.path.join(args.resume)

    model_std = torch.load(os.path.join(model_dir_class,
                                        'model_best_class.pth.tar'),
                           map_location="cuda:" + str(args.gpu))
    classifier.load_state_dict(model_std)
    classifier = classifier.cuda()

    #model = torch.nn.DataParallel(model,
    #device_ids=list(range(torch.cuda.device_count()))).cuda()

    return classifier
Example #14
    def load_models(self):
        """
        Init models.
        """

        log('[Training] using %d input channels' % self.train_images.shape[3])
        network_units = list(map(int, self.args.network_units.split(',')))
        self.encoder = models.LearnedVariationalEncoder(
            self.args.latent_space_size,
            0,
            resolution=(self.train_images.shape[3], self.train_images.shape[1],
                        self.train_images.shape[2]),
            architecture=self.args.network_architecture,
            start_channels=self.args.network_channels,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            units=network_units)
        self.decoder = models.LearnedDecoder(
            self.args.latent_space_size,
            resolution=(self.train_images.shape[3], self.train_images.shape[1],
                        self.train_images.shape[2]),
            architecture=self.args.network_architecture,
            start_channels=self.args.network_channels,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            units=network_units)
        log(self.encoder)
        log(self.decoder)

        classifier_units = list(map(int,
                                    self.args.classifier_units.split(',')))
        self.classifier = models.Classifier(
            1,
            resolution=(self.train_images.shape[3], self.train_images.shape[1],
                        self.train_images.shape[2]),
            architecture=self.args.classifier_architecture,
            activation=self.args.classifier_activation,
            batch_normalization=not self.args.classifier_no_batch_normalization,
            start_channels=self.args.classifier_channels,
            dropout=self.args.classifier_dropout,
            units=classifier_units,
            kernel_size=6)
        log(self.classifier)
Example #15
def load_model(args):
    classifier = models.Classifier()
    stract = models.Stractor()

    model_dir_class = os.path.join(args.resume, 'Classifier')

    model_dir_str = os.path.join(args.resume, 'featureStractor')


    model_std = torch.load(os.path.join(model_dir_class, 'model_1_class.pth.tar'), map_location="cuda:"+str(args.gpu))
    classifier.load_state_dict(model_std)
    classifier = classifier.cuda()

    model_std = torch.load(os.path.join(model_dir_str, 'model_1_feaStr.pth.tar'), map_location="cuda:"+str(args.gpu))
    stract.load_state_dict(model_std)
    stract = stract.cuda()


    #model = torch.nn.DataParallel(model,
                                  #device_ids=list(range(torch.cuda.device_count()))).cuda()

    return stract, classifier
Example #16
 def __init__(self):
     self.slack_token = token
     self.rtmclient = slack.RTMClient(token=self.slack_token)
     self.webclient = slack.WebClient(token=self.slack_token)
     self.channels = None
     self.thread_ts = None
     self.user = None
     self.text_in = None
     self.text_out = None
     self.schedule = schedule.Read_google_sheet_schedule()
     self.responses_df = self.schedule.run(command='get_responses')
     self.commands = [
         'send_info', 'show_weather', 'get_schedule', 'tell_joke',
         'send_email', 'who_is'
     ]
     self.classifier = models.Classifier()
     self.state_in = {
         'intent': None,
         'command': None,
         'state': 'normal',
         'reply': None,
         'query_params': {}
     }
     self.state_out = {
         'intent': None,
         'command': None,
         'state': 'normal',
         'reply': None,
         'query_params': {}
     }
     self.send_email = email.Send_email()
     self.weather_descr = pd.read_csv(
         './data/Multilingual_Weather_Conditions.csv',
         sep=',',
         encoding='utf-8')
     self.sleep_cnt = 0
     self.time_state = 'normal'
Example #17
def train_cnn(train_dataset,
              validation_dataset,
              batch_size,
              num_filters,
              filter_sizes,
              use_elmo=False,
              epochs=15,
              patience=None,
              learning_rate=3e-4,
              num_classes=2,
              use_gpu=False):
    """
    Trains CNN on train_dataset; optionally performs early stopping based on validation loss. Initialises word embeddings with pre-trained GloVe, or uses a pre-trained ELMo model to dynamically compute embeddings.
    The CNN has one convolution layer for each ngram filter size.
    
    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    num_filters: int
        output dim for each convolutional layer, which is the number of 'filters' learned by that layer
    filter_sizes: Tuple[int]
        specifies the number of convolutional layers and their sizes
    use_elmo: bool
        use ELMo embeddings if True | GloVe embeddings if False
    epochs: int
        total number of epochs to train on (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to improve; 'None' to disable early stopping
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU
    
    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        word_embeddings: TextFieldEmbedder = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
        word_embeddings: TextFieldEmbedder = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)  # numericalize the data

    # CNN encoder
    encoder: Seq2VecEncoder = CnnEncoder(
        embedding_dim=word_embeddings.get_output_dim(),
        num_filters=num_filters,
        ngram_filter_sizes=filter_sizes)

    # Feedforward:
    classifier_feedforward: FeedForward = nn.Linear(encoder.get_output_dim(),
                                                    num_classes)

    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)

    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    if patience is None:  # Train on both train+validation datasets if patience is None
        trainer = Trainer(model=model,
                          optimizer=optimizer,
                          iterator=iterator,
                          train_dataset=train_dataset + validation_dataset,
                          cuda_device=0 if use_gpu else -1,
                          num_epochs=epochs)

    else:
        trainer = Trainer(
            model=model,
            optimizer=optimizer,
            iterator=iterator,
            train_dataset=train_dataset,
            validation_dataset=validation_dataset,
            cuda_device=0 if use_gpu else -1,
            patience=
            patience,  # stop if loss does not improve for 'patience' epochs
            num_epochs=epochs)

    metrics = trainer.train()
    #     print(metrics)

    return model, vocab, metrics['training_epochs']
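A hedged usage sketch of train_cnn above; read_instances stands in for whatever AllenNLP DatasetReader the project uses to produce the List[Instance] inputs, and the hyperparameter values are purely illustrative:

import torch

train_instances = read_instances("train.jsonl")    # hypothetical reader helper
valid_instances = read_instances("valid.jsonl")    # hypothetical reader helper

model, vocab, n_epochs = train_cnn(
    train_instances,
    valid_instances,
    batch_size=32,
    num_filters=100,                  # 100 feature maps per n-gram size
    filter_sizes=(2, 3, 4),           # one conv layer each for 2-, 3- and 4-grams
    use_elmo=False,                   # GloVe embeddings
    epochs=15,
    patience=3,                       # stop after 3 epochs without validation improvement
    use_gpu=torch.cuda.is_available())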
Example #18
def main(cfg, model_cfg):
    # Load Configuration
    cfg = configuration.params.from_json(cfg)                   # Train or Eval cfg
    model_cfg = configuration.model.from_json(model_cfg)        # BERT_cfg
    set_seeds(cfg.seed)

    # Load Data & Create Criterion
    data = load_data(cfg)
    if cfg.uda_mode:
        unsup_criterion = nn.KLDivLoss(reduction='none')
        data_iter = [data.sup_data_iter(), data.unsup_data_iter()] if cfg.mode=='train' \
            else [data.sup_data_iter(), data.unsup_data_iter(), data.eval_data_iter()]  # train_eval
    else:
        data_iter = [data.sup_data_iter()]
    sup_criterion = nn.CrossEntropyLoss(reduction='none')
    
    # Load Model
    model = models.Classifier(model_cfg, len(data.TaskDataset.labels))

    # Create trainer
    trainer = train.Trainer(cfg, model, data_iter, optim.optim4GPU(cfg, model), get_device())

    # Training
    def get_loss(model, sup_batch, unsup_batch, global_step):

        # logits -> prob(softmax) -> log_prob(log_softmax)

        # batch
        input_ids, segment_ids, input_mask, label_ids = sup_batch
        if unsup_batch:
            ori_input_ids, ori_segment_ids, ori_input_mask, \
            aug_input_ids, aug_segment_ids, aug_input_mask  = unsup_batch

            input_ids = torch.cat((input_ids, aug_input_ids), dim=0)
            segment_ids = torch.cat((segment_ids, aug_segment_ids), dim=0)
            input_mask = torch.cat((input_mask, aug_input_mask), dim=0)
            
        # logits
        logits = model(input_ids, segment_ids, input_mask)

        # sup loss
        sup_size = label_ids.shape[0]            
        sup_loss = sup_criterion(logits[:sup_size], label_ids)  # shape : train_batch_size
        if cfg.tsa:
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum(  F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids]  , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        # unsup loss
        if unsup_batch:
            # ori
            with torch.no_grad():
                ori_logits = model(ori_input_ids, ori_segment_ids, ori_input_mask)
                ori_prob   = F.softmax(ori_logits, dim=-1)    # KLdiv target
                # ori_log_prob = F.log_softmax(ori_logits, dim=-1)

                # confidence-based masking
                if cfg.uda_confidence_thresh != -1:
                    unsup_loss_mask = torch.max(ori_prob, dim=-1)[0] > cfg.uda_confidence_thresh
                    unsup_loss_mask = unsup_loss_mask.type(torch.float32)
                else:
                    unsup_loss_mask = torch.ones(len(logits) - sup_size, dtype=torch.float32)
                unsup_loss_mask = unsup_loss_mask.to(_get_device())
                    
            # aug
            # softmax temperature controlling
            uda_softmax_temp = cfg.uda_softmax_temp if cfg.uda_softmax_temp > 0 else 1.
            aug_log_prob = F.log_softmax(logits[sup_size:] / uda_softmax_temp, dim=-1)

            # KLdiv loss
            """
                nn.KLDivLoss (kl_div)
                input : log_prob (log_softmax)
                target : prob    (softmax)
                https://pytorch.org/docs/stable/nn.html

                unsup_loss is divided by the number of unsup_loss_mask entries;
                this differs from the official Google UDA code,
                where unsup_loss is divided by the total count
                https://github.com/google-research/uda/blob/master/text/uda.py#L175
            """
            unsup_loss = torch.sum(unsup_criterion(aug_log_prob, ori_prob), dim=-1)
            unsup_loss = torch.sum(unsup_loss * unsup_loss_mask, dim=-1) / torch.max(torch.sum(unsup_loss_mask, dim=-1), torch_device_one())
            final_loss = sup_loss + cfg.uda_coeff*unsup_loss

            return final_loss, sup_loss, unsup_loss
        return sup_loss, None, None

    # evaluation
    def get_acc(model, batch):
        # input_ids, segment_ids, input_mask, label_id, sentence = batch
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)

        result = (label_pred == label_id).float()
        accuracy = result.mean()
        # output_dump.logs(sentence, label_pred, label_id)    # output dump

        return accuracy, result

    if cfg.mode == 'train':
        trainer.train(get_loss, None, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'train_eval':
        trainer.train(get_loss, get_acc, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'eval':
        results = trainer.eval(get_acc, cfg.model_file, None)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy :' , total_accuracy)
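The KLDivLoss comment block inside get_loss above relies on the PyTorch convention that the input is a log-probability and the target a probability; a minimal standalone illustration of that convention with reduction='none', as used in this example:

import torch
import torch.nn.functional as F

criterion = torch.nn.KLDivLoss(reduction='none')
student_logits = torch.randn(4, 3)
teacher_logits = torch.randn(4, 3)
elementwise = criterion(F.log_softmax(student_logits, dim=-1),   # input: log-probabilities
                        F.softmax(teacher_logits, dim=-1))       # target: probabilities
per_example_kl = elementwise.sum(dim=-1)                         # sum over classes, as in get_loss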
Example #19
def main():
    # Load Configuration
    model_cfg = configuration.model.from_json(cfg.model_cfg)        # BERT_cfg
    set_seeds(cfg.seed)

    # Load Data & Create Criterion
    #data = load_data(cfg)

    #if cfg.uda_mode or cfg.mixmatch_mode:
    #    data_iter = [data.sup_data_iter(), data.unsup_data_iter()] if cfg.mode=='train' \
    #        else [data.sup_data_iter(), data.unsup_data_iter(), data.eval_data_iter()]  # train_eval
    #else:
    #    data_iter = [data.sup_data_iter()]

    # my own implementation
    dataset = DataSet(cfg)
    train_dataset, val_dataset, unsup_dataset = dataset.get_dataset()

    # Create the DataLoaders for our training and validation sets.
    train_dataloader = DataLoader(
                train_dataset,  # The training samples.
                sampler = RandomSampler(train_dataset), # Select batches randomly
                batch_size = cfg.train_batch_size # Trains with this batch size.
            )

    validation_dataloader = DataLoader(
                val_dataset, # The validation samples.
                sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
                batch_size = cfg.eval_batch_size # Evaluate with this batch size.
            )

    unsup_dataloader = None
    if unsup_dataset:
        unsup_dataloader = DataLoader(
            unsup_dataset,
            sampler = RandomSampler(unsup_dataset),
            batch_size = cfg.train_batch_size
        )

    if cfg.uda_mode or cfg.mixmatch_mode:
        data_iter = [train_dataloader, unsup_dataloader, validation_dataloader] 
    else:
        data_iter = [train_dataloader, validation_dataloader]

    ema_optimizer = None
    ema_model = None

    if cfg.model == "custom":
        model = models.Classifier(model_cfg, NUM_LABELS[cfg.task])
    elif cfg.model == "bert":
        model = BertForSequenceClassificationCustom.from_pretrained(
            "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
            num_labels = NUM_LABELS[cfg.task],
            output_attentions = False, # Whether the model returns attentions weights.
            output_hidden_states = False, # Whether the model returns all hidden-states.
        )


    if cfg.uda_mode:
        if cfg.unsup_criterion == 'KL':
            unsup_criterion = nn.KLDivLoss(reduction='none')
        else:
            unsup_criterion = nn.MSELoss(reduction='none')
        sup_criterion = nn.CrossEntropyLoss(reduction='none')
        optimizer = optim.optim4GPU(cfg, model)
    elif cfg.mixmatch_mode:
        train_criterion = SemiLoss()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
        ema_model = models.Classifier(model_cfg,  NUM_LABELS[cfg.task])
        for param in ema_model.parameters():
            param.detach_()
        ema_optimizer= WeightEMA(cfg, model, ema_model, alpha=cfg.ema_decay)
    else:
        sup_criterion = nn.CrossEntropyLoss(reduction='none')
        optimizer = optim.optim4GPU(cfg, model)
    
    # Create trainer
    trainer = train.Trainer(cfg, model, data_iter, optimizer, get_device(), ema_model, ema_optimizer)

    # loss functions
    def get_sup_loss(model, sup_batch, unsup_batch, global_step):
        # batch
        input_ids, segment_ids, input_mask, og_label_ids, num_tokens = sup_batch

        # convert label ids to hot vectors
        sup_size = input_ids.size(0)
        label_ids = torch.zeros(sup_size, 2).scatter_(1, og_label_ids.cpu().view(-1,1), 1)
        label_ids = label_ids.cuda(non_blocking=True)

        # sup mixup
        sup_l = np.random.beta(cfg.alpha, cfg.alpha)
        sup_l = max(sup_l, 1-sup_l)
        sup_idx = torch.randperm(sup_size)

        if cfg.sup_mixup and 'word' in cfg.sup_mixup:
            if cfg.simple_pad:
                simple_pad(input_ids, input_mask, num_tokens)
                c_input_ids = None
            else:
                input_ids, c_input_ids = pad_for_word_mixup(
                    input_ids, input_mask, num_tokens, sup_idx
                )
        else:
            c_input_ids = None

        # sup loss
        hidden = model(
            input_ids=input_ids, 
            segment_ids=segment_ids, 
            input_mask=input_mask,
            output_h=True,
            mixup=cfg.sup_mixup,
            shuffle_idx=sup_idx,
            clone_ids=c_input_ids,
            l=sup_l,
            manifold_mixup=cfg.manifold_mixup,
            simple_pad=cfg.simple_pad,
            no_grad_clone=cfg.no_grad_clone
        )
        logits = model(input_h=hidden)

        if cfg.sup_mixup:
            label_ids = mixup_op(label_ids, sup_l, sup_idx)

        sup_loss = -torch.sum(F.log_softmax(logits, dim=1) * label_ids, dim=1)

        if cfg.tsa and cfg.tsa != "none":
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum(  F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids]  , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(og_label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        return sup_loss, sup_loss, sup_loss, sup_loss


    def get_loss_ict(model, sup_batch, unsup_batch, global_step):
        # batch
        input_ids, segment_ids, input_mask, og_label_ids, num_tokens = sup_batch
        ori_input_ids, ori_segment_ids, ori_input_mask, \
        aug_input_ids, aug_segment_ids, aug_input_mask, \
        ori_num_tokens, aug_num_tokens = unsup_batch

        # convert label ids to hot vectors
        sup_size = input_ids.size(0)
        label_ids = torch.zeros(sup_size, 2).scatter_(1, og_label_ids.cpu().view(-1,1), 1)
        label_ids = label_ids.cuda(non_blocking=True)

        # sup mixup
        sup_l = np.random.beta(cfg.alpha, cfg.alpha)
        sup_l = max(sup_l, 1-sup_l)
        sup_idx = torch.randperm(sup_size)

        if cfg.sup_mixup and 'word' in cfg.sup_mixup:
            if cfg.simple_pad:
                simple_pad(input_ids, input_mask, num_tokens)
                c_input_ids = None
            else:
                input_ids, c_input_ids = pad_for_word_mixup(
                    input_ids, input_mask, num_tokens, sup_idx
                )
        else:
            c_input_ids = None

        # sup loss
        if cfg.model == "bert":
            logits = model(
                input_ids=input_ids,
                c_input_ids=c_input_ids,
                attention_mask=input_mask,
                mixup=cfg.sup_mixup,
                shuffle_idx=sup_idx,
                l=sup_l,
                manifold_mixup = cfg.manifold_mixup,
                no_pretrained_pool=cfg.no_pretrained_pool
            )
        else:
            hidden = model(
                input_ids=input_ids, 
                segment_ids=segment_ids, 
                input_mask=input_mask,
                output_h=True,
                mixup=cfg.sup_mixup,
                shuffle_idx=sup_idx,
                clone_ids=c_input_ids,
                l=sup_l,
                manifold_mixup=cfg.manifold_mixup,
                simple_pad=cfg.simple_pad,
                no_grad_clone=cfg.no_grad_clone
            )
            logits = model(input_h=hidden)

        if cfg.sup_mixup:
            label_ids = mixup_op(label_ids, sup_l, sup_idx)

        sup_loss = -torch.sum(F.log_softmax(logits, dim=1) * label_ids, dim=1)

        if cfg.tsa and cfg.tsa != "none":
            tsa_thresh = get_tsa_thresh(cfg.tsa, global_step, cfg.total_steps, start=1./logits.shape[-1], end=1)
            larger_than_threshold = torch.exp(-sup_loss) > tsa_thresh   # prob = exp(log_prob), prob > tsa_threshold
            # larger_than_threshold = torch.sum(  F.softmax(pred[:sup_size]) * torch.eye(num_labels)[sup_label_ids]  , dim=-1) > tsa_threshold
            loss_mask = torch.ones_like(og_label_ids, dtype=torch.float32) * (1 - larger_than_threshold.type(torch.float32))
            sup_loss = torch.sum(sup_loss * loss_mask, dim=-1) / torch.max(torch.sum(loss_mask, dim=-1), torch_device_one())
        else:
            sup_loss = torch.mean(sup_loss)

        if cfg.no_unsup_loss:
            return sup_loss, sup_loss, sup_loss, sup_loss

        # unsup loss
        with torch.no_grad():
            if cfg.model == "bert":
                ori_logits = model(
                    input_ids = ori_input_ids,
                    attention_mask = ori_input_mask,
                    no_pretrained_pool=cfg.no_pretrained_pool
                )
            else:
                ori_logits = model(ori_input_ids, ori_segment_ids, ori_input_mask)
            ori_prob   = F.softmax(ori_logits, dim=-1)    # KLdiv target


        # mixup
        l = np.random.beta(cfg.alpha, cfg.alpha)
        l = max(l, 1-l)
        idx = torch.randperm(hidden.size(0))

        
        if cfg.mixup and 'word' in cfg.mixup:
            ori_input_ids, c_ori_input_ids = pad_for_word_mixup(
                ori_input_ids, ori_input_mask, ori_num_tokens, idx
            )
        else:
            c_ori_input_ids = None

        
        #for i in range(0, batch_size):
        #    new_mask = ori_input_mask[i]
        #    new_ids = ori_input_ids[i]
        #    old_ids = c_ori_input_ids[i]
        #    pdb.set_trace()
        if cfg.model == "bert":
            logits = model(
                input_ids=ori_input_ids,
                c_input_ids=c_ori_input_ids,
                attention_mask=ori_input_mask,
                mixup=cfg.mixup,
                shuffle_idx=idx,
                l=l,
                manifold_mixup = cfg.manifold_mixup,
                no_pretrained_pool=cfg.no_pretrained_pool
            )
        else:
            hidden = model(
                input_ids=ori_input_ids, 
                segment_ids=ori_segment_ids, 
                input_mask=ori_input_mask,
                output_h=True,
                mixup=cfg.mixup,
                shuffle_idx=idx,
                clone_ids=c_ori_input_ids,
                l=l,
                manifold_mixup=cfg.manifold_mixup,
                simple_pad=cfg.simple_pad,
                no_grad_clone=cfg.no_grad_clone
            )
            logits = model(input_h=hidden)

        if cfg.mixup:
            ori_prob = mixup_op(ori_prob, l, idx)

        probs_u = torch.softmax(logits, dim=1)
        unsup_loss = torch.mean((probs_u - ori_prob)**2)

        w = cfg.uda_coeff * sigmoid_rampup(global_step, cfg.consistency_rampup_ends - cfg.consistency_rampup_starts)
        final_loss = sup_loss + w*unsup_loss
        return final_loss, sup_loss, unsup_loss, w*unsup_loss

    # evaluation
    def get_acc(model, batch):
        # input_ids, segment_ids, input_mask, label_id, sentence = batch
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)

        result = (label_pred == label_id).float()
        accuracy = result.mean()
        # output_dump.logs(sentence, label_pred, label_id)    # output dump

        return accuracy, result

    if cfg.mode == 'train':
        trainer.train(get_loss, None, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'train_eval':
        if cfg.mixmatch_mode:
            trainer.train(get_mixmatch_loss_short, get_acc, cfg.model_file, cfg.pretrain_file)
        elif cfg.uda_test_mode:
            trainer.train(get_sup_loss, get_acc, cfg.model_file, cfg.pretrain_file)
        elif cfg.uda_test_mode_two:
            trainer.train(get_loss_ict, get_acc, cfg.model_file, cfg.pretrain_file)
        else:
            trainer.train(get_sup_loss, get_acc, cfg.model_file, cfg.pretrain_file)

    if cfg.mode == 'eval':
        results = trainer.eval(get_acc, cfg.model_file, None)
        total_accuracy = torch.cat(results).mean().item()
        print('Accuracy :' , total_accuracy)
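mixup_op is defined elsewhere in this repository; given how it is used above (interpolating one-hot label vectors and teacher probabilities with a coefficient l and a permutation idx), it presumably reduces to the standard mixup convex combination. A sketch under that assumption:

def mixup_op(values, l, idx):
    # Standard mixup: interpolate each row with a shuffled partner row.
    return l * values + (1 - l) * values[idx]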
Example #20
def create_model(args):
    print("> Create model.")

    ## Gensim
    # word_model = Word2Vec.load("Word2Vec_V1.h5")
    # vectors = word_model.wv
    # all_words = vectors.index2word
    # mean_vector = vectors.vectors.mean(axis=0)
    # wei = torch.tensor(vectors.vectors, dtype=torch.float)
    ## Gensim

    with open(os.path.join(args.data_path, "dict&vectors.pkl"), "rb") as f:
        [word2idx, vectors] = pickle.load(f)

    global model
    if args.attn == 1:
        hidden = args.hidden_size
        encoder1 = models.Encoder(hidden_size=hidden, nlayers=1)
        encoder2 = models.Encoder(input_size=hidden*2*4, hidden_size=hidden, nlayers=1)

        attention_dim = 128
        attention = models.Attention(attention_dim, attention_dim, attention_dim)

        model = models.Classifier(encoder1, encoder2, attention,
                                  hidden_size=hidden,
                                  rec_len=rec_len,
                                  rep_len=rep_len,
                                  num_of_words=len(word2idx),
                                  drop_p=args.drop_p)

    elif args.attn == 2:
        model = models.BiDAF(window_size=args.max_length,
                             hidden_size=args.hidden_size,
                             drop_p=args.drop_p,
                             num_of_words=len(word2idx)
                            )
    elif args.attn == 3:
        model = models.RNNatt(window_size=args.max_length,
                              hidden_size=args.hidden_size,
                              drop_p=args.drop_p,
                              num_of_words=len(word2idx),
                              rec_len=rec_len,
                              rep_len=rep_len
                            )
    elif args.attn == 4:
        model = models.RNNatt_weight(window_size=args.max_length,
                                     hidden_size=args.hidden_size,
                                     drop_p=args.drop_p,
                                     num_of_words=len(word2idx),
                                     rec_len=rec_len,
                                     rep_len=rep_len
                                    )
    else: # args.attn == 0
        model = models.RNNbase(window_size=args.max_length,
                               hidden_size=args.hidden_size,
                               drop_p=args.drop_p,
                               num_of_words=len(word2idx)
                            )


    model.word_embedding.load_state_dict({'weight': vectors.to(torch.float32)})
    model.word_embedding.weight.requires_grad = False

    model = model.to(device)
    print(model)

    global optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr_rate) # , betas=(0.9, 0.999), weight_decay=1e-3)

    return word2idx, vectors
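Loading the pre-trained matrix through load_state_dict and then freezing the weight, as done above, matches the behaviour of PyTorch's built-in constructor; a small equivalent sketch:

import torch
import torch.nn as nn

vectors = torch.randn(5000, 300)   # hypothetical pre-trained embedding matrix
embedding = nn.Embedding.from_pretrained(vectors.to(torch.float32), freeze=True)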
Example #21
def main(args):
    def precision(confusion):
        correct = confusion * torch.eye(confusion.shape[0])
        incorrect = confusion - correct
        correct = correct.sum(0)
        incorrect = incorrect.sum(0)
        precision = correct / (correct + incorrect)
        total_correct = correct.sum().item()
        total_incorrect = incorrect.sum().item()
        percent_correct = total_correct / (total_correct + total_incorrect)
        return precision, percent_correct

    def get_lr(optimizer):
        for param_group in optimizer.param_groups:
            return param_group['lr']

    class Batch:
        def __init__(self, type, loader, dataset):
            self.type = type
            self.loader = loader
            self.batch = tqdm(loader, total=len(dataset) // args.batchsize)
            self.ll = 0
            self.confusion = torch.zeros(datapack.num_classes,
                                         datapack.num_classes)
            self.total = 0
            self.correct = 0
            self.batch_step = 0

        def __iter__(self):
            return iter(self.batch)

        def log_step(self):
            global global_step
            self.batch_step += 1
            self.ll += loss.detach().item()

            _, predicted = y.detach().max(1)
            self.total += target.size(0)
            self.correct += predicted.eq(target).sum().item()
            running_loss = self.ll / self.batch_step
            accuracy = 100.0 * self.correct / self.total

            self.batch.set_description(
                f'Epoch: {epoch} {args.optim_class} LR: {get_lr(optim)} '
                f'{self.type} Loss: {running_loss:.4f} '
                f'Accuracy {accuracy:.4f}% {self.correct}/{self.total}')

            if self.type == 'test':
                for p, t in zip(predicted, target):
                    self.confusion[p, t] += 1

            writer.add_scalar(f'{self.type}_loss', loss.item(), global_step)
            writer.add_scalar(f'{self.type}_accuracy', accuracy, global_step)
            global_step += 1
            return accuracy

    def log_epoch(confusion, best_precision, test_accuracy, train_accuracy):
        precis, ave_precis = precision(confusion)
        print('')
        print(
            f'{Fore.CYAN}RESULTS FOR EPOCH {Fore.LIGHTYELLOW_EX}{epoch}{Style.RESET_ALL}'
        )
        for i, cls in enumerate(datapack.class_list):
            print(
                f'{Fore.LIGHTMAGENTA_EX}{cls} : {precis[i].item()}{Style.RESET_ALL}'
            )
        best_precision = ave_precis if ave_precis > best_precision else best_precision
        print(
            f'{Fore.GREEN}ave precision : {ave_precis} best: {best_precision} test accuracy {test_accuracy} '
            f'train accuracy {train_accuracy}{Style.RESET_ALL}')
        return ave_precis, best_precision

    def nop(args, x, target):
        return x.to(args.device), target.to(args.device)

    def flatten(args, x, target):
        return x.flatten(start_dim=1).to(args.device), target.to(args.device)

    """ reproducibility """
    if args.seed is not None:
        torch.manual_seed(args.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args.seed)
    """ variables """
    run_dir = f'data/models/classifiers/{args.dataset_name}/{args.model_name}/run_{args.run_id}'
    writer = SummaryWriter(log_dir=run_dir)
    global_step = 0.0
    ave_precision = 0.0
    best_precision = 0.0
    train_accuracy = 0.0
    test_accuracy = 0.0
    """ data """
    datapack = package.datasets[args.dataset_name]
    trainset, testset = datapack.make(args.dataset_train_len,
                                      args.dataset_test_len,
                                      data_root=args.dataroot)
    train = DataLoader(trainset,
                       batch_size=args.batchsize,
                       shuffle=True,
                       drop_last=True,
                       pin_memory=True)
    test = DataLoader(testset,
                      batch_size=args.batchsize,
                      shuffle=True,
                      drop_last=True,
                      pin_memory=True)
    augment = flatten if args.model_type == 'fc' else nop
    """ model """
    if 'model_stride' in args:
        encoder, shapes = make_layers(args.model_type,
                                      args.model_encoder,
                                      datapack.shape,
                                      stride=args.model_stride)
    else:
        encoder, shapes = make_layers(args.model_type, args.model_encoder,
                                      datapack.shape)

    classifier = models.Classifier(encoder,
                                   shapes[-1],
                                   num_classes=datapack.num_classes).to(
                                       args.device)

    if args.load is not None:
        classifier.load_state_dict(torch.load(args.load))
    """ optimizer """
    optim, scheduler = config.get_optim(args, classifier.parameters())
    """ loss function """
    criterion = nn.CrossEntropyLoss()
    """ training/test loop """
    for i, epoch in enumerate(range(args.epochs)):

        batch = Batch('train', train, trainset)
        for x, target in batch:
            x, target = augment(args, x, target)

            optim.zero_grad()
            y = classifier(x)
            loss = criterion(y, target)
            loss.backward()
            optim.step()

            train_accuracy = batch.log_step()

            if i % args.checkpoint_freq == 0:
                torch.save(classifier.state_dict(), run_dir + '/checkpoint')

        batch = Batch('test', test, testset)
        for x, target in batch:
            x, target = augment(args, x, target)

            y = classifier(x)
            loss = criterion(y, target)

            test_accuracy = batch.log_step()

        ave_precision, best_precision = log_epoch(batch.confusion,
                                                  best_precision,
                                                  test_accuracy,
                                                  train_accuracy)
        scheduler.step()

        if ave_precision >= best_precision:
            torch.save(classifier.state_dict(), run_dir + '/best')

    return ave_precision, best_precision, train_accuracy, test_accuracy
Example #22
 def getmodel(self):
     return models.Classifier(size=self.size, d_model=self.d_model, d_ff=self.d_ff, dropout=self.dropout, n_outputs=self.n_outputs)
Example #23
    def load_model_and_scheduler(self):
        """
        Load model.
        """

        params = {
            'lr': self.args.lr,
            'lr_decay': self.args.lr_decay,
            'lr_min': 0.0000001,
            'weight_decay': self.args.weight_decay,
        }

        log('[Training] using %d input channels' % self.train_images.shape[3])
        network_units = list(map(int, self.args.network_units.split(',')))
        self.model = models.Classifier(
            self.N_class,
            resolution=(self.train_images.shape[3], self.train_images.shape[1],
                        self.train_images.shape[2]),
            architecture=self.args.network_architecture,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            start_channels=self.args.network_channels,
            dropout=self.args.network_dropout,
            units=network_units)

        self.epoch = 0
        if os.path.exists(self.args.state_file):
            state = State.load(self.args.state_file)
            log('[Training] loaded %s' % self.args.state_file)

            self.model.load_state_dict(state.model)

            # needs to be done before constructing the optimizer.
            if self.args.use_gpu and not cuda.is_cuda(self.model):
                self.model = self.model.cuda()
                log('[Training] model is not CUDA')
            log('[Training] loaded model')

            optimizer = torch.optim.Adam(self.model.parameters(), params['lr'])
            optimizer.load_state_dict(state.optimizer)
            self.scheduler = ADAMScheduler(optimizer, **params)

            self.epoch = state.epoch + 1
            self.scheduler.update(self.epoch)

            assert os.path.exists(self.args.training_file) and os.path.exists(
                self.args.testing_file)
            self.train_statistics = utils.read_hdf5(self.args.training_file)
            log('[Training] read %s' % self.args.training_file)
            self.test_statistics = utils.read_hdf5(self.args.testing_file)
            log('[Training] read %s' % self.args.testing_file)

            if utils.display():
                self.plot()
        else:
            if self.args.use_gpu and not cuda.is_cuda(self.model):
                self.model = self.model.cuda()
                log('[Training] model is not CUDA')
            log('[Training] did not load model, using new one')

            self.scheduler = ADAMScheduler(self.model.parameters(), **params)
            self.scheduler.initialize()  # !

        log(self.model)
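For reference, the resume logic above can be reproduced with plain PyTorch checkpoints. The sketch below is a minimal, generic version under assumed conventions: it does not use the project's State or ADAMScheduler helpers, and the checkpoint path and dictionary keys are illustrative only.

import os
import torch

def save_checkpoint(model, optimizer, epoch, path='checkpoint.pth'):
    # persist everything needed to resume: weights, optimizer state and epoch counter
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': epoch}, path)

def load_checkpoint(model, optimizer, path='checkpoint.pth'):
    # returns the epoch to continue from (0 if no checkpoint exists)
    if not os.path.exists(path):
        return 0
    state = torch.load(path, map_location='cpu')
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])
    return state['epoch'] + 1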
Ejemplo n.º 24
0
def train_cnn(train_dataset,
              batch_size,
              num_filters,
              filter_sizes,
              use_elmo=False,
              epochs=15,
              learning_rate=3e-4,
              num_classes=2,
              use_gpu=False):
    """
    Trains a CNN on train_dataset. Initialises word embeddings with pre-trained GloVe OR uses a pre-trained ELMo model to dynamically compute embeddings.
    The CNN has one convolution layer for each ngram filter size.

    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    batch_size: int
        number of Instances to process in a batch
    num_filters: int
        output dim for each convolutional layer, which is the number of 'filters' learned by that layer
    filter_sizes: Tuple[int]
        specifies the number of convolutional layers and their sizes
    use_elmo: bool
        use ELMo embeddings (transfer learning) if True | GloVe if False
    epochs: int
        total number of epochs to train on (default=15)
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(tokens=['fic', 'non'],
                                      namespace="labels")
        word_embeddings: TextFieldEmbedder = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset)
        word_embeddings: TextFieldEmbedder = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)  # numericalize the data

    assert vocab.get_token_from_index(index=0, namespace='labels') == 'fic'
    assert vocab.get_token_from_index(index=1, namespace='labels') == 'non'
    print("\n\nThe ordering of labels is ['fic', 'non']\n\n")

    encoder: Seq2VecEncoder = CnnEncoder(
        embedding_dim=word_embeddings.get_output_dim(),
        num_filters=num_filters,
        ngram_filter_sizes=filter_sizes)

    classifier_feedforward: FeedForward = nn.Linear(encoder.get_output_dim(),
                                                    num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)

    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      cuda_device=0 if use_gpu else -1,
                      num_epochs=epochs)

    metrics = trainer.train()
    print(metrics)

    return model, vocab, metrics['training_epochs']
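A possible call to train_cnn is sketched below. It assumes train_data holds a list of AllenNLP Instances built elsewhere; the batch size, filter count, and filter sizes are illustrative values, not recommendations from the original code.

# minimal usage sketch (train_data is assumed to be a List[Instance] prepared elsewhere)
model, vocab, n_epochs = train_cnn(train_dataset=train_data,
                                   batch_size=32,
                                   num_filters=100,
                                   filter_sizes=(2, 3, 4),
                                   use_elmo=False,
                                   epochs=15,
                                   use_gpu=torch.cuda.is_available())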
Ejemplo n.º 25
0
def train_lstm(train_dataset,
               batch_size,
               num_layers,
               use_elmo=False,
               epochs=15,
               bidirectional=True,
               learning_rate=3e-4,
               hidden_size=64,
               num_classes=2,
               use_gpu=False):
    """
    Trains an LSTM and its variants (Vanilla, Bi-Directional, Stacked BiLSTM) on train_dataset. Initialises word embeddings with pre-trained GloVe OR uses a pre-trained ELMo model to dynamically compute embeddings.


    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    batch_size: int
        number of Instances to process in a batch
    num_layers: int
        number of BiLSTM layers: 2 or higher for Stacked BiLSTMs
    use_elmo: bool
        use elmo embeddings (transfer learning) if True | GloVe if False
    epochs: int
        total number of epochs to train on (default=15)
    bidirectional: bool
        True for a bidirectional LSTM
    learning_rate: float
        learning rate for Adam Optimizer
    hidden_size: int
        size of the hidden layer in the encoder
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU

    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(tokens=['fic', 'non'],
                                      namespace="labels")
        word_embeddings: TextFieldEmbedder = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset)
        word_embeddings: TextFieldEmbedder = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)  # numericalize the data

    assert vocab.get_token_from_index(index=0, namespace='labels') == 'fic'
    assert vocab.get_token_from_index(index=1, namespace='labels') == 'non'
    print("\n\nThe ordering of labels is ['fic', 'non']\n\n")

    encoder: Seq2VecEncoder = PytorchSeq2VecWrapper(
        nn.LSTM(word_embeddings.get_output_dim(),
                hidden_size,
                num_layers=num_layers,
                bidirectional=bidirectional,
                batch_first=True))

    classifier_feedforward: FeedForward = nn.Linear(encoder.get_output_dim(),
                                                    num_classes)
    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)

    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      cuda_device=0 if use_gpu else -1,
                      num_epochs=epochs)

    metrics = trainer.train()
    print(metrics)

    return model, vocab, metrics['training_epochs']
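A hedged usage sketch of this trainer, configured as a stacked BiLSTM (num_layers=2, bidirectional=True); train_data is assumed to be a List[Instance] prepared elsewhere and all hyperparameter values are illustrative.

# stacked BiLSTM variant with ELMo embeddings (values illustrative only)
model, vocab, n_epochs = train_lstm(train_dataset=train_data,
                                    batch_size=32,
                                    num_layers=2,
                                    use_elmo=True,
                                    bidirectional=True,
                                    hidden_size=64,
                                    use_gpu=torch.cuda.is_available())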
Ejemplo n.º 26
0
    def load_data_and_model(self):
        """
        Load data and model.
        """

        database = utils.read_hdf5(self.args.database_file).astype(
            numpy.float32)
        log('[Visualization] read %s' % self.args.database_file)

        N_font = database.shape[0]
        N_class = database.shape[1]
        resolution = database.shape[2]

        database = database.reshape((database.shape[0] * database.shape[1],
                                     database.shape[2], database.shape[3]))
        database = torch.from_numpy(database)
        if self.args.use_gpu:
            database = database.cuda()
        database = torch.autograd.Variable(database, False)

        self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
            numpy.float32)
        if len(self.test_images.shape) < 4:
            self.test_images = numpy.expand_dims(self.test_images, axis=3)

        self.perturbations = utils.read_hdf5(
            self.args.perturbations_file).astype(numpy.float32)
        self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
        log('[Visualization] read %s' % self.args.perturbations_file)

        self.success = utils.read_hdf5(self.args.success_file)
        self.success = numpy.swapaxes(self.success, 0, 1)
        log('[Visualization] read %s' % self.args.success_file)

        self.accuracy = utils.read_hdf5(self.args.accuracy_file)
        log('[Visualization] read %s' % self.args.accuracy_file)

        self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
            numpy.float32)
        self.test_theta = self.test_theta[:self.perturbations.shape[0]]
        N_theta = self.test_theta.shape[1]
        log('[Visualization] using %d N_theta' % N_theta)
        log('[Visualization] read %s' % self.args.test_theta_file)

        self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(
            numpy.int)
        self.test_codes = self.test_codes[:self.perturbations.shape[0]]
        self.test_codes = self.test_codes[:, 1:3]
        self.test_codes = numpy.concatenate(
            (common.numpy.one_hot(self.test_codes[:, 0], N_font),
             common.numpy.one_hot(self.test_codes[:, 1], N_class)),
            axis=1).astype(numpy.float32)
        log('[Visualization] read %s' % self.args.test_codes_file)

        image_channels = 1 if N_theta <= 7 else 3
        network_units = list(map(int, self.args.network_units.split(',')))
        log('[Visualization] using %d input channels' % image_channels)
        self.classifier = models.Classifier(
            N_class,
            resolution=(image_channels, resolution, resolution),
            architecture=self.args.network_architecture,
            activation=self.args.network_activation,
            batch_normalization=not self.args.network_no_batch_normalization,
            start_channels=self.args.network_channels,
            dropout=self.args.network_dropout,
            units=network_units)
        self.decoder = models.AlternativeOneHotDecoder(database, N_font,
                                                       N_class, N_theta)
        self.decoder.eval()

        assert os.path.exists(
            self.args.classifier_file
        ), 'state file %s not found' % self.args.classifier_file
        state = State.load(self.args.classifier_file)
        log('[Visualization] read %s' % self.args.classifier_file)

        self.classifier.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            log('[Visualization] classifier is not CUDA')
            self.classifier = self.classifier.cuda()
        log('[Visualization] loaded classifier')

        self.classifier.eval()
        log('[Visualization] set classifier to eval')
Ejemplo n.º 27
0
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(data.DATA(args, mode='valid'),
                                             batch_size=args.train_batch,
                                             num_workers=args.workers,
                                             shuffle=False)
    ''' load model '''
    print('===> prepare model ...')
    feature_stractor = models.Stractor()
    feature_stractor.cuda()  # load model to gpu
    params_to_update = feature_stractor.parameters()
    params_to_update_str = []
    for name, param in feature_stractor.named_parameters():
        if param.requires_grad:
            params_to_update_str.append(param)

    classifier = models.Classifier()
    classifier = classifier.cuda()
    params_to_update_class = []
    for name, param in classifier.named_parameters():
        if param.requires_grad:
            params_to_update_class.append(param)
    ''' define loss '''
    criterion = nn.CrossEntropyLoss()
    ''' setup optimizer '''
    optimizer = torch.optim.Adam(params_to_update_class + params_to_update_str,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    ''' setup tensorboard '''
    writer = SummaryWriter(os.path.join(args.save_dir, 'train_info'))
    iters = 0
    best_acc = 0
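Because the single Adam optimizer above covers both parameter lists, a training step only needs one backward pass. The loop below is a hedged sketch: it assumes train_loader yields (images, labels) batches, that the classifier consumes the extractor's features directly, and uses an illustrative num_epochs rather than any field from args.

num_epochs = 10  # illustrative; the real epoch count comes from the surrounding script
for epoch in range(num_epochs):
    feature_stractor.train()
    classifier.train()
    for imgs, labels in train_loader:       # assumed (images, labels) batch format
        imgs, labels = imgs.cuda(), labels.cuda()
        feats = feature_stractor(imgs)      # shared feature extractor
        logits = classifier(feats)          # classification head
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()                     # one backward pass updates both modules
        optimizer.step()
        iters += 1
        writer.add_scalar('train/loss', loss.item(), iters)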
Ejemplo n.º 28
0
NUM_CLASS = 10
HIDDEN_SIZE = args.HIDDEN_SIZE
NUM_STACK = args.NUM_STACK
DROPOUT = args.DROPOUT
USE_CMVN = args.USE_CMVN
MAX_ITERATION = args.MAX_ITERATION
MAX_EPOCH = args.MAX_EPOCH
BATCH_SIZE = args.BATCH_SIZE
MFCC_ROOT = args.MFCC_ROOT
TRAIN_LIST = args.TRAIN_LIST
VALID_LIST = args.VALID_LIST
SAVE_FILE = args.SAVE_FILE

# Build up model and batch generator
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # check available gpu
model = models.Classifier(IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK,
                          DROPOUT).to(device)  # build up model
loss_fun = nn.CrossEntropyLoss()  # define CE as loss function (objective function)
optimizer = torch.optim.Adam(model.parameters())  # define the optimizer (Adam here; you can try others as well)
batch_train = utils.Batch_generator(MFCC_ROOT, TRAIN_LIST,
                                    BATCH_SIZE)  # batch generator
batch_valid = utils.Batch_generator(MFCC_ROOT, VALID_LIST, BATCH_SIZE)

# print out settings
logging.info('Batch_size: {}'.format(BATCH_SIZE))
logging.info('Max epoch: {}'.format(MAX_EPOCH))
logging.info('Max iteration: {}'.format(MAX_ITERATION))
logging.info('Hidden size: {}'.format(HIDDEN_SIZE))
logging.info('Num stack: {}'.format(NUM_STACK))
logging.info('Use cmvn: {}'.format(USE_CMVN))
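How the batch generators feed the model is project-specific; the sketch below only assumes that Batch_generator yields (features, labels) pairs as numpy arrays, which is a guess from the surrounding code rather than a documented interface.

model.train()
for iteration, (features, labels) in enumerate(batch_train):  # assumed yield format
    x = torch.as_tensor(features, dtype=torch.float32).to(device)
    y = torch.as_tensor(labels, dtype=torch.long).to(device)
    logits = model(x)
    loss = loss_fun(logits, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if iteration >= MAX_ITERATION:
        break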
Ejemplo n.º 29
0
def train_bert(train_dataset,
               validation_dataset,
               batch_size,
               pretrained_model,
               epochs=15,
               patience=None,
               learning_rate=3e-4,
               num_classes=2,
               use_gpu=False):
    """
    Trains BERT on train_dataset, with optional early stopping on validation_dataset.
    
    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    pretrained_model: str
        pretrained BERT model to use
    epochs: int
        total number of epochs to train on (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to improve; 'None' to disable early stopping 
    learning_rate: float
        learning rate for Adam Optimizer
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU
    
    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    vocab = Vocabulary()

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)  # numericalize the data

    word_embeddings: TextFieldEmbedder = load_bert_embeddings(pretrained_model)
    encoder: Seq2VecEncoder = BertSentencePooler(vocab)

    # Feedforward:
    classifier_feedforward: FeedForward = nn.Linear(encoder.get_output_dim(),
                                                    num_classes)

    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)

    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    if patience is None:  # No early stopping: train on both train+validation datasets
        trainer = Trainer(model=model,
                          optimizer=optimizer,
                          iterator=iterator,
                          train_dataset=train_dataset + validation_dataset,
                          cuda_device=0 if use_gpu else -1,
                          num_epochs=epochs)

    else:
        trainer = Trainer(
            model=model,
            optimizer=optimizer,
            iterator=iterator,
            train_dataset=train_dataset,
            validation_dataset=validation_dataset,
            cuda_device=0 if use_gpu else -1,
            patience=patience,  # stop if loss does not improve for 'patience' epochs
            num_epochs=epochs)

    metrics = trainer.train()
    #     print(metrics)

    return model, vocab, metrics['training_epochs']
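Calling train_bert mirrors the other trainers. In the sketch below, train_data and valid_data are assumed to be Instance lists prepared elsewhere, and the model name 'bert-base-uncased' is an assumption — pass whatever identifier load_bert_embeddings actually expects.

model, vocab, n_epochs = train_bert(train_dataset=train_data,
                                    validation_dataset=valid_data,
                                    batch_size=16,
                                    pretrained_model='bert-base-uncased',  # assumed identifier
                                    epochs=15,
                                    patience=3,   # early stopping on validation loss
                                    learning_rate=3e-4,
                                    use_gpu=torch.cuda.is_available())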
Ejemplo n.º 30
0
def train_lstm(train_dataset,
               validation_dataset,
               batch_size,
               num_layers,
               use_elmo=False,
               epochs=15,
               patience=None,
               bidirectional=True,
               learning_rate=3e-4,
               hidden_size=64,
               num_classes=2,
               use_gpu=False):
    """
    Trains an LSTM and its variants (Vanilla, Bi-Directional, Stacked BiLSTM) on train_dataset; optionally performs early stopping based on validation loss. Initialises word embeddings with pre-trained GloVe OR uses a pre-trained ELMo model to dynamically compute embeddings.
    
    Parameters
    ----------
    train_dataset: List[Instance]
        Instances for training set
    validation_dataset: List[Instance]
        Instances for validation set
    batch_size: int
        number of Instances to process in a batch
    num_layers: int
        number of BiLSTM layers: 2 or higher for Stacked BiLSTMs
    use_elmo: bool
        use ELMo embeddings if True | GloVe embeddings if False
    epochs: int
        total number of epochs to train for (default=15)
    patience: int or None
        early stopping - number of epochs to wait for validation loss to improve; 'None' to disable early stopping
    bidirectional: bool
        True for a bidirectional LSTM
    learning_rate: float
        learning rate for Adam Optimizer
    hidden_size: int
        size of the hidden layer in the encoder
    num_classes: int
        default=2 for binary classification
    use_gpu: bool
        True to use the GPU
    
    Returns
    -------
    Trained Model, Vocabulary, Number of actual training epochs
    """
    if use_elmo:
        vocab = Vocabulary()
        word_embeddings: TextFieldEmbedder = load_elmo_embeddings()
    else:
        vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
        word_embeddings: TextFieldEmbedder = load_glove_embeddings(vocab)

    iterator = BucketIterator(batch_size=batch_size,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)  # numericalize the data

    # BiLSTM encoder
    encoder: Seq2VecEncoder = PytorchSeq2VecWrapper(
        nn.LSTM(word_embeddings.get_output_dim(),
                hidden_size,
                num_layers=num_layers,
                bidirectional=bidirectional,
                batch_first=True))

    # Feedforward:
    classifier_feedforward: FeedForward = nn.Linear(encoder.get_output_dim(),
                                                    num_classes)

    model = models.Classifier(vocab=vocab,
                              word_embeddings=word_embeddings,
                              encoder=encoder,
                              classifier_feedforward=classifier_feedforward)

    if use_gpu:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), learning_rate)

    if patience is None:  # No early stopping: train on both train+validation datasets
        trainer = Trainer(model=model,
                          optimizer=optimizer,
                          iterator=iterator,
                          train_dataset=train_dataset + validation_dataset,
                          cuda_device=0 if use_gpu else -1,
                          num_epochs=epochs)

    else:
        trainer = Trainer(
            model=model,
            optimizer=optimizer,
            iterator=iterator,
            train_dataset=train_dataset,
            validation_dataset=validation_dataset,
            cuda_device=0 if use_gpu else -1,
            patience=patience,  # stop if loss does not improve for 'patience' epochs
            num_epochs=epochs)

    metrics = trainer.train()
    #     print(metrics)

    return model, vocab, metrics['training_epochs']
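A hedged example of the validation-aware variant: with patience set, training stops once validation loss stops improving; with patience=None the two datasets are merged and trained on jointly. train_data and valid_data are assumed Instance lists and the values shown are illustrative.

# early stopping after 3 epochs without validation improvement (values illustrative)
model, vocab, n_epochs = train_lstm(train_dataset=train_data,
                                    validation_dataset=valid_data,
                                    batch_size=32,
                                    num_layers=1,
                                    use_elmo=False,
                                    patience=3,
                                    bidirectional=True,
                                    use_gpu=torch.cuda.is_available())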