# --- Esempio n. 1 (Example 1) ---
def train():
    """Train the CNN with SGD and on-the-fly data augmentation.

    The best model (by validation loss) is written to ``model.h5``;
    the total wall-clock training time in seconds is printed at the end.
    """
    learning_rate = 0.002
    samples_per_batch = 16
    n_epochs = 80
    t0 = time()

    optimizer = SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True)
    model = cnn_model()
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    X_train, y_train, X_val, y_val, X_test, y_test = prepare_data()

    # Augmentation: random shifts, zoom, shear and rotation;
    # no feature-wise normalisation.
    augmenter = ImageDataGenerator(featurewise_center=False,
                                   featurewise_std_normalization=False,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.3,
                                   shear_range=0.15,
                                   rotation_range=30., )
    augmenter.fit(X_train)

    callbacks = [
        ReduceLROnPlateau('val_loss', factor=0.2, patience=20, verbose=1,
                          mode='auto'),
        ModelCheckpoint('model.h5', save_best_only=True),
    ]
    history = model.fit_generator(
        augmenter.flow(X_train, y_train, batch_size=samples_per_batch),
        steps_per_epoch=2000,
        epochs=n_epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks)

    t1 = time()
    print(t1 - t0)
# --- Esempio n. 2 (Example 2) ---
    def pred_probs(self,
                   f_pred_prob,
                   prepare_data,
                   data,
                   iterator,
                   verbose=False):
        """ If you want to use a trained model, this is useful to compute
        the probabilities of new examples.

        f_pred_prob: compiled function returning class probabilities for a
            prepared (x, mask) minibatch.
        prepare_data: callable turning raw sequences + labels into the
            (x, mask, y) arrays the model expects.
        data: (list of sequences, list of labels) pair.
        iterator: yields (index, list of sample indices) minibatch pairs.
        verbose: if True, print progress after each minibatch.

        Returns an (n_samples, 2) array of class probabilities.
        NOTE: Python 2 code (print statement below).
        """
        n_samples = len(data[0])
        # One probability row per sample, two columns (binary task).
        probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

        n_done = 0

        for _, valid_index in iterator:
            # Build the padded minibatch for the selected samples.
            x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                      numpy.array(data[1])[valid_index],
                                      maxlen=None)
            pred_probas = f_pred_prob(x, mask)
            # Scatter the minibatch probabilities back to their global rows.
            probs[valid_index, :] = pred_probas

            n_done += len(valid_index)
            if verbose:
                print '%d/%d samples classified' % (n_done, n_samples)

        return probs
# --- Esempio n. 3 (Example 3) ---
def test():
    """Evaluate the saved CNN on the test set.

    Loads ``model.h5``, predicts labels for every test image, prints each
    misclassified example (predicted vs. true sign name), then reports the
    overall accuracy and the prediction wall-clock time in seconds.
    """
    _, _, _, _, X_test, y_test = prepare_data()
    signs = get_traffic_sign_config()
    start = time()
    model = load_model('model.h5')
    pred = model.predict(X_test, batch_size=1000)
    end = time()

    # Vectorized argmax over the class axis replaces the original
    # element-by-element loop over a hard-coded 12630 samples; also fixes
    # the original's mixed Python-2/3 print statements and tab indentation.
    y_pred = np.argmax(pred, axis=1)
    for i in range(len(y_pred)):
        if y_pred[i] != y_test[i]:
            print("**************************************************************************")
            # int(...) guards against numpy scalar types being rejected as
            # container indices.
            print("the model prediction is %s,correct label is %s" % (signs[int(y_pred[i])], signs[int(y_test[i])]))
    acc = np.mean(y_pred == y_test)
    print("Test accuracy = {}".format(acc))
    print(end - start)
# --- Esempio n. 4 (Example 4) ---
    def pred_error(self, f_pred, prepare_data, data, iterator, verbose=False):
        """Return the misclassification rate of ``f_pred`` over the dataset.

        f_pred: Theano fct computing the prediction
        prepare_data: usual prepare_data for that dataset.
        data: (list of sequences, list of labels) pair.
        iterator: yields (index, list of sample indices) minibatch pairs.
        verbose: unused; kept for interface compatibility.
        """
        n_correct = 0
        for _, batch_indices in iterator:
            # Build the padded (x, mask, y) arrays for this minibatch.
            x, mask, y = prepare_data([data[0][i] for i in batch_indices],
                                      numpy.array(data[1])[batch_indices],
                                      maxlen=None)
            predictions = f_pred(x, mask)
            gold = numpy.array(data[1])[batch_indices]
            n_correct += (predictions == gold).sum()
        # Error rate = 1 - accuracy over the whole dataset.
        return 1. - numpy_floatX(n_correct) / len(data[0])
# --- Esempio n. 5 (Example 5) ---
    def pred_error(self, f_pred, prepare_data, data, iterator, verbose=False):
        """
        Just compute the error
        f_pred: Theano fct computing the prediction
        prepare_data: usual prepare_data for that dataset.
        data: (list of sequences, list of integer labels) pair.
        iterator: yields (index, list of sample indices) minibatch pairs.
        verbose: unused; kept for interface compatibility.

        Returns the misclassification rate (1 - accuracy) as a float.
        """
        # Accumulates the number of correct predictions; converted to an
        # error rate after the loop.
        valid_err = 0
        for _, valid_index in iterator:
            # Build the padded (x, mask, y) arrays for this minibatch.
            x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                      numpy.array(data[1])[valid_index],
                                      maxlen=None)
            preds = f_pred(x, mask)

            targets = numpy.array(data[1])[valid_index]
            valid_err += (preds == targets).sum()
        # Convert the hit count into an error rate: 1 - accuracy.
        valid_err = 1. - numpy_floatX(valid_err) / len(data[0])

        return valid_err
# --- Esempio n. 6 (Example 6) ---
    def pred_probs(self, f_pred_prob, prepare_data, data, iterator, verbose=False):
        """ If you want to use a trained model, this is useful to compute
        the probabilities of new examples.

        f_pred_prob: compiled function returning class probabilities for a
            prepared (x, mask) minibatch.
        prepare_data: callable turning raw sequences + labels into the
            (x, mask, y) arrays the model expects.
        data: (list of sequences, list of labels) pair.
        iterator: yields (index, list of sample indices) minibatch pairs.
        verbose: if True, print progress after each minibatch.

        Returns an (n_samples, 2) array of class probabilities.
        NOTE: Python 2 code (print statement below).
        """
        n_samples = len(data[0])
        # One probability row per sample, two columns (binary task).
        probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

        n_done = 0


        for _, valid_index in iterator:
            # Build the padded minibatch for the selected samples.
            x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                      numpy.array(data[1])[valid_index],
                                      maxlen=None)
            pred_probas = f_pred_prob(x, mask)
            # Write the minibatch rows back into their global positions.
            probs[valid_index, :] = pred_probas

            n_done += len(valid_index)
            if verbose:
                print '%d/%d samples classified' % (n_done, n_samples)

        return probs
def main(args):
    """Evaluate a pickled model on the test split.

    args must provide ``batch_size`` (test iterator batch size) and
    ``model_path`` (path to a pickled model). Prints test loss/accuracy.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    REVIEW, SCORE, train_data, valid_data, test_data = prepare_data(
        train=False)

    test_iterator = data.Iterator(test_data,
                                  batch_size=args.batch_size,
                                  device=device,
                                  sort_within_batch=True,
                                  sort_key=lambda x: len(x.review))
    print('Finished loading data.')

    # NOTE(review): unpickling a model file executes arbitrary code --
    # only load model files from trusted sources.
    # (Removed a redundant `model = None` pre-initialization: `model` is
    # unconditionally assigned inside the `with` block.)
    with open(args.model_path, 'rb') as f:
        model = pickle.load(f)
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    loss, acc = evaluate(model, test_iterator, criterion)
    print('Testing data:')
    print(f'Loss: {loss:.3f}')
    print(f'Acc: {acc:.2f}')
# --- Esempio n. 8 (Example 8) ---
def main():
    """Meta-train a Learner with an LSTM MetaLearner on episodic few-shot data.

    Parses FLAGS, seeds all RNGs, builds the learner/meta-learner pair,
    then runs the meta-training loop with periodic meta-validation.
    Prints progress and the best validation accuracy reached.
    """

    args, unparsed = FLAGS.parse_known_args()
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    # Seed python, numpy and torch RNGs (random seed if none was given).
    if args.seed is None:
        args.seed = random.randint(0, 1e3)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cpu:
        args.dev = torch.device('cpu')
    else:
        if not torch.cuda.is_available():
            raise RuntimeError("GPU unavailable.")

        # Deterministic cuDNN for reproducibility (benchmark disabled).
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        args.dev = torch.device('cuda')

    #logger = GOATLogger(args)
    use_qrnn = True

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0),
                              use_qrnn).to(args.dev)
    metalearner.metalstm.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    if args.resume:
        #logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    # NOTE(review): test mode currently returns without running meta_test
    # (the call is commented out); 'last_eps' would also be undefined here
    # unless --resume was given.
    if args.mode == 'test':
        #_ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad, metalearner, args, logger)
        return

    best_acc = 0.0
    print("Starting training...")
    print("Shots: ", args.n_shot)
    print("Classes: ", args.n_class)

    start_time = datetime.now()

    # Meta-training
    for eps, (episode_x, episode_y) in enumerate(train_loader):
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        # Progress log every 250 episodes (and on the very first one).
        if ((eps + 1) % 250 == 0 or eps == 0):
            print(eps + 1, "/", args.episode, " Loss: ", loss.item(), " Acc:",
                  acc)
        #logger.batch_info(eps=eps, totaleps=args.episode, loss=loss.item(), acc=acc, phase='train')

        # Meta-validation
        if ((eps + 1) % args.val_freq == 0
                and eps != 0) or eps + 1 == args.episode:
            #save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args)
            print("Meta validation: ", eps + 1, " Acc: ", acc)
            if acc > best_acc:
                best_acc = acc
                print("    New best: ", acc)
            # logger.loginfo("* Best accuracy so far *\n")

    end_time = datetime.now()
    print("Time to execute: ", end_time - start_time)
    print("Average per iteration", (end_time - start_time) / args.episode)
    torch.cuda.empty_cache()
    #acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad, metalearner, args)
    print("Training complete, best acc: ", best_acc)
# --- Esempio n. 9 (Example 9) ---
    return mcc, report


if __name__ == "__main__":
    # Script entry point: prepare dataloaders for a SciBERT-based classifier.
    args = parser.parse_args()
    print(args.model)

    print('Loading SciBERT tokenizer...')
    tokenizer = AutoTokenizer.from_pretrained(
        'allenai/scibert_scivocab_uncased')

    batch_size = int(HYPERPARAMS["BATCH_SIZE"])
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Training data is oversampled (class balancing).
    train_sentences, train_labels = prepare_data(
        input_dir=PATHS["TRAIN_DATA_PATH"], oversample=True)

    trial_sentences, trial_labels = prepare_data(
        input_dir=PATHS["VALIDATION_DATA_PATH"], oversample=True)

    # The validation (trial) split is folded into the training set.
    train_sentences = train_sentences + trial_sentences
    train_labels = train_labels + trial_labels

    train_dataloader = create_dataloader(train_sentences, train_labels,
                                         tokenizer)

    # Test data keeps its natural class distribution (no oversampling).
    test_sentences, test_labels = prepare_data(
        input_dir=PATHS["TEST_DATA_PATH"], oversample=False)

    test_dataloader = create_dataloader(test_sentences, test_labels, tokenizer)
# --- Esempio n. 10 (Example 10) ---
def main():
    """Meta-train a Learner with an LSTM MetaLearner (logger-enabled variant).

    Parses FLAGS, seeds all RNGs, builds the learner/meta-learner pair and
    runs the meta-training loop, checkpointing and meta-validating every
    ``args.val_freq`` episodes. Progress goes through GOATLogger.
    """

    args, unparsed = FLAGS.parse_known_args()
    args = brandos_load(args)
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    # Seed python, numpy and torch RNGs (random seed if none was given).
    if args.seed is None:
        args.seed = random.randint(0, 1e3)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cpu:
        args.dev = torch.device('cpu')
        args.gpu_name = args.dev
    else:
        if not torch.cuda.is_available():
            raise RuntimeError("GPU unavailable.")

        # Deterministic cuDNN for reproducibility (benchmark disabled).
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        args.dev = torch.device('cuda')
        try:
            args.gpu_name = torch.cuda.get_device_name(0)
        except Exception:
            # Fall back to the device object if the name query fails.
            # (Narrowed from a bare `except:` so Ctrl-C still propagates.)
            args.gpu_name = args.dev

    print(f'device {args.dev}')
    logger = GOATLogger(args)

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0)).to(
                                  args.dev)
    metalearner.metalstm.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    if args.resume:
        logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    if args.mode == 'test':
        # NOTE(review): 'last_eps' is only defined when --resume is given;
        # test mode without --resume raises NameError here.
        _ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad,
                      metalearner, args, logger)
        return

    best_acc = 0.0
    logger.loginfo("---> Start training")
    # Meta-training
    for eps, (episode_x, episode_y) in enumerate(
            train_loader
    ):  # sample data set split episode_x = D = (D^{train},D^{test})
        print(f'episode = {eps}')
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        logger.batch_info(eps=eps,
                          totaleps=args.episode,
                          loss=loss.item(),
                          acc=acc,
                          phase='train')

        # Meta-validation
        if eps % args.val_freq == 0 and eps != 0:
            save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args, logger)
            if acc > best_acc:
                best_acc = acc
                logger.loginfo(f"* Best accuracy so far {acc}*\n")

    logger.loginfo(f'acc: {acc}')
    # Bug fix: report the tracked best validation accuracy, not the accuracy
    # of whatever the last computed 'acc' happened to be.
    logger.loginfo(f"* Best accuracy so far {best_acc}*\n")
    logger.loginfo("Done")
def main(args):
    """Train a BiLSTM review-score classifier and pickle the best model.

    args must provide batch_size, hidden_dim, num_layers, dropout and
    num_epochs. The best model (lowest validation loss) is written to
    ./model/model.pkl.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    REVIEW, SCORE, train_data, valid_data, test_data = prepare_data()

    # Bucket by review length so each batch has similar lengths (less padding).
    train_iterator, valid_iterator = data.BucketIterator.splits(
        (train_data, valid_data),
        batch_size=args.batch_size,
        device=device,
        sort_within_batch=True,
        sort_key=lambda x: len(x.review))
    print('Finished loading data.')

    vocab_size = len(REVIEW.vocab)
    embedding_dim = 100
    hidden_dim = args.hidden_dim
    output_dim = 5
    num_layers = args.num_layers
    dropout = args.dropout
    padding_index = REVIEW.vocab.stoi['<pad>']
    unknown_index = REVIEW.vocab.stoi['<unk>']

    model = BiLSTM_RNN(vocab_size, embedding_dim, hidden_dim, output_dim,
                       num_layers, dropout, padding_index)

    #Load pretrained embeddings
    pretrained_embeddings = REVIEW.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)

    #Reset unknown and padding vectors
    # NOTE(review): these zero vectors are created on `device` while the
    # model is only moved to `device` further below -- confirm this
    # cross-device assignment works in the torch version in use.
    model.embedding.weight.data[unknown_index] = torch.zeros(embedding_dim,
                                                             device=device)
    model.embedding.weight.data[padding_index] = torch.zeros(embedding_dim,
                                                             device=device)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    epochs = args.num_epochs
    best_loss = np.inf

    if not (os.path.exists("./model")):
        os.mkdir("./model")

    for epoch in np.arange(epochs):

        start = time.time()
        train_loss, train_acc = train(model, train_iterator, optimizer,
                                      criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        end = time.time()

        duration = time.strftime('%H:%M:%S', time.gmtime(end - start))

        # Pickle the model whenever validation loss improves.
        if valid_loss < best_loss:
            best_loss = valid_loss
            with open('./model/model.pkl', 'wb') as f:
                pickle.dump(model, f)

        print(f'\nEpoch {epoch + 1} at {duration}')
        print(
            f'Train Loss: {train_loss:.3f} - Validation Loss: {valid_loss:.3f}'
        )
        print(f'Train Acc: {train_acc:.2f} - Validation Acc: {valid_acc:.2f}')
# --- Esempio n. 12 (Example 12) ---
import torch
from dataloader import prepare_data
from model import Encoder, Attention, Decoder, Seq2Seq, init_weights
from trainer import Trainer
from config import *
""" load data """
train_loader, val_loader, test_loader, m_dh = prepare_data(
    TRAIN_PATH, VAL_PATH, TEST_PATH, DH_PATH, LOAD_FROM_DUMP, BATCH_SIZE)
""" model setup """
INPUT_DIM, OUTPUT_DIM = len(m_dh.de_vocab), len(m_dh.en_vocab)

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
attn = Attention(ENC_HID_DIM, DEC_HID_DIM, ATTN_DIM)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT,
              attn)

model = Seq2Seq(enc, dec)
model.apply(init_weights)
""" training setup """
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

criterion = torch.nn.CrossEntropyLoss(ignore_index=1)

trainer = Trainer(model,
                  optimizer,
                  criterion,
                  train_loader,
                  val_loader,
                  val_best_path=VAL_BEST_PATH)
trainer.load('ckpts/best.pt')
# --- Esempio n. 13 (Example 13) ---
    def create_and_save_adversarial_examples(self,
                                            saved_model_fpath,
                                            n_examples=100,
                                            dataset="data/imdb.pkl",
                                            saveto = "output/adversarial_examples.npz",
                                            ):
        """
        recreates the model from saved parameters, then finds adversarial examples.

        right now, not especially modular :(

        Allen's note: n_examples is not used

        :param string model_fname: the name of the file where the model has been stored.
        :param string dataset: path to the pickled dataset to draw examples from.
        :param string saveto: .npz path where originals and adversaries are written.

        NOTE: Python 2 code (print statements below).
        """



        # below: assert that the training has been done
        assert self.model_has_been_trained

        # we want to have trained nonadversarially in order to have
        # examples that are demonstrative of adversarialness
        assert not self.adversarial


        (_, x_sym, mask_sym, y_sym) =\
             self.build_model(self.model_options,)

        # Gradient of the cost w.r.t. the embeddings (first element only).
        grad_wrt_emb = tensor.grad(self.cost, wrt=self.emb)[0]

        # Sign-of-gradient perturbation, scaled by adv_epsilon below.
        anti_example = tensor.sgn(grad_wrt_emb)

        adv_example = self.emb + self.adv_epsilon*anti_example

        f_adv_example = theano.function([x_sym, mask_sym, y_sym], adv_example, name='f_adv_example')
        f_identity = theano.function([x_sym], self.emb, name='f_identity')


        # 1. get the data
        print 'Loading data'
        #TODO: remove magic 10000!!!
        train, valid, test = load_data(n_words=10000, valid_portion=0.05,
                                       maxlen=self.maxlen, path=dataset)



        # The validation split serves as the example corpus.
        corpus = valid
        # make a datastructure in which to store them
        print len(corpus[1])
        sentences_and_adversaries = {
            'original_sentences': None,
            'adversarial_sentences': None,
            'saved_model_fpath' : saved_model_fpath,

            #metadata
            'n_sentences': len(corpus[1]),
            'adversarial_parameters': {
                        'alpha':self.adv_alpha,
                        'epsilon':self.adv_epsilon,
                        },
        }


        x_itf, mask_itf, y_itf = prepare_data(corpus[0], corpus[1])

        # print f_adv_example(x_itf, mask_itf, y_itf)
        # print f_adv_example(x_itf, mask_itf, y_itf).shape

        sentences_and_adversaries['adversarial_sentences'] = f_adv_example(x_itf, mask_itf, y_itf)
        sentences_and_adversaries['original_sentences'] = f_identity(x_itf)

        numpy.savez(saveto, sentences_and_adversaries)#, open(saveto, 'wb'))
# --- Esempio n. 14 (Example 14) ---
    def train_lstm(
            self,
            saveto,  # The best model will be saved there
            dataset,

            #----------------------------------------------------------------------
            #algorithmic hyperparameters
            encoder='lstm',  # TODO: can be removed must be lstm.
            l2_reg_U=0.,  # Weight decay for the classifier applied to the U weights.
            lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
            optimizer="adadelta",  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
            batch_size=16,  # The batch size during training.
            wemb_init='word2vec',

            #----------------------------------------------------------------------
            #parameters related to convergence, saving, and similar
            max_epochs=5000,  # The maximum number of epoch to run
            patience=10,  # Number of epoch to wait before early stop if no progress
            dispFreq=10,  # Display to stdout the training progress every N updates
            n_words=10000,  # Vocabulary size
            validFreq=370,  # Compute the validation error after this number of update.
            saveFreq=1110,  # Save the parameters after every saveFreq updates
            valid_batch_size=64,  # The batch size used for validation/test set.

            #----------------------------------------------------------------------
            # Parameter for extra option (whatever that means)
        noise_std=0.,
            use_dropout=True,  # if False slightly faster, but worst test error
            # This frequently need a bigger model.
        reload_model=None,  # Path to a saved model we want to start from.
            return_after_reloading=False,  # Path to a saved model we want to start from.
            test_size=-1,  # If >0, we keep only this number of test example.
    ):
        """Train (or reload) the LSTM classifier.

        Returns (train_err, valid_err, test_err). Saves the best parameters
        plus the error history to ``saveto`` (numpy .npz) and the model
        options to ``saveto``.pkl. Early-stops after ``patience``
        validations without improvement.

        NOTE: Python 2 code (print statements, ``xrange``).
        """

        optimizer = OPTIMIZERS[optimizer]
        # Model options
        # NOTE: locals().copy() snapshots every argument (including self)
        # into the options dict; 'optimizer' is already the resolved callable.
        self.model_options = locals().copy()

        if reload_model:
            self.faulty_load_params(reload_model)
            # self.init_tparams()
            # Recover embedding / hidden / output dims from the saved arrays.
            _, self.wdim = self.params['Wemb'].shape
            self.hdim, ydim = self.params['U'].shape

            self.model_options['ydim'] = ydim
            print _, self.wdim, self.hdim, ydim

        self.model_options['hdim'] = self.hdim
        self.model_options['wdim'] = self.wdim

        self.model_options['grad_clip_thresh'] = self.grad_clip_thresh
        print "model options", self.model_options

        # load_data, prepare_data = get_dataset(dataset)

        print 'Loading data'
        #each of the below is a tuple of
        # (list of sentences, where each is a list fo word indices,
        #  list of integer labels)
        if not reload_model:
            train, valid, test = load_data(n_words=n_words,
                                           valid_portion=0.05,
                                           maxlen=self.maxlen,
                                           path=dataset)

            if test_size > 0:
                # The test set is sorted by size, but we want to keep random
                # size example.  So we must select a random selection of the
                # examples.
                idx = numpy.arange(len(test[0]))
                numpy.random.shuffle(idx)
                idx = idx[:test_size]
                test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

            # Number of target classes = max label + 1.
            ydim = numpy.max(train[1]) + 1

            self.model_options['ydim'] = ydim

        print 'Building model'

        if not reload_model:
            # initialize the word embedding matrix and the parameters of the model (U and b) randomly
            # self.params is a dict mapping name (string) -> numpy ndarray
            self.init_params(self.model_options)

        # This creates Theano Shared Variable from the parameters.
        # Dict name (string) -> Theano Tensor Shared Variable
        # self.params and self.tparams have different copy of the weights.
        self.init_tparams()

        # use_noise is for dropout
        (use_noise, x, mask, y) =\
             self.build_model(self.model_options,)
        # f_pred_prob, self.f_pred, cost)

        # Optional L2 penalty on the classifier weights U.
        if l2_reg_U > 0.:
            l2_reg_U = theano.shared(numpy_floatX(l2_reg_U), name='l2_reg_U')
            weight_decay = 0.
            weight_decay += (self.tparams['U']**2).sum()
            weight_decay *= l2_reg_U
            self.cost += weight_decay

        f_cost = theano.function([x, mask, y], self.cost, name='f_cost')

        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad = theano.function([x, mask, y], grads, name='f_grad')

        lr = tensor.scalar(name='lr')
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads, x, mask,
                                            y, self.cost)

        if self.debug:
            util.colorprint(
                "Following is the graph of the shared gradient function (f_grad_shared):",
                "blue")
            theano.printing.debugprint(f_grad_shared.maker.fgraph.outputs[0])

        # Caller only wanted the model rebuilt from disk -- stop here.
        if return_after_reloading:
            self.model_has_been_trained = True
            return

        print 'Optimization'

        kf_valid = self.get_minibatches_idx(len(valid[0]), valid_batch_size)
        kf_test = self.get_minibatches_idx(len(test[0]), valid_batch_size)

        print "%d train examples" % len(train[0])
        print "%d valid examples" % len(valid[0])
        print "%d test examples" % len(test[0])

        history_errs = []
        best_p = None
        # NOTE(review): 'bad_count' is never used; the early-stop logic below
        # uses 'bad_counter', which is first set in the best-model branch.
        bad_count = 0

        if validFreq == -1:
            validFreq = len(train[0]) / batch_size
        if saveFreq == -1:
            saveFreq = len(train[0]) / batch_size

        uidx = 0  # the number of update done
        estop = False  # early stop
        start_time = time.time()

        try:
            for epoch in xrange(max_epochs):
                sys.stdout.flush()
                n_samples = 0

                # Get new shuffled index for the training set.
                minibatches = self.get_minibatches_idx(len(train[0]),
                                                       batch_size,
                                                       shuffle=True)

                for _, train_index_list in minibatches:
                    uidx += 1
                    # Enable dropout noise during training updates.
                    use_noise.set_value(1.)

                    # Select the random examples for this minibatch
                    y = [train[1][t] for t in train_index_list]
                    x = [train[0][t] for t in train_index_list]

                    # Get the data in numpy.ndarray format
                    # This swap the axis!
                    # Return something of shape (minibatch maxlen, n samples)
                    x, mask, y = prepare_data(x, y)
                    n_samples += x.shape[1]

                    cur_cost_val = f_grad_shared(x, mask, y)
                    f_update(lrate)

                    # Abort on numerically diverged training.
                    if numpy.isnan(cur_cost_val) or numpy.isinf(cur_cost_val):
                        print 'NaN detected'
                        return 1., 1., 1.

                    if numpy.mod(uidx, dispFreq) == 0:
                        print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cur_cost_val

                    # Periodic checkpoint of the (best-so-far) parameters.
                    if saveto and numpy.mod(uidx, saveFreq) == 0:
                        print 'Saving...',

                        if best_p is not None:
                            self.params = best_p
                        else:
                            self.params = self.unzip(self.tparams)
                        numpy.savez(saveto,
                                    history_errs=history_errs,
                                    **self.params)
                        pkl.dump(self.model_options,
                                 open('%s.pkl' % saveto, 'wb'), -1)
                        print 'Done'

                    # Periodic validation (dropout disabled).
                    if numpy.mod(uidx, validFreq) == 0:
                        use_noise.set_value(0.)
                        train_err = self.pred_error(self.f_pred, prepare_data,
                                                    train, minibatches)
                        valid_err = self.pred_error(self.f_pred, prepare_data,
                                                    valid, kf_valid)
                        test_err = self.pred_error(self.f_pred, prepare_data,
                                                   test, kf_test)

                        history_errs.append([valid_err, test_err])

                        if (uidx == 0 or valid_err <=
                                numpy.array(history_errs)[:, 0].min()):

                            best_p = self.unzip(self.tparams)
                            bad_counter = 0

                        print('Train ', train_err, 'Valid ', valid_err,
                              'Test ', test_err)

                        # Early stop when validation error hasn't improved on
                        # the best seen at least 'patience' validations ago.
                        if (len(history_errs) > patience and valid_err >=
                                numpy.array(history_errs)[:-patience,
                                                          0].min()):
                            bad_counter += 1
                            if bad_counter > patience:
                                print 'Early Stop!'
                                estop = True
                                break

                print 'Seen %d samples' % n_samples

                if estop:
                    break

        except KeyboardInterrupt:
            print "Training interrupted"

        end_time = time.time()
        # Restore the best parameters (or snapshot the current ones).
        if best_p is not None:
            self.zipp(best_p, self.tparams)
        else:
            best_p = self.unzip(self.tparams)

        # Final evaluation with dropout disabled.
        use_noise.set_value(0.)
        kf_train_sorted = self.get_minibatches_idx(len(train[0]), batch_size)
        train_err = self.pred_error(self.f_pred, prepare_data, train,
                                    kf_train_sorted)
        valid_err = self.pred_error(self.f_pred, prepare_data, valid, kf_valid)
        test_err = self.pred_error(self.f_pred, prepare_data, test, kf_test)

        print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
        if saveto:
            numpy.savez(saveto,
                        train_err=train_err,
                        valid_err=valid_err,
                        test_err=test_err,
                        history_errs=history_errs,
                        **best_p)
        print 'The code run for %d epochs, with %f sec/epochs' % (
            (epoch + 1), (end_time - start_time) / (1. * (epoch + 1)))
        print >> sys.stderr, ('Training took %.1fs' % (end_time - start_time))
        self.model_has_been_trained = True
        return train_err, valid_err, test_err
Esempio n. 15
0
    def create_and_save_adversarial_examples(
        self,
        saved_model_fpath,
        n_examples=100,
        dataset="data/imdb.pkl",
        saveto="output/adversarial_examples.npz",
    ):
        """
        Recreates the model graph from the trained parameters, then computes
        adversarial word-embedding examples with the fast-gradient-sign
        construction (emb + adv_epsilon * sign(d cost / d emb)) and saves
        them, together with the original embeddings, to a .npz file.

        right now, not especially modular :(

        Allen's note: n_examples is not used; the whole validation split is
        processed instead.

        :param string saved_model_fpath: the name of the file where the model
            has been stored (only recorded as metadata in the output).
        :param int n_examples: unused (kept for interface compatibility).
        :param string dataset: path of the pickled dataset to load.
        :param string saveto: path of the output .npz file.
        """

        # below: assert that the training has been done
        assert self.model_has_been_trained

        # we want to have trained nonadversarially in order to have
        # examples that are demonstrative of adversarialness
        assert not self.adversarial


        # Rebuild the symbolic graph; as a side effect this sets self.cost
        # and self.emb, which are used below.
        (_, x_sym, mask_sym, y_sym) =\
             self.build_model(self.model_options,)

        # Gradient of the cost w.r.t. the embeddings.
        # NOTE(review): the trailing [0] indexes into the gradient result --
        # confirm this matches self.emb's layout / tensor.grad's return shape.
        grad_wrt_emb = tensor.grad(self.cost, wrt=self.emb)[0]

        # Per-coordinate direction of steepest cost increase (FGSM sign step).
        anti_example = tensor.sgn(grad_wrt_emb)

        # Perturb the embeddings by epsilon in that direction.
        adv_example = self.emb + self.adv_epsilon * anti_example

        # Compiled functions: one producing the adversarial embeddings, one
        # simply echoing the original embeddings for the same inputs.
        f_adv_example = theano.function([x_sym, mask_sym, y_sym],
                                        adv_example,
                                        name='f_adv_example')
        f_identity = theano.function([x_sym], self.emb, name='f_identity')

        # 1. get the data
        print 'Loading data'
        #TODO: remove magic 10000!!!
        train, valid, test = load_data(n_words=10000,
                                       valid_portion=0.05,
                                       maxlen=self.maxlen,
                                       path=dataset)

        # Adversarial examples are generated for the validation split only.
        corpus = valid
        # make a datastructure in which to store them
        print len(corpus[1])
        sentences_and_adversaries = {
            'original_sentences': None,
            'adversarial_sentences': None,
            'saved_model_fpath': saved_model_fpath,

            #metadata
            'n_sentences': len(corpus[1]),
            'adversarial_parameters': {
                'alpha': self.adv_alpha,
                'epsilon': self.adv_epsilon,
            },
        }

        # Pad/mask the corpus into the array format the compiled functions
        # expect (x, mask, y).
        x_itf, mask_itf, y_itf = prepare_data(corpus[0], corpus[1])

        # print f_adv_example(x_itf, mask_itf, y_itf)
        # print f_adv_example(x_itf, mask_itf, y_itf).shape

        sentences_and_adversaries['adversarial_sentences'] = f_adv_example(
            x_itf, mask_itf, y_itf)
        sentences_and_adversaries['original_sentences'] = f_identity(x_itf)

        # NOTE(review): passing the dict positionally stores it as a single
        # object array named 'arr_0'; numpy.savez(saveto,
        # **sentences_and_adversaries) would keep the keys -- confirm intent.
        numpy.savez(saveto, sentences_and_adversaries)  #, open(saveto, 'wb'))
Esempio n. 16
0
    def train_lstm(self,
        saveto, # The best model will be saved there
        dataset,

        #----------------------------------------------------------------------
        #algorithmic hyperparameters
        encoder='lstm',  # TODO: can be removed must be lstm.
        l2_reg_U=0.,  # Weight decay for the classifier applied to the U weights.
        lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
        optimizer="adadelta",  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommended (probably need momentum and decaying learning rate).
        batch_size=16,  # The batch size during training.
        wemb_init='word2vec',  # Word-embedding initialization scheme (recorded via locals() into model_options).

        #----------------------------------------------------------------------
        #parameters related to convergence, saving, and similar
        max_epochs=5000,  # The maximum number of epoch to run
        patience=10,  # Number of epoch to wait before early stop if no progress
        dispFreq=10,  # Display to stdout the training progress every N updates
        n_words=10000,  # Vocabulary size
        validFreq=370,  # Compute the validation error after this number of update.
        saveFreq=1110,  # Save the parameters after every saveFreq updates
        valid_batch_size=64,  # The batch size used for validation/test set.

        #----------------------------------------------------------------------
        # Parameter for extra option (whatever that means)
        noise_std=0.,
        use_dropout=True,  # if False slightly faster, but worst test error
                           # This frequently need a bigger model.
        reload_model=None,  # Path to a saved model we want to start from.
        return_after_reloading=False,  # If True, rebuild the model from reload_model and return before the training loop.
        test_size=-1,  # If >0, we keep only this number of test example.
    ):
        """Train the LSTM classifier with periodic validation and early stopping.

        Builds the theano computation graph (optionally reloading previously
        saved parameters), then runs minibatch optimization, checkpointing
        the best parameters to `saveto` and early-stopping based on
        `patience` validation rounds without improvement.

        Returns a tuple (train_err, valid_err, test_err) of final error
        rates, or (1., 1., 1.) if the cost became NaN/inf.
        """

        # Resolve the optimizer name to its implementation. Note this happens
        # *before* locals() is captured, so model_options stores the callable.
        optimizer = OPTIMIZERS[optimizer]
        # Model options
        self.model_options = locals().copy()


        if reload_model:
            self.faulty_load_params(reload_model)
            # self.init_tparams()
            # Infer the layer sizes from the shapes of the reloaded weights.
            _, self.wdim = self.params['Wemb'].shape
            self.hdim, ydim = self.params['U'].shape

            self.model_options['ydim'] = ydim
            print _, self.wdim, self.hdim, ydim


        self.model_options['hdim'] = self.hdim
        self.model_options['wdim'] = self.wdim

        self.model_options['grad_clip_thresh'] = self.grad_clip_thresh
        print "model options", self.model_options

        # load_data, prepare_data = get_dataset(dataset)

        print 'Loading data'
        #each of the below is a tuple of
        # (list of sentences, where each is a list of word indices,
        #  list of integer labels)
        # NOTE(review): when reload_model is set but return_after_reloading is
        # False, train/valid/test are never loaded and the training loop below
        # would raise NameError -- presumably reloading is always paired with
        # return_after_reloading. Confirm with callers.
        if not reload_model:
            train, valid, test =  load_data(n_words=n_words, valid_portion=0.05,
                                           maxlen=self.maxlen, path=dataset)

            if test_size > 0:
                # The test set is sorted by size, but we want to keep random
                # size example.  So we must select a random selection of the
                # examples.
                idx = numpy.arange(len(test[0]))
                numpy.random.shuffle(idx)
                idx = idx[:test_size]
                test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

            # Number of target classes = highest label + 1.
            ydim = numpy.max(train[1]) + 1

            self.model_options['ydim'] = ydim

        print 'Building model'

        if not reload_model:
            # initialize the word embedding matrix and the parameters of the model (U and b) randomly
            # self.params is a dict mapping name (string) -> numpy ndarray
            self.init_params(self.model_options)

        # This creates Theano Shared Variable from the parameters.
        # Dict name (string) -> Theano Tensor Shared Variable
        # self.params and self.tparams have different copy of the weights.
        self.init_tparams()

        # use_noise is for dropout
        (use_noise, x, mask, y) =\
             self.build_model(self.model_options,)
         # f_pred_prob, self.f_pred, cost)


        if l2_reg_U > 0.:
            # Add L2 weight decay on the classifier weights U to the cost.
            l2_reg_U = theano.shared(numpy_floatX(l2_reg_U), name='l2_reg_U')
            weight_decay = 0.
            weight_decay += (self.tparams['U'] ** 2).sum()
            weight_decay *= l2_reg_U
            self.cost += weight_decay

        f_cost = theano.function([x, mask, y], self.cost, name='f_cost')

        # Gradients of the cost w.r.t. every parameter (Python 2: .values()
        # returns a list).
        grads = tensor.grad(self.cost, wrt=self.tparams.values())
        f_grad = theano.function([x, mask, y], grads, name='f_grad')

        lr = tensor.scalar(name='lr')
        # f_grad_shared computes the cost (and stashes gradients) for a
        # minibatch; f_update then applies the optimizer step at rate lr.
        f_grad_shared, f_update = optimizer(lr, self.tparams, grads,
                                            x, mask, y, self.cost)

        if self.debug:
            util.colorprint("Following is the graph of the shared gradient function (f_grad_shared):", "blue")
            theano.printing.debugprint(f_grad_shared.maker.fgraph.outputs[0])

        if return_after_reloading:
            self.model_has_been_trained = True
            return

        print 'Optimization'

        # Fixed minibatch index lists for evaluation.
        kf_valid = self.get_minibatches_idx(len(valid[0]), valid_batch_size)
        kf_test = self.get_minibatches_idx(len(test[0]), valid_batch_size)

        print "%d train examples" % len(train[0])
        print "%d valid examples" % len(valid[0])
        print "%d test examples" % len(test[0])

        history_errs = []  # [valid_err, test_err] recorded at each validation
        best_p = None  # best parameter snapshot (numpy) seen so far
        bad_count = 0  # NOTE(review): never read -- the early-stop logic below
                       # uses `bad_counter`, which is only initialized when
                       # validation improves; this looks like a typo.

        if validFreq == -1:
            validFreq = len(train[0]) / batch_size
        if saveFreq == -1:
            saveFreq = len(train[0]) / batch_size

        uidx = 0  # the number of update done
        estop = False  # early stop
        start_time = time.time()



        try:
            for epoch in xrange(max_epochs):
                sys.stdout.flush()
                n_samples = 0

                # Get new shuffled index for the training set.
                minibatches = self.get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

                for _, train_index_list in minibatches:
                    uidx += 1
                    use_noise.set_value(1.)  # enable dropout noise for training

                    # Select the random examples for this minibatch
                    y = [train[1][t] for t in train_index_list]
                    x = [train[0][t]for t in train_index_list]

                    # Get the data in numpy.ndarray format
                    # This swap the axis!
                    # Return something of shape (minibatch maxlen, n samples)
                    x, mask, y = prepare_data(x, y)
                    n_samples += x.shape[1]

                    cur_cost_val = f_grad_shared(x, mask, y)
                    f_update(lrate)

                    if numpy.isnan(cur_cost_val) or numpy.isinf(cur_cost_val):
                        # Diverged: bail out with sentinel error rates.
                        print 'NaN detected'
                        return 1., 1., 1.

                    if numpy.mod(uidx, dispFreq) == 0:
                        print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cur_cost_val

                    if saveto and numpy.mod(uidx, saveFreq) == 0:
                        print 'Saving...',

                        # Persist the best parameters seen so far (or the
                        # current ones if validation has not improved yet).
                        if best_p is not None:
                            self.params = best_p
                        else:
                            self.params = self.unzip(self.tparams)
                        numpy.savez(saveto, history_errs=history_errs, **self.params)
                        pkl.dump(self.model_options, open('%s.pkl' % saveto, 'wb'), -1)
                        print 'Done'

                    if numpy.mod(uidx, validFreq) == 0:
                        use_noise.set_value(0.)  # disable dropout for evaluation
                        train_err = self.pred_error(self.f_pred, prepare_data, train, minibatches)
                        valid_err = self.pred_error(self.f_pred, prepare_data, valid,
                                               kf_valid)
                        test_err = self.pred_error(self.f_pred, prepare_data, test, kf_test)

                        history_errs.append([valid_err, test_err])

                        # Snapshot parameters whenever the validation error
                        # matches or beats the best so far. (uidx is always
                        # >= 1 here, so the `uidx == 0` arm never fires.)
                        if (uidx == 0 or
                            valid_err <= numpy.array(history_errs)[:,
                                                                   0].min()):

                            best_p = self.unzip(self.tparams)
                            bad_counter = 0

                        print ('Train ', train_err, 'Valid ', valid_err,
                               'Test ', test_err)

                        # Early stopping: if the current validation error is
                        # no better than the best from more than `patience`
                        # validations ago, count a strike; too many strikes
                        # stop training.
                        if (len(history_errs) > patience and
                            valid_err >= numpy.array(history_errs)[:-patience,
                                                                   0].min()):
                            bad_counter += 1
                            if bad_counter > patience:
                                print 'Early Stop!'
                                estop = True
                                break

                print 'Seen %d samples' % n_samples

                if estop:
                    break

        except KeyboardInterrupt:
            print "Training interrupted"

        end_time = time.time()
        if best_p is not None:
            # Restore the best parameters into the shared variables.
            self.zipp(best_p, self.tparams)
        else:
            best_p = self.unzip(self.tparams)

        use_noise.set_value(0.)
        # Final error rates on the (sorted) train, valid, and test sets.
        kf_train_sorted = self.get_minibatches_idx(len(train[0]), batch_size)
        train_err = self.pred_error(self.f_pred, prepare_data, train, kf_train_sorted)
        valid_err = self.pred_error(self.f_pred, prepare_data, valid, kf_valid)
        test_err = self.pred_error(self.f_pred, prepare_data, test, kf_test)

        print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
        if saveto:
            numpy.savez(saveto, train_err=train_err,
                        valid_err=valid_err, test_err=test_err,
                        history_errs=history_errs, **best_p)
        print 'The code run for %d epochs, with %f sec/epochs' % (
            (epoch + 1), (end_time - start_time) / (1. * (epoch + 1)))
        print >> sys.stderr, ('Training took %.1fs' %
                              (end_time - start_time))
        self.model_has_been_trained = True
        return train_err, valid_err, test_err
        random.seed(args.seed)
        torch.manual_seed(args.seed)

    print(f'lr:{args.lr},step:{args.step_size},gamma:{args.gamma}')

    for data_name in data_list:

        print(data_name)
        vocab_dict_path = args.word_embed_file
        file_path = args.data_path + data_name + '.json'
        glove_data = 'data/' + data_name + '_.glove_data.pkl'
        glove_matrix = 'data/' + data_name + '_glove_matrix.pkl'

        glove_data, matrix, review_len = dataloader.word_to_id(
            glove_data, glove_matrix, vocab_dict_path, file_path)
        train_data, test_data, user_dict, item_dict, u_max, i_max, num_users, num_items = dataloader.prepare_data(
            glove_data)
        batch = dataloader.Batch(train_data,
                                 test_data,
                                 user_dict,
                                 item_dict,
                                 u_max,
                                 i_max,
                                 batch_size,
                                 review_len,
                                 train=True)  #(review_len是一条评论的长度)

        if args.base_model == 'NARRE':
            mainmodel = kbs_model.NARRE(num_users, num_items, matrix,
                                        review_len, args)

        elif args.base_model == 'PMF':
import torch
from dataloader import prepare_data
from KEflow.model import prepare_classifier
from KEflow.model.utils import weights_init
from KEflow.trainer import Trainer
from KEflow.config import TYPE_DATA, TYPE_CLS
from KEflow.config import CLS_CONFIG as Ccfg

# ---- data ----
# prepare_data returns (train split, dev split) for the configured data type.
trainset, devset = prepare_data("./data", TYPE_DATA)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=Ccfg["BATCH_SIZE"])
devloader = torch.utils.data.DataLoader(devset, batch_size=Ccfg["BATCH_SIZE"])

# ---- model / optimizer ----
model = prepare_classifier(TYPE_CLS, Ccfg["NC"], Ccfg["N_CLASS"])
# model.apply(weights_init)  # optional: re-initialize weights before training
optimizer = torch.optim.Adam(model.parameters(),
                             lr=Ccfg["LR"],
                             weight_decay=Ccfg["WD"])

# ---- criterion ----
criterion = torch.nn.CrossEntropyLoss()

# ---- train ----
trainer = Trainer(model,
                  optimizer,
                  criterion,
                  trainloader,
                  devloader,
                  best_save_path="ckpts/")
# trainer.load("ckpts/best.pt")  # optional: resume from a previous checkpoint
trainer.train(Ccfg["EPOCHS"], Ccfg["PRINT_FREQ"], Ccfg["VAL_FREQ"])

# ---- save ----
# Fixed: plain string literal -- the f-prefix had no placeholders (F541).
trainer.save("ckpts/classifier.pt")
Esempio n. 19
0
from dataloader import prepare_data
from torchvision.utils import save_image
import os

from KEflow.config import TYPE_DATA

# Only the second split returned by prepare_data is needed here.
_, dataset = prepare_data('./data', TYPE_DATA)

# Create the per-data-type output directory on first run.
if not os.path.exists(f'aided_sample/{TYPE_DATA}'):
    os.makedirs(f'aided_sample/{TYPE_DATA}')

# Dump the first 500 samples as normalized PNGs, naming each file with
# its index and class label.
for idx in range(500):
    image, label = dataset[idx]
    out_path = os.path.join("aided_sample", TYPE_DATA, f"{idx}_image{label}.png")
    save_image(image, out_path, normalize=True)
Esempio n. 20
0
def main():
    """Entry point for few-shot meta-training / meta-testing.

    Parses command-line flags, seeds all RNGs, builds the learner pair and
    the LSTM meta-learner, then either evaluates a checkpoint
    (``--mode test``) or runs episodic meta-training with periodic
    meta-validation and checkpointing of the best model.

    Raises:
        NameError: if unrecognized command-line arguments are supplied.
    """

    args, unparsed = FLAGS.parse_known_args()
    if len(unparsed) != 0:
        raise NameError("Argument {} not recognized".format(unparsed))

    # Seed every RNG in play so runs are reproducible.
    if args.seed is None:
        # BUGFIX: random.randint requires integer bounds; 1e3 is a float
        # and is rejected on modern Python versions.
        args.seed = random.randint(0, 1000)
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.dev = torch.device('cpu')

    logger = GOATLogger(args)

    # Get data
    train_loader, val_loader, test_loader = prepare_data(args)

    # Set up learner, meta-learner.  learner_w_grad is trained inside each
    # episode; learner_wo_grad receives the meta-learned parameters (cI)
    # and is the module the meta-loss is backpropagated through.
    learner_w_grad = Learner(args.image_size, args.bn_eps, args.bn_momentum,
                             args.n_class).to(args.dev)
    learner_wo_grad = copy.deepcopy(learner_w_grad)
    metalearner = MetaLearner(args.input_size, args.hidden_size,
                              learner_w_grad.get_flat_params().size(0)).to(
                                  args.dev)
    metalearner.init_cI(learner_w_grad.get_flat_params())

    # Set up loss, optimizer, learning rate scheduler
    optim = torch.optim.Adam(metalearner.parameters(), args.lr)

    # BUGFIX: last_eps was previously only bound inside the resume branch,
    # so running with --mode test and no --resume raised UnboundLocalError.
    last_eps = 0
    if args.resume:
        logger.loginfo("Initialized from: {}".format(args.resume))
        last_eps, metalearner, optim = resume_ckpt(metalearner, optim,
                                                   args.resume, args.dev)

    if args.mode == 'test':
        _ = meta_test(last_eps, test_loader, learner_w_grad, learner_wo_grad,
                      metalearner, args, logger)
        return

    best_acc = 0.0
    logger.loginfo("Start training")
    # Meta-training
    for eps, (episode_x, episode_y) in enumerate(train_loader):
        # episode_x.shape = [n_class, n_shot + n_eval, c, h, w]
        # episode_y.shape = [n_class, n_shot + n_eval] --> NEVER USED
        train_input = episode_x[:, :args.n_shot].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_shot, :]
        train_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_shot)).to(args.dev)  # [n_class * n_shot]
        test_input = episode_x[:, args.n_shot:].reshape(
            -1, *episode_x.shape[-3:]).to(args.dev)  # [n_class * n_eval, :]
        test_target = torch.LongTensor(
            np.repeat(range(args.n_class),
                      args.n_eval)).to(args.dev)  # [n_class * n_eval]

        # Train learner with metalearner.  Batch-norm statistics are reset
        # per episode so each task starts from clean running stats.
        learner_w_grad.reset_batch_stats()
        learner_wo_grad.reset_batch_stats()
        learner_w_grad.train()
        learner_wo_grad.train()
        cI = train_learner(learner_w_grad, metalearner, train_input,
                           train_target, args)

        # Train meta-learner with validation loss
        learner_wo_grad.transfer_params(learner_w_grad, cI)
        output = learner_wo_grad(test_input)
        loss = learner_wo_grad.criterion(output, test_target)
        acc = accuracy(output, test_target)

        optim.zero_grad()
        loss.backward()
        # Clip meta-learner gradients to stabilize the LSTM updates.
        nn.utils.clip_grad_norm_(metalearner.parameters(), args.grad_clip)
        optim.step()

        logger.batch_info(eps=eps,
                          totaleps=args.episode,
                          loss=loss.item(),
                          acc=acc,
                          phase='train')

        # Meta-validation: checkpoint and evaluate every val_freq episodes.
        if eps % args.val_freq == 0 and eps != 0:
            print('start eval')
            save_ckpt(eps, metalearner, optim, args.save)
            acc = meta_test(eps, val_loader, learner_w_grad, learner_wo_grad,
                            metalearner, args, logger)
            if acc > best_acc:
                best_acc = acc
                logger.loginfo("* Best accuracy so far *\n")

    logger.loginfo("Done")