def build_model(self):
        with tf.name_scope("batch_size"):
            # Get batch_size from the first dimension of self.images
            self.batch_size = tf.shape(self.images)[0]

        with tf.variable_scope("cnn"):
            image_emb = slim.fully_connected(self.fc7, self.input_encoding_size, activation_fn=None, scope='encode_image')
        with tf.variable_scope("rnnlm"):
            # Replicate self.seq_per_img times for each image embedding
            image_emb = tf.reshape(tf.tile(tf.expand_dims(image_emb, 1), [1, self.seq_per_img, 1]), [self.batch_size * self.seq_per_img, self.input_encoding_size])

            # rnn_inputs is a list of tensors, one per time step;
            # time step 0 is the image embedding
            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=tf.nn.embedding_lookup(self.Wemb, self.labels[:,:self.seq_length + 1]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]
            rnn_inputs = [image_emb] + rnn_inputs

            # The initial state is zero
            initial_state = self.cell.zero_state(self.batch_size * self.seq_per_img, tf.float32)

            outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(rnn_inputs, initial_state, self.cell, loop_function=None)
            
            outputs = tf.concat(axis=0, values=outputs[1:])
            self.logits = slim.fully_connected(outputs, self.vocab_size + 1, activation_fn=None, scope='logit')
            self.logits = tf.split(axis=0, num_or_size_splits=len(rnn_inputs) - 1, value=self.logits)

        with tf.variable_scope("loss"):
            loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(self.logits,
                    [tf.squeeze(label, [1]) for label in tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.labels[:, 1:])], # self.labels[:,1:] is the target
                    [tf.squeeze(mask, [1]) for mask in tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.masks[:, 1:])])
            self.cost = tf.reduce_mean(loss)
        
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        # Collect the RNN variables and create the optimizer for the RNN
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
        grads = utils.clip_by_value(tf.gradients(self.cost, tvars), -self.opt.grad_clip, self.opt.grad_clip)
        #grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
        #        self.opt.grad_clip)
        optimizer = utils.get_optimizer(self.opt, self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Collect the CNN variables and create the optimizer for the CNN
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
        cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars), -self.opt.grad_clip, self.opt.grad_clip)
        #cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
        #        self.opt.grad_clip)
        cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr) 
        self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

        tf.summary.scalar('training loss', self.cost)
        tf.summary.scalar('learning rate', self.lr)
        tf.summary.scalar('cnn learning rate', self.cnn_lr)
        self.summaries = tf.summary.merge_all()
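
Both optimizer branches above call utils.clip_by_value on a list of gradients. A minimal sketch of that helper, assuming it clips each gradient element-wise and passes None gradients (variables the loss does not reach) through unchanged:

import tensorflow as tf

def clip_by_value(grads, clip_min, clip_max):
    # Element-wise clipping of each gradient tensor; None entries are kept
    # so the list still lines up with its variables in apply_gradients.
    return [tf.clip_by_value(g, clip_min, clip_max) if g is not None else None
            for g in grads]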
Example #2
def main(hparams):
    model_info = hparams.model_info
    train_info = hparams.trainloader_info
    val_info = hparams.valloader_info
    test_info = hparams.testloader_info
    optimizer_info = hparams.optimizer_info
    main_info = hparams.main_info

    # initialize model and dataloader
    model = MMNIST_ConvLSTM(model_info)
    model = model.cuda()
    for name, param in model.named_parameters():
        print(name, param.size())
    num_epochs = main_info['num_epochs']
    # learning rate scheduler
    halve_every = main_info['halve_every']

    train_loader = get_dataloader(train_info)
    val_loader = get_dataloader(val_info)
    test_loader = get_dataloader(test_info)
    optimizer = get_optimizer(optimizer_info, model.parameters())

    criterion = cross_entropy

    for epoch in range(num_epochs):
        if train_loader is not None:
            adjust_learning_rate(optimizer, epoch, halve_every)
            traininfo = {
                'epoch': epoch,
                'num_epochs': num_epochs,
                'clip': main_info['clip']
            }
            train(train_loader, model, optimizer, criterion, traininfo)
        if val_loader is not None:
            valinfo = {'epoch': epoch, 'num_epochs': num_epochs}
            loss = val(val_loader, model, valinfo, criterion)
            res = saver.save(model, optimizer, loss, epoch)
            if res:
                logger.info('\033[96m[Best model]\033[0m on validation set!')
        if test_loader is not None:
            # TODO
            pass
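
The loop above calls adjust_learning_rate with main_info['halve_every']. A sketch of one plausible implementation, assuming the intent is to halve the learning rate every halve_every epochs (the exact schedule is not shown here):

def adjust_learning_rate(optimizer, epoch, halve_every):
    # Halve the base learning rate every `halve_every` epochs (assumed
    # behavior, inferred from the parameter name).
    scale = 0.5 ** (epoch // halve_every)
    for group in optimizer.param_groups:
        if 'initial_lr' not in group:
            group['initial_lr'] = group['lr']
        group['lr'] = group['initial_lr'] * scale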
Example #3
    def build_model(self):
        with tf.name_scope("batch_size"):
            # Get batch_size from the first dimension of self.images
            self.batch_size = tf.shape(self.images)[0]
        with tf.variable_scope("rnnlm"):
            # Flatten the context
            flattened_ctx = tf.reshape(self.context,
                                       [self.batch_size, 196, 512])

            # Initialize the first hidden state from the fc7 feature
            initial_state = utils.get_initial_state(self.fc7,
                                                    self.cell.state_size)
            # Replicate self.seq_per_img times for each state and image embedding
            self.initial_state = initial_state = utils.expand_feat(
                initial_state, self.seq_per_img)
            self.flattened_ctx = flattened_ctx = tf.reshape(
                tf.tile(tf.expand_dims(flattened_ctx, 1),
                        [1, self.seq_per_img, 1, 1]),
                [self.batch_size * self.seq_per_img, 196, 512])

            # Project the context for the attention module; doing this outside
            # the loop avoids recomputing it at every time step.
            if self.att_hid_size == 0:
                pctx = slim.fully_connected(
                    self.flattened_ctx, 1, activation_fn=None,
                    scope='ctx_att')  # (batch * seq_per_img) * 196 * 1
            else:
                pctx = slim.fully_connected(
                    self.flattened_ctx,
                    self.att_hid_size,
                    activation_fn=None,
                    scope='ctx_att'
                )  # (batch * seq_per_img) * 196 * att_hid_size

            rnn_inputs = tf.split(axis=1,
                                  num_or_size_splits=self.seq_length + 1,
                                  value=tf.nn.embedding_lookup(
                                      self.Wemb,
                                      self.labels[:, :self.seq_length + 1]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

            prev_h = utils.last_hidden_vec(initial_state)

            self.alphas = []
            self.logits = []
            state = initial_state
            for ind in range(self.seq_length + 1):
                if ind > 0:
                    # Reuse the variables after the first timestep.
                    tf.get_variable_scope().reuse_variables()

                with tf.variable_scope("attention"):
                    alpha = self.get_alpha(prev_h, pctx)
                    self.alphas.append(alpha)
                    weighted_context = tf.reduce_sum(
                        flattened_ctx * tf.expand_dims(alpha, 2), 1)

                output, state = self.cell(
                    tf.concat(axis=1,
                              values=[weighted_context, rnn_inputs[ind]]),
                    state)
                # Save the current output for next time step attention
                prev_h = output
                # Get the score of each word in the vocabulary; index 0 is the end token.
                self.logits.append(
                    slim.fully_connected(output,
                                         self.vocab_size + 1,
                                         activation_fn=None,
                                         scope='logit'))

        with tf.variable_scope("loss"):
            loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
                self.logits,
                [
                    tf.squeeze(label, [1])
                    for label in tf.split(axis=1,
                                          num_or_size_splits=self.seq_length +
                                          1,
                                          value=self.labels[:, 1:])
                ],  # self.labels[:,1:] is the target; ignore the first start token
                [
                    tf.squeeze(mask, [1])
                    for mask in tf.split(axis=1,
                                         num_or_size_splits=self.seq_length +
                                         1,
                                         value=self.masks[:, 1:])
                ])
            self.cost = tf.reduce_mean(loss)

        self.final_state = state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        # Collect the RNN variables and create the optimizer for the RNN
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='rnnlm')
        grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
        optimizer = utils.get_optimizer(self.opt, self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Collect the CNN variables and create the optimizer for the CNN
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='cnn')
        cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                        -self.opt.grad_clip,
                                        self.opt.grad_clip)
        cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
        self.cnn_train_op = cnn_optimizer.apply_gradients(
            zip(cnn_grads, cnn_tvars))

        tf.summary.scalar('training loss', self.cost)
        tf.summary.scalar('learning rate', self.lr)
        tf.summary.scalar('cnn learning rate', self.cnn_lr)
        self.summaries = tf.summary.merge_all()
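
utils.expand_feat mirrors, for the RNN state, the tile-and-reshape pattern applied to flattened_ctx above. A sketch under that assumption:

import tensorflow as tf

def expand_feat(state, n):
    # Replicate every tensor in a (possibly nested) RNN state n times along
    # the batch axis, matching the tf.tile/tf.reshape pattern used on
    # flattened_ctx above (a sketch; the real helper may differ).
    if isinstance(state, tuple):
        parts = [expand_feat(s, n) for s in state]
        # Preserve namedtuple state types such as LSTMStateTuple.
        return type(state)(*parts) if hasattr(state, '_fields') else tuple(parts)
    depth = state.get_shape().as_list()[-1]
    return tf.reshape(tf.tile(tf.expand_dims(state, 1), [1, n, 1]), [-1, depth])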
Example #4
    def build_model(self):
        with tf.name_scope("batch_size"):
            # Get batch_size from the first dimension of self.images
            self.batch_size = tf.shape(self.images)[0]
        with tf.variable_scope("rnnlm"):
            flattened_ctx = tf.reshape(self.context,
                                       [self.batch_size, 196, 512])
            ctx_mean = tf.reduce_mean(flattened_ctx, 1)

            # Initialize the first hidden state with the mean context
            initial_state = utils.get_initial_state(ctx_mean,
                                                    self.cell.state_size)
            # Replicate self.seq_per_img times for each state and image embedding
            self.initial_state = initial_state = utils.expand_feat(
                initial_state, self.seq_per_img)
            self.flattened_ctx = flattened_ctx = tf.reshape(
                tf.tile(tf.expand_dims(flattened_ctx, 1),
                        [1, self.seq_per_img, 1, 1]),
                [self.batch_size * self.seq_per_img, 196, 512])

            rnn_inputs = tf.split(axis=1,
                                  num_or_size_splits=self.seq_length + 1,
                                  value=tf.nn.embedding_lookup(
                                      self.Wemb,
                                      self.labels[:, :self.seq_length + 1]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

            outputs, last_state = tf.contrib.legacy_seq2seq.attention_decoder(
                rnn_inputs,
                initial_state,
                flattened_ctx,
                self.cell,
                loop_function=None)
            outputs = tf.concat(axis=0, values=outputs)

            self.logits = slim.fully_connected(outputs,
                                               self.vocab_size + 1,
                                               activation_fn=None,
                                               scope='logit')
            self.logits = tf.split(axis=0,
                                   num_or_size_splits=len(rnn_inputs),
                                   value=self.logits)

        with tf.variable_scope("loss"):
            loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
                self.logits,
                [
                    tf.squeeze(label, [1])
                    for label in tf.split(axis=1,
                                          num_or_size_splits=self.seq_length +
                                          1,
                                          value=self.labels[:, 1:])
                ],  # self.labels[:,1:] is the target
                [
                    tf.squeeze(mask, [1])
                    for mask in tf.split(axis=1,
                                         num_or_size_splits=self.seq_length +
                                         1,
                                         value=self.masks[:, 1:])
                ])
            self.cost = tf.reduce_mean(loss)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        # Collect the RNN variables and create the optimizer for the RNN
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='rnnlm')
        grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
        #grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
        #        self.opt.grad_clip)
        optimizer = utils.get_optimizer(self.opt, self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Collect the CNN variables and create the optimizer for the CNN
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='cnn')
        cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                        -self.opt.grad_clip,
                                        self.opt.grad_clip)
        #cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
        #        self.opt.grad_clip)
        cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
        self.cnn_train_op = cnn_optimizer.apply_gradients(
            zip(cnn_grads, cnn_tvars))

        tf.summary.scalar('training loss', self.cost)
        tf.summary.scalar('learning rate', self.lr)
        tf.summary.scalar('cnn learning rate', self.cnn_lr)
        self.summaries = tf.summary.merge_all()
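
utils.get_initial_state maps the mean context vector to the cell's initial state. A sketch, assuming one tanh-activated projection per state component (the actual helper may differ):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def get_initial_state(feat, state_size):
    # Map a feature vector to an RNN initial state with one tanh-activated
    # linear layer per state component; recurses over nested state sizes
    # such as LSTMStateTuple or MultiRNNCell tuples (a sketch).
    if isinstance(state_size, tuple):
        parts = [get_initial_state(feat, s) for s in state_size]
        return type(state_size)(*parts) if hasattr(state_size, '_fields') else tuple(parts)
    return slim.fully_connected(feat, state_size, activation_fn=tf.nn.tanh)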
Example #5
    def build_model(self):
        with tf.name_scope("batch_size"):
            # Get batch_size from the first dimension of self.images
            self.batch_size = tf.shape(self.images)[0]
        with tf.variable_scope("rnnlm"):
            # Flatten the context
            flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
            ctx_mean = tf.reduce_mean(flattened_ctx, 1)
            
            # Initialize the first hidden state with the mean context
            initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)
            # Replicate self.seq_per_img times for each state and image embedding
            self.initial_state = initial_state = utils.expand_feat(initial_state, self.seq_per_img)
            self.flattened_ctx = flattened_ctx = tf.reshape(tf.tile(tf.expand_dims(flattened_ctx, 1), [1, self.seq_per_img, 1, 1]), 
                [self.batch_size * self.seq_per_img, 196, 512])

            # Project the context for the attention module; doing this outside
            # the loop avoids recomputing it at every time step.
            if self.att_hid_size == 0:
                pctx = slim.fully_connected(self.flattened_ctx, 1, activation_fn=None, scope='ctx_att') # (batch * seq_per_img) * 196 * 1
            else:
                pctx = slim.fully_connected(self.flattened_ctx, self.att_hid_size, activation_fn=None, scope='ctx_att') # (batch * seq_per_img) * 196 * att_hid_size

            rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=tf.nn.embedding_lookup(self.Wemb, self.labels[:,:self.seq_length + 1]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

            prev_h = utils.last_hidden_vec(initial_state)

            self.alphas = []
            self.logits = []
            state = initial_state
            for ind in range(self.seq_length + 1):
                if ind > 0:
                    # Reuse the variables after the first timestep.
                    tf.get_variable_scope().reuse_variables()

                with tf.variable_scope("attention"):
                    alpha = self.get_alpha(prev_h, pctx)
                    self.alphas.append(alpha)
                    weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)
                    
                output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_inputs[ind]]), state)
                # Save the current output for next time step attention
                prev_h = output
                # Get the score of each word in the vocabulary; index 0 is the end token.
                self.logits.append(slim.fully_connected(output, self.vocab_size + 1, activation_fn=None, scope='logit'))
                
        with tf.variable_scope("loss"):
            loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
                    self.logits,
                    [tf.squeeze(label, [1]) for label in tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.labels[:, 1:])], # self.labels[:,1:] is the target; ignore the first start token
                    [tf.squeeze(mask, [1]) for mask in tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.masks[:, 1:])])
            self.cost = tf.reduce_mean(loss)

        self.final_state = state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        # Collect the RNN variables and create the optimizer for the RNN
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
        grads = utils.clip_by_value(tf.gradients(self.cost, tvars), -self.opt.grad_clip, self.opt.grad_clip)
        optimizer = utils.get_optimizer(self.opt, self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Collect the CNN variables and create the optimizer for the CNN
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
        cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars), -self.opt.grad_clip, self.opt.grad_clip)
        cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr) 
        self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

        tf.summary.scalar('training loss', self.cost)
        tf.summary.scalar('learning rate', self.lr)
        tf.summary.scalar('cnn learning rate', self.cnn_lr)
        self.summaries = tf.summary.merge_all()
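
self.get_alpha implements the soft-attention scoring used in the loop above. A sketch for the att_hid_size > 0 branch, assuming standard additive attention over the 196 spatial locations; the layer scopes 'h_att' and 'alpha' are hypothetical names:

def get_alpha(self, prev_h, pctx):
    # Additive attention: project the previous hidden state, combine it with
    # the pre-projected context, score each location, softmax-normalize.
    prev_h_att = slim.fully_connected(prev_h, self.att_hid_size,
                                      activation_fn=None, scope='h_att')
    dot = tf.nn.tanh(pctx + tf.expand_dims(prev_h_att, 1))    # B x 196 x att_hid
    scores = slim.fully_connected(dot, 1, activation_fn=None,
                                  scope='alpha')              # B x 196 x 1
    return tf.nn.softmax(tf.squeeze(scores, [2]))             # B x 196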
Example #6
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train generator, classifier and critic jointly."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()
    critic.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")
    optimizer_g = get_optimizer(generator, "Adam")
    optimizer_d = get_optimizer(critic, "Adam")

    # build infinite iterators over the source and target data
    data_iter_src = get_inf_iterator(src_data_loader)
    data_iter_tgt = get_inf_iterator(tgt_data_loader)

    # counter
    g_step = 0

    # positive and negative labels
    pos_labels = make_variable(torch.FloatTensor([1]))
    neg_labels = make_variable(torch.FloatTensor([-1]))

    ####################
    # 2. train network #
    ####################

    for epoch in range(params.num_epochs):
        ###########################
        # 2.1 train discriminator #
        ###########################
        # requires to compute gradients for D
        for p in critic.parameters():
            p.requires_grad = True

        # set steps for discriminator
        if g_step < 25 or g_step % 500 == 0:
            # this helps to start with the critic at optimum
            # even in the first iterations.
            critic_iters = 100
        else:
            critic_iters = params.d_steps

        # loop for optimizing discriminator
        for d_step in range(critic_iters):
            # convert images into torch.Variable
            images_src, labels_src = next(data_iter_src)
            images_tgt, _ = next(data_iter_tgt)
            images_src = make_variable(images_src)
            labels_src = make_variable(labels_src.squeeze_())
            images_tgt = make_variable(images_tgt)
            if images_src.size(0) != params.batch_size or \
                    images_tgt.size(0) != params.batch_size:
                continue

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # compute source data loss for discriminator
            feat_src = generator(images_src)
            d_loss_src = critic(feat_src.detach())
            d_loss_src = d_loss_src.mean()
            d_loss_src.backward(neg_labels)

            # compute target data loss for discriminator
            feat_tgt = generator(images_tgt)
            d_loss_tgt = critic(feat_tgt.detach())
            d_loss_tgt = d_loss_tgt.mean()
            d_loss_tgt.backward(pos_labels)

            # compute gradient penalty
            gradient_penalty = calc_gradient_penalty(critic, feat_src.data,
                                                     feat_tgt.data)
            gradient_penalty.backward()

            # optimize weights of discriminator
            d_loss = -d_loss_src + d_loss_tgt + gradient_penalty
            optimizer_d.step()

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # compute loss for critic
        preds_c = classifier(generator(images_src).detach())
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()

        #######################
        # 2.3 train generator #
        #######################
        # avoid to compute gradients for D
        for p in critic.parameters():
            p.requires_grad = False

        # zero grad for optimizer of generator
        optimizer_g.zero_grad()

        # compute source data classification loss for generator
        feat_src = generator(images_src)
        preds_c = classifier(feat_src)
        g_loss_cls = criterion(preds_c, labels_src)
        g_loss_cls.backward()

        # compute source data discrimination loss for generator
        feat_src = generator(images_src)
        g_loss_src = critic(feat_src).mean()
        g_loss_src.backward(pos_labels)

        # compute target data discrimination loss for generator
        feat_tgt = generator(images_tgt)
        g_loss_tgt = critic(feat_tgt).mean()
        g_loss_tgt.backward(neg_labels)

        # compute loss for generator
        g_loss = g_loss_src - g_loss_tgt + g_loss_cls

        # optimize weights of generator
        optimizer_g.step()
        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        if ((epoch + 1) % params.log_step == 0):
            print("Epoch [{}/{}]:"
                  "d_loss={:.5f} c_loss={:.5f} g_loss={:.5f} "
                  "D(x)={:.5f} D(G(z))={:.5f} GP={:.5f}".format(
                      epoch + 1, params.num_epochs, d_loss.data[0],
                      c_loss.data[0], g_loss.data[0], d_loss_src.data[0],
                      d_loss_tgt.data[0], gradient_penalty.data[0]))

        #############################
        # 2.5 save model parameters #
        #############################
        if ((epoch + 1) % params.save_step == 0):
            save_model(critic, "WGAN-GP_critic-{}.pt".format(epoch + 1))
            save_model(classifier,
                       "WGAN-GP_classifier-{}.pt".format(epoch + 1))
            save_model(generator, "WGAN-GP_generator-{}.pt".format(epoch + 1))

    return classifier, generator
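
calc_gradient_penalty is the WGAN-GP regularizer invoked in the critic loop above. A sketch of the standard penalty (Gulrajani et al., 2017) on straight-line interpolates; the lambda_gp weight of 10 and the 2D [batch, dim] feature shape are assumptions, not taken from the snippet:

import torch
from torch import autograd

def calc_gradient_penalty(critic, real_data, fake_data, lambda_gp=10.0):
    # Interpolate between real and fake features at a random point per sample,
    # then push the critic's gradient norm at that point toward 1.
    alpha = torch.rand(real_data.size(0), 1, device=real_data.device)
    interpolates = (alpha * real_data + (1 - alpha) * fake_data).requires_grad_(True)
    scores = critic(interpolates)
    grads = autograd.grad(outputs=scores, inputs=interpolates,
                          grad_outputs=torch.ones_like(scores),
                          create_graph=True, retain_graph=True)[0]
    return lambda_gp * ((grads.norm(2, dim=1) - 1) ** 2).mean()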
Example #7
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt,
                 pseudo_labels, plot):
    """Perform Doamin Adaptation between source and target domains."""
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    if 0:  # debug toggle: set to 1 to use Adam instead of SGD
        optimType = "Adam"
        cfg.learning_rate = 1.0E-4
    else:
        optimType = "sgd"
        cfg.learning_rate = 1.0E-4
    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    # get labelled target dataset
    print('pseudo_labels = %s' % str(pseudo_labels))
    target_dataset_labelled = get_dummy(target_dataset,
                                        excerpt,
                                        pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

    print('target_dataset_labelled = %d' % len(target_dataset_labelled))

    # start training
    plt.figure()

    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        losses = []

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = make_data_loader(target_dataset_labelled)
        target_dataloader_labelled_iter = get_inf_iterator(
            target_dataloader_labelled)

        if 0:
            plt.figure()
            atr.showDataSet(target_dataloader_labelled)
            plt.waitforbuttonpress()

        if 0:
            # There's a bug here: the labels are not the same data type. Print them out to check!
            source_dataloader_iter = get_inf_iterator(
                make_data_loader(source_dataset))

            a, b = next(source_dataloader_iter)
            c, d = next(target_dataloader_labelled_iter)
            print('source labels = {}'.format(b))
            print('target labels = {}'.format(d))
            sys.exit(0)

        for epoch in range(cfg.num_epochs_adapt):
            if optimType == 'sgd':
                adjustLearningRate(optimizer_F, cfg.learning_rate, epoch,
                                   cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_1, cfg.learning_rate, epoch,
                                   cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_2, cfg.learning_rate, epoch,
                                   cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_t, cfg.learning_rate, epoch,
                                   cfg.num_epochs_adapt)

            for step, (images, labels) in enumerate(merged_dataloader):
                if images.shape[0] < cfg.batch_size:
                    print('WARNING: batch of size %d smaller than desired %d: skipping' % \
                          (images.shape[0], cfg.batch_size))
                    continue

                # sample from T_l
                images_tgt, labels_tgt = next(target_dataloader_labelled_iter)
                while images_tgt.shape[0] < cfg.batch_size:
                    print('WARNING: target batch of size %d smaller than desired %d' % \
                          (images_tgt.shape[0], cfg.batch_size))
                    images_tgt, labels_tgt = next(
                        target_dataloader_labelled_iter)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                out_F = F(images)
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # compute labelling loss
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + 0.03 * loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                losses.append(loss_F_t.item())

                # print step info
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item(),
                          ))

                    if plot:
                        plt.clf()
                        plt.plot(losses)
                        plt.grid(1)
                        plt.title(
                            'Loss for domain adaptation, k = {}/{}, epoch = {}/{}'
                            .format(k, cfg.num_epochs_k, epoch,
                                    cfg.num_epochs_adapt))
                        plt.waitforbuttonpress(0.0001)

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        excerpt, pseudo_labels = generate_labels(F,
                                                 F_1,
                                                 F_2,
                                                 target_dataset,
                                                 num_target,
                                                 useWeightedSampling=True)
        print(">>> Genrate pseudo labels [{}] numtarget = {}".format(
            len(target_dataset_labelled), num_target))

        print('sizes = {}, {}, excerpt = {}, \npseudo_labels = {}'.format(
            len(excerpt), len(pseudo_labels), excerpt, pseudo_labels))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset,
                                            excerpt,
                                            pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset(
            [source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
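
get_inf_iterator is used throughout these examples to draw batches without worrying about epoch boundaries. A minimal sketch:

def get_inf_iterator(data_loader):
    # Yield batches forever, restarting the DataLoader whenever it is
    # exhausted.
    while True:
        for batch in data_loader:
            yield batch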
Example #8
def pre_train(F, F_1, F_2, F_t, source_data, plot):
    """Pre-train models on source domain dataset."""
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    if 0:  # debug toggle: set to 1 to use Adam instead of SGD
        optimType = "Adam"
        cfg.learning_rate = 1.0E-4
    else:
        optimType = "sgd"
        cfg.learning_rate = 1.0E-3
    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    losses = []

    if plot:
        plt.figure()

    # start training
    for epoch in range(cfg.num_epochs_pre):
        if optimType == 'sgd':
            adjustLearningRate(optimizer_F, cfg.learning_rate, epoch,
                               cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_1, cfg.learning_rate, epoch,
                               cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_2, cfg.learning_rate, epoch,
                               cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_t, cfg.learning_rate, epoch,
                               cfg.num_epochs_pre)

        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)

            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + 0.03 * loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            losses.append(loss_F.item())

            # print step info
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})"
                      #!!                      "F_t={:.5f})"
                      .format(epoch + 1,
                              cfg.num_epochs_pre,
                              step + 1,
                              len(source_data),
                              loss_F.item(), #.data[0],
                              loss_F_1.item(), #.data[0],
                              loss_F_2.item(), #.data[0],
                              loss_F_t.item(), #.data[0],
                              loss_similiar.item(), #.data[0],
                              ))

                if plot:
                    plt.clf()
                    plt.plot(losses)
                    plt.grid(1)
                    plt.title('Loss for pre-training')
                    plt.waitforbuttonpress(0.0001)

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
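
calc_similiar_penalty (the misspelling is the original identifier) keeps the two labellers F_1 and F_2 from collapsing into the same classifier. A sketch of the usual weight-similarity term from asymmetric tri-training, assuming each head's first Linear layer holds its weight matrix:

import torch

def calc_similiar_penalty(F_1, F_2):
    # |W1^T W2| summed over all entries: small when the two weight matrices
    # are close to orthogonal, large when the heads learn the same view.
    W1 = list(F_1.parameters())[0]
    W2 = list(F_2.parameters())[0]
    return torch.sum(torch.abs(torch.mm(W1.t(), W2)))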
Example #9
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt,
                 pseudo_labels):
    """Perform Doamin Adaptation between source and target domains."""
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # get labelled target dataset
    target_dataset_labelled = get_dummy(target_dataset,
                                        excerpt,
                                        pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

    # start training
    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = get_inf_iterator(
            make_data_loader(target_dataset_labelled))

        for epoch in range(cfg.num_epochs_adapt):
            for step, (images, labels) in enumerate(merged_dataloader):
                # sample from T_l
                images_tgt, labels_tgt = next(target_dataloader_labelled)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                out_F = F(images)
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # compute labelling loss
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                # print step info
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item(),
                          ))

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        excerpt, pseudo_labels = genarate_labels(F, F_1, F_2, target_dataset,
                                                 num_target)
        print(">>> Genrate pseudo labels [{}]".format(
            len(target_dataset_labelled)))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset,
                                            excerpt,
                                            pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset(
            [source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
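
make_variable wraps raw tensors for the legacy (pre-0.4) Variable API these examples are written against. A minimal sketch, assuming it also moves data to the GPU when one is available:

import torch
from torch.autograd import Variable

def make_variable(tensor, volatile=False):
    # Move the tensor to the GPU if possible, then wrap it in a Variable.
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor, volatile=volatile)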
Example #10
def pre_train(F, F_1, F_2, F_t, source_data):
    """Pre-train models on source domain dataset."""
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # start training
    for epoch in range(cfg.num_epochs_pre):
        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)
            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            # print step info
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})".format(
                          epoch + 1,
                          cfg.num_epochs_pre,
                          step + 1,
                          len(source_data),
                          loss_F.item(),
                          loss_F_1.item(),
                          loss_F_2.item(),
                          loss_F_t.item(),
                          loss_similiar.item(),
                      ))

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
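
save_model is called with bare filenames throughout. A sketch of a typical implementation, assuming models are written under a cfg.model_root output directory (that location is an assumption):

import os
import torch

def save_model(net, filename):
    # Persist only the parameters (state_dict), creating the output
    # directory on first use.
    if not os.path.exists(cfg.model_root):
        os.makedirs(cfg.model_root)
    torch.save(net.state_dict(), os.path.join(cfg.model_root, filename))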
Example #11
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train generator, classifier and critic jointly."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")

    # zip source and target data pair
    data_iter_src = get_inf_iterator(src_data_loader)

    # counter
    g_step = 0

    ####################
    # 2. train network #
    ####################

    for epoch in range(params.num_epochs):
        ###########################
        # 2.1 train discriminator #
        ###########################
        # requires to compute gradients for D
        for p in critic.parameters():
            p.requires_grad = True

        # set steps for discriminator
        if g_step < 25 or g_step % 500 == 0:
            # this helps to start with the critic at optimum
            # even in the first iterations.
            critic_iters = 100
        else:
            critic_iters = params.d_steps
        critic_iters = 0  # discriminator updates are disabled in this variant
        # loop for optimizing discriminator (disabled)
        #for d_step in range(critic_iters):
        # convert images into torch.Variable
        images_src, labels_src = next(data_iter_src)

        images_src = make_variable(images_src).cuda()
        labels_src = make_variable(labels_src.squeeze_()).cuda()
        # print(type(images_src))

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # compute loss for critic
        preds_c = classifier(generator(images_src))
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()
        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        if ((epoch + 1) % 500 == 0):
            # print("Epoch [{}/{}]:"
            #       "c_loss={:.5f}"
            #       "D(x)={:.5f}"
            #       .format(epoch + 1,
            #               params.num_epochs,
            #               c_loss.item(),
            #               ))
            test(classifier, generator, src_data_loader, params.src_dataset)
        if ((epoch + 1) % 500 == 0):
            save_model(generator, "Mnist-generator-{}.pt".format(epoch + 1))
            save_model(classifier, "Mnist-classifier-{}.pt".format(epoch + 1))