Example #1
0
def _mean_loss(total_loss, total_images):
    """Return ``total_loss / total_images``, or NaN when no image was counted."""
    if total_images == 0:
        return float('nan')
    return total_loss / total_images


def train(train_file, validation_file, batch_size, epoch_limit, file_name,
          gpu_mode):
    """Train ``Model`` on pileup images, validating and saving after each epoch.

    Every batch is walked along dimension 2 of the image tensor in windows of
    ``seq_len`` rows. Windows whose labels sum to zero are kept only about 5%
    of the time, and windows with a low label sum are randomly dropped as
    well, before one SGD step is taken on the remaining windows.

    :param train_file: Passed to ``PileupDataset`` to build the training set.
    :param validation_file: Passed to ``validate`` after every epoch.
    :param batch_size: Batch size of the training ``DataLoader``; also the
        amount added to the image counters for every window.
    :param epoch_limit: Number of epochs to run.
    :param file_name: Prefix of every checkpoint and final model file written.
    :param gpu_mode: When truthy the model is wrapped in ``DataParallel`` and
        moved to CUDA together with the data; when it is exactly ``True``,
        batches whose size is not a multiple of 8 are skipped.
    """
    transformations = transforms.Compose([transforms.ToTensor()])

    sys.stderr.write(TextColor.PURPLE + 'Loading data\n' + TextColor.END)
    train_data_set = PileupDataset(train_file, transformations)
    train_loader = DataLoader(train_data_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=16,
                              pin_memory=gpu_mode)
    sys.stderr.write(TextColor.PURPLE + 'Data loading finished\n' +
                     TextColor.END)

    # Keep a handle on the bare model: DataParallel does not expose custom
    # methods such as init_hidden of the module it wraps.
    core_model = Model()
    model = core_model
    if gpu_mode:
        model = torch.nn.DataParallel(core_model).cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

    # Train the Model
    sys.stderr.write(TextColor.PURPLE + 'Training starting\n' + TextColor.END)
    seq_len = 3
    iteration_jump = 1
    for epoch in range(epoch_limit):
        total_loss = 0
        total_images = 0
        total_could_be = 0
        for i, (images, labels) in enumerate(train_loader):
            # if batch size not distributable among all GPUs then skip
            if gpu_mode is True and images.size(0) % 8 != 0:
                continue

            # Only build the hidden state for batches that are actually used.
            hidden = core_model.init_hidden(images.size(0))

            images = Variable(images, requires_grad=False)
            labels = Variable(labels, requires_grad=False)
            if gpu_mode:
                images = images.cuda()
                labels = labels.cuda()

            for row in range(0, images.size(2), iteration_jump):
                # segmentation of image. Currently using seq_len
                if row + seq_len > images.size(2):
                    continue

                x = images[:, :, row:row + seq_len, :]
                y = labels[:, row:row + seq_len]

                # NOTE(review): ``.data[0]`` on a scalar result only works on
                # old PyTorch releases; newer ones need ``.item()`` - confirm
                # the targeted version before changing.
                total_variation = torch.sum(y).data[0]
                total_could_be += batch_size

                # Down-sample windows with no / very little variation.
                if total_variation == 0 and random.uniform(0, 1) * 100 > 5:
                    continue
                elif random.uniform(0,
                                    1) < total_variation / batch_size < 0.02:
                    continue

                # Forward + Backward + Optimize
                optimizer.zero_grad()
                outputs = model(x, hidden)
                # Hand the hidden state on to the next window via
                # repackage_hidden (defined elsewhere).
                hidden = repackage_hidden(hidden)
                # outputs = outputs.view(1, outputs.size(0), -1) required for CTCLoss

                loss = criterion(outputs.contiguous().view(-1, 3),
                                 y.contiguous().view(-1))
                loss.backward()
                optimizer.step()

                # loss count
                total_images += batch_size
                total_loss += loss.data[0]

            # Every window of the batches so far may have been skipped, so the
            # average must tolerate a zero image count.
            mean_loss = _mean_loss(total_loss, total_images)
            sys.stderr.write(TextColor.BLUE + "EPOCH: " + str(epoch) +
                             " Batches done: " + str(i + 1))
            sys.stderr.write(" Loss: " + str(mean_loss) + "\n" +
                             TextColor.END)
            print(str(epoch) + "\t" + str(i + 1) + "\t" + str(mean_loss))

        # After each epoch do validation
        validate(validation_file, batch_size, gpu_mode, model, seq_len)
        sys.stderr.write(TextColor.YELLOW + 'Could be: ' +
                         str(total_could_be) + ' Chosen: ' +
                         str(total_images) + "\n" + TextColor.END)
        sys.stderr.write(TextColor.YELLOW + 'EPOCH: ' + str(epoch))
        sys.stderr.write(' Loss: ' +
                         str(_mean_loss(total_loss, total_images)) + "\n" +
                         TextColor.END)
        torch.save(model, file_name + '_checkpoint_' + str(epoch) + '.pkl')
        torch.save(
            model.state_dict(),
            file_name + '_checkpoint_' + str(epoch) + '-params' + '.pkl')

    sys.stderr.write(TextColor.PURPLE + 'Finished training\n' + TextColor.END)

    # Report the paths that are really written, not an approximation of them.
    final_model_path = file_name + '_final.pkl'
    torch.save(model, final_model_path)
    sys.stderr.write(TextColor.PURPLE + 'Model saved as:' + final_model_path +
                     '\n' + TextColor.END)

    final_params_path = file_name + '_final_params' + '.pkl'
    torch.save(model.state_dict(), final_params_path)
    sys.stderr.write(TextColor.PURPLE + 'Model parameters saved as:' +
                     final_params_path + '\n' + TextColor.END)
Example #2
0
class BERTable():
    """Wrapper that builds a vocabulary and a ``Model`` for a table and pre-trains it.

    The column indices of the table are grouped by declared data type
    (``'numerical'``, ``'categorical'`` or ``'vector'``); the vocabulary and
    the model are both built from that grouping.
    """

    def __init__(self,
                 df,
                 column_type,
                 embedding_dim=5,
                 n_layers=5,
                 dim_feedforward=100,
                 n_head=5,
                 dropout=0.15,
                 ns_exponent=0.75,
                 share_category=False,
                 use_pos=False,
                 device='cpu'):
        """Build the vocabulary and the model for ``df``.

        :param df: Table data handed to ``Vocab``. ``df[i]`` is read for every
            vector column index ``i`` and must be two-dimensional, because its
            second dimension is used as the vector size.
        :param column_type: Sequence with one entry per column, each being
            ``'numerical'``, ``'categorical'`` or ``'vector'``.
        :param embedding_dim: Forwarded to ``Model`` and to the dataloader.
        :param n_layers: Forwarded to ``Model``.
        :param dim_feedforward: Forwarded to ``Model``.
        :param n_head: Forwarded to ``Model``.
        :param dropout: Forwarded to ``Model``.
        :param ns_exponent: Forwarded to ``Vocab``.
        :param share_category: Forwarded to ``Vocab``.
        :param use_pos: Forwarded to ``Model`` and to the dataloader.
        :param device: Device the model is moved to while pre-training.
        """
        self.logger = create_logger(name="BERTable")

        # Column indices grouped by their declared data type.
        self.col_type = {'numerical': [], 'categorical': [], 'vector': []}
        for i, data_type in enumerate(column_type):
            self.col_type[data_type].append(i)

        self.embedding_dim = embedding_dim
        self.use_pos = use_pos
        self.device = device

        self.vocab = Vocab(df, self.col_type, share_category, ns_exponent)

        vocab_size = {
            'numerical': len(self.vocab.item2idx['numerical']),
            'categorical': len(self.vocab.item2idx['categorical'])
        }

        # Size of each vector column, taken from the second dimension.
        vector_dims = [np.shape(df[col])[1] for col in self.col_type['vector']]
        tab_len = len(column_type)
        self.model = Model(vocab_size, self.col_type, use_pos, vector_dims,
                           embedding_dim, dim_feedforward, tab_len, n_layers,
                           n_head, dropout)

    def pretrain(self,
                 df,
                 max_epochs=3,
                 lr=1e-4,
                 lr_weight=None,
                 loss_clip=None,
                 n_sample=4,
                 mask_rate=0.15,
                 replace_rate=0.8,
                 batch_size=32,
                 shuffle=True,
                 num_workers=1):
        """Pre-train the model on ``df`` with Adam for ``max_epochs`` epochs.

        The model is moved to ``self.device`` for training and back to the CPU
        afterwards. The optimised loss is the sum, over every data type that
        has at least one column, of that type's loss divided by its number of
        columns and multiplied by its ``lr_weight`` entry.

        :param df: Table converted to indices through ``self.vocab.convert``.
        :param max_epochs: Number of epochs.
        :param lr: Learning rate, converted with ``float``.
        :param lr_weight: Mapping from data type to loss weight. ``None``
            (default) means 0.33 for each of the three data types.
        :param loss_clip: Value stored on ``self.model.loss_clip``. ``None``
            (default) means ``[0, 100]``.
        :param n_sample: Forwarded to ``create_dataloader``.
        :param mask_rate: Forwarded to ``create_dataloader``.
        :param replace_rate: Forwarded to ``create_dataloader``.
        :param batch_size: Forwarded to ``create_dataloader``.
        :param shuffle: Forwarded to ``create_dataloader``.
        :param num_workers: Forwarded to ``self.vocab.convert`` and to
            ``create_dataloader``.
        """
        # Fresh objects per call: the clip range ends up stored on the model,
        # so a shared default would leak between instances and calls.
        if lr_weight is None:
            lr_weight = {
                'numerical': 0.33,
                'categorical': 0.33,
                'vector': 0.33
            }
        if loss_clip is None:
            loss_clip = [0, 100]

        self.model.loss_clip = loss_clip
        self.logger.info("[-] Converting to indices")
        data = self.vocab.convert(df, num_workers)

        self.model.to(self.device)
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=float(lr))

        self.logger.info("[-] Start Pretraining")

        process_bar = tqdm(range(max_epochs),
                           desc="[Progress]",
                           total=max_epochs,
                           leave=True,
                           position=0)

        for epoch in process_bar:

            generator = create_dataloader(data,
                                          self.col_type,
                                          self.vocab,
                                          self.embedding_dim,
                                          self.use_pos,
                                          batch_size,
                                          num_workers,
                                          mask_rate=mask_rate,
                                          replace_rate=replace_rate,
                                          n_sample=n_sample,
                                          shuffle=shuffle)

            metric_bar = tqdm([0],
                              desc="[Metric]",
                              bar_format="{desc} {postfix}",
                              leave=False,
                              position=2)

            epoch_bar = tqdm(generator,
                             desc="[Epoch]",
                             leave=False,
                             position=1)

            loss_history = {'numerical': [], 'categorical': [], 'vector': []}

            # Bound up front so the epoch log below still works when the
            # dataloader yields no batch.
            display = ''

            for batch_data in epoch_bar:

                batch_data = transfer(batch_data, self.device)
                _, losses = self.model.forward(batch_data, mode='train')

                loss = sum([
                    losses[data_type] / len(self.col_type[data_type]) *
                    lr_weight[data_type] for data_type in self.col_type
                    if len(self.col_type[data_type]) > 0
                ])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Running mean of each loss over the current epoch.
                display = ''
                for types in losses:
                    loss_history[types].append(losses[types].item())
                    display += f'{types}: {np.mean(loss_history[types]):5.2f} '
                metric_bar.set_postfix_str(display)

            process_bar.write(f'[Log] Epoch {epoch:0>2d}| ' + display)
            epoch_bar.close()
            metric_bar.close()

        process_bar.close()

        self.model.cpu()

    # NOTE: ``transform`` is currently disabled; kept for reference.
    # def transform(self, df, batch_size=32, num_workers=1):
    #     self.logger.info("[-] Converting to indices")
    #     data = self.vocab.convert(df, num_workers)

    #     generator = create_dataloader(
    #         data, self.col_type, self.vocab,
    #         self.embedding_dim, self.use_pos,
    #         batch_size, num_workers, mode='test')

    #     self.logger.info("[-] Start Transforming")

    #     process_bar = tqdm(
    #         generator,
    #         desc=f"[Process]",
    #         leave=False,
    #         position=0)

    #     self.model.to(self.device)
    #     self.model.eval()

    #     df_t = []
    #     for batch_data in process_bar:
    #         batch_data = transfer(batch_data, self.device)
    #         feature = self.model.forward(batch_data, mode='test')
    #         df_t += list(feature.cpu().detach().numpy())

    #     process_bar.close()
    #     self.model.cpu()

    #     return df_t

    def save(self, model_path='model.ckpt', vocab_path='vocab.pkl'):
        """Write the model ``state_dict`` and the pickled vocabulary to disk.

        :param model_path: Destination of the model parameters.
        :param vocab_path: Destination of the pickled ``Vocab`` object.
        """
        torch.save(self.model.state_dict(), model_path)
        with open(vocab_path, 'wb') as file:
            pkl.dump(self.vocab, file)
Example #3
0
def training_process(device, nb_class_labels, model_path, result_dir, patience,
                     epochs, do_pre_train, tr_feat_path, tr_labels_path,
                     val_feat_path, val_labels_path, tr_batch_size,
                     val_batch_size, adapt_patience, adapt_epochs, d_lr,
                     tgt_lr, update_cnt, factor):
    """Implements the complete training process of the AUDASC method.

    The process has two stages: an optional pre-training of the non-adapted
    model and the label classifier (on the data stored under key ``'A'``),
    followed by the adversarial domain adaptation of a copy of that model.

    :param device: The device that we will use.
    :type device: str
    :param nb_class_labels: The amount of labels for label classification.
    :type nb_class_labels: int
    :param model_path: The directory of a previously saved model (used only \
                       when no pre-training is done).
    :type model_path: str
    :param result_dir: The directory to save the newly pre-trained model; \
                       created when missing.
    :type result_dir: str
    :param patience: The patience for the pre-training step.
    :type patience: int
    :param epochs: The epochs for the pre-training step.
    :type epochs: int
    :param do_pre_train: Flag to indicate if we do pre-training.
    :type do_pre_train: bool
    :param tr_feat_path: The path for loading the training features.
    :type tr_feat_path: str
    :param tr_labels_path: The path for loading the training labels.
    :type tr_labels_path: str
    :param val_feat_path: The path for loading the validation features.
    :type val_feat_path: str
    :param val_labels_path: The path for loading the validation labels.
    :type val_labels_path: str
    :param tr_batch_size: The batch size used for pre-training.
    :type tr_batch_size: int
    :param val_batch_size: The batch size used for validation.
    :type val_batch_size: int
    :param adapt_patience: The patience for the domain adaptation step.
    :type adapt_patience: int
    :param adapt_epochs: The epochs for the domain adaptation step.
    :type adapt_epochs: int
    :param d_lr: The learning rate for the discriminator.
    :type d_lr: float
    :param tgt_lr: The learning rate for the adapted model.
    :type tgt_lr: float
    :param update_cnt: An update controller for adversarial loss.
    :type update_cnt: int
    :param factor: The coefficient that the classification loss is \
                   multiplied by.
    :type factor: int
    :raises ValueError: If the directory holding the pre-trained model \
                        does not exist.
    """

    tr_feat = device_exchange(file_io.load_pickled_features(tr_feat_path),
                              device=device)
    tr_labels = device_exchange(file_io.load_pickled_features(tr_labels_path),
                                device=device)
    val_feat = device_exchange(file_io.load_pickled_features(val_feat_path),
                               device=device)
    val_labels = device_exchange(
        file_io.load_pickled_features(val_labels_path), device=device)

    loss_func = functional.cross_entropy

    non_adapted_cnn = Model().to(device)
    label_classifier = LabelClassifier(nb_class_labels).to(device)

    if not path.exists(result_dir):
        makedirs(result_dir)

    if do_pre_train:
        state_dict_path = result_dir

        printing.info_msg('Pre-training step')

        # One optimizer drives both the feature extractor and the classifier.
        optimizer_source = torch.optim.Adam(
            list(non_adapted_cnn.parameters()) +
            list(label_classifier.parameters()),
            lr=1e-4)

        pre_training.pre_training(model=non_adapted_cnn,
                                  label_classifier=label_classifier,
                                  optimizer=optimizer_source,
                                  tr_batch_size=tr_batch_size,
                                  val_batch_size=val_batch_size,
                                  tr_feat=tr_feat['A'],
                                  tr_labels=tr_labels['A'],
                                  val_feat=val_feat['A'],
                                  val_labels=val_labels['A'],
                                  epochs=epochs,
                                  criterion=loss_func,
                                  patience=patience,
                                  result_dir=state_dict_path)

        del optimizer_source

    else:
        printing.info_msg('Loading a pre-trained non-adapted model')
        state_dict_path = model_path

    if not path.exists(state_dict_path):
        raise ValueError(
            'The path for loading the pre trained model does not exist!')

    # map_location lets weights that were saved on another device (e.g. a GPU)
    # be loaded on the device requested for this run.
    non_adapted_cnn.load_state_dict(
        torch.load(path.join(state_dict_path, 'non_adapted_cnn.pytorch'),
                   map_location=device))
    label_classifier.load_state_dict(
        torch.load(path.join(state_dict_path, 'label_classifier.pytorch'),
                   map_location=device))

    printing.info_msg('Training the Adversarial Adaptation Model')

    # The adapted model starts as an exact copy of the non-adapted one.
    target_cnn = Model().to(device)
    target_cnn.load_state_dict(non_adapted_cnn.state_dict())
    discriminator = Discriminator(2).to(device)

    target_model_opt = torch.optim.Adam(target_cnn.parameters(), lr=tgt_lr)
    discriminator_opt = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    # The same cross-entropy function is passed for all three loss arguments.
    domain_adaptation.domain_adaptation(
        non_adapted_cnn, target_cnn, label_classifier, discriminator,
        target_model_opt, discriminator_opt, loss_func, loss_func, loss_func,
        tr_feat, tr_labels, val_feat, val_labels, adapt_epochs, update_cnt,
        result_dir, adapt_patience, device, factor)
Example #4
0
class Train:
    """Step-based training driver with checkpoint save / restore support.

    Owns the model, an Adam optimizer and a global step counter. A negative
    ``_global_step`` marks a trainer that has not started training yet.
    """

    def __init__(self, model_name, corpus_dataset):
        """Create the model and optimizer for ``corpus_dataset``.

        :param model_name: Name stored in every checkpoint.
        :param corpus_dataset: Object providing ``get_data_loader(batch_size)``
            and a ``vocabulary`` attribute.
        """
        self._config = TrainConfig()
        self._model_name = model_name
        self._data_loader = corpus_dataset.get_data_loader(
            self._config.batch_size)
        self._vocabulary = corpus_dataset.vocabulary
        self._model = Model(vocabulary=corpus_dataset.vocabulary,
                            training=True)
        # TODO: Support for other optimizers
        self._optimizer = optim.Adam(self._model.parameters(),
                                     lr=self._config.learning_rate)
        self._global_step = -1

        self._train_logger = logging.getLogger('Train')
        logging.basicConfig(level=logging.INFO)

    def train_step(self, input_seqs, input_lengths, target_seqs, masks):
        """Run one optimisation step on a single batch.

        The model is called with the batch and the current global step and
        must return ``(step_loss, print_loss, _)``; ``step_loss`` is
        back-propagated and ``print_loss`` is logged. Gradients are clipped
        when the configuration enables it.
        """
        self._optimizer.zero_grad()
        step_loss, print_loss, _ = self._model(input_seqs, input_lengths,
                                               target_seqs, masks,
                                               self._global_step)

        self._train_logger.info('Step {}:  Training loss: {}'.format(
            self._global_step, print_loss))

        step_loss.backward()

        if self._config.use_gradient_clipping:
            _ = nn.utils.clip_grad_norm_(self._model.parameters(),
                                         self._config.gradient_clipping_value)

        self._optimizer.step()

    def train(self,
              num_steps,
              save_num_steps,
              save_folder='./data/models/train_dev'):
        """Train until the global step reaches ``num_steps``.

        The data loader is iterated repeatedly. A checkpoint is written every
        ``save_num_steps`` steps and once more at the end unless the last
        step was just saved.

        :param num_steps: Global step at which training stops.
        :param save_num_steps: Checkpoint interval, in steps.
        :param save_folder: Directory that receives the checkpoints.
        """
        if self._global_step < 0:
            self._global_step = 0

        # Checked for fresh trainers too, so that a request for zero steps
        # does not run (and checkpoint) a step anyway.
        if self._global_step >= num_steps:
            logging.info(
                'Global step past number of steps requested. No training needed. Global Step = {}. '
                'Num training steps = {}'.format(self._global_step, num_steps))
            return

        while True:
            saw_batch = False
            for input_seqs, input_lengths, target_seqs, masks in self._data_loader:
                saw_batch = True
                self.train_step(input_seqs, input_lengths, target_seqs, masks)
                self._global_step += 1

                just_saved = self._global_step % save_num_steps == 0
                if just_saved:
                    self.save_checkpoint(save_folder)

                if self._global_step >= num_steps:
                    logging.info('Finished training at step {}'.format(
                        self._global_step))
                    if not just_saved:
                        self.save_checkpoint(save_folder)
                    return

            if not saw_batch:
                # An empty data loader can never advance the step counter;
                # stop instead of spinning forever.
                logging.warning(
                    'Data loader produced no batches. Stopping training at '
                    'step {}'.format(self._global_step))
                return

    def save_checkpoint(self, save_folder):
        """Save name, step, model, optimizer and vocabulary to ``save_folder``.

        The file is named ``checkpoint-<global_step>.tar``; the folder is
        created when missing.
        """
        makedirs(save_folder, exist_ok=True)
        save_path = path.join(save_folder,
                              'checkpoint-{}.tar'.format(self._global_step))
        logging.info('Saving checkpoint at step {}'.format(self._global_step))
        torch.save(
            {
                'name': self._model_name,
                'global_step': self._global_step,
                'model': self._model.state_dict(),
                'optimizer': self._optimizer.state_dict(),
                'vocabulary': self._vocabulary.__dict__,
            }, save_path)
        logging.info('Checkpoint saved at {}'.format(save_path))

    @staticmethod
    def load_from_checkpoint(checkpoint_path, corpus_dataset):
        """Rebuild a ``Train`` object from a file written by ``save_checkpoint``.

        The vocabulary attributes, global step, model weights and optimizer
        state are restored. The vocabulary object is the one owned by
        ``corpus_dataset``, so its attributes are overwritten in place.

        :param checkpoint_path: Path of the checkpoint file.
        :param corpus_dataset: Dataset used to construct the trainer.
        :return: The restored ``Train`` instance.
        """
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(checkpoint_path)
        train_obj = Train(checkpoint['name'], corpus_dataset)
        train_obj._vocabulary.__dict__ = checkpoint['vocabulary']
        train_obj._global_step = checkpoint['global_step']
        train_obj._model.load_state_dict(checkpoint['model'])
        # Restore the optimizer state as well, so that resumed training
        # continues from the saved optimizer statistics.
        if 'optimizer' in checkpoint:
            train_obj._optimizer.load_state_dict(checkpoint['optimizer'])
        train_obj._train_logger.info(
            'Restored from checkpoint {}'.format(checkpoint_path))
        return train_obj