Code Example #1
File: train.py Project: xiekt1993/Portfolio
print('LR --> 0.0000001, WD = 0.0. Resume fine-tuning CNN.')

criterion = nn.CrossEntropyLoss()

print('Loading dataset...')
trainset = MSCOCO(VOCAB_SIZE, train_imagepaths_and_captions, transform_train)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=BATCH_SIZE, collate_fn=collate_fn,
                                          shuffle=True, drop_last=False, num_workers=NUM_WORKERS)

valset = MSCOCO_VAL(VOCAB_SIZE, val_imagepaths_and_captions, transform_val)
valloader = torch.utils.data.DataLoader(dataset=valset, batch_size=BATCH_SIZE, collate_fn=collate_fn_val,
                                        shuffle=False, drop_last=False, num_workers=NUM_WORKERS)
writer = SummaryWriter(log_dir)
for epoch in range(current_epoch, EPOCHS+1):
    start_time_epoch = time.time()
    encoder.train()
    decoder.train()

    print('[%d] epoch starts training...' % epoch)
    trainloss = 0.0
    for batch_idx, (images, captions, lengths) in enumerate(trainloader, 1):
        
        images = images.cuda()
        captions = captions.cuda()
        # Keep lengths on the CPU: recent PyTorch requires the lengths passed
        # to pack_padded_sequence to be a CPU tensor.
        # During the forward pass we never feed the <end> token; when computing
        # the loss we never count the <start> token, hence lengths - 1.
        lengths = lengths - 1
        # Drop the <start> column so the packed targets begin at the first real word.
        targets = rnn_utils.pack_padded_sequence(captions[:, 1:], lengths, batch_first=True)[0]
        
        encoder.zero_grad()
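
The two comments above carry the core idea: the decoder is never fed the final <end> token, and the loss never scores the leading <start> token, so both the lengths and the target columns shift by one. A minimal standalone sketch (toy token ids, not from the project) of what that shift does:

import torch
import torch.nn.utils.rnn as rnn_utils

# Two padded captions: <start> a b <end> (length 4) and <start> c <end> (length 3);
# the ids are made up: 1 = <start>, 2 = <end>, 0 = <pad>.
captions = torch.tensor([[1, 5, 6, 2],
                         [1, 7, 2, 0]])
lengths = torch.tensor([4, 3])

# Drop the <start> column and shorten each length by one before packing.
targets = rnn_utils.pack_padded_sequence(captions[:, 1:], lengths - 1,
                                         batch_first=True)[0]
print(targets)  # tensor([5, 7, 6, 2, 2]) -- flattened targets, no <start>, no padding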
Code Example #2
def train():
    transforms = Compose([ToTensor()])
    train_dataset = CaptchaData('./data/train', transform=transforms)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   num_workers=0,
                                   shuffle=True,
                                   drop_last=True)
    test_data = CaptchaData('./data/test', transform=transforms)
    test_data_loader = DataLoader(test_data,
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  drop_last=True)
    cnn = CNN()
    if torch.cuda.is_available():
        cnn.cuda()
    if restor:
        cnn.load_state_dict(torch.load(model_path))


#        freezing_layers = list(cnn.named_parameters())[:10]
#        for param in freezing_layers:
#            param[1].requires_grad = False
#            print('freezing layer:', param[0])

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            # Variable is a no-op since PyTorch 0.4; move tensors to the GPU directly.
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(acc)
            loss_history.append(loss.item())
        print('train_loss: {:.4}|train_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        # Disable gradient tracking during evaluation.
        with torch.no_grad():
            for img, target in test_data_loader:
                if torch.cuda.is_available():
                    img = img.cuda()
                    target = target.cuda()
                output = cnn(img)
                # Recompute the loss on the test batch; the original code reused
                # the stale loss left over from the training loop here.
                loss = criterion(output, target)

                acc = calculat_acc(output, target)
                acc_history.append(acc)
                loss_history.append(loss.item())
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        torch.save(cnn.state_dict(), model_path)
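
Both captcha snippets lean on a calculat_acc helper that is never shown. A plausible reconstruction, assuming the flattened one-hot encoding implied by nn.MultiLabelSoftMarginLoss (the num_class/num_char defaults and the block layout are assumptions, not the project's code):

import torch

def calculat_acc(output, target, num_class=36, num_char=4):
    # Reshape [batch, num_char * num_class] into one score row per character.
    output = output.view(-1, num_class).argmax(dim=1)
    target = target.view(-1, num_class).argmax(dim=1)
    # A captcha counts as correct only if every character position matches.
    correct = (output == target).view(-1, num_char).all(dim=1)
    return correct.float().mean()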
Code Example #3
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def train(self):
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # Transpose to [batch_size, max_seq_len]; mutating .data in
                # place is discouraged in modern PyTorch.
                text = text.t()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # scalar
                loss_history.append(
                    average_batch_loss.item())  # .data[0] is deprecated; .item() returns a float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch+1}.pkl')
                print('Save parameters at ', ckpt_path)
                torch.save(self.model.state_dict(), ckpt_path)

    def eval(self, epoch=None):

        # Load model parameters
        if not isinstance(epoch, int):
            epoch = self.config.epochs
        ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

        loss_history = []
        for _, batch in tqdm(enumerate(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # Transpose to [batch_size, max_seq_len]
            text = text.t()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # scalar
            loss_history.append(
                average_batch_loss.item())  # .data[0] is deprecated; .item() returns a float

        epoch_loss = np.mean(loss_history)

        print(f'Loss: {epoch_loss:.2f}')  # the original string was missing the f-prefix
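
Everything this Solver needs comes off a config object; a minimal illustrative one, using only attribute names that appear in the code above (the values are placeholders, and CNN(self.config) may read further fields not shown here):

import argparse
import torch.nn as nn
import torch.optim as optim

config = argparse.Namespace(
    loss_fn=nn.CrossEntropyLoss,  # instantiated via self.config.loss_fn()
    optimizer=optim.Adam,         # instantiated via self.config.optimizer(params, lr=...)
    lr=1e-3,
    epochs=10,
    log_every_epoch=1,
    save_every_epoch=5,
    save_dir='./checkpoints',
)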
Code Example #4
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        if torch.cuda.is_available():
            self.model = nn.DataParallel(CNN(self.config)).cuda()
        else:
            self.model = CNN(self.config)

        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(), lr=self.config.lr)
        else:
            if torch.cuda.is_available():
                self.model = self.model.module
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)

        if torch.cuda.is_available():
            torch.save(self.model.module.state_dict(), ckpt_path)
        else:
            torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        print(self.model)

        self.model.load_state_dict(torch.load(ckpt_path))

    def train_once(self):
        loss_history = []

        for batch_i, batch in enumerate(tqdm(self.data_loader)):
            text, label = batch.text, batch.label

            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()

            text = text.t()  # [batch_size, max_seq_len]

            logit = self.model(text)

            average_batch_loss = self.loss_fn(logit, label)
            loss_history.append(average_batch_loss.item())

            self.optimizer.zero_grad()

            average_batch_loss.backward()

            self.optimizer.step()

        epoch_loss = np.mean(loss_history)

        return epoch_loss


    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                if torch.cuda.is_available():
                    text = text.cuda()
                    label = label.cuda()

                # Transpose to [batch_size, max_seq_len]
                text = text.t()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # store the loss as a Python float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.4f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model from text data"""

        n_total_data = 0
        n_correct = 0
        loss_history = []


        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()

            # Transpose to [batch_size, max_seq_len]
            text = text.t()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # store the loss as a Python float

            # Calculate accuracy
            n_total_data += len(label)

            # [batch_size]
            _, prediction = logit.max(1)

            n_correct += (prediction == label).sum().item()

        epoch_loss = np.mean(loss_history)

        accuracy = n_correct / float(n_total_data)

        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')

        return epoch_loss, accuracy

    def inference(self, text):
        # Variable is a no-op since PyTorch 0.4.
        text = torch.LongTensor([text])

        # [batch_size, 2]
        logit = self.model(text)

        # torch.max needs a dim argument to return (values, indices).
        _, prediction = torch.max(logit, dim=1)

        return prediction

    def train_eval(self):
        # Set this variable to your MLflow server's DNS name
        mlflow_server = '172.23.147.124'

        # Tracking URI
        mlflow_tracking_URI = 'http://' + mlflow_server + ':5000'
        print("MLflow Tracking URI: %s" % mlflow_tracking_URI)
        # The original computed the URI but never registered it with MLflow.
        mlflow.set_tracking_uri(mlflow_tracking_URI)

        with mlflow.start_run():
            for key, value in vars(self.config).items():
                mlflow.log_param(key, value)

            '''
            output_dir = 'mlflow_logs'
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            '''

            for epoch in tqdm(range(self.config.epochs)):
                # print out active_run
                print("Active Run ID: %s, Epoch: %s \n" % (mlflow.active_run(), epoch))

                train_loss = self.train_once()
                mlflow.log_metric('train_loss', train_loss)

                val_loss, val_acc = self.eval()
                mlflow.log_metric('val_loss', val_loss)
                mlflow.log_metric('val_acc', val_acc)

        # Finish run (the `with` block already closes it, so this is redundant but harmless)
        mlflow.end_run(status='FINISHED')
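
A sketch of how this Solver might be driven end to end; config and data_loader are assumed to be built elsewhere in the project:

# Hypothetical driver, not part of the original file.
solver = Solver(config, data_loader)
solver.build(is_train=True)
solver.train_eval()  # logs per-epoch train/val metrics to the MLflow server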
Code Example #5
class TextClassifier:
    def __init__(self,
                 paths,
                 batch_size=6,
                 iterations=50,
                 initial_lr=0.003,
                 hidden_size=256,
                 dropout=0.2,
                 kernel_sz=3):

        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader(paths)
        self.data.set_training_data(batch_size,
                                    ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(self.data.train_data,
                                                 'Sentence', 'Label')
        self.val_batch_loader = BatchGenerator(self.data.val_data, 'Sentence',
                                               'Label')
        self.test_batch_loader = BatchGenerator(self.data.test_data,
                                                'Sentence', 'Label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr
        self.kernel_sz = kernel_sz

        # Create Model
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        self.cnn_model = CNN(emb_size=emb_size,
                             emb_dimension=emb_dim,
                             n_out=len(self.data.LABEL.vocab),
                             dropout=dropout,
                             kernel_sz=kernel_sz,
                             stride=1,
                             padding=0,
                             out_filters=hidden_size,
                             pretrained_emb=self.data.TEXT.vocab.vectors)

        if self.use_cuda:
            self.cnn_model.cuda()

    def train(self):

        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []

        loss = 0.0

        best_model = 0.0
        # Guard against the case where validation accuracy never improves,
        # which would otherwise leave test_acc unbound below.
        test_loss, test_acc = 0.0, 0.0

        # Plain SGD carries no internal state, so building the optimizer once
        # here is equivalent to the original per-iteration construction.
        optimizer = optim.SGD(self.cnn_model.parameters(), lr=self.initial_lr)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            self.cnn_model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            data_iter = iter(self.train_batch_loader)

            # Iterating the loader directly in a for loop drops the target
            # variable (y) for some reason, so loop over the length and fetch
            # each batch inside the loop instead.
            for i in range(len(self.train_batch_loader)):

                ((x_batch, x_len_batch), y_batch) = next(data_iter)

                #                 if torch.min(x_len_batch) > self.kernel_sz:
                optimizer.zero_grad()

                loss, logits = self.cnn_model(x_batch, y_batch)

                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

                loss.backward()

                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.trainds))

            val_loss, val_acc = self.eval_model(self.val_batch_loader,
                                                len(self.data.valds))

            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            if best_model < val_acc:
                best_model = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader,
                                                      len(self.data.testds))

            print("Train: {Loss: " + str(total_loss / steps) + ", Acc: " +
                  str(total_acc / len(self.data.trainds)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) +
                  " }")

#         test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.testds) )

        test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc

    def eval_model(self, batch_loader, N):
        self.cnn_model.eval()

        total_loss = 0.0
        total_acc = 0.0
        steps = 0

        batch_iter = iter(batch_loader)

        with torch.no_grad():
            for i in range(len(batch_loader)):

                ((x_batch, x_len_batch), y_batch) = next(batch_iter)

                loss, logits = self.cnn_model(x_batch, y_batch)

                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

        return (total_loss / steps), (total_acc / N)
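
BatchGenerator is never defined in the snippet; a common minimal implementation for torchtext-style iterators, offered here as an assumption, simply plucks the named fields off each batch (with include_lengths=True on the text field, the x attribute is a (tensor, lengths) pair, matching the unpacking above):

class BatchGenerator:
    # Hypothetical reconstruction matching how BatchGenerator is used above.
    def __init__(self, iterator, x_field, y_field):
        self.iterator = iterator
        self.x_field = x_field
        self.y_field = y_field

    def __len__(self):
        return len(self.iterator)

    def __iter__(self):
        for batch in self.iterator:
            yield getattr(batch, self.x_field), getattr(batch, self.y_field)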
Code Example #6
def train():
    transforms = Compose([Resize((height, width)), ToTensor()])
    train_dataset = CaptchaData(train_data_path, num_class=len(alphabet), num_char=int(numchar), transform=transforms, alphabet=alphabet)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers,
                                   shuffle=True, drop_last=True)
    test_data = CaptchaData(test_data_path, num_class=len(alphabet), num_char=int(numchar), transform=transforms, alphabet=alphabet)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=num_workers, shuffle=True, drop_last=True)
    cnn = CNN(num_class=len(alphabet), num_char=int(numchar), width=width, height=height)
    if use_gpu:
        cnn.cuda()

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            # Variable is a no-op since PyTorch 0.4; move tensors to the GPU directly.
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(loss.item())
        print('epoch:{},train_loss: {:.4}|train_acc: {:.4}'.format(
            epoch,
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        # Disable gradient tracking during evaluation, and use the same use_gpu
        # flag as the training loop for consistency.
        with torch.no_grad():
            for img, target in test_data_loader:
                if use_gpu:
                    img = img.cuda()
                    target = target.cuda()
                output = cnn(img)
                # Recompute the loss on the test batch; the original reused the
                # stale loss from the training loop here.
                loss = criterion(output, target)

                acc = calculat_acc(output, target)
                acc_history.append(float(acc))
                loss_history.append(loss.item())
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        torch.save(cnn.state_dict(), os.path.join(model_path, "model_{}.pth".format(epoch)))
Code Example #7
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(), lr=self.config.lr)
        else:
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # Transpose to [batch_size, max_seq_len]
                text = text.t()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # scalar
                loss_history.append(average_batch_loss.item())  # .data[0] is deprecated

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model from text data"""

        n_total_data = 0
        n_correct = 0
        loss_history = []
        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # Transpose to [batch_size, max_seq_len]
            text = text.t()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # scalar
            loss_history.append(average_batch_loss.item())  # .data[0] is deprecated

            # Calculate accuracy
            n_total_data += len(label)

            # [batch_size]
            _, prediction = logit.max(1)

            n_correct += (prediction == label).sum().item()

        epoch_loss = np.mean(loss_history)

        accuracy = n_correct / n_total_data

        print(f'Loss: {epoch_loss:.2f}')

        print(f'Accuracy: {accuracy}')

    def inference(self, text):
        # Variable is a no-op since PyTorch 0.4.
        text = torch.LongTensor([text])

        # [batch_size, 2]
        logit = self.model(text)

        # torch.max needs a dim argument to return (values, indices).
        _, prediction = torch.max(logit, dim=1)

        return prediction
Code Example #8
class Trainer:
    """
    训练
    """
    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()

        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path,
                          val_path):
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs',
              '*' * 20)
        self.encoder.fine_tune()
        self.optimizer = torch.optim.Adam([
            {
                'params': self.encoder.parameters(),
                'lr': self.ft_encoder_lr
            },
            {
                'params': self.decoder.parameters(),
                'lr': self.ft_decoder_lr
            },
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        All parameters the optimizer should update. By design the encoder is
        not trained at this stage, so its parameters are excluded.

        :return:
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Train.

        :param val_path: path for saving sentences generated during validation
        :param save_path: path for saving the model
        :param val_interval: interval (in epochs) between validations
        :param max_epochs: maximum number of training epochs
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap,
                       cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)

                outputs = pack_padded_sequence(outputs,
                                               cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:],
                                               cap_len - 1,
                                               batch_first=True)[0]
                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the best model.

        :param save_path: path of the saved model file
        :param train_epoch: current training epoch
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'train_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate.

        :param val_path: path for saving sentences generated during validation
        :param train_epoch: current training epoch
        :return:
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        self.encoder.eval()
        self.decoder.eval()
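
The Trainer expects a flat hyperparameter object; an illustrative one covering every attribute the class reads (the names come from the code above, the values are made up):

import argparse

_hparams = argparse.Namespace(
    fixed_seed=42,
    fea_dim=2048, embed_dim=512, hid_dim=512,
    max_sen_len=20,
    vocab_pkl='./data/vocab.pkl',
    lr=4e-4,
    val_cap='./data/val_captions.json',
    ft_encoder_lr=1e-4, ft_decoder_lr=4e-4,
)

trainer = Trainer(_hparams)
trainer.training(max_epochs=30, val_interval=5,
                 save_path='./model/best_model.pth', val_path='./val_sentences.json')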
Code Example #9
if __name__ == '__main__':
    opt = Training_options().parse()
    if opt.model == 'cnn':
        from models import CNN
        net = CNN(opt)
        train_predictors, train_predictands = assemble_predictors_predictands(
            opt, train=True)
        train_dataset = ENSODataset(train_predictors, train_predictands)
        trainloader = DataLoader(train_dataset, batch_size=opt.batch_size)
        optimizer = optim.Adam(net.parameters(), lr=opt.lr)
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        best_loss = np.inf  # np.infty was removed in NumPy 2.0
        train_losses = []
        net.train()
        criterion = nn.MSELoss()
        for epoch in range(opt.epoch):
            running_loss = 0.0
            for i, data in enumerate(trainloader):
                batch_predictors, batch_predictands = data
                batch_predictands = batch_predictands.to(device)
                batch_predictors = batch_predictors.to(device)
                optimizer.zero_grad()
                predictions = net(batch_predictors).squeeze()
                loss = criterion(predictions, batch_predictands.squeeze())
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print('Training Set: Epoch {:02d}. loss: {:.3f}'.format(
                epoch + 1, running_loss / len(trainloader)))
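
best_loss and train_losses are initialized above but never used, so checkpointing was presumably intended. A hedged sketch of the epoch-loop tail that would use them (the checkpoint path is illustrative, not the project's):

# Hypothetical tail for the epoch loop above.
epoch_loss = running_loss / len(trainloader)
train_losses.append(epoch_loss)
if epoch_loss < best_loss:
    best_loss = epoch_loss
    torch.save(net.state_dict(), 'cnn_best.pth')  # path is made up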
Code Example #10
    # create test loader
    test_dataset = HumanActivityDataset(file='test')

    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size_test,
                             shuffle=True,
                             num_workers=0)
    test_gen = iter(test_loader)

    # print every 't' steps
    t = 5

    for epoch in range(n_epochs):  # loop over the dataset multiple times

        network.train()
        train_loss = 0.0

        for i_batch, batch in enumerate(train_loader):

            # get the inputs and labels
            inputs, labels = batch['inputs'], batch['labels']

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = network(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
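
The snippet sets t = 5 ("print every 't' steps") and initializes train_loss but the visible code never uses them; a plausible continuation of the batch loop, offered as an assumption rather than the project's code:

# Hypothetical logging tail for the batch loop above.
train_loss += loss.item()
if (i_batch + 1) % t == 0:
    print('epoch {} batch {}: avg loss {:.4f}'.format(
        epoch, i_batch + 1, train_loss / (i_batch + 1)))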
Code Example #11

    train_data, test_data = dataFetch(dset=args.data)

    train_loader = t.utils.data.DataLoader(dataset=train_data,
                                           batch_size=args.batch_size,
                                           shuffle=True)
    test_loader = t.utils.data.DataLoader(dataset=test_data,
                                          batch_size=args.batch_size,
                                          shuffle=False)

    print("Starting with training process...")
    if not args.validate:
        for epoch in range(args.epochs):
            model.train()
            start = time.time()
            tloss = []
            # Note: `best` is reset every epoch; it likely belongs outside the loop.
            best = 1000
            for i, (img, lbl) in enumerate(train_loader):
                if args.gpu:
                    images = Variable(img).cuda()
                    labels = Variable(lbl).cuda()
                else:
                    images = Variable(img)
                    labels = Variable(lbl)

                # Forward pass and backpropagation
                optimizer.zero_grad()  # clear stored gradients
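
The example cuts off right after zeroing the gradients; the canonical continuation, sketched here as an assumption (the criterion and the tloss bookkeeping are presumed from the surrounding code):

# Hypothetical continuation of the truncated loop.
outputs = model(images)
loss = criterion(outputs, labels)  # criterion assumed to be defined earlier
loss.backward()
optimizer.step()
tloss.append(loss.item())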