Code Example #1
def main(epochs=5, learning_rate=1e-3):
    # use GPU if available, otherwise fall back to CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # get data loaders
    training = get_dataloader(train=True)
    testing = get_dataloader(train=False)

    # model
    model = CNN().to(device)
    info('Model')
    print(model)

    # cost function
    cost = torch.nn.BCELoss()

    # optimizers
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(1, epochs + 1):
        info('Epoch {}'.format(epoch))
        train(model, device, training, cost, optimizer, epoch)
        test(model, device, testing, cost)

    # save model
    info('Saving Model')
    save_model(model, device, 'model.onnx')
    print('Saving PyTorch Model as model.pth')
    torch.save(model.state_dict(), 'model.pth')
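The save_model helper used above for the ONNX export is not shown in this example. A minimal sketch of what such a helper could look like, assuming the model takes MNIST-style 1x28x28 inputs (the input shape and the helper body are assumptions, not the original code):

import torch

def save_model(model, device, path):
    # hypothetical helper: export the trained model to ONNX via a dummy forward pass
    model.eval()
    dummy_input = torch.randn(1, 1, 28, 28, device=device)  # assumed input shape
    print('Saving ONNX Model as {}'.format(path))
    torch.onnx.export(model, dummy_input, path)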
Code Example #2
    def bilstm_train(self, numEpochs, batch_size, save_file, lr):
        print('training .....')

        # set up loss function -- cross-entropy loss
        loss_func = nn.CrossEntropyLoss()
        net = CNN(embed_dim=100)
        # net.load_state_dict(torch.load('model_50.pth'))
        # SGD used for optimization, momentum update used as parameter update
        optimization = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
        net.cuda()
        loss_func.cuda()
        train_losses = []
        test_losses = []
        for epoch in range(0,numEpochs):

            # training set -- perform model training
            epoch_training_loss = 0.0
            num_batches = 0
            pbar = tqdm(range(0, len(self.train_seqs), batch_size))
            for batch_num in pbar:  # batch_num is the start index of the current mini-batch
                # split training data into inputs and labels
                if batch_num+batch_size>len(self.train_seqs):
                    end = len(self.train_seqs)
                else:
                    end = batch_num+batch_size
                raw_inputs, labels_ = self.train_seqs[batch_num:end], self.train_labels[batch_num:end]  # 'training_batch' is a list
                inputs_ = self.get_embedding(raw_inputs)
                inputs = torch.from_numpy(inputs_).float().cuda()
                labels = torch.from_numpy(labels_).cuda()
                # wrap data in 'Variable'
                inputs, labels = torch.autograd.Variable(inputs), torch.autograd.Variable(labels)
                # Make gradients zero for parameters 'W', 'b'
                optimization.zero_grad()
                # forward, backward pass with parameter update
                forward_output = net(inputs)
                loss = loss_func(forward_output, labels)
                loss.backward()
                optimization.step()
                # calculating loss
                epoch_training_loss += loss.data.item()
                num_batches += 1
                # print(loss.data.item())
                pbar.set_description("processing batch %s" % str(batch_num))
            print("epoch: ", epoch, ", loss: ", epoch_training_loss / num_batches)
            # train_loss = self.test(net, batch_size=256, test_data=self.train_seqs, test_label=self.train_labels)
            test_loss = self.test(net, batch_size=256, test_data=self.test_seqs, test_label=self.test_labels)
            # train_losses.append(train_loss)
            test_losses.append(test_loss)
            # if epoch%10 == 0:
            #     save_path = save_file+'model3_' +str(epoch)+'.pth'
            #     torch.save(net.state_dict(), save_path)
        # with open('train_loss_1.p','wb') as fin:
        #     pickle.dump(train_losses,fin)
        #     fin.close()
        with open('test_loss_1.p','wb') as fin:
            pickle.dump(test_losses,fin)
            fin.close()
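The self.test helper called after each epoch is not part of this snippet. A rough sketch of an evaluation method that would fit the call above, reusing the same embedding and CUDA assumptions (everything here is an assumption, not the original implementation):

    def test(self, net, batch_size, test_data, test_label):
        # hypothetical evaluation helper: mean cross-entropy loss over the test set
        net.eval()
        loss_func = nn.CrossEntropyLoss()
        total_loss, num_batches = 0.0, 0
        with torch.no_grad():
            for start in range(0, len(test_data), batch_size):
                end = min(start + batch_size, len(test_data))
                inputs = torch.from_numpy(self.get_embedding(test_data[start:end])).float().cuda()
                labels = torch.from_numpy(test_label[start:end]).cuda()
                total_loss += loss_func(net(inputs), labels).item()
                num_batches += 1
        net.train()
        return total_loss / num_batches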
Code Example #3
def train():
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)
    num_labels = len(processor.get_labels())

    num_train_examples = processor.get_num_examples(phase="train")

    max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='train',
        epoch=args.epoch,
        shuffle=False)

    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)
    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)

    optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,
                                        parameter_list=model.parameters())

    inputs = [Input([None, None], 'int64', name='doc')]
    labels = [Input([None, 1], 'int64', name='label')]

    model.prepare(optimizer,
                  CrossEntropy(),
                  Accuracy(topk=(1, )),
                  inputs,
                  labels,
                  device=device)

    model.fit(train_data=train_data_generator,
              eval_data=eval_data_generator,
              batch_size=args.batch_size,
              epochs=args.epoch,
              save_dir=args.checkpoints,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq)
Code Example #4
def init_model(nfm=32,
              res_blocks=1,
              in_frames=2,
              batch_size=2,
              epoch_to_load=None):

    resnet = ResNet(nfm*2, res_blocks)
    if torch.cuda.is_available(): resnet=resnet.cuda()

    my_unet = U_Net(nfm, resnet, 1, 1)
    discriminator = CNN((in_frames+1)*3, nfm, 512)

    if epoch_to_load is not None:
        my_unet = torch.load('unet_epoch_{}'.format(epoch_to_load))
        discriminator = torch.load('D_epoch_{}'.format(epoch_to_load))

    if torch.cuda.is_available(): my_unet, discriminator = my_unet.cuda(), discriminator.cuda()

    Unet_optim = torch.optim.Adam(my_unet.parameters(), lr=0.002)
    D_optim = torch.optim.Adam(discriminator.parameters(), lr=0.002)

    return {'Unet': my_unet, 'Discriminator': discriminator, 'Unet_optimizer': Unet_optim, 'Discriminator_optimizer': D_optim}
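The function returns all four components in a dict; a caller might unpack them like this (a trivial usage sketch, not taken from the original project):

parts = init_model(nfm=32, res_blocks=1, in_frames=2, batch_size=2)
unet, disc = parts['Unet'], parts['Discriminator']
unet_optim, disc_optim = parts['Unet_optimizer'], parts['Discriminator_optimizer']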
Code Example #5
def main(args):
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'train_source.log'))
    logger.info(args)

    # data
    source_transform = transforms.Compose([transforms.ToTensor()])
    source_dataset_train = SVHN('./input',
                                'train',
                                transform=source_transform,
                                download=True)
    source_dataset_test = SVHN('./input',
                               'test',
                               transform=source_transform,
                               download=True)
    source_train_loader = DataLoader(source_dataset_train,
                                     args.batch_size,
                                     shuffle=True,
                                     drop_last=True,
                                     num_workers=args.n_workers)
    source_test_loader = DataLoader(source_dataset_test,
                                    args.batch_size,
                                    shuffle=False,
                                    num_workers=args.n_workers)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(source_cnn.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    source_cnn = train_source_cnn(source_cnn,
                                  source_train_loader,
                                  source_test_loader,
                                  criterion,
                                  optimizer,
                                  args=args)
Code Example #6
File: main.py  Project: Jean-KOUAGOU/CNN-CatDog
        pred = output.data.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), accuracy))


n_features = 8  # number of feature maps

model_cnn = CNN(input_size, n_features, output_size)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

for epoch in range(0, 1):
    train(epoch, model_cnn)
    test(model_cnn)

print("Multiple hidden layers CNN model:")
print()

model_cnn = deepCNN(input_size, n_features, output_size)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

for epoch in range(0, 1):
    train(epoch, model_cnn)
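get_n_params is used above but not defined in the snippet; a minimal sketch of what it presumably does (counting all parameters in the model) is:

def get_n_params(model):
    # total number of scalar parameters across all parameter tensors
    return sum(p.numel() for p in model.parameters())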
Code Example #7
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        if torch.cuda.is_available():
            self.model = nn.DataParallel(CNN(self.config)).cuda()
        else:
            self.model = CNN(self.config)

        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(), lr=self.config.lr)
        else:
            if torch.cuda.is_available():
                self.model = self.model.module
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)

        if torch.cuda.is_available():
            torch.save(self.model.module.state_dict(), ckpt_path)
        else:
            torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        print (self.model)

        self.model.load_state_dict(torch.load(ckpt_path))

    def train_once(self):
        loss_history = []

        for batch_i, batch in enumerate(tqdm(self.data_loader)):
            text, label = batch.text, batch.label

            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()

            text.data.t_()

            logit = self.model(text)

            average_batch_loss = self.loss_fn(logit, label)
            loss_history.append(average_batch_loss.item())

            self.optimizer.zero_grad()

            average_batch_loss.backward()

            self.optimizer.step()

        epoch_loss = np.mean(loss_history)

        return epoch_loss


    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                if torch.cuda.is_available():
                    text = text.cuda()
                    label = label.cuda()

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Variable -> Tensor

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.4f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model from text data"""

        n_total_data = 0
        n_correct = 0
        loss_history = []

        '''
        import ipdb
        ipdb.set_trace()
        '''

        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # Variable -> Tensor

            # Calculate accuracy
            n_total_data += len(label)

            # [batch_size]
            _, prediction = logit.max(1)

            n_correct += (prediction == label).sum().data

        epoch_loss = np.mean(loss_history)

        accuracy = n_correct.item() / float(n_total_data)

        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')

        return epoch_loss, accuracy

    def inference(self, text):
        text = Variable(torch.LongTensor([text]))

        # [batch_size, 2]
        logit = self.model(text)

        _, prediction = torch.max(logit, dim=1)

        return prediction

    def train_eval(self):
        # Set this to your MLflow tracking server's hostname or IP address
        mlflow_server = '172.23.147.124'

        # Tracking URI
        mlflow_tracking_URI = 'http://' + mlflow_server + ':5000'
        print ("MLflow Tracking URI: %s" % (mlflow_tracking_URI))

        with mlflow.start_run():
            for key, value in vars(self.config).items():
                mlflow.log_param(key, value)

            '''
            output_dir = 'mlflow_logs'
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            '''

            for epoch in tqdm(range(self.config.epochs)):
                # print out active_run
                print("Active Run ID: %s, Epoch: %s \n" % (mlflow.active_run(), epoch))

                train_loss = self.train_once()
                mlflow.log_metric('train_loss', train_loss)

                val_loss, val_acc = self.eval()
                mlflow.log_metric('val_loss', val_loss)
                mlflow.log_metric('val_acc', val_acc)

        # Finish run
        mlflow.end_run(status='FINISHED')
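The Solver reads everything from a config object: a loss class, an optimizer class, a learning rate, epoch counts, and saving/logging intervals. A minimal, hypothetical config that satisfies that interface (the field values are assumptions; the real config also carries whatever hyperparameters CNN(config) expects) might look like:

import torch.nn as nn
import torch.optim as optim

class Config:
    # hypothetical config exposing the attributes the Solver accesses
    loss_fn = nn.CrossEntropyLoss   # instantiated via self.config.loss_fn()
    optimizer = optim.Adam          # instantiated via self.config.optimizer(params, lr=...)
    lr = 1e-3
    epochs = 10
    log_every_epoch = 1
    save_every_epoch = 5
    save_dir = './checkpoints'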
Code Example #8
File: train.py  Project: zarzouram/aics
def main(args):
    
    '''
    main function
    - describes the whole model pipeline
    - delegates to separate train and validation functions
    - loads datasets, initializes models and hyperparameters
    - sets the learning rate and optimizer, passes arguments to the training loop
    '''
    
    # load my hyperparameters
    with open(args.config_file, 'r') as f:
        hyps = json.load(f)

    # why do we need to transform images?
    # - images differ from each other size-wise
    # - random cropping can be helpful because it allows your model to learn different
    # ways your object can be represented
    # - normalisation helps to speed up training / utilise pre-trained models better
    transform = transforms.Compose(
            [
                transforms.Resize((256, 256)),
                #transforms.RandomCrop((x, x)),
                transforms.ToTensor(),
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]
        )
    
    
    # load the dataset
    dataset = cats_dogs_dataset(args.data_dir, args.annotations_file, transform=transform)
    
    # split dataset into training and validation (25k : 20k + 5k)
    train_set, validation_set = torch.utils.data.random_split(dataset, [20000 , 5000])
    
    # train loader
    train_loader = DataLoader(dataset=train_set, shuffle=bool(hyps['shuffle']),
                                   batch_size=hyps['batch_size'], num_workers=hyps['num_workers'],
                                   pin_memory=bool(hyps['pin_memory']))    
    # val loader
    validation_loader = DataLoader(dataset=validation_set, shuffle=bool(hyps['shuffle']),
                                   batch_size=hyps['batch_size'], num_workers=hyps['num_workers'],
                                   pin_memory=bool(hyps['pin_memory']))    
    
    
    # initialize and import model to GPU
    model = CNN().to(device)
    #print(model)
    
    for name, param in model.named_parameters():
        if 'classifier' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False

    # define loss (Binary Cross-Entropy for the binary classification task)
    criterion = nn.BCELoss()
    # define optimizer (SGD) on model parameters with the specified learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=hyps['learning_rate'])
    
    train(model, criterion, optimizer, train_loader, validation_loader, hyps['num_epochs'])
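The train function invoked on the last line is not included in the snippet. A minimal sketch under the same assumptions (BCELoss on a sigmoid output of shape [batch, 1], a module-level device, float labels); the signature matches the call above, the body is an assumption:

def train(model, criterion, optimizer, train_loader, validation_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)  # BCELoss expects float targets
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # quick validation pass
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in validation_loader:
                images = images.to(device)
                labels = labels.float().unsqueeze(1).to(device)
                val_loss += criterion(model(images), labels).item()
        print(f'epoch {epoch + 1}: train loss {running_loss / len(train_loader):.4f}, '
              f'val loss {val_loss / len(validation_loader):.4f}')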
Code Example #9
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def train(self):
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(
                    average_batch_loss.item())  # tensor -> Python float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch+1}.pkl')
                print('Save parameters at ', ckpt_path)
                torch.save(self.model.state_dict(), ckpt_path)

    def eval(self, epoch=None):

        # Load model parameters
        if not isinstance(epoch, int):
            epoch = self.config.epochs
        ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

        loss_history = []
        for _, batch in tqdm(enumerate(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(
                average_batch_loss.item())  # tensor -> Python float

        epoch_loss = np.mean(loss_history)

        print(f'Loss: {epoch_loss:.2f}')
Code Example #10
File: train.py  Project: KWYi/PytorchStudy
                                              shuffle=True)
    # Prepare the iteration: dataset = the data, batch_size = number of samples trained per step,
    # num_workers = number of CPU threads used to read data, shuffle = whether to shuffle the data order

    if MODEL == 'CNN':
        from models import CNN
        model = CNN()
    elif MODEL == 'MLP':
        from models import MLP
        model = MLP()
    else:
        raise NotImplementedError("You need to choose among [CNN, MLP].")

    loss = nn.CrossEntropyLoss()  # declare CrossEntropyLoss as the loss object
    # CrossEntropyLoss applies softmax internally before computing the loss.
    optim = torch.optim.Adam(model.parameters(), lr=2e-4, betas=[0.5, 0.99])
    # Optimizer declaration. lr: learning rate, how strongly each gradient step changes the weights.
    # beta1, beta2: exponential decay rates for Adam's first- and second-moment estimates
    # (eps, not the betas, is what keeps the update's denominator away from zero).

    EPOCHS = 5  # how many passes over the full dataset to train
    total_step = 0  # counts the total number of training steps taken
    list_loss = list()  # list for storing loss values

    for epoch in trange(EPOCHS):  # loop over the dataset EPOCHS times
        for i, data in enumerate(data_loader):  # each batch from data_loader, with its index i
            total_step += 1  # increment the step counter
            input, label = data[0], data[1]  # unpack the inputs and labels from the batch
            # input shape = [32,1,28,28]  [batch size, channel, height, width]
            input = input.view(
Code Example #11
n_epochs = 5000
batch_size_train = 32
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
weight_decay = .001

random_seed = 1
torch.backends.cudnn.enabled = True
torch.manual_seed(random_seed)

network = CNN()
network.cuda()

gpu_available = "GPU available?:      " + str(torch.cuda.is_available())
using_cuda = "Network using cuda?: " + str(next(network.parameters()).is_cuda)

print(gpu_available)
print(using_cuda)
logging.info("\n\n------------------------------------------------------")
logging.info(gpu_available)
logging.info(using_cuda)

logging.info(\
f"""\n\nNetwork Details:

n_epochs         = {n_epochs}
batch_size_train = {batch_size_train}
batch_size_test  = {batch_size_test}
learning_rate    = {learning_rate}
momentum         = {momentum}
Code Example #12
class TextClassifier:
    def __init__(self,
                 paths,
                 batch_size=6,
                 iterations=50,
                 initial_lr=0.003,
                 hidden_size=256,
                 dropout=0.2,
                 kernel_sz=3):

        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')

        self.data = DataReader(paths)
        self.data.set_training_data(batch_size,
                                    ('cuda:0' if self.use_cuda else 'cpu'))
        self.train_batch_loader = BatchGenerator(self.data.train_data,
                                                 'Sentence', 'Label')
        self.val_batch_loader = BatchGenerator(self.data.val_data, 'Sentence',
                                               'Label')
        self.test_batch_loader = BatchGenerator(self.data.test_data,
                                                'Sentence', 'Label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr
        self.kernel_sz = kernel_sz

        # Create Model
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        self.cnn_model = CNN(emb_size=emb_size,
                             emb_dimension=emb_dim,
                             n_out=len(self.data.LABEL.vocab),
                             dropout=dropout,
                             kernel_sz=kernel_sz,
                             stride=1,
                             padding=0,
                             out_filters=hidden_size,
                             pretrained_emb=self.data.TEXT.vocab.vectors)

        if self.use_cuda:
            self.cnn_model.cuda()

    def train(self):

        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []

        loss = 0.0

        best_model = 0.0

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            optimizer = optim.SGD(self.cnn_model.parameters(),
                                  lr=self.initial_lr)
            self.cnn_model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            data_iter = iter(self.train_batch_loader)

            # For some reason iterating directly over the iterator loses the target variable (y),
            # so we loop over its length and fetch each batch with next() inside the loop
            for i in range(len(self.train_batch_loader)):

                ((x_batch, x_len_batch), y_batch) = next(data_iter)

                #                 if torch.min(x_len_batch) > self.kernel_sz:
                optimizer.zero_grad()

                loss, logits = self.cnn_model.forward(x_batch, y_batch)

                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

                loss.backward()

                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.trainds))

            val_loss, val_acc = self.eval_model(self.val_batch_loader,
                                                len(self.data.valds))

            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            if best_model < val_acc:
                best_model = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader,
                                                      len(self.data.testds))

            print("Train: {Loss: " + str(total_loss / steps) + ", Acc: " +
                  str(total_acc / len(self.data.trainds)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) +
                  " }")

#         test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.testds) )

        test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc

    def eval_model(self, batch_loader, N):
        self.cnn_model.eval()

        total_loss = 0.0
        total_acc = 0.0
        steps = 0

        batch_iter = iter(batch_loader)

        with torch.no_grad():
            for i in range(len(batch_loader)):

                ((x_batch, x_len_batch), y_batch) = next(batch_iter)

                loss, logits = self.cnn_model(x_batch, y_batch)

                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)

                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1

        return (total_loss / steps), (total_acc / N)
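A hypothetical way to drive the classifier above; the structure of the paths argument depends on DataReader and is only an assumption here:

# hypothetical usage; adjust `paths` to whatever DataReader expects
clf = TextClassifier(paths={'train': 'train.csv', 'val': 'val.csv', 'test': 'test.csv'},
                     batch_size=32, iterations=20, initial_lr=0.01)
train_loss, train_acc, val_loss, val_acc, test_acc = clf.train()
print('test accuracy of the best model:', test_acc)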
Code Example #13
def hp_grid_search(model_type,
                   lr_list,
                   momentum_list,
                   reg_list,
                   batch_size_list,
                   train_ds,
                   valid_ds,
                   optimizer,
                   epochs,
                   loss_type_list=["l1"],
                   save_all_plots="No",
                   save_final_plot="No",
                   final_plot_prefix=None,
                   return_all_loss=False):
    '''
    model_type (str): which model architecture to test (e.g. "SimpleCNN")
    lr_list (list of numeric): list of learning rates
    momentum_list (list of numeric): list of momentums
    reg_list (list of numeric): list of regularization penalties
    batch_size_list (list of numeric): list of batch sizes
    train_ds: training dataset after using WaldoDataset
    valid_ds: validation dataset after using WaldoDataset
    optimizer: unused; the optimizer is rebuilt inside the loop for each configuration
    epochs (int): number of training epochs per configuration
    loss_type_list (list of str): list of losses if you want to try more than one
    save_all_plots (str): save every plot? defaults to "No"
    save_final_plot (str): save only the final plot; defaults to "No". The final plot is
        saved automatically if save_all_plots == "Yes"
    final_plot_prefix (str): prefix for the final plot file name
    return_all_loss (bool): if True, return the per-run train and validation loss histories
    '''

    i = 0
    all_loss_train = []
    all_loss_valid = []

    for lr in lr_list:
        for r in reg_list:
            for m in momentum_list:
                for b in batch_size_list:
                    for loss_type in loss_type_list:

                        print('HP ITERATION: ', i)
                        i += 1
                        print('learning_rate: ', lr)
                        print('regularization: ', r)
                        print('momentum: ', m)
                        print('batch_size: ', b)
                        print('loss type: ', loss_type)

                        param_str = "{0}_{1}_{2}_{3}_{4}_{5}".format(
                            model_type, str(lr), str(r), str(m), str(b),
                            loss_type)
                        print(param_str)

                        if loss_type == "l1":
                            criterion = nn.L1Loss()

                        if loss_type == "l2":
                            criterion = nn.MSELoss()

                        if model_type == "SimpleCNN":
                            model = CNN()

                        train_dl = DataLoader(train_ds,
                                              batch_size=b,
                                              shuffle=True)
                        valid_dl = DataLoader(valid_ds, batch_size=b)

                        optimizer = torch.optim.SGD(model.parameters(),
                                                    lr,
                                                    momentum=m,
                                                    weight_decay=r)

                        train_loss, valid_loss = train(model_type=model_type,
                                                       model=model,
                                                       optimizer=optimizer,
                                                       train_dl=train_dl,
                                                       valid_dl=valid_dl,
                                                       epochs=epochs,
                                                       criterion=criterion,
                                                       return_loss=True,
                                                       plot=True,
                                                       verbose=True)

                        all_loss_train.append(train_loss)
                        all_loss_valid.append(valid_loss)

                        plt.plot(valid_loss)
                        plt.title('Validation Loss')
                        plt.xlabel('Epoch')
                        plt.ylabel('Loss')
                        if save_all_plots == "Yes":
                            print('./figures/V_{0}.png'.format(param_str))
                            plt.savefig(
                                './figures/V_{0}.png'.format(param_str))
                        plt.show()

                        plt.plot(train_loss)
                        plt.title('Training Loss')
                        plt.xlabel('Epoch')
                        plt.ylabel('Loss')
                        if save_all_plots == "Yes":
                            plt.savefig(
                                './figures/T_{0}.png'.format(param_str))
                        plt.show()

    for pt in all_loss_train:
        plt.plot(pt)
    plt.title('All Plots Training')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if save_final_plot == "Yes":
        plt.savefig(
            './figures/{0} All Training Loss.png'.format(final_plot_prefix))
    plt.show()

    for pv in all_loss_valid:
        plt.plot(pv)
    plt.title('All Plots Validation')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if save_final_plot == "Yes":
        plt.savefig(
            './figures/{0}All Validation Loss.png'.format(final_plot_prefix))
    plt.show()

    if return_all_loss:
        return all_loss_train, all_loss_valid
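A hypothetical invocation of the grid search above; the datasets and the epoch count are assumptions:

# hypothetical usage, assuming train_ds / valid_ds were built with WaldoDataset
all_train, all_valid = hp_grid_search(model_type="SimpleCNN",
                                      lr_list=[1e-2, 1e-3],
                                      momentum_list=[0.9],
                                      reg_list=[1e-4],
                                      batch_size_list=[32],
                                      train_ds=train_ds,
                                      valid_ds=valid_ds,
                                      optimizer=None,   # unused; rebuilt inside the loop
                                      epochs=10,
                                      loss_type_list=["l1", "l2"],
                                      save_final_plot="Yes",
                                      final_plot_prefix="waldo_",
                                      return_all_loss=True)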
Code Example #14
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()

        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(), lr=self.config.lr)
        else:
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []

            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label

                # [batch_size, max_seq_len]
                text.data.t_()

                # [batch_size, 2]
                logit = self.model(text)

                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # tensor -> Python float

                # Flush out remaining gradient
                self.optimizer.zero_grad()

                # Backpropagation
                average_batch_loss.backward()

                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model from text data"""

        n_total_data = 0
        n_correct = 0
        loss_history = []
        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label

            # [batch_size, max_seq_len]
            text.data.t_()

            # [batch_size, 2]
            logit = self.model(text)

            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # tensor -> Python float

            # Calculate accuracy
            n_total_data += len(label)

            # [batch_size]
            _, prediction = logit.max(1)

            n_correct += (prediction == label).sum().data

        epoch_loss = np.mean(loss_history)

        accuracy = n_correct.item() / float(n_total_data)

        print(f'Loss: {epoch_loss:.2f}')

        print(f'Accuracy: {accuracy}')

    def inference(self, text):

        text = Variable(torch.LongTensor([text]))

        # [batch_size, 2]
        logit = self.model(text)

        _, prediction = torch.max(logit, dim=1)

        return prediction
Code Example #15
File: tuning.py  Project: jan-huiskes/DL4NLP
def train(model_name="LSTM", params=None, embedding="Random"):

    # Parameters to tune
    print(params)
    batch_size = params["batch_size"]
    num_epochs = params["num_epochs"]
    oversample = params["oversample"]
    soft_labels = params["soft_labels"]
    if model_name == "LSTM":
        learning_rate = params["learning_rate"]
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]
        dropout = params["dropout"]
        combine = embedding == "Both"

    embedding_dim = 300

    if combine:
        embedding = "Random"

    if model_name == "Bert":
        learning_rate = params["learning_rate"]
        num_warmup_steps = params["num_warmup_steps"]
        num_total_steps = params["num_total_steps"]
        embedding = "None"

    # Constants
    test_percentage = 0.1
    val_percentage = 0.2

    # Load data
    torch.manual_seed(42)
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      for_bert=(model_name == "Bert"),
                      combine=combine)
    train_data, val_test_data = split_dataset(dataset,
                                              test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    train_loader, val_loader, weights = load_data(oversample, train_data,
                                                  val_data, batch_size)

    # Define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size,
                    embedding_dim=embedding_dim,
                    combine=params["combine"],
                    n_filters=params["filters"])
    elif model_name == "LSTM":

        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size,
                     embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader, val_loader, weights = load_data(oversample,
                                                      train_data,
                                                      val_data,
                                                      batch_size,
                                                      collate_fn=bert_collate)

    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # cuda
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # optimiser
    scheduler = None
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])

    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)

    # weighted cross-entropy loss, weighted by the class counts of the other classes
    weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    if soft_labels:
        criterion = weighted_soft_cross_entropy
    else:
        criterion = nn.CrossEntropyLoss(weight=weights)
    eval_criterion = nn.CrossEntropyLoss(weight=weights)

    for epoch in range(num_epochs):
        # train
        epoch_loss, epoch_acc = train_epoch(model,
                                            train_loader,
                                            optimizer,
                                            criterion,
                                            device,
                                            scheduler=scheduler,
                                            weights=weights)

        # realtime feel
        print(f'Epoch: {epoch+1}')
        print(
            f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%'
        )

    # Compute F1 score on validation set - this is what we optimise during tuning
    loss, acc, predictions, ground_truth = evaluate_epoch(model,
                                                          val_loader,
                                                          eval_criterion,
                                                          device,
                                                          is_final=True)
    val_f1 = f1_score(y_true=ground_truth, y_pred=predictions, average="macro")
    print("Done")
    return val_f1
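weighted_soft_cross_entropy is referenced above but not defined in the snippet. A minimal sketch of such a criterion, assuming the soft labels are per-class probability vectors (both the signature and the weighting scheme are assumptions):

import torch.nn.functional as F

def weighted_soft_cross_entropy(logits, soft_targets, weights=None):
    # hypothetical soft-label cross entropy: negative expected log-likelihood under soft targets
    log_probs = F.log_softmax(logits, dim=1)   # [batch, n_classes]
    if weights is not None:
        log_probs = log_probs * weights        # per-class weights, broadcast over the batch
    return -(soft_targets * log_probs).sum(dim=1).mean()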
Code Example #16
class Trainer:
    """
    训练
    """
    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)
        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)
        self.writer = SummaryWriter()

        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path,
                          val_path):
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs',
              '*' * 20)
        self.encoder.fine_tune()
        self.optimizer = torch.optim.Adam([
            {
                'params': self.encoder.parameters(),
                'lr': self.ft_encoder_lr
            },
            {
                'params': self.decoder.parameters(),
                'lr': self.ft_decoder_lr
            },
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        模型需要优化的全部参数,此处encoder暂时设计不用训练,故不加参数

        :return:
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        训练

        :param val_path: 保存验证过程生成句子的路径
        :param save_path: 保存模型的地址
        :param val_interval: 验证的间隔
        :param max_epochs: 最大训练的轮次
        :return:
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap,
                       cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)

                outputs = pack_padded_sequence(outputs,
                                               cap_len - 1,
                                               batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:],
                                               cap_len - 1,
                                               batch_first=True)[0]
                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        保存最好的模型

        :param save_path: 保存模型文件的地址
        :param train_epoch: 当前训练的轮次
        :return:
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'tran_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        验证

        :param val_path: 保存验证过程生成句子的路径
        :param train_epoch: 当前训练的epoch
        :return:
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        self.encoder.eval()
        self.decoder.eval()
Code Example #17
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

if __name__ == '__main__':
    opt = Training_options().parse()
    if opt.model == 'cnn':
        from models import CNN
        net = CNN(opt)
        train_predictors, train_predictands = assemble_predictors_predictands(
            opt, train=True)
        train_dataset = ENSODataset(train_predictors, train_predictands)
        trainloader = DataLoader(train_dataset, batch_size=opt.batch_size)
        optimizer = optim.Adam(net.parameters(), lr=opt.lr)
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        net = net.to(device)
        best_loss = np.infty
        train_losses = []
        net.train()
        criterion = nn.MSELoss()
        for epoch in range(opt.epoch):
            running_loss = 0.0
            for i, data in enumerate(trainloader):
                batch_predictors, batch_predictands = data
                batch_predictands = batch_predictands.to(device)
                batch_predictors = batch_predictors.to(device)
                optimizer.zero_grad()
                predictions = net(batch_predictors).squeeze()
                loss = criterion(predictions, batch_predictands.squeeze())
Code Example #18
    if args.init_emb:
        assert emb.shape[1] == args.emb_dim
        emb = torch.Tensor(emb)
    else:
        emb = None

    if args.model == "cnn":
        model = CNN(len(data["word2idx"]), args.emb_dim, args.out_dim,
                    args.window_dim, len(data["lbl2idx"]), args.dp, emb)

    if args.fix_emb:
        model.embedding.weight.requires_grad = False

    loss = torch.nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.l2)

    if args.cuda:
        model.cuda()
    trainModel(args, model, loss, optim, trainData, valData)

    if args.submit:
        # load the best model saved during training
        model.load_state_dict(
            torch.load(args.path_savedir +
                       "{}_{}.model".format(args.model, args.epochs)))
        model.eval()

        preds_val = predict(model, valData)
Code Example #19
File: ses2-2.py  Project: knamdar/IEEE_Workshops
                                               batch_size=batch_size,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    model = CNN()
    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()

    learning_rate = 0.1
    writer.add_scalar("Learning_Rate", learning_rate)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    train(train_loader, test_loader, model, optimizer, criterion)
    # torch.save(model, "./CNN.pt")
    # model2 = torch.load("./CNN.pt")

    # torch.save(model.state_dict(), "./CNN_State.pt")
    # model2 = CNN()
    # model2.load_state_dict(torch.load("./CNN_State.pt"))

    # torch.save({
    #         "Learning Rate": learning_rate,
    #         "model_state_dict": model.state_dict(),
    #         "optimizer_state_dict": optimizer.state_dict(),
    #         "batch size": batch_size,
    #         "number of epochs": num_epochs
    #         }, "./checkpoint.pt")
Code Example #20
def train():
    transforms = Compose([ToTensor()])
    train_dataset = CaptchaData('./data/train', transform=transforms)
    train_data_loader = DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   num_workers=0,
                                   shuffle=True,
                                   drop_last=True)
    test_data = CaptchaData('./data/test', transform=transforms)
    test_data_loader = DataLoader(test_data,
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  drop_last=True)
    cnn = CNN()
    if torch.cuda.is_available():
        cnn.cuda()
    if restor:
        cnn.load_state_dict(torch.load(model_path))


        # freezing_layers = list(cnn.named_parameters())[:10]
        # for param in freezing_layers:
        #     param[1].requires_grad = False
        #     print('freezing layer:', param[0])

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            img = Variable(img)
            target = Variable(target)
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(acc)
            loss_history.append(loss.item())
        print('train_loss: {:.4}|train_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        for img, target in test_data_loader:
            img = Variable(img)
            target = Variable(target)
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)

            acc = calculat_acc(output, target)
            acc_history.append(acc)
            loss_history.append(loss.item())
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        torch.save(cnn.state_dict(), model_path)
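calculat_acc is not shown in this snippet. For a captcha model whose output concatenates one one-hot block per character, an accuracy helper might look like the sketch below; the number of characters and the charset size are assumptions, not values from the original project:

def calculat_acc(output, target, num_chars=4, num_classes=36):
    # hypothetical accuracy: a sample counts as correct only if every character matches
    output = output.view(-1, num_chars, num_classes).argmax(dim=2)  # predicted char indices
    target = target.view(-1, num_chars, num_classes).argmax(dim=2)  # true char indices
    per_sample_correct = (output == target).all(dim=1).float()
    return per_sample_correct.mean().item()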
Code Example #21
File: train.py  Project: xiekt1993/Portfolio
              'pretrained_word_embeddings_file': pretrained_word_embeddings_file, 'transform_train': transform_train, 
              'transform_val': transform_val, 'WEIGHT_DECAY': WEIGHT_DECAY, 'ADAM_FLAG': ADAM_FLAG, 'RNN_DROPOUT':RNN_DROPOUT,
              'CNN_DROPOUT': CNN_DROPOUT, 'GRAD_CLIP': GRAD_CLIP}


    print('Initializing models...')
    encoder = CNN(NO_WORD_EMBEDDINGS, pretrained_cnn_dir, freeze=True, dropout_prob=CNN_DROPOUT, model_name='resnet152')
    decoder = RNN(VOCAB_SIZE, NO_WORD_EMBEDDINGS, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS,
                  pre_trained_file=pretrained_word_embeddings_file, freeze=False, dropout_prob=RNN_DROPOUT)
    params['encoder'] = encoder
    params['decoder'] = decoder
    encoder.cuda()
    decoder.cuda()

    print('Initializing optimizer...')
    model_paras = list(encoder.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(model_paras, lr=LR, weight_decay=WEIGHT_DECAY)
    params['optimizer'] = optimizer


    pickle.dump(params, open(init_params_file, 'wb'))


# initialize accumulators.
current_epoch = 1
batch_step_count = 1
time_used_global = 0.0
checkpoint = 1


# load lastest model to resume training
Code Example #22
        model = gmsCNN(kernelg=args.kernelg,
                       kernels=args.kernels,
                       kernel=args.kernel,
                       num_filters=args.num_filters,
                       rate=args.rate)
        fname = "models/gmsCNN_" + args.data + str(args.kernel) + "_" + str(
            args.kernelg) + "_" + str(args.kernels) + "_" + str(
                args.num_filters) + "_" + str(args.batch_size) + "_" + str(
                    args.rate) + ".model"

    if args.gpu:
        model = model.cuda()

    # Training setup
    L = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=args.learn_rate)

    if not os.path.exists("models"):
        os.makedirs("models")

    # load to continue with pre-existing model
    if os.path.exists(fname):
        model.load_state_dict(t.load(fname))
        print("Successfully loaded previous model " + str(fname))

    # start with a model defined on 0
#    train_mix, test, train_data, train_labels = dataFetch()
#    # select only 0 category
#    train_dataset = customDataset(train_data[0], train_labels[0])
#
#    # define train and test as DataLoaders
Code Example #23
                               ]))

water = datasets.ImageFolder(root=WATER_DIRECTORY, transform=transforms.Compose([
                                   transforms.ToTensor()
                               ]))

ship_loader = torch.utils.data.DataLoader(dataset=ship, batch_size=BATCH_SIZE, shuffle=True)
water_loader = torch.utils.data.DataLoader(dataset=water, batch_size=BATCH_SIZE, shuffle=True)

### INIT MODEL
device = torch.device("cpu") #change if on GPU, also need to use .cuda()
model = CNN().to(device)

### MSE LOSS AND ADAM OPTIMIZER
criterion = nn.MSELoss(reduction='mean')  # size_average/reduce are deprecated; reduction='mean' is equivalent
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, amsgrad=True)

### TRAIN (SHIP == 1, WATER == 0)
for epoch in range(EPOCHS):

    print(f"EPOCH {epoch}")
    correct, total = 0, 0

    for idx, ((positive, _), (negative, __)) in enumerate(zip(ship_loader, water_loader)):
        
        #training on ship batch
        out_positive = model(positive)
        loss = criterion(out_positive, torch.ones_like(out_positive))  # targets must match the output shape
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
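The excerpt above only performs the ship (positive) update. A hedged sketch of a matching water (negative) step, plus bookkeeping for the `correct`/`total` counters initialized earlier, might look as follows; the 0.5 decision threshold is an assumption:

        # training on water batch (label 0), mirroring the ship step
        out_negative = model(negative)
        loss = criterion(out_negative, torch.zeros_like(out_negative))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accuracy bookkeeping with an assumed 0.5 threshold
        with torch.no_grad():
            correct += (out_positive >= 0.5).sum().item()
            correct += (out_negative < 0.5).sum().item()
            total += out_positive.numel() + out_negative.numel()

    print(f"accuracy: {correct / max(total, 1):.4f}")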
コード例 #24
0
ファイル: main.py プロジェクト: rvarma9604/flexible-cnn
                        train=False,
                        download=False,
                        transform=transforms.ToTensor())
'''model setup'''
model = CNN(
    arch,
    channels,
    kernels,
    num_class,
    input_shape,  #cnn parameters
    hidden_dims,
    activation,
    p_drop,
    batchnorm  # mlp parameters
)
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

print(f'The Model:\n{model}')
'''train the model'''
(train_loss, train_acc, test_loss, test_acc) =\
     model_train(model,
         trainer,
         optimizer,
         criterion,
         tester=tester,
         batch_size=500,
         epochs=epochs)
'''plot the performance'''
performance_plot(train_loss, test_loss, 0.7, "loss", "Loss.jpeg")
performance_plot(train_acc, test_acc, 0.7, "accuracy", "Accuracy.jpeg")
コード例 #25
0
    # batch_size: number of samples processed at a time
    # shuffle=True: whether to shuffle the data

    if MODEL == 'CNN':
        from models import CNN
        model = CNN()
    elif MODEL == 'MLP':
        from models import MLP
        model = MLP()
        print(10)
    else:
        raise NotImplementedError("You need to choose among [CNN, MLP].")


    loss = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=2e-4, betas=(0.5,0.99), eps=1e-8)       # parameters: the weights to update; optim applies the gradients; lr: coefficient on the gradients, since the raw gradient values are too large

    EPOCHS = 1                                                                 # how many passes over the entire dataset
    total_step = 0
    list_loss = list()
    for epoch in range(EPOCHS):
      for i, data in enumerate(data_loader):                                 # enumerate also yields the index inside the loop
        total_step = total_step+1
        input, label = data[0], data[1]
        # input shape [32, 1, 28, 28]: batch size, channel, height, width
        input = input.view(input.shape[0], -1) if MODEL == 'MLP' else input
        # → [batch size, channel * height * width]: flattened to 1-D because the MLP expects a vector per sample
        # view vs. reshape? (view reuses the same underlying memory)

        classification_results = model.forward(input) # [batch size, 10]; classes that inherit nn.Module can be called directly, so .forward() can be omitted
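The excerpt stops at the forward pass. The remainder of such a loop usually computes the loss, backpropagates, and steps the optimizer; a minimal sketch continuing with the `loss`, `optim`, and `list_loss` objects defined above (the logging interval is an assumption):

        loss_value = loss(classification_results, label)  # 'loss' is the CrossEntropyLoss defined above
        optim.zero_grad()
        loss_value.backward()
        optim.step()

        list_loss.append(loss_value.item())
        if total_step % 100 == 0:  # assumed logging interval
            print('step {}, loss {:.4f}'.format(total_step, loss_value.item()))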
コード例 #26
0
def training_run_cnn(combination, criterion, train_loader, valid_loader, run):

    n_featuremap_1, n_featuremap_2, mode = combination
    model_path = "CNN_run_{}.pt".format(run)
    results[model_path] = dict()

    # initialize the network with the given configuration
    my_net = CNN(n_featuremap_1=n_featuremap_1, n_featuremap_2=n_featuremap_2)

    # initialize weights with the given mode
    my_net.apply(partial(init_weights, mode=mode))
    my_net.to(device)

    optimizer = torch.optim.Adam(my_net.parameters())

    pbar = tqdm(total=10)  # one progress-bar step per epoch

    for epoch in range(10):  # loop over the training dataset multiple times

        training_loss = .0

        for batch_idx, (x, target) in enumerate(train_loader):
            x, target = x.to(device), target.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = my_net(x).view(-1, 1)
            loss = criterion(outputs, target.view(-1, 1))
            loss.backward()
            optimizer.step()

            if epoch == 9:  # update training loss in the last epoch
                training_loss += loss.item() * len(x)

            if batch_idx % 100 == 99:  # print every 100 mini-batches
                print("[ Epoch %d,Batch %2d] loss: %.3f" %
                      (epoch + 1, batch_idx + 1, loss.item()))

        pbar.update(1)

    pbar.close()
    # update results
    results[model_path]["training_loss"] = training_loss / len(train_loader.dataset)

    print("Finished Training !")
    print("Start Evaluating !")

    # Validation loss
    valid_loss = .0
    correct = 0
    thres = 0.5
    with torch.no_grad():
        for batch_idx, (x, target) in enumerate(valid_loader):
            x, target = x.to(device), target.to(device)

            outputs = my_net(x).view(-1, 1)
            prediction = outputs >= thres
            correct += prediction.eq(target.view(-1, 1)).sum().item()

            loss = criterion(outputs, target.view(-1, 1))
            valid_loss += loss.item() * len(x)

    # update results
    results[model_path]["validation_loss"] = valid_loss / len(valid)
    results[model_path]["accuracy"] = correct / len(valid)

    # save model in disk
    torch.save(my_net.state_dict(), "./models/" + model_path)
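A hedged usage sketch for the function above, sweeping hyperparameter combinations with itertools.product; the feature-map sizes, init-mode names, and the BCELoss choice are assumptions (the original project's search space is not shown here):

from itertools import product
import torch.nn as nn

featuremaps_1 = [16, 32]            # hypothetical search space
featuremaps_2 = [32, 64]
init_modes = ['xavier', 'kaiming']

criterion = nn.BCELoss()  # assumed: matches sigmoid-style outputs thresholded at 0.5 above

for run, combination in enumerate(product(featuremaps_1, featuremaps_2, init_modes)):
    training_run_cnn(combination, criterion, train_loader, valid_loader, run)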
コード例 #27
0
def main():
    torch.manual_seed(42)

    # Random
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 128, 'learning_rate': 0.01, 'num_epochs': 5, 'num_layers': 2, 'oversample': False, 'soft_labels': False}
    # Glove
    params = {
        'batch_size': 32,
        'dropout': 0,
        'hidden_dim': 128,
        'learning_rate': 0.001,
        'num_epochs': 5,
        'num_layers': 2,
        'oversample': False,
        'soft_labels': False
    }
    # Random
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 256, 'learning_rate': 0.0001, 'num_epochs': 5, 'num_layers': 3, 'oversample': False, 'soft_labels': False}

    #some params
    experiment_number = 1
    test_percentage = 0.1
    val_percentage = 0.2
    batch_size = params["batch_size"]
    num_epochs = 5  #params["num_epochs"]
    dropout = params["dropout"]
    embedding_dim = 300
    model_name = "CNN"  #'Bert' #"CNN" #"LSTM"
    unsupervised = True
    embedding = "Glove"  #"Random" ##"Glove" # "Both" #
    soft_labels = False
    combine = embedding == "Both"

    # LSTM parameters
    if model_name == "LSTM":
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]

    # Bert parameter
    num_warmup_steps = 100
    num_total_steps = 1000
    if model_name == "Bert":
        embedding = "None"
    if embedding == "Both":
        combine = True
        embedding = "Random"
    else:
        combine = False
    learning_rate = params["learning_rate"]  #5e-5, 3e-5, 2e-5
    oversample_bool = False
    weighted_loss = True
    # load data
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      combine=combine,
                      for_bert=(model_name == "Bert"))

    #dataset.oversample()
    train_data, val_test_data = split_dataset(dataset,
                                              test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))

    # print(len(train_data))
    #save_data(train_data, 'train')
    #save_data(test_data, 'test')

    #define loaders
    if oversample_bool:
        weights, targets = get_loss_weights(train_data, return_targets=True)
        class_sample_count = [
            1024 / 20, 13426, 2898 / 2
        ]  # approximate per-class sample counts used to derive the oversampling weights
        oversample_weights = 1 / torch.Tensor(class_sample_count)
        oversample_weights = oversample_weights[targets]
        # oversample_weights = torch.tensor([0.9414, 0.2242, 0.8344]) #torch.ones((3))-
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            oversample_weights, len(oversample_weights))
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate,
                                                   sampler=sampler)
    else:
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=batch_size,
                                             collate_fn=my_collate)

    #define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size, embedding_dim, combine=combine)
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size,
                     embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)

    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=bert_collate)
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=batch_size,
                                                 collate_fn=bert_collate)

    #device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # LOSS: weighted cross-entropy, with weights derived from the per-class counts
    if weighted_loss:
        weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    else:
        weights = torch.ones(3, device=device)
    #weights = torch.tensor([1.0, 1.0, 1.0], device = device) #get_loss_weights(train_data).to(device) # not to run again
    criterion = nn.CrossEntropyLoss(weight=weights)
    if soft_labels:
        criterion = weighted_soft_cross_entropy
    #latent model
    if unsupervised:
        vocab_size = len(dataset.vocab)
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        model = Rationalisation_model(vocab_size,
                                      embedding_dim=embedding_dim,
                                      model=model_name,
                                      batch_size=batch_size,
                                      combine=combine,
                                      criterion=criterion)

    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    #model to device
    model.to(device)

    #optimiser
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if model_name == "Bert":
        optimizer = AdamW(model.parameters(),
                          lr=learning_rate,
                          correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)
    else:
        scheduler = None

    plot_log = defaultdict(list)
    for epoch in range(num_epochs):
        #train and validate
        epoch_loss, epoch_acc = train_epoch(model,
                                            train_loader,
                                            optimizer,
                                            criterion,
                                            device,
                                            soft_labels=soft_labels,
                                            weights=weights,
                                            scheduler=scheduler,
                                            unsupervised=unsupervised)
        val_loss, val_acc = evaluate_epoch(model,
                                           val_loader,
                                           criterion,
                                           device,
                                           soft_labels=soft_labels,
                                           weights=weights,
                                           unsupervised=unsupervised)
        #save for plotting
        for name, point in zip(
            ["train_loss", "train_accuracy", "val_loss", "val_accuracy"],
            [epoch_loss, epoch_acc, val_loss, val_acc]):
            plot_log[name].append(point)  # accumulate one point per epoch for plotting
        #realtime feel
        print(f'Epoch: {epoch+1}')
        print(
            f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%'
        )
        print(f'\t Val. Loss: {val_loss:.5f} |  Val. Acc: {val_acc*100:.2f}%')
    sample_sentences_and_z(model, train_loader, device, dataset.vocab)
    #save plot
    results_directory = f'plots/{experiment_number}'
    os.makedirs(results_directory, exist_ok=True)
    for name, data in plot_log.items():
        save_plot(data, name, results_directory)
    #save model
    torch.save(model, os.path.join(results_directory, 'model_cnn.pth'))
    #confusion matrix and all that fun
    loss, acc, predictions, ground_truth = evaluate_epoch(
        model,
        val_loader,
        criterion,
        device,
        is_final=True,
        soft_labels=soft_labels,
        weights=weights,
        unsupervised=unsupervised)
    conf_matrix = confusion_matrix(ground_truth, predictions)
    class_report = classification_report(ground_truth, predictions)
    print('\nFinal Loss and Accuracy\n----------------\n')
    print(f'\t Val. Loss: {loss:.5f} |  Val. Acc: {acc*100:.2f}%')
    print('\nCONFUSION MATRIX\n----------------\n')
    print(conf_matrix)
    print('\nCLASSIFICATION REPORT\n---------------------\n')
    print(class_report)

    plot_confusion_matrix(ground_truth,
                          predictions,
                          classes=["Hate speech", "Offensive", "Neither"],
                          normalize=False,
                          title='Confusion matrix')
    plt.show()
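The soft-label branch above swaps the criterion for `weighted_soft_cross_entropy`, which is not defined in this excerpt. One plausible definition, assuming per-class weights and soft target distributions (both the signature and the exact weighting are assumptions):

import torch
import torch.nn.functional as F

def weighted_soft_cross_entropy(logits, soft_targets, weights):
    # cross-entropy against soft labels, with an assumed per-class weight vector
    log_probs = F.log_softmax(logits, dim=1)
    per_sample = -(weights.unsqueeze(0) * soft_targets * log_probs).sum(dim=1)
    return per_sample.mean()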
コード例 #28
0
def train():
    transforms = Compose([Resize((height, width)), ToTensor()])
    train_dataset = CaptchaData(train_data_path, num_class=len(alphabet), num_char=int(numchar), transform=transforms, alphabet=alphabet)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers,
                                   shuffle=True, drop_last=True)
    test_data = CaptchaData(test_data_path, num_class=len(alphabet), num_char=int(numchar), transform=transforms, alphabet=alphabet)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=num_workers, shuffle=True, drop_last=True)
    cnn = CNN(num_class=len(alphabet), num_char=int(numchar), width=width, height=height)
    if use_gpu:
        cnn.cuda()

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            img = Variable(img)
            target = Variable(target)
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('epoch:{},train_loss: {:.4}|train_acc: {:.4}'.format(
            epoch,
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        for img, target in test_data_loader:
            img = Variable(img)
            target = Variable(target)
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            loss = criterion(output, target)  # recompute the loss on the test batch

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))
        torch.save(cnn.state_dict(), os.path.join(model_path, "model_{}.pth".format(epoch)))
コード例 #29
0
def create_cifar_model(ema=False):
    model = CNN()
    if ema:
        for param in model.parameters():
            param.detach_()
    return model
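The `ema=True` branch detaches the parameters because an EMA ("mean teacher") copy is updated by averaging rather than by backpropagation. A minimal sketch of such an update, assuming the usual exponential-moving-average rule (the function name and alpha value are illustrative, not from the original project):

import torch

def update_ema_model(student, ema_model, alpha=0.999):
    # ema_param <- alpha * ema_param + (1 - alpha) * student_param
    with torch.no_grad():
        for ema_p, p in zip(ema_model.parameters(), student.parameters()):
            ema_p.mul_(alpha).add_(p, alpha=1 - alpha)

model = create_cifar_model()
ema_model = create_cifar_model(ema=True)
# call after each optimizer step:
update_ema_model(model, ema_model)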
コード例 #30
0
def main(args):
    args.logdir = args.logdir + args.mode
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'train_source.log'))
    logger.info(args)

    # data
    # source_transform = transforms.Compose([
    #     transforms.ToTensor()]
    # )
    # source_dataset_train = SVHN(
    #     './input', 'train', transform=source_transform, download=True)
    # source_dataset_test = SVHN(
    #     './input', 'test', transform=source_transform, download=True)
    # source_train_loader = DataLoader(
    #     source_dataset_train, args.batch_size, shuffle=True,
    #     drop_last=True,
    #     num_workers=args.n_workers)
    # source_test_loader = DataLoader(
    #     source_dataset_test, args.batch_size, shuffle=False,
    #     num_workers=args.n_workers)
    source_dataset_name = 'MNIST'
    target_dataset_name = 'mnist_m'
    source_image_root = os.path.join('dataset', source_dataset_name)
    target_image_root = os.path.join('dataset', target_dataset_name)
    batch_size = 128
    image_size = 28
    img_transform_source = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307, ), std=(0.3081, ))
    ])
    dataset_source_train = datasets.MNIST(root='dataset',
                                          train=True,
                                          transform=img_transform_source,
                                          download=True)
    source_train_loader = torch.utils.data.DataLoader(
        dataset=dataset_source_train,
        batch_size=batch_size,
        shuffle=True,
        num_workers=8)
    dataset_source_test = datasets.MNIST(
        root='dataset',
        train=False,
        transform=img_transform_source,
    )

    source_test_loader = torch.utils.data.DataLoader(
        dataset=dataset_source_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(source_cnn.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    source_cnn = train_source_cnn(source_cnn,
                                  source_train_loader,
                                  source_test_loader,
                                  criterion,
                                  optimizer,
                                  args=args)