Example #1
def trainloop(model, learning_rate, optimizer):
    # Generator, get_data_batch and device are assumed to be defined at module level
    import random

    import torch
    import torch.nn as nn
    import torch.optim as optim
    if optimizer == "SGD":
        optimizer = optim.SGD(list(model.parameters()),
                              lr=learning_rate,
                              momentum=0.9)
    if optimizer == "Adam":
        optimizer = optim.Adam(list(model.parameters()),
                               lr=learning_rate,
                               betas=(0.9, 0.999),
                               eps=1e-08,
                               weight_decay=0,
                               amsgrad=False)
    if optimizer == "Adadelta":
        optimizer = optim.Adadelta(list(model.parameters()),
                                   lr=learning_rate,
                                   rho=0.9,
                                   eps=1e-06,
                                   weight_decay=0)
    if optimizer == "Adagrad":
        optimizer = optim.Adagrad(list(model.parameters()),
                                  lr=learning_rate,
                                  lr_decay=0,
                                  weight_decay=0,
                                  initial_accumulator_value=0,
                                  eps=1e-10)
    if optimizer == "AdamW":
        optimizer = optim.AdamW(list(model.parameters()),
                                lr=learning_rate,
                                betas=(0.9, 0.999),
                                eps=1e-08,
                                weight_decay=0.01,
                                amsgrad=False)
    if optimizer == "Adamax":
        optimizer = optim.Adamax(list(model.parameters()),
                                 lr=learning_rate,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=0)
    if optimizer == "ASGD":
        optimizer = optim.ASGD(list(model.parameters()),
                               lr=learning_rate,
                               lambd=0.0001,
                               alpha=0.75,
                               t0=1000000.0,
                               weight_decay=0)
    if optimizer == "Rprop":
        optimizer = optim.Rprop(list(model.parameters()),
                                lr=learning_rate,
                                etas=(0.5, 1.2),
                                step_sizes=(1e-06, 50))
    if optimizer == "RMSprop":
        optimizer = optim.RMSprop(list(model.parameters()),
                                  lr=learning_rate,
                                  alpha=0.99,
                                  eps=1e-08,
                                  weight_decay=0,
                                  momentum=0,
                                  centered=False)

    # optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    gen = Generator()
    criterion = nn.L1Loss()
    for epoch in range(1, 6):  # loop over the dataset multiple times
        running_loss = 0.0
        i = 0

        for i in range(0, 1000):
            # get the inputs; data is a list of [inputs, labels]
            k = 0
            file_no = random.randint(0, 16000)
            inputs, labels = get_data_batch(file_no)
            INN = inputs.to(device)
            OUT = labels.to(device)
            optimizer.zero_grad()
            outputs = model(INN)
            loss = criterion(outputs, OUT)
            loss.backward()
            optimizer.step()
            del INN
            del OUT
            torch.cuda.empty_cache()
            # print statistics
            running_loss += loss.item()
            if i == 999:  # run validation at the end of each 1000-batch epoch
                # print('[%d, %5d] loss: %.5f' %(epoch , i + 1, running_loss / 3500))
                running_loss = 0.0

                # evaluate on a held-out range of files without tracking gradients
                LOSS = 0.0
                with torch.no_grad():
                    for g in range(0, 312):
                        file_no = random.randint(67012, 72730)
                        inputs, labels = get_data_batch(file_no)
                        INN = inputs.to(device)
                        OUT = labels.to(device)
                        outputs = model(INN)
                        loss = criterion(outputs, OUT)
                        LOSS += loss.item()
                        del INN
                        del OUT
                        torch.cuda.empty_cache()

                LOSS = LOSS / 312
                if epoch == 1:
                    best_loss = LOSS
                #  torch.save(net, 'pca_30_points_relational_model_2.pt')
                if LOSS < best_loss:
                    best_loss = LOSS
                #  torch.save(net, 'pca_30_points_relational_model_2.pt')
                torch.cuda.empty_cache()

#  best_loss = best_loss.cpu()
#  best_loss = best_loss.detach().numpy()
    return best_loss
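
A side note on the pattern above: the long if-chain can be collapsed into a lookup table. The sketch below is not part of the original example; make_optimizer is a hypothetical helper, and its hyperparameters simply mirror the values hard-coded in trainloop.

import torch.optim as optim

def make_optimizer(name, model, learning_rate):
    # map optimizer names to factory callables; defaults mirror trainloop above
    factories = {
        "SGD": lambda p: optim.SGD(p, lr=learning_rate, momentum=0.9),
        "Adam": lambda p: optim.Adam(p, lr=learning_rate, betas=(0.9, 0.999)),
        "AdamW": lambda p: optim.AdamW(p, lr=learning_rate, weight_decay=0.01),
        "Adadelta": lambda p: optim.Adadelta(p, lr=learning_rate, rho=0.9),
        "Adagrad": lambda p: optim.Adagrad(p, lr=learning_rate),
        "Adamax": lambda p: optim.Adamax(p, lr=learning_rate),
        "ASGD": lambda p: optim.ASGD(p, lr=learning_rate, lambd=0.0001, alpha=0.75),
        "Rprop": lambda p: optim.Rprop(p, lr=learning_rate, etas=(0.5, 1.2)),
        "RMSprop": lambda p: optim.RMSprop(p, lr=learning_rate, alpha=0.99),
    }
    if name not in factories:
        raise ValueError("unknown optimizer: " + name)
    return factories[name](model.parameters())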
Example #2
    def optimization_algorithms(SCI_optimizer, cnn, LR, SCI_SGD_MOMENTUM,
                                REGULARIZATION):

        if type(SCI_optimizer) is str:
            if (SCI_optimizer == 'Adam'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'AMSGrad'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (SCI_optimizer == 'AdamW'):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'RMSprop'):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (SCI_optimizer == 'SGD'):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adadelta'):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Rprop'):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adamax'):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'ASGD'):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)
        else:
            if (int(SCI_optimizer) == 7):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 5):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (int(SCI_optimizer) == 4):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 3):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (int(SCI_optimizer) == 2):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 6):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 1):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 8):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 9):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       lambd=0.0001,
                                       alpha=0.75,
                                       t0=1000000.0,
                                       weight_decay=REGULARIZATION)

            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)

        return optimizer
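
A hedged usage sketch for the selector above, assuming optimization_algorithms is exposed as a staticmethod (it takes no self) and that cnn is an already-constructed torch.nn.Module; integer codes pick the same optimizers as the names (for example 5 selects Adam with amsgrad=True, 2 selects SGD).

# hypothetical call; 'cnn' is assumed to be an existing torch.nn.Module
optimizer = optimization_algorithms('Adam', cnn, LR=1e-3,
                                    SCI_SGD_MOMENTUM=0.9,
                                    REGULARIZATION=1e-4)
# the same selector driven by an integer code (5 -> Adam with amsgrad=True)
optimizer = optimization_algorithms(5, cnn, LR=1e-3,
                                    SCI_SGD_MOMENTUM=0.9,
                                    REGULARIZATION=1e-4)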
Example #3
    def fit(self,
            training_data,
            validation_data,
            options,
            model,
            device=None,
            detail=False,
            run=None):
        """
        Perform the training.  This is not called "train" because the base class already defines
        that method with a different meaning.  The base class "train" method puts the Module into
        "training mode".
        """
        print(
            "Training {} using {} rows of featurized training input...".format(
                self.name(), training_data.num_rows))

        if training_data.mean is not None:
            self.mean = torch.from_numpy(np.array([[training_data.mean]
                                                   ])).to(device)
            self.std = torch.from_numpy(np.array([[training_data.std]
                                                  ])).to(device)
        else:
            self.mean = None
            self.std = None

        start = time.time()
        loss_function = nn.NLLLoss()
        initial_rate = options.learning_rate
        lr_scheduler = options.lr_scheduler
        oo = options.optimizer_options
        self.training = True

        if options.optimizer == "Adadelta":
            optimizer = optim.Adadelta(self.parameters(),
                                       lr=initial_rate,
                                       weight_decay=oo.weight_decay,
                                       rho=oo.rho,
                                       eps=oo.eps)
        elif options.optimizer == "Adagrad":
            optimizer = optim.Adagrad(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      lr_decay=oo.lr_decay)
        elif options.optimizer == "Adam":
            optimizer = optim.Adam(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   betas=oo.betas,
                                   eps=oo.eps)
        elif options.optimizer == "Adamax":
            optimizer = optim.Adamax(self.parameters(),
                                     lr=initial_rate,
                                     weight_decay=oo.weight_decay,
                                     betas=oo.betas,
                                     eps=oo.eps)
        elif options.optimizer == "ASGD":
            optimizer = optim.ASGD(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   lambd=oo.lambd,
                                   alpha=oo.alpha,
                                   t0=oo.t0)
        elif options.optimizer == "RMSprop":
            optimizer = optim.RMSprop(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      eps=oo.eps,
                                      alpha=oo.alpha,
                                      momentum=oo.momentum,
                                      centered=oo.centered)
        elif options.optimizer == "Rprop":
            optimizer = optim.Rprop(self.parameters(),
                                    lr=initial_rate,
                                    etas=oo.etas,
                                    step_sizes=oo.step_sizes)
        elif options.optimizer == "SGD":
            optimizer = optim.SGD(self.parameters(),
                                  lr=initial_rate,
                                  weight_decay=oo.weight_decay,
                                  momentum=oo.momentum,
                                  dampening=oo.dampening)

        print(optimizer)
        num_epochs = options.max_epochs
        batch_size = options.batch_size
        learning_rate = options.learning_rate
        lr_min = options.lr_min
        lr_peaks = options.lr_peaks
        ticks = training_data.num_rows / batch_size  # iterations per epoch

        # Calculation of total iterations in non-rolling vs rolling training
        # ticks = num_rows/batch_size (total number of iterations per epoch)
        # Non-Rolling Training:
        # Total Iteration = num_epochs * ticks
        # Rolling Training:
        # irl = Initial_rolling_length (We are using 2)
        # If num_epochs <=  max_rolling_length:
        # Total Iterations = sum(range(irl, irl + num_epochs))
        # If num_epochs > max_rolling_length:
        # Total Iterations = sum(range(irl, irl + max_rolling_length)) + (num_epochs - max_rolling_length)*ticks
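        # Illustrative numbers (not from the original project): num_rows=12800 and
        # batch_size=128 give ticks=100, with irl=2.
        #   num_epochs=10, max_rolling_length=50 -> sum(range(2, 12)) = 65 iterations
        #   num_epochs=60, max_rolling_length=50 -> sum(range(2, 52)) + (60-50)*100
        #                                           = 1325 + 1000 = 2325 iterations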
        if options.rolling:
            rolling_length = 2
            max_rolling_length = int(ticks)
            if max_rolling_length > options.max_rolling_length + rolling_length:
                max_rolling_length = options.max_rolling_length + rolling_length
            bag_count = 100
            hidden_bag_size = batch_size * bag_count
            if num_epochs + rolling_length < max_rolling_length:
                max_rolling_length = num_epochs + rolling_length
            total_iterations = sum(range(rolling_length, max_rolling_length))
            if num_epochs + rolling_length > max_rolling_length:
                epochs_remaining = num_epochs + rolling_length - max_rolling_length
                total_iterations += epochs_remaining * training_data.num_rows / batch_size
            ticks = total_iterations / num_epochs
        else:
            total_iterations = ticks * num_epochs
        gamma = options.lr_gamma

        if not lr_min:
            lr_min = learning_rate
        scheduler = None
        if lr_scheduler == "TriangleLR":
            steps = lr_peaks * 2 + 1
            stepsize = num_epochs / steps
            scheduler = TriangularLR(optimizer, stepsize * ticks, lr_min,
                                     learning_rate, gamma)
        elif lr_scheduler == "CosineAnnealingLR":
            # divide by odd number to finish on the minimum learning rate
            cycles = lr_peaks * 2 + 1
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=total_iterations / cycles, eta_min=lr_min)
        elif lr_scheduler == "ExponentialLR":
            scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma)
        elif lr_scheduler == "StepLR":
            scheduler = optim.lr_scheduler.StepLR(
                optimizer, step_size=options.lr_step_size, gamma=gamma)
        elif lr_scheduler == "ExponentialResettingLR":
            reset = (num_epochs * ticks) / 3  # reset at the 1/3 mark.
            scheduler = ExponentialResettingLR(optimizer, gamma, reset)

        # optimizer = optim.Adam(model.parameters(), lr=0.0001)
        log = []
        for epoch in range(num_epochs):
            self.train()
            if options.rolling:
                rolling_length += 1
                if rolling_length <= max_rolling_length:
                    hidden1_bag = torch.from_numpy(
                        np.zeros([1, hidden_bag_size, model.hidden_units],
                                 dtype=np.float32)).to(device)
                    if model.architecture == 'LSTM':
                        cell1_bag = torch.from_numpy(
                            np.zeros([1, hidden_bag_size, model.hidden_units],
                                     dtype=np.float32)).to(device)
                    if model.num_layers >= 2:
                        hidden2_bag = torch.from_numpy(
                            np.zeros([1, hidden_bag_size, model.hidden_units],
                                     dtype=np.float32)).to(device)
                        if model.architecture == 'LSTM':
                            cell2_bag = torch.from_numpy(
                                np.zeros(
                                    [1, hidden_bag_size, model.hidden_units],
                                    dtype=np.float32)).to(device)
                    if model.num_layers == 3:
                        hidden3_bag = torch.from_numpy(
                            np.zeros([
                                1, hidden_bag_size, training_data.num_keywords
                            ],
                                     dtype=np.float32)).to(device)
                        if model.architecture == 'LSTM':
                            cell3_bag = torch.from_numpy(
                                np.zeros([
                                    1, hidden_bag_size,
                                    training_data.num_keywords
                                ],
                                         dtype=np.float32)).to(device)
            for i_batch, (audio, labels) in enumerate(
                    training_data.get_data_loader(batch_size)):
                if not self.batch_first:
                    audio = audio.transpose(1,
                                            0)  # GRU wants seq,batch,feature

                if device:
                    audio = audio.to(device)
                    labels = labels.to(device)

                # Also, we need to clear out the hidden state,
                # detaching it from its history on the last instance.
                if options.rolling:
                    if rolling_length <= max_rolling_length:
                        if (i_batch + 1) % rolling_length == 0:
                            self.init_hidden()
                            break
                    shuffled_indices = list(range(hidden_bag_size))
                    np.random.shuffle(shuffled_indices)
                    temp_indices = shuffled_indices[:batch_size]
                    if model.architecture == 'LSTM':
                        if self.hidden1 is not None:
                            hidden1_bag[:, temp_indices, :], cell1_bag[:, temp_indices, :] = self.hidden1
                            self.hidden1 = (hidden1_bag[:, 0:batch_size, :],
                                            cell1_bag[:, 0:batch_size, :])
                            if model.num_layers >= 2:
                                hidden2_bag[:, temp_indices, :], cell2_bag[:, temp_indices, :] = self.hidden2
                                self.hidden2 = (hidden2_bag[:, 0:batch_size, :],
                                                cell2_bag[:, 0:batch_size, :])
                            if model.num_layers == 3:
                                hidden3_bag[:, temp_indices, :], cell3_bag[:, temp_indices, :] = self.hidden3
                                self.hidden3 = (hidden3_bag[:, 0:batch_size, :],
                                                cell3_bag[:, 0:batch_size, :])
                    else:
                        if self.hidden1 is not None:
                            hidden1_bag[:, temp_indices, :] = self.hidden1
                            self.hidden1 = hidden1_bag[:, 0:batch_size, :]
                            if model.num_layers >= 2:
                                hidden2_bag[:, temp_indices, :] = self.hidden2
                                self.hidden2 = hidden2_bag[:, 0:batch_size, :]
                            if model.num_layers == 3:
                                hidden3_bag[:, temp_indices, :] = self.hidden3
                                self.hidden3 = hidden3_bag[:, 0:batch_size, :]
                else:
                    self.init_hidden()

                # Before the backward pass, use the optimizer object to zero all of the
                # gradients for the variables it will update (which are the learnable
                # weights of the model). This is because by default, gradients are
                # accumulated in buffers( i.e, not overwritten) whenever .backward()
                # is called. Checkout docs of torch.autograd.backward for more details.
                optimizer.zero_grad()

                # optionally normalize the audio
                if self.mean is not None:
                    audio = (audio - self.mean) / self.std

                # Run our forward pass.
                keyword_scores = self(audio)

                # Compute the loss, gradients
                loss = loss_function(keyword_scores, labels)

                # Backward pass: compute gradient of the loss with respect to all the learnable
                # parameters of the model. Internally, the parameters of each Module are stored
                # in Tensors with requires_grad=True, so this call will compute gradients for
                # all learnable parameters in the model.
                loss.backward()
                # move to next learning rate
                if scheduler:
                    scheduler.step()

                # Calling the step function on an Optimizer makes an update to its parameters
                # applying the gradients we computed during back propagation
                optimizer.step()

                learning_rate = optimizer.param_groups[0]['lr']
                if detail:
                    log += [{
                        'iteration': i_batch,
                        'loss': loss.item(),
                        'learning_rate': learning_rate
                    }]
            # Find the best prediction in each sequence and return its accuracy
            passed, total, rate = self.evaluate(validation_data, batch_size,
                                                device)
            learning_rate = optimizer.param_groups[0]['lr']
            current_loss = float(loss.item())
            print(
                "Epoch {}, Loss {:.3f}, Validation Accuracy {:.3f}, Learning Rate {}"
                .format(epoch, current_loss, rate * 100, learning_rate))
            log += [{
                'epoch': epoch,
                'loss': current_loss,
                'accuracy': rate,
                'learning_rate': learning_rate
            }]
            if run is not None:
                run.log('progress', epoch / num_epochs)
                run.log('epoch', epoch)
                run.log('accuracy', rate)
                run.log('loss', current_loss)
                run.log('learning_rate', learning_rate)

        end = time.time()
        self.training = False
        print("Trained in {:.2f} seconds".format(end - start))
        return log
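
The fit method above pulls its hyperparameters from an options object with a nested optimizer_options. A minimal sketch of such a configuration, with attribute names inferred from the accesses in the code and purely illustrative values (this is not the original project's options class):

from types import SimpleNamespace

options = SimpleNamespace(
    optimizer="Adam",
    learning_rate=1e-3,
    lr_scheduler="CosineAnnealingLR",
    lr_min=1e-5,
    lr_peaks=1,
    lr_gamma=0.95,
    lr_step_size=1,
    max_epochs=30,
    batch_size=128,
    rolling=False,
    max_rolling_length=100,
    optimizer_options=SimpleNamespace(
        weight_decay=1e-5, betas=(0.9, 0.999), eps=1e-8, rho=0.9,
        lr_decay=0.0, lambd=1e-4, alpha=0.75, t0=1e6,
        momentum=0.9, dampening=0.0, centered=False,
        etas=(0.5, 1.2), step_sizes=(1e-6, 50),
    ),
)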
Example #4
    def fit(self,
            X,
            y,
            early_stop=4e-06,
            hybrid=None,
            accept_bias=True,
            gpu=False,
            initial=None):
        t = np.array([np.arange(0, len(X) * self.dt, self.dt)]).T

        feature, label = Variable(torch.FloatTensor(
            [t]), requires_grad=True), Variable(
                torch.FloatTensor([y]),
                requires_grad=False)  #input (time array) and desired output
        if self.fe is None:
            self.fe = np.array(np.zeros(len(feature[0]))).T
            self.fe = torch.FloatTensor([self.fe])
        else:
            self.fe = torch.FloatTensor([self.fe])

        #create Model
        activation_function = MemInt(dt=self.dt,
                                     fe=self.fe,
                                     h=torch.FloatTensor([X]))
        self.model = nn.Sequential(
            OrderedDict([('fc1', nn.Linear(1, self.n_hidden)),
                         ('GLE', activation_function),
                         ('fc2', nn.Linear(self.n_hidden, 1))]))

        #choose Loss function
        #criterion = RMSELoss
        criterion = nn.MSELoss(reduction='mean')

        #choose optimizer
        if self.optimizer == 'Rprop':

            optimizer = optim.Rprop(self.model.parameters(),
                                    lr=self.learning_rate)

        elif self.optimizer == 'Adam':
            optimizer = optim.Adam(self.model.parameters(),
                                   lr=self.learning_rate)

        elif self.optimizer == 'SGD':
            optimizer = optim.SGD(self.model.parameters(),
                                  lr=self.learning_rate)

        else:
            print('Choose an optimizer (Rprop, Adam or SGD)!')

        #initialize weights
        self.model.apply(self.weights_init_uniform_rule)
        self.model.fc2.weight.data.uniform_(1, 1)

        if initial is not None:
            n_weights = self.n_hidden

            for i in range(n_weights):
                #self.model.fc1.weight.data[i] = (initial)**(1/3)

                self.model.fc1.weight.data[i] = (initial[i][0])
                self.model.fc1.weight.data[i] /= n_weights

                self.model.fc1.bias.data[i] = np.log(initial[i][1])

                self.model.fc2.weight.data[i] = (initial[i][0])
                self.model.fc2.weight.data[i] /= n_weights

        #create list of losses for every epoch
        losses = []

        #start training
        for e in range(0, self.n_epochs):

            # Training pass
            optimizer.zero_grad()
            with torch.no_grad():
                if accept_bias == False:
                    self.model.fc1.bias.zero_()
                    self.model.fc2.bias.zero_()
                    self.model.fc2.weight.data.uniform_(1, 1)
            #print(self.model.fc1.weight.data)
            #print(self.model.fc1.bias.data)
            #model.fc1.weight = torch.nn.Parameter

            output = self.model(feature)

            loss = criterion(output, label)
            losses.append(loss.detach().numpy())
            print('loss in epoch ' + str(e + 1) + ' : ' +
                  str(loss.detach().numpy()))

            if loss.detach().numpy() < early_stop:
                print('Minimal loss reached! early stop of training!')
                break

            if hybrid is not None:  # change to SGD if training is trapped in a local minimum
                if loss.detach().numpy() < hybrid:
                    optimizer = optim.SGD(self.model.parameters(),
                                          lr=self.learning_rate)

            loss.backward(retain_graph=True)
            optimizer.step()

        return losses, self.model
Example #5
def main(root_dir='data',
         files=['toy.train'],
         test_ratio=0.2,
         valid_ratio=0.1,
         batch_size=128,
         which_model='DRNN',
         cell_type='GRU',
         input_size=[15817],
         embed_dim=128,
         hidden_size=128,
         dropout_r=0.1,
         n_layers=1,
         bii=False,
         time=False,
         preTrainEmb='',
         output_dir='models',
         model_prefix='toy.train',
         model_customed='',
         lr=10**-2,
         L2=10**-4,
         eps=10**-8,
         num_epochs=100,
         patience=5,
         optimizer='adam',
         seed=0,
         use_cuda=False):
    """
        Predictive Analytics on EHR
    
        Args:
            - root_dir: Path to the folders with pickled file(s)
            - files: Name(s) of pickled file(s), separated by space, so the argument will be saved as a list 
                    If list of 1: data will be first split into train, validation and test, then 3 dataloaders will be created.
                    If list of 3: 3 dataloaders will be created from 3 files directly. 
                    Files must be in the following order: training, validation and test.
            - test_ratio: Test data size 
                Default: 0.2
            - valid_ratio: Validation data size 
                Default: 0.1
            - batch_size: Batch size for training, validation or test 
                Default: 128
            - which_model: Choose from {"RNN", "DRNN", "QRNN", "TLSTM", "LR", "RETAIN"}
            - cell_type: For RNN based models, choose from {"RNN", "GRU", "LSTM"}
            - input_size: Input dimension(s), separated by space; the argument is saved as a list and decides which embedding types to use. 
                        If the list has length 1, a single embedding is used; if length 3, medical, diagnosis and other codes are embedded separately (3 embeddings)
                Default: [15817]
            - embed_dim: Number of embedding dimension
                Default: 128
            - hidden_size: Size of hidden layers 
                Default: 128
            - dropout_r: Probability for dropout
                Default: 0.1
            - n_layers: Number of layers; for Dilated RNNs, dilations increase exponentially with the number of layers 
                Default: 1
            - bii: Indicator of whether bi-direction is activated. 
                Default: False
            - time: Indicator of whether time is incorporated into embedding. 
                Default: False
            - preTrainEmb: Path to pretrained embeddings file. 
                Default:''
            - output_dir: Output directory where the best model will be saved and logs written 
                Default: '../models/' will be created
            - model_prefix: Prefix for the saved model name, e.g. toy.train 
                Default: [(training) file name]
            - model_customed: Additional custom specifier for the saved model name, e.g. _RNN_GRU. 
                Default: none
            - lr: Learning rate 
                Default: 0.01
            - L2: L2 regularization 
                Default: 0.0001
            - eps: Term to improve numerical stability 
                Default: 0.00000001
            - num_epochs: Number of epochs for training 
                Default: 100
            - patience: Number of stagnant epochs to wait before terminating training 
                Default: 5
            - optimizer: Select which optimizer to train. Upper/lower case does not matter
                Default: adam
            - seed: Seed for reproducibility 
                Default:0
            - use_cuda: Use GPU 
                Default:False
    """
    ###########################################################################
    # 1. Data preparation
    ###########################################################################
    print("\nLoading and preparing data...")
    if len(files) == 1:
        print(
            '1 file found. Data will be split into train, validation and test.'
        )
        data = EHRdataFromPickles(root_dir=root_dir,
                                  file_name=files[0],
                                  sort=False,
                                  test_ratio=test_ratio,
                                  valid_ratio=valid_ratio,
                                  model=which_model,
                                  seed=seed)  #No sort before splitting
        # Dataloader splits
        train, test, valid = data.__splitdata__()  #this time, sort is true
        # you can comment out this part if you don't want to see what's going on here
        print("\nSee an example data structure from training data:")
        print(data.__getitem__(35, seeDescription=True))
    elif len(files) == 2:
        print(
            '2 files found. 2 dataloaders will be created for train and validation'
        )
        train = EHRdataFromPickles(root_dir=root_dir,
                                   file_name=files[0],
                                   sort=True,
                                   model=which_model,
                                   seed=seed)
        valid = EHRdataFromPickles(root_dir=root_dir,
                                   file_name=files[1],
                                   sort=True,
                                   model=which_model,
                                   seed=seed)
        test = None
    else:
        print('3 files found. 3 dataloaders will be created for each')
        train = EHRdataFromPickles(root_dir=root_dir,
                                   file_name=files[0],
                                   sort=True,
                                   model=which_model,
                                   seed=seed)
        valid = EHRdataFromPickles(root_dir=root_dir,
                                   file_name=files[1],
                                   sort=True,
                                   model=which_model,
                                   seed=seed)
        test = EHRdataFromPickles(root_dir=root_dir,
                                  file_name=files[2],
                                  sort=True,
                                  model=which_model,
                                  seed=seed)
        print("\nSee an example data structure from training data:")
        print(train.__getitem__(40, seeDescription=True))

    print(f"\nTraining data contains {len(train)} patients")
    print(f"Validation data contains {len(valid)} patients")
    print(f"Test data contains {len(test)} patients"
          if test else "No test file provided")
    ###########################################################################
    # 2. Model loading
    ###########################################################################
    print(f"\n{args.which_model} model initialization...", end="")
    pack_pad = True if which_model == "RNN" else False
    if which_model == 'RNN':
        ehr_model = models.EHR_RNN(input_size=input_size,
                                   embed_dim=embed_dim,
                                   hidden_size=hidden_size,
                                   use_cuda=use_cuda,
                                   n_layers=n_layers,
                                   dropout_r=dropout_r,
                                   cell_type=cell_type,
                                   bii=bii,
                                   time=time,
                                   preTrainEmb=preTrainEmb)
    elif which_model == 'DRNN':
        ehr_model = models.EHR_DRNN(
            input_size=input_size,
            embed_dim=embed_dim,
            hidden_size=hidden_size,
            use_cuda=use_cuda,
            n_layers=n_layers,
            dropout_r=dropout_r,  #default =0 
            cell_type=cell_type,  #default ='DRNN'
            bii=False,
            time=time,
            preTrainEmb=preTrainEmb)
    elif which_model == 'QRNN':
        ehr_model = models.EHR_QRNN(
            input_size=input_size,
            embed_dim=embed_dim,
            hidden_size=hidden_size,
            use_cuda=use_cuda,
            n_layers=n_layers,
            dropout_r=dropout_r,  #default =0.1
            cell_type='QRNN',  #doesn't support normal cell types
            bii=False,  #QRNN doesn't support bi
            time=time,
            preTrainEmb=preTrainEmb)
    elif which_model == 'TLSTM':
        ehr_model = models.EHR_TLSTM(
            input_size=input_size,
            embed_dim=embed_dim,
            hidden_size=hidden_size,
            use_cuda=use_cuda,
            n_layers=n_layers,
            dropout_r=dropout_r,  #default =0.1
            cell_type='TLSTM',  #doesn't support normal cell types
            bii=False,
            time=time,
            preTrainEmb=preTrainEmb)
    elif which_model == 'RETAIN':
        ehr_model = models.RETAIN(input_size=input_size,
                                  embed_dim=embed_dim,
                                  hidden_size=hidden_size,
                                  use_cuda=use_cuda,
                                  n_layers=n_layers)
    else:
        ehr_model = models.EHR_LR_emb(input_size=input_size,
                                      embed_dim=embed_dim,
                                      use_cuda=use_cuda,
                                      preTrainEmb=preTrainEmb)
    print("Done")
    ###########################################################################
    # 3. call dataloader and create a list of minibatches
    ###########################################################################
    # separate loader and minibatches for train, test, validation
    # Note: mbs stands for minibatches
    print('\nCreating the list of training minibatches')
    train_mbs = list(
        tqdm(
            EHRdataloader(train,
                          use_cuda=use_cuda,
                          batch_size=batch_size,
                          packPadMode=pack_pad)))
    print('\nCreating the list of valid minibatches')
    valid_mbs = list(
        tqdm(
            EHRdataloader(valid,
                          use_cuda=use_cuda,
                          batch_size=batch_size,
                          packPadMode=pack_pad)))
    print('\nCreating the list of test minibatches')
    test_mbs = list(
        tqdm(
            EHRdataloader(test,
                          use_cuda=use_cuda,
                          batch_size=batch_size,
                          packPadMode=pack_pad))) if test else None

    # make sure cuda is working
    if use_cuda:
        ehr_model = ehr_model.cuda()

    print(f"\n{args.optimizer.title()} optimizer initialization...", end="")
    #model optimizers to choose from. Upper/lower case dont matter
    if args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(ehr_model.parameters(),
                               lr=lr,
                               weight_decay=L2,
                               eps=eps)
    elif args.optimizer.lower() == 'adadelta':
        optimizer = optim.Adadelta(ehr_model.parameters(),
                                   lr=lr,
                                   weight_decay=L2,
                                   eps=eps)
    elif args.optimizer.lower() == 'adagrad':
        optimizer = optim.Adagrad(ehr_model.parameters(),
                                  lr=lr,
                                  weight_decay=L2)
    elif args.optimizer.lower() == 'adamax':
        optimizer = optim.Adamax(ehr_model.parameters(),
                                 lr=lr,
                                 weight_decay=L2,
                                 eps=eps)
    elif args.optimizer.lower() == 'asgd':
        optimizer = optim.ASGD(ehr_model.parameters(), lr=lr, weight_decay=L2)
    elif args.optimizer.lower() == 'rmsprop':
        optimizer = optim.RMSprop(ehr_model.parameters(),
                                  lr=lr,
                                  weight_decay=L2,
                                  eps=eps)
    elif args.optimizer.lower() == 'rprop':
        optimizer = optim.Rprop(ehr_model.parameters(), lr=lr)
    elif args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(ehr_model.parameters(), lr=lr, weight_decay=L2)
    else:
        raise NotImplementedError
    print("Done")
    ###########################################################################
    # 4. Train, validation and test. default: batch shuffle = true
    ###########################################################################
    try:
        ut.epochs_run(
            num_epochs,
            train=train_mbs,
            valid=valid_mbs,
            test=test_mbs,
            model=ehr_model,
            optimizer=optimizer,
            shuffle=True,
            #batch_size = batch_size,
            which_model=which_model,
            patience=patience,
            output_dir=output_dir,
            model_prefix=model_prefix,
            model_customed=model_customed)

    # we can keyboard-interrupt now
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
Example #6
        # get output layer output
        y_pred = self.out(h_output2)

        return y_pred


# define a neural network using the customised structure
net = MultiLayerNet(input_neurons, output_neurons)

# define loss function
loss_func = torch.nn.CrossEntropyLoss()

# define optimizer for standard network
optimiser = optim.Rprop(net.parameters(),
                        lr=learning_rate,
                        etas=(0.5, 1.2),
                        step_sizes=(1e-06, 50))

# store all losses for visualisation
all_losses = []

previous_loss = None

# train a neural network
for epoch in range(num_epochs):
    # Perform forward pass: compute predicted y by passing x to the model.
    Y_pred = net(X)

    # Compute loss
    loss = loss_func(Y_pred, Y)
Example #7
def reconstruct_stim(features, net,
                     img_mean=np.array((0, 0, 0)).astype(np.float32),
                     img_std=np.array((1, 1, 1)).astype(np.float32),
                     norm=255,
                     bgr=False,
                     initial_input=None,
                     input_size=(224, 224, 3),
                     feature_masks=None,
                     layer_weight=None, channel=None, mask=None,
                     opt_name='SGD',
                     prehook_dict={},
                     lr_start=0.02, lr_end=1e-12,
                     momentum_start=0.009, momentum_end=0.009,
                     decay_start=0.02, decay_end=1e-11,
                     grad_normalize=True,
                     image_jitter=False, jitter_size=4,
                     image_blur=True, sigma_start=2, sigma_end=0.5,
                     p=3, lamda=0.5,
                     TVlambda=[0, 0],
                     clip_extreme=False, clip_extreme_every=4, e_pct_start=1, e_pct_end=1,
                     clip_small_norm=False, clip_small_norm_every=4, n_pct_start=5., n_pct_end=5.,
                     loss_type='l2', iter_n=200, save_intermediate=False,
                     save_intermediate_every=1, save_intermediate_path=None,
                     disp_every=1,
                     ):
    if loss_type == "l2":
        loss_fun = torch.nn.MSELoss(reduction='sum')
    elif loss_type == "L2_with_reg":
        loss_fun = MSE_with_regulariztion(L_lambda=lamda, alpha=p, TV_lambda=TVlambda)
    else:
        raise ValueError(loss_type + ' is not correct')
    # make save dir
    if save_intermediate:
        if save_intermediate_path is None:
            save_intermediate_path = os.path.join('..', 'recon_img_by_icnn' + datetime.now().strftime('%Y%m%dT%H%M%S'))
        if not os.path.exists(save_intermediate_path):
            os.makedirs(save_intermediate_path)

    # image norm
    noise_img = np.random.randint(0, 256, (input_size))
    img_norm0 = np.linalg.norm(noise_img)
    img_norm0 = img_norm0/2.

    # initial input
    if initial_input is None:
        initial_input = np.random.randint(0, 256, (input_size))
    else:
        input_size = initial_input.shape

    if save_intermediate:
        if len(input_size) == 3:
            #image
            save_name = 'initial_image.jpg'
            if bgr:
                PIL.Image.fromarray(np.uint8(initial_input[...,[2,1,0]])).save(os.path.join(save_intermediate_path, save_name))
            else:
                PIL.Image.fromarray(np.uint8(initial_input)).save(os.path.join(save_intermediate_path, save_name))
        elif len(input_size) == 4:
            # video
            # if you install cv2 and ffmpeg, you can use save_video function which save preferred video as video format
            save_name = 'initial_video.avi'
            save_video(initial_input, save_name, save_intermediate_path, bgr)

            save_name = 'initial_video.gif'
            save_gif(initial_input, save_name, save_intermediate_path, bgr,
                     fr_rate=150)

        else:
            raise ValueError('Input size is not appropriate for save')


    # layer_list
    layer_dict = features
    layer_list = list(features.keys())

    # number of layers
    num_of_layer = len(layer_list)

    # layer weight
    if layer_weight is None:
        weights = np.ones(num_of_layer)
        weights = np.float32(weights)
        weights = weights / weights.sum()
        layer_weight = {}
        for j, layer in enumerate(layer_list):
            layer_weight[layer] = weights[j]

    # feature mask
    if feature_masks is None:
        feature_masks = create_feature_masks(layer_dict, masks=mask, channels=channel)

    # iteration for gradient descent
    input = initial_input.copy().astype(np.float32)
    if len(input_size) == 3:
        input = img_preprocess(input, img_mean, img_std, norm)
    else:
        input = vid_preprocess(input, img_mean, img_std, norm)

    loss_list = np.zeros(iter_n, dtype='float32')

    for t in range(iter_n):
        # parameters
        lr = lr_start + t * (lr_end - lr_start) / iter_n
        momentum = momentum_start + t * (momentum_end - momentum_start) / iter_n
        decay = decay_start + t * (decay_end - decay_start) / iter_n
        sigma = sigma_start + t * (sigma_end - sigma_start) / iter_n

        # shift
        if image_jitter:
            ox, oy = np.random.randint(-jitter_size, jitter_size+1, 2)
            input = np.roll(np.roll(input, ox, -1), oy, -2)

        # forward
        input = torch.tensor(input[np.newaxis], requires_grad=True)
        if opt_name == 'Adam':
            op = optim.Adam([input], lr=lr)
        elif opt_name == 'SGD':
            op = optim.SGD([input], lr=lr, momentum=momentum)
            #op = optim.SGD([input], lr=lr)
        elif opt_name == 'Adadelta':
            op = optim.Adadelta([input], lr=lr)
        elif opt_name == 'Adagrad':
            op = optim.Adagrad([input], lr=lr)
        elif opt_name == 'AdamW':
            op = optim.AdamW([input], lr=lr)
        elif opt_name == 'SparseAdam':
            op = optim.SparseAdam([input], lr=lr)
        elif opt_name == 'Adamax':
            op = optim.Adamax([input], lr=lr)
        elif opt_name == 'ASGD':
            op = optim.ASGD([input], lr=lr)
        elif opt_name == 'RMSprop':
            op = optim.RMSprop([input], lr=lr)
        elif opt_name == 'Rprop':
            op = optim.Rprop([input], lr=lr)
        fw = get_cnn_features(net, input, features.keys(), prehook_dict)
        # backward for net
        err = 0.
        loss = 0.
        # set the grad of network to 0
        net.zero_grad()
        op.zero_grad()
        for j in range(num_of_layer):

            # op.zero_grad()
            
            target_layer_id = num_of_layer -1 -j
            target_layer = layer_list[target_layer_id]
            # extract activation or mask at input true video, and mask
            act_j = fw[target_layer_id].clone()
            feat_j = features[target_layer].clone()
            mask_j = feature_masks[target_layer]

            layer_weight_j = layer_weight[target_layer]

            masked_act_j = torch.masked_select(act_j, torch.FloatTensor(mask_j).bool())
            masked_feat_j = torch.masked_select(feat_j, torch.FloatTensor(mask_j).bool())
            # calculate loss using pytorch loss function
            loss_j = loss_fun(masked_act_j, masked_feat_j) * layer_weight_j

            # backward the gradient to the video
            loss_j.backward(retain_graph=True)

            loss += loss_j.detach().numpy()
        if grad_normalize:
            grad_mean = torch.abs(input.grad).mean()
            if grad_mean > 0:
                input.grad /= grad_mean
        op.step()

        input = input.detach().numpy()[0]

        err = err + loss
        loss_list[t] = loss

        # clip pixels with extreme value
        if clip_extreme and (t+1) % clip_extreme_every == 0:
            e_pct = e_pct_start + t * (e_pct_end - e_pct_start) / iter_n
            input = clip_extreme_value(input, e_pct)

        # clip pixels with small norm
        if clip_small_norm and (t+1) % clip_small_norm_every == 0:
            n_pct = n_pct_start + t * (n_pct_end - n_pct_start) / iter_n
            input = clip_small_norm_value(input, n_pct)

        # unshift
        if image_jitter:
            input = np.roll(np.roll(input, -ox, -1), -oy, -2)

        # L_2 decay
        input = (1-decay) * input

        # gaussian blur
        if image_blur:
            if len(input_size) == 3:
                input = gaussian_blur(input, sigma)
            else:
                for i in range(input.shape[1]):
                    input[:, i] = gaussian_blur(input[:, i], sigma)

        # disp info
        if (t+1) % disp_every == 0:
            print('iter=%d; err=%g;' % (t+1, err))


        # save image
        if save_intermediate and ((t+1) % save_intermediate_every == 0):
            if len(input_size) == 3:
                save_name = '%05d.jpg' % (t+1)
                PIL.Image.fromarray(normalise_img(img_deprocess(input, img_mean, img_std, norm))).save(
                    os.path.join(save_intermediate_path, save_name))
            else:
                save_stim = input
                # if you install cv2 and ffmpeg, you can use save_video function which save preferred video as video format
                save_name = '%05d.avi' % (t + 1)
                save_video(normalise_vid(vid_deprocess(save_stim, img_mean, img_std, norm)), save_name,
                           save_intermediate_path, bgr, fr_rate=30)
                save_name = '%05d.gif' % (t + 1)
                save_gif(normalise_vid(vid_deprocess(save_stim, img_mean, img_std, norm)), save_name,
                         save_intermediate_path,
                         bgr, fr_rate=150)
    # return img
    if len(input_size) == 3:
        return img_deprocess(input, img_mean, img_std, norm), loss_list
    else:
        return vid_deprocess(input, img_mean, img_std, norm), loss_list
Example #8
 def optim_selection(self):
     if self.config.optim == "Nesterov":
         return optim.SGD(
             self.model.parameters(),
             lr=self.config.lr,
             momentum=0.9,
             nesterov=True,
             weight_decay=0.0001,
         )
     elif self.config.optim == "SGD":  # weight_decay = l2 regularization
         return optim.SGD(
             self.model.parameters(),
             lr=self.config.lr,
             momentum=0.9,
             nesterov=False,
             weight_decay=0.0001,
         )
     elif self.config.optim == "Adadelta":  # default lr = 1.0
         return optim.Adadelta(
             self.model.parameters(),
             lr=self.config.lr,
             rho=0.9,
             eps=1e-06,
             weight_decay=1e-6,
         )
     elif self.config.optim == "Adagrad":  # default lr = 0.01
         return optim.Adagrad(
             self.model.parameters(),
             lr=self.config.lr,
             lr_decay=0,
             weight_decay=1e-6,
             initial_accumulator_value=0,
             eps=1e-10,
         )
     elif self.config.optim == "Adam":  # default lr=0.001
         return optim.Adam(self.model.parameters(),
                           lr=self.config.lr,
                           weight_decay=1e-6)
     elif self.config.optim == "AdamW":  # default lr=0.001
         return optim.AdamW(
             self.model.parameters(),
             lr=self.config.lr,
             betas=(0.9, 0.999),
             eps=1e-08,
             weight_decay=0.01,
             amsgrad=False,
         )
     elif self.config.optim == "SparseAdam":  # default lr = 0.001
         return optim.SparseAdam(
             self.model.parameters(),
             lr=self.config.lr,
             betas=(0.9, 0.999),
             eps=1e-08,
         )
     elif self.config.optim == "Adamax":  # default lr=0.002
         return optim.Adamax(
             self.model.parameters(),
             lr=self.config.lr,
             betas=(0.9, 0.999),
             eps=1e-08,
             weight_decay=1e-6,
         )
     elif self.config.optim == "ASGD":
         return optim.ASGD(
             self.model.parameters(),
             lr=self.config.lr,
             lambd=0.0001,
             alpha=0.75,
             t0=1000000.0,
             weight_decay=1e-6,
         )
     elif self.config.optim == "RMSprop":  # default lr=0.01
         return optim.RMSprop(
             self.model.parameters(),
             lr=self.config.lr,
             alpha=0.99,
             eps=1e-08,
             weight_decay=0,
             momentum=0,
             centered=False,
         )
     elif self.config.optim == "Rprop":  # default lr=0.01
         return optim.Rprop(
             self.model.parameters(),
             lr=self.config.lr,
             etas=(0.5, 1.2),
             step_sizes=(1e-06, 50),
         )
Example #9
     {"params": model.parameters()},
     optim.LBFGS(lr=0.1, params=model.parameters()),
     id="LBFGSConf",
 ),
 pytest.param(
     "RMSprop",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.RMSprop(lr=0.1, params=model.parameters()),
     id="RMSpropConf",
 ),
 pytest.param(
     "Rprop",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.Rprop(lr=0.1, params=model.parameters()),
     id="RpropConf",
 ),
 pytest.param(
     "SGD",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.SGD(lr=0.1, params=model.parameters()),
     id="SGDConf",
 ),
 pytest.param(
     "SparseAdam",
     {"lr": 0.1},
     {"params": model.parameters()},
     optim.SparseAdam(lr=0.1, params=model.parameters()),
     id="SparseAdamConf",
Example #10
def run_model(trainloader,
              validloader,
              epochs,
              use_rprop,
              learning_rate,
              momentum=0,
              etas=None,
              step_sizes=None,
              num_filters=6,
              fc1_size=120,
              fc2_size=84,
              save_weights=False,
              gpu=False):
    '''
    :param use_rprop: True to use the Rprop optimizer, False to use the SGD optimizer
    :param learning_rate: learning rate passed to the chosen optimizer
    :param momentum: momentum for SGD (ignored when use_rprop is True)
    :return: validation accuracy of the final epoch and the trained network
    '''
    # set up the model and optimizer
    net = create_model(num_filters, fc1_size, fc2_size)
    if gpu:
        print('using gpu!!!!!')
        net = net.cuda()

    criterion = nn.CrossEntropyLoss()
    if (use_rprop):
        print("using rprop!!!!")
        optimizer = optim.Rprop(
            net.parameters(),
            lr=learning_rate,
            etas=etas,
            step_sizes=step_sizes
        )  #(default params: lr = 0.01, etas = (0.5,1.2), step_sizes(1e-06,50))
        print("etas2: ", etas)
        print("Step2: ", step_sizes)
        print("opt: ", optimizer)
    else:
        print("using sgd!!!")
        optimizer = optim.SGD(net.parameters(),
                              lr=learning_rate,
                              momentum=momentum)

    # train the model
    weights = [[net.conv1.weight], [net.conv2.weight], [net.fc1.weight],
               [net.fc2.weight], [net.fc3.weight]]
    start_time = datetime.now()
    for epoch in range(epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        total_train = 0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

            total_train += labels.size(0)

        weights[0].append(net.conv1.weight)
        weights[1].append(net.conv2.weight)
        weights[2].append(net.fc1.weight)
        weights[3].append(net.fc2.weight)
        weights[4].append(net.fc3.weight)

        # test the model on the validation set
        correct = 0
        total = 0
        with torch.no_grad():
            for data in validloader:
                images, labels = data
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the ', total,
              ' validation images: %d %%' % (100 * correct / total))
        valid_accuracy = 100 * correct / total

    # do timing stuff
    end_time = datetime.now()
    total_time = end_time - start_time
    print('Finished Training in: ', total_time)
    timer_arr.append(total_time)

    print("train size: ", total_train)

    if save_weights:
        pickle.dump(weights, open("weights.p", "wb"))

    return valid_accuracy, net
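# A minimal usage sketch (not from the original source): it assumes CIFAR-style
# DataLoaders named trainloader and validloader already exist in this module,
# and simply compares run_model with the Rprop defaults against plain SGD.
rprop_acc, rprop_net = run_model(trainloader, validloader, epochs=5,
                                 use_rprop=True, learning_rate=0.01,
                                 etas=(0.5, 1.2), step_sizes=(1e-06, 50))
sgd_acc, sgd_net = run_model(trainloader, validloader, epochs=5,
                             use_rprop=False, learning_rate=0.01, momentum=0.9)
print("Rprop accuracy: %.1f%%, SGD accuracy: %.1f%%" % (rprop_acc, sgd_acc))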
Example #11
0
        # the first 1 is the input dimension, the second 1 is the output dimension
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        out = self.linear(x)
        return out


if __name__ == '__main__':
    model = LR()

    # define the loss function
    criterion = nn.MSELoss()

    # define the optimizer
    optimizer = optim.Rprop(model.parameters(), lr=.0001)

    # train the model
    num_epochs = 1000
    for epoch in range(num_epochs):
        inputs = Variable(x_train)
        target = Variable(y_train)  # ground-truth values
        out = model(inputs)  # predicted values
        loss = criterion(out, target)
        # the optimizer updates the network parameters
        optimizer.zero_grad()
        # compute gradients
        loss.backward()
        # update the parameters
        optimizer.step()
        if (epoch + 1) % 1 == 0:
Example #12
0
def get_optimizer(optimizer, optimizer_config, params):
    '''
    Get the optimizer for the worker model, filling any missing
    optimizer_config keys with the PyTorch defaults.
    '''
    if optimizer == 'SGD':
        p = ['lr', 'momentum', 'dampening', 'weight_decay', 'nesterov']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'nesterov':
                optimizer_config[i] = False
            else:
                optimizer_config[i] = 0

        opti = optim.SGD(params,
                         lr=optimizer_config['lr'],
                         momentum=optimizer_config['momentum'],
                         dampening=optimizer_config['dampening'],
                         weight_decay=optimizer_config['weight_decay'],
                         nesterov=optimizer_config['nesterov'])
        return opti

    elif optimizer == 'Rprop':
        p = ['lr', 'etas', 'step_sizes']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 1e-2
            elif i == 'etas':
                optimizer_config[i] = (0.5, 1.2)
            elif i == 'step_sizes':
                optimizer_config[i] = (1e-6, 50)

        opti = optim.Rprop(params,
                           lr=optimizer_config['lr'],
                           etas=optimizer_config['etas'],
                           step_sizes=optimizer_config['step_sizes'])
        return opti

    elif optimizer == 'RMSprop':
        p = ['lr', 'alpha', 'eps', 'weight_decay', 'momentum', 'centered']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 1e-2
            elif i == 'alpha':
                optimizer_config[i] = 0.99
            elif i == 'eps':
                optimizer_config[i] = 1e-8
            elif i == 'weight_decay':
                optimizer_config[i] = 0
            elif i == 'momentum':
                optimizer_config[i] = 0
            elif i == 'centered':
                optimizer_config[i] = False

        opti = optim.RMSprop(params,
                             lr=optimizer_config['lr'],
                             alpha=optimizer_config['alpha'],
                             eps=optimizer_config['eps'],
                             weight_decay=optimizer_config['weight_decay'],
                             momentum=optimizer_config['momentum'],
                             centered=optimizer_config['centered'])
        return opti

    elif optimizer == 'LBFGS':
        p = [
            'lr', 'max_iter', 'max_eval', 'tolerance_grad', 'tolerance_change',
            'history_size', 'line_search_fn'
        ]
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 1
            elif i == 'max_iter':
                optimizer_config[i] = 20
            elif i == 'max_eval':
                optimizer_config[i] = None
            elif i == 'tolerance_grad':
                optimizer_config[i] = 1e-5
            elif i == 'tolerance_change':
                optimizer_config[i] = 1e-9
            elif i == 'history_size':
                optimizer_config[i] = 100
            elif i == 'line_search_fn':
                optimizer_config[i] = None

        opti = optim.LBFGS(params,
                           lr=optimizer_config['lr'],
                           max_iter=optimizer_config['max_iter'],
                           max_eval=optimizer_config['max_eval'],
                           tolerance_grad=optimizer_config['tolerance_grad'],
                           tolerance_change=optimizer_config['tolerance_change'],
                           history_size=optimizer_config['history_size'],
                           line_search_fn=optimizer_config['line_search_fn'])
        return opti

    elif optimizer == 'ASGD':
        p = ['lr', 'lambd', 'alpha', 't0', 'weight_decay']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 1e-2
            elif i == 'lambd':
                optimizer_config[i] = 1e-4
            elif i == 'alpha':
                optimizer_config[i] = 0.75
            elif i == 't0':
                optimizer_config[i] = 1e6
            elif i == 'weight_decay':
                optimizer_config[i] = 0

        opti = optim.ASGD(params,
                          lr=optimizer_config['lr'],
                          lambd=optimizer_config['lambd'],
                          alpha=optimizer_config['alpha'],
                          t0=optimizer_config['t0'],
                          weight_decay=optimizer_config['weight_decay'])
        return opti

    elif optimizer == 'Adamax':
        p = ['lr', 'betas', 'eps', 'weight_decay']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 0.002
            elif i == 'betas':
                optimizer_config[i] = (0.9, 0.999)
            elif i == 'eps':
                optimizer_config[i] = 1e-08
            elif i == 'weight_decay':
                optimizer_config[i] = 0

        opti = optim.Adamax(params,
                            lr=optimizer_config['lr'],
                            betas=optimizer_config['betas'],
                            eps=optimizer_config['eps'],
                            weight_decay=optimizer_config['weight_decay'])
        return opti

    elif optimizer == 'SparseAdam':
        p = ['lr', 'betas', 'eps']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 0.001
            elif i == 'betas':
                optimizer_config[i] = (0.9, 0.999)
            elif i == 'eps':
                optimizer_config[i] = 1e-08

        opti = optim.SparseAdam(params,
                                lr=optimizer_config['lr'],
                                betas=optimizer_config['betas'],
                                eps=optimizer_config['eps'])
        return opti

    elif optimizer == 'Adam':
        p = ['lr', 'betas', 'eps', 'weight_decay']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 0.001
            elif i == 'betas':
                optimizer_config[i] = (0.9, 0.999)
            elif i == 'eps':
                optimizer_config[i] = 1e-08
            elif i == 'weight_decay':
                optimizer_config[i] = 0

        opti = optim.Adam(params,
                          lr=optimizer_config['lr'],
                          betas=optimizer_config['betas'],
                          eps=optimizer_config['eps'],
                          weight_decay=optimizer_config['weight_decay'])
        return opti

    elif optimizer == 'Adagrad':
        p = ['lr', 'lr_decay', 'weight_decay']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 0.01
            elif i == 'lr_decay':
                optimizer_config[i] = 0
            elif i == 'weight_decay':
                optimizer_config[i] = 0

        opti = optim.Adagrad(params,
                             lr=optimizer_config['lr'],
                             lr_decay=optimizer_config['lr_decay'],
                             weight_decay=optimizer_config['weight_decay'])
        return opti

    elif optimizer == 'Adadelta':
        p = ['lr', 'rho', 'eps', 'weight_decay']
        keys = list(optimizer_config.keys())
        unde = list(set(p) ^ set(keys))
        for i in unde:
            if i == 'lr':
                optimizer_config[i] = 1.0
            elif i == 'rho':
                optimizer_config[i] = 0.9
            elif i == 'eps':
                optimizer_config[i] = 1e-06
            elif i == 'weight_decay':
                optimizer_config[i] = 0

        opti = optim.Adadelta(params,
                              lr=optimizer_config['lr'],
                              rho=optimizer_config['rho'],
                              eps=optimizer_config['eps'],
                              weight_decay=optimizer_config['weight_decay'])
        return opti

    else:
        raise ValueError(
            'unknown optimizer: the name must exactly match the original '
            'PyTorch optimizer class name, please check again!'
        )
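# A minimal usage sketch (not from the original source): missing keys in
# optimizer_config are filled with the PyTorch defaults by the helper above,
# so a partial dict is enough. "model" is assumed to be any nn.Module
# instance defined elsewhere.
rprop_opt = get_optimizer('Rprop', {'lr': 0.01}, model.parameters())
sgd_opt = get_optimizer('SGD', {'lr': 0.1, 'momentum': 0.9}, model.parameters())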
Example #13
0
def tune_train_eval(loader, model, criterion, metric, config, tuned, reporter):
    for key, value in tuned.items():
        config[key] = str(value)

    #choose model type(whether DataParallel or not)
    if 'multiGPU' in config.keys() and config['multiGPU'] == 'Y':
        model = nn.DataParallel(model)

    if torch.cuda.is_available():
        model = model.cuda()

    #default optimizer -> adam
    optimizer = optim.Adam(model.parameters())

    #start setting optimizer
    if 'optimizer' in config.keys():
        if config['optimizer'] == 'sgd':
            optimizer = optim.SGD(model.parameters(),
                                  lr=float(config['learning_rate']))
        elif config['optimizer'] == 'rmsprop':
            optimizer = optim.RMSprop(model.parameters())
        elif config['optimizer'] == 'adadelta':
            optimizer = optim.Adadelta(model.parameters())
        elif config['optimizer'] == 'adagrad':
            optimizer = optim.Adagrad(model.parameters())
        elif config['optimizer'] == 'sparseAdam':
            optimizer = optim.SparseAdam(model.parameters())
        elif config['optimizer'] == 'adamax':
            optimizer = optim.Adamax(model.parameters())
        elif config['optimizer'] == 'asgd':
            optimizer = optim.ASGD(model.parameters())
        elif config['optimizer'] == 'lbfgs':
            optimizer = optim.LBFGS(model.parameters())
        elif config['optimizer'] == 'rprop':
            optimizer = optim.Rprop(model.parameters())

    optimizer.param_groups[0]['lr'] = float(config['learning_rate'])

    if 'momentum' in config.keys():
        optimizer.param_groups[0]['momentum'] = float(config['momentum'])
    if 'lr_decay' in config.keys():
        optimizer.param_groups[0]['lr_decay'] = float(config['lr_decay'])
    if 'weight_decay' in config.keys():
        optimizer.param_groups[0]['weight_decay'] = float(
            config['weight_decay'])
    if 'amsgrad' in config.keys():
        optimizer.param_groups[0]['amsgrad'] = eval(config['amsgrad'])
    if 'nesterov' in config.keys():
        optimizer.param_groups[0]['nesterov'] = eval(config['nesterov'])
    #end setting optimizer

    #prepare model save dir
    if 'model_save_dir' in config.keys():
        if not os.path.isdir(config['model_save_dir']):
            os.mkdir(config['model_save_dir'])

    #prepare trainLoader and testLoader separately
    trainLoader = loader.trainLoader
    testLoader = loader.testLoader

    def train_epoch(epoch, loader, model, criterion, config, optimizer):
        if type(model) == list:
            for eachModel in model:
                eachModel.train()
        else:
            model.train()

        for batch_idx, (data, target) in enumerate(loader):
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            if 'train_log_interval' in config.keys():
                if batch_idx % int(config['train_log_interval']) == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                          format(epoch, batch_idx * len(data),
                                 len(loader.dataset),
                                 100. * batch_idx / len(loader),
                                 loss.data.item()))
                    if 'mlflow_tracking_URI' in config.keys():
                        mlflow.log_metric('train_loss', loss.data.item())

    def test_epoch(loader, model, criterion, metric, config):
        model.eval()
        test_loss = 0
        correct = 0

        predictions = []
        answers = []

        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loader):
                if torch.cuda.is_available():
                    data, target = data.cuda(), target.cuda()
                data, target = Variable(data), Variable(target)

                output = model(data)
                test_loss += criterion(
                    output, target).sum().item()  # sum up batch loss

                #apply custom metric(in this case, Accuracy)
                predictions += list(output.data.max(1)[1].cpu().numpy(
                ))  # get the index of the max log-probability
                answers += list(target.data.cpu().numpy())

        test_loss /= len(loader.dataset)

        test_accuracy = metric.evaluate(predictions, answers)
        print('\nTest set: Average loss: {:.4f}, Accuracy: ({:.2f}%)\n'.format(
            test_loss, test_accuracy * 100))

        if 'mlflow_tracking_URI' in config.keys():
            mlflow.log_metric('test_loss', test_loss)
            mlflow.log_metric('test_accuracy', test_accuracy)

        print('test acc:' + str(test_accuracy))
        reporter(mean_loss=test_loss, mean_accuracy=test_accuracy)

    #set MLflow tracking server
    if 'mlflow_tracking_URI' in config.keys():
        print("MLflow Tracking URI: %s" % (config['mlflow_tracking_URI']))
        mlflow.set_tracking_uri(config['mlflow_tracking_URI'])

        with mlflow.start_run():
            print('setting parameters')
            for key, value in config.items():
                mlflow.log_param(key, value)
                print(key + '\t:\t' + value)

            for epoch in range(1, int(config['epoch']) + 1):
                print('epoch: ' + str(epoch))

                train_epoch(epoch, trainLoader, model, criterion, config,
                            optimizer)
                if 'model_save_dir' in config.keys(
                ) and 'model_save_interval' in config.keys():
                    if epoch % int(config['model_save_interval']) == 0:
                        if 'multiGPU' in config.keys(
                        ) and config['multiGPU'] == 'Y':
                            torch.save(
                                model.module,
                                os.getcwd() + os.sep +
                                config['model_save_dir'] + os.sep +
                                config['model_name_prefix'] + '_epoch_' +
                                str(epoch) + '.pkl')
                        else:
                            torch.save(
                                model,
                                os.getcwd() + os.sep +
                                config['model_save_dir'] + os.sep +
                                config['model_name_prefix'] + '_epoch_' +
                                str(epoch) + '.pkl')
                        print('model saved: ' + os.getcwd() + os.sep +
                              config['model_save_dir'] + os.sep +
                              config['model_name_prefix'] + '_epoch_' +
                              str(epoch) + '.pkl')

                test_epoch(testLoader, model, criterion, metric, config)
Example #14
0
    def fit(self,
            training_data,
            validation_data,
            options,
            device=None,
            detail=False):
        """
        Perform the training.  This is not called "train" because the base class already defines
        that method with a different meaning.  The base class "train" method puts the Module into
        "training mode".
        """
        print(
            "Training {} using {} rows of featurized training input...".format(
                self.name(), training_data.num_rows))
        start = time.time()
        loss_function = nn.NLLLoss()
        initial_rate = options.learning_rate
        lr_scheduler = options.lr_scheduler
        oo = options.optimizer_options

        if options.optimizer == "Adadelta":
            optimizer = optim.Adadelta(self.parameters(),
                                       lr=initial_rate,
                                       weight_decay=oo.weight_decay,
                                       rho=oo.rho,
                                       eps=oo.eps)
        elif options.optimizer == "Adagrad":
            optimizer = optim.Adagrad(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      lr_decay=oo.lr_decay)
        elif options.optimizer == "Adam":
            optimizer = optim.Adam(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   betas=oo.betas,
                                   eps=oo.eps)
        elif options.optimizer == "Adamax":
            optimizer = optim.Adamax(self.parameters(),
                                     lr=initial_rate,
                                     weight_decay=oo.weight_decay,
                                     betas=oo.betas,
                                     eps=oo.eps)
        elif options.optimizer == "ASGD":
            optimizer = optim.ASGD(self.parameters(),
                                   lr=initial_rate,
                                   weight_decay=oo.weight_decay,
                                   lambd=oo.lambd,
                                   alpha=oo.alpha,
                                   t0=oo.t0)
        elif options.optimizer == "RMSprop":
            optimizer = optim.RMSprop(self.parameters(),
                                      lr=initial_rate,
                                      weight_decay=oo.weight_decay,
                                      eps=oo.eps,
                                      alpha=oo.alpha,
                                      momentum=oo.momentum,
                                      centered=oo.centered)
        elif options.optimizer == "Rprop":
            optimizer = optim.Rprop(self.parameters(),
                                    lr=initial_rate,
                                    etas=oo.etas,
                                    step_sizes=oo.step_sizes)
        elif options.optimizer == "SGD":
            optimizer = optim.SGD(self.parameters(),
                                  lr=initial_rate,
                                  weight_decay=oo.weight_decay,
                                  momentum=oo.momentum,
                                  dampening=oo.dampening)

        print(optimizer)
        num_epochs = options.max_epochs
        batch_size = options.batch_size
        learning_rate = options.learning_rate
        lr_min = options.lr_min
        lr_peaks = options.lr_peaks
        ticks = training_data.num_rows / batch_size  # iterations per epoch
        total_iterations = ticks * num_epochs
        gamma = options.lr_gamma

        if not lr_min:
            lr_min = learning_rate
        scheduler = None
        if lr_scheduler == "TriangleLR":
            steps = lr_peaks * 2 + 1
            stepsize = num_epochs / steps
            scheduler = TriangularLR(optimizer, stepsize * ticks, lr_min,
                                     learning_rate, gamma)
        elif lr_scheduler == "CosineAnnealingLR":
            # divide by odd number to finish on the minimum learning rate
            cycles = lr_peaks * 2 + 1
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=total_iterations / cycles, eta_min=lr_min)
        elif lr_scheduler == "ExponentialLR":
            scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma)
        elif lr_scheduler == "StepLR":
            scheduler = optim.lr_scheduler.StepLR(
                optimizer, step_size=options.lr_step_size, gamma=gamma)
        elif lr_scheduler == "ExponentialResettingLR":
            reset = (num_epochs * ticks) / 3  # reset at the 1/3 mark.
            scheduler = ExponentialResettingLR(optimizer, gamma, reset)

        # optimizer = optim.Adam(model.parameters(), lr=0.0001)
        log = []

        for epoch in range(num_epochs):
            self.train()
            iteration = 0
            for i_batch, (audio, labels) in enumerate(
                    training_data.get_data_loader(batch_size)):
                if not self.batch_first:
                    audio = audio.transpose(1,
                                            0)  # GRU wants seq,batch,feature

                if device:
                    audio = audio.to(device)
                    labels = labels.to(device)

                # Also, we need to clear out the hidden state,
                # detaching it from its history on the last instance.
                self.init_hidden()

                # Before the backward pass, use the optimizer object to zero all of the
                # gradients for the variables it will update (which are the learnable
                # weights of the model). This is because by default, gradients are
                # accumulated in buffers( i.e, not overwritten) whenever .backward()
                # is called. Checkout docs of torch.autograd.backward for more details.
                optimizer.zero_grad()

                # Run our forward pass.
                keyword_scores = self(audio)

                # Compute the loss, gradients
                loss = loss_function(keyword_scores, labels)

                # Backward pass: compute gradient of the loss with respect to all the learnable
                # parameters of the model. Internally, the parameters of each Module are stored
                # in Tensors with requires_grad=True, so this call will compute gradients for
                # all learnable parameters in the model.
                loss.backward()

                # move to next learning rate
                if scheduler:
                    scheduler.step()

                # Calling the step function on an Optimizer makes an update to its parameters
                # applying the gradients we computed during back propagation
                optimizer.step()

                learning_rate = optimizer.param_groups[0]['lr']
                if detail:
                    learning_rate = optimizer.param_groups[0]['lr']
                    log += [{
                        'iteration': iteration,
                        'loss': loss.item(),
                        'learning_rate': learning_rate
                    }]
                iteration += 1

            # Find the best prediction in each sequence and return its accuracy
            passed, total, rate = self.evaluate(validation_data, batch_size,
                                                device)
            learning_rate = optimizer.param_groups[0]['lr']
            print(
                "Epoch {}, Loss {}, Validation Accuracy {:.3f}, Learning Rate {}"
                .format(epoch, loss.item(), rate * 100, learning_rate))
            log += [{
                'epoch': epoch,
                'loss': loss.item(),
                'accuracy': rate,
                'learning_rate': learning_rate
            }]

        end = time.time()
        print("Trained in {:.2f} seconds".format(end - start))
        return log
def get_optimizer(optimizer_name, optimizer_dict, network_params):
    if optimizer_name == "Adadelta":
        optimizer = optim.Adadelta(network_params,
                                   weight_decay=optimizer_dict["weight_decay"],
                                   rho=optimizer_dict["rho"],
                                   eps=optimizer_dict["eps"],
                                   lr=optimizer_dict["lr"])
    if optimizer_name == "Adagrad":
        optimizer = optim.Adagrad(network_params,
                                  weight_decay=optimizer_dict["weight_decay"],
                                  lr_decay=optimizer_dict["lr_decay"],
                                  eps=optimizer_dict["eps"],
                                  lr=optimizer_dict["lr"])
    if optimizer_name == "Adam":
        optimizer = optim.Adam(network_params,
                               weight_decay=optimizer_dict["weight_decay"],
                               betas=eval(optimizer_dict["betas"]),
                               eps=optimizer_dict["eps"],
                               lr=optimizer_dict["lr"],
                               amsgrad=optimizer_dict["amsgrad"])
    if optimizer_name == "AdamW":
        optimizer = optim.AdamW(network_params,
                                weight_decay=optimizer_dict["weight_decay"],
                                betas=eval(optimizer_dict["betas"]),
                                eps=optimizer_dict["eps"],
                                lr=optimizer_dict["lr"],
                                amsgrad=optimizer_dict["amsgrad"])
    if optimizer_name == "SparseAdam":
        optimizer = optim.SparseAdam(network_params,
                                     betas=eval(optimizer_dict["betas"]),
                                     eps=optimizer_dict["eps"],
                                     lr=optimizer_dict["lr"])
    if optimizer_name == "Adamax":
        optimizer = optim.Adamax(network_params,
                                 betas=eval(optimizer_dict["betas"]),
                                 eps=optimizer_dict["eps"],
                                 lr=optimizer_dict["lr"],
                                 weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "ASGD":
        optimizer = optim.ASGD(network_params,
                               lr=optimizer_dict["lr"],
                               lambd=optimizer_dict["lambd"],
                               alpha=optimizer_dict["alpha"],
                               t0=optimizer_dict["t0"],
                               weight_decay=optimizer_dict["weight_decay"])
    if optimizer_name == "LBFGS":
        optimizer = optim.LBFGS(
            network_params,
            lr=optimizer_dict["lr"],
            max_iter=optimizer_dict["max_iter"],
            max_eval=optimizer_dict["max_eval"],
            tolerance_grad=optimizer_dict["tolerance_grad"],
            tolerance_change=optimizer_dict["tolerance_change"],
            history_size=optimizer_dict["history_size"],
            line_search_fn=optimizer_dict["line_search_fn"])
    if optimizer_name == "RMSprop":
        optimizer = optim.RMSprop(network_params,
                                  weight_decay=optimizer_dict["weight_decay"],
                                  lr=optimizer_dict["lr"],
                                  momentum=optimizer_dict["momentum"],
                                  alpha=optimizer_dict["alpha"],
                                  eps=optimizer_dict["eps"],
                                  centered=optimizer_dict["centered"])
    if optimizer_name == "Rprop":
        optimizer = optim.Rprop(network_params,
                                lr=optimizer_dict["lr"],
                                eta=optimizer_dict["eta"],
                                step_sizes=optimizer_dict["step_sizes"])
    if optimizer_name == "SGD":
        optimizer = optim.SGD(network_params,
                              weight_decay=optimizer_dict["weight_decay"],
                              momentum=optimizer_dict["momentum"],
                              dampening=optimizer_dict["dampening"],
                              lr=optimizer_dict["lr"],
                              nesterov=optimizer_dict["nesterov"])
    if optimizer_name is None:
        optimizer = None

    return optimizer
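# A minimal usage sketch (not from the original source): unlike the helpers
# with default filling above, this version expects every key it reads to be
# present in optimizer_dict (for the Adam-family branches "betas" is even
# parsed with eval(), so it would be given as a string). "net" is assumed to
# be an nn.Module defined elsewhere.
rprop_dict = {"lr": 0.01, "etas": (0.5, 1.2), "step_sizes": (1e-06, 50)}
optimizer = get_optimizer("Rprop", rprop_dict, net.parameters())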
Example #16
0
def neuro_fitt_q(epoch,
                 train_env_max_steps,
                 eval_env_max_steps,
                 discount,
                 init_experience=0,
                 seed=None):
    """Run NFQ."""
    CONFIG = AlgorithmConfig(
        EPOCH=epoch,
        TRAIN_ENV_MAX_STEPS=train_env_max_steps,
        EVAL_ENV_MAX_STEPS=eval_env_max_steps,
        DISCOUNT=discount,
        INIT_EXPERIENCE=init_experience,
        INCREMENT_EXPERIENCE=True,
        HINT_TO_GOAL=True,
        RANDOM_SEED=seed,
        TRAIN_RENDER=False,
        EVAL_RENDER=False,
        SAVE_PATH="",
        LOAD_PATH="",
        USE_TENSORBOARD=False,
        USE_WANDB=False,
    )

    # Log to File, Console, TensorBoard, W&B
    logger = get_logger()

    # Setup environment
    train_env = CartPoleRegulatorEnv(mode="train",
                                     max_steps=train_env_max_steps)
    eval_env = CartPoleRegulatorEnv(mode="eval", max_steps=eval_env_max_steps)

    # Fix random seeds
    if CONFIG.RANDOM_SEED is not None:
        make_reproducible(CONFIG.RANDOM_SEED, use_numpy=True, use_torch=True)
        train_env.seed(CONFIG.RANDOM_SEED)
        eval_env.seed(CONFIG.RANDOM_SEED)
    #else:
    #    logger.warning("Running without a random seed: this run is NOT reproducible.")

    # Setup agent
    nfq_net = NFQNetwork()
    optimizer = optim.Rprop(nfq_net.parameters())
    nfq_agent = NFQAgent(nfq_net, optimizer)

    # Load trained agent
    # if CONFIG.LOAD_PATH:
    #     load_models(CONFIG.LOAD_PATH, nfq_net=nfq_net, optimizer=optimizer)

    # NFQ Main loop
    # A set of transition samples denoted as D
    all_rollouts = []
    total_cost = 0

    if CONFIG.INIT_EXPERIENCE:
        for _ in range(CONFIG.INIT_EXPERIENCE):
            rollout, episode_cost = train_env.generate_rollout(
                None, render=CONFIG.TRAIN_RENDER)
            all_rollouts.extend(rollout)
            total_cost += episode_cost

    stats = EpisodeStats(episode_lengths=np.zeros(CONFIG.EPOCH),
                         episode_rewards=np.zeros(CONFIG.EPOCH))

    for epoch in range(CONFIG.EPOCH + 1):
        # Variant 1: Incrementally add transitions (Section 3.4)
        # TODO(seungjaeryanlee): Done before or after training?
        if CONFIG.INCREMENT_EXPERIENCE:
            new_rollout, episode_cost = train_env.generate_rollout(
                nfq_agent.get_best_action, render=CONFIG.TRAIN_RENDER)
            all_rollouts.extend(new_rollout)
            total_cost += episode_cost

        state_action_b, target_q_values = nfq_agent.generate_pattern_set(
            all_rollouts)

        # Variant 2: Clamp function to zero in goal region
        # TODO(seungjaeryanlee): Since this is a regulator setting, should it
        #                        not be clamped to zero?
        if CONFIG.HINT_TO_GOAL:
            goal_state_action_b, goal_target_q_values = train_env.get_goal_pattern_set(
            )
            goal_state_action_b = torch.FloatTensor(goal_state_action_b)
            goal_target_q_values = torch.FloatTensor(goal_target_q_values)
            state_action_b = torch.cat([state_action_b, goal_state_action_b],
                                       dim=0)
            target_q_values = torch.cat(
                [target_q_values, goal_target_q_values], dim=0)

        loss = nfq_agent.train((state_action_b, target_q_values))

        # TODO(seungjaeryanlee): Evaluation should be done with 3000 episodes
        eval_episode_length, eval_success, eval_episode_cost = nfq_agent.evaluate(
            eval_env, CONFIG.EVAL_RENDER)

        if eval_success:
            break

        #stats.episode_rewards[epoch] = eval_episode_cost
        stats.episode_rewards[epoch] = eval_episode_length + 1
        stats.episode_lengths[epoch] = eval_episode_length

    train_env.close()
    eval_env.close()

    return stats
Example #17
0
model.cuda()
log_tr = LogMeters(args.prefix + args.optimizer + '_Train', n_classes)
log_te = LogMeters(args.prefix + args.optimizer + '_Test', n_classes)

if args.optimizer == 'SGD':
    optimizer = optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          momentum=0.9)
elif args.optimizer == 'Adam':
    optimizer = optim.Adam(model.parameters())
elif args.optimizer == 'LBFGS':
    optimizer = optim.LBFGS(model.parameters())
elif args.optimizer == 'RMSprop':
    optimizer = optim.RMSprop(model.parameters())
elif args.optimizer == 'Rprop':
    optimizer = optim.Rprop(model.parameters())
elif args.optimizer == 'Adadelta':
    optimizer = optim.Adadelta(model.parameters())
elif args.optimizer == 'Adabound':
    optimizer = adabound.AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)
else:
    raise ValueError('unknown optimizer: ' + args.optimizer)

# learning rate decay
# scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4,8,12], gamma=0.9)


def train(epoch, data_loader, log):
    model.train()
    log.reset()
    total_loss, total_batches = 0.0, 0.0
Example #18
0
# define a neural network using the customised structure
net = CasperNetwork(input_neurons, output_neurons)

# define loss function
loss_func = torch.nn.CrossEntropyLoss()

# define optimiser with per layer learning rates
# optimiser without any hidden neurons
optimiser = optim.Rprop([{
    'params': net.Initial.parameters(),
    'lr': L1
}, {
    'params': net.output_layer.parameters()
}, {
    'params': net.old_input_neurons.parameters()
}, {
    'params': net.old_output_neurons.parameters()
}],
                        lr=L3,
                        etas=(0.5, 1.2),
                        step_sizes=(1e-06, 50))
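# A short note (not from the original source): parameter groups that do not set
# their own 'lr' fall back to the top-level lr=L3 above, so only net.Initial is
# trained with L1 here. The per-group settings stay inspectable afterwards:
for group in optimiser.param_groups:
    print(group['lr'])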

# store all losses for visualisation
all_losses = []

previous_loss = None

# train a neural network
for epoch in range(num_epochs):
    # Perform forward pass: compute predicted y by passing x to the model.
Example #19
0
 def rprop_constructor(params):
     rprop = optim.Rprop(params, lr=1e-2)
     return StochasticWeightAveraging(rprop,
                                      swa_start=1000,
                                      swa_freq=1,
                                      swa_lr=1e-3)
Example #20
0
    x_test = [data[0] for data in test]
    y_test = [data[1] for data in test]
    # print(f"Total amount of 1's test: {sum(y_test)}/{len(y_test)}")

    print("creating network")
    test = PieceSelection()

    # cwd = os.getcwd()
    #
    # string = cwd + f"\\model games_{500} epoch_{10}.pb"
    #
    # test.load_state_dict(torch.load(string))
    criterion = nn.CrossEntropyLoss()
    lr = .01
    opt = optim.Rprop(test.parameters(), lr=.01)

    print("training")
    for epoch in range(36):
        running_loss = 0.0
        opt = optim.SGD(test.parameters(),
                        lr=lr * (.9**epoch),
                        weight_decay=1e-5)
        for index, data in enumerate(x):
            # print(index)
            opt.zero_grad()
            output = test(torch.tensor([data]).type(torch.FloatTensor))
            # print(len(x))
            # print(len(y))

            # print(output)
Example #21
0
 def rprop_constructor(params):
     rprop = optim.Rprop(params, lr=1e-2)
     return contriboptim.SWA(rprop,
                             swa_start=1000,
                             swa_freq=1,
                             swa_lr=1e-3)
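# A minimal usage sketch (not from the original source), assuming the
# torchcontrib package is installed and imported as contriboptim above: the SWA
# wrapper steps the inner Rprop optimizer as usual and, from swa_start on, also
# keeps a running average of the weights that can be copied back into the model
# with swap_swa_sgd(). model, loader and loss_fn are assumed to exist in scope.
swa_opt = contriboptim.SWA(optim.Rprop(model.parameters(), lr=1e-2),
                           swa_start=1000, swa_freq=1, swa_lr=1e-3)
for x, y in loader:
    swa_opt.zero_grad()
    loss_fn(model(x), y).backward()
    swa_opt.step()
swa_opt.swap_swa_sgd()  # load the averaged weights into the model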
def get_optimizer(type, model, lr, wd):
    if type == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              lr=lr,
                              weight_decay=wd,
                              momentum=0.9)
    elif type == 'ASGD':
        optimizer = optim.ASGD(model.parameters(),
                               lr=lr,
                               lambd=0.0001,
                               alpha=0.75,
                               t0=1000000.0,
                               weight_decay=wd)
    elif type == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               weight_decay=wd,
                               amsgrad=True)
    elif type == 'Rprop':
        optimizer = optim.Rprop(model.parameters(),
                                lr=lr,
                                etas=(0.5, 1.2),
                                step_sizes=(1e-06, 50))
    elif type == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=lr,
                                  lr_decay=0,
                                  weight_decay=wd,
                                  initial_accumulator_value=0)
    elif type == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=lr,
                                   rho=0.9,
                                   eps=1e-06,
                                   weight_decay=wd)
    elif type == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=lr,
                                  alpha=0.99,
                                  eps=1e-08,
                                  weight_decay=wd,
                                  momentum=0,
                                  centered=False)
    elif type == 'Adamax':
        optimizer = optim.Adamax(model.parameters(),
                                 lr=lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=wd)
    elif type == 'SparseAdam':
        optimizer = torch.optim.SparseAdam(model.parameters(),
                                           lr=lr,
                                           betas=(0.9, 0.999),
                                           eps=1e-08)
    elif type == 'LBFGS':
        optimizer = optim.LBFGS(model.parameters(),
                                lr=lr,
                                max_iter=20,
                                max_eval=None,
                                tolerance_grad=1e-05,
                                tolerance_change=1e-09,
                                history_size=100,
                                line_search_fn=None)

    else:
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               weight_decay=wd,
                               amsgrad=True)

    return optimizer
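# A minimal usage sketch (not from the original source): "model" is assumed to
# be any nn.Module defined elsewhere; unrecognised names fall through to the
# Adam branch above.
optimizer = get_optimizer('Rprop', model, lr=0.01, wd=0.0)
fallback = get_optimizer('does-not-exist', model, lr=0.001, wd=1e-5)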
Example #23
0
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = [Net(), Net(), Net(), Net()]
optimizer = [None] * len(net)
criterion = nn.MSELoss()
lossrec = np.zeros([4, niter])
outputfin = [None] * 4

# train
for ii, neti in enumerate(net):
    neti.cuda()
    optimizeri = optim.Rprop(neti.parameters(), lr=0.001)
    optimizer[ii] = optimizeri
    # wrap them in Variable
    inputstn = torch.FloatTensor(
        data['nninput'][ii]
        [0:-nvalid]).cuda()  # cast tensors to a CUDA datatype
    labelstn = torch.FloatTensor(
        data['nnoutput'][ii]
        [0:-nvalid]).cuda()  # cast tensors to a CUDA datatype

    inputs, labels = Variable(inputstn), Variable(labelstn)  # ???
    # zero the parameter gradients
    for jj in range(niter):
        optimizeri.zero_grad()

        # forward + backward + optimize
Example #24
0
#import net2net as n2n

loss_dict = {"categorical_crossentropy":F.nll_loss,
             "binary_crossentropy": F.binary_cross_entropy,
             "dice_loss": lf.dice_loss,
             "dice_loss_modified": lf.dice_loss_modified,
             "seg_binary_cross_entropy": lf.seg_binary_cross_entropy,
             "mse":torch.nn.MSELoss(size_average=False),
             "L1Loss":torch.nn.L1Loss(),
             "bce_localize":lf.bce_localize}

optimizer_dict = {"adadelta":lambda model, lr: optim.Adadelta(model.parameters(), lr=lr),
                  "adam":lambda model, lr: optim.Adam(model.parameters(), lr=lr),
                  "svrg":lambda model, lr: opt.SVRG(model, lr=lr),
                  "sgd":lambda model, lr: optim.SGD(model.parameters(), lr=lr),
                  "rprop":lambda model, lr: optim.Rprop(model.parameters(), lr=lr)}

weight_dict = {0:"weight", 1:"bias"}

class BaseModel(nn.Module):
    """INSPIRED BY KERAS AND SCIKIT-LEARN API"""
    def __init__(self, 
                 problem_type="classification", 
                 loss_name="categorical_crossentropy",
                 optimizer_name="adadelta"):

        super(BaseModel, self).__init__()
        self.loss_name = loss_name
        self.problem_type = problem_type
        self.my_optimizer = None
        self.optimizer_name = optimizer_name
Example #25
0
def main():
    """Run NFQ."""
    # Setup hyperparameters
    parser = configargparse.ArgParser()
    parser.add("-c", "--config", required=True, is_config_file=True)
    parser.add("--EPOCH", type=int)
    parser.add("--TRAIN_ENV_MAX_STEPS", type=int)
    parser.add("--EVAL_ENV_MAX_STEPS", type=int)
    parser.add("--DISCOUNT", type=float)
    parser.add("--INIT_EXPERIENCE", type=int)
    parser.add("--INCREMENT_EXPERIENCE", action="store_true")
    parser.add("--HINT_TO_GOAL", action="store_true")
    parser.add("--RANDOM_SEED", type=int)
    parser.add("--TRAIN_RENDER", action="store_true")
    parser.add("--EVAL_RENDER", action="store_true")
    parser.add("--SAVE_PATH", type=str, default="")
    parser.add("--LOAD_PATH", type=str, default="")
    parser.add("--USE_TENSORBOARD", action="store_true")
    parser.add("--USE_WANDB", action="store_true")
    CONFIG = parser.parse_args()
    if not hasattr(CONFIG, "INCREMENT_EXPERIENCE"):
        CONFIG.INCREMENT_EXPERIENCE = False
    if not hasattr(CONFIG, "HINT_TO_GOAL"):
        CONFIG.HINT_TO_GOAL = False
    if not hasattr(CONFIG, "TRAIN_RENDER"):
        CONFIG.TRAIN_RENDER = False
    if not hasattr(CONFIG, "EVAL_RENDER"):
        CONFIG.EVAL_RENDER = False
    if not hasattr(CONFIG, "USE_TENSORBOARD"):
        CONFIG.USE_TENSORBOARD = False
    if not hasattr(CONFIG, "USE_WANDB"):
        CONFIG.USE_WANDB = False

    print()
    print(
        "+--------------------------------+--------------------------------+")
    print(
        "| Hyperparameters                | Value                          |")
    print(
        "+--------------------------------+--------------------------------+")
    for arg in vars(CONFIG):
        print("| {:30} | {:<30} |".format(
            arg,
            getattr(CONFIG, arg) if getattr(CONFIG, arg) is not None else ""))
    print(
        "+--------------------------------+--------------------------------+")
    print()

    # Log to File, Console, TensorBoard, W&B
    logger = get_logger()

    if CONFIG.USE_TENSORBOARD:
        from torch.utils.tensorboard import SummaryWriter

        writer = SummaryWriter(log_dir="tensorboard_logs")
    if CONFIG.USE_WANDB:
        import wandb

        wandb.init(project="implementations-nfq", config=CONFIG)

    # Setup environment
    train_env = CartPoleRegulatorEnv(mode="train")
    eval_env = CartPoleRegulatorEnv(mode="eval")

    # Fix random seeds
    if CONFIG.RANDOM_SEED is not None:
        make_reproducible(CONFIG.RANDOM_SEED, use_numpy=True, use_torch=True)
        train_env.seed(CONFIG.RANDOM_SEED)
        eval_env.seed(CONFIG.RANDOM_SEED)
    else:
        logger.warning(
            "Running without a random seed: this run is NOT reproducible.")

    # Setup agent
    nfq_net = NFQNetwork()
    optimizer = optim.Rprop(nfq_net.parameters())
    nfq_agent = NFQAgent(nfq_net, optimizer)

    # Load trained agent
    if CONFIG.LOAD_PATH:
        load_models(CONFIG.LOAD_PATH, nfq_net=nfq_net, optimizer=optimizer)

    # NFQ Main loop
    # A set of transition samples denoted as D
    all_rollouts = []
    total_cost = 0
    if CONFIG.INIT_EXPERIENCE:
        for _ in range(CONFIG.INIT_EXPERIENCE):
            rollout, episode_cost = train_env.generate_rollout(
                None, render=CONFIG.TRAIN_RENDER)
            all_rollouts.extend(rollout)
            total_cost += episode_cost
    for epoch in range(CONFIG.EPOCH + 1):
        # Variant 1: Incrementally add transitions (Section 3.4)
        # TODO(seungjaeryanlee): Done before or after training?
        if CONFIG.INCREMENT_EXPERIENCE:
            new_rollout, episode_cost = train_env.generate_rollout(
                nfq_agent.get_best_action, render=CONFIG.TRAIN_RENDER)
            all_rollouts.extend(new_rollout)
            total_cost += episode_cost

        state_action_b, target_q_values = nfq_agent.generate_pattern_set(
            all_rollouts)

        # Variant 2: Clamp function to zero in goal region
        # TODO(seungjaeryanlee): Since this is a regulator setting, should it
        #                        not be clamped to zero?
        if CONFIG.HINT_TO_GOAL:
            goal_state_action_b, goal_target_q_values = train_env.get_goal_pattern_set(
            )
            goal_state_action_b = torch.FloatTensor(goal_state_action_b)
            goal_target_q_values = torch.FloatTensor(goal_target_q_values)
            state_action_b = torch.cat([state_action_b, goal_state_action_b],
                                       dim=0)
            target_q_values = torch.cat(
                [target_q_values, goal_target_q_values], dim=0)

        loss = nfq_agent.train((state_action_b, target_q_values))

        # TODO(seungjaeryanlee): Evaluation should be done with 3000 episodes
        eval_episode_length, eval_success, eval_episode_cost = nfq_agent.evaluate(
            eval_env, CONFIG.EVAL_RENDER)

        if CONFIG.INCREMENT_EXPERIENCE:
            logger.info(
                "Epoch {:4d} | Train {:3d} / {:4.2f} | Eval {:4d} / {:5.2f} | Train Loss {:.4f}"
                .format(  # noqa: B950
                    epoch,
                    len(new_rollout),
                    episode_cost,
                    eval_episode_length,
                    eval_episode_cost,
                    loss,
                ))
            if CONFIG.USE_TENSORBOARD:
                writer.add_scalar("train/episode_length", len(new_rollout),
                                  epoch)
                writer.add_scalar("train/episode_cost", episode_cost, epoch)
                writer.add_scalar("train/loss", loss, epoch)
                writer.add_scalar("eval/episode_length", eval_episode_length,
                                  epoch)
                writer.add_scalar("eval/episode_cost", eval_episode_cost,
                                  epoch)
            if CONFIG.USE_WANDB:
                wandb.log({"Train Episode Length": len(new_rollout)},
                          step=epoch)
                wandb.log({"Train Episode Cost": episode_cost}, step=epoch)
                wandb.log({"Train Loss": loss}, step=epoch)
                wandb.log({"Evaluation Episode Length": eval_episode_length},
                          step=epoch)
                wandb.log({"Evaluation Episode Cost": eval_episode_cost},
                          step=epoch)
        else:
            logger.info(
                "Epoch {:4d} | Eval {:4d} / {:5.2f} | Train Loss {:.4f}".
                format(epoch, eval_episode_length, eval_episode_cost, loss))
            if CONFIG.USE_TENSORBOARD:
                writer.add_scalar("train/loss", loss, epoch)
                writer.add_scalar("eval/episode_length", eval_episode_length,
                                  epoch)
                writer.add_scalar("eval/episode_cost", eval_episode_cost,
                                  epoch)
            if CONFIG.USE_WANDB:
                wandb.log({"Train Loss": loss}, step=epoch)
                wandb.log({"Evaluation Episode Length": eval_episode_length},
                          step=epoch)
                wandb.log({"Evaluation Episode Cost": eval_episode_cost},
                          step=epoch)

        if eval_success:
            logger.info(
                "Epoch {:4d} | Total Cycles {:6d} | Total Cost {:4.2f}".format(
                    epoch, len(all_rollouts), total_cost))
            if CONFIG.USE_TENSORBOARD:
                writer.add_scalar("summary/total_cycles", len(all_rollouts),
                                  epoch)
                writer.add_scalar("summary/total_cost", total_cost, epoch)
            if CONFIG.USE_WANDB:
                wandb.log({"Total Cycles": len(all_rollouts)}, step=epoch)
                wandb.log({"Total Cost": total_cost}, step=epoch)
            break

    # Save trained agent
    if CONFIG.SAVE_PATH:
        save_models(CONFIG.SAVE_PATH, nfq_net=nfq_net, optimizer=optimizer)

    train_env.close()
    eval_env.close()
Example #26
0
model.classifier = classifier

if (args.optim == 'SGD'):
    optimizer = optim.SGD(model.classifier.parameters(), lr=args.rate)
elif (args.optim == 'Adadelta'):
    optimizer = optim.Adadelta(model.classifier.parameters(), lr=args.rate)
elif (args.optim == 'Adagrad'):
    optimizer = optim.Adagrad(model.classifier.parameters(),
                              lr=args.rate)
elif (args.optim == 'Adam'):
    optimizer = optim.Adam(model.classifier.parameters(), lr=args.rate)
elif (args.optim == 'RMS'):
    optimizer = optim.RMSprop(model.classifier.parameters(), lr=args.rate)
else:
    optimizer = optim.Rprop(model.classifier.parameters(), lr=args.rate)

if args.loss == 'L1':
    criterion = nn.L1Loss()
elif args.loss == 'NLL':
    criterion = nn.NLLLoss()
elif args.loss == 'Poisson':
    criterion = nn.PoissonNLLLoss()
elif args.loss == 'MSE':
    criterion = nn.MSELoss()
else:
    criterion = nn.CrossEntropyLoss()

optimizer.zero_grad()
model.classifier = classifier
epochs = args.epoch
Example #27
0
# F.load_state_dict(torch.load("./model_tmp_pretr_10.pth", map_location=device))

print("Num. of params: {:d}".format(utils.get_parameters_count(model)))

data = utils.read_pickle(['t', 'x', 'u'], args.data_path)
dataset = utils.generate_torchgeom_dataset(data)
if args.batch_size is None:
    batch_size = len(dataset)
else:
    batch_size = args.batch_size

print(dataset)

loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

optimizer = optim.Rprop(F.parameters(), lr=args.lr, step_sizes=(1e-8, 10.))
loss_fn = nn.MSELoss()

# Training
ts = dataset[0].t.shape[0]  # assumes the same time grid for all sim-s.
print(dataset[0].t)

for epoch in range(args.epochs):
    losses = torch.zeros(len(loader))

    for i, dp in enumerate(loader):
        optimizer.zero_grad()

        edge_index = dp.edge_index
        pos = dp.pos
        with torch.no_grad():
Example #28
0
dataset = utils.generate_torchgeom_dataset(data, sig=0.0)

# #########
sim_inds = [0] #np.random.choice(len(dataset), n_s, replace=False)
print(f'sim_inds = {sim_inds}')
dataset = [ds for i, ds in enumerate(dataset) if i in sim_inds]
print(f'dataset length: {len(dataset)}')
# #########

if args.batch_size is None:
    batch_size = len(dataset)
else:
    batch_size = args.batch_size

loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Optimizer, loss
optimizer = optim.Rprop(F.parameters(), lr=args.lr)
loss_fn = nn.MSELoss()

# Training
ts = dataset[0].t.shape[0]  # assumes the same time grid for all sim-s.
print("%%%%%%%%%%%%%%%%%%")
print(dataset[0])
for epoch in range(args.epochs):
    losses = torch.zeros(len(loader))
    
    for i, dp in enumerate(loader):
        optimizer.zero_grad()

        params_dict = {"edge_index": dp.edge_index.to(device), "pos": dp.pos.to(device)}
        F.update_params(params_dict)
Example #29
0
if __name__ == "__main__":
    epoches = 100

    lr = 1e-2

    batch_size = 30

    transform = torch.tensor

    device = torch.device('cpu')

    trainset = PointDataset('./labels/label.csv', transform=transform)

    trainloader = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True, num_workers=4)

    testset = PointDataset('./labels/test.csv', transform=transform)

    testloader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=True, num_workers=4)

    # show_original_points()

    classifier_net = Network(2, 5, 3).to(device)

    optimizer1 = optim.SGD(classifier_net.parameters(), lr=lr, momentum=0)
    optimizer2 = optim.Adam(classifier_net.parameters(), lr=lr)
    optimizer3 = optim.Rprop(classifier_net.parameters(), lr=lr)
    classifier_net = train(classifier_net, trainloader, testloader, device, lr, optimizer3)



Example #30
0
    def load_network(self):
        logger.info("Start loading network, loss function and optimizer")

        # Load a network
        # self.net = VGG('VGG11')
        self.net = ResNet18()

        # Move network to GPU if needed
        if self.args.gpu:
            self.net.to('cuda')

        # Define the loss function and the optimizer
        self.criterion = nn.CrossEntropyLoss()

        if self.args.optimizer.lower() == 'adadelta':
            logger.info("Selected adadelta as optimizer")
            self.optimizer = optim.Adadelta(self.net.parameters(),
                                            lr=1.0,
                                            rho=0.9,
                                            eps=1e-06,
                                            weight_decay=0)
        elif self.args.optimizer.lower() == 'adagrad':
            logger.info("Selected adagrad as optimizer")
            self.optimizer = optim.Adagrad(self.net.parameters(),
                                           lr=0.01,
                                           lr_decay=0,
                                           weight_decay=0,
                                           initial_accumulator_value=0)
        elif self.args.optimizer.lower() == 'adam':
            logger.info("Selected adam as optimizer")
            self.optimizer = optim.Adam(self.net.parameters(),
                                        lr=0.001,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=0,
                                        amsgrad=False)
        elif self.args.optimizer.lower() == 'sparseadam':
            logger.info("Selected sparseadam as optimizer")
            self.optimizer = optim.SparseAdam(self.net.parameters(),
                                              lr=0.001,
                                              betas=(0.9, 0.999),
                                              eps=1e-08)
        elif self.args.optimizer.lower() == 'adamax':
            logger.info("Selected adamax as optimizer")
            self.optimizer = optim.Adamax(self.net.parameters(),
                                          lr=0.002,
                                          betas=(0.9, 0.999),
                                          eps=1e-08,
                                          weight_decay=0)
        elif self.args.optimizer.lower() == 'asgd':
            logger.info("Selected asgd as optimizer")
            self.optimizer = optim.ASGD(self.net.parameters(),
                                        lr=0.01,
                                        lambd=0.0001,
                                        alpha=0.75,
                                        t0=1000000.0,
                                        weight_decay=0)
        elif self.args.optimizer.lower() == 'lbfgs':
            logger.info("Selected lbfgs as optimizer")
            self.optimizer = optim.LBFGS(self.net.parameters(),
                                         lr=1,
                                         max_iter=20,
                                         max_eval=None,
                                         tolerance_grad=1e-05,
                                         tolerance_change=1e-09,
                                         history_size=100,
                                         line_search_fn=None)
        elif self.args.optimizer.lower() == 'rmsprop':
            logger.info("Selected rmsprop as optimizer")
            self.optimizer = optim.RMSprop(self.net.parameters(),
                                           lr=0.01,
                                           alpha=0.99,
                                           eps=1e-08,
                                           weight_decay=0,
                                           momentum=0,
                                           centered=False)
        elif self.args.optimizer.lower() == 'rprop':
            logger.info("Selected rprop as optimizer")
            self.optimizer = optim.Rprop(self.net.parameters(),
                                         lr=0.01,
                                         etas=(0.5, 1.2),
                                         step_sizes=(1e-06, 50))
        elif self.args.optimizer.lower() == 'sgd':
            logger.info("Selected sgd as optimizer")
            self.optimizer = optim.SGD(self.net.parameters(),
                                       lr=0.001,
                                       momentum=0,
                                       dampening=0,
                                       weight_decay=0,
                                       nesterov=False)
        else:
            logger.info("Unknown optimizer given, SGD is chosen instead.")
            self.optimizer = optim.SGD(self.net.parameters(),
                                       lr=0.001,
                                       momentum=0.9)

        logger.info(
            "Loading network, loss function and %s optimizer was successful",
            self.args.optimizer)
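
# The if/elif chain above amounts to a lookup from optimizer name to constructor
# plus default hyperparameters. As a sketch of a more compact alternative (not
# part of the original class; the keyword arguments mirror the defaults used above):
import torch.optim as optim

_OPTIMIZERS = {
    'adadelta': (optim.Adadelta, dict(lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)),
    'adagrad':  (optim.Adagrad,  dict(lr=0.01, lr_decay=0, weight_decay=0)),
    'adam':     (optim.Adam,     dict(lr=0.001, betas=(0.9, 0.999), eps=1e-08)),
    'rmsprop':  (optim.RMSprop,  dict(lr=0.01, alpha=0.99, eps=1e-08)),
    'rprop':    (optim.Rprop,    dict(lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50))),
    'sgd':      (optim.SGD,      dict(lr=0.001, momentum=0, weight_decay=0)),
}

def build_optimizer(name, parameters):
    # Unknown names fall back to SGD with momentum 0.9, matching the else branch above.
    cls, kwargs = _OPTIMIZERS.get(name.lower(), (optim.SGD, dict(lr=0.001, momentum=0.9)))
    return cls(parameters, **kwargs)

# Hypothetical usage: self.optimizer = build_optimizer(self.args.optimizer, self.net.parameters())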