def train(self, path_model, epochs, device="cuda"):
        self.cnn_model.to(device)
        optimizer = Adam(self.cnn_model.parameters(), lr=0.001)
        loss_fn = nn.CrossEntropyLoss()
        min_loss = None
        self.cnn_model.train()

        for epoch in range(epochs):
            for train_data in self.train_dataset_loader:
                images, labels = train_data
                images = images.to(device)
                labels = labels.to(device)
                # forward
                outputs = self.cnn_model(images)
                loss = loss_fn(outputs, labels)
                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # check if this is the best model so far (using the loss of the
            # last mini-batch in the epoch) and store it on disk
            print("epoch: {}, loss: {}".format(epoch, loss.item()))
            if min_loss is None or loss.item() < min_loss:
                print("New best model found!")
                torch_save(self.cnn_model, path_model)
                min_loss = loss.item()

        return self.cnn_model
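
A minimal reload sketch for the model saved above (an assumption, not part of the original repo): since torch_save here stores the whole nn.Module rather than a state_dict, the model class must be importable at load time.

from torch import load as torch_load

def load_trained_model(path_model, device="cuda"):
    # Loading a pickled nn.Module requires its class definition to be
    # importable; pass weights_only=False on PyTorch >= 2.6.
    model = torch_load(path_model, map_location=device, weights_only=False)
    model.eval()  # disable dropout / batch-norm updates for inference
    return model
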
Example #2
def save_var_to_checkpoint(filename, name, mask, sd_key):
    checkpoint = load_checkpoint(filename)
    sd = checkpoint[sd_key] if sd_key is not None else checkpoint
    if name not in sd:
        raise RuntimeError('Variable {} not found in {}'.format(name, filename))
    sd[name] = sd[name] * mask
    torch_save(checkpoint, filename)
    del checkpoint
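
A hypothetical usage sketch for save_var_to_checkpoint, assuming load_checkpoint is a thin wrapper around torch.load: zero out part of a weight tensor inside an existing checkpoint file (e.g. for pruning experiments). The file name, tensor name, and shape below are placeholders.

import torch

mask = torch.ones(64, 128)
mask[:32] = 0  # zero the first 32 output rows of the (hypothetical) fc1 weight
save_var_to_checkpoint('model.ckpt', 'fc1.weight', mask, sd_key='state_dict')
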
Example #3
 def save(self, stats, model):
     m_value = stats['val']['loss'][-1]
     if self.save_mode and (self.last_value is None or self.last_value > m_value):
         print("Saving model...")
         if hasattr(model, 'get_init_params'):
             with open(self.init_params_name, 'w') as fd:
                 json.dump(model.get_init_params(), fd)
         torch_save(model.state_dict(), self.m_name)
         self.last_value = m_value
Example #4
def save_checkpoint(path, total_epochs, model, loss, environment):
    make_directories_for_file(path)
    dictionary = dict()
    dictionary["total_epochs"] = total_epochs
    dictionary["model_states"] = model.state_dict()
    dictionary["loss"] = loss
    dictionary["environment"] = environment.to_dict()
    torch_save(dictionary, path)
    logger.info(f"Saved checkpoint in epoch {total_epochs} to '{path}'.")
Example #5
 def save(cls, weights: dict, path: str):
     """
     Save model state
     :param weights: state
     :param path: state file path
     :return: None
     """
     # Lazy load torch
     from torch import save as torch_save
     torch_save(weights, path)
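
Hypothetical call site for the classmethod above (ModelIO is a placeholder name for the owning class):

weights = model.state_dict()
ModelIO.save(weights, '/tmp/model_weights.pt')
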
Example #6
 def save(self, checkpoint_path, epoch, iteration) -> None:
     torch_save(
         dict(
             model_state_dict=self.model.state_dict(),
             optimizer_state_dict=self.optimizer.state_dict(),
             scheduler_state_dict=self.scheduler.state_dict(),
             epoch=epoch,
             iteration=iteration,
         ),
         checkpoint_path,
     )
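
A matching restore sketch, assuming the same dict layout and that self.model, self.optimizer, and self.scheduler are already constructed:

def restore(self, checkpoint_path):
    from torch import load as torch_load
    checkpoint = torch_load(checkpoint_path, map_location='cpu')
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    # resume from the recorded position
    return checkpoint['epoch'], checkpoint['iteration']
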
Example #7
    def validation_phase_ended(self, model, criterion, optimizer, **kwargs):
        ForwardStatsCallback.validation_phase_ended(self, model, criterion,
                                                    optimizer, **kwargs)

        m_value = self.stats['val']['loss'][-1]
        if self.save_mode and (self.last_value is None
                               or self.last_value > m_value):
            print("Saving model...")
            if hasattr(model, 'get_init_params'):
                with open(self.init_params_name, 'w') as fd:
                    json.dump(model.get_init_params(), fd)
            torch_save(model.state_dict(), self.m_name)
            self.last_value = m_value
Example #8

    def train_evaluate_trvate(self,
                              train_dl,
                              valid_dl,
                              test_dl,
                              epochCnt=500,
                              saveBestModelName=None):
        # define the optimization
        criterion = CrossEntropyLoss()
        optimizer = SGD(self.parameters(), lr=0.01, momentum=0.9)
        # enumerate epochs
        accvectr = np.zeros(epochCnt)
        accvecva = np.zeros(epochCnt)
        accvecte = np.zeros(epochCnt)
        acc_va_max = 0
        preds_best, labels_best = None, None  # defined even if val accuracy never improves

        for epoch in range(epochCnt):
            # enumerate mini batches
            self.train()
            for i, (inputs, targets) in enumerate(train_dl):
                # clear the gradients
                optimizer.zero_grad()
                # compute the model output
                yhat = self.forward(inputs)
                # calculate loss
                loss = criterion(yhat, targets.squeeze_())
                # credit assignment
                loss.backward()
                # update model weights
                optimizer.step()
            acc_tr, _, _ = self.evaluate_model(train_dl)
            acc_va, _, _ = self.evaluate_model(valid_dl)
            acc_te, preds_te, labels_te = self.evaluate_model(test_dl)

            if acc_va_max < acc_va:
                preds_best, labels_best = preds_te, labels_te
                print("best validation epoch so far - epoch ", epoch,
                      "va: %.3f" % acc_va, "te: %.3f" % acc_te)
                acc_va_max = acc_va
                if saveBestModelName is not None:
                    print("Saving model at : ", saveBestModelName)
                    torch_save(self.state_dict(), saveBestModelName)
                    print("Model saved..")
            else:
                print("epoch ", epoch, "tr: %.3f" % acc_tr,
                      "va: %.3f" % acc_va, "te: %.3f" % acc_te)

            accvectr[epoch] = acc_tr
            accvecva[epoch] = acc_va
            accvecte[epoch] = acc_te
        return accvectr, accvecva, accvecte, preds_best, labels_best
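
The loop above relies on an evaluate_model helper that is not shown; one plausible shape, returning (accuracy, predictions, labels) over a DataLoader, is:

from torch import no_grad, argmax, cat

def evaluate_model(self, dl):
    self.eval()  # evaluation mode: no dropout, frozen batch-norm stats
    preds, labels = [], []
    with no_grad():
        for inputs, targets in dl:
            yhat = self.forward(inputs)
            preds.append(argmax(yhat, dim=1))
            labels.append(targets.squeeze())
    preds, labels = cat(preds), cat(labels)
    acc = (preds == labels).float().mean().item()
    return acc, preds, labels
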
Example #9
def update_best_model(bmodel,
                      bhist,
                      new_model,
                      new_h,
                      phase="val",
                      metric="loss",
                      ind=-1):
    if new_model is not None and new_h is not None:
        if new_h[phase][metric][ind] < bhist[phase][metric][ind]:
            bmodel = new_model
            bhist = new_h
    if hasattr(bmodel, 'get_init_params'):
        with open('best_model_params.json', 'w') as fd:
            json.dump(bmodel.get_init_params(), fd)
    torch_save(bmodel.state_dict(), "best_model.pth")
    with open('best_history.json', 'w') as fd:
        json.dump(bhist, fd)
    return bmodel, bhist
Example #10

def save_checkpoint(
    model,
    infos,
    optimizer,
    checkpoint_dir=None,
    job_id=None,
    histories=None,
    append="",
):
    #
    # Modify appendage
    if len(append) > 0:
        append = "-" + append
    #
    # if checkpoint_dir doesn't exist, create it
    if not isdir(checkpoint_dir):
        makedirs(checkpoint_dir)
    #
    # Set file names
    checkpoint_path = join(checkpoint_dir, f"model{append}.pth")
    optimizer_path = join(checkpoint_dir, f"optimizer{append}.pth")
    infos_path = join(checkpoint_dir, f"infos_{job_id}{append}.pkl")
    histories_path = join(checkpoint_dir, f"histories_{job_id}{append}.pkl")

    #
    # Save checkpoint data
    print(f"Saving checkpoint to {checkpoint_path}")
    torch_save(model.state_dict(), checkpoint_path)

    #
    # Save optimizer data
    torch_save(optimizer.state_dict(), optimizer_path)

    #
    # Save infos data
    with open(infos_path, "wb") as f:
        pickle_dump(infos, f)

    #
    # Save histories data
    if histories is not None:
        with open(histories_path, "wb") as f:
            pickle_dump(histories, f)
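
A hypothetical resume helper matching the file layout written above (reloading the pickled infos/histories is omitted for brevity):

from os.path import join
from torch import load as torch_load

def load_checkpoint(checkpoint_dir, model, optimizer, append=""):
    if len(append) > 0:
        append = "-" + append
    model.load_state_dict(torch_load(join(checkpoint_dir, f"model{append}.pth")))
    optimizer.load_state_dict(
        torch_load(join(checkpoint_dir, f"optimizer{append}.pth")))
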
Example #11
 def _clone_or_save(self, with_data=True, file_path=None, method='torch'):
     data, o = None, None
     if not with_data:
         data = self._pull_data()
     xtrn = self._pull_xtrn()
     if file_path:
         with open(file_path, 'wb') as f:
             if method == 'pickle':
                 pickle.dump(self, f, 2)
             elif method == 'torch':
                 torch_save(self, f)
             else:
                 raise ValueError(f'ERROR: Unknown method "{method}"')
     else:
         o = deepcopy(self)
     if xtrn is not None:
         self._push_xtrn(xtrn)
         if o is not None:
             o._push_xtrn(xtrn)
     if data:
         self._push_data(data)
     return o if o is not None else xtrn
Example #12
def save_checkpoint(engine, model, optimizer, lr_scheduler, amp, no_checkpoints, checkpoint_directory):
    step = engine.state.iteration

    checkpoints = [
        int(e.split("/")[-1].split("_")[-1].split(".")[0])
        for e in glob(checkpoint_directory + "*.pth")
    ]
    checkpoints.sort()

    if len(checkpoints) > no_checkpoints:
        os_remove(checkpoint_directory + "checkpoint_" + str(checkpoints[0]) + ".pth")

    torch_save(
        {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'engine': engine.state_dict(),
            'amp': amp.state_dict()
        },
        checkpoint_directory + "checkpoint_" + str(step) + ".pth"
    )
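
A resume sketch under the same naming scheme, reloading whichever checkpoint_<step>.pth has the highest step (an assumption; the original only shows saving):

from glob import glob
from torch import load as torch_load

def load_latest_checkpoint(checkpoint_directory, model, optimizer, lr_scheduler):
    steps = [
        int(e.split("/")[-1].split("_")[-1].split(".")[0])
        for e in glob(checkpoint_directory + "*.pth")
    ]
    if not steps:
        return None  # nothing to resume from
    state = torch_load(
        checkpoint_directory + "checkpoint_" + str(max(steps)) + ".pth")
    model.load_state_dict(state['model'])
    optimizer.load_state_dict(state['optimizer'])
    lr_scheduler.load_state_dict(state['lr_scheduler'])
    return state
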
Example #13
def get_torch_object_bytes(obj):
    with TemporaryFile() as f:
        torch_save(obj, f)
        f.seek(0)
        b = f.read()
    return b
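
The inverse direction is a one-liner, since torch.load accepts any file-like object:

from io import BytesIO
from torch import load as torch_load

def get_torch_object_from_bytes(b):
    # round-trips the bytes produced by get_torch_object_bytes above
    return torch_load(BytesIO(b))
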
Example #14
def save_model(model, path):
    torch_save(model.state_dict(), path)
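
A matching load_model sketch: because save_model writes a state_dict, loading requires an already-constructed model instance of the same architecture:

from torch import load as torch_load

def load_model(model, path):
    model.load_state_dict(torch_load(path))
    return model
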
Example #15

if TRAIN_AGENT:
    acc_rewards, stats = train_agent(env_name='Banana',
                                     num_episodes=NUM_TRAIN_EPISODES,
                                     env_platform='unity',
                                     max_num_steps=MAX_NUM_STEPS)

    experiment_filename = '{epoch_train_time}-a_{alpha}-g_{gamma}-e_{epsilon}-edecay_{epsilon_decay}-emin_{epsilon_min}'\
        .format(epoch_train_time=stats.epoch_train_time,
                alpha=ALPHA,
                gamma=GAMMA,
                epsilon=EPSILON_START, epsilon_min=EPSILON_MIN, epsilon_decay=EPSILON_DECAY)

    print("\n\nScore: {}".format(acc_rewards))

    if RESULTS_CONFIG.SAVE_MODEL:
        torch_save(agent.online_q_network.state_dict(),
                   experiment_filename + '.pth')

    if RESULTS_CONFIG.SAVE_REWARDS_DATA:
        pkl_dump(acc_rewards,
                 open('./results/' + experiment_filename + ".p", 'wb'))

if TEST_AGENT:
    state_dict = torch.load('./models/checkpoint' + '.pth')
    model = agent.target_q_network
    model.load_state_dict(state_dict)
    model.eval()

    test_epoch_rewards = []  # list containing scores from each episode_idx
    first_time_solved = False

    test_start_time = time()
Example #16
 def save(self, file_name):
     torch_save(self.__ann.state_dict(), file_name)
Example #17
 def save(self, file_name):
     torch_save(self, file_name)
Example #18
 def save_policy_model(self, model, count):
     torch_save(model.state_dict(), self.dir + "/models/" + str(count))
Example #19
    def doTask(self,
               run_id,
               nn,
               lvl,
               task_id,
               chromosome_id,
               env_id,
               rl,
               poet_loop_counter,
               noisy=False,
               algo='CoDE',
               ngames=1000,
               popsize=100):
        """

        :param nn: PyTorch nn state_dict
        :param lvl: flat lvl string
        :param task_id: EVALUATE the NN or OPTIMIZE it
        :param chromosome_id: id of NN-GG pair
        :param rl: use RL?
        :param poet_loop_counter: poet number loop
        :return:
        """

        # update network and env to execute on task
        self.pair.env.generator.update_from_lvl_string(lvl)
        self.pair.nn.load_state_dict(nn)

        if task_id == ADPTASK.EVALUATE:
            self.pair.noisy = noisy
            score = self.pair.evaluate(rl=rl)
            return {
                'won': self.pair.env.win == 'Win',
                'chromosome_id': chromosome_id,
                'env_id': env_id,
                'score': score,
            }

        elif task_id == ADPTASK.OPTIMIZE:
            # run optimization here
            if rl:
                # optimizes in place
                run_ppo(policy_agent=self.pair,
                        env_fn=self.pair.env.make,
                        path=f'{self.pair.prefix}/runs',
                        pair_id=chromosome_id,
                        outer_poet_loop_count=poet_loop_counter,
                        n_concurrent_games=1,
                        frames=ngames * self.game_length)
            else:
                objective = PyTorchObjective(agent=self.pair, popsize=popsize)
                ans = run_DE(AE_pair=objective,
                             results_prefix=self.prefix,
                             unique_run_id=run_id,
                             pair_id=chromosome_id,
                             poet_loop_counter=poet_loop_counter,
                             generation_max=ngames // popsize,
                             scaling_factor=0.6,
                             crossover_rate=0.4,
                             lower_bound=-5,
                             upper_bound=5)
                objective.update_nn(ans)
                del objective

            # get score of optimized weights
            score = self.pair.evaluate(rl=rl)
            state_dict = self.pair.nn.state_dict()

            # save best weights
            #destination = f"{self.pair.prefix}/results_{run_id}/{chromosome_id}/final_weights_poet{poet_loop_counter}.pt"
            #torch_save(state_dict, destination)
            # did the agent WIN the game?
            if self.pair.env.win == 'Win':
                path = os.path.join(
                    f'{self.prefix}', f'{chromosome_id}',
                    f'winning_weights_poet{poet_loop_counter}.pt')
                torch_save(state_dict, path)
            return {
                'won': self.pair.env.win == 'Win',
                'score': score,
                'chromosome_id': chromosome_id,
                'env_id': env_id,
                'nn': state_dict
            }
        else:
            raise ValueError('unspecified task requested')
Example #20
def save_layer(layer: nn.Module, path: str):
    ensure_dir(path)
    torch_save(layer.state_dict(), path)
    logging.info('### Stored layer as {} ###'.format(path))
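
ensure_dir is not defined in this snippet; one plausible implementation, creating the parent directory of a file path if needed, would be:

import os

def ensure_dir(path):
    dirname = os.path.dirname(path)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
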
Example #21
    plt.legend(legend, loc='lower right')
    plt.show()

    # Show a test image
    single_image_dataset = SingleImageDataset()

    while True:
        command = input("Test model on random image (y/n)? ")
        if command != 'y':
            break

        single_image_dataset.load_image(rotor_dataset.random_image_filepath())
        data_loader = DataLoader(single_image_dataset, batch_size=1)
        data_iter = iter(data_loader)
        image = next(data_iter)

        # Generate the steering value prediction
        output = model(image)

        _, result_value = torch_max(output, dim=1)

        result_name = Label.label_index_to_name(result_value)

        show_image(image, 0, result_value, None)

    # Save off the model
    save_model = input("Save model (y/n)? ")
    if save_model == 'y':
        model_export_filepath = str(Path(os.getcwd()) / Path('nn_model.pt'))
        torch_save(model.state_dict(), model_export_filepath)
Example #22
def train_one(model_folder):
    new_model_folder_name = model_folder.replace('_created', '_training')
    os_rename(model_folder, new_model_folder_name)
    frequencies = glob(os_path_join(new_model_folder_name, 'k_*'))
    for frequency in frequencies:
        # Load model
        print('train.py: training {}'.format(frequency))
        model_params_path = os_path_join(frequency, model_params_fname)

        # create model
        model, model_params = get_which_model_from_params_fname(
            model_params_path, return_params=True)
        if 'cuda' in model_params:
            using_cuda = model_params['cuda'] and torch_cuda_is_available()
        else:
            using_cuda = torch_cuda_is_available()

        if using_cuda:
            model.cuda()

        # save initial weights
        if 'save_initial' in model_params and model_params[
                'save_initial'] and model_params['save_dir']:
            suffix = '_initial'
            path = add_suffix_to_path(model_params['save_dir'], suffix)
            ensure_dir(path)
            torch_save(model.state_dict(),
                       os_path_join(path, MODEL_DATA_FNAME))
            save_model_params(os_path_join(path, model_params_fname),
                              model_params)

        # loss
        if 'cost_function' in model_params:
            loss = model_params['cost_function']
        elif 'loss_function' in model_params:
            loss = model_params['loss_function']
        else:
            raise ValueError(
                'model_params missing key cost_function or loss_function')

        if loss not in ['MSE', 'L1', 'SmoothL1']:
            raise ValueError('loss must be MSE, L1, or SmoothL1. You gave ' +
                             str(loss))
        if loss == 'MSE':
            from torch.nn import MSELoss
            loss = MSELoss()
        elif loss == 'L1':
            from torch.nn import L1Loss
            loss = L1Loss()
        elif loss == 'SmoothL1':
            from torch.nn import SmoothL1Loss
            loss = SmoothL1Loss()

        # optimizer
        if model_params['optimizer'] == 'Adam':
            from torch.optim import Adam
            optimizer = Adam(model.parameters(),
                             lr=model_params['learning_rate'],
                             weight_decay=model_params['weight_decay'])
        elif model_params['optimizer'] == 'SGD':
            from torch.optim import SGD
            optimizer = SGD(model.parameters(),
                            lr=model_params['learning_rate'],
                            momentum=model_params['momentum'],
                            weight_decay=model_params['weight_decay'])
        else:
            raise ValueError(
                'model_params[\'optimizer\'] must be either Adam or SGD. Got '
                + model_params['optimizer'])

        logger = Logger()

        # Load training, validation, and test data
        # Load primary training data
        dat_train = ApertureDataset(
            model_params['data_train'],
            NUM_SAMPLES_TRAIN,
            k=model_params['k'],
            target_is_data=model_params['data_is_target'])
        loader_train = DataLoader(dat_train,
                                  batch_size=model_params['batch_size'],
                                  shuffle=True,
                                  num_workers=DATALOADER_NUM_WORKERS,
                                  pin_memory=using_cuda)

        # Load secondary training data - used to evaluate training loss after every epoch
        dat_train2 = ApertureDataset(
            model_params['data_train'],
            NUM_SAMPLES_TRAIN_EVAL,
            k=model_params['k'],
            target_is_data=model_params['data_is_target'])
        loader_train_eval = DataLoader(dat_train2,
                                       batch_size=model_params['batch_size'],
                                       shuffle=False,
                                       num_workers=DATALOADER_NUM_WORKERS,
                                       pin_memory=using_cuda)

        # Load validation data - used to evaluate validation loss after every epoch
        dat_val = ApertureDataset(
            model_params['data_val'],
            NUM_SAMPLES_VALID,
            k=model_params['k'],
            target_is_data=model_params['data_is_target'])
        loader_val = DataLoader(dat_val,
                                batch_size=model_params['batch_size'],
                                shuffle=False,
                                num_workers=DATALOADER_NUM_WORKERS,
                                pin_memory=using_cuda)

        trainer = Trainer(
            model=model,
            loss=loss,
            optimizer=optimizer,
            patience=model_params['patience'],
            loader_train=loader_train,
            loader_train_eval=loader_train_eval,
            loader_val=loader_val,
            cuda=using_cuda,
            logger=logger,
            data_noise_gaussian=model_params['data_noise_gaussian'],
            save_dir=frequency)

        # run training
        trainer.train()

    os_rename(new_model_folder_name,
              new_model_folder_name.replace('_training', '_trained'))
Example #23
                                                             [pairs[j].generator for j in range(len(pairs))],
                                                             availableChildren,
                                                             transfer_eval=True)

                send_work(distributed_work, ADPTASK.EVALUATE, parent, unique_run_id, i)

                # get answers from children
                transfer_eval_answers = waitForAndCollectAnswers(parent, availableChildren, distributed_work, unique_run_id, i, ADPTASK.EVALUATE)

                # use information to determine if NN i should migrate to env j.
                perform_transfer(pairs, transfer_eval_answers, i, unique_run_id, stats)

            # save checkpoints of networks into POET folder
            #
            for pair in pairs:
                torch_save(pair.nn.state_dict(), os.path.join(tdir,
                                                              f'network{pair.id}.pt'))
                with open(os.path.join(tdir,
                                       f'lvl{pair.id}.txt'), 'w+') as fname:
                    fname.write(str(pair.generator))

            i += 1
            pbar.update(1)
            if i >= args.num_poet_loops:
                done = True

        except KeyboardInterrupt as e:
            print(e)
            pbar.close()
            dieAndKillChildren(parent, pairs)
            import sys
            sys.exit(0)
Example #24
 def save(self, file_name):
     torch_save(self.__model.state_dict(), file_name)