Example #1
    def __init__(self,
                 model_name,
                 config,
                 working_dir,
                 network,
                 sess,
                 episode_runner,
                 summaries_collector,
                 curriculum_coefficient=None):
        self.model_name = model_name
        self.config = config
        self.working_dir = working_dir
        self.network = network
        self.sess = sess
        self.episode_runner = episode_runner
        self.summaries_collector = summaries_collector
        self.curriculum_coefficient = curriculum_coefficient

        self.fixed_start_goal_pairs = self.episode_runner.game.get_fixed_start_goal_pairs(
            challenging=False)
        self.hard_fixed_start_goal_pairs = self.episode_runner.game.get_fixed_start_goal_pairs(
            challenging=True)

        self.batch_size = config['model']['batch_size']
        self.steps_per_trajectory_print = config['general'][
            'cycles_per_trajectory_print']
        self.train_episodes_per_cycle = config['general'][
            'train_episodes_per_cycle']
        self.gain = config['model']['gain']

        self.train_episodes_counter = 0

        self.check_gradients = config['gradient_checker']['enable']
        if self.check_gradients:
            self.gradient_output_dir = os.path.join(working_dir, 'gradient',
                                                    model_name)
            init_dir(self.gradient_output_dir)
            saver_dir = os.path.join(self.gradient_output_dir,
                                     'temp_4_gradient_print')
            self.gradient_saver = ModelSaver(saver_dir,
                                             1,
                                             'gradient_checker',
                                             print_log=False)
        else:
            self.gradient_output_dir, self.gradient_saver = None, None
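Note: several of these snippets call an init_dir helper before writing outputs. A minimal sketch, assuming it only has to ensure the directory exists (hypothetical, not the project's actual implementation):

import os

def init_dir(path):
    # Create the directory tree if it is missing; no-op when it already exists.
    os.makedirs(path, exist_ok=True)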
Example #2
def main():
    ms = ModelSaver()
    model = load_model('../artifacts/model.h5')
    ms.save_model(model, '../artifacts/model.horn')
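The round trip above implies a Keras-style model. A minimal sketch of a compatible ModelSaver, treating save_model as a thin wrapper over the model's own serialization; the class body, the import source of load_model, and the meaning of the .horn extension are assumptions, not the confirmed API:

from tensorflow.keras.models import load_model  # assumed source of load_model

class ModelSaver:
    def save_model(self, model, path):
        # Delegate to the model's own serialization (assumption).
        model.save(path)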
Example #3
def run_for_config(config):
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sgt', scenario)
    init_dir(working_dir)

    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories',
                                         model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = Network(config, game)
    network_variables = network.get_all_variables()

    # save model
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'),
                              2,
                              'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'),
                            1,
                            'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']
                ['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        def policy_function(starts, goals, level, is_train):
            res = network.predict_policy(starts, goals, level, sess, is_train)
            means = 0.5 * (np.array(starts) + np.array(goals))
            distance = np.linalg.norm(res[0] - means, axis=1)
            print(
                f'distance from mean: mean {distance.mean()} min {distance.min()} max {distance.max()}'
            )
            if np.any(np.isnan(res)):
                print_and_log(
                    '######################## Nan predictions detected...')
            return res

        episode_runner = EpisodeRunnerSubgoal(config, game, policy_function)
        trainer = TrainerSubgoal(
            model_name,
            config,
            working_dir,
            network,
            sess,
            episode_runner,
            summaries_collector,
            curriculum_coefficient=get_initial_curriculum(config))

        decrease_learn_rate_if_static_success = config['model'][
            'decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model'][
            'stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        global_step = 0
        best_curriculum_coefficient = None

        for current_level in range(config['model']['starting_level'],
                                   config['model']['levels'] + 1):

            best_cost, best_cost_global_step = None, None
            no_test_improvement, consecutive_learn_rate_decrease = 0, 0

            if config['model']['init_from_lower_level'] and current_level > 1:
                print_and_log('initiating level {} from previous level'.format(
                    current_level))
                network.init_policy_from_lower_level(sess, current_level)

            for cycle in range(config['general']['training_cycles_per_level']):
                print_and_log('starting cycle {}, level {}'.format(
                    cycle, current_level))

                new_global_step, success_ratio = trainer.train_policy_at_level(
                    current_level, global_step)
                if new_global_step == global_step:
                    print_and_log(
                        'no data found in training cycle {} global step still {}'
                        .format(cycle, global_step))
                    continue
                else:
                    global_step = new_global_step

                if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                    network.decrease_base_std(sess, current_level)
                    print_and_log('new base stds {}'.format(
                        network.get_base_stds(sess, current_level)))

                print_and_log('done training cycle {} global step {}'.format(
                    cycle, global_step))

                # save every now and then
                if cycle % config['general']['save_every_cycles'] == 0:
                    latest_saver.save(sess, global_step=global_step)

                if cycle % config['general']['test_frequency'] == 0:
                    # do test
                    test_successes, test_cost, _, endpoints_by_path = trainer.collect_test_data(
                        current_level, False)
                    summaries_collector.write_test_success_summaries(
                        sess, global_step, test_successes, test_cost,
                        trainer.curriculum_coefficient)
                    with open(episodic_success_rates_path, 'a') as f:
                        f.write('{} {} {} {} {}'.format(
                            current_level, trainer.train_episodes_counter,
                            test_successes, test_cost, os.linesep))

                    # decide how to act next
                    print_and_log('old cost was {} at step {}'.format(
                        best_cost, best_cost_global_step))
                    print_and_log('current learn rates {}'.format(
                        network.get_learn_rates(sess, current_level)))
                    print_and_log('current base stds {}'.format(
                        network.get_base_stds(sess, current_level)))
                    if best_cost is None or test_cost < best_cost:
                        print_and_log('new best cost {} at step {}'.format(
                            test_cost, global_step))
                        best_cost, best_cost_global_step = test_cost, global_step
                        best_curriculum_coefficient = trainer.curriculum_coefficient
                        no_test_improvement, consecutive_learn_rate_decrease = 0, 0
                        best_saver.save(sess, global_step)
                        test_trajectories_file = os.path.join(
                            test_trajectories_dir,
                            '{}.txt'.format(global_step))
                        serialize_compress(endpoints_by_path,
                                           test_trajectories_file)
                    else:
                        print_and_log(
                            'new model is not the best with cost {} at step {}'
                            .format(test_cost, global_step))
                        no_test_improvement += 1
                        print_and_log('no improvement count {} of {}'.format(
                            no_test_improvement,
                            decrease_learn_rate_if_static_success))
                        if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                            # restore the model every once in a while if did not find a better solution in a while
                            restore_best(sess, best_saver,
                                         best_curriculum_coefficient, trainer)
                        if no_test_improvement == decrease_learn_rate_if_static_success:
                            # restore the best model
                            if config['model']['restore_on_decrease']:
                                restore_best(sess, best_saver,
                                             best_curriculum_coefficient,
                                             trainer)
                            # decrease learn rates
                            network.decrease_learn_rates(sess, current_level)
                            no_test_improvement = 0
                            consecutive_learn_rate_decrease += 1
                            print_and_log(
                                'decreasing learn rates {} of {}'.format(
                                    consecutive_learn_rate_decrease,
                                    stop_training_after_learn_rate_decrease))
                            print_and_log('new learn rates {}'.format(
                                network.get_learn_rates(sess, current_level)))
                            if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                                break

                if trainer.curriculum_coefficient is not None:
                    if success_ratio > config['curriculum'][
                            'raise_when_train_above']:
                        print_and_log(
                            'current curriculum coefficient {}'.format(
                                trainer.curriculum_coefficient))
                        trainer.curriculum_coefficient *= config['curriculum'][
                            'raise_times']
                        print_and_log(
                            'curriculum coefficient raised to {}'.format(
                                trainer.curriculum_coefficient))

                # mark in log the end of cycle
                print_and_log(os.linesep)

            # if we finished because we ran out of cycles, we still need to make one more test
            end_of_level_test(best_cost, best_cost_global_step,
                              best_curriculum_coefficient, best_saver, sess,
                              test_trajectories_dir, trainer, current_level)

        print_and_log('trained all levels - needs to stop')
        close_log()
        return best_cost
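restore_best is called above but not shown. A plausible sketch based on its call sites, assuming ModelSaver exposes a restore method and that the curriculum coefficient must be rolled back together with the weights (both assumptions):

def restore_best(sess, best_saver, best_curriculum_coefficient, trainer):
    # Roll the session variables back to the best checkpoint and
    # restore the curriculum state that produced it (assumed API).
    best_saver.restore(sess)
    trainer.curriculum_coefficient = best_curriculum_coefficient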
Example #4
import tensorflow as tf
from simple_ddqrn import DDQRN
from target_ddqrn import target_ddqrn

import parameter_config as cfg
from model_saver import ModelSaver
from ddqrn_trainer import DDQRNTrainer

sess = tf.Session()

ddqrn = DDQRN(sess, "main_DDQRN")
ddqrn_target = target_ddqrn(DDQRN(sess, "target_DDQRN"),
                            [tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="main_DDQRN"),
                             tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="target_DDQRN")])

sess.run(tf.global_variables_initializer())

trainer = DDQRNTrainer(ddqrn, ddqrn_target, sess)

model = ModelSaver(ddqrn, trainer)

model.load(cfg.save_path)
ddqrn_target.update(sess, tau=1.0)
model.save(cfg.save_path)
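ddqrn_target.update(sess, tau=1.0) performs a hard copy of the main network's weights into the target network. For reference, the standard soft-update (Polyak averaging) rule that a tau parameter implies; this is a sketch of the general technique, not the repository's actual target_ddqrn code:

def soft_update(sess, main_vars, target_vars, tau):
    # target_w <- tau * main_w + (1 - tau) * target_w; tau=1.0 copies outright.
    ops = [t.assign(tau * m + (1.0 - tau) * t)
           for m, t in zip(main_vars, target_vars)]
    sess.run(ops)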
Example #5
    def __init__(self, base_folder, json_files, batch_size=64):
        self.verbose = False
        self.spot = False

        serialization_dir = 'tmp'
        self.mfb = MultiLabelBinarizer()
        mf_labels = np.arange(0, 228)
        # NUM_CLASSES = len(mf_labels)
        # class_names = mf_labels #image_datasets['train'].classes
        self.mfb.fit_transform([mf_labels])
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        log_dirs = {
            'train':
            SummaryWriter(os.path.join(serialization_dir, "log", "train")),
            'val':
            SummaryWriter(os.path.join(serialization_dir, "log", "validation"))
        }
        self.tensorboard = TensorboardWriter(log_dirs['train'],
                                             log_dirs['val'])
        # summary_interval = 100
        self.model_saver = ModelSaver(serialization_dir)

        data_transforms = {
            'train':
            transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'val':
            transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
        }

        print('[i] Loading datasets...')
        dataset = MaterialistFashion(base_folder,
                                     json_files,
                                     data_transforms['train'],
                                     id_as_path=True,
                                     load_first=8 * 139)
        validation_split = .2

        # Creating data indices for training and validation splits:
        dataset_size = len(dataset)
        indices = list(range(dataset_size))
        split = int(np.floor(validation_split * dataset_size))
        shuffle_dataset = True
        random_seed = 42
        if shuffle_dataset:
            np.random.seed(random_seed)
            np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]

        # Creating PT data samplers and loaders:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)

        # image_datasets = {
        #     'train': MaterialistFashion(train_folder, train_json, data_transforms['train']),
        #     'val': MaterialistFashion(val_folder, val_json, data_transforms['val'])
        # }

        self.dataloaders = {
            'train':
            torch.utils.data.DataLoader(dataset,
                                        batch_size=batch_size,
                                        sampler=train_sampler,
                                        num_workers=8),
            'val':
            torch.utils.data.DataLoader(dataset,
                                        batch_size=batch_size,
                                        sampler=valid_sampler,
                                        num_workers=8)
        }

        self.dataset_sizes = {
            'train': len(train_indices),
            'val': len(val_indices)
        }  # {x: len(image_datasets[x]) for x in ['train', 'val']}

        print('[i] Done loading datasets.')
Example #6
class MfTrainer:
    def __init__(self, base_folder, json_files, batch_size=64):
        self.verbose = False
        self.spot = False

        serialization_dir = 'tmp'
        self.mfb = MultiLabelBinarizer()
        mf_labels = np.arange(0, 228)
        # NUM_CLASSES = len(mf_labels)
        # class_names = mf_labels #image_datasets['train'].classes
        self.mfb.fit_transform([mf_labels])
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        log_dirs = {
            'train':
            SummaryWriter(os.path.join(serialization_dir, "log", "train")),
            'val':
            SummaryWriter(os.path.join(serialization_dir, "log", "validation"))
        }
        self.tensorboard = TensorboardWriter(log_dirs['train'],
                                             log_dirs['val'])
        # summary_interval = 100
        self.model_saver = ModelSaver(serialization_dir)

        data_transforms = {
            'train':
            transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'val':
            transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
        }

        print('[i] Loading datasets...')
        dataset = MaterialistFashion(base_folder,
                                     json_files,
                                     data_transforms['train'],
                                     id_as_path=True,
                                     load_first=8 * 139)
        validation_split = .2

        # Creating data indices for training and validation splits:
        dataset_size = len(dataset)
        indices = list(range(dataset_size))
        split = int(np.floor(validation_split * dataset_size))
        shuffle_dataset = True
        random_seed = 42
        if shuffle_dataset:
            np.random.seed(random_seed)
            np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]

        # Creating PT data samplers and loaders:
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)

        # image_datasets = {
        #     'train': MaterialistFashion(train_folder, train_json, data_transforms['train']),
        #     'val': MaterialistFashion(val_folder, val_json, data_transforms['val'])
        # }

        self.dataloaders = {
            'train':
            torch.utils.data.DataLoader(dataset,
                                        batch_size=batch_size,
                                        sampler=train_sampler,
                                        num_workers=8),
            'val':
            torch.utils.data.DataLoader(dataset,
                                        batch_size=batch_size,
                                        sampler=valid_sampler,
                                        num_workers=8)
        }

        self.dataset_sizes = {
            'train': len(train_indices),
            'val': len(val_indices)
        }  # {x: len(image_datasets[x]) for x in ['train', 'val']}

        print('[i] Done loading datasets.')

    def check_label_distribution(self, dataloader, filter_threshold=100):
        label_bins = {}
        for i in range(0, 228):
            label_bins[i] = 0
        for i, (_, labels, _) in enumerate(dataloader):
            for batch in self.mfb.inverse_transform(labels):
                for label in batch:
                    label_bins[int(label)] += 1
        label_bins = sorted(label_bins.items(),
                            key=lambda x: x[1],
                            reverse=True)
        max_count = label_bins[0][1]  # count of the most frequent label
        rescaling = [1] * 228
        filtered_mask = [0] * 228
        for key, value in label_bins:
            print("Label ID: {} -> Count: {}".format(key, value))
            if value != max_count:
                rescaling[key] = rescaling[key] - value / max_count
            else:
                rescaling[key] = 1e-10
            if value >= filter_threshold:
                filtered_mask[key] = 1
        return rescaling, filtered_mask

    def imshow(self, inp, title=None):
        """shows a batch of images."""
        inp = inp.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        plt.imshow(inp)
        if title is not None:
            plt.title(title)
        plt.pause(0.001)  # pause a bit so that plots are updated

    def show_first_batch(self):
        inputs, labels, image_id = next(iter(self.dataloaders['train']))
        # Make a grid from batch
        out = torchvision.utils.make_grid(inputs)
        self.imshow(out, title=[x for x in image_id])
        print("Images shown")

    # def metrics_to_tensorboard(epoch: int, train_metrics: dict, val_metrics: dict = None) -> None:
    #     """
    #     Sends all of the train metrics (and validation metrics, if provided) to tensorboard.
    #     """
    #     metric_names = set(train_metrics.keys())
    #     if val_metrics is not None:
    #         metric_names.update(val_metrics.keys())
    #     val_metrics = val_metrics or {}
    #
    #     for name in metric_names:
    #         train_metric = train_metrics.get(name)
    #         if train_metric is not None:
    #             tensorboard.add_train_scalar(name, train_metric, epoch)
    #         val_metric = val_metrics.get(name)
    #         if val_metric is not None:
    #             tensorboard.add_validation_scalar(name, val_metric, epoch)

    ######################################################################
    # Training the model
    def train_model(self,
                    model,
                    criterion,
                    optimizer,
                    scheduler,
                    num_epochs=10):
        since = time.time()

        best_model_wts = copy.deepcopy(model.state_dict())
        best_f1 = 0.0

        model, optimizer, epoch_counter, global_step = self.model_saver.restore_checkpoint(
            model, optimizer)
        val_step = global_step

        chosen_threshold = 0.2
        for epoch in range(epoch_counter, num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    scheduler.step()
                    model.train()  # Set model to training mode
                else:
                    model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                running_f1 = 0.0
                low_threshold = 0.01
                high_threshold = 0.5
                step_threshold = 0.01
                running_th_f1 = {}
                for threshold in np.arange(low_threshold,
                                           high_threshold,
                                           step=step_threshold):
                    running_th_f1[threshold] = 0.0

                # Iterate over data.
                for n_iter, (inputs, labels,
                             _) in enumerate(self.dataloaders[phase]):
                    inputs = inputs.to(self.device)
                    labels = labels.to(self.device)
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        soft_out = torch.sigmoid(outputs)  # F.sigmoid is deprecated
                        # if n_iter % 100 == 0:
                        #     self.imshow(torchvision.utils.make_grid(torch.cat((inputs.detach().cpu(),model.stn(inputs).detach().cpu()))), title='stn')
                        th_selection_preds = {}
                        for threshold in np.arange(low_threshold,
                                                   high_threshold,
                                                   step=step_threshold):
                            th_selection_preds[threshold] = soft_out.ge(
                                threshold).type(torch.cuda.FloatTensor)

                        preds = soft_out.ge(chosen_threshold).type(
                            torch.cuda.FloatTensor)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            global_step += 1
                            loss.backward()
                            optimizer.step()
                        else:
                            val_step += 1

                    # statistics
                    if (n_iter == 1 or self.verbose) and phase == 'val':
                        # self.imshow(torchvision.utils.make_grid(torch.cat((inputs.detach().cpu(),model.stn(inputs).detach().cpu()))),title='stn')
                        for i, (true_label, pred_label) in enumerate(
                                zip(self.mfb.inverse_transform(labels),
                                    self.mfb.inverse_transform(preds))):
                            true_label_output_probs = [
                                soft_out.cpu().data.numpy()[i][x]
                                for x in true_label
                            ]
                            pred_label_output_probs = [
                                soft_out.cpu().data.numpy()[i][x]
                                for x in pred_label
                            ]
                            print('{} True labels[{}]: {}'.format(
                                phase, i, true_label))
                            print('{} True probs [{}]: {}'.format(
                                phase, i, true_label_output_probs))
                            print('{} Pred labels[{}]: {}'.format(
                                phase, i, pred_label))
                            print('{} Pred probs [{}]: {}'.format(
                                phase, i, pred_label_output_probs))

                    running_loss += loss.item() * inputs.size(0)
                    # sklearn metrics need CPU numpy arrays, not CUDA tensors
                    mf_f1 = f1_score(labels.cpu().numpy(),
                                     preds.cpu().numpy(),
                                     average='micro')
                    running_f1 += mf_f1 * inputs.size(0)

                    for key in th_selection_preds.keys():
                        th_f1 = f1_score(labels.cpu().numpy(),
                                         th_selection_preds[key].cpu().numpy(),
                                         average='micro')
                        running_th_f1[key] += th_f1 * inputs.size(0)

                    spot_loss = running_loss / (n_iter + 1)
                    if phase == 'train':
                        self.tensorboard.add_train_scalar(
                            'loss', spot_loss, global_step)
                        self.tensorboard.add_train_scalar(
                            'microF1', mf_f1, global_step)
                    else:
                        self.tensorboard.add_validation_scalar(
                            'loss', spot_loss, val_step)
                        self.tensorboard.add_validation_scalar(
                            'microF1', mf_f1, val_step)

                    if self.spot:
                        print('{} Spot:  Loss: {:.4f} F1: {:.4f} Step: {}'.
                              format(phase, spot_loss, mf_f1, global_step))

                epoch_loss = running_loss / self.dataset_sizes[phase]
                epoch_f1 = running_f1 / self.dataset_sizes[phase]
                # print('F1 for different thresholds: ')
                sorted_thresholds = sorted(running_th_f1.items(),
                                           key=lambda x: x[1],
                                           reverse=True)
                if sorted_thresholds[0][1] > running_f1 and phase == 'val':
                    chosen_threshold = (chosen_threshold +
                                        sorted_thresholds[0][0]) / 2
                # for item in sorted_thresholds:
                #     print('{} Threshold: {}, F1: {}'.format(phase, item[0], item[1] / self.dataset_sizes[phase]))
                print(
                    '{} Loss: {:.4f} F1: {:.4f} Chosen threshold {:.4f} <------------------------------------------------------------------'
                    .format(phase, epoch_loss, epoch_f1, chosen_threshold))

                # deep copy the model
                if phase == 'val':
                    if epoch_f1 > best_f1:
                        best_f1 = epoch_f1
                        best_model_wts = copy.deepcopy(model.state_dict())
                        print('[i] Saving new best F1 {:.4f}'.format(best_f1))
                        self.model_saver.save_checkpoint(
                            model, epoch, optimizer, global_step, True)
                        chosen_threshold = sorted_thresholds[0][0]

                    print("[i] Saving last epoch model.")
                    self.model_saver.save_checkpoint(model, epoch, optimizer,
                                                     global_step, False)

            print()

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best val F1: {:4f}'.format(best_f1))
        print('Best threshold: {}'.format(chosen_threshold))

        # load best model weights
        model.load_state_dict(best_model_wts)
        return model

    ######################################################################
    # Visualizing the model predictions
    def visualize_model(self, model, num_images=6):
        was_training = model.training
        model.eval()
        images_so_far = 0
        fig = plt.figure()

        with torch.no_grad():
            for i, (inputs, labels, _) in enumerate(self.dataloaders['val']):
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                outputs = model(inputs)
                preds = outputs.ge(0.2).type(torch.cuda.FloatTensor)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_images // 2, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(
                        self.mfb.inverse_transform(preds[j])))
                    self.imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        model.train(mode=was_training)
                        return
            model.train(mode=was_training)

    ######################################################################

    def train_fashion_model(self, num_epochs=10):
        model_ft = FashionModel()
        model_ft = model_ft.to(self.device)
        criterion = nn.MultiLabelSoftMarginLoss()

        params = list(model_ft.localization.parameters()) + \
                 list(model_ft.fc_loc.parameters()) + \
                 list(model_ft.resnet.fc.parameters()) + \
                 list(model_ft.fc.parameters())
        # params = list(model_ft.resnet.fc.parameters()) + list(model_ft.fc.parameters())
        optimizer_ft = optim.Adam(params)  # , lr=0.001, momentum=0.9)

        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer_ft, step_size=10,
            gamma=0.1)  # Decay LR by a factor of 0.1 every 7 epochs
        return self.train_model(model_ft,
                                criterion,
                                optimizer_ft,
                                exp_lr_scheduler,
                                num_epochs=num_epochs)

    def train_standard_resnet(self, num_epochs):
        # rescaling_weights, filtered_mask = check_label_distribution(dataloaders['train'])

        model_ft = models.resnet50(pretrained=True)

        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 228)
        model_ft = model_ft.to(self.device)

        criterion = nn.MultiLabelSoftMarginLoss()

        optimizer_ft = optim.Adam(
            model_ft.parameters())  # , lr=0.001, momentum=0.9)

        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer_ft, step_size=7,
            gamma=0.1)  # Decay LR by a factor of 0.1 every 7 epochs
        return self.train_model(model_ft,
                                criterion,
                                optimizer_ft,
                                exp_lr_scheduler,
                                num_epochs=num_epochs)

    def train_extended_standard_resnet(self, num_epochs):
        # rescaling_weights, filtered_mask = check_label_distribution(dataloaders['train'])

        model_ft = nn.Sequential(models.resnet50(pretrained=True),
                                 nn.Linear(1000, 512, bias=True), nn.Dropout(),
                                 nn.ReLU(), nn.Linear(512, 228, bias=True))

        model_ft = model_ft.to(self.device)

        criterion = nn.MultiLabelSoftMarginLoss()

        params = list(model_ft[0].fc.parameters()) + \
                 list(model_ft[1].parameters()) + \
                 list(model_ft[4].parameters())
        optimizer_ft = optim.Adam(params)

        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer_ft, step_size=7,
            gamma=0.1)  # Decay LR by a factor of 0.1 every 7 epochs
        return self.train_model(model_ft,
                                criterion,
                                optimizer_ft,
                                exp_lr_scheduler,
                                num_epochs=num_epochs)
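A hypothetical driver for the class above; the data paths are placeholders for the iMaterialist Fashion layout that MaterialistFashion expects:

if __name__ == '__main__':
    trainer = MfTrainer('data/materialist_fashion', ['train.json'],
                        batch_size=64)
    # Fine-tune a pretrained ResNet-50 head on the 228 multi-label classes.
    model = trainer.train_standard_resnet(num_epochs=10)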
Example #7
def train_and_evaluate():
    """Train the model with custom training loop, evaluating at given intervals."""

    # Set mixed precision policy
    if FLAGS.mixed_precision:
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)

    # Get dataset
    dataset = _get_dataset(dataset=FLAGS.dataset,
                           label_mode=FLAGS.label_mode,
                           input_mode=FLAGS.input_mode,
                           input_length=FLAGS.input_length,
                           seq_shift=FLAGS.seq_shift,
                           def_val=DEF_VAL)

    # Define representation
    rep = Representation(blank_index=BLANK_INDEX,
                         def_val=DEF_VAL,
                         loss_mode=FLAGS.loss_mode,
                         num_event_classes=dataset.num_event_classes(),
                         pad_val=PAD_VAL,
                         use_def=FLAGS.use_def,
                         decode_fn=FLAGS.decode_fn,
                         beam_width=FLAGS.beam_width)

    # Get model
    model = _get_model(model=FLAGS.model,
                       dataset=FLAGS.dataset,
                       num_classes=rep.get_num_classes(),
                       input_length=FLAGS.input_length,
                       l2_lambda=L2_LAMBDA)
    seq_length = model.get_seq_length()
    rep.set_seq_length(seq_length)

    # Instantiate learning rate schedule and optimizer
    if FLAGS.lr_decay_fn == "exponential":
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=FLAGS.lr_base,
            decay_steps=LR_DECAY_STEPS,
            decay_rate=FLAGS.lr_decay_rate,
            staircase=True)
    elif FLAGS.lr_decay_fn == "piecewise_constant":
        values = np.divide(FLAGS.lr_base, LR_VALUE_DIV)
        lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries=LR_BOUNDARIES, values=values.tolist())
    elif FLAGS.lr_decay_fn == "constant":
        lr_schedule = ConstantLR(FLAGS.lr_base)
    optimizer = Adam(learning_rate=lr_schedule)
    # Get LossScaleOptimizer
    if FLAGS.mixed_precision:
        optimizer = LossScaleOptimizer(optimizer=optimizer,
                                       loss_scale='dynamic')

    # Get loss function
    train_loss_fn = rep.get_loss_fn(batch_size=FLAGS.batch_size)
    eval_loss_fn = rep.get_loss_fn(batch_size=FLAGS.eval_batch_size)

    # Get train and eval dataset
    collapse_fn = rep.get_loss_collapse_fn()
    train_dataset = dataset(batch_size=FLAGS.batch_size,
                            data_dir=FLAGS.train_dir,
                            is_predicting=False,
                            is_training=True,
                            label_fn=model.get_label_fn(FLAGS.batch_size),
                            collapse_fn=collapse_fn,
                            num_shuffle=FLAGS.num_shuffle)
    eval_dataset = dataset(batch_size=FLAGS.eval_batch_size,
                           data_dir=FLAGS.eval_dir,
                           is_predicting=False,
                           is_training=False,
                           label_fn=model.get_label_fn(FLAGS.eval_batch_size),
                           collapse_fn=collapse_fn,
                           num_shuffle=FLAGS.num_shuffle)

    # Load model
    if FLAGS.model_ckpt is not None:
        logging.info("Loading model from {}".format(FLAGS.model_ckpt))
        load_status = model.load_weights(
            os.path.join(FLAGS.model_dir, "checkpoints", FLAGS.model_ckpt))
        load_status.assert_consumed()

    # Set up log writer and metrics
    train_writer = tf.summary.create_file_writer(
        os.path.join(FLAGS.model_dir, "log/train"))
    eval_writer = tf.summary.create_file_writer(
        os.path.join(FLAGS.model_dir, "log/eval"))
    train_metrics = TrainMetrics(representation=rep, writer=train_writer)
    eval_metrics = EvalMetrics(representation=rep, writer=eval_writer)

    # Save best checkpoints in terms of f1
    model_saver = ModelSaver(os.path.join(FLAGS.model_dir, "checkpoints"),
                             compare_fn=lambda x, y: x.score > y.score,
                             sort_reverse=True)

    # Keep track of total global step
    global_step = 0

    # Iterate over epochs
    for epoch in range(FLAGS.train_epochs):
        logging.info('Starting epoch %d' % (epoch, ))

        # Iterate over training batches
        for step, (train_features, train_labels, train_labels_c,
                   train_labels_l) in enumerate(train_dataset):
            # Assert sizes
            assert train_labels.shape == [
                FLAGS.batch_size, seq_length
            ], "Labels shape [batch_size, seq_length]"
            # Run the train step
            train_logits, train_loss, train_l2_loss, train_grads = train_step(
                model, train_features, train_labels, train_labels_c,
                train_labels_l, train_loss_fn, optimizer)
            # Assert sizes
            assert train_logits.shape == [
                FLAGS.batch_size, seq_length,
                rep.get_num_classes()
            ], "Logits shape [batch_size, seq_length, num_classes]"
            # Log every FLAGS.log_steps steps.
            if global_step % FLAGS.log_steps == 0:
                logging.info("Memory used: {} GB".format(
                    psutil.virtual_memory().used / 2**30))
                # Decode logits into predictions
                train_predictions_u = None
                if FLAGS.loss_mode == "ctc":
                    train_predictions_u, _ = rep.get_decode_fn(
                        FLAGS.batch_size)(train_logits)
                    train_predictions_u = rep.get_inference_collapse_fn()(
                        train_predictions_u)
                # General logs
                logging.info('Step %s in epoch %s; global step %s' %
                             (step, epoch, global_step))
                logging.info('Seen this epoch: %s samples' %
                             ((step + 1) * FLAGS.batch_size))
                logging.info('Total loss (this step): %s' %
                             float(train_loss + train_l2_loss))
                with train_writer.as_default():
                    tf.summary.scalar("training/global_gradient_norm",
                                      data=tf.linalg.global_norm(train_grads),
                                      step=global_step)
                    tf.summary.scalar('training/loss',
                                      data=train_loss,
                                      step=global_step)
                    tf.summary.scalar('training/l2_loss',
                                      data=train_l2_loss,
                                      step=global_step)
                    tf.summary.scalar('training/total_loss',
                                      data=train_loss + train_l2_loss,
                                      step=global_step)
                    tf.summary.scalar('training/learning_rate',
                                      data=lr_schedule(epoch),
                                      step=global_step)
                # Update metrics
                train_metrics.update(train_labels, train_logits,
                                     train_predictions_u)
                # Log metrics
                train_metrics.log(global_step)
                # Save latest model
                model_saver.save_latest(model=model,
                                        step=global_step,
                                        file="model")
                # Flush TensorBoard
                train_writer.flush()

            # Evaluate every FLAGS.eval_steps steps.
            if global_step % FLAGS.eval_steps == 0:
                logging.info('Evaluating at global step %s' % global_step)
                # Keep track of eval losses
                eval_losses = []
                eval_l2_losses = []
                # Iterate through eval batches
                for i, (eval_features, eval_labels, eval_labels_c,
                        eval_labels_l) in enumerate(eval_dataset):
                    # Assert sizes
                    assert eval_labels.shape == [
                        FLAGS.eval_batch_size, seq_length
                    ], "Labels shape [batch_size, seq_length]"
                    # Run the eval step
                    eval_logits, eval_loss, eval_l2_loss = eval_step(
                        model, eval_features, eval_labels, eval_labels_c,
                        eval_labels_l, eval_loss_fn)
                    eval_losses.append(eval_loss.numpy())
                    eval_l2_losses.append(eval_l2_loss.numpy())
                    # Assert sizes
                    assert eval_logits.shape == [
                        FLAGS.eval_batch_size, seq_length,
                        rep.get_num_classes()
                    ], "Logits shape [batch_size, seq_length, num_classes]"
                    # Decode logits into predictions
                    eval_predictions_u = None
                    if FLAGS.loss_mode == "ctc":
                        eval_predictions_u, _ = rep.get_decode_fn(
                            FLAGS.eval_batch_size)(eval_logits)
                        eval_predictions_u = rep.get_inference_collapse_fn()(
                            eval_predictions_u)
                    # Update metrics for this batch
                    eval_metrics.update_i(eval_labels, eval_logits,
                                          eval_predictions_u)
                # Update mean metrics
                eval_score = eval_metrics.update()
                # General logs
                eval_loss = np.mean(eval_losses)
                eval_l2_loss = np.mean(eval_l2_losses)
                logging.info('Evaluation loss: %s' %
                             float(eval_loss + eval_l2_loss))
                with eval_writer.as_default():
                    tf.summary.scalar('training/loss',
                                      data=eval_loss,
                                      step=global_step)
                    tf.summary.scalar('training/l2_loss',
                                      data=eval_l2_loss,
                                      step=global_step)
                    tf.summary.scalar('training/total_loss',
                                      data=eval_loss + eval_l2_loss,
                                      step=global_step)
                # Log metrics
                eval_metrics.log(global_step)
                # Save best models
                model_saver.save_best(model=model,
                                      score=float(eval_score),
                                      step=global_step,
                                      file="model")
                # Flush TensorBoard
                eval_writer.flush()

            # Clean up memory
            tf.keras.backend.clear_session()
            gc.collect()

            # Increment global step
            global_step += 1

        # Save and keep latest model for every 10th epoch
        if epoch % 10 == 9:
            model_saver.save_keep(model=model, step=global_step, file="model")

        logging.info('Finished epoch %s' % (epoch, ))
        optimizer.finish_epoch()

    # Save final model
    model_saver.save_latest(model=model, step=global_step, file="model")
    # Finished training
    logging.info("Finished training")
Example #8
class GloveModel:
    def __init__(self, one_file, class_file):
        self.saver = ModelSaver(one_file, class_file)
        self.one_class, self.multi_class = self.saver.load_all()
        self.one_file = one_file
        self.class_file = class_file

    def train(self, data, data_novelty):
        X, y = self.preprocessing(data)
        X_novelty, _ = self.preprocessing(data_novelty)
        print("start one")
        self.train_outlier(X, X_novelty)
        print("start two")
        self.train_class(X, y)
        print("end")
        self.saver.save_all(self.one_class, self.multi_class)

    def train_outlier(self, X, X_novelty):
        X_train = np.concatenate((X, X_novelty))
        y_train = [1] * len(X) + [-1] * len(X_novelty)
        tuned_parameters = [{
            'kernel': ['rbf'],
            'gamma': ['scale'],
            'nu': [.5, .7, .9]
        }]
        clf = GridSearchCV(OneClassSVM(),
                           tuned_parameters,
                           scoring="recall",
                           verbose=0)
        clf.fit(X_train, y_train)
        self.one_class = clf

    def train_class(self, X, y):
        param_grid = {
            'C': [0.1, 1, 10, 100],
            'gamma': ['scale'],
            'kernel': ['rbf', 'linear'],
            'degree': [1, 2, 3, 4]
        }
        self.multi_class = GridSearchCV(SVC(),
                                        param_grid,
                                        refit=True,
                                        verbose=0)
        self.multi_class.fit(X, y)

    def predict(self, X):

        is_predict = self.predict_outlier(X)
        #print(is_predict)
        if is_predict[0] == -1:
            return [""]

        pred = self.predict_class(X)

        return pred

    def predict_outlier(self, X):
        return self.one_class.predict(X)

    def predict_class(self, X):
        return self.multi_class.predict(X)

    def load_model(self):
        pass

    def save_model(self):
        with open(self.class_file, "wb") as f:
            pickle.dump(self.multi_class, f)

    def preprocessing(self, data):

        other = []
        for i in range(6):
            other.extend(["gx%d" % i, "gy%d" % i, "gz%d" % i])
        other.append("category")
        other.append("timestamp")
        X = data.drop(other, axis=1).to_numpy()
        y = data["category"].values
        return X, y
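Hypothetical usage of GloveModel, assuming pandas DataFrames with the gx/gy/gz, category, and timestamp columns that preprocessing drops; the file names are placeholders:

model = GloveModel('one_class.pkl', 'multi_class.pkl')
model.train(train_df, novelty_df)          # fit outlier detector + classifier, then persist
X_new, _ = model.preprocessing(sample_df)  # drop gyro and metadata columns
print(model.predict(X_new))                # returns [""] when flagged as an outlier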
Example #9
 def __init__(self, one_file, class_file):
     self.saver = ModelSaver(one_file, class_file)
     self.one_class, self.multi_class = self.saver.load_all()
     self.one_file = one_file
     self.class_file = class_file
Example #10
    def SaveModel(self, model_name, op_parser):
        saver = ModelSaver(model_name, op_parser)

        if self.save_config:
            saver.SaveConfigInfo(self.save_prefix)
Example #11
    trajectory_global_step = '128600'
    trajectory_name = 'success_310.txt'

    # read the config
    config = read_config()

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), scenario)

    saver_dir = os.path.join(working_dir, 'models', model_name)
    best_saver_path = os.path.join(saver_dir, 'best_model')

    # generate graph:
    network = Network(config, )
    best_saver = ModelSaver(best_saver_path, 1, 'best')

    # read trajectory
    trajectory_file_path = os.path.join(working_dir, 'trajectories',
                                        model_name, trajectory_global_step,
                                        trajectory_name)
    with open(trajectory_file_path, 'r') as f:
        endpoints = [parse_trajectory_line(l) for l in f.readlines()]
    start = endpoints[0]
    goal = endpoints[-1]
    mid = endpoints[(len(endpoints) - 1) // 2]  # integer division for a valid index

    with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=config['general']
        ['gpu_usage']))) as sess:
        x = np.linspace(-1, 1, 500)
Example #12
def train_on_tasks(task_dict, PARAMS, logger, is_fine_tuning):
    # Clear GPU cache
    torch.cuda.empty_cache()

    # Initialize the object for saving models
    model_saver = ModelSaver(model_dir="./models")

    # Get the per task eval metric against which best models are chosen
    task_eval_metrics = {task_name: [0] for task_name, task in task_dict.items()}

    # Evaluation engine for each task
    task_eval_engines = {task_name: create_eval_engine(model=task.model, is_multilabel=task.is_multilabel, n_classes=task.n_classes, cpu=PARAMS.cpu) for task_name, task in task_dict.items()}

    # Get a list of task names to determine order of training, with one entry for each batch of that task (e.g. [Maalej2015, Maalej2015, Maalej2015] for Maalej2015 if it had 3 batches)
    task_training_list = []
    for task_name, task in task_dict.items():
        task_training_list.extend([task_name]*task.train_length)

    # Training tasks only need shuffling when the tasks share a layer (i.e. during multi-task training, not fine-tuning)
    if not is_fine_tuning:
        # Set the random seeds for shuffling the train task list
        random.seed(PARAMS.random_state)
        # Shuffle task list during multi-task training so that tasks are trained roughly evenly throughout
        random.shuffle(task_training_list)

    # initialize global step number
    step_num = 0
    # Record the number of steps taken for each task in a dict
    task_steps = {task_name: 0 for task_name, task in task_dict.items()}
    # Record the number of epochs since the best performance of the model
    epochs_since_last_best = {task_name: 0 for task_name, task in task_dict.items()}

    # Specify in the logs whether a given result is from fine tuning or multi-task training
    run_type_log_prefix = "FT " if is_fine_tuning else "MTL "

    # Get the required number of epochs for training
    epochs = PARAMS.num_fine_tuning_epochs if is_fine_tuning else PARAMS.num_epochs

    def is_patience_exceeded(task_name):
        return is_fine_tuning and epochs_since_last_best[task_name] >= PARAMS.early_stopping_patience

    # Start clock before training to measure how long it takes to find a validated best model
    train_time_start = time.time()

    # Save initial model before training starts (overwriting any previous models that may have been on disc)
    for task_name, task in task_dict.items():
        model_saver.save_model(file_name=task_name, model=task.model)

    for epoch in range(epochs):
        # Clean GPU cache
        torch.cuda.empty_cache()

        # Reset iterable for each task and set model for training
        for task_name, task in task_dict.items():
            task.model.train()
            task.training_iterable = iter(task.train_data)

        # TRAIN
        for task_name in task_training_list:
            # Skip training this task if training patience already exceeded (during fine tuning only).
            # We do not skip on MTL training as there could be complex interactions between the training of multiple tasks.
            if is_patience_exceeded(task_name):
                print(f"{task_name} patience exceeded, ceasing training on this task")
                continue

            task = task_dict[task_name]

            X, y = next(task.training_iterable)

            loss_fn = task.loss_fn()

            if PARAMS.cpu:
                logits = task.model(X.cpu())
                golds = y.cpu()
            else:
                logits = task.model(X.cuda())
                golds = y.cuda()

            if task.is_multilabel:
                loss = loss_fn(logits.view(-1, task.n_classes), golds)
            else:
                loss = loss_fn(logits.view(-1, task.n_classes), golds.view(-1))

            loss.backward()
            task.optimizer.step()
            task.model.zero_grad()

            logger.log_metric(f'{run_type_log_prefix} {task_name} - loss', x=task_steps[task_name], y=loss.item())

            # Only log overall loss when the tasks have a shared language model layer. During fine tuning, their models are no longer shared, making this metric useless.
            if not is_fine_tuning:
                logger.log_metric(f'{run_type_log_prefix} overall loss', x=step_num, y=loss.item())

            step_num += 1
            task_steps[task_name] += 1

            # Moves the golds and logits from the GPU
            del golds, logits, loss

        # VALIDATE
        for task_name, task in task_dict.items():
            torch.cuda.empty_cache()

            with torch.no_grad():
                task.model.eval()

                if is_patience_exceeded(task_name):
                    print(f"{task_name} patience exceeded, ceasing evaluation on this task")
                    continue

                validation_results = task_eval_engines[task_name].run(task.valid_data).metrics
                logger.log_results(run_type_log_prefix + task_name, "valid", epoch, validation_results)

                # What metric will we compare all previous performance against
                comparison_metric = validation_results[PARAMS.best_metric]

                if comparison_metric > max(task_eval_metrics[task_name]):
                    model_saver.save_model(file_name=task_name, model=task.model)
                    epochs_since_last_best[task_name] = 0
                else:
                    epochs_since_last_best[task_name] += 1
                task_eval_metrics[task_name].append(comparison_metric)

    train_time_end = time.time()

    task_eval_metrics["time_elapsed"] = train_time_end - train_time_start

    # TEST
    task_test_metrics = {task_name: None for task_name, task in task_dict.items()}
    for task_name, task in task_dict.items():
        torch.cuda.empty_cache()

        with torch.no_grad():
            task.model.eval()

            model_saver.load_model(file_name=task_name, model=task.model)

            test_engine = create_eval_engine(model=task.model, is_multilabel=task.is_multilabel, n_classes=task.n_classes, cpu=PARAMS.cpu)
            test_results = test_engine.run(task.test_data).metrics

            task_test_metrics[task_name] = test_results

            epoch = 1 if is_fine_tuning else 0
            logger.log_results(run_type_log_prefix + task_name, "test", epoch, test_results)

    return task_eval_metrics, task_test_metrics
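The ModelSaver in this example is constructed with a model_dir and saves/loads by file name. A plausible PyTorch sketch consistent with those calls (hypothetical, not the project's actual class):

import os
import torch

class ModelSaver:
    def __init__(self, model_dir):
        self.model_dir = model_dir
        os.makedirs(model_dir, exist_ok=True)

    def save_model(self, file_name, model):
        # Persist only the weights; the caller rebuilds the architecture.
        torch.save(model.state_dict(),
                   os.path.join(self.model_dir, file_name + '.pt'))

    def load_model(self, file_name, model):
        # Load weights in place into the caller's model instance.
        model.load_state_dict(
            torch.load(os.path.join(self.model_dir, file_name + '.pt')))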
Example #13
def run_for_config(config):
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sequential', scenario)
    init_dir(working_dir)

    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    weights_log_dir = os.path.join(saver_dir, 'weights_logs')
    init_dir(weights_log_dir)
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories',
                                         model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = NetworkSequential(config,
                                game.get_state_space_size(),
                                game.get_action_space_size(),
                                is_rollout_agent=False)
    network_variables = network.get_all_variables()

    # save model
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'),
                              2,
                              'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'),
                            1,
                            'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']
                ['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        episode_runner = EpisodeRunnerSequential(
            config,
            game,
            curriculum_coefficient=get_initial_curriculum(config))

        trainer = TrainerSequential(model_name, config, working_dir, network,
                                    sess, episode_runner, summaries_collector)

        decrease_learn_rate_if_static_success = config['model'][
            'decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model'][
            'stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        global_step = 0
        best_cost, best_cost_global_step, best_curriculum_coefficient = None, None, None
        no_test_improvement, consecutive_learn_rate_decrease = 0, 0

        for cycle in range(config['general']['training_cycles']):
            print_and_log('starting cycle {}'.format(cycle))

            global_step, success_ratio = trainer.train_policy(global_step)

            if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                network.decrease_base_std(sess)
                print_and_log('new base stds {}'.format(
                    network.get_base_std(sess)))

            print_and_log('done training cycle {} global step {}'.format(
                cycle, global_step))

            # save every now and then
            if cycle % config['general']['save_every_cycles'] == 0:
                latest_saver.save(sess, global_step=global_step)

            if cycle % config['general']['test_frequency'] == 0:
                # do test
                test_successes, test_cost, _, endpoints_by_path = trainer.collect_data(
                    config['general']['test_episodes'],
                    is_train=False,
                    use_fixed_start_goal_pairs=True)
                summaries_collector.write_test_success_summaries(
                    sess, global_step, test_successes, test_cost,
                    episode_runner.curriculum_coefficient)
                with open(episodic_success_rates_path, 'a') as f:
                    f.write('{} {} {}{}'.format(
                        trainer.train_episodes_counter, test_successes,
                        test_cost, os.linesep))

                # decide how to act next
                print_and_log('old cost was {} at step {}'.format(
                    best_cost, best_cost_global_step))
                print_and_log('current learn rates {}'.format(
                    network.get_learn_rate(sess)))
                print_and_log('current base stds {}'.format(
                    network.get_base_std(sess)))
                if best_cost is None or test_cost < best_cost:
                    print_and_log('new best cost {} at step {}'.format(
                        test_cost, global_step))
                    best_cost, best_cost_global_step = test_cost, global_step
                    best_curriculum_coefficient = episode_runner.curriculum_coefficient
                    no_test_improvement = 0
                    consecutive_learn_rate_decrease = 0
                    best_saver.save(sess, global_step)
                    test_trajectories_file = os.path.join(
                        test_trajectories_dir, '{}.txt'.format(global_step))
                    serialize_compress(endpoints_by_path,
                                       test_trajectories_file)
                else:
                    print_and_log(
                        'new model is not the best with cost {} at step {}'.
                        format(test_cost, global_step))
                    no_test_improvement += 1
                    print_and_log('no improvement count {} of {}'.format(
                        no_test_improvement,
                        decrease_learn_rate_if_static_success))
                    if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                        # restore the model every once in a while if did not find a better solution in a while
                        best_saver.restore(sess)
                        episode_runner.curriculum_coefficient = best_curriculum_coefficient
                    if no_test_improvement == decrease_learn_rate_if_static_success:
                        # restore the best model
                        if config['model']['restore_on_decrease']:
                            best_saver.restore(sess)
                            episode_runner.curriculum_coefficient = best_curriculum_coefficient
                        network.decrease_learn_rates(sess)

                        no_test_improvement = 0
                        consecutive_learn_rate_decrease += 1
                        print_and_log('decreasing learn rates {} of {}'.format(
                            consecutive_learn_rate_decrease,
                            stop_training_after_learn_rate_decrease))
                        print_and_log('new learn rates {}'.format(
                            network.get_learn_rate(sess)))
                        if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                            print_and_log('needs to stop')
                            best_saver.restore(sess)
                            break

            if episode_runner.curriculum_coefficient is not None:
                if success_ratio > config['curriculum'][
                        'raise_when_train_above']:
                    print_and_log('current curriculum coefficient {}'.format(
                        episode_runner.curriculum_coefficient))
                    episode_runner.curriculum_coefficient *= config[
                        'curriculum']['raise_times']
                    print_and_log('curriculum coefficient raised to {}'.format(
                        episode_runner.curriculum_coefficient))

            # mark in log the end of cycle
            print_and_log(os.linesep)

        print_and_log('end of run best: {} from step: {}'.format(
            best_cost, best_cost_global_step))
        print_and_log('testing on a new set of start-goal pairs')
        best_saver.restore(sess)
        test_trajectories_file = os.path.join(test_trajectories_dir, '-1.txt')
        endpoints_by_path = trainer.collect_data(
            config['general']['test_episodes'],
            is_train=False,
            use_fixed_start_goal_pairs=True)[-1]
        serialize_compress(endpoints_by_path, test_trajectories_file)

        close_log()
        return best_cost
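
Example #13 pairs a rolling 'latest' checkpoint with a 'best' checkpoint and drives a plateau schedule off the test cost: after decrease_learn_rate_if_static_success non-improving tests the best model is restored and the learning rates are lowered, and training stops once that has happened stop_training_after_learn_rate_decrease times in a row. A condensed sketch of just that control flow follows; the function and state names are assumed for illustration, not taken from the code above.

# Plateau-schedule sketch mirroring the control flow above; names assumed.
def plateau_step(improved, state, max_stale, max_decreases):
    """Return 'continue', 'decrease' (restore best, lower LR), or 'stop'."""
    if improved:
        state['stale'] = 0
        state['decreases'] = 0
        return 'continue'
    state['stale'] += 1
    if state['stale'] < max_stale:
        return 'continue'
    state['stale'] = 0
    state['decreases'] += 1
    return 'stop' if state['decreases'] >= max_decreases else 'decrease'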
Example #14
0
    def _get_callbacks(self):
        tensor_board_callback = MyTensorBoard(log_dir=self._log_directory,
                                              histogram_freq=1,
                                              embeddings_layer_names=True,
                                              write_graph=True)
        model_saver_callback = ModelSaver(self._save_model_path,
                                          monitor='mean_q', mode='max',
                                          logger=self._logger)
        episode_logger_callback = EpisodeLogger(logger=self._logger)
        return [tensor_board_callback, model_saver_callback,
                episode_logger_callback]
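
The ModelSaver in Example #14 is a monitor/mode-style callback: it watches one metric from the training logs ('mean_q' hints at a keras-rl agent) and saves only when that metric improves in the given direction. Here is a minimal sketch of such a callback, assuming a Keras-style on_epoch_end hook; this is not the ModelSaver used above.

import numpy as np
from tensorflow.keras.callbacks import Callback

class MinimalModelSaver(Callback):
    """Save the model whenever the monitored metric improves (sketch)."""
    def __init__(self, save_path, monitor='mean_q', mode='max'):
        super().__init__()
        self.save_path = save_path
        self.monitor = monitor
        self.better = np.greater if mode == 'max' else np.less
        self.best = -np.inf if mode == 'max' else np.inf

    def on_epoch_end(self, epoch, logs=None):
        current = (logs or {}).get(self.monitor)
        if current is not None and self.better(current, self.best):
            self.best = current
            self.model.save(self.save_path)

mode='max' fits metrics that should grow (accuracy, mean_q), while mode='min' suits losses.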