Example #1
 def __init__(self, model, action_space, meta_data_util, config, constants):
     self.max_epoch = constants["max_epochs"]
     self.model = model
     self.action_space = action_space
     self.meta_data_util = meta_data_util
     self.config = config
     self.constants = constants
     self.tensorboard = Tensorboard()
     self.entropy_coef = constants["entropy_coefficient"]
     self.optimizer = optim.Adam(model.get_parameters(),
                                 lr=constants["learning_rate"])
     AbstractLearning.__init__(self, self.model, self.calc_loss,
                               self.optimizer, self.config, self.constants)
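The Tensorboard object created above is a project-specific wrapper, not a stock TensorBoard API. As a rough, hypothetical sketch of what such a wrapper might look like, assuming only the constructor signatures and log_scalar(tag, value) calls visible in the examples on this page, and delegating to torch.utils.tensorboard.SummaryWriter:

# Hypothetical minimal wrapper, assuming only the Tensorboard(), Tensorboard(experiment_name),
# Tensorboard(log_dir=...) and log_scalar(tag, value) usages seen on this page;
# the real project class may differ.
from torch.utils.tensorboard import SummaryWriter


class Tensorboard:
    def __init__(self, experiment_name="experiment", log_dir=None):
        # One writer per experiment; SummaryWriter creates the log directory if needed.
        self.writer = SummaryWriter(log_dir=log_dir, comment=experiment_name)
        self.step = 0

    def log_scalar(self, tag, value):
        # The examples call log_scalar without an explicit step, so an internal counter is assumed.
        self.writer.add_scalar(tag, value, self.step)
        self.step += 1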
Example #2
    def __init__(self, args):
        self.args = args
        self.model = None
        self.optimizer = None
        self.scheduler = None
        self.epoch = 0

        # s = State(args)
        set_seed(self.args.seed, self.args.cudnn_behavoir)
        self.log = Log(self.args.log_path)
        self.writer = Tensorboard(self.args.tensorboard_path)
        self.stati  = Statistic(self.args.expernameid, self.args.experid_path, self.args.root_path)
        self.stati.add('hparam', self.args.dict())
        # s.writer.add_hparams(hparam_dict=s.args.dict(), metric_dict={})
        self.record = Record()
Example #3
    def train_from_learned_homing_policies(self,
                                           env,
                                           load_folder,
                                           train_episodes,
                                           experiment_name,
                                           logger,
                                           use_pushover,
                                           trial=1):

        horizon = self.config["horizon"]
        actions = self.config["actions"]
        num_state_budget = self.constants["num_homing_policy"]
        logger.log("Training episodes %d" % train_episodes)

        tensorboard = Tensorboard(log_dir=self.config["save_path"])

        homing_policies = dict(
        )  # Contains a set of homing policies for every time step

        # Load homing policy from folder
        logger.log("Loading Homing policies...")
        for step in range(1, horizon + 1):

            homing_policies[step] = []

            for i in range(0, num_state_budget):
                # TODO can fail if the policy doesn't exist. Add checks to prevent that.
                policy_folder_name = load_folder + "/trial_%d_horizon_%d_homing_policy_%d/" % (
                    trial, step, i)
                if not os.path.exists(policy_folder_name):
                    logger.log("Did not find %s" % policy_folder_name)
                    continue
                previous_step_homing_policy = None if step == 1 else homing_policies[
                    step - 1]
                policy = self.reward_free_planner.read_policy(
                    policy_folder_name, step, previous_step_homing_policy)
                homing_policies[step].append(policy)
        logger.log("Loaded Homing policy.")

        logger.log(
            "Reward Sensitive Learning: Computing the optimal policy for the given reward"
        )

        # Compute the optimal policy
        psdp_start = time.time()
        approx_optimal_policy, _, info = self.reward_sensitive_planner.train(
            None, env, actions, horizon, None, homing_policies, logger,
            tensorboard, True, use_pushover)
        logger.log("PSDP Time %r" % (time.time() - psdp_start))

        train_episodes = train_episodes + info["total_episodes"]
        train_reward = info["sum_rewards"]

        # Evaluate the optimal policy
        return policy_evaluate.evaluate(env, approx_optimal_policy, horizon,
                                        logger, train_episodes, train_reward)
Example #4
def handler(context):
    dataset_alias = context.datasets
    dataset_id = dataset_alias['train']  # set alias specified in console
    data = list(load_dataset_from_api(dataset_id))

    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    simple_net = SimpleNet(num_classes)
    model = L.Classifier(simple_net)

    if USE_GPU >= 0:
        chainer.cuda.get_device(USE_GPU).use()  # Make a specified GPU current
        model.to_gpu()

    def make_optimizer(model, alpha=0.001, beta1=0.9):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        return optimizer

    optimizer = make_optimizer(model)

    train_data = ImageDatasetFromAPI(train_data_raw)
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_data = ImageDatasetFromAPI(test_data_raw)
    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 batch_size,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, (epochs, 'epoch'),
                               out=ABEJA_TRAINING_RESULT_DIR)

    trainer.extend(extensions.Evaluator(test_iter, model, device=USE_GPU))

    trainer.extend(extensions.snapshot_object(simple_net, 'simple_net.model'),
                   trigger=(epochs, 'epoch'))

    report_entries = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]

    trainer.extend(extensions.LogReport())
    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))

    trainer.extend(extensions.PrintReport(report_entries))

    trainer.run()
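Here (and in Example #8 below) Tensorboard(report_entries, out_dir=log_path) is passed to trainer.extend(...), so the name refers to a Chainer trainer extension rather than the wrapper sketched after Example #1. A minimal sketch of such an extension, assuming it simply forwards the reported entries to a tensorboardX writer (an assumption, not the project's actual code):

# Hypothetical Chainer trainer extension, inferred from the trainer.extend(...) usage above.
from chainer.training import extension
from tensorboardX import SummaryWriter


class Tensorboard(extension.Extension):
    def __init__(self, entries, out_dir='logs'):
        self.entries = entries
        self.writer = SummaryWriter(log_dir=out_dir)

    def __call__(self, trainer):
        # trainer.observation holds the values reported during the current iteration.
        step = trainer.updater.iteration
        for key in self.entries:
            if key in trainer.observation:
                value = trainer.observation[key]
                # Reported values may be chainer.Variable objects; unwrap to a plain float.
                self.writer.add_scalar(key, float(getattr(value, 'data', value)), step)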
Example #5
    def do_test_(shared_model,
                 config,
                 action_space,
                 meta_data_util,
                 constants,
                 test_dataset,
                 experiment_name,
                 rank,
                 server,
                 logger,
                 model_type,
                 use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        tune_dataset_size = len(test_dataset)

        local_model.load_from_state_dict(shared_model.get_state_dict())

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test(test_dataset,
                       tensorboard=tensorboard,
                       logger=logger,
                       pushover_logger=pushover_logger)
Example #6
    def do_test_(house_id, goal_prediction_model, navigation_model, action_type_model, config,
                 action_space, meta_data_util, constants, test_dataset,
                 experiment_name, rank, server, logger, vocab, goal_type, use_pushover=False):

        logger.log("In Testing...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = None
        else:
            pushover_logger = None

        # Create the Agent
        tmp_agent = HouseDecoupledPredictorNavigatorAgent(server=server,
                                                          goal_prediction_model=goal_prediction_model,
                                                          navigation_model=navigation_model,
                                                          action_type_model=action_type_model,
                                                          test_policy=test_policy,
                                                          action_space=action_space,
                                                          meta_data_util=meta_data_util,
                                                          config=config,
                                                          constants=constants)
        logger.log("Created Agent.")
        tune_dataset_size = len(test_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            # tmp_agent.test_single_step(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
            #                            logger=logger, pushover_logger=pushover_logger)
            # tmp_agent.test_multi_step(test_dataset, vocab, num_outer_loop_steps=10, num_inner_loop_steps=4,
            #                           goal_type=goal_type, tensorboard=tensorboard, logger=logger,
            #                           pushover_logger=pushover_logger)
            # tmp_agent.test_multi_step_action_types(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
            #                                        logger=logger, pushover_logger=pushover_logger)
            tmp_agent.test_goal_distance(house_id, test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
                                                   logger=logger, pushover_logger=pushover_logger)
Example #7
    def do_test(house_id, chaplot_baseline, config, action_space,
                meta_data_util, constants, test_dataset, experiment_name, rank,
                server, logger):

        # torch.manual_seed(args.seed + rank)

        # Launch the Unity Build
        launch_k_unity_builds([config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        # Initialize Server
        server.initialize_server()
        server.clear_metadata()
        logger.log("Server Initialized")

        # Test policy
        test_policy = gp.get_argmax_action

        # Create the Agent
        agent = TmpHouseAgent(server=server,
                              model=chaplot_baseline,
                              test_policy=test_policy,
                              action_space=action_space,
                              meta_data_util=meta_data_util,
                              config=config,
                              constants=constants)

        # Create tensorboard server
        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server...')
        else:
            tensorboard = None

        agent.test(test_dataset,
                   vocab=None,
                   tensorboard=tensorboard,
                   logger=logger)
Example #8
def handler(context):
    # Triggers
    log_trigger = (50, 'iteration')
    validation_trigger = (2000, 'iteration')
    end_trigger = (nb_iterations, 'iteration')

    # Dataset
    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    val_dataset_id = dataset_alias['val']
    train = SegmentationDatasetFromAPI(train_dataset_id)
    val = SegmentationDatasetFromAPI(val_dataset_id)
    class_weight = calc_weight(train)

    print(class_weight)

    train = TransformDataset(train, transform)

    # Iterator
    train_iter = iterators.SerialIterator(train, BATCHSIZE)
    val_iter = iterators.SerialIterator(val,
                                        BATCHSIZE,
                                        shuffle=False,
                                        repeat=False)

    # Model
    model = SegNetBasic(n_class=len(camvid_label_names))
    model = PixelwiseSoftmaxClassifier(model, class_weight=class_weight)

    if USE_GPU >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    # Updater
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)

    # Trainer
    trainer = training.Trainer(updater,
                               end_trigger,
                               out=ABEJA_TRAINING_RESULT_DIR)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.extend(extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}'),
                   trigger=end_trigger)

    print_entries = [
        'iteration', 'main/loss', 'validation/main/miou',
        'validation/main/mean_class_accuracy', 'validation/main/pixel_accuracy'
    ]

    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'validation/main/miou',
        'validation/main/mean_class_accuracy', 'validation/main/pixel_accuracy'
    ]

    trainer.extend(Statistics(report_entries,
                              nb_iterations,
                              obs_key='iteration'),
                   trigger=log_trigger)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_trigger)

    trainer.extend(SemanticSegmentationEvaluator(val_iter, model.predictor,
                                                 camvid_label_names),
                   trigger=validation_trigger)

    trainer.run()
Example #9
    def do_train_(simulator_file,
                  shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousContextualBandit(shared_model, local_model,
                                               action_space, meta_data_util,
                                               config, constants, tensorboard)

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"] + constants[
                    "max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                state = AgentObservedState(instruction=data_point.instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                meta_data_util.start_state_update_metadata(state, metadata)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    meta_data_util.state_update_metadata(state, metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                if tensorboard is not None:
                    meta_data_util.state_update_metadata(tensorboard, metadata)

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        volatile=volatile)
                    batch_replay_items.append(replay_item)

                # Perform update
                if len(batch_replay_items) > 0:
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        tensorboard.log_scalar("loss", loss_val)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)
Example #10
    logging.log(logging.DEBUG, "CREATING MODEL")
    model = IncrementalModelChaplot(config, constants)
    model.load_saved_model(
        "./results/model-folder-name/contextual_bandit_5_epoch_4")
    logging.log(logging.DEBUG, "MODEL CREATED")

    # Create the agent
    logging.log(logging.DEBUG, "STARTING AGENT")
    agent = HumanDrivenAgent(server=server,
                             model=model,
                             test_policy=test_policy,
                             action_space=action_space,
                             meta_data_util=meta_data_util,
                             config=config,
                             constants=constants)

    # create tensorboard
    tensorboard = Tensorboard("Human-Driven-Agent")

    dev_dataset = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json", config)

    agent.test(dev_dataset, tensorboard)

    server.kill()

except Exception:
    server.kill()
    exc_info = sys.exc_info()
    traceback.print_exception(*exc_info)
    # raise e
Example #11
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  args,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousContextualBandit(shared_model, local_model,
                                               action_space, meta_data_util,
                                               config, constants, tensorboard)

        # Launch unity
        launch_k_unity_builds([
            config["port"]
        ], "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64"
                              )

        for epoch in range(1, max_epochs + 1):

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                # max_num_actions = len(data_point.get_trajectory())
                # max_num_actions += self.constants["max_extra_horizon"]
                max_num_actions = constants["horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    rewards = learner.get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   all_rewards=rewards)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                rewards = learner.get_all_rewards(metadata)
                total_reward += reward

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        all_rewards=rewards)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
                        entropy = float(learner.entropy.data[0])
                        tensorboard.log_scalar("entropy", entropy)

                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))

            logging.info("Training data action counts %r", action_counts)
Example #12
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousSupervisedLearning(shared_model, local_model,
                                                 action_space, meta_data_util,
                                                 config, constants,
                                                 tensorboard)

        # Launch unity
        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")

        for epoch in range(1, max_epochs + 1):

            learner.epoch = epoch

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                trajectory = data_point.get_trajectory()
                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)

                    action_counts[action] += 1

                    # Generate goal
                    if config["do_goal_prediction"]:
                        goal = learner.goal_prediction_calculator.get_goal_location(
                            metadata, data_point, 8, 8)
                        # learner.goal_prediction_calculator.save_attention_prob(image, volatile)
                        # time.sleep(5)
                    else:
                        goal = None

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile,
                                                   goal=goal)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)

                # Generate goal
                if config["do_goal_prediction"]:
                    goal = learner.goal_prediction_calculator.get_goal_location(
                        metadata, data_point, 8, 8)
                    # learner.goal_prediction_calculator.save_attention_prob(image, volatile)
                    # time.sleep(5)
                else:
                    goal = None

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities,
                    volatile=volatile,
                    goal=goal)
                batch_replay_items.append(replay_item)

                ###########################################
                AsynchronousSupervisedLearning.save_goal(
                    batch_replay_items, data_point_ix, trajectory)
                ###########################################

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32:
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)

                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.goal_prob is not None:
                            goal_prob = float(learner.goal_prob.data[0])
                            tensorboard.log_scalar("goal_prob", goal_prob)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/supervised_learning_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test_goal_prediction(tune_dataset,
                                           tensorboard=tensorboard,
                                           logger=logger,
                                           pushover_logger=pushover_logger)
Example #13
    # Load settings
    if args.conf_file:
        cfg_from_file(args.conf_file)

    # For train and test, usually we do not need cache; unless overridden by amend
    cfg.TEST.NO_CACHE = True
    if args.set_cfgs:
        cfg_from_list(args.set_cfgs)

    # Record logs into cfg
    cfg.LOG.CMD = ' '.join(sys.argv)
    cfg.LOG.TIME = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    np.random.seed(int(cfg.RNG_SEED))

    if cfg.TENSORBOARD.ENABLE:
        tb.client = Tensorboard(hostname=cfg.TENSORBOARD.HOSTNAME,
                                port=cfg.TENSORBOARD.PORT)
        tb.sess = tb.client.create_experiment(cfg.NAME + '_' + cfg.LOG.TIME)

    if args.train == 'true' or args.train == 'True':  # the training entrance
        # Get training imdb
        imdb = get_imdb(cfg.TRAIN.DB)
        roidb = get_training_roidb(imdb)

        # Redirect stderr
        output_dir = get_output_dir(imdb.name, cfg.NAME + '_' + cfg.LOG.TIME)
        f = open(osp.join(output_dir, 'stderr.log'), 'w', 0)
        os.dup2(f.fileno(), sys.stderr.fileno())
        os.dup2(sys.stderr.fileno(), sys.stderr.fileno())

        # Edit solver and train prototxts
        target_sw = osp.join(output_dir, 'solver.prototxt')
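The Tensorboard object in this example is different again: it is built with a hostname and port and exposes create_experiment(...), which resembles a Crayon-style logging client. A hypothetical sketch matching just those two calls (names and behaviour are assumptions, not the project's actual code):

# Hypothetical client-style wrapper matching the hostname/port constructor and the
# create_experiment(name) call in Example #13; the real class may talk to a remote server.
from tensorboardX import SummaryWriter


class Tensorboard:
    def __init__(self, hostname='localhost', port=6006):
        # The host and port are only recorded here; a real client would use them.
        self.hostname = hostname
        self.port = port

    def create_experiment(self, name):
        # Return a writer whose log directory is named after the experiment.
        return SummaryWriter(log_dir='runs/' + name)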
Example #14
    def train(self,
              experiment,
              env,
              env_name,
              num_processes,
              experiment_name,
              logger,
              use_pushover,
              debug,
              homing_policy_validation_fn,
              trial=1,
              do_reward_sensitive_learning=False):
        """ Execute HOMER algorithm on an environment using
        :param experiment:
        :param env:
        :param env_name:
        :param num_processes:
        :param experiment_name:
        :param logger:
        :param use_pushover: True/False based on whether pushover is used
        :param debug:
        :param homing_policy_validation_fn:
        :param trial:
        :param do_reward_sensitive_learning:
        :return:
        """

        horizon = self.config["horizon"]
        actions = self.config["actions"]
        num_samples = self.constants["encoder_training_num_samples"]
        tensorboard = Tensorboard(log_dir=self.config["save_path"])

        homing_policies = dict(
        )  # Contains a set of homing policies for every time step
        encoding_function = None  # Learned encoding function for the current time step
        dataset = []  # Dataset of samples collected for training the encoder
        replay_memory = dict(
        )  # Replay memory of *all* deviation transitions indexed by time step

        for step in range(1, horizon + 1):

            logger.log("Running Homer: Step %r out of %r " % (step, horizon))

            homing_policies[step] = []  # Homing policies for this time step
            replay_memory[step] = []  # Replay memory for this time step

            # Step 1: Create dataset for learning the encoding function. A single datapoint consists of a transition
            # (x, a, x') and a 0-1 label y, where y=1 if the transition was observed and y=0 otherwise.
            time_collection_start = time.time()
            dataset = self.encoder_sampler.gather_samples(
                env, actions, step, homing_policies, num_samples, dataset)
            replay_memory[step] = [
                dp for dp in dataset
                if dp.is_valid() == 1 and dp.get_timestep() == step
            ]
            logger.log("Encoder: %r samples collected in %r sec" %
                       (num_samples, time.time() - time_collection_start))

            # Step 2: Perform binary classification on the dataset. The classifier f(x, a, x') is trained to predict
            # the probability that a transition (x, a, x') was observed. There are two types of classifiers that we
            # support. The first classifier has an internal bottleneck feature that allows for recovering the state
            # abstraction function, while the other performs clustering on top of a trained model without discretization.
            time_encoder_start = time.time()

            encoding_function, num_state_budget = self.train_encoding_function.do_train(
                dataset,
                logger,
                tensorboard,
                debug,
                bootstrap_model=encoding_function,
                undiscretized_initialization=True,
                category="backward")

            self.util.save_encoder_model(encoding_function, experiment, trial,
                                         step, "backward")
            logger.log("Encoder: Training time %r" %
                       (time.time() - time_encoder_start))

            # Step 3: Find which abstract states should be explored. This is basically done based on which
            # abstract states have a non-zero count. For example, one can specify a really high budget for abstract
            # states but most of them are never used. This is not a problem when using the clustering oracle.
            count_stats, observation_samples = self.util.get_abstract_state_counts(
                encoding_function, dataset)
            abstract_states_to_explore = self.find_abstract_states_to_explore(
                count_stats, num_state_budget, step)
            logger.log("Abstract State by Counts: %r" % count_stats)
            logger.debug("Abstract States to explore %r" %
                         abstract_states_to_explore)

            # Step 4: Learn homing policies by planning to reach different abstract states
            if num_processes == 1:  # Single process needed. Run it on the current process.
                self.single_process_ps(env, actions, step, replay_memory,
                                       homing_policies,
                                       abstract_states_to_explore, tensorboard,
                                       encoding_function, logger, use_pushover)
            else:
                self.multi_processing_ps(experiment, env, env_name, actions,
                                         step, replay_memory, homing_policies,
                                         abstract_states_to_explore,
                                         num_processes, encoding_function,
                                         logger, use_pushover, trial)
            logger.log("Homer step %r took time %r" %
                       (step, time.time() - time_collection_start))

            # Step 5 (Optional): Automatic evaluation of homing policies if possible. A validation function can
            # check whether the homing policies have good coverage over the underlying state.
            if homing_policy_validation_fn is not None:

                state_dist, _ = self.util.evaluate_homing_policy(
                    env, homing_policies, step, logger)

                if not homing_policy_validation_fn(state_dist, step):
                    logger.log(
                        "Didn't find a useful policy cover for step %r" % step)
                    return policy_evaluate.generate_failure_result(
                        env, env.num_eps)
                else:
                    logger.log("Found useful policy cover for step %r " % step)

            # Step 6 (Optional): Performing debugging based on learned state abstraction and
            # policy cover for this time step.
            if debug:
                # Log the environment reward received by the policy
                self.util.log_homing_policy_reward(env, homing_policies, step,
                                                   logger)

                if self.config["feature_type"] == "image":
                    # For environments generating images, it is often not possible to get access to the underlying
                    # state, therefore we save images for debugging.
                    self.util.save_homing_policy_figures(
                        env, env_name, homing_policies, step)

                    # Save the abstract state and an image
                    if observation_samples is not None:
                        self.util.save_abstract_state_figures(
                            env_name, observation_samples, step)

                    # Save newly explored states
                    self.util.save_newly_explored_states(
                        env_name, dataset, step)

        if not do_reward_sensitive_learning:

            return dict()
        else:

            logger.log(
                "Reward Sensitive Learning: Computing the optimal policy for the environment reward function"
            )

            # Compute the optimal policy
            reward_planning_start_time = time.time()
            approx_optimal_policy, _, info = self.reward_sensitive_planner.train(
                replay_memory=replay_memory,
                env=env,
                actions=actions,
                horizon=horizon,
                reward_func=None,
                homing_policies=homing_policies,
                logger=logger,
                tensorboard=tensorboard,
                debug=True,
                use_pushover=use_pushover)
            logger.log("Reward Sensitive Learning: Time %r" %
                       (time.time() - reward_planning_start_time))

            logger.log(
                "Actual: Total number of episodes used %d. Total return %f." %
                (env.num_eps, env.sum_total_reward))

            # Evaluate the optimal policy
            return policy_evaluate.evaluate(env, approx_optimal_policy,
                                            horizon, logger, env.num_eps,
                                            env.sum_total_reward)
Example #15
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousAdvantageActorGAECritic(shared_model,
                                                      local_model,
                                                      action_space,
                                                      meta_data_util, config,
                                                      constants, tensorboard)

        # Launch unity
        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")

        for epoch in range(1, max_epochs + 1):

            learner.epoch = epoch
            task_completion_accuracy = 0
            mean_stop_dist_error = 0
            stop_dist_errors = []
            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"] + constants[
                    "max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.goal = GoalPrediction.get_goal_location(
                    metadata, data_point, learner.image_height,
                    learner.image_width)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    # Generate goal
                    if config["do_goal_prediction"]:
                        goal = learner.goal_prediction_calculator.get_goal_location(
                            metadata, data_point, learner.image_height,
                            learner.image_width)
                    else:
                        goal = None

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile,
                                                   goal=goal)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    state.goal = GoalPrediction.get_goal_location(
                        metadata, data_point, learner.image_height,
                        learner.image_width)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1
                mean_stop_dist_error += metadata["stop_dist_error"]
                stop_dist_errors.append(metadata["stop_dist_error"])

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        volatile=volatile,
                        goal=goal)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # a minimum batch size (e.g. 32) could be required here instead
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        v_value_loss_per_step = float(
                            learner.value_loss.data[0]) / float(num_actions +
                                                                1)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("v_value_loss_per_step",
                                               v_value_loss_per_step)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)
            mean_stop_dist_error = mean_stop_dist_error / float(
                len(train_dataset))
            task_completion_accuracy = (task_completion_accuracy *
                                        100.0) / float(len(train_dataset))
            logger.log("Training: Mean stop distance error %r" %
                       mean_stop_dist_error)
            logger.log("Training: Task completion accuracy %r " %
                       task_completion_accuracy)
            bins = range(0, 80, 3)  # range of distance
            histogram, _ = np.histogram(stop_dist_errors, bins)
            logger.log("Histogram of train errors %r " % histogram)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)
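
The rollout loop above relies on two small helpers that are not shown in this example: gp.sample_action_from_prob (used during training) and gp.get_argmax_action (used as the test policy). A minimal sketch of what such helpers could look like, assuming they operate on a plain probability vector; these are illustrative stand-ins, not the project's actual implementations.

import numpy as np

def sample_action_from_prob(probabilities):
    # Draw one action index according to the given probability vector.
    probabilities = np.asarray(probabilities, dtype=np.float64)
    probabilities /= probabilities.sum()  # guard against rounding drift
    return int(np.random.choice(len(probabilities), p=probabilities))

def get_argmax_action(probabilities):
    # Deterministic test-time policy: pick the most likely action.
    return int(np.argmax(probabilities))
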
Example #16
0
def main():

    tensorboard_directory = './tmp/tensorboard/001'
    tensorboard_paths = [
        r'C:\Users\parth\Documents\GitHub\Kaggle-Santander-Value-Prediction-Challenge\tmp\tensorboard\001'
    ]
    tensorboard_names = ['rmse']

    # Model Parameters
    # --------------------------------------------------------------------------

    use_dropout = False
    use_batch_norm = False

    # Dropout inputs
    #     use : to use dropout in this layer
    #     rate : dropout rate
    dropout_parameters = [{
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }]

    # Fully Connected Layers unit size
    fc_parameters = [{
        'units': 5000
    }, {
        'units': 5000
    }, {
        'units': 5000
    }, {
        'units': 5000
    }]
    num_dense = len(fc_parameters)

    data_shape = [None, 4990]
    batch_size = 500
    val_size = 5000
    epochs = 100000
    learning_rate = 0.001
    session = tf.Session()

    Tensorboard.make(paths=tensorboard_paths,
                     names=tensorboard_names,
                     host='127.0.0.1',
                     port='6006',
                     output=True,
                     start=False)

    dropout_parameters = []  # note: this clears the dropout settings defined above before the model is built

    model = Model(sess=session,
                  data_shape=data_shape,
                  num_classes=1,
                  num_dense=2,  # note: overrides the num_dense computed above from fc_parameters
                  learning_rate=learning_rate,
                  use_batch_norm=use_batch_norm,
                  use_dropout=use_dropout,
                  dropout_parameters=dropout_parameters,
                  fc_parameters=fc_parameters,
                  tensorboard_directory=tensorboard_directory)

    train_data, train_labels = get_data()
    train_data, val_data, train_labels, val_labels = train_test_split(
        train_data, train_labels, test_size=0.30)

    print('> Training Data: {} {}'.format(train_data.shape,
                                          train_labels.shape))
    print('> Val Data: {} {}'.format(val_data.shape, val_labels.shape))
    # print('> Test Data: {} {}'.format(test_data.shape, test_labels.shape))

    model.train_data(data=train_data, labels=train_labels)

    model.val_data(data=val_data, labels=val_labels)

    model.train(batch_size=batch_size, epochs=epochs)
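
The Tensorboard.make call above appears to register log paths and metric names and can optionally start a TensorBoard server (here start=False). If one wanted to start the server manually, a minimal sketch along these lines would do; launch_tensorboard is a hypothetical helper and it assumes the tensorboard command is installed and on the PATH.

import subprocess

def launch_tensorboard(logdir, host='127.0.0.1', port='6006'):
    # Start a TensorBoard server as a background process pointed at the log directory.
    return subprocess.Popen(
        ['tensorboard', '--logdir', logdir, '--host', host, '--port', port])

# e.g. launch_tensorboard('./tmp/tensorboard/001') and open http://127.0.0.1:6006
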
Example #17
0
def handler(context):
    class_labels = 10

    dataset_alias = context.datasets
    train_dataset_id = dataset_alias['train']
    test_dataset_id = dataset_alias['test']

    train_data = list(load_dataset_from_api(train_dataset_id))
    test_data = list(load_dataset_from_api(test_dataset_id))

    train = ImageDatasetFromAPI(train_data, train=True)
    test = ImageDatasetFromAPI(test_data)

    net = utils.VGG.VGG(class_labels)
    model = L.Classifier(net)

    if USE_GPU >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    stop_trigger = (epochs, 'epoch')
    # Early stopping option
    if early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(monitor=early_stopping,
                                                     verbose=True,
                                                     max_trigger=(epochs,
                                                                  'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater,
                               stop_trigger,
                               out=ABEJA_TRAINING_RESULT_DIR)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=USE_GPU))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot_object(net, 'net.model'),
                   trigger=(epochs, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.

    report_entries = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]

    trainer.extend(Statistics(report_entries, epochs), trigger=(1, 'epoch'))
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(report_entries))

    trainer.run()
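
Here Tensorboard(report_entries, out_dir=log_path) is registered as a Chainer trainer extension alongside LogReport and PrintReport, so it presumably writes the selected report entries to TensorBoard event files whenever it is triggered. A minimal sketch of such an extension, assuming tensorboardX is available and that observation values are scalars or Chainer variables; the real class may differ.

from chainer.training import Extension
from tensorboardX import SummaryWriter

class TensorboardSketch(Extension):
    # Write selected entries of the trainer's latest observation to TensorBoard.
    def __init__(self, entries, out_dir='./logs'):
        self.entries = entries
        self.writer = SummaryWriter(out_dir)

    def __call__(self, trainer):
        observation = trainer.observation        # most recent reported values
        step = trainer.updater.iteration
        for entry in self.entries:
            if entry in observation:
                value = observation[entry]
                if hasattr(value, 'array'):      # unwrap chainer.Variable
                    value = value.array
                self.writer.add_scalar(entry, float(value), step)
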
Example #18
0
def main():

    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model(
            "./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)

        agent.test(test_data, tensorboard)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
Example #19
0
def handler(context):
    dataset_alias = context.datasets
    data = list(load_dataset_from_api(dataset_alias['train']))

    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    premodel = SSD300(n_fg_class=20, pretrained_model='voc0712')
    model = SSD300(n_fg_class=1)

    copy_ssd(model, premodel)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    fix_ssd(train_chain)

    train_data = DetectionDatasetFromAPI(train_data_raw)
    test_data = DetectionDatasetFromAPI(test_data_raw,
                                        use_difficult=True,
                                        return_difficult=True)

    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train_data, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_data,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_epochs, 'epoch'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([1200, 1600],
                                                          'epoch'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=['cup']),
                   trigger=(1, 'epoch'))

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))

    print_entries = [
        'epoch', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries, nb_epochs), trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=(nb_epochs, 'epoch'))

    trainer.run()
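
copy_ssd above transfers weights from the 20-class VOC-pretrained SSD300 into the new single-class model, but its implementation is not shown. A minimal sketch of the usual approach, copying every parameter whose name and shape match and leaving the class-dependent heads freshly initialised; the function name and details are assumptions.

def copy_ssd_sketch(model, premodel):
    # Copy every pretrained parameter whose name and shape match; layers whose
    # shapes differ (e.g. confidence heads sized for 20 classes) are skipped.
    pretrained = {name: param for name, param in premodel.namedparams()}
    for name, param in model.namedparams():
        if name in pretrained and pretrained[name].shape == param.shape:
            param.copydata(pretrained[name])
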
Example #20
0
    logging.log(logging.DEBUG, "CREATING MODEL")
    model = PolicyNetwork(128, 4)
    logging.log(logging.DEBUG, "MODEL CREATED")

    # Create the agent
    logging.log(logging.DEBUG, "STARTING AGENT")
    agent = Agent(server=server,
                  model=model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)

    # create tensorboard
    tensorboard = Tensorboard()

    # Read the dataset
    train_dataset = DatasetParser.parse(
        "data/nav_drone/train_annotations.json", config)
    # train_dataset = train_dataset[0:10]
    logging.info("Created train dataset of size %d ", len(train_dataset))
    test_dataset = DatasetParser.parse("data/nav_drone/test_annotations.json",
                                       config)
    tune_dataset = test_dataset[0:int(0.05 * len(test_dataset))]
    # tune_dataset = test_dataset[0:10]
    logging.info("Created tuning dataset of size %d ", len(tune_dataset))

    # Train on this dataset
    learning_alg = ContextualBandit(model=model,
                                    action_space=action_space,
Example #21
0
        "./results/oracle_gold_prob_cb_6000/contextual_bandit_5_epoch_17")

    logging.log(logging.DEBUG, "MODEL CREATED")

    # Create the agent
    logging.log(logging.DEBUG, "STARTING AGENT")
    agent = Agent(server=server,
                  model=model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)

    # create tensorboard
    tensorboard = Tensorboard("dummy")

    # Launch Unity Build
    launch_k_unity_builds([config["port"]],
                          "./simulators/NavDroneLinuxBuild.x86_64")

    test_data = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json",
                                    config)
    agent.test(test_data, tensorboard)

    server.kill()

except Exception:
    server.kill()
    exc_info = sys.exc_info()
    traceback.print_exception(*exc_info)
Example #22
0
def handler(context):
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    train = TransformDataset(ConcatenatedDataset(trainval_2007, trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_2007,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries,
                              nb_iterations,
                              obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(nb_iterations, 'iteration'))

    trainer.run()
Example #23
0
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  vocab,
                  use_pushover=False):

        print("In training...")

        launch_k_unity_builds([config["port"]],
                              "./simulators/house_3_elmer.x86_64")
        server.initialize_server()
        print("launched builds")

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            # pushover_logger = PushoverLogger(experiment_name)
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = TmpSupervisedLearning(shared_model, local_model,
                                        action_space, meta_data_util, config,
                                        constants, tensorboard)
        # TODO change 2 --- unity launch moved up

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                image, metadata = tmp_agent.server.reset_receive_feedback(
                    data_point)
                # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab)
                instruction = data_point.get_instruction()

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

                # trajectory = metadata["trajectory"]
                trajectory = data_point.get_trajectory()[0:300]

                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)

                    # Sample action from the probability
                    action_counts[action] += 1

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)

                    total_reward += reward

                # Send final STOP action and get feedback
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # if tensorboard is not None:
                #     tensorboard.log_all_train_errors(
                #         metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # a minimum batch size (e.g. 32) could be required here instead
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        num_actions = len(trajectory) + 1
                        tensorboard.log_scalar(
                            "loss_val", loss_val)  # /float(num_actions))
                        entropy = float(
                            learner.entropy.data[0])  # /float(num_actions)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                print("Going for testing")
                tmp_agent.test(tune_dataset,
                               vocab,
                               tensorboard=tensorboard,
                               logger=logger,
                               pushover_logger=pushover_logger)
                print("Done testing")
Example #24
0
    model = ModelSymbolicTextPrediction(config, constants)
    # model.load_saved_model("./results/train_symbolic_text_prediction_1clock/ml_learning_symbolic_text_prediction_epoch_3")
    logging.log(logging.DEBUG, "MODEL CREATED")

    # Create the agent
    logging.log(logging.DEBUG, "STARTING AGENT")
    agent = Agent(server=server,
                  model=model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)

    # create tensorboard
    tensorboard = Tensorboard("synthetic_easy_text_prediction")

    # Read the dataset
    all_train_data = DatasetParser.parse(
        "data/nav_drone/train_annotations_4000.json", config)
    num_train = (len(all_train_data) * 19) // 20
    # Advance the split point until the scene changes, so the train and tune
    # splits do not share a scene.
    while all_train_data[num_train].get_scene_name().split("_")[1] == \
            all_train_data[num_train - 1].get_scene_name().split("_")[1]:
        num_train += 1
    train_split = all_train_data[:num_train]
    tune_split = all_train_data[num_train:]

    logging.info("Created train dataset of size %d ", len(train_split))
    logging.info("Created tuning dataset of size %d ", len(tune_split))
Example #25
0
    def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
                  train_dataset, tune_dataset, experiment, experiment_name, rank, server,
                  logger, model_type, vocab, use_pushover=False):

        logger.log("In Training...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent.")

        action_counts = [0] * action_space.num_actions()
        max_epochs = 100000 # constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)

        # Create the learner to compute the loss
        learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util,
                                                  config, constants, tensorboard)
        # TODO change 2 --- unity launch moved up
        learner.logger = logger

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %(data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" % action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"]
                max_num_actions += constants["max_extra_horizon"]

                image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
                instruction = data_point.get_instruction()
                # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
                # print("Instruction str is ", instruction_str)

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                state.goal = learner.get_goal(metadata)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # logger.log("Training: Meta Data %r " % metadata)

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                    # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)
                    state.goal = learner.get_goal(metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                    replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                                   reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # a minimum batch size (e.g. 32) could be required here instead
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        tensorboard.log_scalar("loss", loss_val)
                        entropy = float(learner.entropy.data[0])/float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0])
                            tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                               logger=logger, pushover_logger=pushover_logger)
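
The tensorboard calls above log an entropy value and an "Abs_objective_to_entropy_ratio", which suggests TmpAsynchronousContextualBandit optimises an immediate-reward-weighted log-likelihood with an entropy bonus (the contextual-bandit setting uses only the per-step reward rather than a return). A rough sketch of such a loss, with hypothetical accessors and an assumed entropy coefficient:

import torch

def contextual_bandit_loss_sketch(batch_replay_items, entropy_coefficient=0.1):
    # Immediate-reward-weighted log-likelihood plus an entropy bonus, together
    # with the |objective| / entropy diagnostic that is logged to tensorboard.
    objective_terms, entropy_terms = [], []
    for item in batch_replay_items:
        log_prob = item.get_log_prob()                          # assumed accessor
        action, reward = item.get_action(), item.get_reward()   # assumed accessors
        objective_terms.append(log_prob[0, action] * reward)
        entropy_terms.append(-(log_prob.exp() * log_prob).sum())
    objective = torch.stack(objective_terms).sum()
    entropy = torch.stack(entropy_terms).mean()
    ratio = torch.abs(objective) / (entropy + 1e-8)
    loss = -objective - entropy_coefficient * entropy
    return loss, entropy, ratio
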
Example #26
0
    def learn_model(self,
                    env,
                    load_folder,
                    experiment_name,
                    experiment,
                    logger,
                    use_pushover,
                    trial=1):

        horizon = self.config["horizon"]
        actions = self.config["actions"]
        num_samples = self.constants["encoder_training_num_samples"]
        num_state_budget = self.constants["num_homing_policy"]

        tensorboard = Tensorboard(log_dir=self.config["save_path"])

        homing_policies = dict(
        )  # Contains a set of homing policies for every time step

        # Load homing policy from folder
        logger.log("Loading Homing policies...")
        for step in range(1, horizon + 1):

            homing_policies[step] = []

            for i in range(0, num_state_budget):
                # TODO can fail if the policy doesn't exist. Add checks to prevent that.
                policy_folder_name = load_folder + "/trial_%d_horizon_%d_homing_policy_%d/" % (
                    trial, step, i)
                if not os.path.exists(policy_folder_name):
                    logger.log("Did not find %s" % policy_folder_name)
                    continue
                previous_step_homing_policy = None if step == 1 else homing_policies[
                    step - 1]
                policy = self.reward_free_planner.read_policy(
                    policy_folder_name, step, previous_step_homing_policy)
                homing_policies[step].append(policy)
        logger.log("Loaded Homing policy.")

        # Load the encoder models
        backward_models = dict()
        backward_models[1] = None
        for step in range(1, horizon + 1):
            backward_model = EncoderModelWrapper.get_encoder_model(
                self.constants["model_type"], self.config, self.constants)
            backward_model.load(
                load_folder + "/trial_%d_encoder_model/" % trial,
                "encoder_model_%d" % step)
            backward_models[step + 1] = backward_model

        encoding_function = None  # Learned encoding function for the current time step
        dataset = []  # Dataset of samples collected for training the encoder
        selection_weights = None  # A distribution over homing policies from the previous time step (can be None)

        # Learn Forward Model and Estimate the Model
        forward_models = dict()
        forward_models[horizon + 1] = None
        prev_dataset = None

        for step in range(1, horizon + 1):

            logger.log("Step %r out of %r " % (step, horizon))

            # Step 1: Create dataset for learning the encoding function. A single datapoint consists of a transition
            # (x, a, x') and a 0-1 label y, where y=1 if the transition was actually observed and y=0 otherwise.
            time_collection_start = time.time()
            dataset = self.encoder_sampler.gather_samples(
                env, actions, step, homing_policies, num_samples, dataset,
                selection_weights)
            logger.log("Encoder: %r sample collected in %r sec" %
                       (num_samples, time.time() - time_collection_start))

            # Step 2: Train a binary classifier on this dataset. The classifier f(x, a, x') is trained to predict
            # the probability that the transition (x, a, x') was observed. Importantly, the classifier has a special
            # structure f(x, a, x') = p(x, a, \phi(x')) where \phi maps x' to a set of discrete values.
            time_encoder_start = time.time()
            if not self.constants["bootstrap_encoder_model"]:
                encoding_function = None
            encoding_function, _ = self.train_encoding_function.do_train_with_discretized_models(
                dataset,
                logger,
                tensorboard,
                False,
                bootstrap_model=encoding_function,
                undiscretized_initialization=True,
                category="forward")
            self.util.save_encoder_model(encoding_function, experiment, trial,
                                         step, "forward")
            forward_models[step] = encoding_function
            logger.log("Encoder: Training time %r" %
                       (time.time() - time_encoder_start))

            if step > 1:

                self._estimate_and_save_transition_dynamics(
                    env, experiment, prev_dataset, step,
                    forward_models[step - 1], backward_models[step - 1],
                    forward_models[step], backward_models[step], logger, trial)

            prev_dataset = dataset
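
Steps 1 and 2 above describe training a binary classifier over transitions (x, a, x') whose bottleneck \phi assigns x' to a small set of discrete values. A minimal PyTorch sketch of a model with that structure, assuming flat observation vectors and a Gumbel-softmax bottleneck; the actual encoder model in this codebase is created via EncoderModelWrapper and may look quite different.

import torch
import torch.nn as nn
import torch.nn.functional as F

class TransitionClassifierSketch(nn.Module):
    # f(x, a, x') = p(x, a, phi(x')): the bottleneck phi assigns the next
    # observation to one of num_abstract_states discrete values.
    def __init__(self, obs_dim, num_actions, num_abstract_states, hidden=64):
        super().__init__()
        self.phi = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU(),
                                 nn.Linear(hidden, num_abstract_states))
        self.head = nn.Linear(obs_dim + num_actions + num_abstract_states, 1)
        self.num_actions = num_actions

    def forward(self, x, a, x_next, temperature=1.0):
        # Soft (Gumbel-softmax) assignment of x' to a discrete abstract state;
        # a is a LongTensor of action indices.
        state_probs = F.gumbel_softmax(self.phi(x_next), tau=temperature, hard=False)
        a_onehot = F.one_hot(a, self.num_actions).float()
        logit = self.head(torch.cat([x, a_onehot, state_probs], dim=1))
        return torch.sigmoid(logit)  # probability that (x, a, x') was really observed
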
Example #27
0
    def do_train(chaplot_baseline,
                 shared_model,
                 config,
                 action_space,
                 meta_data_util,
                 args,
                 constants,
                 train_dataset,
                 tune_dataset,
                 experiment,
                 experiment_name,
                 rank,
                 server,
                 logger,
                 model_type,
                 contextual_bandit,
                 use_pushover=False):

        sys.stderr = sys.stdout
        server.initialize_server()
        # Local Config Variables
        lstm_size = 256

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=chaplot_baseline,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        chaplot_baseline.shared_model = local_model
        local_model.train()

        #  Our Environment Interface
        env = NavDroneServerInterface(agent, local_model, experiment, config,
                                      constants, None, train_dataset,
                                      tune_dataset, rank, logger, use_pushover)
        env.game_init()
        # logging.info("Contextual bandit is %r and horizon is %r", self.contextual_bandit, args.max_episode_length)
        logger.log("Created NavDroneServerInterface")

        # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
        optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
        p_losses = []
        v_losses = []

        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")
        (image, instr), _, _ = env.reset()
        curr_instr, prev_instr, next_instr = instr
        curr_instruction_idx = np.array(curr_instr)
        prev_instruction_idx = np.array(prev_instr)
        next_instruction_idx = np.array(next_instr)

        image = torch.from_numpy(image).float()
        curr_instruction_idx = torch.from_numpy(curr_instruction_idx).view(
            1, -1)
        prev_instruction_idx = torch.from_numpy(prev_instruction_idx).view(
            1, -1)
        next_instruction_idx = torch.from_numpy(next_instruction_idx).view(
            1, -1)

        done = True

        episode_length = 0
        num_iters = 0

        while True:
            # Sync with the shared model
            local_model.load_state_dict(shared_model.state_dict())
            if done:
                episode_length = 0
                cx = Variable(torch.zeros(1, lstm_size).cuda())
                hx = Variable(torch.zeros(1, lstm_size).cuda())

            else:
                # assert False, "Assertion put by Max and Dipendra. Code shouldn't reach here."
                cx = Variable(cx.data.cuda())
                hx = Variable(hx.data.cuda())

            values = []
            log_probs = []
            rewards = []
            entropies = []
            cached_information = None

            for step in range(args.num_steps):
                episode_length += 1
                tx = Variable(
                    torch.from_numpy(np.array([episode_length])).long().cuda())

                value, logit, (hx, cx), cached_information = local_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(curr_instruction_idx.cuda()),
                     Variable(prev_instruction_idx.cuda()),
                     Variable(next_instruction_idx.cuda()), (tx, hx, cx)),
                    cached_information)

                prob = F.softmax(logit, dim=1)
                log_prob = F.log_softmax(logit, dim=1)
                entropy = -(log_prob * prob).sum(1)
                entropies.append(entropy)

                action = prob.multinomial().data
                log_prob = log_prob.gather(1, Variable(action.cuda()))
                action = action.cpu().numpy()[0, 0]

                (image, _), reward, done, _ = env.step(action)

                # done = done or (episode_length >= self.args.max_episode_length)
                if not done and (episode_length >= args.max_episode_length):
                    # If the agent has not issued the stop action within the step
                    # budget, force a stop so the episode terminates.
                    _, _, done, _ = env.step(
                        env.client.agent.action_space.get_stop_action_index())
                    done = True

                if done:
                    (image, instr), _, _ = env.reset()
                    curr_instr, prev_instr, next_instr = instr
                    curr_instruction_idx = np.array(curr_instr)
                    prev_instruction_idx = np.array(prev_instr)
                    next_instruction_idx = np.array(next_instr)
                    curr_instruction_idx = torch.from_numpy(
                        curr_instruction_idx).view(1, -1)
                    prev_instruction_idx = torch.from_numpy(
                        prev_instruction_idx).view(1, -1)
                    next_instruction_idx = torch.from_numpy(
                        next_instruction_idx).view(1, -1)

                image = torch.from_numpy(image).float()

                values.append(value)
                log_probs.append(log_prob)
                rewards.append(reward)

                if done:
                    break

            if rank == 0 and tensorboard is not None:
                # Log total reward and entropy
                tensorboard.log_scalar("Total_Reward", sum(rewards))
                mean_entropy = sum(entropies).data[0] / float(
                    max(episode_length, 1))
                tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                       mean_entropy)

            R = torch.zeros(1, 1)
            if not done:
                tx = Variable(
                    torch.from_numpy(np.array([episode_length])).long().cuda())
                value, _, _, _ = local_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(curr_instruction_idx.cuda()),
                     Variable(prev_instruction_idx.cuda()),
                     Variable(next_instruction_idx.cuda()), (tx, hx, cx)))
                R = value.data

            values.append(Variable(R.cuda()))
            policy_loss = 0
            value_loss = 0
            R = Variable(R.cuda())

            gae = torch.zeros(1, 1).cuda()
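            # The backward pass below accumulates, for each time step t of the rollout:
            #   R_t     = r_t + gamma * R_{t+1}                   (discounted return)
            #   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)       (TD error)
            #   gae_t   = delta_t + gamma * tau * gae_{t+1}       (generalized advantage)
            # In contextual-bandit mode the advantage is replaced by the immediate reward.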
            for i in reversed(range(len(rewards))):
                R = args.gamma * R + rewards[i]
                advantage = R - values[i]
                value_loss = value_loss + 0.5 * advantage.pow(2)

                if contextual_bandit:
                    # Just focus on immediate reward
                    gae = torch.from_numpy(np.array([[rewards[i]]])).float()
                else:
                    # Generalized Advantage Estimation
                    delta_t = rewards[i] + args.gamma * \
                              values[i + 1].data - values[i].data
                    gae = gae * args.gamma * args.tau + delta_t

                policy_loss = policy_loss - \
                              log_probs[i] * Variable(gae.cuda()) - 0.02 * entropies[i]

            optimizer.zero_grad()

            p_losses.append(policy_loss.data[0, 0])
            v_losses.append(value_loss.data[0, 0])

            if len(p_losses) > 1000:
                num_iters += 1
                logger.log(" ".join([
                    # "Training thread: {}".format(rank),
                    "Num iters: {}K".format(num_iters),
                    "Avg policy loss: {}".format(np.mean(p_losses)),
                    "Avg value loss: {}".format(np.mean(v_losses))
                ]))
                p_losses = []
                v_losses = []

            (policy_loss + 0.5 * value_loss).backward()
            torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)

            ChaplotBaseline.ensure_shared_grads(local_model, shared_model)
            optimizer.step()
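
    # The update above copies gradients from the local rollout model into the shared model
    # via ChaplotBaseline.ensure_shared_grads, which is not shown in this snippet. Below is
    # a minimal sketch of the standard A3C gradient-sharing step it presumably performs;
    # the name _ensure_shared_grads_sketch and its exact behaviour are assumptions, not the
    # repository's actual implementation.
    @staticmethod
    def _ensure_shared_grads_sketch(model, shared_model):
        # Hand each local gradient to the corresponding shared parameter; if the shared
        # parameter already holds a gradient, another worker installed it first.
        for param, shared_param in zip(model.parameters(),
                                       shared_model.parameters()):
            if shared_param.grad is not None:
                return
            shared_param._grad = param.grad
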
    def do_supervsed_train(chaplot_baseline,
                           shared_model,
                           config,
                           action_space,
                           meta_data_util,
                           args,
                           constants,
                           train_dataset,
                           tune_dataset,
                           experiment,
                           experiment_name,
                           rank,
                           server,
                           logger,
                           model_type,
                           use_pushover=False):
        raise NotImplementedError()  # supervised training is disabled; the code below never runs
        sys.stderr = sys.stdout
        server.initialize_server()
        # Local Config Variables
        lstm_size = 256

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=chaplot_baseline,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        chaplot_baseline.shared_model = local_model
        local_model.train()

        env = StreetViewServerInterface(agent, local_model, experiment, config,
                                        constants, None, train_dataset,
                                        tune_dataset, rank, logger,
                                        use_pushover)

        env.game_init()

        shared_model.train()

        # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr)
        optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)

        p_losses = []
        v_losses = []
        num_iters = 0

        while True:

            # Get datapoint
            (image, instr), _, _ = env.reset()
            instruction_idx = np.array(instr)

            image = torch.from_numpy(image).float()
            instruction_idx = torch.from_numpy(instruction_idx).view(1, -1)

            # Sync with the shared model
            # model.load_state_dict(shared_model.state_dict())
            episode_length = 0
            cx = Variable(torch.zeros(1, lstm_size).cuda())
            hx = Variable(torch.zeros(1, lstm_size).cuda())

            log_probs = []
            rewards = []
            entropies = []
            trajectory = env.get_trajectory()
            min_length = min(len(trajectory), args.max_episode_length - 1)
            trajectory = trajectory[0:min_length]
            trajectory.append(agent.action_space.get_stop_action_index())

            for action in trajectory:
                episode_length += 1
                tx = Variable(
                    torch.from_numpy(np.array([episode_length])).long().cuda())

                value, logit, (hx, cx) = shared_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(instruction_idx.cuda()), None, None, (tx, hx,
                                                                    cx)))

                prob = F.softmax(logit, dim=1)
                log_prob = F.log_softmax(logit, dim=1)
                entropy = -(log_prob * prob).sum(1)
                entropies.append(entropy)

                action_tensor = torch.from_numpy(np.array([[action]]))
                log_prob = log_prob.gather(1, Variable(action_tensor.cuda()))
                (image, _), reward, done, _ = env.step(action)
                image = torch.from_numpy(image).float()

                log_probs.append(log_prob)
                rewards.append(reward)

                if done:
                    break

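            # Supervised (behavior-cloning) loss: maximise the log-probability of each
            # demonstrated action plus a small entropy bonus; no value/critic term is
            # learned in this path, so only the policy loss is optimised below.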
            policy_loss = 0
            for i in range(0, len(rewards)):
                policy_loss = policy_loss - log_probs[i] - 0.01 * entropies[i]

            # Log total reward and entropy
            if tensorboard is not None:
                tensorboard.log_scalar("Total_Reward", sum(rewards))
                mean_entropy = sum(entropies).data[0] / float(
                    max(episode_length, 1))
                tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                       mean_entropy)
                tensorboard.log_scalar("Policy_Loss", policy_loss.data[0, 0])

            optimizer.zero_grad()
            p_losses.append(policy_loss.data[0, 0])

            if len(p_losses) > 1000:
                num_iters += 1
                logger.log(" ".join([
                    # "Training thread: {}".format(rank),
                    "Num iters: {}K".format(num_iters),
                    "Avg policy loss: {}".format(np.mean(p_losses)),
                    "Avg value loss: {}".format(np.mean(v_losses))
                ]))
                p_losses = []
                v_losses = []

            policy_loss.backward()
            torch.nn.utils.clip_grad_norm(shared_model.parameters(), 40)

            # ensure_shared_grads(model, shared_model)
            optimizer.step()
Beispiel #29
0
    for line in f:  # iterate the already-open script file (xreadlines() is Python 2-only)
        logging.info(">>> " + line.strip())
logging.info("END SCRIPT CONTENTS")

action_space = ActionSpace(config["action_names"], config["stop_action"])
meta_data_util = MetaDataUtil()

# Create the server
logging.log(logging.DEBUG, "STARTING SERVER")
server = NavDroneServer(config, action_space)
logging.log(logging.DEBUG, "STARTED SERVER")
print("Launched Server...")

try:
    # create tensorboard
    tensorboard = Tensorboard(experiment_name)

    # Create the model
    logging.log(logging.DEBUG, "CREATING MODEL")
    # shared_model = a3c_lstm_ga_concat_instructions(args, config=config)
    shared_model = a3c_lstm_ga_concat_gavector(args, config=config)
    # shared_model = a3c_lstm_ga_attention_multigru(args, config=config)
    lstm_size = 256
    if isinstance(shared_model, a3c_lstm_ga_concat_gavector):
        lstm_size *= 3
    # if isinstance(shared_model, A3C_LSTM_GA):
    #     args.input_size -= 2
    model = ChaplotBaseline(args,
                            shared_model,
                            config,
                            constants,
Beispiel #30
0
class State(object):
    def __init__(self, args):
        self.args = args
        self.model = None
        self.optimizer = None
        self.scheduler = None
        self.epoch = 0

        # s = State(args)
        set_seed(self.args.seed, self.args.cudnn_behavoir)
        self.log = Log(self.args.log_path)
        self.writer = Tensorboard(self.args.tensorboard_path)
        self.stati = Statistic(self.args.expernameid, self.args.experid_path, self.args.root_path)
        self.stati.add('hparam', self.args.dict())
        # s.writer.add_hparams(hparam_dict=s.args.dict(), metric_dict={})
        self.record = Record()

    def show_args(self):
        print('----------------------------------------------------------------------------------------------')
        print('args:')
        print(self.args)
        print('----------------------------------------------------------------------------------------------')


    def close(self):
        self.stati.close()
        self.log.close()

    def exit(self):
        self.writer.close()

    def save(self, dir_path, filename, last_epoch=None, best_epoch=None):
        checkpoint = {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            'scheduler': self.scheduler.state_dict(),
            'record': self.record,
            'epoch': self.epoch
        }
        torch.save(checkpoint, os.path.join(dir_path, filename))

        if last_epoch:
            symlink_force('epoch_' + str(last_epoch) + '.pth', os.path.join(dir_path, 'epoch_last.pth'))

        if best_epoch:
            symlink_force('epoch_' + str(best_epoch) + '.pth', os.path.join(dir_path, 'epoch_best.pth'))

    def load(self, path):
        if os.path.isfile(path):
            checkpoint = torch.load(path, map_location=self.args.device)
            assert self.model, 'self.model is not defined before loading a checkpoint'
            self.model.load_state_dict(checkpoint['model'])
            if self.optimizer: self.optimizer.load_state_dict(checkpoint['optimizer'])
            if self.scheduler: self.scheduler.load_state_dict(checkpoint['scheduler'])
            self.record = checkpoint['record']
            # checkpoint['epoch']
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
            # warnings.warn('checkpoint path '+path+' not exist; go on without load it.')

    def deploy(self):
        self.model = nn.DataParallel(self.model)
        self.model.to(self.args.device)
        if self.optimizer:
            # torch.optim.Optimizer has no .to(); move its state tensors individually
            for state in self.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.to(self.args.device)
        # if self.scheduler: self.scheduler.to(self.args.device)

    def show_para(self):
        # Print model's state_dict
        print("Net's state_dict:")
        for param_tensor in self.model.state_dict():
            print(param_tensor, "\t", self.model.state_dict()[param_tensor].size())

        # Print optimizer's state_dict
        print("Optimizer's state_dict:")
        for var_name in self.optimizer.state_dict():
            print(var_name, "\t", self.optimizer.state_dict()[var_name])
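
# A brief usage sketch of the State helper above (hedged: `MyNet` and
# `args.checkpoint_dir` are illustrative placeholders, not part of the original code):
#
#     state = State(args)
#     state.model = MyNet()
#     state.optimizer = torch.optim.Adam(state.model.parameters())
#     state.scheduler = torch.optim.lr_scheduler.StepLR(state.optimizer, step_size=10)
#     state.deploy()                                   # wrap in DataParallel, move to device
#     state.save(args.checkpoint_dir, 'epoch_1.pth', last_epoch=1)
#     state.load(os.path.join(args.checkpoint_dir, 'epoch_last.pth'))
#     state.close()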