Example #1

                # update the model-based dynamics module: encode the action as a
                # one-hot vector, concatenate it with the current state, and
                # train the BBN dynamics model on the observed transition
                action_np_vec = np.zeros([1, 6])
                action_np_vec[0, action - 1] = 1.
                action_vec = torch.from_numpy(action_np_vec).float().cuda()
                current_state_action = torch.cat([state, action_vec], 1)
                BBN_dynamic.train(current_state_action, next_state)
                
                # info gain: compare the updated hyperparameters of the BBN
                # against the ones saved before this training step
                hyperparameters = BBN_dynamic.dump_hyparameters()
                info_gain = BBN_dynamic.get_info_gain(hyperparameters, pre_hyperparameters)
                
             
            # Store the transition in memory, shaping the reward with an
            # information-gain bonus annealed to zero over training
            agent.store_transition(state, action - 1, reward + info_gain * ratio * (1 - epoch / epochs))

            print('epoch: %d, image: %d, step: %d, reward: %d' %
                  (epoch, i, step, reward))

            # Move to the next state
            state = next_state

            # Perform the optimization 
            if done:
                print("updating model !")
                agent.REINFORCE()
                print("finish updating model !")
                break
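
Note: the shaped reward above adds an information-gain bonus that decays linearly as training progresses. A minimal sketch of that schedule, assuming scalar inputs and the ratio coefficient from the snippet (illustrative only, not code from the source repository):

def shaped_reward(reward, info_gain, ratio, epoch, epochs):
    """Extrinsic reward plus an information-gain bonus annealed
    linearly from full strength (epoch 0) to zero (final epoch)."""
    return reward + info_gain * ratio * (1 - epoch / epochs)

print(shaped_reward(1.0, 0.5, 0.1, 0, 100))    # ~1.05: bonus fully applied
print(shaped_reward(1.0, 0.5, 0.1, 100, 100))  # 1.0: bonus fully annealed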
Example #2

            else:
                offset, region_image, size_mask, region_mask = get_crop_image_and_mask(
                    original_shape, offset, region_image, size_mask, action)
                # update history vector and get next state
                history_vector = update_history_vector(history_vector, action)
                next_state = get_state(region_image, history_vector, model_vgg)

                # find the max bounding box in the region image
                new_iou = find_max_bounding_box(gt_masks, region_mask,
                                                classes_gt_objects,
                                                CLASS_OBJECT)
                reward = get_reward_movement(iou, new_iou)
                iou = new_iou

            # Store the transition in memory
            agent.store_transition(state, action - 1, reward)

            print('epoch: %d, image: %d, step: %d, reward: %d' %
                  (epoch, i, step, reward))

            # Move to the next state
            state = next_state

            # Perform the optimization
            if done:
                print("updating model !")
                agent.REINFORCE()
                print("finish updating model !")
                break

        #==================== loop of training procedure ==========================================#
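
Each example ends an episode by calling agent.REINFORCE(). The body of that method is not shown in these snippets; the sketch below is a generic Monte Carlo policy-gradient (REINFORCE) update, under the assumption that the agent buffers per-step log-probabilities and rewards and exposes hypothetical attributes log_probs, rewards, gamma, and optimizer:

import torch

def reinforce_update(agent):
    """Generic REINFORCE update: discounted returns weighted by the
    negative log-probabilities of the actions that were taken."""
    returns, G = [], 0.0
    for r in reversed(agent.rewards):      # discounted return per step
        G = r + agent.gamma * G
        returns.insert(0, G)
    returns = torch.tensor(returns)
    # normalize returns to reduce gradient variance
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    loss = torch.stack([-log_p * G
                        for log_p, G in zip(agent.log_probs, returns)]).sum()

    agent.optimizer.zero_grad()
    loss.backward()
    agent.optimizer.step()

    del agent.log_probs[:]                 # reset the episode buffers
    del agent.rewards[:]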
Example #3

                # train the TUC encoder-decoder dynamics model on the transition
                TUC_dynamic.train_enc_dec(state, next_state, action_vec)

                if step > 0:
                    # intrinsic reward: how much the latent z-distribution
                    # moved relative to the previous step
                    mean, std = TUC_dynamic.dump_z_mean_std(state, action_vec)
                    intrinsic_reward = TUC_dynamic.dump_exploration_reward(
                        pre_mean, pre_std, mean, std)

            if i > 0:
                # regret penalty for the chosen action
                penalty = TUC_dynamic.dump_regret(state, action - 1)

            #print(intrinsic_reward, penalty)
            #print(ratio_1*((epochs-epoch)/epochs)*intrinsic_reward - ratio_2*(epoch/epochs)*penalty)

            # Store the transition in memory: the exploration bonus is
            # annealed down over training while the regret penalty is
            # annealed up
            agent.store_transition(
                state, action - 1, reward + ratio_1 *
                ((epochs - epoch) / epochs) * intrinsic_reward - ratio_2 *
                (epoch / epochs) * penalty)

            print('epoch: %d, image: %d, step: %d, reward: %d' %
                  (epoch, i, step, reward))

            # Move to the next state
            state = next_state

            # Perform the optimization
            if done:
                states = torch.cat(agent.states)
                values = torch.tensor(np.expand_dims(np.array(
                    agent.get_values()),
                                                     axis=1),
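
Example #3 shapes the reward in two directions at once: the intrinsic exploration bonus fades out as training progresses while the regret penalty fades in. A minimal sketch of that schedule, assuming scalar inputs and the ratio_1/ratio_2 coefficients from the snippet (illustrative only):

def shaped_reward(reward, intrinsic_reward, penalty,
                  ratio_1, ratio_2, epoch, epochs):
    """Anneal the exploration bonus down and the regret penalty up,
    both linearly over the course of training."""
    explore_weight = (epochs - epoch) / epochs   # 1 -> 0
    penalty_weight = epoch / epochs              # 0 -> 1
    return (reward
            + ratio_1 * explore_weight * intrinsic_reward
            - ratio_2 * penalty_weight * penalty)

print(shaped_reward(1.0, 0.5, 0.5, 0.2, 0.2, 0, 100))    # ~1.1: bonus only
print(shaped_reward(1.0, 0.5, 0.5, 0.2, 0.2, 100, 100))  # ~0.9: penalty only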