Ejemplo n.º 1
0
 def __init__(self,
              capacity,
              z_dim,
              env_name,
              action,
              num_actions=6,
              knn=4,
              alpha=0.1,
              beta=0.01):
     """Create empty per-slot storage for ``capacity`` entries and a kNN buffer.

     Args:
         capacity: Number of slots; sizes every per-slot array and list.
         z_dim: Width of each row of ``states``.
         env_name: Stored unchanged on the instance.
         action: Stored unchanged on the instance.
         num_actions: Stored unchanged on the instance.
         knn: Stored on the instance and forwarded to
             ``knn_cuda_fixmem.allocate``.
         alpha: Stored unchanged on the instance.
         beta: Stored on the instance; also scales ``rmax``.
     """
     # Plain configuration.
     self.action = action
     self.alpha = alpha
     self.beta = beta
     self.env_name = env_name
     self.capacity = capacity
     self.num_actions = num_actions
     self.knn = knn

     # Per-slot numeric storage. ``states`` is left uninitialised (np.empty).
     self.states = np.empty((capacity, z_dim), dtype=np.float32)
     self.external_value = np.zeros(capacity)
     self.internal_value = np.zeros(capacity)
     self.reward = np.zeros(capacity)
     self.count = np.zeros(capacity)
     self.lru = np.zeros(capacity)

     # Builtin ``bool`` is used as the dtype: the ``np.bool`` alias was
     # deprecated in NumPy 1.20 and removed in 1.24, where it raises
     # AttributeError. The resulting array dtype is the same.
     self.done = np.zeros(capacity, dtype=bool)
     self.newly_added = np.ones(capacity, dtype=bool)

     # Per-slot link storage; every ``next_id`` entry starts at -1.
     self.next_id = -1 * np.ones((capacity, 2))
     # One independent list per slot (a comprehension, so lists are not shared).
     self.prev_id = [[] for _ in range(capacity)]
     self.brothers = [[] for _ in range(capacity)]

     # NOTE(review): 2400 is an unexplained constant in the original code.
     self.rmax = self.beta * 2400
     self.curr_capacity = 0
     self.tm = 0.0
     self.threshold = 1e-7

     # NOTE(review): the third argument (32) is presumably the maximum query
     # batch size expected by the CUDA binding -- confirm against
     # knn_cuda_fixmem.
     self.address = knn_cuda_fixmem.allocate(capacity, z_dim, 32, knn)
Ejemplo n.º 2
0
 def __init__(self,
              capacity,
              z_dim,
              env_name,
              action,
              num_actions=6,
              knn=4,
              alpha=0.1,
              beta=0.01):
     """Initialise empty value storage for ``capacity`` slots and a kNN buffer.

     Args:
         capacity: Number of slots; sizes every per-slot array.
         z_dim: Width of each row of ``states``.
         env_name: Stored on the instance and used to build ``bufpath``.
         action: Stored unchanged on the instance.
         num_actions: Stored unchanged on the instance.
         knn: Stored on the instance and forwarded to
             ``knn_cuda_fixmem.allocate``.
         alpha: Stored unchanged on the instance.
         beta: Stored on the instance; also scales ``rmax``.
     """
     # --- configuration ---------------------------------------------------
     self.action = action
     self.env_name = env_name
     self.num_actions = num_actions
     self.capacity = capacity
     self.knn = knn
     self.alpha = alpha
     self.beta = beta
     self.rmax = beta * 2400
     self.threshold = 1e-2
     self.bufpath = './buffer/%s' % env_name

     # --- per-slot arrays (``states`` is uninitialised memory) -------------
     self.states = np.empty((capacity, z_dim), dtype=np.float32)
     self.external_value = np.zeros(capacity)
     self.internal_value = np.zeros(capacity)
     self.count = np.zeros(capacity)
     self.lru = np.zeros(capacity)

     # --- counters ---------------------------------------------------------
     self.curr_capacity = 0
     self.tm = 0.0
     self.addnum = 0
     self.buildnum = 256
     self.buildnum_max = 256

     # Allocation stays last, as in the original ordering.
     self.address = knn_cuda_fixmem.allocate(capacity, z_dim, 32, knn)
Ejemplo n.º 3
0
 def __init__(self, capacity, z_dim, env_name, action, num_actions=6, knn=4, debug=True, gamma=0.99,
              alpha=0.1,
              beta=0.01):
     """Create empty per-state / per-action storage and the GPU kNN buffers.

     Args:
         capacity: Number of state slots; sizes every table below.
         z_dim: Width of each row of ``states``; also stored as ``z_dim``.
         env_name: Stored unchanged on the instance.
         action: Stored unchanged on the instance.
         num_actions: Number of columns in the per-action tables.
         knn: Stored on the instance; ``knn + 1`` is passed to each
             ``knn_cuda_fixmem.allocate`` call.
         debug: Stored unchanged on the instance.
         gamma: Stored unchanged on the instance.
         alpha: Stored unchanged on the instance.
         beta: Stored unchanged on the instance.
     """
     # Plain configuration.
     self.action = action
     self.alpha = alpha
     self.beta = beta
     self.env_name = env_name
     self.capacity = capacity
     self.num_actions = num_actions
     self.rmax = 100000
     self.debug = debug
     self.knn = knn
     self.gamma = gamma
     self.b = 0.01
     self.z_dim = z_dim
     self.tm = 0.0
     self.threshold = 1e-7

     # ``states`` is left uninitialised (np.empty).
     self.states = np.empty((capacity, z_dim), dtype=np.float32)

     # Tables that start out filled with NaN.
     self.address = np.full((capacity, num_actions), np.nan)
     self.reversed_address = np.full((capacity, num_actions), np.nan)
     self.external_value = np.full((capacity, num_actions), np.nan)
     self.state_value_v = np.full((capacity,), np.nan)
     self.state_value_u = np.full((capacity,), np.nan)

     self.reward = np.zeros((capacity, num_actions))
     # Builtin ``bool`` is used as the dtype: the ``np.bool`` alias was
     # deprecated in NumPy 1.20 and removed in 1.24, where it raises
     # AttributeError. The resulting array dtype is the same.
     self.done = np.zeros((capacity, num_actions), dtype=bool)
     self.newly_added = np.ones((capacity,), dtype=bool)
     # Every internal value starts at ``rmax``; multiplying np.ones keeps the
     # array floating point even though ``rmax`` is an int.
     self.internal_value = self.rmax * np.ones((capacity, num_actions))

     # Independent containers per slot (and per action where nested).
     self.prev_id = [[] for _ in range(capacity)]
     self.next_id = [[{} for __ in range(num_actions)] for _ in range(capacity)]
     self.pseudo_count = [[{} for __ in range(num_actions)] for _ in range(capacity)]
     self.pseudo_reward = np.zeros((capacity, num_actions))
     self.pseudo_prev = [{} for _ in range(capacity)]

     self.count = np.zeros((capacity, num_actions))
     self.lru = np.zeros(capacity)

     # ``num_actions + 1`` GPU buffers, each with a fill counter starting at 0.
     # NOTE(review): the purpose of the extra buffer is not visible here.
     batch_size = 32
     self.gpu_address = [knn_cuda_fixmem.allocate(capacity, z_dim, batch_size, knn + 1)
                         for _ in range(num_actions + 1)]
     self.gpu_capacity = [0 for _ in range(num_actions + 1)]
     self.logger = logging.getLogger("ecbp")
Ejemplo n.º 4
0
 def __init__(self, capacity, z_dim, env_name, action, mode="mean", num_actions=6, knn=4):
     """Create empty storage for ``capacity`` slots and allocate the kNN buffer.

     Args:
         capacity: Number of slots; sizes every per-slot array.
         z_dim: Width of each row of ``states``.
         env_name: Stored on the instance and used to build ``bufpath``.
         action: Stored unchanged on the instance.
         mode: Stored unchanged on the instance.
         num_actions: Stored on the instance; number of columns in
             ``best_action``.
         knn: Stored on the instance and forwarded to
             ``knn_cuda_fixmem.allocate``.
     """
     # Plain configuration.
     self.action = action
     self.env_name = env_name
     self.capacity = capacity
     self.num_actions = num_actions
     self.mode = mode
     self.knn = knn
     self.threshold = 1e-7
     self.bufpath = './buffer/%s' % self.env_name

     # Per-slot storage. ``states`` is left uninitialised (np.empty).
     self.states = np.empty((capacity, z_dim), dtype=np.float32)
     self.q_values_decay = np.zeros(capacity)
     self.count = np.zeros(capacity)
     self.lru = np.zeros(capacity)
     # Builtin ``int`` is used as the dtype: the ``np.int`` alias was
     # deprecated in NumPy 1.20 and removed in 1.24, where it raises
     # AttributeError. The resulting array dtype is the same.
     self.best_action = np.zeros((capacity, num_actions), dtype=int)

     # Counters.
     self.curr_capacity = 0
     self.tm = 0.0
     self.addnum = 0
     self.buildnum = 256
     self.buildnum_max = 256

     # NOTE(review): the third argument (32) is presumably the maximum query
     # batch size expected by the CUDA binding -- confirm against
     # knn_cuda_fixmem.
     self.address = knn_cuda_fixmem.allocate(capacity, z_dim, 32, knn)
Ejemplo n.º 5
0
from baselines.deepq.experiments.atari.knn_cuda_fixmem import knn
import time
from sklearn.neighbors import BallTree, KDTree
import copy
# Timing script for the fixed-memory CUDA kNN binding: each trial allocates a
# buffer, adds `capacity` random vectors one at a time, then runs one query.
# NOTE(review): `np` is used below but not imported in the visible lines;
# presumably `import numpy as np` sits above this snippet -- verify.
c = 32  # width of every reference and query vector
dict_size = 10000  # rows generated in `reference`; also first arg to allocate
query_size = 4  # number of query vectors per trial
query_max = 64  # third arg to allocate; presumably the max query batch -- TODO confirm
capacity = 1000  # only the first `capacity` reference rows are added and searched
k = 4  # neighbours requested per query
# Four trials; a new buffer is allocated on every iteration.
for n in range(4):
    # The timer starts before data generation and allocation, so the
    # "add time" printed below includes both of those as well as the adds.
    cur_time = time.time()
    query = np.random.rand(query_size, c).astype(np.float32)

    reference = np.random.rand(dict_size, c).astype(np.float32)
    address = knn.allocate(dict_size, c, query_max, k)
    # print(address, address.dtype)
    # address = copy.deepcopy(address)
    # print(address,address.dtype)
    # print("??????")
    print(address)
    # Insert reference rows one by one at slot index i.
    for i in range(capacity):
        # print(i,np.array(reference[i]).shape)
        # print(i)
        knn.add(address, i, reference[i])
    print("add time:", time.time() - cur_time)
    # Restart the timer before the query.
    cur_time = time.time()
    # print(address)
    # # Index is 1-based (original author's note; not verifiable from this snippet)
    dist, ind = knn.knn(address, query.reshape(-1, c), k, capacity)
Ejemplo n.º 6
0
 def allocate(self):
     """Allocate the kNN buffer and keep its handle in ``self.address``.

     The neighbour count passed to the allocator is
     ``self.knn * self.num_actions``.
     """
     allocator = knn_cuda_fixmem.allocate
     n_slots, width = self.capacity, self.z_dim
     neighbours = self.knn * self.num_actions
     self.address = allocator(n_slots, width, 32, neighbours)