# Generator weight bookkeeping: total number of generated weight/bias entries
l1, b1, l2, b2 = model.get_gen_weights()
weight_size = np.sum(
    list(l1.flatten().shape) + list(b1.flatten().shape) +
    list(l2.flatten().shape) + list(b2.flatten().shape))

# Discriminator network
dis = models.DiscriminatorZ(args, obs_size, n_actions)
dis = dis.to(device)

# ms-CEM policy (refers to the guiding CEM policy)
dim_theta = (obs_size + 1) * n_actions
# Initialize mean and standard deviation
theta_mean = np.zeros(dim_theta)
theta_std = np.ones(dim_theta)
theta = np.random.multivariate_normal(theta_mean, np.diag(theta_std ** 2))
if LAYER == 2:  # 2-layer network
    msCEM_model = models.NetW_2Layer(args, obs_size, n_actions)
if LAYER == 3:  # 3-layer network
    msCEM_model = models.NetW_3Layer(args, obs_size, n_actions)
msCEM_model = msCEM_model.to(device)

# Loss functions
criterionGen = nn.BCELoss()
criterionDis = nn.BCELoss()
criterionMSE = nn.MSELoss()

print(model)
print(dis)
print("Observation Size: {} \t Action Size: {}".format(obs_size, n_actions))
print("Model_param: {}".format(utils.count_parameters(model)))
print("Encoder_param: {}".format(utils.count_parameters(model.encoder)))
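# For reference, utils.count_parameters is assumed here to be the usual helper
# that sums trainable parameters of a torch.nn.Module; a minimal sketch under
# that assumption (the repository's own implementation may differ):
def count_parameters_sketch(module):
    # Count only parameters that receive gradients.
    return sum(p.numel() for p in module.parameters() if p.requires_grad)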
device = torch.device("cuda:" + DEVICE if torch.cuda.is_available() else "cpu")
if not use_cuda:
    device = torch.device("cpu")
print(device)

dim_theta = (obs_size + 1) * n_actions
# Initialize mean and standard deviation
theta_mean = np.zeros(dim_theta)
theta_std = np.ones(dim_theta)

# main policy network
theta = np.random.multivariate_normal(theta_mean, np.diag(theta_std ** 2))
if LAYER == 2:
    model = models.NetW_2Layer(args, obs_size, n_actions)  # 2-layer
if LAYER == 3:
    model = models.NetW_3Layer(args, obs_size, n_actions)  # 3-layer
model = model.to(device)
print("Observation Size: {} \t Action Size: {}".format(obs_size, n_actions))

start_time = time.time()

# define agent start state
agent_start_list = [(INI_JOING_ANGLES, [369, 430])]

e_batch_buffer = utils.elite_batch_bufferW_msCEM(25)  # 10
global_eList = []
total_test_reward_accum = 0
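# For orientation only: theta_mean / theta_std parameterize the search
# distribution of the cross-entropy method (CEM). A minimal sketch of one
# generic CEM update, assuming a hypothetical evaluate(theta) -> return helper
# and an elite fraction of 0.2; the repository's msCEM loop and
# elite_batch_bufferW_msCEM bookkeeping may differ:
def cem_update_sketch(theta_mean, theta_std, evaluate, batch_size=50, elite_frac=0.2):
    # Sample candidate parameter vectors from the current Gaussian.
    thetas = np.random.multivariate_normal(
        theta_mean, np.diag(theta_std ** 2), size=batch_size)
    # Evaluate each candidate and keep the top-performing elites.
    returns = np.array([evaluate(t) for t in thetas])
    n_elite = max(1, int(batch_size * elite_frac))
    elites = thetas[np.argsort(returns)[-n_elite:]]
    # Refit the Gaussian to the elites (new mean and per-dimension std).
    return elites.mean(axis=0), elites.std(axis=0)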