def parse():
    operation = sys.argv[1].split("=")
    left = operation[0]
    right = operation[1]
    left_degree = utils.get_degree(left)
    if left_degree == -2:
        print('Syntax error: something is wrong with your input, '
              'please double-check it.')
        return -1
    right_degree = utils.get_degree(right)
    degree = max(left_degree, right_degree)
    if degree <= -1:
        print('Syntax error: something is wrong with your input, '
              'please double-check it.')
        return -1
    if degree == 0:
        solver.resolve_zero_degree(left, right)
    elif degree == 1:
        solver.resolve_first_degree(left, right)
    elif degree == 2:
        solver.resolve_second_degree(left, right)
    elif degree > 2:
        print("The polynomial degree is strictly greater than 2. "
              "I can't solve it.")
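# Hedged sketch, not from the repo: one plausible shape for utils.get_degree
# on the polynomial side, assuming terms of the form "a*X^n" and the contract
# visible in parse() above (highest exponent on success, -2 on a syntax error).
import re

def get_degree(expr):
    terms = re.split(r'[+-]', expr.replace(' ', ''))
    degree = 0
    for term in terms:
        if term == '':
            continue  # empty piece left over from a leading +/- sign
        match = re.fullmatch(r'(\d+(\.\d+)?\*?)?(X(\^(\d+))?)?', term)
        if match is None:
            return -2  # malformed term: signal a syntax error
        if match.group(3):  # the term contains an X factor
            exponent = int(match.group(5)) if match.group(5) else 1
            degree = max(degree, exponent)
    return degree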
def __init__(self, in_features, out_features, data, bias=True):
    super(MaskedGCNConv, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.fc = nn.Linear(in_features, out_features, bias=bias)
    self.degree = get_degree(data.edge_list).float().to(device)
    self.dense_adj = data.adj.to_dense().to(device)
    self.sigma = Parameter(torch.Tensor(in_features))
    self.reset_parameters()
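# Hedged sketch of the graph-side get_degree helper used by the GCN code
# above and below, assuming edge_list is a 2 x E LongTensor of
# (source, target) pairs and degree means out-degree.
import torch

def get_degree(edge_list):
    num_nodes = int(edge_list.max()) + 1
    # Count how often each node appears as a source.
    return torch.bincount(edge_list[0], minlength=num_nodes)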
def laplacian_sharpening(data):
    # Degrees with self-loops counted once more (renormalization trick).
    deg = get_degree(data.edge_list) + 1
    source, target = data.edge_list
    # Every edge gets weight -1; self-loops get -1 + 3 = +2, giving a
    # 2I - A style sharpening operator before normalization.
    weight = -torch.ones(data.edge_list.size(1))
    weight += 3 * (source == target)
    deg_inv_sqrt = torch.pow(deg.to(torch.float), -0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0.0
    # Symmetric normalization: D^{-1/2} (2I - A) D^{-1/2}.
    v = deg_inv_sqrt[source] * weight * deg_inv_sqrt[target]
    lap_sharp = torch.sparse.FloatTensor(data.edge_list, v)
    return lap_sharp
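# Hedged usage sketch for laplacian_sharpening, assuming the function and a
# graph-side get_degree are importable. Note the edge_list must already
# contain self-loops: the `source == target` term turns the uniform -1 edge
# weight into +2 on the diagonal, i.e. a normalized operator of the
# 2I - A sharpening form.
import torch

class Data:
    pass

data = Data()
# Two nodes, one undirected edge, plus a self-loop on each node.
data.edge_list = torch.tensor([[0, 1, 0, 1],
                               [1, 0, 0, 1]])
lap = laplacian_sharpening(data)
print(lap.to_dense())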
def __init__(self, data, nhid=32, latent_dim=16):
    super(VGAEGRA, self).__init__(data, nhid, latent_dim)
    alpha = 0.95
    A = data.adjmat
    D = get_degree(data.edge_list)
    Dinv = 1 / D.float()
    # Propagation matrix: alpha * (I - alpha * A * D^{-1})^{-1} * A.
    self.gra = alpha * torch.matmul(
        torch.inverse(
            torch.eye(data.num_nodes) -
            alpha * torch.matmul(A, torch.diag(Dinv))),
        A)
    # Rescale so the entries sum to num_nodes ** 2.
    norm = self.gra.sum()
    self.gra = self.gra / norm * (data.num_nodes ** 2)
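# Hedged numeric sketch of the propagation matrix built above,
# alpha * (I - alpha * A * D^{-1})^{-1} * A, on a dense two-node toy graph,
# including the final rescaling so the entries sum to num_nodes ** 2.
import torch

alpha = 0.95
A = torch.tensor([[0., 1.], [1., 0.]])
Dinv = 1 / A.sum(dim=1)
gra = alpha * torch.matmul(
    torch.inverse(torch.eye(2) - alpha * torch.matmul(A, torch.diag(Dinv))),
    A)
gra = gra / gra.sum() * (2 ** 2)
print(gra)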
def obs2input2(self):
    # Column 5 is the is_enemy flag, so summing it counts the enemies.
    nagent = len(self.obs) - np.sum(self.obs, axis=0, dtype=np.int32)[5]
    nenemy = len(self.obs) - nagent
    # One 68-dim row per friendly unit: 5 self features + 9 units * 7 features.
    input = np.zeros([nagent, 68])
    n = 0
    for i in range(len(self.obs)):
        if self.obs[i][5] == 0:
            input[n][0] = self.obs[i][1]
            input[n][1] = self.obs[i][2]
            input[n][2] = self.obs[i][3]
            input[n][3] = self.obs[i][4]
            input[n][4] = self.obs[i][5]
            k = 5
            for j in range(len(self.obs)):
                if j != i:
                    dis = utils.get_distance(
                        self.obs[i][6], -self.obs[i][7],
                        self.obs[j][6], -self.obs[j][7]) / DISTANCE_FACTOR - 1
                    degree = utils.get_degree(
                        self.obs[i][6], -self.obs[i][7],
                        self.obs[j][6], -self.obs[j][7]) / 180
                    input[n][k] = degree
                    k += 1
                    input[n][k] = dis
                    k += 1
                    # The other unit's five status features (hp, shield,
                    # cooldown, ground range, is_enemy).
                    for l in range(5):
                        input[n][k] = self.obs[j][l + 1]
                        k += 1
            n += 1
    return input
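# Hedged sketch, an assumption rather than repo code: plausible definitions
# of the utils.get_distance / utils.get_degree pair used here, where the
# degree is a bearing in [-180, 180] that the caller divides by 180.
import math

def get_distance(x1, y1, x2, y2):
    return math.hypot(x2 - x1, y2 - y1)

def get_degree(x1, y1, x2, y2):
    return math.degrees(math.atan2(y2 - y1, x2 - x1))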
def obs2input(self):
    # Build the 68-dim input (5 + 7 * 9 = 68) for a single friendly unit.
    # Returns None when no friendly unit is left, which the caller checks.
    input = None
    for i in range(len(self.obs)):
        if self.obs[i][5] == 0:
            input = np.zeros(68)
            input[0] = self.obs[i][1]
            input[1] = self.obs[i][2]
            input[2] = self.obs[i][3]
            input[3] = self.obs[i][4]
            input[4] = self.obs[i][5]
            k = 5
            for j in range(len(self.obs)):
                if j != i:
                    dis = utils.get_distance(
                        self.obs[i][6], -self.obs[i][7],
                        self.obs[j][6], -self.obs[j][7]) / DISTANCE_FACTOR - 1
                    degree = utils.get_degree(
                        self.obs[i][6], -self.obs[i][7],
                        self.obs[j][6], -self.obs[j][7]) / 180
                    input[k] = degree
                    k += 1
                    input[k] = dis
                    k += 1
                    for l in range(5):
                        input[k] = self.obs[j][l + 1]
                        k += 1
    return input
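# Summary of the 68-dim feature layout shared by obs2input and obs2input2:
# 5 self features, then 9 other units with 7 features each. The field names
# follow the comments in act(self, obs) below; feature_index is a
# hypothetical helper, not in the original.
SELF_FIELDS = ['hp', 'shield', 'cooldown', 'ground_range', 'is_enemy']
OTHER_FIELDS = ['degree', 'distance',
                'hp', 'shield', 'cooldown', 'ground_range', 'is_enemy']

def feature_index(unit_slot, field):
    # Slot 0 is the agent itself; slots 1..9 are the other units in
    # observation order.
    if unit_slot == 0:
        return SELF_FIELDS.index(field)
    return 5 + (unit_slot - 1) * 7 + OTHER_FIELDS.index(field)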
def model_eval(args, model, env, random=True, vis=None):
    if vis is not None:
        vis, window_id, fps = vis
        frame_dur = 1.0 / fps
        last_time = time.time()
    rewards, start_time = 0, time.time()
    obs = env.reset()
    t = 0
    while True:
        nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
        nenemy = len(obs) - nagent
        action = np.zeros([nagent, env.action_space.shape[0]])
        n = 0
        for i in range(len(obs)):
            if obs[i][5] == 0:
                input = np.zeros(68)
                enemy_table = -np.ones(5)
                input[0] = obs[i][1]
                input[1] = obs[i][2]
                input[2] = obs[i][3]
                input[3] = obs[i][4]
                input[4] = obs[i][5]
                k = 5
                ind_enemy = 0
                for j in range(len(obs)):
                    if j != i:
                        dis = utils.get_distance(
                            obs[i][6], -obs[i][7],
                            obs[j][6], -obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(
                            obs[i][6], -obs[i][7],
                            obs[j][6], -obs[j][7]) / 180
                        input[k] = degree
                        k += 1
                        input[k] = dis
                        k += 1
                        for l in range(5):
                            input[k] = obs[j][l + 1]
                            k += 1
                        if obs[j][5] == 1:
                            enemy_table[ind_enemy] = obs[j][0]
                            ind_enemy += 1
                # volatile=True: inference only (pre-0.4 PyTorch idiom).
                pout, _ = model.pi_and_v(Variable(
                    torch.from_numpy(input).float().unsqueeze(0),
                    volatile=True))
                command_id = (pout.action_indices[0] if random
                              else pout.most_probable_actions[0])
                action[n][0] = obs[i][0]
                if command_id < 5:
                    # Attack the command_id-th enemy in the table.
                    action[n][1] = 1
                    action[n][4] = enemy_table[command_id]
                else:
                    # Move in one of eight directions.
                    action[n][1] = -1
                    if command_id < 10:
                        action[n][2] = (float(command_id) - 5) / 4
                    else:
                        action[n][2] = (float(command_id) - 13) / 4
                    action[n][3] = 1
                n += 1
        obs, reward, done, _ = env.step(action)
        rewards += reward
        if args.save_path is not None:
            with open(os.path.join(args.save_path, 'rewards_eval'), 'a+') as f:
                f.write('{}: {}\n'.format(t, rewards))
        if vis is not None and time.time() > last_time + frame_dur:
            pass
        if done:
            break
        t += 1
        if t > 501:
            break
    return EvalResult(rewards, time.time() - start_time)
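# Hedged refactoring sketch (not in the original): the command_id -> action
# decoding repeated in the loops above and below, pulled into one helper.
# Commands 0-4 attack the matching enemy_table entry; commands 5-12 move in
# one of eight directions encoded as a degree in [-0.75, 1].
def decode_command(command_id, enemy_table, action_row):
    if command_id < 5:
        action_row[1] = 1                       # attack flag
        action_row[4] = enemy_table[command_id]
    else:
        action_row[1] = -1                      # move flag
        if command_id < 10:
            action_row[2] = (float(command_id) - 5) / 4    # 0 .. 1
        else:
            action_row[2] = (float(command_id) - 13) / 4   # -0.75 .. -0.25
        action_row[3] = 1
    return action_row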
def model_eval_commnet(args, model, env, random=True, vis=None):
    if vis is not None:
        vis, window_id, fps = vis
        frame_dur = 1.0 / fps
        last_time = time.time()
    rewards, start_time = 0, time.time()
    obs = env.reset()
    t = 0
    nagent_pre = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
    while True:
        nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
        if nagent_pre != nagent:
            # The number of agents changed, so the recurrent state is stale.
            model.reset_state()
            nagent_pre = nagent
        nenemy = len(obs) - nagent
        action = np.zeros([nagent, env.action_space.shape[0]])
        input = np.zeros([nagent, 68])
        # Enemy table: unit ids of the (up to five) enemies.
        enemy_table = -np.ones(5)
        n = 0
        ind_enemy = 0
        for i in range(len(obs)):
            if obs[i][5] == 0:
                action[n][0] = obs[i][0]
                input[n][0] = obs[i][1]
                input[n][1] = obs[i][2]
                input[n][2] = obs[i][3]
                input[n][3] = obs[i][4]
                input[n][4] = obs[i][5]
                k = 5
                for j in range(len(obs)):
                    if j != i:
                        dis = utils.get_distance(
                            obs[i][6], -obs[i][7],
                            obs[j][6], -obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(
                            obs[i][6], -obs[i][7],
                            obs[j][6], -obs[j][7]) / 180
                        input[n][k] = degree
                        k += 1
                        input[n][k] = dis
                        k += 1
                        for l in range(5):
                            input[n][k] = obs[j][l + 1]
                            k += 1
                n += 1
            else:
                enemy_table[ind_enemy] = obs[i][0]
                ind_enemy += 1
        if len(input) != 0:
            pout, _ = model.pi_and_v(Variable(torch.from_numpy(input).float()))
            for i in range(nagent):
                command_id = (pout.action_indices[i] if random
                              else pout.most_probable_actions[i])
                if command_id < 5:
                    action[i][1] = 1
                    action[i][4] = enemy_table[command_id]
                else:
                    action[i][1] = -1
                    if command_id < 10:
                        action[i][2] = (float(command_id) - 5) / 4
                    else:
                        action[i][2] = (float(command_id) - 13) / 4
                    action[i][3] = 1
        obs, reward, done, _ = env.step(action)
        rewards += reward
        if args.save_path is not None:
            with open(os.path.join(args.save_path, 'rewards_eval'), 'a+') as f:
                f.write('{}: {}\n'.format(t, rewards))
        if vis is not None and time.time() > last_time + frame_dur:
            pass
        if done:
            break
        t += 1
        if t > 501:
            break
    return EvalResult(rewards, time.time() - start_time)
def act(self, args):
    self.model.load_state_dict(self.shared_model.state_dict())
    self.model.train()
    if self.done:
        self.reset_state()
    log_probs, entropies, rewards, values, actions = [], [], [], [], []
    for _ in range(self.t_max):
        nagent = len(self.obs) - np.sum(self.obs, axis=0, dtype=np.int32)[5]
        nenemy = len(self.obs) - nagent
        action = np.zeros([nagent, self.env.action_space.shape[0]])
        n = 0
        for i in range(len(self.obs)):
            if self.obs[i][5] == 0:
                # Initialize the per-agent model input: 5 + 7 * 9 = 68 features.
                input = np.zeros(68)
                enemy_table = -np.ones(5)
                input[0] = self.obs[i][1]
                input[1] = self.obs[i][2]
                input[2] = self.obs[i][3]
                input[3] = self.obs[i][4]
                input[4] = self.obs[i][5]
                k = 5
                ind_enemy = 0
                for j in range(len(self.obs)):
                    if j != i:
                        dis = utils.get_distance(
                            self.obs[i][6], -self.obs[i][7],
                            self.obs[j][6],
                            -self.obs[j][7]) / DISTANCE_FACTOR - 1
                        degree = utils.get_degree(
                            self.obs[i][6], -self.obs[i][7],
                            self.obs[j][6], -self.obs[j][7]) / 180
                        input[k] = degree
                        k += 1
                        input[k] = dis
                        k += 1
                        for l in range(5):
                            input[k] = self.obs[j][l + 1]
                            k += 1
                        if self.obs[j][5] == 1:
                            enemy_table[ind_enemy] = self.obs[j][0]
                            ind_enemy += 1
                pout, vout = self.model.pi_and_v(
                    Variable(torch.from_numpy(input).float().unsqueeze(0)))
                action[n][0] = self.obs[i][0]
                command_id = pout.action_indices[0]
                with open(os.path.join(args.save_path, 'command_id'), 'a+') as f:
                    f.write('{}\n'.format(command_id))
                if command_id < 5:
                    action[n][1] = 1
                    action[n][4] = enemy_table[command_id]
                else:
                    action[n][1] = -1
                    if command_id < 10:
                        action[n][2] = (float(command_id) - 5) / 4
                    else:
                        action[n][2] = (float(command_id) - 13) / 4
                    action[n][3] = 1
                n += 1
                log_probs.append(pout.sampled_actions_log_probs)
                entropies.append(pout.entropy)
                values.append(vout)
        self.obs, reward, done, _ = self.env.step(action)
        n = len(action) if action is not None else 0
        for i in range(n):
            rewards.append(reward)
        if done:
            self.done = done
            break
        if self.env.episode_steps == self.env.max_episode_steps:
            self.done = True
            break
    R = 0
    input_one_agent = self.obs2input()
    if (not self.done and self.obs is not None
            and input_one_agent is not None):
        # Bootstrap the return from the value of the last state.
        _, vout = self.model.pi_and_v(
            Variable(torch.from_numpy(input_one_agent).float().unsqueeze(0)))
        R = float(vout.data.numpy())
    else:
        self.model.reset_state()
    t = len(rewards)
    if t == 0:
        return t
    pi_loss, v_loss = 0, 0
    for i in reversed(range(t)):
        R = self.gamma * R + rewards[i]
        v = values[i]
        advantage = R - float(v.data.numpy()[0, 0])
        # Accumulate gradients of the policy.
        log_prob = log_probs[i]
        entropy = entropies[i]
        # The log probability is increased proportionally to the advantage...
        pi_loss -= log_prob * advantage
        # ...and the entropy is maximized.
        pi_loss -= self.beta * entropy
        # Accumulate gradients of the value function.
        v_loss += (v - R).pow(2).div_(2)
    if self.pi_loss_coef != 1.0:
        pi_loss *= self.pi_loss_coef
    if self.v_loss_coef != 1.0:
        v_loss *= self.v_loss_coef
    # Normalize the loss of sequences truncated by terminal states.
    if self.keep_loss_scale_same and t < self.t_max:
        factor = self.t_max / t
        pi_loss *= factor
        v_loss *= factor
    total_loss = pi_loss + v_loss
    # Compute gradients using the thread-specific model.
    self.optimizer.zero_grad()
    total_loss.backward()
    torch.nn.utils.clip_grad_norm(self.model.parameters(), 40)
    # Copy the gradients to the globally shared model.
    ensure_shared_grads(self.model, self.shared_model, self.process_idx)
    self.optimizer.step()
    self.model.unchain_backward()
    return t
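# Hedged sketch of the return recursion used in the loss loop above: the
# bootstrap value R is folded backwards through the rollout with
# R = gamma * R + r_i, which is exactly what the reversed loop computes.
def discounted_returns(rewards, bootstrap, gamma):
    R = bootstrap
    returns = []
    for r in reversed(rewards):
        R = gamma * R + r
        returns.append(R)
    return list(reversed(returns))

# Example: discounted_returns([1, 0, 2], bootstrap=0.5, gamma=0.9)
# -> [2.9845, 2.205, 2.45]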
def act(self, obs):
    nagent = len(obs) - np.sum(obs, axis=0, dtype=np.int32)[5]
    nenemy = len(obs) - nagent
    action = np.zeros([nagent, self.action_space.shape[0]])
    if nenemy == 0:
        return None
    n = 0
    for i in range(len(obs)):
        if obs[i][5] == 0:
            # Initialize the model input: 5 + 7 * 9 = 68 features.
            input = np.zeros(68)
            enemy_table = -np.ones(5)
            input[0] = obs[i][1]
            input[1] = obs[i][2]
            input[2] = obs[i][3]
            input[3] = obs[i][4]
            input[4] = obs[i][5]
            k = 5
            ind_enemy = 0
            for j in range(len(obs)):
                if j != i:
                    dis = utils.get_distance(
                        obs[i][6], -obs[i][7],
                        obs[j][6], -obs[j][7]) / DISTANCE_FACTOR - 1
                    degree = utils.get_degree(
                        obs[i][6], -obs[i][7],
                        obs[j][6], -obs[j][7]) / 180
                    input[k] = degree
                    k += 1
                    input[k] = dis
                    k += 1
                    input[k] = obs[j][1]  # hp in [0, 100]
                    k += 1
                    input[k] = obs[j][2]  # shield in [0, 100]
                    k += 1
                    input[k] = obs[j][3]  # cooldown in [0, 1]
                    k += 1
                    input[k] = obs[j][4]  # ground range in [0, 1]
                    k += 1
                    input[k] = obs[j][5]  # is_enemy: 0 for own side, 1 for enemy
                    k += 1
                    if obs[j][5] == 1:
                        enemy_table[ind_enemy] = obs[j][0]
                        ind_enemy += 1
            act = select_action(input)
            action[n][0] = obs[i][0]
            action[n][3] = 1
            action[n][4] = -1
            command_id = int(act[0, 0])
            if command_id < 5:
                # Attack the command_id-th enemy in the table.
                action[n][1] = 1
                action[n][4] = enemy_table[command_id]
            else:
                # Move: map commands 5..12 onto degrees in [-0.75, 1].
                action[n][1] = -1
                if command_id < 10:
                    action[n][2] = (float(command_id) - 5) / 4
                else:
                    action[n][2] = (float(command_id) - 13) / 4
            n = n + 1
    return action
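# Summary table of the 13-command action space decoded above; the degree
# values are the action[n][2] entries, read directly from the decoding chain.
COMMANDS = {
    0: 'attack enemy_table[0]',
    1: 'attack enemy_table[1]',
    2: 'attack enemy_table[2]',
    3: 'attack enemy_table[3]',
    4: 'attack enemy_table[4]',
    5: 'move, degree 0',
    6: 'move, degree 0.25',
    7: 'move, degree 0.5',
    8: 'move, degree 0.75',
    9: 'move, degree 1',
    10: 'move, degree -0.75',
    11: 'move, degree -0.5',
    12: 'move, degree -0.25',
}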