class DQNAgent(object):
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)      # replay buffer
        self.gamma = 0.95                     # discount factor
        self.epsilon = 1.0                    # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = Learner(state_size, action_size, self.learning_rate)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # epsilon-greedy action selection
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values.data.numpy()[0])

    def replay(self, batch_size):
        # never sample more transitions than the buffer holds
        batch_size = min(batch_size, len(self.memory))
        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                # bootstrapped one-step TD target
                target = reward + self.gamma * \
                    self.model.predict(next_state).max()
                target = target.data.numpy()
            target_f = self.model.predict(state).data.numpy()
            target_f[0][action] = target
            self.model.fit(Variable(torch.Tensor(state)),
                           Variable(torch.Tensor(target_f)))
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
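# Illustrative usage only: a minimal training-loop sketch for the DQNAgent above.
# Assumptions (not taken from the original code): a pre-0.26 Gym environment
# ("CartPole-v1"), a Learner exposing the predict/fit interface the agent calls,
# and arbitrary episode/batch-size values.
if __name__ == "__main__":
    import gym

    env = gym.make("CartPole-v1")
    agent = DQNAgent(env.observation_space.shape[0], env.action_space.n)
    for episode in range(500):
        state = env.reset().reshape(1, -1)   # batch dimension expected by predict()
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = next_state.reshape(1, -1)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
        agent.replay(32)                     # one learning pass per episode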
os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu) try: mp.set_start_method('forkserver', force=True) print("forkserver init") except RuntimeError: pass processes = [] # data communication q_trace = Queue(maxsize=300) q_batch = Queue(maxsize=3) q_manager = QManeger(opt, q_trace, q_batch) p = mp.Process(target=q_manager.listening) p.start() processes.append(p) learner = Learner(opt, q_batch) # inner shared network was used by actors. actors = [ Actor(opt, q_trace, learner), Actor(opt, q_trace, learner), Actor(opt, q_trace, learner) ] for rank, a in enumerate(actors): p = mp.Process(target=a.performing, args=(rank, )) p.start() processes.append(p) learner.learning() for p in processes: p.join()
class DDQN(object):
    def __init__(self, n_states, n_actions, args):
        if args.seed > 0:
            self.seed(args.seed)

        self.n_states = n_states
        self.n_actions = n_actions

        # create agent network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'init_w': args.init_w
        }
        self.agent = Learner(self.n_states, self.n_actions, **net_cfg)
        self.target = Learner(self.n_states, self.n_actions, **net_cfg)
        self.agent_optim = Adam(self.agent.parameters(), lr=args.lr)
        self.update_target_steps = args.update_target_timing
        hard_update(self.target, self.agent)

        # create replay memory
        self.memory = ReplayMemory(capacity=args.rmsize)

        # hyper parameters
        self.batch_size = args.bsize
        self.discount_rate = args.discount_rate
        self.decay_epsilon = 1 / args.decay_epsilon
        self.min_epsilon = args.min_epsilon
        self.epsilon = 1.0

        if USE_CUDA:
            self.cuda()

    def update(self, step):
        state_batch, action_batch, next_state_batch, reward_batch, terminal_batch = \
            self.memory.sample_and_split(self.batch_size)

        q_predict = self.agent(to_tensor(state_batch))
        n_q_predict = self.agent(to_tensor(next_state_batch))

        q_batch = torch.zeros(self.batch_size, 1)
        next_q_value = torch.zeros(self.batch_size, 1)
        for n in range(self.batch_size):
            # Q(s, a) for the action actually taken
            q_batch[n] = q_predict[n][action_batch[n]]
            # Double DQN: the online network picks the next action,
            # the target network evaluates it
            n_act = torch.argmax(n_q_predict[n])
            next_q_value[n] = self.target(to_tensor(next_state_batch[n]))[n_act]

        target_q_batch = to_tensor(reward_batch).reshape(self.batch_size, 1) + \
            self.discount_rate * next_q_value * \
            to_tensor(1 - terminal_batch.astype(float).reshape(self.batch_size, 1))

        value_loss = criterion(q_batch, target_q_batch)

        self.agent.zero_grad()
        value_loss.backward()
        self.agent_optim.step()

        if step % self.update_target_steps == 0:
            self.update_target()

    def update_target(self):
        hard_update(self.target, self.agent)

    def random_action(self):
        # argmax over i.i.d. uniform noise, i.e. a uniformly random action index
        action = np.random.uniform(-1., 1., self.n_actions)
        return np.argmax(action)

    def select_action(self, s_t, decay_epsilon=True):
        # epsilon-greedy action selection
        if np.random.random() < self.epsilon:
            action = self.random_action()
        else:
            q_values = to_numpy(self.agent(to_tensor(np.array([s_t])))).squeeze(0)
            action = np.argmax(q_values)

        if self.epsilon > self.min_epsilon and decay_epsilon:
            self.epsilon = max(self.min_epsilon, self.epsilon - self.decay_epsilon)

        return action

    def observe(self, obs, act, new_obs, rew, done):
        items = np.asarray([obs, act, new_obs, rew, done])
        self.memory.push(items)

    def seed(self, s):
        torch.manual_seed(s)
        if USE_CUDA:
            torch.cuda.manual_seed(s)
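# Illustrative usage only: a minimal interaction-loop sketch for the DDQN agent above.
# Only the select_action/observe/update/seed interface comes from the class itself;
# the environment name, args namespace (hidden1, hidden2, init_w, lr, rmsize, bsize,
# etc.), warm-up length and episode count are hypothetical.
import gym

def train(args, n_episodes=300, warmup=1000):
    env = gym.make("CartPole-v1")
    agent = DDQN(env.observation_space.shape[0], env.action_space.n, args)
    step = 0
    for episode in range(n_episodes):
        obs, done = env.reset(), False
        while not done:
            action = agent.select_action(obs)
            new_obs, reward, done, _ = env.step(action)
            agent.observe(obs, action, new_obs, reward, done)
            if step > warmup:                # learn only once the replay memory is filled
                agent.update(step)
            obs = new_obs
            step += 1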
def main():
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--from_model_ckpt', default=None, type=str)
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--get_vocab_embed', default=False, action="store_true")
    parser.add_argument('--exps_dir', default=None, type=str)
    parser.add_argument('--exp_name', default=None, type=str)
    # data property
    parser.add_argument('--datadir', default=None, type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)
    parser.add_argument('--type_check', default=False, action="store_true")
    parser.add_argument('--domain_size', default=128, type=int)
    parser.add_argument('--no_extra_facts', default=False, action="store_true")
    parser.add_argument('--query_is_language', default=False, action="store_true")
    parser.add_argument('--vocab_embed_size', default=128, type=int)
    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)
    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--no_norm', default=False, action="store_true")
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    # evaluation
    parser.add_argument('--get_phead', default=False, action="store_true")
    parser.add_argument('--adv_rank', default=False, action="store_true")
    parser.add_argument('--rand_break', default=False, action="store_true")
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top_k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)
    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name
    if option.resplit:
        assert not option.no_extra_facts
    if option.accuracy:
        assert option.top_k == 1

    os.environ["CUDA_VISIBLE_DEVICES"] = option.gpu
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    if not option.query_is_language:
        data = Data(option.datadir, option.seed, option.type_check,
                    option.domain_size, option.no_extra_facts)
    else:
        data = DataPlus(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    if not option.query_is_language:
        option.num_query = data.num_query
    else:
        option.num_vocab = data.num_vocab
        option.num_word = data.num_word  # the number of words in each query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    if not os.path.exists(option.this_expsdir):
        os.makedirs(option.this_expsdir)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    if not os.path.exists(option.ckpt_dir):
        os.makedirs(option.ckpt_dir)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    # build the model
    learner = Learner(option)
    print("Model built.")
    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        if option.from_model_ckpt is not None:
            saver.restore(sess, option.from_model_ckpt)
            print("Checkpoint restored from model %s" % option.from_model_ckpt)

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()
        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()
        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()
        if option.get_vocab_embed:
            print("Start getting vocabulary embedding...")
            experiment.get_vocab_embedding()

        experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)
def main():
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--exps_dir', default="../exps/", type=str)
    parser.add_argument('--exp_name', default="demo", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)
    # data property
    parser.add_argument('--datadir', default="../datasets/kinship", type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)
    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)
    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--norm', default=True, action="store_true")
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)
    # evaluation
    parser.add_argument('--get_phead', default=True, action="store_true")
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top_k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)
    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name
    if option.accuracy:
        assert option.top_k == 1

    os.environ["CUDA_VISIBLE_DEVICES"] = option.gpu
    tf.logging.set_verbosity(tf.logging.ERROR)

    data = Data(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    option.num_query = data.num_query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    if not os.path.exists(option.this_expsdir):
        os.makedirs(option.this_expsdir)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    if not os.path.exists(option.ckpt_dir):
        os.makedirs(option.ckpt_dir)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    learner = Learner(option)
    print("Learner built.")
    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()
        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()
        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()

        experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)
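# Illustrative only: how a script like the one above is typically launched from the
# command line; the flag values are hypothetical examples, not recommended settings.
#
#   python main.py --datadir ../datasets/kinship --exps_dir ../exps/ --exp_name demo
if __name__ == "__main__":
    main()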