Example #1
import random
from collections import deque

import numpy as np
import torch
from torch.autograd import Variable

# Learner is the project's Q-network wrapper (defined elsewhere).

class DQNAgent(object):
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = Learner(state_size, action_size, self.learning_rate)

    def remember(self, state, action, reward, next_state, done):
        # store a transition in the replay buffer
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values.data.numpy()[0])

    def replay(self, batch_size):
        # sample a minibatch (or the whole buffer while it is still small)
        batch_size = min(batch_size, len(self.memory))
        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            # bootstrap the target from the next state unless the episode ended
            target = reward
            if not done:
                target = reward + self.gamma *\
                            self.model.predict(next_state).max()
                target = target.data.numpy()
            # only the taken action's Q-value is pushed toward the target
            target_f = self.model.predict(state).data.numpy()
            target_f[0][action] = target
            self.model.fit(Variable(torch.Tensor(state)),
                           Variable(torch.Tensor(target_f)))
        # anneal exploration
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
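
A minimal driver loop for this agent might look like the following sketch. The CartPole environment, the (1, state_size) reshape, and the batch size of 32 are assumptions for illustration, and the classic Gym API (reset() returning just the observation, step() returning a 4-tuple) is assumed.

import gym

env = gym.make("CartPole-v1")
agent = DQNAgent(env.observation_space.shape[0], env.action_space.n)
for episode in range(500):
    state = env.reset().reshape(1, -1)   # classic Gym API assumed
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = next_state.reshape(1, -1)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
    agent.replay(32)   # one learning pass per episode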
Example #2
    os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu)
    try:
        mp.set_start_method('forkserver', force=True)
        print("forkserver init")
    except RuntimeError:
        pass

    processes = []
    # data communication: actors push traces into q_trace, the manager
    # batches them into q_batch for the learner
    q_trace = Queue(maxsize=300)
    q_batch = Queue(maxsize=3)
    q_manager = QManeger(opt, q_trace, q_batch)
    p = mp.Process(target=q_manager.listening)
    p.start()
    processes.append(p)

    learner = Learner(opt, q_batch)  # the learner's shared network is read by the actors
    actors = [
        Actor(opt, q_trace, learner),
        Actor(opt, q_trace, learner),
        Actor(opt, q_trace, learner)
    ]
    for rank, a in enumerate(actors):
        p = mp.Process(target=a.performing, args=(rank, ))
        p.start()
        processes.append(p)

    learner.learning()
    for p in processes:
        p.join()
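
The hand-off above is a plain producer/consumer pattern over multiprocessing queues. Below is a self-contained sketch of that pattern using the standard library (whether mp in the example is the stdlib module or torch.multiprocessing is not shown); producer and consumer stand in for the actors and the learner.

import multiprocessing as mp

def producer(q):
    # an Actor would push trajectories into q_trace here
    for i in range(3):
        q.put(i)
    q.put(None)  # sentinel: no more data

def consumer(q):
    # the Learner would pull batches from q_batch here
    while True:
        item = q.get()
        if item is None:
            break
        print("got", item)

if __name__ == "__main__":
    mp.set_start_method("forkserver", force=True)  # as in the example (POSIX only)
    q = mp.Queue(maxsize=8)
    p = mp.Process(target=producer, args=(q,))
    p.start()
    consumer(q)
    p.join()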
Example #3
import numpy as np
import torch
from torch.optim import Adam

# to_tensor, to_numpy, hard_update, criterion, USE_CUDA, ReplayMemory and
# Learner come from the surrounding project.

class DDQN(object):
    def __init__(self, n_states, n_actions, args):
        if args.seed > 0:
            self.seed(args.seed)

        self.n_states = n_states
        self.n_actions = n_actions

        # create agent network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'init_w': args.init_w
        }
        self.agent = Learner(self.n_states, self.n_actions, **net_cfg)
        self.target = Learner(self.n_states, self.n_actions, **net_cfg)
        self.agent_optim = Adam(self.agent.parameters(), lr=args.lr)

        self.update_target_steps = args.update_target_timing

        hard_update(self.target, self.agent)

        # create replay memory
        self.memory = ReplayMemory(capacity=args.rmsize)

        # hyper parameters
        self.batch_size = args.bsize
        self.discount_rate = args.discount_rate
        self.decay_epsilon = 1 / args.decay_epsilon
        self.min_epsilon = args.min_epsilon
        
        self.epsilon = 1.0
        
        if USE_CUDA: self.cuda()

    def update(self, step):
        state_batch, action_batch, next_state_batch, reward_batch, terminal_batch = \
            self.memory.sample_and_split(self.batch_size)
        q_predict = self.agent(to_tensor(state_batch))
        n_q_predict = self.agent(to_tensor(next_state_batch))
        q_batch = torch.zeros(self.batch_size, 1)
        next_q_value = torch.zeros(self.batch_size, 1)

        for n in range(self.batch_size):
            # Q-value of the action actually taken
            q_batch[n] = q_predict[n][action_batch[n]]
            # Double DQN: the online network picks the greedy next action,
            # the target network evaluates it
            n_act = torch.argmax(n_q_predict[n])
            next_q_value[n] = self.target(to_tensor(next_state_batch[n]))[n_act]

        # bootstrapped target; terminal transitions contribute the reward only
        target_q_batch = to_tensor(reward_batch).reshape(self.batch_size, 1) \
            + self.discount_rate * next_q_value \
            * to_tensor(1 - terminal_batch.astype(np.float32).reshape(self.batch_size, 1))

        value_loss = criterion(q_batch, target_q_batch)
        self.agent.zero_grad()
        value_loss.backward()
        self.agent_optim.step()

        if step % self.update_target_steps == 0:
            self.update_target()

    def update_target(self):
        hard_update(self.target, self.agent)

    def random_action(self):
        # uniform random scores over all actions; argmax of uniform noise
        # yields a uniformly random discrete action
        action = np.random.uniform(-1., 1., self.n_actions)
        action = np.argmax(action)
        return action

    def select_action(self, s_t, decay_epsilon=True):
        # epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if np.random.random() < self.epsilon:
            action = self.random_action()
        else:
            action = to_numpy(
                self.agent(to_tensor(np.array([s_t])))
            ).squeeze(0)
            action = np.argmax(action)

        # linear epsilon annealing down to min_epsilon
        if self.epsilon > self.min_epsilon and decay_epsilon:
            self.epsilon = max(self.min_epsilon, self.epsilon - self.decay_epsilon)

        return action

    def observe(self, obs, act, new_obs, rew, done):
        # transitions have ragged element shapes, so store as an object array
        items = np.asarray([obs, act, new_obs, rew, done], dtype=object)
        self.memory.push(items)

    def seed(self, s):
        torch.manual_seed(s)
        if USE_CUDA:
            torch.cuda.manual_seed(s)
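
Two sketches related to the class above. First, the per-sample loop in update() can be vectorized with torch.gather; the name ddqn_targets and the (batch_size, n_actions) input shapes are assumptions, not part of the original code. Second, a standard implementation of the hard_update helper the class relies on (the project's own version is not shown).

import torch

def ddqn_targets(online_q_next, target_q_next, rewards, terminals, gamma):
    # Double DQN: the online network picks the greedy next action ...
    next_actions = online_q_next.argmax(dim=1, keepdim=True)   # (B, 1)
    # ... and the target network evaluates that action
    next_values = target_q_next.gather(1, next_actions)        # (B, 1)
    # terminal transitions contribute the reward only
    return rewards + gamma * next_values * (1.0 - terminals)

def hard_update(target, source):
    # copy the online network's weights into the target network
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(s.data)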
Example #4
import argparse
import os
import time

import tensorflow as tf

# Data, DataPlus, Learner, Experiment and Option come from the surrounding project.

def main():
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--from_model_ckpt', default=None, type=str)
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--get_vocab_embed',
                        default=False,
                        action="store_true")
    parser.add_argument('--exps_dir', default=None, type=str)
    parser.add_argument('--exp_name', default=None, type=str)
    # data property
    parser.add_argument('--datadir', default=None, type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)
    parser.add_argument('--type_check', default=False, action="store_true")
    parser.add_argument('--domain_size', default=128, type=int)
    parser.add_argument('--no_extra_facts', default=False, action="store_true")
    parser.add_argument('--query_is_language',
                        default=False,
                        action="store_true")
    parser.add_argument('--vocab_embed_size', default=128, type=int)
    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)
    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--no_norm', default=False, action="store_true")
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    # evaluation
    parser.add_argument('--get_phead', default=False, action="store_true")
    parser.add_argument('--adv_rank', default=False, action="store_true")
    parser.add_argument('--rand_break', default=False, action="store_true")
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top_k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)
    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name
    if option.resplit:
        assert not option.no_extra_facts
    if option.accuracy:
        assert option.top_k == 1

    os.environ["CUDA_VISIBLE_DEVICES"] = option.gpu
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    if not option.query_is_language:
        data = Data(option.datadir, option.seed, option.type_check,
                    option.domain_size, option.no_extra_facts)
    else:
        data = DataPlus(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    if not option.query_is_language:
        option.num_query = data.num_query
    else:
        option.num_vocab = data.num_vocab
        option.num_word = data.num_word  # the number of words in each query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    if not os.path.exists(option.this_expsdir):
        os.makedirs(option.this_expsdir)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    if not os.path.exists(option.ckpt_dir):
        os.makedirs(option.ckpt_dir)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    # build the model
    learner = Learner(option)
    print("Model built.")

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        if option.from_model_ckpt is not None:
            saver.restore(sess, option.from_model_ckpt)
            print("Checkpoint restored from model %s" % option.from_model_ckpt)

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()

        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()

        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()

        if option.get_vocab_embed:
            print("Start getting vocabulary embedding...")
            experiment.get_vocab_embedding()

    experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)
Example #5
import argparse
import os
import time

import tensorflow as tf

# Data, Learner, Experiment and Option come from the surrounding project.

def main():
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--exps_dir', default="../exps/", type=str)
    parser.add_argument('--exp_name', default="demo", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)

    # data properties
    parser.add_argument('--datadir', default="../datasets/kinship", type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)

    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)

    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--norm', default=True, action="store_true")  # note: store_true with default=True means this flag is always True
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)

    # evaluation
    parser.add_argument('--get_phead', default=True, action="store_true")  # likewise always True due to default=True
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top-k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)

    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name

    if option.accuracy:
        assert option.top_k == 1

    os.environ["CUDA_VISIBLE_DEVICE"] = option.gpu
    tf.logging.set_verbosity(tf.logging.ERROR)

    data = Data(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    option.num_query = data.num_query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    if not os.path.exists(option.this_expsdir):
        os.makedirs(option.this_expsdir)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    if not os.path.exists(option.ckpt_dir):
        os.makedirs(option.ckpt_dir)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    learner = Learner(option)
    print("Learner built.")

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()

        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()

        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()

    experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)