def __init__(self,
             actions,
             task='egg',
             name=None,
             parser=lambda x: x,
             input_length=None,
             input_width=1,
             history_size=1,
             model_type='MLP',
             device=torch.device('cpu'),
             pomdp_mode=True,
             sparse_reward=True):
    """Record the agent configuration and build its game environment.

    ``input_length`` is mandatory even though it defaults to ``None``;
    leaving it unset trips the assertion below.

    The environment is created through ``self._create_env`` with an identity
    function as its first callable (not ``parser``), ``tokenizer`` as the
    second one, and the ``ZorkGym/gym/zork1.z5`` story file. ``model_type`` is
    stored only after the environment exists, and ``self.network`` is left as
    ``None`` here.
    """
    assert input_length is not None

    success_reward = 0

    # Identity and placement.
    self.device = device
    self.name = '' if name is None else name

    # Action space.
    self.actions = actions
    self.action_size = len(actions)

    # Observation parsing and network input geometry.
    self.parser = parser
    self.input_length = input_length
    self.input_width = input_width
    self.history_size = history_size

    # Reward / observability settings; the network is not created here.
    self.success_reward = success_reward
    self.network = None
    self.pomdp_mode = pomdp_mode
    self.sparse_reward = sparse_reward

    self.env = self._create_env(
        task,
        lambda x: x,
        lambda x: tokenizer(x),
        'ZorkGym/gym/zork1.z5',
        self.success_reward,
        self.pomdp_mode,
        self.sparse_reward,
    )

    self.model_type = model_type
def _create_action_embeddings(self, action_parser):
    """Embed every entry of ``self.actions`` and stack the results.

    Each action is tokenized, wrapped in a one-element list, and passed to
    ``action_parser``; element ``[0, 0]`` of the parser output is kept. The
    kept values are combined with ``torch.stack`` in the order of
    ``self.actions``.
    """
    return torch.stack([
        action_parser([tokenizer(action_text)])[0, 0]
        for action_text in self.actions
    ])
def _build_action_vocabulary(sentences, word2vec_model, use_word2vec):
    """Return ``(vocabulary, embedding_size)`` for the tokens in *sentences*.

    ``vocabulary`` maps every token produced by ``tokenizer`` over
    *sentences*, plus the empty token ``''``, to an embedding vector:

    * ``use_word2vec`` true: the vector is looked up in *word2vec_model* and
      ``''`` maps to a zero vector of ``word2vec_model.vector_size`` entries.
    * otherwise: a one-hot vector whose length is the vocabulary size.

    Tokens are processed in sorted order. Iterating the raw ``set`` would give
    a different one-hot index per word in every process (string hashing is
    randomized), which makes a checkpoint trained in one run meaningless when
    reloaded in another.
    """
    tokens = set()
    for sentence in sentences:
        tokens.update(tokenizer(sentence))

    action_vocabulary = {}
    if use_word2vec:
        embedding_size = word2vec_model.vector_size
        for word in sorted(tokens):
            action_vocabulary[word] = word2vec_model[word]
        # The empty token is the "no word" action and is added last.
        action_vocabulary[''] = [0 for _ in range(embedding_size)]
    else:
        tokens.add('')
        ordered_tokens = sorted(tokens)
        embedding_size = len(ordered_tokens)
        for idx, word in enumerate(ordered_tokens):
            one_hot = np.zeros(embedding_size)
            one_hot[idx] = 1.0
            action_vocabulary[word] = one_hot
    return action_vocabulary, embedding_size


def main():
    """Train one agent per entry of ``args.simulations`` on the chosen task.

    The task id (``args.task``) selects the action set, replay-buffer size,
    number of time steps, project name and game seed. Two-word candidate
    sentences are generated from the action tokens, the state parsers are
    built, and each requested simulation constructs its agent and calls
    ``agent.learn``.

    Every simulation works on its own per-iteration values (test parameters,
    action embedding size, action parser). Nothing a simulation configures
    leaks into the next one, so the order of ``args.simulations`` does not
    change how any individual simulation is set up.

    Raises:
        NotImplementedError: for an unknown task id or simulation name.
    """
    args = parse_args()

    loss_weighting = 0.0 if args.grad_loss else 1.0

    verbs = ['go', 'take', 'open', 'grab', 'run', 'walk', 'climb']

    vocabulary = load_list_from_file('./data/vocabulary.txt')

    basic_actions = [
        'open egg', 'go east', 'go west', 'go north', 'go south', 'go up',
        'go down', 'look', 'take egg'
    ]

    dictionary = [
        'pray', 'yellow', 'trapdoor', 'open', 'bell', 'touch', 'pile',
        'trunk', 'sack', 'inflate', 'southeast', 'of', 'move', 'match',
        'figurine', 'railing', 'with', 'map', 'mirror', 'wind', 'examine',
        'north', 'out', 'trident', 'turn', 'skull', 'throw', 'northwest',
        'case', 'bag', 'red', 'press', 'jewels', 'east', 'pump', 'bolt',
        'rusty', 'window', 'douse', 'boat', 'bracelet', 'matchbook', 'basket',
        'book', 'coffin', 'bar', 'rug', 'lid', 'drop', 'nasty', 'wrench',
        'light', 'sand', 'bauble', 'kill', 'tie', 'painting', 'sword', 'wave',
        'in', 'south', 'northeast', 'ring', 'canary', 'lower', 'egg', 'all',
        'to', 'candles', 'page', 'and', 'echo', 'emerald', 'tree', 'from',
        'rope', 'troll', 'screwdriver', 'torch', 'enter', 'coal', 'go', 'look',
        'shovel', 'knife', 'down', 'take', 'switch', 'prayer', 'launch',
        'diamond', 'read', 'up', 'get', 'scarab', 'west', 'land', 'southwest',
        'climb', 'thief', 'raise', 'wait', 'odysseus', 'button', 'sceptre',
        'lamp', 'chalice', 'garlic', 'buoy', 'pot', 'label', 'put', 'dig',
        'machine', 'close'
    ]

    # Defaults; individual tasks override some of them below.
    actions = basic_actions
    optimize_memory = False
    sparse_reward = True
    eps_start = 1.0
    test_params = {
        'nn=-1': {
            'number_of_neighbors': -1
        },
        'nn=1': {
            'number_of_neighbors': 1
        },
        'nn=3': {
            'number_of_neighbors': 3
        },
        'nn=11': {
            'number_of_neighbors': 11
        },
    }
    game_seed = 52

    if args.task == 0:
        buffer_size = 20000
        time_steps = 100000
        project_name = 'egg_quest_minimal_actions'
        task = 'egg'
    elif args.task == 1:
        buffer_size = 20000
        time_steps = 2000000
        project_name = 'egg_quest_extended_actions'
        actions = dictionary
        task = 'egg'
    elif args.task == -1:
        buffer_size = 20000
        time_steps = 100000
        project_name = 'egg_quest_baby_actions'
        actions = ['open', 'egg', 'north', 'climb', 'tree', 'take']
        task = 'egg'
    elif args.task == 2:
        buffer_size = 40000
        time_steps = 1000000
        project_name = 'troll_imitation'
        actions = dictionary
        task = 'troll'
        sparse_reward = False
        test_params = {
            'nn': {
                'number_of_neighbors': args.nn
            },
        }
        game_seed = 12
    elif args.task == 3:
        buffer_size = 40000
        time_steps = 1000000
        project_name = 'troll'
        actions = [
            'north', 'south', 'east', 'west', 'open window', 'take sword',
            'take lamp', 'move rug', 'open trapdoor', 'go down', 'light lamp',
            'kill troll with sword'
        ]
        task = 'troll'
        sparse_reward = False
    else:
        raise NotImplementedError

    # Unique action tokens in first-seen order, preceded by the empty token.
    words = ['']
    for action in actions:
        for token in tokenizer(action):
            if token not in words:
                words.append(token)

    # Every unordered pair of tokens becomes one candidate sentence; a known
    # verb in the first position stays first, otherwise the pair is swapped.
    sentences = []
    for i, word1 in enumerate(words):
        for word2 in words[i + 1:]:
            if word1 in verbs:
                sentences.append(word1 + ' ' + word2)
            else:
                sentences.append(word2 + ' ' + word1)

    if args.pomdp:
        project_name = project_name + '_pomdp'

    seed = args.seed

    disable_cuda = False
    # Seeding of random / torch is intentionally left disabled here, as in
    # the original script.
    if torch.cuda.is_available() and not disable_cuda:
        device = torch.device('cuda')
        torch.backends.cudnn.enabled = False
    else:
        device = torch.device('cpu')

    vocab_size = len(vocabulary)
    bow_parser = BagOfWords(
        vocabulary=vocabulary,
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1))

    word2vec_model = glove_api.load('glove-wiki-gigaword-50')
    # Size of the state word vectors. Never rebound below: the state parser's
    # return_func reads it lazily, so rebinding it would silently change the
    # padding of every later simulation.
    embedding_size = word2vec_model.vector_size
    word2vec_parser = Word2Vec(
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(0),
        word2vec_model=word2vec_model,
        return_func=lambda x: word2vec_padding(x, 65, embedding_size))

    # NOTE(review): not referenced by any simulation below; kept in case the
    # constructor is relied on elsewhere - confirm and remove.
    onehot_parser = OneHotParser(
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1),
        vocabulary=actions)

    # Keyword groups shared by the agent constructors.
    shared_kwargs = {
        'history_size': 1,
        'device': device,
        'pomdp_mode': args.pomdp,
        'task': task,
        'sparse_reward': sparse_reward,
    }
    mlp_input = {
        'model_type': 'MLP',
        'input_length': vocab_size + 1,
        'input_width': 1,
    }
    # The CNN state input is always the padded word2vec representation, so
    # its length is the word-vector size even when the *action* embedding is
    # one-hot. (The original passed the action embedding size here for
    # 'ompddpg_cnn'; the two only coincide with --action_w2v.)
    cnn_input = {
        'model_type': 'CNN',
        'input_length': embedding_size,
        'input_width': 65,
    }

    # Experiments from here and below.
    for simulation in args.simulations:
        tau = 0.0
        train_params = {'seed': seed}
        # Per-simulation copy of the reference: DQN / DRRN runs disable
        # testing without affecting the simulations that follow them.
        sim_test_params = test_params

        if simulation == 'dqn_mlp':
            sim_test_params = None
            agent = DQN(actions=sentences,
                        parser=bow_parser,
                        **mlp_input,
                        **shared_kwargs)

        elif simulation == 'dqn_cnn':
            sim_test_params = None
            agent = DQN(actions=sentences,
                        parser=word2vec_parser,
                        **cnn_input,
                        **shared_kwargs)

        elif simulation == 'drrn_mlp':
            sim_test_params = None
            agent = DRRN(actions=sentences,
                         parser=bow_parser,
                         **mlp_input,
                         **shared_kwargs)
            tau = 0.2

        elif simulation == 'drrn_cnn':
            sim_test_params = None
            agent = DRRN(actions=sentences,
                         parser=word2vec_parser,
                         **cnn_input,
                         **shared_kwargs)
            tau = 0.2

        elif simulation == 'dddpg_mlp':
            # One-hot action vocabulary in first-seen token order.
            action_vocab_list = []
            for action in sentences:
                for token in tokenizer(action):
                    if token not in action_vocab_list:
                        action_vocab_list.append(token)

            action_embedding_size = len(action_vocab_list)
            action_vocabulary = {}
            for idx, token in enumerate(action_vocab_list):
                action_vocabulary[token] = np.zeros(action_embedding_size)
                action_vocabulary[token][idx] = 1.0

            # Dedicated action parser: the shared state parser must not be
            # repurposed, or later CNN simulations would parse states with
            # the one-hot action vocabulary.
            action_parser = Word2Vec(
                type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(
                    0),
                word2vec_model=word2vec_model,
                return_func=lambda x, size=action_embedding_size:
                word2vec_sum(x, size))
            action_parser.word2vec_model = action_vocabulary

            train_params['number_of_neighbors'] = args.nn
            agent = DDDPG(actions=sentences,
                          state_parser=bow_parser,
                          action_parser=action_parser,
                          embedding_size=action_embedding_size,
                          loss_weighting=loss_weighting,
                          **mlp_input,
                          **shared_kwargs)

        elif simulation == 'dddpg_cnn':
            action_word2vec_parser = Word2Vec(
                type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(
                    0),
                word2vec_model=word2vec_model,
                return_func=lambda x: word2vec_sum(x, embedding_size))

            train_params['number_of_neighbors'] = args.nn
            agent = DDDPG(actions=sentences,
                          state_parser=word2vec_parser,
                          action_parser=action_word2vec_parser,
                          embedding_size=embedding_size,
                          loss_weighting=loss_weighting,
                          **cnn_input,
                          **shared_kwargs)

        elif simulation in ('ompddpg_mlp', 'ompddpg_cnn'):
            action_vocabulary, action_embedding_size = (
                _build_action_vocabulary(sentences, word2vec_model,
                                         args.action_w2v))

            if simulation == 'ompddpg_mlp':
                state_parser, state_input = bow_parser, mlp_input
            else:
                state_parser, state_input = word2vec_parser, cnn_input

            train_params['number_of_neighbors'] = args.nn
            agent = OMPDDPG(actions=action_vocabulary,
                            state_parser=state_parser,
                            embedding_size=action_embedding_size,
                            loss_weighting=loss_weighting,
                            linear=args.linear,
                            improved_omp=args.improved_omp,
                            model_path=args.model_path,
                            **state_input,
                            **shared_kwargs)

        else:
            raise NotImplementedError

        # Run name used for visualisation; each suffix is appended
        # independently of the others.
        model_name = simulation
        if not ('dqn' in simulation or 'drrn' in simulation):
            model_name += '/neighbors=' + str(args.nn)
        if args.action_w2v:
            model_name += '/w2v'
        if args.linear:
            model_name += '/linear'

        agent.learn(
            total_timesteps=time_steps,
            buffer_size=buffer_size,
            visualize=True,
            vis_name=project_name + '/' + model_name + '/' + str(seed),
            optimize_memory=optimize_memory,
            tau=tau,
            learn_start_steps=0,  # 20000,
            train_params=train_params,
            test_params=sim_test_params,
            eps_start=eps_start,
            test_interval=args.test_interval,
            game_seed=game_seed)
def __init__(self,
             actions,
             task='egg',
             name=None,
             state_parser=lambda x: x,
             input_length=None,
             input_width=None,
             history_size=1,
             model_type='MLP',
             embedding_size=None,
             device=torch.device('cpu'),
             pomdp_mode=True,
             loss_weighting=0,
             linear=False,
             improved_omp=False,
             model_path=None,
             sparse_reward=True):
    """Initialise the OMP-DDPG agent.

    Args:
        actions: flat mapping from a word to its embedding vector, e.g.
            ``{'open': [...], 'egg': [...], '': [...]}``. It is iterated in
            insertion order to build ``self.words`` and the embedding
            matrices; the parent initializer is expected to store it as
            ``self.actions``.
        state_parser: forwarded to the parent initializer as ``parser``.
        embedding_size: size of one action embedding vector; mandatory.
        model_path: optional checkpoint loaded into ``self.network[0]``.
            When given, ``self.sparse_drrn`` is enabled and ``self.hash``
            starts as an empty dict; otherwise they are ``False`` / ``None``.
        loss_weighting, linear, improved_omp: stored on the instance as-is.
        Remaining arguments are forwarded unchanged to the parent
        initializer.

    Raises:
        AssertionError: if ``embedding_size`` is ``None``.
    """
    # Validate before the parent initializer does its (potentially costly)
    # setup work.
    assert embedding_size is not None

    super(OMPDDPG, self).__init__(actions=actions,
                                  task=task,
                                  name=name,
                                  parser=state_parser,
                                  input_length=input_length,
                                  input_width=input_width,
                                  history_size=history_size,
                                  model_type=model_type,
                                  device=device,
                                  pomdp_mode=pomdp_mode,
                                  sparse_reward=sparse_reward)

    self.linear = linear
    self.embedding_size = embedding_size
    self.improved_omp = improved_omp
    self.sentence_length = 4

    self.network = self._create_network()
    if model_path is not None:
        # map_location lets a checkpoint saved on a GPU be restored on
        # whatever device this agent runs on (including CPU-only hosts).
        self.network[0].load_state_dict(
            torch.load(model_path, map_location=device))
        self.sparse_drrn = True
        self.hash = {}
    else:
        self.sparse_drrn = False
        self.hash = None

    self.verbs = ['raise', 'turn', 'bring', 'grab', 'run', 'close', 'go',
                  'heave', 'drop', 'ring', 'kill', 'climb', 'push', 'light',
                  'lower', 'wave', 'enter', 'read', 'hit', 'put', 'walk',
                  'press', 'get', 'throw', 'examine', 'take', 'touch',
                  'douse', 'move', 'dig', 'open', 'inflate']

    # all_actions and synonyms are merely a remark. not in use in the code.
    all_actions = ['button', 'tie', 'turn', 'land', 'ring', 'coffin', 'wait',
                   'match', 'bolt', 'douse', 'boat', 'light', 'pile',
                   'launch', 'and', 'basket', 'window', 'switch', 'wrench',
                   'map', 'knife', 'wave', 'trapdoor', 'dig', 'lamp', 'wind',
                   'shovel', 'lid', 'emerald', 'scarab', 'case', 'figurine',
                   'from', 'railing', 'odysseus', 'bag', 'enter', 'drop',
                   'troll', 'label', 'east', 'coal', 'red', 'pot', 'get',
                   'trident', 'buoy', 'bracelet', 'move', 'mirror', 'rope',
                   'trunk', 'with', 'sceptre', 'all', 'torch', 'canary',
                   'north', 'rusty', 'read', 'yellow', 'examine', 'take',
                   'candles', 'rug', 'pray', 'echo', 'prayer', 'south',
                   'press', 'screwdriver', 'egg', 'chalice', 'page', 'bauble',
                   'inflate', 'painting', 'matchbook', 'to', 'put', 'out',
                   'look', 'in', 'touch', 'southeast', 'southwest', 'sand',
                   'up', 'kill', 'bell', 'sword', 'raise', 'throw', 'close',
                   'down', 'thief', 'skull', 'bar', 'open', 'diamond', 'west',
                   'garlic', 'northeast', 'nasty', 'of', 'northwest',
                   'machine', 'jewels', 'book', 'sack', 'pump', 'lower']
    synonyms = ['walk', 'run', 'take', 'place']

    long_sentences = [
        'kill troll with sword', 'tie rope to railing', 'put coffin in case',
        'light candles with match', 'put skull in case', 'put bar in case',
        'get knife and bag', 'put bag in case', 'drop rusty knife',
        'press red button', 'press yellow button', 'turn bolt with wrench',
        'put trunk in case', 'put trident in case', 'throw sceptre in boat',
        'get out of boat', 'put sceptre in case', 'put pot in case',
        'put emerald in case', 'put scarab in case', 'get rusty knife',
        'get nasty knife', 'kill thief with knife', 'drop rusty knife',
        'drop nasty knife', 'put painting in case', 'put chalice in case',
        'put egg in case', 'put canary in case', 'put bauble in case',
        'put jewels in case', 'put torch in basket',
        'put screwdriver in basket', 'put coal in basket',
        'get all from basket', 'put coal in machine',
        'turn switch with screwdriver', 'put diamond in basket',
        'put torch in basket', 'put screwdriver in basket',
        'put diamond in case', 'put torch in case', 'put bracelet in case',
        'put figurine in case', 'put trunk in case', 'take torch and lamp',
        'get rope and knife'
    ]

    # Keep one (token set, sentence) pair per distinct token set; the first
    # sentence with a given token set wins.
    self.long_sentence_sets = []
    seen_token_sets = set()
    for sentence in long_sentences:
        set_of_tokens = set(tokenizer(sentence))
        key = frozenset(set_of_tokens)
        if key in seen_token_sets:
            continue
        seen_token_sets.add(key)
        self.long_sentence_sets.append((set_of_tokens, sentence))

    # Words and their embeddings, row i of each matrix belonging to
    # self.words[i].
    self.words = []
    embedding_rows = []
    for word in self.actions:
        self.words.append(word)
        embedding_rows.append(self.actions[word])

    # Convert through a single ndarray: building a tensor directly from a
    # Python list of ndarrays is the slow path. torch.tensor copies, so the
    # tensor and the ndarray never share memory.
    self.word_embeddings_np = np.array(embedding_rows)
    self.word_embeddings = torch.tensor(self.word_embeddings_np,
                                        dtype=torch.get_default_dtype())
    self.word_embeddings_device = self.word_embeddings.to(self.device)

    self.number_of_words = len(self.words)

    self.loss_weighting = loss_weighting

    self.training_steps = 0
word2vec_model=word2vec_model, return_func=lambda x: word2vec_padding(x, 65, embedding_size)) with open(os.getcwd() + '/data/zork_walkthrough_' + task + '.txt', 'rb') as f: data = pickle.load(f) states = [word2vec_parser(state) for state in data['states']] raw_actions = data['actions'] actions = [] noise = MultivariateNormal(torch.zeros(50), torch.eye(50)) dictionary = set() for action in raw_actions: vect = 0 for token in tokenizer(action): dictionary.add(token) vect += word2vec_model[token] sampled_noise = noise.sample().numpy() normalized_noise = snr * np.linalg.norm(vect) * sampled_noise / np.linalg.norm(sampled_noise) actions.append(torch.Tensor(vect + normalized_noise).to(device)) action_vocabulary = {} for word in dictionary: action_vocabulary[word] = word2vec_model[word] action_vocabulary[''] = [0 for _ in range(len(action_vocabulary['open']))] dictionary = list(dictionary) embedding_size = len(action_vocabulary['open'])
noise = MultivariateNormal(torch.zeros(50), torch.eye(50)) #dictionary = ['pray', 'yellow', 'trapdoor', 'open', 'bell', 'touch', 'pile', 'trunk', 'sack', 'inflate', 'southeast', # 'of', 'move', 'match', 'figurine', 'railing', 'with', 'map', 'mirror', 'wind', 'examine', 'north', 'out', # 'trident', 'turn', 'skull', 'throw', 'northwest', 'case', 'bag', 'red', 'press', 'jewels', 'east', 'pump', # 'bolt', 'rusty', 'window', 'douse', 'boat', 'bracelet', 'matchbook', 'basket', 'book', 'coffin', 'bar', # 'rug', 'lid', 'drop', 'nasty', 'wrench', 'light', 'sand', 'bauble', 'kill', 'tie', 'painting', 'sword', # 'wave', 'in', 'south', 'northeast', 'ring', 'canary', 'lower', 'egg', 'all', 'to', 'candles', 'page', # 'and', 'echo', 'emerald', 'tree', 'from', 'rope', 'troll', 'screwdriver', 'torch', 'enter', 'coal', 'go', # 'look', 'shovel', 'knife', 'down', 'take', 'switch', 'prayer', 'launch', 'diamond', 'read', 'up', 'get', # 'scarab', 'west', 'land', 'southwest', 'climb', 'thief', 'raise', 'wait', 'odysseus', 'button', 'sceptre', # 'lamp', 'chalice', 'garlic', 'buoy', 'pot', 'label', 'put', 'dig', 'machine', 'close', 'walk', 'run', 'hit', 'attack'] dictionary = set() for action in raw_actions: for token in tokenizer(action): dictionary.add(token) for ambiguity in ambiguities: for token in ambiguities[ambiguity]: dictionary.add(token) dictionary = list(dictionary) dictionary.sort() print(dictionary) for action in raw_actions: vect = 0 bow = torch.zeros(len(dictionary), device=device) for token in tokenizer(action): vect += word2vec_model[token] bow[dictionary.index(token)] += 1