Esempio n. 1
0
    def __init__(self, actions, task='egg', name=None, parser=lambda x: x,
                 input_length=None, input_width=1, history_size=1,
                 model_type='MLP', device=torch.device('cpu'),
                 pomdp_mode=True, sparse_reward=True):
        """Record the agent configuration and create the game environment.

        ``input_length`` must be supplied even though it defaults to ``None``;
        the constructor asserts on it. ``name`` falls back to the empty string
        when omitted. ``self.network`` starts out as ``None``. The environment
        is built through ``self._create_env`` from the ``'ZorkGym/gym/zork1.z5'``
        game file, the task name and the reward/observability settings stored
        on the instance.
        """
        assert input_length is not None

        # Where tensors live, and a label for this agent.
        self.device = device
        self.name = '' if name is None else name

        # Action space.
        self.actions = actions
        self.action_size = len(actions)

        # State parsing and input geometry.
        self.parser = parser
        self.input_length, self.input_width = input_length, input_width
        self.history_size = history_size

        # Settings handed to the environment factory below.
        self.success_reward = 0
        self.network = None
        self.pomdp_mode = pomdp_mode
        self.sparse_reward = sparse_reward

        def passthrough(x):
            return x

        def tokenize(x):
            return tokenizer(x)

        self.env = self._create_env(
            task,
            passthrough,
            tokenize,
            'ZorkGym/gym/zork1.z5',
            self.success_reward,
            self.pomdp_mode,
            self.sparse_reward,
        )

        # Assigned only after the environment exists, as in the original order.
        self.model_type = model_type
Esempio n. 2
0
 def _create_action_embeddings(self, action_parser):
     """Embed every entry of ``self.actions`` and stack the results.

     Each action is tokenized, wrapped in a one-element list and passed to
     ``action_parser``; element ``[0, 0]`` of the parser output is kept as
     that action's embedding. The embeddings are combined with
     ``torch.stack`` in the order of ``self.actions``.
     """
     return torch.stack([
         action_parser([tokenizer(action)])[0, 0] for action in self.actions
     ])
Esempio n. 3
0
def main():
    """Run the experiments selected on the command line.

    Reads the parsed arguments, picks the task-specific settings (action
    set, replay buffer size, number of time steps, project name, game seed),
    derives the two-word candidate sentences from the action words, builds
    the state/action parsers, and then, for every entry of
    ``args.simulations``, constructs the matching agent and calls its
    ``learn`` method.

    Raises:
        NotImplementedError: for an unknown ``args.task`` or an unknown
            simulation name.
    """
    args = parse_args()

    # The loss weighting is 0.0 when --grad_loss is set, 1.0 otherwise.
    if args.grad_loss:
        loss_weighting = 0.0
    else:
        loss_weighting = 1.0

    # Words in `verbs` are placed first when a two-word sentence is assembled
    # further down.
    verbs = ['go', 'take', 'open', 'grab', 'run', 'walk', 'climb']
    vocabulary = load_list_from_file('./data/vocabulary.txt')

    # Default action set (used as-is by task 0).
    basic_actions = [
        'open egg', 'go east', 'go west', 'go north', 'go south', 'go up',
        'go down', 'look', 'take egg'
    ]

    # Single-word action vocabulary used by tasks 1 and 2.
    dictionary = [
        'pray', 'yellow', 'trapdoor', 'open', 'bell', 'touch', 'pile', 'trunk',
        'sack', 'inflate', 'southeast', 'of', 'move', 'match', 'figurine',
        'railing', 'with', 'map', 'mirror', 'wind', 'examine', 'north', 'out',
        'trident', 'turn', 'skull', 'throw', 'northwest', 'case', 'bag', 'red',
        'press', 'jewels', 'east', 'pump', 'bolt', 'rusty', 'window', 'douse',
        'boat', 'bracelet', 'matchbook', 'basket', 'book', 'coffin', 'bar',
        'rug', 'lid', 'drop', 'nasty', 'wrench', 'light', 'sand', 'bauble',
        'kill', 'tie', 'painting', 'sword', 'wave', 'in', 'south', 'northeast',
        'ring', 'canary', 'lower', 'egg', 'all', 'to', 'candles', 'page',
        'and', 'echo', 'emerald', 'tree', 'from', 'rope', 'troll',
        'screwdriver', 'torch', 'enter', 'coal', 'go', 'look', 'shovel',
        'knife', 'down', 'take', 'switch', 'prayer', 'launch', 'diamond',
        'read', 'up', 'get', 'scarab', 'west', 'land', 'southwest', 'climb',
        'thief', 'raise', 'wait', 'odysseus', 'button', 'sceptre', 'lamp',
        'chalice', 'garlic', 'buoy', 'pot', 'label', 'put', 'dig', 'machine',
        'close'
    ]

    actions = basic_actions

    optimize_memory = False
    sparse_reward = True
    # NOTE(review): actor_train_start is never read in this function.
    actor_train_start = 0
    eps_start = 1.0

    # Default evaluation configurations, keyed by a label; each one sets the
    # 'number_of_neighbors' value passed to the agent as test parameters.
    test_params = {
        'nn=-1': {
            'number_of_neighbors': -1
        },
        'nn=1': {
            'number_of_neighbors': 1
        },
        'nn=3': {
            'number_of_neighbors': 3
        },
        'nn=11': {
            'number_of_neighbors': 11
        },
    }

    # Task selection: each branch fixes the buffer size, the training length,
    # the project name and the task; some also replace the action set,
    # reward sparsity, test parameters or game seed.
    game_seed = 52
    if args.task == 0:
        buffer_size = 20000
        time_steps = 100000
        project_name = 'egg_quest_minimal_actions'
        task = 'egg'
    elif args.task == 1:
        buffer_size = 20000
        time_steps = 2000000
        project_name = 'egg_quest_extended_actions'
        actions = dictionary
        task = 'egg'
    elif args.task == -1:
        buffer_size = 20000
        time_steps = 100000
        project_name = 'egg_quest_baby_actions'
        actions = ['open', 'egg', 'north', 'climb', 'tree', 'take']
        task = 'egg'
    elif args.task == 2:
        buffer_size = 40000
        time_steps = 1000000
        project_name = 'troll_imitation'
        actions = dictionary
        task = 'troll'
        sparse_reward = False

        # Task 2 evaluates with the single neighbor count given by --nn.
        test_params = {
            'nn': {
                'number_of_neighbors': args.nn
            },
        }
        game_seed = 12
    elif args.task == 3:
        buffer_size = 40000
        time_steps = 1000000
        project_name = 'troll'
        actions = [
            'north', 'south', 'east', 'west', 'open window', 'take sword',
            'take lamp', 'move rug', 'open trapdoor', 'go down', 'light lamp',
            'kill troll with sword'
        ]
        task = 'troll'
        sparse_reward = False
    else:
        raise NotImplementedError

    # Collect the distinct tokens of all actions in first-seen order. The
    # empty string is inserted first, so pairing it with a word below yields
    # that word followed by a single trailing space.
    words = list()
    words.append('')
    for action in actions:
        tokens = tokenizer(action)
        for token in tokens:
            if token not in words:
                words.append(token)

    # Build one sentence per unordered pair of distinct words. The word that
    # appears earlier in `words` goes first only if it is listed in `verbs`;
    # otherwise the later word goes first.
    sentences = list()
    for i, word1 in enumerate(words):
        for word2 in words[i + 1:]:
            if word1 in verbs:
                sentences.append(word1 + ' ' + word2)
            else:
                sentences.append(word2 + ' ' + word1)

    if args.pomdp:
        project_name = project_name + '_pomdp'

    seed = args.seed
    disable_cuda = False

    # NOTE(review): the seeding calls below are commented out, so `seed` is
    # only forwarded through train_params and the visualisation name here.
    #random.seed(seed)
    #torch.manual_seed(seed)
    if torch.cuda.is_available() and not disable_cuda:
        # free_gpu = get_free_gpu()
        device = torch.device('cuda')  # + str(free_gpu))
        #torch.cuda.manual_seed(seed)
        torch.backends.cudnn.enabled = False
    else:
        device = torch.device('cpu')

    vocab_size = len(vocabulary)
    bow_parser = BagOfWords(
        vocabulary=vocabulary,
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1))

    # word2vec_model_path = os.getcwd() + '/../ZorkGym/text_utils/GoogleNews-vectors-negative300.bin'
    # word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)
    word2vec_model = glove_api.load('glove-wiki-gigaword-50')
    embedding_size = word2vec_model.vector_size
    # NOTE(review): every lambda in this function that mentions
    # `embedding_size` looks the variable up when it is called, not when it
    # is defined. `embedding_size` is reassigned in the 'dddpg_mlp' and
    # 'ompddpg_*' branches below, so these callbacks see the latest value.
    word2vec_parser = Word2Vec(
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(0),
        word2vec_model=word2vec_model,
        return_func=lambda x: word2vec_padding(x, 65, embedding_size))
    # NOTE(review): onehot_parser is not referenced again in this function.
    onehot_parser = OneHotParser(
        type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(1),
        vocabulary=actions)
    """
        Experiments from here and below
    """
    # NOTE(review): state mutated inside one iteration (test_params,
    # embedding_size, words, word2vec_parser.return_func and
    # word2vec_parser.word2vec_model) is not reset before the next
    # simulation, so results can depend on the order of args.simulations.
    for simulation in args.simulations:
        tau = 0.0
        train_params = {'seed': seed}
        if simulation == 'dqn_mlp':
            test_params = None
            agent = DQN(actions=sentences,
                        model_type='MLP',
                        parser=bow_parser,
                        input_length=vocab_size + 1,
                        input_width=1,
                        history_size=1,
                        device=device,
                        pomdp_mode=args.pomdp,
                        task=task,
                        sparse_reward=sparse_reward)

        elif simulation == 'dqn_cnn':
            test_params = None
            word2vec_parser.return_func = lambda x: word2vec_padding(
                x, 65, embedding_size)

            agent = DQN(actions=sentences,
                        model_type='CNN',
                        parser=word2vec_parser,
                        input_length=embedding_size,
                        input_width=65,
                        history_size=1,
                        device=device,
                        pomdp_mode=args.pomdp,
                        task=task,
                        sparse_reward=sparse_reward)

        elif simulation == 'drrn_mlp':
            test_params = None
            agent = DRRN(actions=sentences,
                         model_type='MLP',
                         parser=bow_parser,
                         input_length=vocab_size + 1,
                         input_width=1,
                         history_size=1,
                         device=device,
                         pomdp_mode=args.pomdp,
                         task=task,
                         sparse_reward=sparse_reward)
            tau = 0.2
        elif simulation == 'drrn_cnn':
            test_params = None
            agent = DRRN(actions=sentences,
                         model_type='CNN',
                         parser=word2vec_parser,
                         input_length=embedding_size,
                         input_width=65,
                         history_size=1,
                         device=device,
                         pomdp_mode=args.pomdp,
                         task=task,
                         sparse_reward=sparse_reward)
            tau = 0.2
        elif simulation == 'dddpg_mlp':
            # The shared word2vec_parser is repurposed as the action parser:
            # its return function is switched to word2vec_sum and its model
            # is replaced by a one-hot table over the sentence tokens.
            word2vec_parser.return_func = lambda x: word2vec_sum(
                x, embedding_size)

            # Distinct tokens of all sentences, in first-seen order.
            action_vocab_list = []
            for action in sentences:
                tokens = tokenizer(action)
                for token in tokens:
                    if token not in action_vocab_list:
                        action_vocab_list.append(token)
            action_vocabulary = {}
            # From here on embedding_size is the number of action tokens.
            embedding_size = len(action_vocab_list)
            for idx, action in enumerate(action_vocab_list):
                action_vocabulary[action] = np.zeros(embedding_size)
                action_vocabulary[action][idx] = 1.0
            word2vec_parser.word2vec_model = action_vocabulary

            train_params['number_of_neighbors'] = args.nn

            agent = DDDPG(actions=sentences,
                          state_parser=bow_parser,
                          action_parser=word2vec_parser,
                          embedding_size=embedding_size,
                          input_length=vocab_size + 1,
                          input_width=1,
                          history_size=1,
                          loss_weighting=loss_weighting,
                          model_type='MLP',
                          device=device,
                          pomdp_mode=args.pomdp,
                          task=task,
                          sparse_reward=sparse_reward)

        elif simulation == 'dddpg_cnn':
            # A separate parser is created for actions; its padding return
            # function is immediately replaced by word2vec_sum.
            action_word2vec_parser = Word2Vec(
                type_func=lambda x: torch.FloatTensor(x).to(device).unsqueeze(
                    0),
                word2vec_model=word2vec_model,
                return_func=lambda x: word2vec_padding(x, 65, embedding_size))
            action_word2vec_parser.return_func = lambda x: word2vec_sum(
                x, embedding_size)

            train_params['number_of_neighbors'] = args.nn

            agent = DDDPG(actions=sentences,
                          state_parser=word2vec_parser,
                          action_parser=action_word2vec_parser,
                          embedding_size=embedding_size,
                          input_length=embedding_size,
                          input_width=65,
                          history_size=1,
                          loss_weighting=loss_weighting,
                          model_type='CNN',
                          device=device,
                          pomdp_mode=args.pomdp,
                          task=task,
                          sparse_reward=sparse_reward)

        elif simulation == 'ompddpg_mlp':
            # `words` is rebound here from the ordered list built above to a
            # set of the sentence tokens.
            words = set()
            for action in sentences:
                for word in tokenizer(action):
                    words.add(word)
            # Word -> embedding table handed to the agent as its actions.
            action_vocabulary = {}
            if args.action_w2v:
                # Pre-trained vectors, plus an all-zero vector for ''.
                for word in words:
                    action_vocabulary[word] = word2vec_model[word]
                # Assumes 'open' is one of the action words.
                action_vocabulary[''] = [
                    0 for _ in range(len(action_vocabulary['open']))
                ]
            else:
                # One-hot vectors. NOTE(review): the index of each word comes
                # from iterating a set of strings, whose order is not
                # guaranteed to be the same from one interpreter run to the
                # next — confirm this is acceptable when a saved model is
                # reloaded through model_path.
                words.add('')
                for idx, word in enumerate(words):
                    action_vocabulary[word] = np.zeros(len(words))
                    action_vocabulary[word][idx] = 1.0

            embedding_size = len(action_vocabulary['open'])

            train_params['number_of_neighbors'] = args.nn

            agent = OMPDDPG(actions=action_vocabulary,
                            state_parser=bow_parser,
                            embedding_size=embedding_size,
                            input_length=vocab_size + 1,
                            input_width=1,
                            history_size=1,
                            model_type='MLP',
                            device=device,
                            pomdp_mode=args.pomdp,
                            loss_weighting=loss_weighting,
                            linear=args.linear,
                            improved_omp=args.improved_omp,
                            model_path=args.model_path,
                            task=task,
                            sparse_reward=sparse_reward)

        elif simulation == 'ompddpg_cnn':
            # Same action-vocabulary construction as in 'ompddpg_mlp'.
            words = set()
            for action in sentences:
                for word in tokenizer(action):
                    words.add(word)
            action_vocabulary = {}
            if args.action_w2v:
                for word in words:
                    action_vocabulary[word] = word2vec_model[word]
                action_vocabulary[''] = [
                    0 for _ in range(len(action_vocabulary['open']))
                ]
            else:
                words.add('')
                for idx, word in enumerate(words):
                    action_vocabulary[word] = np.zeros(len(words))
                    action_vocabulary[word][idx] = 1.0

            # NOTE(review): embedding_size now holds the action-vector length
            # and is also passed as input_length for the word2vec state
            # parser below; without --action_w2v this is the one-hot length
            # rather than the word-vector size — confirm this is intended.
            embedding_size = len(action_vocabulary['open'])

            train_params['number_of_neighbors'] = args.nn

            agent = OMPDDPG(actions=action_vocabulary,
                            state_parser=word2vec_parser,
                            embedding_size=embedding_size,
                            input_length=embedding_size,
                            input_width=65,
                            history_size=1,
                            model_type='CNN',
                            device=device,
                            pomdp_mode=args.pomdp,
                            loss_weighting=loss_weighting,
                            linear=args.linear,
                            improved_omp=args.improved_omp,
                            model_path=args.model_path,
                            task=task,
                            sparse_reward=sparse_reward)

        else:
            raise NotImplementedError

        # Name used for visualisation: the simulation name, plus the neighbor
        # count for everything except DQN/DRRN, plus optional flags.
        model_name = simulation

        if not ('dqn' in simulation or 'drrn' in simulation):
            model_name += '/neighbors=' + str(args.nn)
        if args.action_w2v:
            model_name += '/w2v'
        if args.linear:
            model_name += '/linear'

        agent.learn(
            total_timesteps=time_steps,
            buffer_size=buffer_size,
            visualize=True,
            vis_name=project_name + '/' + model_name + '/' + str(seed),
            optimize_memory=optimize_memory,
            tau=tau,
            learn_start_steps=0,  #20000,
            train_params=train_params,
            test_params=test_params,
            eps_start=eps_start,
            test_interval=args.test_interval,
            game_seed=game_seed)
Esempio n. 4
0
    def __init__(self, actions, task='egg', name=None, state_parser=lambda x: x, input_length=None, input_width=None,
                 history_size=1, model_type='MLP', embedding_size=None, device=torch.device('cpu'), pomdp_mode=True,
                 loss_weighting=0, linear=False, improved_omp=False, model_path=None, sparse_reward=True):
        """Set up an OMPDDPG agent.

        Args:
            actions: Mapping from each action word to its embedding vector,
                e.g. ``{'open': [...], 'egg': [...], '': [...]}``. It is
                iterated as a flat word -> vector dictionary to build the
                word list and the embedding matrices.
            task, name, input_length, input_width, history_size, model_type,
            device, pomdp_mode, sparse_reward: Forwarded unchanged to the
                parent constructor.
            state_parser: Forwarded to the parent constructor as ``parser``.
            embedding_size: Length of one action embedding; required.
            loss_weighting: Stored on the instance as ``loss_weighting``.
            linear: Stored on the instance as ``linear``.
            improved_omp: Stored on the instance as ``improved_omp``.
            model_path: Optional path to a saved state dict. When given, it
                is loaded into ``self.network[0]``, ``sparse_drrn`` is set to
                ``True`` and ``hash`` starts as an empty dict; otherwise
                ``sparse_drrn`` is ``False`` and ``hash`` is ``None``.

        Raises:
            AssertionError: If ``embedding_size`` is ``None``.
        """
        super(OMPDDPG, self).__init__(actions=actions, task=task, name=name, parser=state_parser,
                                      input_length=input_length, input_width=input_width, history_size=history_size,
                                      model_type=model_type, device=device, pomdp_mode=pomdp_mode,
                                      sparse_reward=sparse_reward)

        assert embedding_size is not None

        # These attributes are assigned before the network is created, as in
        # the original ordering.
        self.linear = linear
        self.embedding_size = embedding_size
        self.improved_omp = improved_omp

        self.sentence_length = 4

        self.network = self._create_network()

        if model_path is not None:
            # map_location lets a checkpoint saved on another device (for
            # example CUDA) be loaded on the device this agent runs on;
            # without it, loading a CUDA checkpoint on a CPU-only host fails.
            self.network[0].load_state_dict(torch.load(model_path, map_location=device))
            self.sparse_drrn = True
            self.hash = {}
        else:
            self.sparse_drrn = False
            self.hash = None

        self.verbs = ['raise', 'turn', 'bring', 'grab', 'run', 'close', 'go', 'heave', 'drop', 'ring', 'kill', 'climb',
                      'push', 'light', 'lower', 'wave', 'enter', 'read', 'hit', 'put', 'walk', 'press', 'get', 'throw',
                      'examine', 'take', 'touch', 'douse', 'move', 'dig', 'open', 'inflate']

        # all_actions and synonyms are merely a remark. not in use in the code.
        all_actions = ['button', 'tie', 'turn', 'land', 'ring', 'coffin', 'wait', 'match', 'bolt', 'douse', 'boat',
                       'light', 'pile', 'launch', 'and', 'basket', 'window', 'switch', 'wrench', 'map', 'knife', 'wave',
                       'trapdoor', 'dig', 'lamp', 'wind', 'shovel', 'lid', 'emerald', 'scarab', 'case', 'figurine',
                       'from', 'railing', 'odysseus', 'bag', 'enter', 'drop', 'troll', 'label', 'east', 'coal', 'red',
                       'pot', 'get', 'trident', 'buoy', 'bracelet', 'move', 'mirror', 'rope', 'trunk', 'with',
                       'sceptre', 'all', 'torch', 'canary', 'north', 'rusty', 'read', 'yellow', 'examine', 'take',
                       'candles', 'rug', 'pray', 'echo', 'prayer', 'south', 'press', 'screwdriver', 'egg', 'chalice',
                       'page', 'bauble', 'inflate', 'painting', 'matchbook', 'to', 'put', 'out', 'look', 'in', 'touch',
                       'southeast', 'southwest', 'sand', 'up', 'kill', 'bell', 'sword', 'raise', 'throw', 'close',
                       'down', 'thief', 'skull', 'bar', 'open', 'diamond', 'west', 'garlic', 'northeast', 'nasty', 'of',
                       'northwest', 'machine', 'jewels', 'book', 'sack', 'pump', 'lower']
        synonyms = ['walk', 'run', 'take', 'place']

        long_sentences = [
            'kill troll with sword',
            'tie rope to railing',
            'put coffin in case',
            'light candles with match',
            'put skull in case',
            'put bar in case',
            'get knife and bag',
            'put bag in case',
            'drop rusty knife',
            'press red button',
            'press yellow button',
            'turn bolt with wrench',
            'put trunk in case',
            'put trident in case',
            'throw sceptre in boat',
            'get out of boat',
            'put sceptre in case',
            'put pot in case',
            'put emerald in case',
            'put scarab in case',
            'get rusty knife',
            'get nasty knife',
            'kill thief with knife',
            'drop rusty knife',
            'drop nasty knife',
            'put painting in case',
            'put chalice in case',
            'put egg in case',
            'put canary in case',
            'put bauble in case',
            'put jewels in case',
            'put torch in basket',
            'put screwdriver in basket',
            'put coal in basket',
            'get all from basket',
            'put coal in machine',
            'turn switch with screwdriver',
            'put diamond in basket',
            'put torch in basket',
            'put screwdriver in basket',
            'put diamond in case',
            'put torch in case',
            'put bracelet in case',
            'put figurine in case',
            'put trunk in case',
            'take torch and lamp',
            'get rope and knife'
        ]

        # Keep one (token set, sentence) pair per distinct token set; the
        # first sentence that produces a given set is the one retained.
        self.long_sentence_sets = []
        seen_token_sets = set()
        for sentence in long_sentences:
            set_of_tokens = set(tokenizer(sentence))
            key = frozenset(set_of_tokens)
            if key in seen_token_sets:
                continue
            seen_token_sets.add(key)
            self.long_sentence_sets.append((set_of_tokens, sentence))

        # Words and their embeddings, in the iteration order of `actions`.
        self.words = list(self.actions)
        self.word_embeddings_np = [self.actions[word] for word in self.words]

        # The same embedding matrix is kept as a torch tensor, a numpy array
        # and a tensor on the agent's device.
        self.word_embeddings = torch.Tensor(self.word_embeddings_np)
        self.word_embeddings_np = np.array(self.word_embeddings_np)
        self.word_embeddings_device = torch.Tensor(self.word_embeddings).to(self.device)

        self.number_of_words = len(self.words)
        self.loss_weighting = loss_weighting

        self.training_steps = 0
                           word2vec_model=word2vec_model,
                           return_func=lambda x: word2vec_padding(x, 65, embedding_size))

# Load the recorded walkthrough for the current task.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# confirm this data file always comes from a trusted source.
with open(os.getcwd() + '/data/zork_walkthrough_' + task + '.txt', 'rb') as f:
    data = pickle.load(f)

# Parse every recorded state with the word2vec parser; the recorded actions
# are kept as-is in raw_actions.
states = [word2vec_parser(state) for state in data['states']]
raw_actions = data['actions']
actions = []

# Zero-mean, identity-covariance Gaussian in 50 dimensions.
# presumably 50 matches the word-vector size — TODO confirm.
noise = MultivariateNormal(torch.zeros(50), torch.eye(50))

# For each recorded action: collect its tokens, sum their word vectors, and
# store the sum plus a noise vector as a tensor on `device`.
dictionary = set()
for action in raw_actions:
    vect = 0
    for token in tokenizer(action):
        dictionary.add(token)
        vect += word2vec_model[token]

    # Rescale the sampled noise so that its norm equals snr * ||vect||.
    sampled_noise = noise.sample().numpy()
    normalized_noise = snr * np.linalg.norm(vect) * sampled_noise / np.linalg.norm(sampled_noise)
    actions.append(torch.Tensor(vect + normalized_noise).to(device))

# Word -> vector table for every token seen above, plus an all-zero vector
# for the empty string. Assumes 'open' is among the recorded tokens.
action_vocabulary = {}
for word in dictionary:
    action_vocabulary[word] = word2vec_model[word]
action_vocabulary[''] = [0 for _ in range(len(action_vocabulary['open']))]
dictionary = list(dictionary)

embedding_size = len(action_vocabulary['open'])
# NOTE(review): this rebuilds the same distribution that was created above.
noise = MultivariateNormal(torch.zeros(50), torch.eye(50))

#dictionary = ['pray', 'yellow', 'trapdoor', 'open', 'bell', 'touch', 'pile', 'trunk', 'sack', 'inflate', 'southeast',
#              'of', 'move', 'match', 'figurine', 'railing', 'with', 'map', 'mirror', 'wind', 'examine', 'north', 'out',
#              'trident', 'turn', 'skull', 'throw', 'northwest', 'case', 'bag', 'red', 'press', 'jewels', 'east', 'pump',
#              'bolt', 'rusty', 'window', 'douse', 'boat', 'bracelet', 'matchbook', 'basket', 'book', 'coffin', 'bar',
#              'rug', 'lid', 'drop', 'nasty', 'wrench', 'light', 'sand', 'bauble', 'kill', 'tie', 'painting', 'sword',
#              'wave', 'in', 'south', 'northeast', 'ring', 'canary', 'lower', 'egg', 'all', 'to', 'candles', 'page',
#              'and', 'echo', 'emerald', 'tree', 'from', 'rope', 'troll', 'screwdriver', 'torch', 'enter', 'coal', 'go',
#              'look', 'shovel', 'knife', 'down', 'take', 'switch', 'prayer', 'launch', 'diamond', 'read', 'up', 'get',
#              'scarab', 'west', 'land', 'southwest', 'climb', 'thief', 'raise', 'wait', 'odysseus', 'button', 'sceptre',
#              'lamp', 'chalice', 'garlic', 'buoy', 'pot', 'label', 'put', 'dig', 'machine', 'close', 'walk', 'run', 'hit', 'attack']

# Rebuild the dictionary from scratch (the list built above is discarded):
# all tokens of the recorded actions plus every token listed under each key
# of `ambiguities`, sorted so that the order is stable.
dictionary = set()
for action in raw_actions:
    for token in tokenizer(action):
        dictionary.add(token)
for ambiguity in ambiguities:
    for token in ambiguities[ambiguity]:
        dictionary.add(token)
dictionary = list(dictionary)
dictionary.sort()
print(dictionary)

for action in raw_actions:
    vect = 0
    bow = torch.zeros(len(dictionary), device=device)
    for token in tokenizer(action):
        vect += word2vec_model[token]
        bow[dictionary.index(token)] += 1