import glob
import os

import numpy as np
import tensorflow as tf

# `cwd`, `root`, `load_embeddings`, `textworld_vocab`, and the `nn` module
# (which defines AlphaTextWorldNet) are provided by the surrounding project
embeddingsdir = os.path.join(cwd, "../glove.6B/")
embedding_dim = 100
embedding_fdim = 64
embeddings, vocab = load_embeddings(
    embeddingsdir=embeddingsdir,
    embedding_dim=embedding_dim,  # try 50
    embedding_fdim=embedding_dim,  # full dimension here; subsampled below
    seed=None,
    vocab=textworld_vocab)

# reduce the feature dimension: keep a random subset of embedding_fdim of
# the embedding_dim feature rows
index = np.random.permutation(embedding_dim)[:embedding_fdim]
embeddings = embeddings[index, :]

# instantiate the network and run one dummy forward pass on placeholder
# inputs so the subclassed model builds its weights before any are loaded
network = nn.AlphaTextWorldNet(embeddings, vocab)
network(
    inputs={
        'memory_input': tf.constant([[0]], tf.int32),
        'cmdlist_input': tf.constant([[0]], tf.int32),
        'location_input': tf.constant([0], tf.int32),
        'cmdprev_input': tf.constant([[0]], tf.int32),
        'ents2id': {".": 0},
        'entvocab_input': tf.constant([[0]], tf.int32),
    },
    training=True)

# load the latest weights if available
modeldir = os.path.join(cwd, "trained_models/")
models = glob.glob(os.path.join(modeldir, "*.h5"))
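# a minimal sketch of restoring from the newest file (assumptions:
# AlphaTextWorldNet subclasses tf.keras.Model, and the most recently
# modified file holds the weights to resume from):
if models:
    latest = max(models, key=os.path.getmtime)
    network.load_weights(latest)
    print("Loaded weights from {}".format(latest))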
# Example 2: train the network on previously collected self-play data.
# The opening of this snippet is missing; a minimal reconstruction, assuming
# `data` is an iterable of per-step dicts gathered during play:
rewards_list, cmdlist_list, counts_list, memory_list = [], [], [], []
for d in data:
    rewards_list.append(d['reward'])
    cmdlist_list.append(d['cmdlist'])
    counts_list.append(d['counts'])
    memory_list.append(d['memory'])

# shuffle all four lists with one permutation so each (cmdlist, memory,
# counts, reward) example stays aligned
N = len(cmdlist_list)
idx = np.random.permutation(N)
cmdlist_list = [cmdlist_list[i] for i in idx]
memory_list = [memory_list[i] for i in idx]
counts_list = [counts_list[i] for i in idx]
rewards_list = [rewards_list[i] for i in idx]

print("number data points")
print(len(cmdlist_list))

# fresh model instance; the checkpoint below restores its weights if any exist
model = nn.AlphaTextWorldNet(embeddings, vocab)

optim = tf.optimizers.Nadam(learning_rate=0.00001,
                            clipnorm=30.0,
                            beta_1=0.9,
                            beta_2=0.98)
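# note: with Keras optimizers, clipnorm rescales any gradient tensor whose
# L2 norm exceeds 30.0, which guards against exploding gradients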

ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optim, net=model)
manager = tf.train.CheckpointManager(ckpt, os.path.join(root, "ckpts"),
                                     max_to_keep=100)
ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    print("Restored from {}".format(manager.latest_checkpoint))
else:
    print("Initializing from scratch.")