def __init__(self):
    """Set up name indices, the observation layout, and the task list.

    Reads the module-level INGREDIENTS, RECIPES and HINTS tables, interns
    their names through two ``util.Index`` objects (``self.index`` for
    ingredient/goal names, ``self.vocab`` for hint words), and derives the
    size attributes (``n_obs``, ``n_act``, ``n_vocab``, ``n_tasks``, ...).
    """
    # Populated at the end of this method, one entry per hint.
    self.tasks = []

    self.index = util.Index()
    self.vocab = util.Index()

    # Ingredients are interned first, then recipe outputs and inputs.
    self.ingredients = [self.index.index(name) for name in INGREDIENTS]
    # Kept as a single dict comprehension on purpose: the order in which
    # names reach self.index follows the interpreter's key/value
    # evaluation order for comprehensions.
    self.recipes = {
        self.index.index(output): {self.index.index(part) for part in parts}
        for output, parts in RECIPES.items()
    }

    # Each hint becomes (interned goal id, raw word sequence); every word
    # is also registered in the vocabulary.
    self.hints = []
    for goal_name, words in HINTS:
        goal_id = self.index.index(goal_name)
        self.hints.append((goal_id, words))
        for word in words:
            self.vocab.index(word)

    # Two-way mapping between ingredient ids and observation slots.
    self.kind_to_obs = {}
    self.obs_to_kind = {}
    for kind in self.ingredients:
        slot = len(self.kind_to_obs)
        self.kind_to_obs[kind] = slot
        self.obs_to_kind[slot] = kind

    # Feature-vector length; the formula is kept verbatim (what each term
    # encodes is decided by the observation code, not visible here).
    n_kinds = len(self.kind_to_obs)
    n_names = len(self.index)
    self.n_obs = 2 * WINDOW_SIZE * WINDOW_SIZE * n_kinds + n_names + 4
    self.n_features = self.n_obs

    self.n_act = N_ACTIONS
    self.n_actions = self.n_act
    self.is_discrete = True

    # NOTE(review): hard-coded; confirm it matches the longest hint in HINTS.
    self.max_hint_len = 3
    self.n_vocab = len(self.vocab)
    self.random = util.next_random()

    self.START = START
    self.STOP = STOP

    # One task per hint; the hint's word sequence is not passed to the
    # task (third argument is None).
    self.tasks = [
        Minicraft2Task(task_id, goal, None)
        for task_id, (goal, _) in enumerate(self.hints)
    ]
    self.n_tasks = len(self.tasks)
    self.n_train = len(TRAIN_IDS)
    self.n_val = 0
    self.n_test = len(TEST_IDS)

    self.demos = {}
gflags.DEFINE_boolean("infer_hyp", False, "use hypotheses at test time") gflags.DEFINE_string("restore", None, "model to restore") gflags.DEFINE_float("concept_prior", None, "place a normal prior on concept representations") gflags.DEFINE_integer( "adapt_reprs", 100, "number of representations to sample when doing adaptation") gflags.DEFINE_integer( "adapt_samples", 1000, "number of episodes to spend evaluating sampled representations") N_EMBED = 64 N_HIDDEN = 64 random = util.next_random() class Policy(object): def __init__(self, task): self.task = task self.t_state = tf.placeholder(tf.float32, (None, task.n_features)) self.t_action = tf.placeholder(tf.int32, (None, )) self.t_reward = tf.placeholder(tf.float32, (None, )) self.t_hint = tf.placeholder(tf.int32, (None, None)) self.t_hint_len = tf.placeholder(tf.int32, (None, )) self.t_task = tf.placeholder(tf.int32, (None, )) self.t_last_hyp = tf.placeholder(tf.int32, (None, ), "last_hyp") self.t_last_hyp_hidden = tf.placeholder(tf.float32,
else: new_ingredients.append(ing) ingredients = new_ingredients specialized = [] for ingredient in ingredients: if "?" in ingredient: specialized.append(ingredient.replace("?", variants.pop(0))) else: specialized.append(ingredient) #print goal, specialized HINTS.append((goal, [START] + specialized + [STOP])) for ingredient in INGREDIENTS: HINTS.append((ingredient, [START, ingredient, STOP])) util.next_random().shuffle(HINTS) #for x in RECIPES.items(): # print x # #print #for x in HINTS: # print x #for i, (k, v) in enumerate(HINTS): # print i, k, v #HINTS = [ # ("wood", ["wood"]), # ("ore", ["ore"]),