예제 #1
0
    def __init__(self, config):
        """Load the cookbook and hints named by ``config`` and index all tasks.

        Args:
            config: configuration object. This method reads
                ``config.recipes``, ``config.trainer.hints`` (path of a YAML
                file mapping a goal expression to its list of steps),
                ``config.seed`` and ``config.model.use_args``.
        """
        # load configs
        self.config = config
        self.cookbook = Cookbook(config.recipes)
        self.subtask_index = util.Index()
        self.task_index = util.Index()
        with open(config.trainer.hints) as hints_f:
            # safe_load instead of load: calling yaml.load() without a Loader
            # is rejected by PyYAML >= 6 and can construct arbitrary Python
            # objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # initialize randomness
        self.random = np.random.RandomState(config.seed)

        # organize task and subtask indices
        self.tasks_by_subtask = defaultdict(list)
        self.tasks = []
        for hint_key, hint in self.hints.items():
            goal = util.parse_fexp(hint_key)
            goal = (self.subtask_index.index(goal[0]),
                    self.cookbook.index[goal[1]])
            if config.model.use_args:
                # Each step is an expression that parses to a pair; the first
                # element is indexed as a subtask, the second is looked up in
                # the cookbook index.
                parsed = [util.parse_fexp(s) for s in hint]
                steps = tuple(
                    (self.subtask_index.index(a), self.cookbook.index[b])
                    for a, b in parsed)
                subtasks = [subtask for subtask, _ in steps]
            else:
                # Each step is a bare subtask name.
                steps = tuple(self.subtask_index.index(a) for a in hint)
                subtasks = steps
            task = Task(goal, steps)
            # A task is registered once per occurrence of a subtask, so a
            # subtask repeated within the steps lists the task repeatedly.
            for subtask in subtasks:
                self.tasks_by_subtask[subtask].append(task)
            self.tasks.append(task)
            self.task_index.index(task)
예제 #2
0
    def __init__(self):
        """Create the world and cookbook, load ``hints.yaml`` and index tasks.

        The hints file is read from the directory that contains this source
        file. Every entry maps a goal expression to a list of subtask names.
        """
        self.world = CraftWorld()
        self.cookbook = Cookbook()
        self.subtask_index = util.Index()
        self.task_index = util.Index()
        dir_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
            # safe_load instead of load: calling yaml.load() without a Loader
            # is rejected by PyYAML >= 6 and can construct arbitrary Python
            # objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # initialize randomness
        # self.random = np.random.RandomState(0)
        # TODO: decide whether this environment needs its own random state.

        # organize task and subtask indices
        self.tasks_by_subtask = defaultdict(list)
        self.tasks = []
        for hint_key, hint in self.hints.items():
            goal = util.parse_fexp(hint_key)
            goal = (self.subtask_index.index(goal[0]),
                    self.cookbook.index[goal[1]])
            steps = tuple(self.subtask_index.index(a) for a in hint)
            task = Task(goal, steps)
            # A task is registered once per occurrence of a subtask.
            for subtask in steps:
                self.tasks_by_subtask[subtask].append(task)
            self.tasks.append(task)
            self.task_index.index(task)
예제 #3
0
    def __init__(self,
                 recipes_path,
                 hints_path,
                 max_steps=100,
                 seed=0,
                 visualise=False,
                 reuse_environments=False):
        """Create the craft world, load the hints file and set up all tasks.

        Args:
            recipes_path: passed to ``craft.CraftWorld`` together with ``seed``.
            hints_path: path of the YAML file holding the tasks and their
                sub-steps (hints).
            max_steps: stored as ``self._max_steps``.
            seed: passed to ``craft.CraftWorld``.
            visualise: stored as ``self._visualise``.
            reuse_environments: when true, an (initially empty) per-instance
                environment cache ``self._env_cache`` is created.
        """
        self.subtask_index = util.Index()
        self.task_index = util.Index()
        self._max_steps = max_steps
        self._visualise = visualise
        self._reuse_environments = reuse_environments

        # Per task, we reuse the same environment, with the same layouts.
        # This should generate much easier tasks, on which agents can overfit.
        # Note that ``_env_cache`` only exists when reuse is enabled.
        if self._reuse_environments:
            self._env_cache = {}

        # create World
        self.world = craft.CraftWorld(recipes_path, seed)

        # Load the tasks with sub-steps (== hints)
        with open(hints_path) as hints_f:
            # safe_load instead of load: calling yaml.load() without a Loader
            # is rejected by PyYAML >= 6 and can construct arbitrary Python
            # objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # Setup all possible tasks
        self._init_tasks()
예제 #4
0
    def __init__(self):
        """Build the item index, hint vocabulary, observation layout and tasks.

        Everything is derived from the module-level constants INGREDIENTS,
        RECIPES, HINTS, WINDOW_SIZE, N_ACTIONS, TRAIN_IDS, TEST_IDS, START
        and STOP.
        """
        self.tasks = []
        self.index = util.Index()
        self.vocab = util.Index()

        # Items are indexed in a fixed order: ingredients, then recipes, then
        # hint goals.
        self.ingredients = [self.index.index(name) for name in INGREDIENTS]
        self.recipes = {
            self.index.index(k): set(self.index.index(vv) for vv in v)
            for k, v in RECIPES.items()
        }

        # Hints keep their raw word sequence; the words are only registered
        # in the vocabulary here, not replaced by their ids.
        self.hints = []
        for goal_name, words in HINTS:
            goal_id = self.index.index(goal_name)
            self.hints.append((goal_id, words))
            for word in words:
                self.vocab.index(word)

        # Two-way mapping between ingredient ids and observation slots.
        self.kind_to_obs = {}
        self.obs_to_kind = {}
        for kind in self.ingredients:
            slot = len(self.kind_to_obs)
            self.kind_to_obs[kind] = slot
            self.obs_to_kind[slot] = kind

        n_kinds = len(self.kind_to_obs)
        self.n_obs = (2 * WINDOW_SIZE * WINDOW_SIZE * n_kinds
                      + len(self.index)
                      + 4)
        self.n_act = N_ACTIONS
        self.n_actions = self.n_act
        self.n_features = self.n_obs
        self.is_discrete = True

        self.max_hint_len = 3
        self.n_vocab = len(self.vocab)
        self.random = util.next_random()

        self.START = START
        self.STOP = STOP

        # One task per hint; the hint's word sequence is not passed to the
        # task (the third argument is always None).
        self.tasks = [
            Minicraft2Task(task_id, goal, None)
            for task_id, (goal, _) in enumerate(self.hints)
        ]
        self.n_tasks = len(self.tasks)
        self.n_train = len(TRAIN_IDS)
        self.n_val = 0
        self.n_test = len(TEST_IDS)

        self.demos = {}
예제 #5
0
파일: regex2.py 프로젝트: lim0606/l3
    def __init__(self):
        """Load the regex corpus and convert every fold to indexed data.

        ``FLAGS.hint_type`` selects the hints attached to each example:
        ``"re"`` uses the characters of the example's ``"re"`` field, ``"nl"``
        uses every entry of ``"hints_aug"``, and ``"none"`` uses a single
        empty hint.
        """
        assert FLAGS.hint_type in ("re", "nl", "none")

        corpus_path = os.path.join(sys.path[0], "data/re2/corpus.json")
        with open(corpus_path) as corpus_f:
            corpus = json.load(corpus_f)

        self.hint_vocab = util.Index()
        self.str_vocab = util.Index()
        # The separator is registered before any corpus character.
        self.str_vocab.index(SEP)
        self.SEP = SEP
        self.START = START
        self.STOP = STOP

        folds = {}
        for fold in ["train", "val", "test"]:
            fold_data = []
            for example in corpus[fold]:
                if FLAGS.hint_type == "re":
                    hints = [[self.hint_vocab.index(c) for c in example["re"]]]
                elif FLAGS.hint_type == "nl":
                    hints = [
                        [self.hint_vocab.index(w) for w in hint]
                        for hint in example["hints_aug"]
                    ]
                elif FLAGS.hint_type == "none":
                    hints = [[]]

                # Input/output strings are indexed character by character.
                pairs = [
                    ([self.str_vocab.index(c) for c in inp],
                     [self.str_vocab.index(c) for c in out])
                    for inp, out in example["examples"]
                ]

                fold_data.append(FullDatum(hints, pairs))
            folds[fold] = fold_data

        self.train_data = folds["train"]
        self.val_data = folds["val"]
        self.test_data = folds["test"]
예제 #6
0
    def __init__(self):
        """Load bird features and captions and split the classes into folds.

        Reads ``CUB_feature_dict.pkl`` and ``captions.tsv`` from the
        ``hendricks_data`` directory under ``birds_path``. Captions are
        tokenised, wrapped in START/STOP and indexed in ``self.hint_vocab``.
        Classes are shuffled with a fixed seed and sliced into train/val/test.

        Raises:
            ValueError: if no caption belongs to the ``"cub_missing"`` class.
            AssertionError: if the number of remaining classes is not 200.
        """
        self.hint_vocab = util.Index()
        self.START = START
        self.STOP = STOP

        # Pickle data is binary: text mode fails on Python 3 and can corrupt
        # the stream on platforms that translate newlines.
        # NOTE: pickle.load can execute arbitrary code; only point this at a
        # trusted feature file.
        with open(os.path.join(birds_path, "hendricks_data", "CUB_feature_dict.pkl"), "rb") as feat_f:
            self.features = pickle.load(feat_f)

        # Maps "<class>/<file>" (the last two URL components) to the list of
        # indexed captions for that image.
        self.captions = {}
        with open(os.path.join(birds_path, "hendricks_data", "captions.tsv")) as capt_f:
            reader = csv.DictReader(capt_f, delimiter="\t")
            for row in reader:
                # Split "." and "," off as separate tokens.
                caption = row["Description"].lower().replace(".", " .").replace(",", " ,")
                toks = [START] + caption.split() + [STOP]
                toks = [self.hint_vocab.index(w) for w in toks]
                url = row["Input.image_url"]
                inst = "/".join(url.split("/")[-2:])
                self.captions.setdefault(inst, []).append(toks)

        classes = sorted(set(k.split("/")[0] for k in self.captions))
        classes.remove("cub_missing")
        # Fixed seed so the class split is reproducible across runs.
        shuf_random = np.random.RandomState(999)
        shuf_random.shuffle(classes)
        assert len(classes) == 200
        # NOTE(review): "test" starts at index 100, so the ten "val" classes
        # are also test classes. Left unchanged to keep existing splits;
        # confirm whether classes[110:200] was intended.
        data_classes = {
            "train": classes[:100],
            "val": classes[100:110],
            "test": classes[100:200]
        }

        # For every fold, group the feature keys ("<class>/<file>") by class.
        data_insts = {}
        for fold in ("train", "val", "test"):
            instances = {cls_name: [] for cls_name in data_classes[fold]}
            for key in self.features:
                cls_name, _ = key.split("/")
                if cls_name in instances:
                    instances[cls_name].append(key)
            data_insts[fold] = instances

        self.train_classes = data_classes["train"]
        self.val_classes = data_classes["val"]
        self.test_classes = data_classes["test"]

        self.train_insts = data_insts["train"]
        self.val_insts = data_insts["val"]
        self.test_insts = data_insts["test"]

        # Feature size is taken from the first stored entry. next(iter(...))
        # works on Python 2 and 3, whereas keys()[0] is Python 2 only.
        self.n_features = self.features[next(iter(self.features))].size
예제 #7
0
파일: __init__.py 프로젝트: lim0606/l3
    def __init__(self):
        """Load the human data, format it and build train/test examples.

        Only the "local" splits are formatted and turned into examples; the
        "global" splits are loaded but not used further in this method.
        Templates that occur more than once for a key are kept and passed to
        ``_build_examples``. Summary statistics are printed at the end.
        """
        train_local_raw, test_local_raw = loading.load("local", "human", 2000,
                                                       500)
        train_global_raw, test_global_raw = loading.load(
            "global", "human", 1500, 500)
        self.vocab = util.Index()
        # "UNK" is registered first, before any data is formatted.
        self.vocab.index("UNK")
        self.START = START
        self.STOP = STOP

        raw_splits = {
            ("train", "local"): train_local_raw,
            ("train", "global"): train_global_raw,
            ("test", "local"): test_local_raw,
            ("test", "global"): test_global_raw
        }

        # _format_data receives this mapping and the running task id; the
        # mapping is read back below.
        templates = defaultdict(list)

        splits = {}
        task_id = 0
        for fold in ("train", "test"):
            for mode in ("local", ):
                data, task_id = self._format_data(fold, raw_splits[fold, mode],
                                                  templates, task_id)
                splits[fold, mode] = data

        # Keep, per key, only the templates seen more than once; keys with no
        # repeated template are dropped entirely.
        final_templates = {}
        for k, vv in templates.items():
            counts = defaultdict(int)
            for v in vv:
                counts[v] += 1
            repeated = [v for v, c in counts.items() if c > 1]
            if repeated:
                final_templates[k] = repeated

        train_local_fmt = splits["train", "local"]
        test_local_fmt = splits["test", "local"]

        task_counter = 0
        self.train, n_train_tasks = self._build_examples(train_local_fmt,
                                                         5000,
                                                         final_templates,
                                                         task_counter,
                                                         group_tasks=False)
        # Test task ids continue from where the training ids stopped.
        self.test, n_total_tasks = self._build_examples(test_local_fmt,
                                                        50,
                                                        final_templates,
                                                        n_train_tasks,
                                                        group_tasks=True)
        self.test_ids = sorted(set(d.task_id for d in self.test))

        # Single-argument print(...) gives the same output on Python 2 and 3;
        # the former print statements were a SyntaxError on Python 3.
        print("[n_train] %s" % len(self.train))
        print("[n_test] %s test" % len(self.test))

        samp = self.sample_train()
        self.n_features = samp.features.size
        self.n_tasks = n_train_tasks
        self.n_actions = ACTS

        print("[task_ids] %s -> %s" % (n_train_tasks, n_total_tasks))
        print("[n_vocab] %s" % len(self.vocab))
예제 #8
0
    def __init__(self):
        """Load every data fold from ``sw_path`` and wrap it in datum objects.

        Each fold directory provides ``examples.npy``, ``inputs.npy``,
        ``labels.npy``, ``hints.json``, ``examples.feats.npy`` and
        ``inputs.feats.npy``. Hints are whitespace-tokenised, wrapped in
        START/STOP and indexed in ``self.hint_vocab``. With ``USE_IMAGES`` the
        data use the raw arrays, otherwise the precomputed feature arrays.
        """
        self.hint_vocab = util.Index()
        # Created here but not populated by this method.
        self.feature_index = util.Index()
        self.START = START
        self.STOP = STOP

        data = {}
        for fold in ("train", "val", "test", "val_same", "test_same"):
            fold_dir = os.path.join(sw_path, fold)

            examples = np.load(os.path.join(fold_dir, "examples.npy"))
            inputs = np.load(os.path.join(fold_dir, "inputs.npy"))
            labels = np.load(os.path.join(fold_dir, "labels.npy"))

            with open(os.path.join(fold_dir, "hints.json")) as hint_f:
                raw_hints = json.load(hint_f)

            hints = [
                [self.hint_vocab.index(w)
                 for w in [START] + raw_hint.split() + [STOP]]
                for raw_hint in raw_hints
            ]

            ex_features = np.load(os.path.join(fold_dir, "examples.feats.npy"))
            inp_features = np.load(os.path.join(fold_dir, "inputs.feats.npy"))

            fold_data = []
            for i, hint in enumerate(hints):
                if USE_IMAGES:
                    datum = Datum(
                        hint, examples[i, ...], inputs[i, ...], labels[i])
                else:
                    datum = Datum(
                        hint, ex_features[i, ...], inp_features[i, ...],
                        labels[i])
                    if FLAGS.vis:
                        # For visualisation, also carry the raw arrays
                        # alongside the feature-based fields.
                        datum = VisDatum(
                            datum.hint, datum.ex_inputs, datum.input,
                            datum.label, examples[i, ...], inputs[i, ...])
                fold_data.append(datum)
            data[fold] = fold_data

        self.train_data = data["train"]
        self.val_data = data["val"]
        self.test_data = data["test"]
        self.val_same_data = data["val_same"]
        self.test_same_data = data["test_same"]

        if USE_IMAGES:
            self.width, self.height, self.channels = self.train_data[0].input.shape
        else:
            # Uses the feature array of the last fold loaded ("test_same").
            self.n_features = inp_features.shape[1]
예제 #9
0
    def __init__(self, config, world, model):
        """Load hints, build the task lists and create policies and env models.

        Args:
            config: configuration object. This method reads
                ``config.recipes``, ``config.trainer.hints`` (path of a YAML
                hints file) and ``config.model.use_args``.
            world: passed to ``model.prepare``; ``world.n_actions`` is used as
                the action space of both policies.
            model: must provide ``prepare``, ``load``, ``session`` and
                ``n_features``.

        Raises:
            KeyError: if one of the hard-coded hint keys is missing from the
                loaded hints.
        """
        # load configs
        self.config = config
        self.cookbook = Cookbook(config.recipes)
        self.subtask_index = util.Index()
        self.task_index = util.Index()
        with open(config.trainer.hints) as hints_f:
            # safe_load instead of load: calling yaml.load() without a Loader
            # is rejected by PyYAML >= 6 and can construct arbitrary Python
            # objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # initialize randomness
        self.random = np.random.RandomState(0)

        # organize task and subtask indices
        self.train_tasks = []
        self.test_tasks = []
        # The hints file is either split into "train"/"test" sections or is
        # one flat mapping, in which case every hint is a training hint.
        if "train" in self.hints:
            train_hints = self.hints["train"]
            test_hints = self.hints["test"]
        else:
            train_hints = self.hints
            test_hints = {}
        all_hints = dict(train_hints)
        all_hints.update(test_hints)
        # Tasks are built for this fixed list, in this order, not for every
        # key found in the hints file.
        hint_keys = [
            'make[cloth]', 'make[bed]', 'make[axe]', 'make[stick]',
            'make[bridge]', 'make[plank]', 'get[gold]', 'make[shears]',
            'get[gem]', 'make[rope]'
        ]
        for hint_key in hint_keys:
            hint = all_hints[hint_key]
            goal = util.parse_fexp(hint_key)
            goal = (self.subtask_index.index(goal[0]),
                    self.cookbook.index[goal[1]])
            if config.model.use_args:
                # Steps are expressions that parse to pairs; the first element
                # is indexed as a subtask, the second looked up in the cookbook.
                parsed = [util.parse_fexp(s) for s in hint]
                steps = tuple(
                    (self.subtask_index.index(a), self.cookbook.index[b])
                    for a, b in parsed)
            else:
                # Steps are bare subtask names.
                steps = tuple(self.subtask_index.index(a) for a in hint)
            task = Task(goal, steps)
            if hint_key in train_hints:
                self.train_tasks.append(task)
            else:
                self.test_tasks.append(task)
            self.task_index.index(task)

            print(hint_key, hint, task)

        model.prepare(world, self)

        print("tasks: ", self.test_tasks)

        # Observation width is the model's feature count plus one slot per
        # test task.
        hid_size = 32
        num_hid_layers = 2
        self.ob = U.get_placeholder(
            name="ob",
            dtype=tf.float32,
            shape=[None, model.n_features + len(self.test_tasks)])
        self.new_ob = U.get_placeholder(
            name="new_ob",
            dtype=tf.float32,
            shape=[None, model.n_features + len(self.test_tasks)])
        self.acts = U.get_placeholder(name="acts",
                                      dtype=tf.int32,
                                      shape=(None, ))

        self.policy = Policy(name="policy",
                             ob=self.ob,
                             ac_space=world.n_actions,
                             hid_size=hid_size,
                             num_hid_layers=num_hid_layers,
                             num_subpolicies=len(self.subtask_index))
        self.old_policy = Policy(name="old_policy",
                                 ob=self.ob,
                                 ac_space=world.n_actions,
                                 hid_size=hid_size,
                                 num_hid_layers=num_hid_layers,
                                 num_subpolicies=len(self.subtask_index))
        self.stochastic = True

        # Run the initializers of the variables under the two policy scopes.
        policy_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='policy')
        old_policy_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope='old_policy')
        not_init_initializers = [
            var.initializer for var in policy_vars + old_policy_vars
        ]
        model.session.run(not_init_initializers)

        self.policy.reset(model.session)
        self.old_policy.reset(model.session)

        # The environment models use a much wider hidden layer than the
        # policies.
        hid_size = 1024
        num_hid_layers = 2
        self.env_model = EnvModel(name="env_model",
                                  ob=self.ob,
                                  acts=self.acts,
                                  hid_size=hid_size,
                                  num_hid_layers=num_hid_layers,
                                  num_subpolicies=len(self.subtask_index))
        self.old_env_model = EnvModel(name="old_env_model",
                                      ob=self.ob,
                                      acts=self.acts,
                                      hid_size=hid_size,
                                      num_hid_layers=num_hid_layers,
                                      num_subpolicies=len(self.subtask_index))

        # The learner is constructed with the model's session as default.
        with model.session.as_default():
            self.learner = Learner(self.policy,
                                   self.old_policy,
                                   self.env_model,
                                   self.old_env_model,
                                   len(self.subtask_index),
                                   None,
                                   clip_param=0.2,
                                   entcoeff=0,
                                   optim_epochs=10,
                                   optim_stepsize=3e-5,
                                   optim_batchsize=64)

        model.load()
        # self.learner.syncMasterPolicies()
        print("policy variables: ", self.policy.get_variables())
        print("policy trainable variables: ",
              self.policy.get_trainable_variables())