Example #1
0
    def __init__(self, config, opt_config):
        """Build an ensemble from the sub-model configs listed in `config`.

        config: expected to carry a `models` list whose entries have
            `config` (path to a YAML model config) and `weights` (path to
            saved weights) -- inferred from usage; confirm against caller.
        opt_config: optimization settings, stored for later use.
        """
        self.config = config
        self.opt_config = opt_config

        # Load each constituent model from its own YAML config and weights.
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # unsafe on untrusted input; fine only for trusted configs.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)

        self.n_models = len(self.models)

        # Separate net used to combine the sub-models' outputs.
        self.apollo_net = ApolloNet()
Example #2
0
 def __init__(self, categorical=False):
     """Set up the network and optimizer state.

     categorical: if True, predict over N_CLASSES discrete classes;
         otherwise predict a 2-dimensional continuous target
         (presumably a position -- TODO confirm).
     """
     self.net = ApolloNet()
     self.opt_state = adadelta.State()
     if categorical:
         self.n_targets = N_CLASSES
     else:
         self.n_targets = 2
     self.categorical = categorical
Example #3
0
    def __init__(self, config, opt_config):
        """Build an ensemble from the sub-model configs listed in `config`.

        config: expected to carry a `models` list whose entries have
            `config` (path to a YAML model config) and `weights` (path to
            saved weights) -- inferred from usage; confirm against caller.
        opt_config: optimization settings, stored for later use.
        """
        self.config = config
        self.opt_config = opt_config

        # Load each constituent model from its own YAML config and weights.
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # unsafe on untrusted input; fine only for trusted configs.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)

        self.n_models = len(self.models)

        # Separate net used to combine the sub-models' outputs.
        self.apollo_net = ApolloNet()
Example #4
0
class EnsembleModel:
    """Ensemble of pre-trained sub-models whose per-answer scores are
    merged by a learned 1x1 convolution and scored with a softmax loss.
    """

    def __init__(self, config, opt_config):
        """Load each sub-model described in `config.models` (one YAML
        config path plus a weights path per entry) and set up the merging
        net.
        """
        self.config = config
        self.opt_config = opt_config

        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # unsafe on untrusted files; acceptable only for trusted
                # local configs.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)

        self.n_models = len(self.models)

        # Separate net used to combine the sub-models' outputs.
        self.apollo_net = ApolloNet()

    def forward(self, layout_type, indices, string, input, target,
                compute_eval=False):
        """Run every sub-model, stack their answer blobs, and merge them.

        Returns (loss, accuracy) when compute_eval is true, otherwise
        (loss, None).
        """
        batch_size = -1

        # Collect each sub-model's answer distribution as a NumpyData layer.
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))

        self.apollo_net.f(layers.Concat(
            "concat", bottoms=["output_%d" % i for i in range(self.n_models)]))

        # Shape to (batch, model, answer, 1) so the 1x1 convolution mixes
        # per-model scores for each answer independently.
        self.apollo_net.blobs["concat"].reshape(
            (batch_size, self.n_models, len(ANSWER_INDEX), 1))

        self.apollo_net.f(layers.Convolution(
            "merge", (1, 1), 1, bottoms=["concat"]))

        self.apollo_net.blobs["merge"].reshape(
            (batch_size, len(ANSWER_INDEX)))

        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(layers.SoftmaxWithLoss(
            "loss", bottoms=["merge", "target"], normalize=False))
        if compute_eval:
            # Renamed from `eval`, which shadowed the builtin.
            accuracy = self.apollo_net.f(
                my_layers.Accuracy("acc", bottoms=["merge", "target"]))
            return loss, accuracy
        else:
            return loss, None

    def train(self):
        """Backprop/update the merging net, then optionally the sub-models."""
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        """Reset forward state on all sub-models and the merging net."""
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        """Saving is not implemented for ensembles."""
        pass
Example #5
0
class Iterator(object):
    """Sequence predictor: an "init" LSTM proposes a trajectory step by
    step, then a bidirectional "refine" LSTM revises every step.
    """

    def __init__(self, categorical=False):
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        # Only continuous targets are supported by this model.
        assert not categorical

    def forward(self, data, train=False):
        """Run both prediction passes on a batch.

        Returns (loss, ll_pred2): loss is an array [init_loss, refine_loss]
        when train=True (None otherwise); ll_pred2 names the refined
        per-timestep prediction layers.
        """
        features = np.asarray([d.features for d in data])
        max_len = max(len(d.demonstration) for d in data)

        # BUG FIX: the original read `len(d.demonstration[0])`, but `d`
        # does not leak out of the generator expression above (NameError);
        # use the first datum instead.
        n_targets = len(data[0].demonstration[0])
        targets = np.zeros((len(data), max_len, n_targets))
        masks = np.zeros((len(data), max_len, n_targets))

        # Pad every demonstration out to max_len; mask marks real steps.
        for i_datum in range(len(data)):
            demo_len = len(data[i_datum].demonstration)
            targets[i_datum, :demo_len, ...] = data[i_datum].demonstration
            masks[i_datum, :demo_len, ...] = 1

        l_features = "features"
        l_ip_repr = "ip_repr"
        l_relu_repr = "relu_repr"
        lt_mask = "mask_%d"
        lt_target = "target_%d"

        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        self.net.f(InnerProduct(l_ip_repr, N_HIDDEN, bottoms=[l_features]))
        self.net.f(ReLU(l_relu_repr, bottoms=[l_ip_repr]))

        # At test time the first pass feeds its own predictions back in.
        ll_pred1 = self.initialize(l_relu_repr, max_len, n_targets, data,
                self_init=not train)
        ll_pred2 = self.refine(ll_pred1, n_targets)

        if train:
            ll_targets = []
            ll_masks = []
            # Targets start at t=1: step 0 is the given initial state.
            for i_target in range(1, max_len):
                l_target = lt_target % i_target
                l_mask = lt_mask % i_target
                self.net.f(NumpyData(l_target, targets[:, i_target]))
                self.net.f(NumpyData(l_mask, masks[:, i_target]))
                ll_targets.append(l_target)
                ll_masks.append(l_mask)

            # Both passes are supervised against the same targets.
            loss1 = self.loss("pred1", ll_pred1, ll_targets, ll_masks)
            loss2 = self.loss("pred2", ll_pred2, ll_targets, ll_masks)
            loss = np.asarray([loss1, loss2])
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)
        else:
            loss = None

        return loss, ll_pred2

    def initialize(self, l_repr, max_len, n_targets, data, self_init=False):
        """First pass: predict each timestep with an LSTM over state
        features.  Returns the list of prediction layer names.
        """
        lt_state_repr = "state_repr_%d"
        lt_pred = "pred_%d"

        ll_state_reprs = [lt_state_repr % t for t in range(max_len)]

        init_state_reprs = np.asarray([d.inject_state_features(d.init)
                for d in data])
        self.net.f(NumpyData(ll_state_reprs[0], init_state_reprs))

        ll_predictions = []

        for t, l_hidden in enumerate(
                lstm("init", [[l] for l in ll_state_reprs[:-1]], N_HIDDEN, self.net)):
            l_pred = lt_pred % t

            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)

            if self_init:
                # Feed the model's own (rounded) prediction back in.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    state = self.net.blobs[l_pred].data[i_datum, :]
                    state = np.round(state).astype(int)
                    state_reprs.append(datum.inject_state_features(state))
            else:
                # Teacher forcing: feed the demonstrated next state, or
                # zeros once the demonstration has ended.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    if t < len(datum.demonstration) - 1:
                        state_reprs.append(datum.inject_state_features(datum.demonstration[t+1]))
                    else:
                        state_reprs.append(np.zeros(self.net.blobs[ll_state_reprs[0]].shape[1:]))
            self.net.f(NumpyData(ll_state_reprs[t+1], np.asarray(state_reprs)))

        return ll_predictions

    def refine(self, ll_prev, n_targets):
        """Second pass: re-predict every step from a bidirectional LSTM
        over the first-pass predictions.
        """
        ll_hidden = bilstm("refine", [[l] for l in ll_prev], N_HIDDEN, self.net)
        lt_pred = "refine_pred_%d"
        ll_predictions = []
        for t, l_hidden in enumerate(ll_hidden):
            l_pred = lt_pred % t
            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)
        return ll_predictions

    def loss(self, prefix, ll_pred, ll_targets, ll_masks):
        """Masked euclidean loss summed over timesteps."""
        lt_apply_mask = "apply_mask_%%d_%s" % prefix
        lt_loss = "loss_%%d_%s" % prefix
        loss = 0
        for t, l_pred in enumerate(ll_pred):
            l_apply_mask = lt_apply_mask % t
            l_loss = lt_loss % t

            l_mask = ll_masks[t]
            l_target = ll_targets[t]

            # Zero out predictions at padded timesteps before the loss.
            self.net.f(Eltwise(l_apply_mask, "PROD",
                    bottoms=[l_pred, l_mask]))
            loss += self.net.f(EuclideanLoss(l_loss,
                    bottoms=[l_apply_mask, l_target]))
        return loss

    def demonstrate(self, data):
        """Train on a batch of demonstrations; returns the loss array."""
        loss, _ = self.forward(data, train=True)
        return loss

    def predict(self, data):
        """Predict one trajectory per datum, starting from its init state."""
        _, ll_predictions = self.forward(data, train=False)
        predictions = []
        for i_datum, datum in enumerate(data):
            prediction = [datum.init]
            for t in range(len(datum.demonstration) - 1):
                state = self.net.blobs[ll_predictions[t]].data[i_datum, :]
                prediction.append(tuple(state))
            predictions.append(tuple(prediction))
        return predictions
Example #6
0
class Reflex(object):
    """Reactive policy: maps state features directly to the next target
    with an N_LAYERS MLP, one timestep at a time."""

    def __init__(self, categorical=False):
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        if categorical:
            self.n_targets = N_CLASSES
        else:
            # presumably a 2-d continuous target (e.g. a position) -- TODO confirm
            self.n_targets = 2
        self.categorical = categorical

    def forward(self, features, targets, masks, train=False):
        """One MLP pass; when train=True also computes the loss and does an
        adadelta update.

        Returns a 1-element array holding the loss when train=True,
        otherwise None.  The prediction layer name is stored in
        self.l_predict for callers to read.
        """
        features = np.asarray(features)
        #positions = np.asarray(positions)
        target = np.asarray(targets)
        mask = np.asarray(masks)

        l_features = "features"
        l_positions = "positions"
        l_concat = "concat"
        lt_ip = "ip_%d"
        lt_relu = "relu_%d"
        l_target = "targets"
        l_mask = "mask"
        l_mul_mask = "mul_mask"
        l_loss = "loss"

        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        #self.net.f(NumpyData(l_positions, positions))
        #self.net.f(Concat(l_concat, bottoms=[l_features, l_positions]))

        # Hidden stack: InnerProduct + ReLU, repeated N_LAYERS - 1 times.
        l_prev = l_features
        for i_layer in range(N_LAYERS - 1):
            l_ip = lt_ip % i_layer
            l_relu = lt_relu % i_layer
            self.net.f(InnerProduct(l_ip, N_HIDDEN, bottoms=[l_prev]))
            self.net.f(ReLU(l_relu, bottoms=[l_ip]))
            l_prev = l_relu

        # Final linear layer produces the prediction.
        l_ip = lt_ip % (N_LAYERS - 1)
        self.net.f(InnerProduct(l_ip, self.n_targets, bottoms=[l_prev]))
        self.l_predict = l_ip

        if train:
            self.net.f(NumpyData(l_target, target))
            if self.categorical:
                loss = self.net.f(SoftmaxWithLoss(l_loss, 
                    bottoms=[self.l_predict, l_target]))
            else:
                # Mask out padded timesteps before the euclidean loss.
                self.net.f(NumpyData(l_mask, mask))
                self.net.f(Eltwise(l_mul_mask, "PROD", bottoms=[l_mask, self.l_predict]))
                loss = self.net.f(EuclideanLoss(l_loss, bottoms=[l_target, l_mul_mask]))
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)
            return np.asarray([loss])

    def demonstrate(self, data):
        """Teacher-forced training over a batch of demonstrations.

        For each timestep t, features come from the previous demonstrated
        state; datums whose demonstration has already ended contribute
        zero targets and zero masks.  Returns the summed loss.
        """
        max_len = max(len(d.demonstration) for d in data)
        loss = 0
        for t in range(1, max_len):
            features = []
            #positions = []
            targets = []
            masks = []

            for datum in data:
                #features.append(datum.features)
                features.append(datum.inject_state_features(datum.demonstration[t-1]))
                if t < len(datum.demonstration):
                    #positions.append(datum.demonstration[t-1])
                    targets.append(datum.demonstration[t])
                    masks.append((1,) * self.n_targets)
                else:
                    #positions.append((0,) * self.n_targets)
                    targets.append((0,) * self.n_targets)
                    masks.append((0,) * self.n_targets)

            loss += self.forward(features, targets, masks, train=True)

        return loss

    def predict(self, data):
        """Roll the policy forward; returns one path (list of states) per
        datum.

        NOTE(review): features are built from the demonstration rather
        than from the model's own predicted path -- confirm this is
        intended.
        """
        max_len = max(len(d.demonstration) for d in data)
        paths = [[datum.init] for datum in data]
        for t in range(1, max_len):
            features = []
            #positions = []

            for i_datum in range(len(data)):
                datum = data[i_datum]
                features.append(datum.inject_state_features(datum.demonstration[t-1]))
                #features.append(datum.features)
                #positions.append(paths[i_datum][-1])

            #self.forward(features, positions, [], [])
            self.forward(features, [], [])

            for i_datum in range(len(data)):
                paths[i_datum].append(tuple(self.net.blobs[self.l_predict].data[i_datum]))

        return paths
Example #7
0
    def __init__(self):
        """Create the network and an empty list of recorded transitions."""
        self.net = ApolloNet()

        self.transitions = []
Example #8
0
class Planner(object):
    """Plan-then-act model: "thinks" with an LSTM over the input
    representation, then rolls an action LSTM conditioned on the plan.
    """

    def __init__(self, categorical=False):
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        self.categorical = categorical

    def forward(self, data, train=False):
        """Run the plan/act pipeline on a batch.

        Returns (loss, ll_predictions); loss is None when train=False.
        """
        features = np.asarray([d.features for d in data])
        max_len = max(len(d.demonstration) for d in data)
        if self.categorical:
            n_targets = N_CLASSES
            targets = np.zeros((len(data), max_len))
        else:
            # BUG FIX: the original read `len(d.demonstration[0])`, but `d`
            # does not leak out of the generator expression above
            # (NameError); use the first datum instead.
            n_targets = len(data[0].demonstration[0])
            targets = np.zeros((len(data), max_len, n_targets))
        masks = np.zeros((len(data), max_len, n_targets))
        # Pad demonstrations to max_len; mask marks the real steps.
        for i_datum in range(len(data)):
            demo_len = len(data[i_datum].demonstration)
            targets[i_datum, :demo_len, ...] = data[i_datum].demonstration
            masks[i_datum, :demo_len, ...] = 1

        l_features = "features"
        l_ip_repr = "ip_repr"
        l_relu_repr = "relu_repr"
        lt_mask = "mask_%d"
        lt_target = "target_%d"

        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        self.net.f(InnerProduct(l_ip_repr, N_HIDDEN, bottoms=[l_features]))
        self.net.f(ReLU(l_relu_repr, bottoms=[l_ip_repr]))

        l_plan = self.think(l_relu_repr, randomize=train)

        if train:
            ll_targets = []
            ll_masks = []
            # Targets start at t=1: step 0 is the given initial state.
            for i_target in range(1, max_len):
                l_target = lt_target % i_target
                l_mask = lt_mask % i_target
                self.net.f(NumpyData(l_target, targets[:, i_target]))
                self.net.f(NumpyData(l_mask, masks[:, i_target]))
                ll_targets.append(l_target)
                ll_masks.append(l_mask)
        else:
            ll_targets = None
            ll_masks = None

        loss, ll_predictions = self.act(l_plan, max_len, data, ll_targets,
                ll_masks, self_init=not train)

        if train:
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)

        return loss, ll_predictions

    def think(self, l_repr, randomize):
        """Run the "think" LSTM over l_repr for THINK_TIME steps (a random
        number of steps when randomize is set) and return the final hidden
        layer name as the plan."""
        time = np.random.randint(THINK_TIME) + 1 if randomize else THINK_TIME
        reprs = [l for l in lstm("think", [[l_repr] for i in range(time)],
                self.net)]
        return reprs[-1]

    def act(self, l_plan, max_len, data, ll_targets, ll_masks, self_init):
        """Roll the action LSTM, predicting one state per timestep.

        When ll_targets/ll_masks are supplied the per-step loss is
        accumulated; with self_init the model's own (rounded) predictions
        are fed back in instead of the demonstrated states.
        """
        if self.categorical:
            n_targets = N_CLASSES
        else:
            n_targets = len(data[0].demonstration[0])

        lt_state_repr = "state_repr_%d"
        lt_pred = "pred_%d"
        lt_apply_mask = "apply_mask_%d"
        lt_loss = "loss_%d"
        ll_state_reprs = [lt_state_repr % t for t in range(max_len)]

        init_state_reprs = np.asarray([d.inject_state_features(d.init)
                for d in data])
        self.net.f(NumpyData(ll_state_reprs[0], init_state_reprs))

        loss = 0 if ll_targets is not None else None
        ll_predictions = []
        # NOTE(review): Planner defines no `lstm` method, so `self.lstm`
        # will raise AttributeError -- it looks like the module-level
        # `lstm` helper was intended; confirm its signature before fixing.
        for t, l_hidden in enumerate(self.lstm("act", [[l, l_plan] for l in
                ll_state_reprs[:-1]])):
            l_pred = lt_pred % t
            l_apply_mask = lt_apply_mask % t
            l_loss = lt_loss % t

            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)

            if ll_targets is not None:
                l_mask = ll_masks[t]
                l_target = ll_targets[t]

                if self.categorical:
                    loss += self.net.f(SoftmaxWithLoss(l_loss,
                            bottoms=[l_pred, l_target]))
                else:
                    # Zero out padded timesteps before the euclidean loss.
                    self.net.f(Eltwise(l_apply_mask, "PROD",
                            bottoms=[l_pred, l_mask]))
                    loss += self.net.f(EuclideanLoss(l_loss,
                            bottoms=[l_apply_mask, l_target]))

            if self_init:
                # Feed the model's own (rounded) prediction back in.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    state = self.net.blobs[l_pred].data[i_datum, :]
                    state = np.round(state).astype(int)
                    state_reprs.append(datum.inject_state_features(state))
            else:
                # Teacher forcing: feed the demonstrated next state, or
                # zeros once the demonstration has ended.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    if t < len(datum.demonstration) - 1:
                        state_reprs.append(datum.inject_state_features(datum.demonstration[t+1]))
                    else:
                        state_reprs.append(np.zeros(self.net.blobs[ll_state_reprs[0]].shape[1:]))
            self.net.f(NumpyData(ll_state_reprs[t+1], np.asarray(state_reprs)))

        return loss, ll_predictions

    def demonstrate(self, data):
        """Train on a batch of demonstrations; returns the loss."""
        loss, _ = self.forward(data, train=True)
        return loss

    def predict(self, data):
        """Predict one trajectory per datum, starting from its init state."""
        _, ll_predictions = self.forward(data, train=False)
        predictions = []
        for i_datum, datum in enumerate(data):
            prediction = [datum.init]
            for t in range(len(datum.demonstration) - 1):
                state = self.net.blobs[ll_predictions[t]].data[i_datum, :]
                prediction.append(tuple(state))
            predictions.append(tuple(prediction))
        return predictions
Example #9
0
class EnsembleModel:
    """Ensemble of pre-trained sub-models whose per-answer scores are
    merged by a learned 1x1 convolution and scored with a softmax loss."""

    def __init__(self, config, opt_config):
        # config.models entries carry a YAML config path and a weights
        # path -- inferred from usage; confirm against caller.
        self.config = config
        self.opt_config = opt_config

        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # unsafe on untrusted files; fine only for trusted configs.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)

        self.n_models = len(self.models)

        # Separate net used to combine the sub-models' outputs.
        self.apollo_net = ApolloNet()

    def forward(self,
                layout_type,
                indices,
                string,
                input,
                target,
                compute_eval=False):
        """Run every sub-model, stack their answer blobs, and merge them
        with a 1x1 convolution.

        Returns (loss, accuracy) when compute_eval is true, otherwise
        (loss, None).
        """
        batch_size = -1

        # Collect each sub-model's answer distribution as a NumpyData layer.
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))

        self.apollo_net.f(
            layers.Concat(
                "concat",
                bottoms=["output_%d" % i for i in range(self.n_models)]))

        # Shape to (batch, model, answer, 1) so the 1x1 convolution mixes
        # per-model scores for each answer independently.
        self.apollo_net.blobs["concat"].reshape(
            (batch_size, self.n_models, len(ANSWER_INDEX), 1))

        self.apollo_net.f(
            layers.Convolution("merge", (1, 1), 1, bottoms=["concat"]))

        self.apollo_net.blobs["merge"].reshape((batch_size, len(ANSWER_INDEX)))

        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(
            layers.SoftmaxWithLoss("loss",
                                   bottoms=["merge", "target"],
                                   normalize=False))
        if compute_eval:
            # NOTE(review): `eval` shadows the builtin of the same name.
            eval = self.apollo_net.f(
                my_layers.Accuracy("acc", bottoms=["merge", "target"]))
            return loss, eval
        else:
            return loss, None

    def train(self):
        """Backprop/update the merging net, then optionally the sub-models."""
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        """Reset forward state on all sub-models and the merging net."""
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        """Saving is not implemented for ensembles."""
        pass
Example #10
0
 def __init__(self):
     """Create the network and an empty experience buffer."""
     self.net = ApolloNet()
     self.experiences = []
Example #11
0
class MetricAgent(object):
    """Agent that learns a metric embedding: consecutive states are pushed
    to be a fixed (unit) distance apart in embedding space."""

    def __init__(self):
        self.net = ApolloNet()
        # Buffer of (state, next_state) pairs for replay.
        self.experiences = []

    def choose(self, state):
        """Pick a random direction that actually moves the agent."""
        self.current_state = state

        dirs = [NORTH, EAST, SOUTH, WEST]
        interesting_dirs = \
                [d for d in dirs 
                 if state.step(d)[1].agent_pos != state.agent_pos]
        return np.random.choice(interesting_dirs)

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        """Embed both feature batches with a weight-shared two-layer MLP
        and regress their squared embedding distance onto 1.

        Returns the euclidean loss.  NOTE(review): `cost` and `n_actions`
        are currently unused.
        """
        net = self.net

        l_data_from = prefix + "data_from"
        l_ip1_from = prefix + "ip1_from"
        l_relu1_from = prefix + "relu1_from"
        l_ip2_from = prefix + "ip2_from"

        l_data_to = prefix + "data_to"
        l_ip1_to = prefix + "ip1_to"
        l_relu1_to = prefix + "relu1_to"
        l_ip2_to = prefix + "ip2_to"

        l_inv = "inv"
        l_diff = "diff"
        l_sq = "sq"
        l_reduce = "reduce"
        l_target = "target"
        l_loss = "loss"

        # Both towers share p_ip1/p_ip2, so they compute the same embedding.
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        p_reduce = ["reduce_weight", "reduce_bias"]

        net.f(NumpyData(l_data_from, feats_from))
        net.f(InnerProduct(l_ip1_from, 50, bottoms=[l_data_from], param_names=p_ip1))
        net.f(ReLU(l_relu1_from, bottoms=[l_ip1_from]))
        net.f(InnerProduct(l_ip2_from, 50, bottoms=[l_relu1_from], param_names=p_ip2))

        net.f(NumpyData(l_data_to, feats_to))
        net.f(InnerProduct(l_ip1_to, 50, bottoms=[l_data_to], param_names=p_ip1))
        net.f(ReLU(l_relu1_to, bottoms=[l_ip1_to]))
        net.f(InnerProduct(l_ip2_to, 50, bottoms=[l_relu1_to], param_names=p_ip2))

        # Elementwise squared difference between the two embeddings.
        net.f(Power(l_inv, scale=-1, bottoms=[l_ip2_from]))
        net.f(Eltwise(l_diff, "SUM", bottoms=[l_ip2_to, l_inv]))
        net.f(Power(l_sq, power=2, bottoms=[l_diff]))

        # Frozen (lr_mult=0) unit-weight layer sums the squared diffs.
        net.f(InnerProduct(
            l_reduce, 
            1, 
            bottoms=[l_sq],
            param_names=p_reduce,
            param_lr_mults=[0, 0], 
            weight_filler=Filler("constant", 1), 
            bias_filler=Filler("constant", 0)))

        # Target: squared distance of 1 for every pair in the batch.
        net.f(NumpyData(l_target, np.ones((BATCH_SIZE, 1))))
        loss = net.f(EuclideanLoss(l_loss, bottoms=[l_reduce, l_target]))

        return loss

    def update(self, reward, new_state):
        """Record the transition and, once the buffer holds BATCH_SIZE
        pairs, train on a random replay batch.

        Returns the batch loss (0 while the buffer is still filling).
        """
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0

        # Sample a replay batch (with replacement).
        replay_choices = np.random.choice(len(self.experiences), BATCH_SIZE)
        replay_transitions = [self.experiences[i] for i in replay_choices]

        from_features = np.asarray([t[0].features for t in replay_transitions])
        to_features = np.asarray([t[1].features for t in replay_transitions])

        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1, new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)

        return loss

    def update_target(self):
        """No target network to sync for this agent."""
        pass
Example #12
0
    def __init__(self):
        """Create the network and an empty list of recorded transitions."""
        self.net = ApolloNet()

        self.transitions = []
Example #13
0
class Agent(object):
    """DQN-style agent: MLP Q-function, epsilon-greedy policy, experience
    replay, and a separately-prefixed target network ("fut_").
    """

    def __init__(self):
        self.net = ApolloNet()

        # Replay buffer of (state, action, new_state, reward) tuples.
        self.transitions = []

    def forward(self, features, n_actions, prefix=""):
        """Add the Q-network layers for a feature batch and return the
        name of the Q-value layer.

        `prefix` selects the parameter set: "now_" for the online network,
        "fut_" for the target network.
        """
        net = self.net

        l_data = prefix + "data"
        l_ip1 = prefix + "ip1"
        l_relu1 = prefix + "relu1"
        l_ip2 = prefix + "ip2"

        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]

        net.f(NumpyData(l_data, features))
        net.f(InnerProduct(l_ip1, 50, bottoms=[l_data], param_names=p_ip1))
        net.f(ReLU(l_relu1, bottoms=[l_ip1]))
        net.f(InnerProduct(l_ip2, n_actions, bottoms=[l_relu1],
                           param_names=p_ip2))

        return l_ip2

    def choose(self, state):
        """Epsilon-greedy action selection: random until the replay buffer
        fills, then 10% exploration."""
        self.net.clear_forward()
        q_layer = self.forward(np.asarray([state.features]), state.n_actions,
                               "now_")
        q_data = self.net.blobs[q_layer].data.ravel()

        if len(self.transitions) < BATCH_SIZE or np.random.random() < 0.1:
            action = np.random.choice(state.n_actions)
        else:
            action = np.argmax(q_data)

        self.current_state = state
        self.current_action = action

        return action

    def update(self, reward, new_state):
        """Record the transition and do one replay step of Q-learning once
        the buffer holds at least BATCH_SIZE transitions."""
        current_transition = \
                (self.current_state, self.current_action, new_state, reward)
        self.transitions.append(current_transition)

        if len(self.transitions) < BATCH_SIZE:
            return

        # Sample a replay batch (with replacement).
        replay_choices = np.random.choice(len(self.transitions), BATCH_SIZE)
        replay_transitions = [self.transitions[i] for i in replay_choices]

        replay_before_data = np.asarray([r[0].features for r in replay_transitions])
        replay_action_data = np.asarray([r[1] for r in replay_transitions])
        replay_after_data = np.asarray([r[2].features for r in replay_transitions])
        replay_reward_data = np.asarray([r[3] for r in replay_transitions])

        net = self.net

        self.net.clear_forward()
        l_q_now = self.forward(replay_before_data, new_state.n_actions, "now_")
        l_q_fut = self.forward(replay_after_data, new_state.n_actions, "fut_")

        # Bellman target: r + 0.9 * max_a' Q_target(s', a').
        pred_value = 0.9 * np.max(net.blobs[l_q_fut].data, axis=1) + replay_reward_data

        l_action_now = "action_now"
        l_index = "index"
        l_pred_value = "pred_value"
        l_loss = "loss"

        net.f(NumpyData(l_action_now, replay_action_data))
        # Select Q(s, a) for the actions actually taken.
        net.f(Index(l_index, {}, bottoms=[l_q_now, l_action_now]))
        net.f(NumpyData(l_pred_value, pred_value))
        net.f(EuclideanLoss(l_loss, bottoms=[l_index, l_pred_value]))

        net.backward()
        net.update(lr=0.01)

    def update_target(self):
        """Copy the online ("now_") parameters into the target ("fut_")
        network, once the target network exists."""
        net = self.net
        if "fut_ip1_weight" not in net.params.keys():
            return
        net.params["fut_ip1_weight"].data[...] = net.params["now_ip1_weight"].data
        net.params["fut_ip1_bias"].data[...] = net.params["now_ip1_bias"].data
        net.params["fut_ip2_weight"].data[...] = net.params["now_ip2_weight"].data
        net.params["fut_ip2_bias"].data[...] = net.params["now_ip2_bias"].data
Example #14
0
 def __init__(self):
     """Create the network and an empty experience buffer."""
     self.net = ApolloNet()
     self.experiences = []
Example #15
0
class MetricAgent(object):
    """Agent that learns a metric embedding: consecutive states are pushed
    to be a fixed (unit) distance apart in embedding space."""

    def __init__(self):
        self.net = ApolloNet()
        # Buffer of (state, next_state) pairs for replay.
        self.experiences = []

    def choose(self, state):
        """Pick a random direction that actually moves the agent."""
        self.current_state = state

        dirs = [NORTH, EAST, SOUTH, WEST]
        interesting_dirs = \
                [d for d in dirs
                 if state.step(d)[1].agent_pos != state.agent_pos]
        return np.random.choice(interesting_dirs)

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        """Embed both feature batches with a weight-shared two-layer MLP
        and regress their squared embedding distance onto 1.

        Returns the euclidean loss.  NOTE(review): `cost` and `n_actions`
        are currently unused.
        """
        net = self.net

        l_data_from = prefix + "data_from"
        l_ip1_from = prefix + "ip1_from"
        l_relu1_from = prefix + "relu1_from"
        l_ip2_from = prefix + "ip2_from"

        l_data_to = prefix + "data_to"
        l_ip1_to = prefix + "ip1_to"
        l_relu1_to = prefix + "relu1_to"
        l_ip2_to = prefix + "ip2_to"

        l_inv = "inv"
        l_diff = "diff"
        l_sq = "sq"
        l_reduce = "reduce"
        l_target = "target"
        l_loss = "loss"

        # Both towers share p_ip1/p_ip2, so they compute the same embedding.
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        p_reduce = ["reduce_weight", "reduce_bias"]

        net.f(NumpyData(l_data_from, feats_from))
        net.f(
            InnerProduct(l_ip1_from,
                         50,
                         bottoms=[l_data_from],
                         param_names=p_ip1))
        net.f(ReLU(l_relu1_from, bottoms=[l_ip1_from]))
        net.f(
            InnerProduct(l_ip2_from,
                         50,
                         bottoms=[l_relu1_from],
                         param_names=p_ip2))

        net.f(NumpyData(l_data_to, feats_to))
        net.f(
            InnerProduct(l_ip1_to, 50, bottoms=[l_data_to], param_names=p_ip1))
        net.f(ReLU(l_relu1_to, bottoms=[l_ip1_to]))
        net.f(
            InnerProduct(l_ip2_to, 50, bottoms=[l_relu1_to],
                         param_names=p_ip2))

        # Elementwise squared difference between the two embeddings.
        net.f(Power(l_inv, scale=-1, bottoms=[l_ip2_from]))
        net.f(Eltwise(l_diff, "SUM", bottoms=[l_ip2_to, l_inv]))
        net.f(Power(l_sq, power=2, bottoms=[l_diff]))

        # Frozen (lr_mult=0) unit-weight layer sums the squared diffs.
        net.f(
            InnerProduct(l_reduce,
                         1,
                         bottoms=[l_sq],
                         param_names=p_reduce,
                         param_lr_mults=[0, 0],
                         weight_filler=Filler("constant", 1),
                         bias_filler=Filler("constant", 0)))

        # Target: squared distance of 1 for every pair in the batch.
        net.f(NumpyData(l_target, np.ones((BATCH_SIZE, 1))))
        loss = net.f(EuclideanLoss(l_loss, bottoms=[l_reduce, l_target]))

        return loss

    def update(self, reward, new_state):
        """Record the transition and, once the buffer holds BATCH_SIZE
        pairs, train on a random replay batch.

        Returns the batch loss (0 while the buffer is still filling).
        """
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0

        # Sample a replay batch (with replacement).
        replay_choices = np.random.choice(len(self.experiences), BATCH_SIZE)
        replay_transitions = [self.experiences[i] for i in replay_choices]

        from_features = np.asarray([t[0].features for t in replay_transitions])
        to_features = np.asarray([t[1].features for t in replay_transitions])

        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1,
                            new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)

        return loss

    def update_target(self):
        """No target network to sync for this agent."""
        pass
Example #16
0
def main():
    """Train or sample models, controlled by sys.argv[1] (job: train.base,
    train.compiled, sample.base, or sample.compiled) and sys.argv[2]
    (corpus name: "abstract" or "birds")."""
    apollocaffe.set_device(0)
    #apollocaffe.set_cpp_loglevel(0)
    # Fix all seeds for reproducibility.
    apollocaffe.set_random_seed(0)
    np.random.seed(0)

    job = sys.argv[1]
    corpus_name = sys.argv[2]

    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input; CONFIG is presumably a trusted in-module constant.
    config = util.Struct(**yaml.load(CONFIG))
    if corpus_name == "abstract":
        train_scenes, dev_scenes, test_scenes = corpus.load_abstract()
    else:
        assert corpus_name == "birds"
        train_scenes, dev_scenes, test_scenes = corpus.load_birds()
    apollo_net = ApolloNet()
    print "loaded data"
    print "%d training examples" % len(train_scenes)

    # All models share the single ApolloNet created above.
    listener0_model = Listener0Model(apollo_net, config.model)
    speaker0_model = Speaker0Model(apollo_net, config.model)
    sampling_speaker1_model = SamplingSpeaker1Model(apollo_net, config.model)
    compiled_speaker1_model = CompiledSpeaker1Model(apollo_net, config.model)

    if job == "train.base":
        # Train listener and speaker base models, then checkpoint.
        train(train_scenes, dev_scenes, listener0_model, apollo_net, config.opt)
        train(train_scenes, dev_scenes, speaker0_model, apollo_net, config.opt)
        apollo_net.save("models/%s.base.caffemodel" % corpus_name)
        exit()

    if job == "train.compiled":
        # Fine-tune the compiled speaker on top of the base checkpoint.
        apollo_net.load("models/%s.base.caffemodel" % corpus_name)
        print "loaded model"
        train(train_scenes, dev_scenes, compiled_speaker1_model, apollo_net,
                config.opt)
        apollo_net.save("models/%s.compiled.caffemodel" % corpus_name)
        exit()

    if job in ("sample.base", "sample.compiled"):
        if job == "sample.base":
            apollo_net.load("models/%s.base.caffemodel" % corpus_name)
        else:
            apollo_net.load("models/%s.compiled.caffemodel" % corpus_name)
        print "loaded model"
        if job == "sample.base":
            models = {
                "sampling_speaker1": sampling_speaker1_model,
            }
        elif job == "sample.compiled":
            models = {
                "compiled_speaker1": compiled_speaker1_model,
            }

        # "base" or "compiled" -- used to name the experiment outputs.
        name = job.split(".")[1]

        run_experiment("one_different", corpus_name, name, models, dev_scenes)
        run_experiment("by_similarity", corpus_name, name, models, dev_scenes)
        run_experiment("all_same", corpus_name, name, models, dev_scenes)
Example #17
0
#!/usr/bin/env python2

import caffe
import apollocaffe
from apollocaffe import ApolloNet, layers
import numpy as np
import timeit

# Micro-benchmark scaffolding: synthetic data for timing ApolloNet input
# layers (MemoryData vs. NumpyData); the timing bodies below are disabled.
# caffe.set_mode_gpu()
apollocaffe.set_device(0)

net = ApolloNet()
batch_size = 64

# Synthetic batch: 512-channel 20x20 float feature maps, plus integer class
# labels in [0, 10) cast int32 -> float32 (presumably to match the float
# label blob the net expects -- confirm against the layer definitions).
data = np.random.random(size=(batch_size, 512, 20, 20)).astype(np.float32)
labels = (np.random.randint(10, size=(batch_size,))
          .astype(np.int32)
          .astype(np.float32))

# print data.dtype
# print labels.dtype

# Candidate loader bodies for timeit (currently disabled):
# def load_mem():
#     net.clear_forward()
#     net.f(layers.MemoryData(
#         "mem", data, labels, tops=["input_top", "label_top"],
#         batch_size=batch_size, channels=512, width=20, height=20))
#
# def load_np():
#     net.clear_forward()
#     net.f(layers.NumpyData("np", data))
#
# load_mem()
Example #18
0
#!/usr/bin/env python2

import caffe
import apollocaffe
from apollocaffe import ApolloNet, layers
import numpy as np
import timeit

# Benchmark setup: build an empty ApolloNet and a random synthetic batch.
# The commented-out loaders below were the bodies meant for timeit.
# caffe.set_mode_gpu()
apollocaffe.set_device(0)  # select GPU 0

net = ApolloNet()
batch_size = 64

shape = (batch_size, 512, 20, 20)
data = np.random.random(size=shape).astype(np.float32)
# Integer labels cast through int32 to float32.
labels = np.random.randint(10, size=(batch_size,)).astype(np.int32).astype(np.float32)

# print data.dtype
# print labels.dtype

# def load_mem():
#    net.clear_forward()
#    net.f(layers.MemoryData(
#        "mem", data, labels, tops=["input_top", "label_top"],
#        batch_size=batch_size, channels=512, width=20, height=20))
#
# def load_np():
#    net.clear_forward()
#    net.f(layers.NumpyData("np", data))
#
# load_mem()
# load_np()
Example #19
0
 def __init__(self, categorical=False):
     """Initialize a fresh ApolloNet and AdaDelta optimizer state.

     categorical -- flag stored on the instance for later use
     (presumably selects a multi-class rather than binary output
     head -- confirm against the model's forward pass).
     """
     self.categorical = categorical
     self.net = ApolloNet()
     self.opt_state = adadelta.State()
Example #20
0
class Agent(object):
    """DQN-style agent built on a small ApolloCaffe MLP.

    Keeps an experience-replay buffer of (state, action, new_state, reward)
    transitions and two weight-sharing copies of the same network inside one
    ApolloNet: the "now_" copy used for acting and learning, and a "fut_"
    target copy that is refreshed from "now_" by update_target().
    """

    def __init__(self):
        self.net = ApolloNet()

        # Replay buffer of (state, action, new_state, reward) tuples.
        self.transitions = []

    def forward(self, features, n_actions, prefix=""):
        """Run the Q-value MLP on `features`; return the output layer name.

        The output layer holds one Q estimate per action. `prefix`
        namespaces both layer names and parameter names, so "now_" and
        "fut_" give two independently parameterized copies of the same
        architecture within a single net.
        """
        net = self.net

        l_data = prefix + "data"
        l_ip1 = prefix + "ip1"
        l_relu1 = prefix + "relu1"
        l_ip2 = prefix + "ip2"

        # Sharing param_names (not layer names) is what ties weights across
        # repeated forward() calls using the same prefix.
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]

        net.f(NumpyData(l_data, features))
        net.f(InnerProduct(l_ip1, 50, bottoms=[l_data], param_names=p_ip1))
        net.f(ReLU(l_relu1, bottoms=[l_ip1]))
        net.f(
            InnerProduct(l_ip2,
                         n_actions,
                         bottoms=[l_relu1],
                         param_names=p_ip2))

        return l_ip2

    def choose(self, state):
        """Pick an action for `state` epsilon-greedily (epsilon = 0.1).

        Acts uniformly at random until the replay buffer holds at least
        BATCH_SIZE transitions. Stores the state/action pair so the next
        update() call can complete the transition tuple.
        """
        self.net.clear_forward()
        q_layer = self.forward(np.asarray([state.features]), state.n_actions,
                               "now_")
        q_data = self.net.blobs[q_layer].data.ravel()

        if len(self.transitions) < BATCH_SIZE or np.random.random() < 0.1:
            action = np.random.choice(state.n_actions)
        else:
            action = np.argmax(q_data)

        self.current_state = state
        self.current_action = action

        return action

    def update(self, reward, new_state):
        """Record the completed transition and take one Q-learning step.

        Samples BATCH_SIZE transitions (with replacement) from the replay
        buffer, computes the bootstrapped target
        reward + 0.9 * max_a Q_fut(s', a), and regresses Q_now(s, a) toward
        it with a Euclidean loss. No-op until the buffer holds BATCH_SIZE
        transitions.
        """
        current_transition = \
                (self.current_state, self.current_action, new_state, reward)
        self.transitions.append(current_transition)

        if len(self.transitions) < BATCH_SIZE:
            return

        replay_choices = np.random.choice(len(self.transitions), BATCH_SIZE)
        replay_transitions = [self.transitions[i] for i in replay_choices]

        replay_before_data = np.asarray(
            [r[0].features for r in replay_transitions])
        replay_action_data = np.asarray([r[1] for r in replay_transitions])
        replay_after_data = np.asarray(
            [r[2].features for r in replay_transitions])
        replay_reward_data = np.asarray([r[3] for r in replay_transitions])

        net = self.net

        self.net.clear_forward()
        l_q_now = self.forward(replay_before_data, new_state.n_actions, "now_")
        l_q_fut = self.forward(replay_after_data, new_state.n_actions, "fut_")

        # Target computed in numpy, so no gradient flows into the "fut_"
        # (target) network.
        # NOTE(review): there is no terminal-state masking here -- if
        # episodes terminate, the 0.9 * max Q bootstrap term should
        # presumably be zeroed for terminal transitions; confirm against
        # the environment's semantics.
        pred_value = 0.9 * np.max(net.blobs[l_q_fut].data,
                                  axis=1) + replay_reward_data

        l_action_now = "action_now"
        l_index = "index"
        l_pred_value = "pred_value"
        l_loss = "loss"

        net.f(NumpyData(l_action_now, replay_action_data))
        # Select Q_now(s, a) for the action actually taken in each row.
        net.f(Index(l_index, {}, bottoms=[l_q_now, l_action_now]))
        net.f(NumpyData(l_pred_value, pred_value))
        net.f(EuclideanLoss(l_loss, bottoms=[l_index, l_pred_value]))

        net.backward()
        net.update(lr=0.01)

    def update_target(self):
        """Copy the "now_" network weights into the "fut_" target network.

        No-op before the "fut_" parameters exist (i.e. before the first
        update() has run a "fut_"-prefixed forward pass).
        """
        net = self.net
        if "fut_ip1_weight" not in net.params.keys():
            return
        net.params["fut_ip1_weight"].data[
            ...] = net.params["now_ip1_weight"].data
        net.params["fut_ip1_bias"].data[...] = net.params["now_ip1_bias"].data
        net.params["fut_ip2_weight"].data[
            ...] = net.params["now_ip2_weight"].data
        net.params["fut_ip2_bias"].data[...] = net.params["now_ip2_bias"].data