class EnsembleModel:
    """Ensemble of pretrained submodels whose per-answer scores are merged
    by a learned 1x1 convolution and trained with a softmax loss.

    NOTE(review): an identical copy of this class appears later in this
    file — consider deduplicating.
    """

    def __init__(self, config, opt_config):
        """Load every submodel listed in ``config.models`` (each entry
        supplies a YAML model config and a weights file) and create the
        merge network."""
        self.config = config
        self.opt_config = opt_config
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # deprecated (PyYAML >= 5.1) and unsafe on untrusted
                # input; prefer yaml.safe_load if these configs need no
                # custom tags — confirm before changing.
                mconfig = Struct(**yaml.load(config_f))
            model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)
        self.n_models = len(self.models)
        self.apollo_net = ApolloNet()

    def forward(self, layout_type, indices, string, input, target,
                compute_eval=False):
        """Run every submodel, stack their answer scores, and merge them
        with a 1x1 convolution into a single softmax loss.

        Returns:
            (loss, eval) — ``eval`` is the accuracy when ``compute_eval``
            is True, otherwise None.
        """
        batch_size = -1
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            # Submodel's raw answer scores; presumably shape
            # (batch, len(ANSWER_INDEX)) — the reshape below relies on it.
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))
        self.apollo_net.f(layers.Concat(
            "concat",
            bottoms=["output_%d" % i for i in range(self.n_models)]))
        # One channel per submodel so the 1x1 convolution mixes models.
        self.apollo_net.blobs["concat"].reshape(
            (batch_size, self.n_models, len(ANSWER_INDEX), 1))
        self.apollo_net.f(layers.Convolution(
            "merge", (1, 1), 1, bottoms=["concat"]))
        self.apollo_net.blobs["merge"].reshape(
            (batch_size, len(ANSWER_INDEX)))
        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(layers.SoftmaxWithLoss(
            "loss", bottoms=["merge", "target"], normalize=False))
        if not compute_eval:
            return loss, None
        # Renamed from `eval` to avoid shadowing the builtin.
        eval_metric = self.apollo_net.f(my_layers.Accuracy(
            "acc", bottoms=["merge", "target"]))
        return loss, eval_metric

    def train(self):
        """Backpropagate through the merge network and update it; also
        update the submodels when the config asks for it."""
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        """Reset forward state of every submodel and of the merge net."""
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        """Intentionally a no-op: ensemble weights are not persisted."""
        pass
class MetricAgent(object):
    """Random-exploration agent that trains a Siamese metric network:
    embeddings of consecutive states are pushed toward unit squared
    distance via a Euclidean loss."""

    def __init__(self):
        # One shared net; replay buffer of (state, next_state) pairs.
        self.net = ApolloNet()
        self.experiences = []

    def choose(self, state):
        """Remember the current state and pick, uniformly at random, a
        direction that actually moves the agent."""
        self.current_state = state
        moving = [d for d in (NORTH, EAST, SOUTH, WEST)
                  if state.step(d)[1].agent_pos != state.agent_pos]
        return np.random.choice(moving)

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        """Build the Siamese forward pass and return the Euclidean loss.

        ``cost`` and ``n_actions`` are accepted for interface
        compatibility but are not used here.
        """
        net = self.net

        def embed(tag, feats):
            # Two-layer MLP tower; parameters are shared between the
            # "from" and "to" towers via the prefix-scoped param names.
            top = prefix + "ip2_" + tag
            net.f(NumpyData(prefix + "data_" + tag, feats))
            net.f(InnerProduct(
                prefix + "ip1_" + tag, 50,
                bottoms=[prefix + "data_" + tag],
                param_names=[prefix + "ip1_weight", prefix + "ip1_bias"]))
            net.f(ReLU(prefix + "relu1_" + tag,
                       bottoms=[prefix + "ip1_" + tag]))
            net.f(InnerProduct(
                top, 50, bottoms=[prefix + "relu1_" + tag],
                param_names=[prefix + "ip2_weight", prefix + "ip2_bias"]))
            return top

        top_from = embed("from", feats_from)
        top_to = embed("to", feats_to)

        # Squared difference of the two embeddings.
        net.f(Power("inv", scale=-1, bottoms=[top_from]))
        net.f(Eltwise("diff", "SUM", bottoms=[top_to, "inv"]))
        net.f(Power("sq", power=2, bottoms=["diff"]))
        # Frozen all-ones inner product (lr mult 0): sums the squared
        # components, i.e. the squared Euclidean distance.
        net.f(InnerProduct(
            "reduce", 1, bottoms=["sq"],
            param_names=["reduce_weight", "reduce_bias"],
            param_lr_mults=[0, 0],
            weight_filler=Filler("constant", 1),
            bias_filler=Filler("constant", 0)))
        # Regress the squared distance toward 1 for every pair.
        net.f(NumpyData("target", np.ones((BATCH_SIZE, 1))))
        return net.f(EuclideanLoss("loss", bottoms=["reduce", "target"]))

    def update(self, reward, new_state):
        """Record the transition and, once the buffer holds at least
        BATCH_SIZE entries, train on a random replay batch.

        Returns the batch loss, or 0 while the buffer is still filling.
        """
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0
        picks = np.random.choice(len(self.experiences), BATCH_SIZE)
        batch = [self.experiences[i] for i in picks]
        from_features = np.asarray([src.features for src, _ in batch])
        to_features = np.asarray([dst.features for _, dst in batch])
        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1,
                            new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)
        return loss

    def update_target(self):
        """No-op: this agent keeps no separate target network."""
        pass
class EnsembleModel:
    """Ensemble of pretrained submodels whose per-answer scores are merged
    by a learned 1x1 convolution and trained with a softmax loss.

    NOTE(review): an identical copy of this class appears earlier in this
    file — consider deduplicating.
    """

    def __init__(self, config, opt_config):
        # Load each submodel from its own YAML config and weights file.
        self.config = config
        self.opt_config = opt_config
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # deprecated and unsafe on untrusted input; consider
                # yaml.safe_load if these configs need no custom tags.
                mconfig = Struct(**yaml.load(config_f))
            model = models.build_model(mconfig.model, mconfig.opt)
            model.load(cmodel.weights)
            self.models.append(model)
        self.n_models = len(self.models)
        # Separate net that holds only the merge layers.
        self.apollo_net = ApolloNet()

    def forward(self, layout_type, indices, string, input, target,
                compute_eval=False):
        """Run every submodel, stack their answer scores, and merge them
        with a 1x1 convolution into a single softmax loss.

        Returns (loss, eval): ``eval`` is the accuracy when
        ``compute_eval`` is True, otherwise None.
        """
        batch_size = -1
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            # Submodel's raw answer scores; presumably shape
            # (batch, len(ANSWER_INDEX)) — the reshape below relies on it.
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))
        self.apollo_net.f(
            layers.Concat(
                "concat",
                bottoms=["output_%d" % i for i in range(self.n_models)]))
        # One channel per submodel so the 1x1 convolution mixes models.
        self.apollo_net.blobs["concat"].reshape(
            (batch_size, self.n_models, len(ANSWER_INDEX), 1))
        self.apollo_net.f(
            layers.Convolution("merge", (1, 1), 1, bottoms=["concat"]))
        self.apollo_net.blobs["merge"].reshape((batch_size, len(ANSWER_INDEX)))
        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(
            layers.SoftmaxWithLoss("loss", bottoms=["merge", "target"],
                                   normalize=False))
        if compute_eval:
            # NOTE(review): `eval` shadows the builtin of the same name;
            # a rename would be safe but is left unchanged here.
            eval = self.apollo_net.f(
                my_layers.Accuracy("acc", bottoms=["merge", "target"]))
            return loss, eval
        else:
            return loss, None

    def train(self):
        """Backpropagate through the merge net and update it; also update
        the submodels when the config asks for it."""
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        # Reset forward state of every submodel and of the merge net.
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        """Intentionally a no-op: ensemble weights are not persisted."""
        pass
class MetricAgent(object):
    """Random-exploration agent that trains a Siamese metric network:
    embeddings of consecutive states are regressed toward unit squared
    distance via a Euclidean loss."""

    def __init__(self):
        # One shared net; replay buffer of (state, next_state) pairs.
        self.net = ApolloNet()
        self.experiences = []

    def choose(self, state):
        """Remember the current state and pick, uniformly at random, a
        direction that actually moves the agent.

        NOTE(review): raises if no direction moves the agent — assumes
        the environment never fully boxes the agent in; confirm.
        """
        self.current_state = state
        dirs = [NORTH, EAST, SOUTH, WEST]
        interesting_dirs = \
            [d for d in dirs
             if state.step(d)[1].agent_pos != state.agent_pos]
        return np.random.choice(interesting_dirs)

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        """Build the Siamese forward pass and return the Euclidean loss.

        ``cost`` and ``n_actions`` are accepted but unused here.
        Parameters of the two towers are shared via identical
        prefix-scoped param names.
        """
        net = self.net
        # Layer names for the "from" tower.
        l_data_from = prefix + "data_from"
        l_ip1_from = prefix + "ip1_from"
        l_relu1_from = prefix + "relu1_from"
        l_ip2_from = prefix + "ip2_from"
        # Layer names for the "to" tower.
        l_data_to = prefix + "data_to"
        l_ip1_to = prefix + "ip1_to"
        l_relu1_to = prefix + "relu1_to"
        l_ip2_to = prefix + "ip2_to"
        # Distance head.
        l_inv = "inv"
        l_diff = "diff"
        l_sq = "sq"
        l_reduce = "reduce"
        l_target = "target"
        l_loss = "loss"
        # Shared parameter names (same for both towers).
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        p_reduce = ["reduce_weight", "reduce_bias"]
        # "from" tower: two-layer MLP.
        net.f(NumpyData(l_data_from, feats_from))
        net.f(
            InnerProduct(l_ip1_from, 50, bottoms=[l_data_from],
                         param_names=p_ip1))
        net.f(ReLU(l_relu1_from, bottoms=[l_ip1_from]))
        net.f(
            InnerProduct(l_ip2_from, 50, bottoms=[l_relu1_from],
                         param_names=p_ip2))
        # "to" tower: same architecture, same parameters.
        net.f(NumpyData(l_data_to, feats_to))
        net.f(
            InnerProduct(l_ip1_to, 50, bottoms=[l_data_to],
                         param_names=p_ip1))
        net.f(ReLU(l_relu1_to, bottoms=[l_ip1_to]))
        net.f(
            InnerProduct(l_ip2_to, 50, bottoms=[l_relu1_to],
                         param_names=p_ip2))
        # Squared difference of the two embeddings.
        net.f(Power(l_inv, scale=-1, bottoms=[l_ip2_from]))
        net.f(Eltwise(l_diff, "SUM", bottoms=[l_ip2_to, l_inv]))
        net.f(Power(l_sq, power=2, bottoms=[l_diff]))
        # Frozen all-ones inner product (lr mult 0): sums the squared
        # components, i.e. the squared Euclidean distance.
        net.f(
            InnerProduct(l_reduce, 1, bottoms=[l_sq],
                         param_names=p_reduce,
                         param_lr_mults=[0, 0],
                         weight_filler=Filler("constant", 1),
                         bias_filler=Filler("constant", 0)))
        # Regress the squared distance toward 1 for every pair.
        net.f(NumpyData(l_target, np.ones((BATCH_SIZE, 1))))
        loss = net.f(EuclideanLoss(l_loss, bottoms=[l_reduce, l_target]))
        return loss

    def update(self, reward, new_state):
        """Record the transition and, once the buffer holds at least
        BATCH_SIZE entries, train on a random replay batch.

        Returns the batch loss, or 0 while the buffer is still filling.
        """
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0
        # Sample indices with replacement (np.random.choice default).
        replay_choices = np.random.choice(len(self.experiences), BATCH_SIZE)
        replay_transitions = [self.experiences[i] for i in replay_choices]
        from_features = np.asarray([t[0].features for t in replay_transitions])
        to_features = np.asarray([t[1].features for t in replay_transitions])
        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1,
                            new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)
        return loss

    def update_target(self):
        """No-op: this agent keeps no separate target network."""
        pass