def __init__(self, config, opt_config):
    """Load every sub-model named in the ensemble config.

    Each entry of config.models supplies its own YAML model config and a
    weights file; the loaded sub-models are collected in self.models.
    """
    self.config = config
    self.opt_config = opt_config
    self.models = []
    for cmodel in config.models:
        with open(cmodel.config) as config_f:
            mconfig = Struct(**yaml.load(config_f))
            submodel = models.build_model(mconfig.model, mconfig.opt)
            submodel.load(cmodel.weights)
            self.models.append(submodel)
    self.n_models = len(self.models)
    self.apollo_net = ApolloNet()
def __init__(self, categorical=False):
    """Create the network and optimizer state.

    categorical -- when True, predict one of N_CLASSES labels; otherwise
    regress a 2-dimensional target.
    """
    self.net = ApolloNet()
    self.opt_state = adadelta.State()
    self.categorical = categorical
    self.n_targets = N_CLASSES if categorical else 2
class EnsembleModel:
    """Combines several pre-trained sub-models by stacking their answer
    distributions and mixing them with a learned 1x1 convolution."""

    def __init__(self, config, opt_config):
        # Load each sub-model named in config.models from its own YAML
        # config and weights file.
        self.config = config
        self.opt_config = opt_config
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # NOTE(review): yaml.load without an explicit Loader is
                # unsafe on untrusted input -- confirm configs are trusted.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
                model.load(cmodel.weights)
                self.models.append(model)
        self.n_models = len(self.models)
        self.apollo_net = ApolloNet()

    def forward(self, layout_type, indices, string, input, target,
                compute_eval=False):
        # Run every sub-model, stack their answer blobs along a new axis,
        # and mix them with a learned 1x1 convolution.
        batch_size = -1
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))
        self.apollo_net.f(layers.Concat(
                "concat",
                bottoms=["output_%d" % i for i in range(self.n_models)]))
        # Shape (batch, model, answer, 1) so the 1x1 convolution mixes
        # across the model axis only.
        self.apollo_net.blobs["concat"].reshape(
                (batch_size, self.n_models, len(ANSWER_INDEX), 1))
        self.apollo_net.f(layers.Convolution(
                "merge", (1, 1), 1, bottoms=["concat"]))
        self.apollo_net.blobs["merge"].reshape(
                (batch_size, len(ANSWER_INDEX)))
        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(layers.SoftmaxWithLoss(
                "loss", bottoms=["merge", "target"], normalize=False))
        if compute_eval:
            eval = self.apollo_net.f(my_layers.Accuracy(
                    "acc", bottoms=["merge", "target"]))
            return loss, eval
        else:
            return loss, None

    def train(self):
        # One optimizer step on the merge parameters; optionally also step
        # every sub-model.
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        # Reset forward state on the ensemble net and all sub-models.
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        # Persisting the ensemble is intentionally a no-op.
        pass
class Iterator(object):
    """Predicts a demonstration sequence step by step: an LSTM
    "initialize" pass proposes states, then a bidirectional "refine" pass
    smooths the proposed sequence."""

    def __init__(self, categorical=False):
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        # Only the regression formulation is supported by this model.
        assert not categorical

    def forward(self, data, train=False):
        """Run (and, when train=True, update) the model on a batch.

        Returns (loss, layer_names): loss is np.asarray([loss1, loss2]) in
        training mode and None otherwise; layer_names name the refined
        per-step prediction blobs.
        """
        features = np.asarray([d.features for d in data])
        max_len = max(len(d.demonstration) for d in data)
        # BUG FIX: the original read len(d.demonstration[0]) here, but `d`
        # is local to the generator expression above and is not visible in
        # this scope (NameError); use the first datum explicitly.
        n_targets = len(data[0].demonstration[0])

        # Dense target/mask tensors padded to the longest demonstration.
        targets = np.zeros((len(data), max_len, n_targets))
        masks = np.zeros((len(data), max_len, n_targets))
        for i_datum in range(len(data)):
            demo_len = len(data[i_datum].demonstration)
            targets[i_datum, :demo_len, ...] = data[i_datum].demonstration
            masks[i_datum, :demo_len, ...] = 1

        l_features = "features"
        l_ip_repr = "ip_repr"
        l_relu_repr = "relu_repr"
        lt_mask = "mask_%d"
        lt_target = "target_%d"

        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        self.net.f(InnerProduct(l_ip_repr, N_HIDDEN, bottoms=[l_features]))
        self.net.f(ReLU(l_relu_repr, bottoms=[l_ip_repr]))

        # First pass proposes a sequence; second pass refines it.
        ll_pred1 = self.initialize(l_relu_repr, max_len, n_targets, data,
                                   self_init=not train)
        ll_pred2 = self.refine(ll_pred1, n_targets)
        #ll_pred2 = ll_pred1

        if train:
            ll_targets = []
            ll_masks = []
            for i_target in range(1, max_len):
                l_target = lt_target % i_target
                l_mask = lt_mask % i_target
                self.net.f(NumpyData(l_target, targets[:, i_target]))
                self.net.f(NumpyData(l_mask, masks[:, i_target]))
                ll_targets.append(l_target)
                ll_masks.append(l_mask)
            # Supervise both the proposal and the refinement.
            loss1 = self.loss("pred1", ll_pred1, ll_targets, ll_masks)
            loss2 = self.loss("pred2", ll_pred2, ll_targets, ll_masks)
            loss = np.asarray([loss1, loss2])
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)
        else:
            loss = None

        return loss, ll_pred2

    def initialize(self, l_repr, max_len, n_targets, data, self_init=False):
        """First decoding pass over the demonstration.

        When self_init is True the next LSTM input is built from the
        model's own rounded prediction; otherwise the ground-truth next
        state is injected (teacher forcing).
        """
        # NOTE(review): l_repr is currently unused here -- confirm whether
        # the scene representation was meant to feed the "init" LSTM.
        lt_state_repr = "state_repr_%d"
        lt_pred = "pred_%d"
        ll_state_reprs = [lt_state_repr % t for t in range(max_len)]
        init_state_reprs = np.asarray(
                [d.inject_state_features(d.init) for d in data])
        self.net.f(NumpyData(ll_state_reprs[0], init_state_reprs))
        ll_predictions = []
        for t, l_hidden in enumerate(lstm(
                "init", [[l] for l in ll_state_reprs[:-1]], N_HIDDEN,
                self.net)):
            l_pred = lt_pred % t
            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)
            if self_init:
                # Feed back the model's own (rounded) prediction.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    state = self.net.blobs[l_pred].data[i_datum, :]
                    state = np.round(state).astype(int)
                    state_reprs.append(datum.inject_state_features(state))
            else:
                # Teacher forcing; pad with zeros past each demo's end.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    if t < len(datum.demonstration) - 1:
                        state_reprs.append(datum.inject_state_features(
                                datum.demonstration[t+1]))
                    else:
                        state_reprs.append(np.zeros(
                                self.net.blobs[ll_state_reprs[0]].shape[1:]))
            self.net.f(NumpyData(ll_state_reprs[t+1],
                                 np.asarray(state_reprs)))
        return ll_predictions

    def refine(self, ll_prev, n_targets):
        """Second pass: smooth the proposed sequence with a bidirectional
        LSTM and re-predict each step."""
        ll_hidden = bilstm("refine", [[l] for l in ll_prev], N_HIDDEN,
                           self.net)
        lt_concat = "refine_concat_%d"
        lt_pred = "refine_pred_%d"
        ll_predictions = []
        for t, l_hidden in enumerate(ll_hidden):
            l_concat = lt_concat % t
            l_pred = lt_pred % t
            #self.net.f(Concat(l_concat, bottoms=[, l_hidden]))
            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)
        return ll_predictions

    def loss(self, prefix, ll_pred, ll_targets, ll_masks):
        """Masked euclidean loss summed over all time steps."""
        lt_apply_mask = "apply_mask_%%d_%s" % prefix
        lt_loss = "loss_%%d_%s" % prefix
        loss = 0
        for t, l_pred in enumerate(ll_pred):
            l_apply_mask = lt_apply_mask % t
            l_loss = lt_loss % t
            l_mask = ll_masks[t]
            l_target = ll_targets[t]
            # Zero out padded steps before computing the loss.
            self.net.f(Eltwise(l_apply_mask, "PROD",
                               bottoms=[l_pred, l_mask]))
            loss += self.net.f(EuclideanLoss(
                    l_loss, bottoms=[l_apply_mask, l_target]))
        return loss

    def demonstrate(self, data):
        """One supervised training step on a batch of demonstrations."""
        loss, _ = self.forward(data, train=True)
        return loss

    def predict(self, data):
        """Roll the model forward and collect predicted state tuples."""
        _, ll_predictions = self.forward(data, train=False)
        predictions = []
        for i_datum, datum in enumerate(data):
            prediction = [datum.init]
            for t in range(len(datum.demonstration) - 1):
                state = self.net.blobs[ll_predictions[t]].data[i_datum, :]
                prediction.append(tuple(state))
            predictions.append(tuple(prediction))
        return predictions
class Reflex(object):
    """Stateless one-step policy: maps the current state's features
    directly to the next target through a small feed-forward network."""

    def __init__(self, categorical=False):
        # categorical: predict a class id (N_CLASSES) instead of a 2-d
        # regression target.
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        if categorical:
            self.n_targets = N_CLASSES
        else:
            self.n_targets = 2
        self.categorical = categorical

    def forward(self, features, targets, masks, train=False):
        # One forward pass of the MLP; when train=True also computes the
        # loss, backprops, and applies an adadelta step.  The prediction
        # layer name is stored in self.l_predict for callers to read.
        features = np.asarray(features)
        #positions = np.asarray(positions)
        target = np.asarray(targets)
        mask = np.asarray(masks)
        l_features = "features"
        l_positions = "positions"
        l_concat = "concat"
        lt_ip = "ip_%d"
        lt_relu = "relu_%d"
        l_target = "targets"
        l_mask = "mask"
        l_mul_mask = "mul_mask"
        l_loss = "loss"
        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        #self.net.f(NumpyData(l_positions, positions))
        #self.net.f(Concat(l_concat, bottoms=[l_features, l_positions]))
        l_prev = l_features
        # N_LAYERS - 1 hidden ReLU layers followed by a linear output.
        for i_layer in range(N_LAYERS - 1):
            l_ip = lt_ip % i_layer
            l_relu = lt_relu % i_layer
            self.net.f(InnerProduct(l_ip, N_HIDDEN, bottoms=[l_prev]))
            self.net.f(ReLU(l_relu, bottoms=[l_ip]))
            l_prev = l_relu
        l_ip = lt_ip % (N_LAYERS - 1)
        self.net.f(InnerProduct(l_ip, self.n_targets, bottoms=[l_prev]))
        self.l_predict = l_ip
        if train:
            self.net.f(NumpyData(l_target, target))
            if self.categorical:
                loss = self.net.f(SoftmaxWithLoss(
                        l_loss, bottoms=[self.l_predict, l_target]))
            else:
                # Zero out padded targets before the euclidean loss.
                self.net.f(NumpyData(l_mask, mask))
                self.net.f(Eltwise(l_mul_mask, "PROD",
                        bottoms=[l_mask, self.l_predict]))
                loss = self.net.f(EuclideanLoss(
                        l_loss, bottoms=[l_target, l_mul_mask]))
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)
            # NOTE(review): returns a value only in training mode;
            # inference callers read self.l_predict instead.
            return np.asarray([loss])

    def demonstrate(self, data):
        # Teacher-forced training over every timestep of the batch's
        # demonstrations, padded (and masked out) past each demo's end.
        max_len = max(len(d.demonstration) for d in data)
        loss = 0
        for t in range(1, max_len):
            features = []
            #positions = []
            targets = []
            masks = []
            for datum in data:
                #features.append(datum.features)
                features.append(
                        datum.inject_state_features(datum.demonstration[t-1]))
                if t < len(datum.demonstration):
                    #positions.append(datum.demonstration[t-1])
                    targets.append(datum.demonstration[t])
                    masks.append((1,) * self.n_targets)
                else:
                    #positions.append((0,) * self.n_targets)
                    targets.append((0,) * self.n_targets)
                    masks.append((0,) * self.n_targets)
            loss += self.forward(features, targets, masks, train=True)
        return loss

    def predict(self, data):
        # Roll the policy forward, reading each step's prediction from
        # self.l_predict.
        max_len = max(len(d.demonstration) for d in data)
        paths = [[datum.init] for datum in data]
        for t in range(1, max_len):
            features = []
            #positions = []
            for i_datum in range(len(data)):
                datum = data[i_datum]
                # NOTE(review): features come from the ground-truth
                # previous state, not the predicted path -- confirm this
                # is intended.
                features.append(
                        datum.inject_state_features(datum.demonstration[t-1]))
                #features.append(datum.features)
                #positions.append(paths[i_datum][-1])
            #self.forward(features, positions, [], [])
            self.forward(features, [], [])
            for i_datum in range(len(data)):
                paths[i_datum].append(
                        tuple(self.net.blobs[self.l_predict].data[i_datum]))
        return paths
def __init__(self):
    """Start with an empty transition buffer and a fresh network."""
    self.transitions = []
    self.net = ApolloNet()
class Planner(object):
    """Plan-then-act model: a "think" LSTM consumes the scene
    representation to build a plan vector, then an "act" LSTM decodes the
    demonstration conditioned on that plan."""

    def __init__(self, categorical=False):
        self.net = ApolloNet()
        self.opt_state = adadelta.State()
        # categorical: predict class ids (N_CLASSES) instead of regressing.
        self.categorical = categorical

    def forward(self, data, train=False):
        """Run (and, when train=True, update) the model on a batch.

        Returns (loss, layer_names); loss is None in inference mode.
        """
        features = np.asarray([d.features for d in data])
        max_len = max(len(d.demonstration) for d in data)
        if self.categorical:
            n_targets = N_CLASSES
            targets = np.zeros((len(data), max_len))
        else:
            # BUG FIX: the original read len(d.demonstration[0]), but `d`
            # is local to the generator expression above and is not
            # visible here (NameError); use the first datum explicitly.
            n_targets = len(data[0].demonstration[0])
            targets = np.zeros((len(data), max_len, n_targets))
        masks = np.zeros((len(data), max_len, n_targets))
        for i_datum in range(len(data)):
            demo_len = len(data[i_datum].demonstration)
            targets[i_datum, :demo_len, ...] = data[i_datum].demonstration
            masks[i_datum, :demo_len, ...] = 1

        l_features = "features"
        l_ip_repr = "ip_repr"
        l_relu_repr = "relu_repr"
        lt_mask = "mask_%d"
        lt_target = "target_%d"

        self.net.clear_forward()
        self.net.f(NumpyData(l_features, features))
        self.net.f(InnerProduct(l_ip_repr, N_HIDDEN, bottoms=[l_features]))
        self.net.f(ReLU(l_relu_repr, bottoms=[l_ip_repr]))

        # Think time is randomized during training only.
        l_plan = self.think(l_relu_repr, randomize=train)

        if train:
            ll_targets = []
            ll_masks = []
            for i_target in range(1, max_len):
                l_target = lt_target % i_target
                l_mask = lt_mask % i_target
                self.net.f(NumpyData(l_target, targets[:, i_target]))
                self.net.f(NumpyData(l_mask, masks[:, i_target]))
                ll_targets.append(l_target)
                ll_masks.append(l_mask)
        else:
            ll_targets = None
            ll_masks = None

        loss, ll_predictions = self.act(l_plan, max_len, data, ll_targets,
                                        ll_masks, self_init=not train)
        if train:
            self.net.backward()
            adadelta.update(self.net, self.opt_state, OPT_PARAMS)
        return loss, ll_predictions

    def think(self, l_repr, randomize):
        """Run the "think" LSTM over the scene representation and return
        the name of its final hidden layer (the plan)."""
        time = np.random.randint(THINK_TIME) + 1 if randomize else THINK_TIME
        # NOTE(review): this lstm() call passes 3 positional args while
        # other call sites in this file use (name, inputs, N_HIDDEN, net)
        # -- confirm the helper's signature.
        reprs = [l for l in lstm(
                "think", [[l_repr] for i in range(time)], self.net)]
        return reprs[-1]

    def act(self, l_plan, max_len, data, ll_targets, ll_masks, self_init):
        """Decode the demonstration conditioned on the plan.

        With targets/masks supplied, accumulates a per-step loss; with
        self_init, feeds back the model's own rounded predictions instead
        of teacher forcing.
        """
        #n_actions = data[0].n_actions
        if self.categorical:
            n_targets = N_CLASSES
        else:
            n_targets = len(data[0].demonstration[0])
        lt_state_repr = "state_repr_%d"
        lt_pred = "pred_%d"
        lt_apply_mask = "apply_mask_%d"
        lt_loss = "loss_%d"
        ll_state_reprs = [lt_state_repr % t for t in range(max_len)]
        init_state_reprs = np.asarray(
                [d.inject_state_features(d.init) for d in data])
        self.net.f(NumpyData(ll_state_reprs[0], init_state_reprs))
        loss = 0 if ll_targets is not None else None
        ll_predictions = []
        # NOTE(review): self.lstm is not defined on this class; the
        # module-level lstm() helper is probably intended -- confirm.
        for t, l_hidden in enumerate(self.lstm(
                "act", [[l, l_plan] for l in ll_state_reprs[:-1]])):
            l_pred = lt_pred % t
            l_apply_mask = lt_apply_mask % t
            l_loss = lt_loss % t
            self.net.f(InnerProduct(l_pred, n_targets, bottoms=[l_hidden]))
            ll_predictions.append(l_pred)
            if ll_targets is not None:
                l_mask = ll_masks[t]
                l_target = ll_targets[t]
                if self.categorical:
                    loss += self.net.f(SoftmaxWithLoss(
                            l_loss, bottoms=[l_pred, l_target]))
                else:
                    # Mask out padded steps before the euclidean loss.
                    self.net.f(Eltwise(l_apply_mask, "PROD",
                                       bottoms=[l_pred, l_mask]))
                    loss += self.net.f(EuclideanLoss(
                            l_loss, bottoms=[l_apply_mask, l_target]))
            if self_init:
                # Feed back the model's own (rounded) prediction.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    state = self.net.blobs[l_pred].data[i_datum, :]
                    state = np.round(state).astype(int)
                    state_reprs.append(datum.inject_state_features(state))
            else:
                # Teacher forcing; pad with zeros past each demo's end.
                state_reprs = []
                for i_datum, datum in enumerate(data):
                    if t < len(datum.demonstration) - 1:
                        state_reprs.append(datum.inject_state_features(
                                datum.demonstration[t+1]))
                    else:
                        state_reprs.append(np.zeros(
                                self.net.blobs[ll_state_reprs[0]].shape[1:]))
            self.net.f(NumpyData(ll_state_reprs[t+1],
                                 np.asarray(state_reprs)))
        return loss, ll_predictions

    def demonstrate(self, data):
        """One supervised training step on a batch of demonstrations."""
        loss, _ = self.forward(data, train=True)
        return loss

    def predict(self, data):
        """Roll the model forward and collect predicted state tuples."""
        _, ll_predictions = self.forward(data, train=False)
        predictions = []
        for i_datum, datum in enumerate(data):
            prediction = [datum.init]
            for t in range(len(datum.demonstration) - 1):
                state = self.net.blobs[ll_predictions[t]].data[i_datum, :]
                prediction.append(tuple(state))
            predictions.append(tuple(prediction))
        return predictions
class EnsembleModel:
    """Combines several pre-trained sub-models by stacking their answer
    distributions and mixing them with a learned 1x1 convolution."""

    def __init__(self, config, opt_config):
        """Load each sub-model listed in config.models from its own YAML
        config and weights file."""
        self.config = config
        self.opt_config = opt_config
        self.models = []
        for cmodel in config.models:
            with open(cmodel.config) as config_f:
                # SECURITY NOTE: yaml.load without an explicit Loader can
                # construct arbitrary objects; acceptable only because the
                # configs are local, trusted files.
                mconfig = Struct(**yaml.load(config_f))
                model = models.build_model(mconfig.model, mconfig.opt)
                model.load(cmodel.weights)
                self.models.append(model)
        self.n_models = len(self.models)
        self.apollo_net = ApolloNet()

    def forward(self, layout_type, indices, string, input, target,
                compute_eval=False):
        """Forward every sub-model and merge their answer distributions.

        Returns (loss, accuracy); accuracy is None unless compute_eval.
        """
        batch_size = -1
        for i_model, model in enumerate(self.models):
            model.forward(layout_type, indices, string, input, target,
                          compute_eval)
            answer = model.apollo_net.blobs[model.answer_layer].data
            batch_size = answer.shape[0]
            self.apollo_net.f(layers.NumpyData("output_%d" % i_model, answer))
        self.apollo_net.f(
            layers.Concat(
                "concat",
                bottoms=["output_%d" % i for i in range(self.n_models)]))
        # Shape (batch, model, answer, 1) so the 1x1 convolution mixes
        # across the model axis only.
        self.apollo_net.blobs["concat"].reshape(
            (batch_size, self.n_models, len(ANSWER_INDEX), 1))
        self.apollo_net.f(
            layers.Convolution("merge", (1, 1), 1, bottoms=["concat"]))
        self.apollo_net.blobs["merge"].reshape((batch_size, len(ANSWER_INDEX)))
        self.apollo_net.f(layers.NumpyData("target", target))
        loss = self.apollo_net.f(
            layers.SoftmaxWithLoss("loss", bottoms=["merge", "target"],
                                   normalize=False))
        if compute_eval:
            # Renamed from `eval`, which shadowed the builtin.
            accuracy = self.apollo_net.f(
                my_layers.Accuracy("acc", bottoms=["merge", "target"]))
            return loss, accuracy
        return loss, None

    def train(self):
        """One optimizer step on the merge parameters (and optionally on
        every sub-model)."""
        self.apollo_net.backward()
        self.apollo_net.update(lr=self.opt_config.learning_rate,
                               momentum=self.opt_config.momentum,
                               clip_gradients=self.opt_config.clip)
        if self.config.train_submodels:
            for model in self.models:
                model.train()

    def clear(self):
        """Reset forward state on the ensemble net and all sub-models."""
        for model in self.models:
            model.clear()
        self.apollo_net.clear_forward()

    def save(self, dest):
        # Persisting the ensemble is intentionally a no-op.
        pass
def __init__(self):
    """Start with an empty experience buffer and a fresh network."""
    self.experiences = []
    self.net = ApolloNet()
class MetricAgent(object):
    """Learns a state embedding whose squared distance between
    consecutively-visited states is regressed toward 1 -- i.e. a metric
    over the state space."""

    def __init__(self):
        self.net = ApolloNet()
        self.experiences = []

    def choose(self, state):
        # Random exploration, restricted to directions that actually move
        # the agent.
        self.current_state = state
        dirs = [NORTH, EAST, SOUTH, WEST]
        interesting_dirs = \
            [d for d in dirs if state.step(d)[1].agent_pos != state.agent_pos]
        return np.random.choice(interesting_dirs)

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        # Embed both endpoint states with a shared two-layer tower, then
        # regress the squared embedding distance onto a constant 1.
        # (cost and n_actions are currently unused.)
        net = self.net
        l_data_from = prefix + "data_from"
        l_ip1_from = prefix + "ip1_from"
        l_relu1_from = prefix + "relu1_from"
        l_ip2_from = prefix + "ip2_from"
        l_data_to = prefix + "data_to"
        l_ip1_to = prefix + "ip1_to"
        l_relu1_to = prefix + "relu1_to"
        l_ip2_to = prefix + "ip2_to"
        l_inv = "inv"
        l_diff = "diff"
        l_sq = "sq"
        l_reduce = "reduce"
        l_target = "target"
        l_loss = "loss"
        # Both towers share ip1/ip2 parameters by name.
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        net.f(NumpyData(l_data_from, feats_from))
        net.f(InnerProduct(l_ip1_from, 50, bottoms=[l_data_from],
                param_names=p_ip1))
        net.f(ReLU(l_relu1_from, bottoms=[l_ip1_from]))
        net.f(InnerProduct(l_ip2_from, 50, bottoms=[l_relu1_from],
                param_names=p_ip2))
        net.f(NumpyData(l_data_to, feats_to))
        net.f(InnerProduct(l_ip1_to, 50, bottoms=[l_data_to],
                param_names=p_ip1))
        net.f(ReLU(l_relu1_to, bottoms=[l_ip1_to]))
        net.f(InnerProduct(l_ip2_to, 50, bottoms=[l_relu1_to],
                param_names=p_ip2))
        # diff = to - from, sq = diff**2, reduce = frozen all-ones inner
        # product == sum of squared differences.
        net.f(Power(l_inv, scale=-1, bottoms=[l_ip2_from]))
        net.f(Eltwise(l_diff, "SUM", bottoms=[l_ip2_to, l_inv]))
        net.f(Power(l_sq, power=2, bottoms=[l_diff]))
        p_reduce = ["reduce_weight", "reduce_bias"]
        net.f(InnerProduct(
            l_reduce, 1, bottoms=[l_sq], param_names=p_reduce,
            param_lr_mults=[0, 0], weight_filler=Filler("constant", 1),
            bias_filler=Filler("constant", 0)))
        net.f(NumpyData(l_target, np.ones((BATCH_SIZE, 1))))
        loss = net.f(EuclideanLoss(l_loss, bottoms=[l_reduce, l_target]))
        return loss

    def update(self, reward, new_state):
        # Store the (from, to) pair and train on a random replayed batch
        # once the buffer holds BATCH_SIZE experiences.
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0
        replay_choices = np.random.choice(len(self.experiences), BATCH_SIZE)
        replay_transitions = [self.experiences[i] for i in replay_choices]
        from_features = np.asarray([t[0].features for t in replay_transitions])
        to_features = np.asarray([t[1].features for t in replay_transitions])
        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1,
                new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)
        return loss

    def update_target(self):
        # No separate target network for the metric learner.
        pass
class Agent(object):
    """DQN-style agent: epsilon-greedy action selection plus
    experience-replay Q-learning with separate "now_" and "fut_" (target)
    parameter sets."""

    def __init__(self):
        self.net = ApolloNet()
        self.transitions = []

    def forward(self, features, n_actions, prefix=""):
        # Builds the Q-network on self.net and returns the name of the
        # layer holding Q-values; prefix selects the parameter set.
        net = self.net
        l_data = prefix + "data"
        l_ip1 = prefix + "ip1"
        l_relu1 = prefix + "relu1"
        l_ip2 = prefix + "ip2"
        l_relu2 = prefix + "relu2"
        l_ip3 = prefix + "ip3"
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        net.f(NumpyData(l_data, features))
        net.f(InnerProduct(l_ip1, 50, bottoms=[l_data], param_names=p_ip1))
        net.f(ReLU(l_relu1, bottoms=[l_ip1]))
        net.f(InnerProduct(l_ip2, n_actions, bottoms=[l_relu1],
                param_names=p_ip2))
        #net.f(ReLU(relu2, bottoms=[ip2]))
        #net.f(InnerProduct(ip3, state.n_actions, bottoms=[relu2]))
        return l_ip2

    def choose(self, state):
        # Epsilon-greedy (eps = 0.1); purely random until the replay
        # buffer holds a full batch.
        self.net.clear_forward()
        q_layer = self.forward(np.asarray([state.features]),
                state.n_actions, "now_")
        q_data = self.net.blobs[q_layer].data.ravel()
        if len(self.transitions) < BATCH_SIZE or np.random.random() < 0.1:
        #if True:
            action = np.random.choice(state.n_actions)
        else:
            action = np.argmax(q_data)
        self.current_state = state
        self.current_action = action
        return action

    def update(self, reward, new_state):
        # Store the transition and run one Q-learning step on a random
        # replay batch (discount gamma = 0.9).
        current_transition = \
            (self.current_state, self.current_action, new_state, reward)
        self.transitions.append(current_transition)
        if len(self.transitions) < BATCH_SIZE:
            return
        replay_choices = np.random.choice(len(self.transitions), BATCH_SIZE)
        replay_transitions = [self.transitions[i] for i in replay_choices]
        replay_before_data = np.asarray(
                [r[0].features for r in replay_transitions])
        replay_action_data = np.asarray([r[1] for r in replay_transitions])
        replay_after_data = np.asarray(
                [r[2].features for r in replay_transitions])
        replay_reward_data = np.asarray([r[3] for r in replay_transitions])
        net = self.net
        self.net.clear_forward()
        l_q_now = self.forward(replay_before_data, new_state.n_actions,
                "now_")
        l_q_fut = self.forward(replay_after_data, new_state.n_actions,
                "fut_")
        # Bootstrapped target: r + 0.9 * max_a' Q_fut(s', a').
        pred_value = 0.9 * np.max(net.blobs[l_q_fut].data,
                axis=1) + replay_reward_data
        l_action_now = "action_now"
        l_index = "index"
        l_pred_value = "pred_value"
        l_loss = "loss"
        net.f(NumpyData(l_action_now, replay_action_data))
        # Select Q(s, a) for the actions actually taken.
        net.f(Index(l_index, {}, bottoms=[l_q_now, l_action_now]))
        net.f(NumpyData(l_pred_value, pred_value))
        net.f(EuclideanLoss(l_loss, bottoms=[l_index, l_pred_value]))
        net.backward()
        net.update(lr=0.01)

    def update_target(self):
        # Copy "now" parameters into the "fut" target network (no-op until
        # the fut_ parameters have been created by a forward pass).
        net = self.net
        if "fut_ip1_weight" not in net.params.keys():
            return
        net.params["fut_ip1_weight"].data[...] = \
                net.params["now_ip1_weight"].data
        net.params["fut_ip1_bias"].data[...] = net.params["now_ip1_bias"].data
        net.params["fut_ip2_weight"].data[...] = \
                net.params["now_ip2_weight"].data
        net.params["fut_ip2_bias"].data[...] = net.params["now_ip2_bias"].data
class MetricAgent(object):
    """Learns a state embedding whose squared distance between
    consecutively-visited states is trained toward a constant 1 -- a
    metric over the state space."""

    def __init__(self):
        self.net = ApolloNet()
        self.experiences = []

    def choose(self, state):
        """Pick a random direction among those that actually move the
        agent."""
        self.current_state = state
        candidates = [
            d for d in [NORTH, EAST, SOUTH, WEST]
            if state.step(d)[1].agent_pos != state.agent_pos
        ]
        return np.random.choice(candidates)

    def _embed(self, prefix, suffix, feats):
        """Two-layer embedding tower; both towers share ip1/ip2 params by
        name. Returns the name of the embedding layer."""
        net = self.net
        l_data = prefix + "data_" + suffix
        l_ip1 = prefix + "ip1_" + suffix
        l_relu1 = prefix + "relu1_" + suffix
        l_ip2 = prefix + "ip2_" + suffix
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        net.f(NumpyData(l_data, feats))
        net.f(InnerProduct(l_ip1, 50, bottoms=[l_data], param_names=p_ip1))
        net.f(ReLU(l_relu1, bottoms=[l_ip1]))
        net.f(InnerProduct(l_ip2, 50, bottoms=[l_relu1], param_names=p_ip2))
        return l_ip2

    def forward(self, prefix, feats_from, feats_to, cost, n_actions):
        """Embed both endpoint states and regress their squared embedding
        distance onto 1. (cost and n_actions are unused.)"""
        net = self.net
        l_from = self._embed(prefix, "from", feats_from)
        l_to = self._embed(prefix, "to", feats_to)
        # diff = to - from, sq = diff**2, reduce = frozen all-ones inner
        # product == sum of squared differences.
        net.f(Power("inv", scale=-1, bottoms=[l_from]))
        net.f(Eltwise("diff", "SUM", bottoms=[l_to, "inv"]))
        net.f(Power("sq", power=2, bottoms=["diff"]))
        net.f(InnerProduct(
            "reduce", 1, bottoms=["sq"],
            param_names=["reduce_weight", "reduce_bias"],
            param_lr_mults=[0, 0],
            weight_filler=Filler("constant", 1),
            bias_filler=Filler("constant", 0)))
        net.f(NumpyData("target", np.ones((BATCH_SIZE, 1))))
        return net.f(EuclideanLoss("loss", bottoms=["reduce", "target"]))

    def update(self, reward, new_state):
        """Store the (from, to) pair and train on a replayed minibatch
        once the buffer holds BATCH_SIZE experiences."""
        self.experiences.append((self.current_state, new_state))
        if len(self.experiences) < BATCH_SIZE:
            return 0
        picks = np.random.choice(len(self.experiences), BATCH_SIZE)
        batch = [self.experiences[i] for i in picks]
        from_features = np.asarray([pair[0].features for pair in batch])
        to_features = np.asarray([pair[1].features for pair in batch])
        self.net.clear_forward()
        loss = self.forward("", from_features, to_features, 1,
                            new_state.n_actions)
        self.net.backward()
        self.net.update(lr=0.01)
        return loss

    def update_target(self):
        # No separate target network for the metric learner.
        pass
def main(): apollocaffe.set_device(0) #apollocaffe.set_cpp_loglevel(0) apollocaffe.set_random_seed(0) np.random.seed(0) job = sys.argv[1] corpus_name = sys.argv[2] config = util.Struct(**yaml.load(CONFIG)) if corpus_name == "abstract": train_scenes, dev_scenes, test_scenes = corpus.load_abstract() else: assert corpus_name == "birds" train_scenes, dev_scenes, test_scenes = corpus.load_birds() apollo_net = ApolloNet() print "loaded data" print "%d training examples" % len(train_scenes) listener0_model = Listener0Model(apollo_net, config.model) speaker0_model = Speaker0Model(apollo_net, config.model) sampling_speaker1_model = SamplingSpeaker1Model(apollo_net, config.model) compiled_speaker1_model = CompiledSpeaker1Model(apollo_net, config.model) if job == "train.base": train(train_scenes, dev_scenes, listener0_model, apollo_net, config.opt) train(train_scenes, dev_scenes, speaker0_model, apollo_net, config.opt) apollo_net.save("models/%s.base.caffemodel" % corpus_name) exit() if job == "train.compiled": apollo_net.load("models/%s.base.caffemodel" % corpus_name) print "loaded model" train(train_scenes, dev_scenes, compiled_speaker1_model, apollo_net, config.opt) apollo_net.save("models/%s.compiled.caffemodel" % corpus_name) exit() if job in ("sample.base", "sample.compiled"): if job == "sample.base": apollo_net.load("models/%s.base.caffemodel" % corpus_name) else: apollo_net.load("models/%s.compiled.caffemodel" % corpus_name) print "loaded model" if job == "sample.base": models = { "sampling_speaker1": sampling_speaker1_model, } elif job == "sample.compiled": models = { "compiled_speaker1": compiled_speaker1_model, } name = job.split(".")[1] run_experiment("one_different", corpus_name, name, models, dev_scenes) run_experiment("by_similarity", corpus_name, name, models, dev_scenes) run_experiment("all_same", corpus_name, name, models, dev_scenes)
#!/usr/bin/env python2 import caffe import apollocaffe from apollocaffe import ApolloNet, layers import numpy as np import timeit #caffe.set_mode_gpu() apollocaffe.set_device(0) net = ApolloNet() batch_size = 64 data = np.random.random(size=(batch_size, 512, 20, 20)).astype(np.float32) labels = np.random.randint(10, size=(batch_size, )).astype(np.int32).astype( np.float32) #print data.dtype #print labels.dtype #def load_mem(): # net.clear_forward() # net.f(layers.MemoryData( # "mem", data, labels, tops=["input_top", "label_top"], # batch_size=batch_size, channels=512, width=20, height=20)) # #def load_np(): # net.clear_forward() # net.f(layers.NumpyData("np", data)) # #load_mem()
#!/usr/bin/env python2 import caffe import apollocaffe from apollocaffe import ApolloNet, layers import numpy as np import timeit # caffe.set_mode_gpu() apollocaffe.set_device(0) net = ApolloNet() batch_size = 64 data = np.random.random(size=(batch_size, 512, 20, 20)).astype(np.float32) labels = np.random.randint(10, size=(batch_size,)).astype(np.int32).astype(np.float32) # print data.dtype # print labels.dtype # def load_mem(): # net.clear_forward() # net.f(layers.MemoryData( # "mem", data, labels, tops=["input_top", "label_top"], # batch_size=batch_size, channels=512, width=20, height=20)) # # def load_np(): # net.clear_forward() # net.f(layers.NumpyData("np", data)) # # load_mem() # load_np()
def __init__(self, categorical=False):
    """Create the network and optimizer state and record the target type.

    categorical -- when True the model predicts class ids rather than
    regressing continuous targets.
    """
    self.categorical = categorical
    self.net = ApolloNet()
    self.opt_state = adadelta.State()
class Agent(object):
    """DQN-style agent: epsilon-greedy action selection plus
    experience-replay Q-learning with separate "now_" and "fut_" (target)
    parameter sets."""

    def __init__(self):
        self.net = ApolloNet()
        self.transitions = []

    def forward(self, features, n_actions, prefix=""):
        # Builds the Q-network on self.net and returns the name of the
        # layer holding Q-values; prefix selects the parameter set.
        net = self.net
        l_data = prefix + "data"
        l_ip1 = prefix + "ip1"
        l_relu1 = prefix + "relu1"
        l_ip2 = prefix + "ip2"
        l_relu2 = prefix + "relu2"
        l_ip3 = prefix + "ip3"
        p_ip1 = [prefix + "ip1_weight", prefix + "ip1_bias"]
        p_ip2 = [prefix + "ip2_weight", prefix + "ip2_bias"]
        net.f(NumpyData(l_data, features))
        net.f(InnerProduct(l_ip1, 50, bottoms=[l_data], param_names=p_ip1))
        net.f(ReLU(l_relu1, bottoms=[l_ip1]))
        net.f(
            InnerProduct(l_ip2, n_actions, bottoms=[l_relu1],
                         param_names=p_ip2))
        #net.f(ReLU(relu2, bottoms=[ip2]))
        #net.f(InnerProduct(ip3, state.n_actions, bottoms=[relu2]))
        return l_ip2

    def choose(self, state):
        # Epsilon-greedy (eps = 0.1); purely random until the replay
        # buffer holds a full batch.
        self.net.clear_forward()
        q_layer = self.forward(np.asarray([state.features]),
                               state.n_actions, "now_")
        q_data = self.net.blobs[q_layer].data.ravel()
        if len(self.transitions) < BATCH_SIZE or np.random.random() < 0.1:
        #if True:
            action = np.random.choice(state.n_actions)
        else:
            action = np.argmax(q_data)
        self.current_state = state
        self.current_action = action
        return action

    def update(self, reward, new_state):
        # Store the transition and run one Q-learning step on a random
        # replay batch (discount gamma = 0.9).
        current_transition = \
            (self.current_state, self.current_action, new_state, reward)
        self.transitions.append(current_transition)
        if len(self.transitions) < BATCH_SIZE:
            return
        replay_choices = np.random.choice(len(self.transitions), BATCH_SIZE)
        replay_transitions = [self.transitions[i] for i in replay_choices]
        replay_before_data = np.asarray(
            [r[0].features for r in replay_transitions])
        replay_action_data = np.asarray([r[1] for r in replay_transitions])
        replay_after_data = np.asarray(
            [r[2].features for r in replay_transitions])
        replay_reward_data = np.asarray([r[3] for r in replay_transitions])
        net = self.net
        self.net.clear_forward()
        l_q_now = self.forward(replay_before_data, new_state.n_actions,
                               "now_")
        l_q_fut = self.forward(replay_after_data, new_state.n_actions,
                               "fut_")
        # Bootstrapped target: r + 0.9 * max_a' Q_fut(s', a').
        pred_value = 0.9 * \
            np.max(net.blobs[l_q_fut].data, axis=1) + replay_reward_data
        l_action_now = "action_now"
        l_index = "index"
        l_pred_value = "pred_value"
        l_loss = "loss"
        net.f(NumpyData(l_action_now, replay_action_data))
        # Select Q(s, a) for the actions actually taken.
        net.f(Index(l_index, {}, bottoms=[l_q_now, l_action_now]))
        net.f(NumpyData(l_pred_value, pred_value))
        net.f(EuclideanLoss(l_loss, bottoms=[l_index, l_pred_value]))
        net.backward()
        net.update(lr=0.01)

    def update_target(self):
        # Copy "now" parameters into the "fut" target network (no-op until
        # the fut_ parameters have been created by a forward pass).
        net = self.net
        if "fut_ip1_weight" not in net.params.keys():
            return
        net.params["fut_ip1_weight"].data[
            ...] = net.params["now_ip1_weight"].data
        net.params["fut_ip1_bias"].data[...] = net.params["now_ip1_bias"].data
        net.params["fut_ip2_weight"].data[
            ...] = net.params["now_ip2_weight"].data
        net.params["fut_ip2_bias"].data[...] = net.params["now_ip2_bias"].data