def forward_message_embedding(self, inputs, loss, training):
    """Embed every slot and optionally mix in softmax-encoded messages.

    The first ``self.slots`` entries of ``inputs`` are passed through
    ``local_trans``; ``inputs[-1]`` is passed through ``global_trans``.
    When ``self.use_comm`` is truthy, each local embedding additionally
    receives the sum of all local-to-local messages plus one global-to-local
    message, and the global embedding receives the sum of all local-to-global
    messages. Both sums are divided by ``self.slots`` before being added.

    ``loss`` is accepted for interface compatibility and is not used here.

    Returns:
        list: ``self.slots`` local embeddings followed by the global one.
    """
    n_slots = self.slots
    global_input = inputs[-1]

    outputs = [
        self.local_trans(inputs[k], training=training) for k in range(n_slots)
    ]
    outputs.append(self.global_trans(global_input, training=training))

    if not self.use_comm:
        return outputs

    # Messages arriving at each local slot.
    for dst in range(n_slots):
        inbox = nd.zeros_like(outputs[dst])
        for src in range(n_slots):
            code = nd.softmax(
                self.local2local_msg_encode(inputs[src], training=training))
            embedded = self.local2local_embedding(code, training=training)
            inbox = inbox + self.local2local_extract(embedded, training=training)
        code = nd.softmax(
            self.global2local_msg_encode(global_input, training=training))
        embedded = self.global2local_embedding(code, training=training)
        inbox = inbox + self.global2local_extract(embedded, training=training)
        outputs[dst] = outputs[dst] + (inbox / float(n_slots))

    # Messages arriving at the global slot.
    inbox = nd.zeros_like(outputs[-1])
    for src in range(n_slots):
        code = nd.softmax(
            self.local2global_msg_encode(inputs[src], training=training))
        embedded = self.local2global_embedding(code, training=training)
        inbox = inbox + self.local2global_extract(embedded, training=training)
    outputs[-1] = outputs[-1] + (inbox / float(n_slots))

    return outputs
def hybrid_forward(self, F, input_logits, target_logits, sample_weight=None):
    """Mean squared difference between the softmax of two logit tensors.

    Both inputs are soft-maxed along axis 1, the element-wise squared
    difference is taken, and the result is averaged over every axis except
    ``self._batch_axis``.

    ``sample_weight`` is accepted but not applied.
    """
    input_probs, target_probs = (
        F.softmax(logits, axis=1) for logits in (input_logits, target_logits)
    )
    squared_error = F.square(input_probs - target_probs)
    return F.mean(squared_error, axis=self._batch_axis, exclude=True)
def check_KL(self):
    """Return KL(``self.prob_states`` || model distribution) as a numpy scalar.

    The model distribution is the softmax, over ``self.enum_states``, of the
    visible term ``enum_states . vb`` plus the per-state sum over hidden
    units of ``-log(sigmoid(-(enum_states . W + hb)))``.
    """
    states = self.enum_states
    hidden_pre = nd.dot(states, self.W) + self.hb
    visible_term = nd.dot(states, self.vb)
    hidden_term = nd.sum(-nd.log(nd.sigmoid(-hidden_pre)), axis=1)
    model_prob = nd.softmax(visible_term + hidden_term)

    target_prob = self.prob_states
    divergence = nd.sum(target_prob * nd.log(target_prob / model_prob))
    return divergence.asnumpy()[0]
def inference(self):
    """Greedily decode one configuration from the self-attention controller.

    Returns:
        dict: for every entry ``(key, space)`` of ``self.spaces``, ``key``
        mapped to the integer arg-max choice of the matching token group.
    """
    # Self-attention over the embedding of a single sample (batch size 1).
    embedded = self.embedding(1).reshape(-3, 0)
    kshape = (1, self.num_total_tokens, self.hidden_size)
    vshape = (1, self.num_total_tokens, 1)
    query = self.querry(embedded).reshape(*kshape)
    key = self.key(embedded).reshape(*kshape)
    value = self.value(embedded).reshape(*vshape)
    attention = mx.nd.linalg_gemm2(query, key, transpose_b=True).softmax(axis=1)
    alphas = mx.nd.linalg_gemm2(attention, value).squeeze(axis=-1)

    # Slice ``alphas`` into consecutive groups, one per search dimension.
    actions = []
    start = 0
    for width in self.num_tokens:
        stop = start + width
        probs = F.softmax(alphas[:, start:stop], axis=-1)
        actions.append(mx.nd.argmax(probs, 1))
        start = stop

    config = {}
    for position, action in enumerate(actions):
        picked = action.asscalar()
        name, _ = self.spaces[position]
        config[name] = int(picked)
    return config
def generate_text(model, seed, length=512, top_n=10):
    """Generate ``length`` characters from a trained model primed with ``seed``.

    All but the last seed character are fed through the model to build the
    recurrent state; the last seed character is the first step input. Each
    following character is drawn by ``sample_from_probs`` from the softmax
    of the model output, using ``top_n``.

    Returns:
        str: the seed followed by the generated characters.
    """
    logger.info("generating %s characters from top %s choices.", length, top_n)
    logger.info('generating with seed: "%s".', seed)

    encoded = mx.nd.array(encode_text(seed))
    last = encoded.shape[0] - 1

    # Warm-up pass over the seed prefix; input shape: [seq_len, 1].
    warmup = F.expand_dims(encoded[:last], 1)
    _, state = model(warmup, model.begin_state())

    next_index = encoded[last].asscalar()
    pieces = [seed]
    for _ in range(length):
        step_input = mx.nd.array([[next_index]])  # input shape: [1, 1]
        logit, state = model(step_input, state)  # output shape: [1, vocab_size]
        step_probs = F.softmax(logit).asnumpy().squeeze()
        next_index = sample_from_probs(step_probs, top_n)
        pieces.append(ID2CHAR[next_index])

    generated = "".join(pieces)
    logger.info("generated text: \n%s\n", generated)
    return generated
def test(ctx, val_data, opt, net):
    """Evaluate ``net`` on ``val_data`` and return ``(top1, top5)`` accuracy.

    Every element produced by ``batch_fn`` is reshaped so that its first two
    axes are merged, cast to ``opt.dtype`` and passed through ``net``;
    softmax is applied when ``opt.use_softmax`` is set. Progress is printed
    every ``opt.log_interval`` batches (never for batch 0).
    """
    top1_metric = mx.metric.Accuracy()
    top5_metric = mx.metric.TopKAccuracy(5)

    for step, batch in enumerate(val_data):
        data, label = batch_fn(batch, ctx)

        outputs = []
        for X in data:
            X = X.reshape((-1, ) + X.shape[2:])
            pred = net(X.astype(opt.dtype, copy=False))
            if opt.use_softmax:
                pred = F.softmax(pred, axis=1)
            outputs.append(pred)

        top1_metric.update(label, outputs)
        top5_metric.update(label, outputs)
        mx.ndarray.waitall()

        _, running_top1 = top1_metric.get()
        _, running_top5 = top5_metric.get()
        should_log = step > 0 and step % opt.log_interval == 0
        if should_log:
            print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                  (step, len(val_data), running_top1 * 100, running_top5 * 100))

    _, top1 = top1_metric.get()
    _, top5 = top5_metric.get()
    return (top1, top5)
def softmax_viterbi_decode(self, feats):
    """Viterbi-decode the best label sequence from soft-maxed emissions.

    Path scores are products of the previous score, the transition entry
    ``transitions[prev, cur]`` and the soft-maxed emission for the current
    label. Only two score rows are kept (indexed by the parity of the time
    step); back-pointers are stored for every step.

    Returns:
        list: one label index per row of ``feats``.
    """
    emissions = nd.softmax(feats).asnumpy()
    trans = self.transitions.asnumpy()
    n_labels = len(self.tag_dictionary)
    n_steps = len(emissions)

    backptr = np.zeros((n_steps, n_labels), dtype=int)
    scores = np.zeros((2, n_labels))
    scores[0] = emissions[0]

    for t in range(1, n_steps):
        cur_row = t & 1
        prev_row = 1 - cur_row
        for cur in range(n_labels):
            emit = emissions[t, cur]
            best = -sys.float_info.max
            for prev in range(n_labels):
                candidate = scores[prev_row][prev] * trans[prev, cur] * emit
                if best < candidate:
                    best = candidate
                    backptr[t, cur] = prev
            scores[cur_row][cur] = best

    # Follow the back-pointers from the best final label.
    tag = np.argmax(scores[(n_steps - 1) & 1])
    path = []
    for t in range(n_steps - 1, -1, -1):
        path.append(tag)
        tag = backptr[t, tag]
    path.reverse()
    return path
def forward(self, x):
    """Embed ``x``, apply batch-norm, two convolutions with pooling and two
    hidden layers, and return the softmax of the output layer."""
    embedded = self.embedding(nd.array(x))
    normalized = self.bn(embedded)
    pooled = self.pool(self.conv2(self.conv1(normalized)))
    hidden = self.h2(self.h1(pooled))
    scores = self.output(hidden)
    return F.softmax(scores)
def sample_v_given_h(self, h0):
    """Sample visible units given hidden activations ``h0``.

    The output of ``propdown`` is reshaped to rows of ``self.n_val`` values,
    soft-maxed, and one value per row is drawn and one-hot encoded.

    Returns:
        list: ``[probabilities, one_hot_sample]``, each reshaped to rows of
        ``self.n_node`` entries.
    """
    grouped = self.propdown(h0).reshape([-1, self.n_val])
    probs = nd.softmax(grouped)
    drawn = nd.sample_multinomial(probs)
    one_hot = nd.one_hot(drawn, self.n_val)
    flat_shape = [-1, self.n_node]
    return [probs.reshape(flat_shape), one_hot.reshape(flat_shape)]
def forward(self, x):
    """Embed ``x`` once and return the softmax of each of the ``2 * self.C``
    output heads in ``self.outs``, as a list in head order."""
    embed = self.embed(x)
    return [F.softmax(self.outs[head](embed)) for head in range(self.C * 2)]
def forward(self, x):
    """Apply gated soft attention over the features and project the result.

    Args:
        x: input batch, shape ``(n, w)`` per the original inline comments.

    Returns:
        ``self.out`` applied to
        ``c * (softmax(E, axis=1) * X_) + (1 - c) * X_``, where
        ``X_ = self.attn(x)``, ``E = self.attn(X_)`` and ``c = self.c``.
    """
    X_ = self.attn(x)  # (n, w) -> (n, num_hidden)
    # NOTE(review): the original comment says this "should be dot(X_, W)";
    # the same ``self.attn`` layer is applied a second time instead. Kept
    # unchanged -- confirm whether a separate weight matrix was intended.
    E = self.attn(X_)  # (n, hidden) -> (n, hidden)
    attn_weights = F.softmax(E, axis=1)  # (n, hidden)
    attn_applied = F.elemwise_mul(attn_weights, X_)  # (n, hidden)
    # Reuse ``attn_applied`` rather than recomputing the same element-wise
    # product; it was previously computed and then left unused.
    output = self.c * attn_applied + (1 - self.c) * X_
    return self.out(output)  # (n, hidden) -> (n, output_size)
def forward(self, x):
    """Scale the input by 1/255, run the three convolutions and ``l1``, and
    return ``(value, softmax(logits))``.

    Note that the second element is already soft-maxed even though the local
    name in the original code was ``logits``.
    """
    features = x / 255.
    for layer in (self.conv1, self.conv2, self.conv3, self.l1):
        features = layer(features)
    value = self.value(features)
    policy = F.softmax(self.logits(features))
    return value, policy
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Run one actor-critic backward pass and return the parameter gradients.

    Each sample is weighted by ``softmax((1 - count / batch_size) ** 2)``,
    where ``count`` is how often that sample's action occurs in the batch.
    The optimised loss is
    ``actorloss + 0.3 * criticloss + 0.001 * entropy_loss``.

    Returns:
        tuple: ``(grads_list, batch_size)``; ``grads_list`` holds the
        gradient of every parameter whose name does not contain
        ``'batchnorm'``.
    """
    batch_size = s_batch.shape[0]

    # Per-sample weights derived from action frequencies within the batch.
    chosen = np.argmax(a_batch_one_hot, axis=1).tolist()
    action_stats = [chosen.count(chosen[row]) for row in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats) / float(batch_size)) ** 2

    def to_device(array):
        # Copy first so the caller's buffers are never aliased.
        return nd.array(copy.deepcopy(array), ctx=CTX)

    s_batch = to_device(s_batch)
    a_batch_one_hot = to_device(a_batch_one_hot)
    V_trace_batch = to_device(V_trace)
    advantage_batch = to_device(advantage)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()

    with mx.autograd.record():
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)

        # The accumulated auxiliary loss is overwritten below and therefore
        # does not contribute to the optimised loss.
        loss = 0.
        for element in loss_vec:
            loss = loss + element

        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1) + 1e-5)
        entropy = -nd.sum(
            nd.sum(data=probs * nd.log(probs + 1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(
            nd.sum(data=top_decisions * nd.log(top_decisions + 1e-5), axis=1),
            axis=0)
        entropy_loss = -entropy
        # Computed but not part of the loss (alternative regulariser).
        top_decision_entropy_loss = -top_decision_entropy

        actorloss = -nd.sum(action_bp_rate * (logprob * advantage_batch), axis=0)
        criticloss = nd.sum(
            action_bp_rate * nd.square(values - V_trace_batch), axis=0)
        loss = actorloss + 0.3 * criticloss + 0.001 * entropy_loss

    loss.backward()

    grads_list = [
        param.grad()
        for name, param in self.actorcritic.collect_params().items()
        if name.find('batchnorm') < 0
    ]
    return grads_list, batch_size
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    """Sample ``batch_size`` configurations from the recurrent controller.

    Parameters
    ----------
    batch_size : int
        Number of configurations to draw; also the key used to look up
        ``self.static_inputs`` and ``self.static_init_hidden``.
    with_details : bool
        When true, also return the log-probabilities and the entropies.
    with_entropy : bool
        When true, compute the per-step entropy; otherwise each entropy
        entry is ``None``.

    Returns
    -------
    configs : list of dict
        One dict per sample, mapping each key of ``self.spaces`` to the
        sampled integer choice.
    log_probs, entropies
        Only when ``with_details`` is true: the selected log-probabilities
        stacked along axis 1, and the entropies (stacked along axis 1 when
        ``with_entropy`` is true, otherwise a list of ``None``).
    """
    inputs = self.static_inputs[batch_size]
    hidden = self.static_init_hidden[batch_size]

    actions = []
    entropies = []
    log_probs = []
    offset = 0  # number of tokens that belong to earlier decisions
    for idx, n_choices in enumerate(self.num_tokens):
        logits, hidden = self.forward(inputs, hidden, idx, is_embed=(idx == 0))

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        entropy = -(log_prob * probs).sum(1, keepdims=False) if with_entropy else None

        action = mx.random.multinomial(probs, 1)
        ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                          action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

        # ``detach()`` returns a new array instead of modifying in place, so
        # its result must be kept; the bare ``inputs.detach()`` was a no-op.
        inputs = (action[:, 0] + offset).detach()
        offset += n_choices

    configs = []
    for sample_idx in range(batch_size):
        config = {}
        for position, action in enumerate(actions):
            choice = action[sample_idx].asscalar()
            key, _ = self.spaces[position]
            config[key] = int(choice)
        configs.append(config)

    if with_details:
        entropies = F.stack(*entropies, axis=1) if with_entropy else entropies
        return configs, F.stack(*log_probs, axis=1), entropies
    return configs
def forward(self, x):
    """Two conv layers, pooling and dropout, then two dense layers, and a
    final softmax over the second dense layer's ReLU output."""
    conv_out = F.relu(self.conv1(x))
    conv_out = self.pool2(F.relu(self.conv2(conv_out)))
    conv_out = self.drop2D(conv_out)

    # In reshape, 0 copies the size of the corresponding input dimension and
    # -1 infers the size from the rest: every sample becomes one flat vector.
    flat = conv_out.reshape((0, -1))

    dense_out = self.drop1D(F.relu(self.fc1(flat)))
    dense_out = F.relu(self.fc2(dense_out))
    return F.softmax(dense_out)
def _update(self):
    """Run one training step when the replay memory is warm.

    Training only happens once ``frame_counter`` exceeds
    ``opt.replay_start_size`` and is a multiple of ``opt.learning_frequency``.
    The step builds a target distribution ``m`` over ``self.atoms`` bins from
    the target network, then minimises ``self.cross_ent_loss`` between the
    online network's output for the taken actions and ``m``.

    NOTE(review): this looks like a categorical (distributional) DQN update;
    several lines below deviate from the usual formulation and are flagged
    individually -- confirm before relying on it.
    """
    # Train
    if self.frame_counter > self.opt.replay_start_size and \
            self.frame_counter % self.opt.learning_frequency == 0:
        # sample() also receives self.batch_state / self.batch_state_next;
        # presumably it fills them in place -- verify against replay_memory.
        batch_reward, batch_action, batch_done = self.replay_memory.sample(
            self.opt, self.batch_state, self.batch_state_next)
        batch_reward, batch_action, batch_done = batch_reward.asnumpy(), \
            batch_action.asnumpy(), batch_done.asnumpy()
        # Online network picks the greedy next action from the expected
        # value of each action's distribution (dot with the atom values).
        targets_q = self.dqn(self.batch_state_next).asnumpy()
        targets_q = np.reshape(
            targets_q, (targets_q.shape[0], self.num_action, self.atoms))
        q_values = np.dot(targets_q, self.z_values)
        target_actions = np.argmax(q_values, axis=1).astype('int32')
        # Target network supplies the distributions for the next state.
        value_eval = self.target_dqn(self.batch_state_next).asnumpy()
        value_eval = np.reshape(
            value_eval, (value_eval.shape[0], self.num_action, self.atoms))
        # NOTE(review): indexing axis 1 with the whole ``target_actions``
        # array selects every row's action for every sample rather than one
        # action per sample -- confirm this is intended.
        distributed_q = value_eval[:, target_actions, :]
        m = np.zeros((self.opt.batch_size, self.z_values.size))
        for j in range(self.atoms):
            # Shifted atom value clipped to [v_min, v_max].
            # NOTE(review): uses ``reward - done * gamma * z``; the common
            # form is ``reward + (1 - done) * gamma * z`` -- confirm.
            tzj = np.fmax(
                np.fmin(
                    batch_reward -
                    batch_done * self.opt.gamma * self.z_values[j],
                    self.v_max), self.v_min)
            # Fractional bin position, with its upper and lower neighbours.
            bj = ((tzj - self.z_values[0]) /
                  (self.z_values[1] - self.z_values[0]))
            u = np.ceil(bj).astype('int32')
            l = np.floor(bj).astype('int32')
            # NOTE(review): ``m[:, l]`` / ``m[:, u]`` index columns with
            # per-sample arrays, and ``distributed_q`` is indexed with
            # ``target_actions`` a second time -- verify shapes and intent.
            m[:, l] = m[:, l] + distributed_q[:, target_actions, j] * (u - bj)
            m[:, u] = m[:, u] + distributed_q[:, target_actions, j] * (bj - l)
        # The projected mass is passed through a softmax before being used
        # as the target.
        m = F.softmax(nd.array(m, self.opt.ctx))
        with autograd.record():
            TD_targets = nd.reshape(
                self.dqn(self.batch_state),
                (self.opt.batch_size, self.num_action, self.atoms))
            # Keep only the distribution of the action actually taken.
            TD_targets_action = TD_targets[self.batches, batch_action]
            loss = self.cross_ent_loss(TD_targets_action, m)
        loss.backward()
        self.trainer.step(self.opt.batch_size)
        if self.frame_counter % 800 == 0:
            print('Loss is', nd.sum(loss).asscalar())
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    """Sample ``batch_size`` configurations from the self-attention controller.

    Returns:
        ``configs`` (list of dict mapping each key of ``self.spaces`` to the
        sampled integer choice). With ``with_details`` the result is
        ``(configs, log_probs, entropies)``: log-probabilities are stacked
        along axis 1; entropies are stacked along axis 1 when
        ``with_entropy`` is true, otherwise a list of ``None``.
    """
    # Self-attention over the controller embedding.
    embedded = self.embedding(batch_size).reshape(-3, 0)
    kshape = (batch_size, self.num_total_tokens, self.hidden_size)
    vshape = (batch_size, self.num_total_tokens, 1)
    query = self.querry(embedded).reshape(*kshape)
    key = self.key(embedded).reshape(*kshape)
    value = self.value(embedded).reshape(*vshape)
    attention = mx.nd.linalg_gemm2(query, key, transpose_b=True).softmax(axis=1)
    alphas = mx.nd.linalg_gemm2(attention, value).squeeze(axis=-1)

    actions, entropies, log_probs = [], [], []
    start = 0
    for width in self.num_tokens:
        stop = start + width
        logits = alphas[:, start:stop]
        start = stop

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        if with_entropy:
            entropy = -(log_prob * probs).sum(1, keepdims=False)
        else:
            entropy = None

        action = mx.random.multinomial(probs, 1)
        rows = mx.nd.arange(probs.shape[0], ctx=action.context)
        ind = mx.nd.stack(rows, action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for sample_idx in range(batch_size):
        config = {}
        for position, action in enumerate(actions):
            picked = action[sample_idx].asscalar()
            name, _ = self.spaces[position]
            config[name] = int(picked)
        configs.append(config)

    if not with_details:
        return configs
    if with_entropy:
        entropies = F.stack(*entropies, axis=1)
    return configs, F.stack(*log_probs, axis=1), entropies
def GRU_Cell(input, state):
    """Run a GRU over the sequence ``input`` and classify the final state.

    Uses the module-level weights (``wxz``, ``whz``, ``bz``, ...) and sizes
    (``num_hidden``, ``num_outputs``).

    Returns:
        tuple: ``(softmax_output, state)`` computed from the last state.
    """
    def gate(x, h, w_in, w_rec, bias, act_type):
        # activation(x . w_in + h . w_rec + bias)
        pre_activation = (
            nd.FullyConnected(data=x, weight=w_in, no_bias=True,
                              num_hidden=num_hidden)
            + nd.FullyConnected(data=h, weight=w_rec, no_bias=True,
                                num_hidden=num_hidden)
            + bias)
        return nd.Activation(pre_activation, act_type=act_type)

    for x in input:
        update = gate(x, state, wxz, whz, bz, "sigmoid")
        reset = gate(x, state, wxr, whr, br, "sigmoid")
        candidate = gate(x, reset * state, wxh, whh, bh, "tanh")
        state = nd.multiply(update, state) + nd.multiply(1 - update, candidate)

    scores = nd.FullyConnected(data=state, weight=why, bias=by,
                               num_hidden=num_outputs)
    return nd.softmax(data=scores), state
def transform_fn(model, request_body, content_type, accept_type):
    """Choose the next shot for a board game request.

    ``request_body`` is a JSON document with a ``"board"`` (list of rows; a
    cell equal to 0 is free) and a ``"session"`` dict. The session's
    ``"count"`` is incremented on every call. While the previous count is at
    most 10 a free cell is picked uniformly at random; afterwards the cell
    with the highest masked softmax score from ``model`` is used.

    Returns:
        tuple: ``(payload, accept_type)`` where ``payload`` is a UTF-8
        ``bytearray`` of ``{"shot": {"x": ..., "y": ...}, "session": ...}``.

    Raises:
        ValueError: if the board has no free cell. This used to loop forever.
        Exception: any other failure is logged and re-raised instead of
            silently returning ``None``.
    """
    try:
        input_object = json.loads(request_body)
        rows = input_object["board"]
        session = input_object["session"]
        count = session.get("count", 0)
        session["count"] = count + 1

        empty_cells = [
            (col, row_idx)
            for row_idx, row in enumerate(rows)
            for col, cell in enumerate(row)
            if cell == 0
        ]
        if not empty_cells:
            raise ValueError("board has no empty cells to shoot at")

        if count > 10:
            board = nd.array(rows)
            board = nd.concat(
                (board == 1).expand_dims(axis=0),
                (board == 2).expand_dims(axis=0), dim=0)
            board = board.expand_dims(axis=0)
            # mask is 1 where the channel is empty and 0 where it is occupied
            mask = board.clip(0, 1)
            mask = -(mask - 1)
            mask = mask.reshape((2, -1))
            p = nd.softmax(model(board).reshape((-1,)))
            p = p * mask[0] * mask[1]

            # board is (1, 2, height, width); a flat row-major index must be
            # decoded with the width (shape[3]), not the height (shape[2]).
            width = board.shape[3]
            x, y = empty_cells[0]  # fallback, only used if the loop runs out
            for _ in range(p.size):
                loc = int(p.argmax(axis=0).asscalar())
                cand_y, cand_x = divmod(loc, width)
                if rows[cand_y][cand_x] == 0:
                    x, y = cand_x, cand_y
                    break
                # Push rejected cells below every valid score so they can
                # never be selected again (zero could tie with other zeros).
                p[loc] = -1
        else:
            x, y = random.choice(empty_cells)

        session["shootType"] = "CNNNet"
        return bytearray(json.dumps({
            "shot": {
                "x": x,
                "y": y
            },
            "session": session
        }), 'utf-8'), accept_type
    except Exception as e:
        print(traceback.format_exc())
        print(e)
        raise
def inference(self):
    """Greedily decode one configuration from the independent decoders.

    Returns:
        dict: each key of ``self.spaces`` mapped to the integer arg-max
        choice of the decoder at the same position.
    """
    actions = []
    for position in range(len(self.num_tokens)):
        logits = self.decoders[position](1)
        probs = F.softmax(logits, axis=-1)
        actions.append(mx.nd.argmax(probs, 1))

    config = {}
    for position, action in enumerate(actions):
        picked = action.asscalar()
        name, _ = self.spaces[position]
        config[name] = int(picked)
    return config
def forward(self, x):
    """Gated soft attention over the last axis of a transposed ``nwc`` input.

    ``x`` is transposed to ``ncw``, projected with ``w1``, scored with ``w``
    and soft-maxed along axis 2. The attended features are blended with the
    projected features using the gate ``c``, projected with ``w2`` plus bias
    ``b``, and transposed back to ``nwc``.
    """
    channels_first = F.transpose(x, axes=(0, 2, 1))  # (nwc) -> (ncw)
    projected = F.batch_dot(self.w1.data(ctx), channels_first)
    energies = F.batch_dot(projected, self.w.data(ctx))
    attn_weights = F.softmax(energies, axis=2)
    attended = F.elemwise_mul(attn_weights, projected)

    blended = (self.c.data(ctx) * attended
               + (1 - self.c.data(ctx)) * projected)
    result = F.batch_dot(blended, self.w2.data(ctx)) + self.b.data(ctx)
    return F.transpose(result, axes=(0, 2, 1))  # (ncw) -> (nwc)
def forward(self, x):
    """Combine ``x`` with ``self.l``, pool, and reduce with a self-gated sum.

    The ``fc`` output is multiplied element-wise by its own softmax,
    reshaped to ``(x.shape[0], self.l.shape[1], -1)`` and summed over axis 1.
    """
    combined = broad_multiply(x, self.l, self.ctx)
    pooled = self.avgpool(combined)
    scores = self.fc(pooled)
    gated = scores * nd.softmax(scores)
    grouped = gated.reshape((x.shape[0], self.l.shape[1], -1))
    return nd.sum(grouped, axis=1)
def inference(self):
    """Greedily decode one configuration from the recurrent controller.

    At every step the arg-max token is chosen and, shifted by the number of
    tokens that belong to earlier decisions, fed back as the next input.

    Returns:
        dict: each key of ``self.spaces`` mapped to the chosen integer index.
    """
    inputs = self.static_inputs[1]
    hidden = self.static_init_hidden[1]

    actions = []
    offset = 0  # number of tokens that belong to earlier decisions
    for block_idx, n_choices in enumerate(self.num_tokens):
        logits, hidden = self.forward(inputs, hidden, block_idx,
                                      is_embed=(block_idx == 0))
        probs = F.softmax(logits, axis=-1)
        action = mx.nd.argmax(probs, 1)
        actions.append(action)

        # ``detach()`` returns a new array instead of modifying in place, so
        # its result must be kept; the bare ``inputs.detach()`` was a no-op.
        inputs = (action + offset).detach()
        offset += n_choices

    config = {}
    for position, action in enumerate(actions):
        choice = action.asscalar()
        key, _ = self.spaces[position]
        config[key] = int(choice)
    return config
def forward(self, X, stride=1):
    """Average max-pooled copies of ``X`` taken at several kernel sizes.

    For ``i`` in ``range(self._n_scales)`` the input is max-pooled with a
    square kernel of side ``2 * i + 1`` and padding ``i``; the results are
    stacked on a new axis 1 and averaged over it.

    Args:
        X: input array passed to ``nd.Pooling``.
        stride: stride used for both spatial dimensions.

    Returns:
        The mean over the scale axis of the pooled arrays.
    """
    filters = []
    for i in range(self._n_scales):
        kernel = (i * 2 + 1, ) * 2
        pad = (i, ) * 2
        # Pool the method argument ``X``; the previous ``data=data`` referred
        # to a name that is not defined in this function.
        f = nd.Pooling(data=X,
                       pool_type='max',
                       kernel=kernel,
                       stride=(stride, stride),
                       pad=pad,
                       cudnn_off=True)
        f = nd.reshape(f, (f.shape[0], 1) + f.shape[1:])
        filters.append(f)
    filters = nd.concat(*filters, dim=1)

    # NOTE(review): the learned scale weights are computed but not applied;
    # the weighted sum (``nd.sum(filters * weight, axis=1)``) is disabled in
    # favour of a plain mean. Kept as in the original -- confirm intent.
    weight = nd.softmax(self._get_param(self.weight), axis=1)
    filters = nd.mean(filters, axis=1)
    return filters
def train_update(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Run one actor-critic optimisation step through ``self.trainer``.

    Each sample is weighted by ``softmax((1 - count / batch_size) ** 2)``,
    where ``count`` is how often that sample's action occurs in the batch.
    The optimised loss is
    ``actorloss + 0.3 * criticloss + 0.001 * entropy_loss``.
    """
    batch_size = s_batch.shape[0]

    # Per-sample weights derived from action frequencies within the batch.
    chosen = np.argmax(a_batch_one_hot, axis=1).tolist()
    action_stats = [chosen.count(chosen[row]) for row in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats) / float(batch_size)) ** 2

    def to_device(array):
        # Copy first so the caller's buffers are never aliased.
        return nd.array(copy.deepcopy(array), ctx=CTX)

    s_batch = to_device(s_batch)
    a_batch_one_hot = to_device(a_batch_one_hot)
    V_trace_batch = to_device(V_trace)
    advantage_batch = to_device(advantage)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()

    with mx.autograd.record():
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)

        # The accumulated auxiliary loss is overwritten below and therefore
        # does not contribute to the optimised loss.
        loss = 0.
        for element in loss_vec:
            loss = loss + element

        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1) + 1e-5)
        entropy = -nd.sum(
            nd.sum(data=probs * nd.log(probs + 1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(
            nd.sum(data=top_decisions * nd.log(top_decisions + 1e-5), axis=1),
            axis=0)
        entropy_loss = -entropy
        # Computed but not part of the loss (alternative regulariser).
        top_decision_entropy_loss = -top_decision_entropy

        actorloss = -nd.sum(action_bp_rate * (logprob * advantage_batch), axis=0)
        criticloss = nd.sum(
            action_bp_rate * nd.square(values - V_trace_batch), axis=0)
        loss = actorloss + 0.3 * criticloss + 0.001 * entropy_loss

    loss.backward()
    self.trainer.step(batch_size=batch_size, ignore_stale_grad=True)
def forward(self, input, hidden, encoder_outputs):
    """One step of an attention decoder.

    The embedded input (with dropout when ``self.dropout_p > 0``) and the
    flattened first hidden state produce soft-maxed attention weights over
    ``encoder_outputs``. The attended context is combined with the
    embedding, then passed through ReLU and the GRU ``self.n_layers`` times,
    and finally through the output layer.

    Returns:
        tuple: ``(output, hidden, attn_weights)``.
    """
    # input shape: (1,)
    embedded = self.embedding(input)
    if self.dropout_p > 0:
        embedded = self.dropout(embedded)

    attn_input = F.concat(embedded, hidden[0].flatten(), dim=1)
    attn_weights = F.softmax(self.attn(attn_input))
    context = F.batch_dot(attn_weights.expand_dims(0),
                          encoder_outputs.expand_dims(0))

    combined = F.concat(embedded.flatten(), context.flatten(), dim=1)
    output = self.attn_combine(combined).expand_dims(0)

    for _ in range(self.n_layers):
        output, hidden = self.gru(F.relu(output), hidden)

    return self.out(output), hidden, attn_weights
def test(ctx, val_data, opt, net):
    """Evaluate ``net`` on ``val_data`` and save labels and predictions.

    Every element produced by ``batch_fn`` is reshaped so that its first two
    axes are merged and passed through ``net`` (without a dtype cast);
    softmax is applied when ``opt.use_softmax`` is set. Labels and
    predictions are converted with ``_list_to_numpy`` and saved under
    ``opt.save_dir`` as ``labels`` and ``predictions``.

    Returns:
        tuple: ``(top1, top5, true_labels, predictions)``.
    """
    top1_metric = mx.metric.Accuracy()
    top5_metric = mx.metric.TopKAccuracy(5)
    true_labels = []
    predictions = []

    for step, batch in enumerate(val_data):
        data, label = batch_fn(batch, ctx)

        outputs = []
        for X in data:
            X = X.reshape((-1, ) + X.shape[2:])
            pred = net(X)
            if opt.use_softmax:
                pred = F.softmax(pred, axis=1)
            outputs.append(pred)

        predictions.append(outputs)
        true_labels.append(label)
        top1_metric.update(label, outputs)
        top5_metric.update(label, outputs)
        mx.ndarray.waitall()

        _, running_top1 = top1_metric.get()
        _, running_top5 = top5_metric.get()
        should_log = step > 0 and step % opt.log_interval == 0
        if should_log:
            print('%04d/%04d is done: acc-top1=%f acc-top5=%f' %
                  (step, len(val_data), running_top1 * 100, running_top5 * 100))

    _, top1 = top1_metric.get()
    _, top5 = top5_metric.get()

    # Persist the raw labels and predictions for later analysis.
    predictions = _list_to_numpy(predictions)
    true_labels = _list_to_numpy(true_labels)
    np.save(os.path.join(opt.save_dir, "labels"), true_labels)
    np.save(os.path.join(opt.save_dir, "predictions"), predictions)
    return top1, top5, true_labels, predictions
def LSTM_Cell(input, h_state, c_state):
    """Run an LSTM over the sequence ``input`` and classify the final state.

    Uses the module-level weights (``wxhf``, ``whhf``, ``bhf``, ...) and
    sizes (``num_hidden``, ``num_outputs``).

    Returns:
        tuple: ``(softmax_output, h_state, c_state)`` after the last step.
    """
    def gate(x, h, w_in, w_rec, bias, act_type):
        # activation(x . w_in + h . w_rec + bias)
        pre_activation = (
            nd.FullyConnected(data=x, weight=w_in, no_bias=True,
                              num_hidden=num_hidden)
            + nd.FullyConnected(data=h, weight=w_rec, no_bias=True,
                                num_hidden=num_hidden)
            + bias)
        return nd.Activation(pre_activation, act_type=act_type)

    for x in input:
        forget = gate(x, h_state, wxhf, whhf, bhf, "sigmoid")
        write = gate(x, h_state, wxhi, whhi, bhi, "sigmoid")
        read = gate(x, h_state, wxho, whho, bho, "sigmoid")
        candidate = gate(x, h_state, wxhg, whhg, bhg, "tanh")
        c_state = nd.multiply(forget, c_state) + nd.multiply(write, candidate)
        h_state = nd.multiply(read, nd.tanh(c_state))

    scores = nd.FullyConnected(data=h_state, weight=why, bias=by,
                               num_hidden=num_outputs)
    return nd.softmax(data=scores), h_state, c_state
def forward(self, is_train, req, in_data, out_data, aux):
    """Filter ODM targets using confident ARM background predictions.

    Inputs (``in_data``): ARM class predictions, ODM class targets, ODM
    location-target mask. Wherever the ARM background score is >= 0.99 the
    ODM class target is replaced by -1 and the ODM location mask by 0.

    Outputs (``out_data``): ``[filtered class target, filtered location mask]``.
    """
    arm_scores, cls_target, loc_mask = in_data[0], in_data[1], in_data[2]

    # NOTE(review): softmax runs over its default axis while the class split
    # below is along axis 1 -- confirm the layout of the ARM predictions.
    arm_probs = nd.softmax(data=arm_scores)
    per_class = nd.split(data=arm_probs, axis=1, num_outputs=2)
    # Background (negative class, index 0) score per anchor:
    # (batch, h*w*num_anchors[:layers])
    bg_prob = nd.reshape(data=per_class[0], shape=(0, -1))

    # Anchors the ARM confidently rates as background are removed from the
    # ODM class targets by replacing their label with -1.
    # TODO: should background be encoded as 0 or as -1?
    cls_threshold = nd.ones_like(bg_prob) * 0.99
    confident_bg = bg_prob >= cls_threshold
    ignore_label = nd.ones_like(cls_target) * (-1)
    filtered_cls = nd.where(condition=confident_bg, x=ignore_label, y=cls_target)

    # Apply the same filter to the location mask, (batch, num_anchors * 4).
    bg_prob = nd.reshape(data=bg_prob, shape=(0, -1, 1))
    loc_mask = nd.reshape(data=loc_mask, shape=(0, -1, 4))
    # The four entries of one anchor carry the same flag, so the first
    # column is enough: [0 0 0 0] -> [0], [1 1 1 1] -> [1].
    loc_mask = loc_mask[:, :, 0]
    loc_mask = nd.reshape(data=loc_mask, shape=(0, -1, 1))

    # Regardless of the IoU-based assignment, an anchor the ARM rates as
    # background with score >= 0.99 gets its location mask zeroed.
    loc_threshold = nd.ones_like(loc_mask) * 0.99
    confident_bg_loc = bg_prob >= loc_threshold
    zero_mask = nd.zeros_like(loc_mask)
    filtered_loc = nd.where(confident_bg_loc, zero_mask, loc_mask)
    # Restore the original layout by repeating the flag four times.
    filtered_loc = nd.concat(*[filtered_loc] * 4, dim=2)
    filtered_loc = nd.reshape(filtered_loc, shape=(0, -1))

    self.assign(out_data[0], req[0], filtered_cls)
    self.assign(out_data[1], req[1], filtered_loc)
def sample(self, batch_size=1, with_details=False, with_entropy=False):
    """Sample ``batch_size`` configurations from the independent decoders.

    Returns:
        ``configs`` (list of dict mapping each key of ``self.spaces`` to the
        sampled integer choice). With ``with_details`` the result is
        ``(configs, log_probs, entropies)``: log-probabilities are stacked
        along axis 1; entropies are stacked along axis 1 when
        ``with_entropy`` is true, otherwise a list of ``None``.
    """
    actions, entropies, log_probs = [], [], []
    for position in range(len(self.num_tokens)):
        logits = self.decoders[position](batch_size)

        probs = F.softmax(logits, axis=-1)
        log_prob = F.log_softmax(logits, axis=-1)
        if with_entropy:
            entropy = -(log_prob * probs).sum(1, keepdims=False)
        else:
            entropy = None

        action = mx.random.multinomial(probs, 1)
        rows = mx.nd.arange(probs.shape[0], ctx=action.context)
        ind = mx.nd.stack(rows, action.astype('float32'))
        selected_log_prob = F.gather_nd(log_prob, ind)

        actions.append(action[:, 0])
        entropies.append(entropy)
        log_probs.append(selected_log_prob)

    configs = []
    for sample_idx in range(batch_size):
        config = {}
        for position, action in enumerate(actions):
            picked = action[sample_idx].asscalar()
            name, _ = self.spaces[position]
            config[name] = int(picked)
        configs.append(config)

    if not with_details:
        return configs
    if with_entropy:
        entropies = F.stack(*entropies, axis=1)
    return configs, F.stack(*log_probs, axis=1), entropies
def forward(self, x):
    """Return ``(action_probabilities, values)`` from the shared dense trunk.

    The action head's output is soft-maxed; the value head's output is
    returned as is.
    """
    features = self.dense(x)
    action_scores = self.action_pred(features)
    values = self.value_pred(features)
    return F.softmax(action_scores), values
# 5. Test the networks total_batch = int(np.ceil(X_test.shape[0] / batch_size)) correct_counts = [0 for i in range(num_models)] ensemble_correct_count = 0 total_num = 0 for i in range(total_batch): num_valid = batch_size if (i + 1) * batch_size <= X_test.shape[0]\ else X_test.shape[0] - i * batch_size data_npy, label_npy, num_valid = get_batch(i, batch_size, X_test, y_test) prob_ensemble = nd.zeros(shape=(label_npy.shape[0], 10), ctx=mx.gpu()) for i, test_net in enumerate(test_nets): test_net.forward(data_batch=mx.io.DataBatch(data=[nd.array(data_npy)], label=None), is_train=False) logits_nd = test_net.get_outputs()[0] prob_nd = nd.softmax(logits_nd) prob_ensemble += prob_nd pred_cls = nd.argmax(prob_nd, axis=-1).asnumpy() correct_counts[i] += (pred_cls[:num_valid] == label_npy[:num_valid]).sum() prob_ensemble /= num_models ensemble_pred_cls = nd.argmax(prob_ensemble, axis=-1).asnumpy() ensemble_correct_count += (ensemble_pred_cls[:num_valid] == label_npy[:num_valid]).sum() for i in range(num_models): print(i, 'Accuracy:', correct_counts[i] / float(X_test.shape[0])) print('Ensemble accuracy:', ensemble_correct_count / float(X_test.shape[0])) ''' Learning Started! Epoch: 0001 cost = [ 0.23813407 0.23717315] Epoch: 0002 cost = [ 0.07455271 0.07434764] Epoch: 0003 cost = [ 0.05925059 0.06024327] Epoch: 0004 cost = [ 0.05032205 0.04895757]