def test_vector_perform(self):
    x = vector()
    f = aesara.function([x], logsoftmax(x, axis=None))
    rng = np.random.default_rng(utt.fetch_seed())
    xv = rng.standard_normal((6,)).astype(config.floatX)
    assert np.allclose(f(xv), sp.log_softmax(xv))
def run_mask(self, frame, count):
    try:
        nn_data = run_nn(self.mask_in, self.mask_nn,
                         {"data": to_planar(frame, (224, 224))})
        out = to_tensor_result(nn_data).get('349')
        # match = m_func.log_softmax(torch.from_numpy(out), dim=0).data.numpy()
        match = log_softmax(np.array(out))
        # print(match)
        index = np.argmax(match)
        # print(index)
        ftype = 0 if index > 0.5 else 1
        # print(ftype)
        color = (0, 0, 255) if ftype else (0, 255, 0)
        self.draw_bbox(self.face_coords[count], color)
        cv2.putText(
            self.debug_frame, '{:.2f}'.format(match[0]),
            (self.face_coords[count][0], self.face_coords[count][1] - 10),
            cv2.FONT_HERSHEY_COMPLEX, 1, color)
        cnt_mask, cnt_nomask = 0, 0
        if ftype == 0:
            cnt_mask += 1
        else:
            cnt_nomask += 1
        proportion = cnt_mask / len(self.face_frame) * 100
        # print(round(proportion,2))
        cv2.putText(self.debug_frame,
                    "masks:" + str(round(proportion, 2)) + "%", (10, 30),
                    cv2.FONT_HERSHEY_COMPLEX, 0.75, (255, 0, 0))
    except:
        pass
def test_forward_single_inner_gather(self, blank=0):
    xs = np.asarray(
        [[[[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]],
          [[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.1, 0.1],
           [0.7, 0.1, 0.2, 0.1, 0.1]]]],
        dtype=np.float32)
    xs = log_softmax(xs, axis=-1)
    ys = np.asarray([[1, 2]], dtype=np.int32)
    xn = np.asarray([2], dtype=np.int32)
    yn = np.asarray([2], dtype=np.int32)
    expected_cost = 4.495666
    expected_costs = np.asarray([expected_cost], dtype=np.float32)
    expected_grads = np.array(
        [[[[-0.308198071906, -0.6918019280939998, 0.0, 0.0, 0.0],
           [-0.308198071906, 0.0, -0.3836038561880001, 0.0, 0.0],
           [-0.3836038561880001, 0.0, 0.0, 0.0, 0.0]],
          [[0.0, -0.308198071906, 0.0, 0.0, 0.0],
           [0.0, 0.0, -0.6163961438119995, 0.0, 0.0],
           [-0.9999999999999991, 0.0, 0.0, 0.0, 0.0]]]],
        dtype=np.float32)
    self._run_transducer(xs, xn, ys, yn,
                         expected_costs=expected_costs,
                         expected_grads=expected_grads,
                         use_gpu=True,
                         expected_error=None,
                         gather=True)
def test_calls(self):
    n = 128
    t = 100
    u = 90
    v = 3
    for i in range(2):
        rng = np.random.RandomState(i)
        xs = rng.randn(n, t, u, v)
        xs = np.asarray(xs, dtype=np.float32)
        xs = log_softmax(xs, axis=-1)
        ys = np.asarray(rng.randint(1, v, (n, u - 1)), dtype=np.int32)
        xn = np.asarray([t] * n, dtype=np.int32)
        yn = np.asarray(rng.randint(1, u, n), dtype=np.int32)
        # costs, grads = transducer_loss(
        #     xs, ys,
        #     xn, yn)
        self._run_transducer(xs, xn, ys, yn,
                             expected_costs=None,
                             expected_grads=None,
                             use_gpu=True,
                             expected_error=None)
def _run_ctc_head(self, img):
    logits = self.exec_net.infer(
        inputs={self.config.get('model_input_names'): img})[
            self.config.get('model_output_names').split(',')[0]]
    pred = log_softmax(logits, axis=2)
    pred = ctc_greedy_search(pred, 0)
    return pred
def aggregate_probas(logits, n_windows_stride=1):
    """Aggregate predicted probabilities with self-ensembling.

    Aggregate window-wise predicted probabilities obtained on overlapping
    sequences of windows using multiplicative voting as described in
    [Phan2018]_.

    Parameters
    ----------
    logits : np.ndarray
        Array of shape (n_sequences, n_classes, n_windows) containing the
        logits (i.e. the raw unnormalized scores for each class) for each
        window of each sequence.
    n_windows_stride : int
        Number of windows between two consecutive sequences. Default is 1
        (maximally overlapping sequences).

    Returns
    -------
    np.ndarray :
        Array of shape ((n_rows - 1) * stride + n_windows, n_classes)
        containing the aggregated predicted probabilities for each window
        contained in the input sequences.

    References
    ----------
    .. [Phan2018] Phan, H., Andreotti, F., Cooray, N., Chén, O. Y., &
       De Vos, M. (2018). Joint classification and prediction CNN framework
       for automatic sleep stage classification. IEEE Transactions on
       Biomedical Engineering, 66(5), 1285-1296.
    """
    log_probas = log_softmax(logits, axis=1)
    return _pad_shift_array(log_probas, stride=n_windows_stride).sum(axis=0).T
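# A minimal, self-contained sketch of the multiplicative-voting idea used by
# aggregate_probas above: log-probabilities of overlapping sequences are summed
# per window, which corresponds to multiplying their probabilities. The toy
# shapes and the manual accumulation loop are assumptions for illustration only
# and do not reproduce the internal _pad_shift_array helper.
import numpy as np
from scipy.special import log_softmax

rng = np.random.default_rng(0)
n_sequences, n_classes, n_windows, stride = 3, 5, 4, 1
logits = rng.standard_normal((n_sequences, n_classes, n_windows))

# Per-sequence log-probabilities over classes.
log_probas = log_softmax(logits, axis=1)

# Multiplicative voting: accumulate log-probabilities where sequences overlap.
n_total = (n_sequences - 1) * stride + n_windows
agg = np.zeros((n_total, n_classes))
for i in range(n_sequences):
    agg[i * stride:i * stride + n_windows] += log_probas[i].T

# Aggregated per-window class predictions.
preds = agg.argmax(axis=1)  # shape (n_total,)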
def run_complete_model(self, img):
    model_output_names = get_onnx_outputs(self.model)
    model_input_names = get_onnx_inputs(self.model)[0]
    logits, _ = self.model.run(
        model_output_names,
        {model_input_names: np.array(img, dtype=np.float32)})
    pred = log_softmax(logits, axis=2)
    pred = ctc_greedy_search(pred, 0)
    return pred
def forward(self, sentences, encode_sentences=True, relevant_subsequences=None):
    encoded_sents = []
    encoded_seqs_no_pad = []
    if encode_sentences:
        for sent in sentences:
            encoded = []
            for line in sent.split("\n"):
                new_tokens = self.encoder.encode(line.strip())
                if len(encoded) + len(new_tokens) >= self.max_seq_length:
                    break
                encoded.extend(new_tokens)
                encoded.append(text_encoder.EOS_ID)
            encoded_seqs_no_pad.append(encoded)
            # pad shorter sequences to the full length
            encoded = encoded + [text_encoder.PAD_ID
                                 for _ in range(self.max_seq_length - len(encoded))]
            assert len(encoded) == self.max_seq_length
            encoded_sents.append(encoded)
    else:
        # assume sentences are encoded, pad/truncate them
        for sent in sentences:
            sent = sent[:self.max_seq_length]
            encoded_seqs_no_pad.append(sent)
            sent = sent + [text_encoder.PAD_ID
                           for _ in range(self.max_seq_length - len(sent))]
            encoded_sents.append(sent)

    feed_dict = {
        self.input_nodes["targets"]: np.array(encoded_sents)
    }
    outputs = self.sess.run(self.output_nodes, feed_dict=feed_dict)
    return_outputs = {
        "logits": np.squeeze(outputs[0], axis=(2, 3)),
        "loss": outputs[1]["training"],
        "encoded_seqs_no_pad": encoded_seqs_no_pad
    }

    if relevant_subsequences is not None:
        for i, rss in enumerate(relevant_subsequences):
            encoded_subseq = self.encoder.encode(rss)
            positions = find_sub_list(encoded_subseq, encoded_sents[i])
            misaligned_prefix_length = 0
            while positions is None:
                misaligned_prefix_length += 1
                encoded_subseq = encoded_subseq[1:]
                positions = find_sub_list(encoded_subseq, encoded_sents[i])
            start, end = positions[-1]
            relevant_logits = return_outputs["logits"][i][start:end]
            log_probs = log_softmax(relevant_logits, axis=1)
            gold_log_probs = [lp[index]
                              for index, lp in zip(encoded_subseq, log_probs)]
            return_outputs["subseq_log_loss"] = -1 * np.mean(gold_log_probs)
            return_outputs["misaligned_prefix_length"] = misaligned_prefix_length

    return return_outputs
def train_oneside(self, transition, rewards, states, q0):
    q1 = {}
    probs = {}
    for s in states:
        relative_probs = []
        for a in self.actions:
            relative_probs.append(self.beta * q0[((s[1], s[0]), a)])
        relative_probs = softmax(relative_probs)
        for j, a in enumerate(self.actions):
            probs[(s, a)] = relative_probs[j]
    self.test_probs = probs
    for first in range(self.max_iter):
        new_q1 = {}
        max_diff = 0
        for s in states:
            for a in self.actions:
                new_q1[(s, a)] = 0
                if (s, a) in q1:
                    for s_ in states:
                        num_actions = 0
                        state_prob = 0
                        total_rewards = 0
                        max_val = -1000
                        for a2 in self.actions:
                            if (s, a, a2, s_) in transition:
                                num_actions += 1
                                state_prob += transition[
                                    (s, a, a2, s_)] * probs[(s, a2)]
                                total_rewards += rewards[(s, a, a2, s_)][0]
                            if q1[(s_, a2)] > max_val:
                                max_val = q1[(s_, a2)]
                        if num_actions == 0:
                            continue
                        total_rewards /= num_actions
                        new_q1[(s, a)] += state_prob * (
                            total_rewards + self.discount * max_val)
                    max_diff = max(max_diff, abs(q1[(s, a)] - new_q1[(s, a)]))
        q1 = new_q1
        if max_diff < 1e-5 and first != 0:
            print(first)
            print("early")
            break
    final_log_probs = {}
    for s in states:
        relative_probs = []
        for a in self.actions:
            relative_probs.append(self.beta * q1[(s, a)])
        relative_probs = log_softmax(relative_probs)
        for j, a in enumerate(self.actions):
            final_log_probs[(s, a)] = relative_probs[j]
    return final_log_probs
def get_token_logp(token: dict, softmax: bool = True) -> tuple:
    """ returns token logp from forward and backward lstm """
    forward_logits = token['forward']['logp']
    backward_logits = token['backward']['logp']
    if softmax:
        forward_logits = log_softmax(forward_logits)
        backward_logits = log_softmax(backward_logits)
    vocab_forward = dict(
        zip(token['forward']['candidate_words'], forward_logits))
    vocab_backward = dict(
        zip(token['backward']['candidate_words'], backward_logits))
    forward_logp = vocab_forward.get(token['word'], vocab_forward['<UNK>'])
    backward_logp = vocab_backward.get(token['word'], vocab_backward['<UNK>'])
    word = token['word'] if forward_logp != vocab_forward['<UNK>'] else '<UNK>'
    return forward_logp, backward_logp, word
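# Hedged usage sketch for get_token_logp above: the token dict layout below is
# inferred from the lookups the function performs, and log_softmax is assumed
# to be scipy.special.log_softmax; both are illustrative assumptions.
import numpy as np

token = {
    'word': 'cat',
    'forward': {'logp': np.array([2.0, 0.5, -1.0]),
                'candidate_words': ['cat', 'dog', '<UNK>']},
    'backward': {'logp': np.array([1.0, 1.5, 0.0]),
                 'candidate_words': ['cat', 'dog', '<UNK>']},
}

# Returns the normalized log-probability of 'cat' in both directions,
# falling back to '<UNK>' when the word is not among the candidates.
forward_logp, backward_logp, word = get_token_logp(token)
print(word, forward_logp, backward_logp)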
def test_forward_batch(self):
    xs = np.asarray(
        [[[[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]],
          [[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.1, 0.1],
           [0.7, 0.1, 0.2, 0.1, 0.1]],
          [[0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0]]],
         [[[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]],
          [[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.1, 0.1],
           [0.7, 0.1, 0.2, 0.1, 0.1]],
          [[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]]]],
        dtype=np.float32)
    xs = log_softmax(xs, axis=-1)
    ys = np.asarray([[1, 2], [1, 2]], dtype=np.int32)
    xn = np.asarray([2, 3], dtype=np.int32)
    yn = np.asarray([2, 2], dtype=np.int32)
    expected_costs = np.array([4.495666773770733, 5.7367250428101615],
                              dtype=np.float32)
    expected_grads = np.array(
        [[[[-0.308198071906, -0.6918019280939998, 0.0, 0.0, 0.0],
           [-0.308198071906, 0.0, -0.3836038561880001, 0.0, 0.0],
           [-0.3836038561880001, 0.0, 0.0, 0.0, 0.0]],
          [[0.0, -0.308198071906, 0.0, 0.0, 0.0],
           [0.0, 0.0, -0.6163961438119995, 0.0, 0.0],
           [-0.9999999999999991, 0.0, 0.0, 0.0, 0.0]],
          [[0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0]]],
         [[[-0.45920877, -0.54079123, -0., -0., -0.],
           [-0.32392462, -0., -0.21686661, -0., -0.],
           [-0.21686661, -0., -0., -0., -0.]],
          [[-0.13528414, -0.32392462, -0., -0., -0.],
           [-0.29937584, -0., -0.3484734, -0., -0.],
           [-0.56534001, -0., -0., -0., -0.]],
          [[-0., -0.13528414, -0., -0., -0.],
           [-0., -0., -0.43465999, -0., -0.],
           [-1., -0., -0., -0., -0.]]]],
        dtype=np.float32)
    self._run_transducer(xs, xn, ys, yn,
                         expected_costs, expected_grads,
                         use_gpu=True,
                         expected_error=None)
def run_model(self, img):
    if torch.is_tensor(img):
        img = img.clone().detach().numpy()
    if self.use_ctc:
        logits = self.exec_net.infer(
            inputs={self.config.get('model_input_names'): img
                    })[self.config.get('model_output_names').split(',')[0]]
        pred = log_softmax(logits, axis=2)
        pred = ctc_greedy_search(pred, 0)
        return pred[0]
    enc_res = self.exec_net_encoder.infer(
        inputs={
            self.config.get('encoder_input_names',
                            ENCODER_INPUTS).split(',')[0]: img
        })
    enc_out_names = self.config.get('encoder_output_names',
                                    ENCODER_OUTPUTS).split(',')
    ir_row_enc_out = enc_res[enc_out_names[0]]
    dec_states_h = enc_res[enc_out_names[1]]
    dec_states_c = enc_res[enc_out_names[2]]
    output = enc_res[enc_out_names[3]]
    dec_in_names = self.config.get('decoder_input_names',
                                   DECODER_INPUTS).split(',')
    dec_out_names = self.config.get('decoder_output_names',
                                    DECODER_OUTPUTS).split(',')
    tgt = np.array([[START_TOKEN]] * 1)
    logits = []
    for _ in range(MAX_SEQ_LEN):
        dec_res = self.exec_net_decoder.infer(
            inputs={
                dec_in_names[0]: dec_states_h,
                dec_in_names[1]: dec_states_c,
                dec_in_names[2]: output,
                dec_in_names[3]: ir_row_enc_out,
                dec_in_names[4]: tgt
            })
        dec_states_h = dec_res[dec_out_names[0]]
        dec_states_c = dec_res[dec_out_names[1]]
        output = dec_res[dec_out_names[2]]
        logit = dec_res[dec_out_names[3]]
        logits.append(logit)
        tgt = np.reshape(np.argmax(logit, axis=1), (1, 1)).astype(np.long)
        if tgt[0][0] == END_TOKEN:
            break
    return np.argmax(np.array(logits).squeeze(1), axis=1)
def intention(self, rounds, coop_model, comp_model, punish_model):
    if len(rounds) == 0:
        return random.randint(0, 2)
    total_coop = 0
    total_comp = 0
    total_punish = 0
    for round in rounds:
        coop = coop_model.step(round) + math.log(self.p_coop)
        comp = comp_model.step(round) + math.log(self.p_comp)
        punish = punish_model.step(round) + math.log(self.p_punish)
        probs = log_softmax([coop, comp, punish])
        total_coop += probs[0]
        total_comp += probs[1]
        total_punish += probs[2]
    probs = softmax([total_coop, total_comp, total_punish])
    return random.choices(range(3), probs)[0]
def intention(self, rounds, coop_model, comp_model):
    if len(rounds) == 0:
        return random.randint(0, 1)
    total_coop = 0
    total_comp = 0
    for round in rounds:
        coop = coop_model.step(round) + math.log(self.p_coop)
        comp = comp_model.step(round) + math.log(self.p_comp)
        probs = log_softmax([coop, comp])
        total_coop += probs[0]
        total_comp += probs[1]
    probs = softmax([total_coop, total_comp])
    if random.random() < probs[0]:
        return 0
    else:
        return 1
def train_q0(self, transition, rewards, states, total_prob):
    q0 = {}
    for first in range(self.max_iter):
        new_q0 = {}
        max_diff = 0
        for s in states:
            for a in self.actions:
                new_q0[(s, a)] = 0
                if (s, a) in q0:
                    for s_ in states:
                        num_actions = 0
                        state_prob = 0
                        total_rewards = 0
                        max_val = -1000
                        for a2 in self.actions:
                            if (s, a, a2, s_) in transition:
                                num_actions += 1
                                state_prob += transition[(s, a, a2, s_)]
                                total_rewards += rewards[(s, a, a2, s_)][1]
                            if q0[(s_, a2)] > max_val:
                                max_val = q0[(s_, a2)]
                        if num_actions == 0:
                            continue
                        state_prob = state_prob / total_prob[(s, a)]
                        total_rewards /= num_actions
                        new_q0[(s, a)] += state_prob * (
                            total_rewards + self.discount * max_val)
                    max_diff = max(max_diff, abs(q0[(s, a)] - new_q0[(s, a)]))
        q0 = new_q0
        if max_diff < 1e-5 and first != 0:
            print(first)
            print("early")
            break
        elif first == self.max_iter - 1:
            print(max_diff)
    probs = {}
    for s in states:
        relative_probs = []
        for a in self.actions:
            relative_probs.append(self.beta * q0[(s, a)])
        relative_probs = log_softmax(relative_probs)
        for j, a in enumerate(self.actions):
            probs[(s, a)] = relative_probs[j]
    self.test_probs = probs
    return probs
def test_one_to_empty(self):
    xs = np.asarray([[[[0.1, 0.6, 0.1, 0.1, 0.1]]]], dtype=np.float32)
    xs = log_softmax(xs, axis=-1)
    ys = np.asarray([[]], dtype=np.int32)
    xn = np.asarray([1], dtype=np.int32)
    yn = np.asarray([0], dtype=np.int32)
    expected_costs = np.asarray([1.7314291957733714], dtype=np.float32)
    expected_grads = np.asarray([[[[-1., 0.0, 0.0, 0.0, 0.0]]]],
                                dtype=np.float32)
    self._run_transducer(xs, xn, ys, yn,
                         expected_costs, expected_grads,
                         use_gpu=True,
                         expected_error=None)
def train(self):
    q = {}
    for first in range(self.max_iter):
        new_q = {}
        max_diff = 0
        for s in self.env.states:
            for a in self.actions:
                new_q[(s, a)] = 0
                if (s, a) in q:
                    for s_ in self.env.states:
                        num_actions = 0
                        state_prob = 0
                        total_rewards = 0
                        max_val = -1000
                        for a2 in self.actions:
                            if (s, a, a2, s_) in self.env.transitions:
                                num_actions += 1
                                state_prob += self.env.transitions[(s, a, a2, s_)] * math.exp(
                                    self.other_probs[((s[1], s[0]), a2)])
                                rewards = self.env.rewards[(s, a, a2, s_)]
                                total_rewards += self.w * rewards[0] - (1 - self.w) * rewards[1]
                            if q[(s_, a2)] > max_val:
                                max_val = q[(s_, a2)]
                        if num_actions == 0:
                            continue
                        total_rewards /= num_actions
                        new_q[(s, a)] += state_prob * (
                            total_rewards + self.discount * max_val)
                    max_diff = max(max_diff, abs(q[(s, a)] - new_q[(s, a)]))
        q = new_q
        if max_diff < 1e-5 and first != 0:
            print(first)
            print("early")
            break
    for s in self.env.states:
        relative_probs = []
        for a in self.actions:
            relative_probs.append(self.beta * q[(s, a)])
        relative_probs = log_softmax(relative_probs)
        for j, a in enumerate(self.actions):
            self.log_probs[(s, a)] = relative_probs[j]
def test_forward_single_gather(self, blank=0):
    xs = np.asarray(
        [[[[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]],
          [[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.1, 0.1],
           [0.7, 0.1, 0.2, 0.1, 0.1]]]],
        dtype=np.float32)
    xs = log_softmax(xs, axis=-1)
    ys = np.asarray([[1, 2]], dtype=np.int32)
    xn = np.asarray([2], dtype=np.int32)
    yn = np.asarray([2], dtype=np.int32)
    N, T, U, V = xs.shape
    index = np.full([N, T, U, 2], np.array(blank, dtype=np.int64))
    index[:, :, :U - 1, 1] = np.expand_dims(ys, axis=1)
    xs = np.take_along_axis(xs, indices=index, axis=3)
    expected_costs = np.array([4.495666], dtype=np.float32)
    expected_grads = np.array([[[[-0.308198071906, -0.6918019280939998],
                                 [-0.308198071906, -0.3836038561880001],
                                 [-0.3836038561880001, 0.0]],
                                [[0.0, -0.308198071906],
                                 [0.0, -0.6163961438119995],
                                 [-0.9999999999999991, 0.0]]]])
    self._run_transducer(xs, xn, ys, yn,
                         expected_costs=expected_costs,
                         expected_grads=expected_grads,
                         use_gpu=True,
                         expected_error=None,
                         blank=-1)
def test_one_to_many(self):
    xs = np.asarray(
        [[[[0.1, 0.6, 0.1, 0.1, 0.1],
           [0.1, 0.1, 0.6, 0.1, 0.1],
           [0.1, 0.1, 0.2, 0.8, 0.1]]]],
        dtype=np.float32)
    xs = log_softmax(xs, axis=-1)
    ys = np.asarray([[1, 2]], dtype=np.int32)
    xn = np.asarray([1], dtype=np.int32)
    yn = np.asarray([2], dtype=np.int32)
    expected_costs = np.asarray([4.274244594423859], dtype=np.float32)
    expected_grads = np.asarray(
        [[[[0.0, -1., 0.0, 0.0, 0.0],
           [0.0, 0.0, -1., 0.0, 0.0],
           [-1., 0.0, 0.0, 0.0, 0.0]]]],
        dtype=np.float32)
    self._run_transducer(xs, xn, ys, yn,
                         expected_costs, expected_grads,
                         use_gpu=True,
                         expected_error=None)
def softmax_ud(logits):
    logps = log_softmax(np.array(logits))
    return np.exp(logps)
def entropy(logits, alpha):
    logps = log_softmax(np.array(logits) / alpha)
    return -np.sum(np.exp(logps) * logps)
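# Quick self-contained check of softmax_ud and entropy above, assuming their
# log_softmax is scipy.special.log_softmax; the definitions are repeated here so
# the snippet runs on its own, and the logits/temperatures are made-up values.
import numpy as np
from scipy.special import log_softmax


def softmax_ud(logits):
    # Numerically stable softmax via exp(log_softmax(x)).
    return np.exp(log_softmax(np.array(logits)))


def entropy(logits, alpha):
    # Shannon entropy of the temperature-scaled distribution softmax(x / alpha).
    logps = log_softmax(np.array(logits) / alpha)
    return -np.sum(np.exp(logps) * logps)


logits = [2.0, 1.0, 0.1]
assert np.isclose(softmax_ud(logits).sum(), 1.0)  # probabilities sum to 1
print(entropy(logits, alpha=1.0))    # sharper distribution -> lower entropy
print(entropy(logits, alpha=10.0))   # flatter distribution -> entropy near log(3)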
def inference(args, p_encoder, q_encoder, question_texts, p_tokenizer, q_tokenizer):
    es = elastic_setting(args.index_name)
    p_encoder.eval()
    q_encoder.eval()
    dense_retrieval_result = {}
    for question_text in tqdm(question_texts):
        es_context_list = elastic_retrieval(es, args.index_name, question_text,
                                            args.es_top_k)
        es_context_list = [context for context, score in es_context_list]
        p_seqs = p_tokenizer(es_context_list, padding='max_length',
                             truncation=True, return_tensors='pt')
        q_seqs = q_tokenizer(question_text, padding='max_length',
                             truncation=True, return_tensors='pt')
        p_input_ids = p_seqs['input_ids']
        p_attention_mask = p_seqs['attention_mask']
        p_token_type_ids = p_seqs['token_type_ids']
        q_input_ids = q_seqs['input_ids']
        q_attention_mask = q_seqs['attention_mask']
        q_token_type_ids = q_seqs['token_type_ids']

        p_input_ids_list = torch.Tensor([])
        p_attention_mask_list = torch.Tensor([])
        p_token_type_ids_list = torch.Tensor([])
        top_k_id = []
        for i in range(len(p_attention_mask)):
            ids_list = select_range(p_attention_mask[i])
            for str_idx, end_idx in ids_list:
                p_input_ids_tmp = torch.cat([
                    torch.Tensor([101]), p_input_ids[i][str_idx:end_idx],
                    torch.Tensor([102])
                ]).int().long()
                p_attention_mask_tmp = p_attention_mask[i][str_idx - 1:end_idx + 1].int().long()
                p_token_type_ids_tmp = p_token_type_ids[i][str_idx - 1:end_idx + 1].int().long()
                p_input_ids_list = torch.cat(
                    [p_input_ids_list, p_input_ids_tmp.unsqueeze(0)]).int().long()
                p_attention_mask_list = torch.cat(
                    [p_attention_mask_list, p_attention_mask_tmp.unsqueeze(0)]).int().long()
                p_token_type_ids_list = torch.cat(
                    [p_token_type_ids_list, p_token_type_ids_tmp.unsqueeze(0)]).int().long()
                top_k_id.append(i)

        batch_num = 20
        if len(p_input_ids_list) % batch_num == 0:
            num = len(p_input_ids_list) // batch_num
        else:
            num = len(p_input_ids_list) // batch_num + 1

        p_output_list = []
        for i in range(num):
            p_input_ids = p_input_ids_list[i * batch_num:(i + 1) * batch_num]
            p_attention_mask = p_attention_mask_list[i * batch_num:(i + 1) * batch_num]
            p_token_type_ids = p_token_type_ids_list[i * batch_num:(i + 1) * batch_num]
            batch = (p_input_ids, p_attention_mask, p_token_type_ids)
            p_inputs = {
                'input_ids': batch[0].to('cuda'),
                'attention_mask': batch[1].to('cuda'),
                'token_type_ids': batch[2].to('cuda')
            }
            p_outputs = p_encoder(**p_inputs).cpu()
            p_output_list.extend(p_outputs.cpu().tolist())
        p_output_list = np.array(p_output_list)

        batch = (q_input_ids, q_attention_mask, q_token_type_ids)
        q_inputs = {
            'input_ids': batch[0].to('cuda'),
            'attention_mask': batch[1].to('cuda'),
            'token_type_ids': batch[2].to('cuda')
        }
        q_outputs = q_encoder(**q_inputs).cpu()  # (N, E)
        q_outputs = np.array(q_outputs.cpu().tolist())

        sim_scores = np.matmul(q_outputs, np.transpose(
            p_output_list, [1, 0]))  # (1, E) x (E, N) = (1, N)
        sim_scores = log_softmax(sim_scores, axis=1)
        class_0 = np.array(
            [1 if i == 0 else 0 for idx, i in enumerate(top_k_id)])
        w = np.sum(sim_scores, axis=1) * 1 / np.shape(sim_scores)[1]
        sim_scores = sim_scores[0] - w[0] * class_0
        preds_idx = np.argsort(-1 * sim_scores, axis=0)

        top_idx_list = []
        top_k_list = []
        for idx in preds_idx:
            top_idx = top_k_id[idx]
            if top_idx in top_idx_list:
                continue
            top_idx_list.append(top_idx)
            top_k_list.append((es_context_list[top_idx], sim_scores[idx]))
        dense_retrieval_result[question_text] = top_k_list[:args.dr_top_k]
    return dense_retrieval_result
def test_log_softmax_2d_axis0(log_softmax_2d_x, log_softmax_2d_expected):
    x = log_softmax_2d_x.T
    expected = log_softmax_2d_expected.T
    assert_allclose(sc.log_softmax(x, axis=0), expected, rtol=1e-13)
def test_sample(self):
    log_probs = log_softmax([5, 4, 10, 1])
    action_code = self.transducer.sample(log_probs)
    self.assertTrue(0 <= action_code < self.transducer.number_actions)
def logsoftmax(x, **kwargs):
    return log_softmax(x, **kwargs)
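# Sanity check for the thin logsoftmax wrapper above, assuming its log_softmax
# comes from scipy.special; the wrapper is redefined here so the snippet runs
# on its own, and the test array is an arbitrary example.
import numpy as np
from scipy.special import log_softmax


def logsoftmax(x, **kwargs):
    return log_softmax(x, **kwargs)


x = np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]])
# The wrapper forwards its arguments unchanged.
assert np.allclose(logsoftmax(x, axis=-1), log_softmax(x, axis=-1))
# Exponentiated log-softmax rows are valid probability distributions.
assert np.allclose(np.exp(logsoftmax(x, axis=-1)).sum(axis=-1), 1.0)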
def test_log_softmax_noneaxis(log_softmax_x, log_softmax_expected):
    # When axis=None, softmax operates on the entire array, and preserves
    # the shape.
    x = log_softmax_x.reshape(2, 2)
    expected = log_softmax_expected.reshape(2, 2)
    assert_allclose(sc.log_softmax(x), expected, rtol=1e-13)
def LDA_collapsed(document_word_matrix, document_word_matrix_test, n_iter_doc,
                  n_iter, K, alpha, eta):
    """
    Collapsed Variational Bayesian Inference for LDA.
    """
    np.random.seed(0)
    D = document_word_matrix.shape[0]
    W = document_word_matrix.shape[1]
    W_array = np.arange(0, W, 1)
    bound_list = []
    bound_test = []

    phi_n = np.zeros((D, W, K))
    phi = np.random.rand(D, W, K)
    for d in range(D):
        phi[d, :, :] = phi[d, :, :] / phi[d, :, :].sum(axis=1)[:, None]
    log_phi = np.zeros((D, W, K))
    gamma = np.ones((D, K))
    lambda_ = np.random.rand(K, W)

    for iter in range(n_iter_doc):
        t_beginning = time.time()
        for d in range(D):
            # if d % 10 == 0:
            #     print(d)
            # keeping only the considered document and useful infos
            mask = document_word_matrix[d] > 0
            # N_count = document_word_matrix[d][mask]
            W_list = W_array[mask]
            # useful matrices
            M = document_word_matrix[d, W_list]  # size W
            last_gamma = gamma[d, :].copy()
            gamma[d, :] = np.ones(K)
            for i in range(n_iter):
                # t0 = time.time()
                # if i % 10 == 0:
                #     print(i)
                # phi reduced to the right number of words
                # phi_temp = np.zeros((W_list.shape[0], K))
                # log_phi_temp = np.zeros((W_list.shape[0], K))

                # update phi
                # useful matrices
                phi_list = phi[d, W_list, :]  # size W*K
                phi_list_n = M.reshape((-1, 1)) * phi_list  # size W*K
                PHI_N = np.tile(document_word_matrix[:, W_list, np.newaxis],
                                (1, 1, K)) * phi[:, W_list, :]  # D*W*K
                PHI_N_0 = PHI_N.sum(axis=0)
                var = PHI_N * (1 - phi[:, W_list, :])
                var_0 = var.sum(axis=0)

                #### collapsed VB: formula 18 of original article ####
                # 1st term
                K_ = phi_list_n.sum(axis=0)  # size K
                esp1 = K_[None, :] - phi_list_n
                term1 = alpha + esp1  # W*K
                # 2nd term
                esp2 = PHI_N_0 - PHI_N[d, :, :]
                term2 = eta + esp2
                # 3rd term
                # esp3 = np.sum(PHI_N, axis=(0, 1)).reshape((1, -1)) - PHI_N[d, :, :]
                esp3 = np.sum(PHI_N_0, axis=0).reshape((1, -1)) - PHI_N[d, :, :]
                term3 = W * eta + esp3  # W*K
                # 4th term
                var1 = phi_list_n * (1 - phi_list)
                var1 = (var1).sum(axis=0).reshape((1, -1)) - var1  # W*K
                term4 = -var1 / (2 * term1**2)
                # 5th term
                var2 = var_0 - var[d, :, :]
                term5 = -var2 / (2 * term2**2)
                # 6th term
                # var3 = var.sum(axis=(0, 1)).reshape((1, -1)) - var[d, :, :]
                var3 = var_0.sum(axis=0).reshape((1, -1)) - var[d, :, :]
                term6 = var3 / (2 * term3**2)

                log_phi_temp = np.log(term1) + np.log(term2) - np.log(term3) \
                    + term4 + term5 + term6  # W*K
                # print(np.mean(log_phi_temp))
                # log_phi_temp = np.random.rand(len(W_list), K)
                phi_temp = sc.softmax(log_phi_temp, axis=1)
                # log_phi_temp = sc.log_softmax(L_test, axis=1)

                # update gamma
                last_gamma = gamma[d, :].copy()
                gamma[d, :] = alpha + np.sum(
                    phi_temp * document_word_matrix[d, W_list].reshape(-1, 1),
                    axis=0)

                # check inner convergence
                if np.mean(np.abs(gamma[d, :] - last_gamma)) < 0.001:
                    break

            # update phi
            phi_n[d, W_list, :] = phi_temp * document_word_matrix[d, W_list].reshape(-1, 1)
            phi[d, W_list, :] = phi_temp
            log_phi[d, W_list, :] = sc.log_softmax(log_phi_temp, axis=1)
            # print(time.time() - t0)

        # M-step, update lambda_
        lambda_ = eta + np.sum(phi_n, axis=0).T

        t_end = time.time()
        # b = compute_bound_3(document_word_matrix, phi, log_phi, gamma, lambda_,
        #                     alpha, eta, K, W, D, W_array)
        # Compute Test pbas
        # b_test = compute_pba_test(document_word_matrix_test, W_array, phi, D,
        #                           K, W, alpha, eta)
        b_test = log_pba_approx(document_word_matrix_test, gamma, lambda_, D, W,
                                phi, alpha, eta)
        b = log_pba_approx(document_word_matrix, gamma, lambda_, D, W, phi,
                           alpha, eta)
        t_end_bound = time.time()
        print('iter n°{} - iter time = {} - bound time = {} - bound value = {} - bound_test {}'.format(
            iter, t_end - t_beginning, t_end_bound - t_end, b, b_test))
        bound_list.append(b)
        bound_test.append(b_test)

    return bound_list, bound_test, phi, gamma, lambda_
def test_axes(axis_2d, expected_2d):
    assert_allclose(
        sc.log_softmax([[1000, 1], [1000, 1]], axis=axis_2d),
        expected_2d,
        rtol=1e-13,
    )
def test_vector_perform(self):
    x = vector()
    f = aesara.function([x], logsoftmax(x, axis=None))
    xv = np.random.randn(6).astype(config.floatX)
    assert np.allclose(f(xv), sp.log_softmax(xv))
def test_log_softmax_2d_axis1(log_softmax_2d_x, log_softmax_2d_expected):
    x = log_softmax_2d_x
    expected = log_softmax_2d_expected
    assert_allclose(sc.log_softmax(x, axis=1), expected, rtol=1e-13)