def decoder_inputs(decoder_params):
    """Build dummy decoder token inputs and random encoder outputs.

    Args:
        decoder_params: object exposing ``batch_size`` and ``emb_size``.

    Returns:
        Tuple ``(vin, ven)``. ``vin`` is ``V(T(...))`` of an all-zero integer
        array of shape ``(1, batch_size)``; ``ven`` is ``V(T(...))`` of
        uniform random values of shape ``(1, batch_size, emb_size)``.
    """
    batch_size = decoder_params.batch_size
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; ``dtype=int`` yields exactly the same array dtype.
    inputs = np.zeros(batch_size, dtype=int).reshape(1, batch_size)
    enc_inputs = np.random.rand(1, batch_size, decoder_params.emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return vin, ven
def init_hidden(self):
    """Return a pair of freshly zeroed state variables.

    Both entries have shape ``(num_layers, bs, hidden_size)`` taken from the
    instance. Each tensor goes through ``T`` before being wrapped in
    ``Variable`` (original note: "T to put in gpu").
    """
    state_shape = (self.num_layers, self.bs, self.hidden_size)
    first_state = Variable(T(torch.zeros(*state_shape)))
    second_state = Variable(T(torch.zeros(*state_shape)))
    return (first_state, second_state)
def calc_all_metrics(preds, y):
    """Evaluate every callable in ``METRICS`` on ``(preds, y)``.

    Both arguments are converted with ``T`` first. The results are returned
    as a list in ``METRICS`` order.
    """
    pred_tensor = T(preds)
    target_tensor = T(y)
    return [metric(pred_tensor, target_tensor) for metric in METRICS]
def attention_setup(request):
    """Create random keys and a random query for attention tests.

    ``request.param`` supplies ``(edq, edk)``: the query and key embedding
    sizes. Presumably this is a parametrized pytest fixture; the decorator is
    not visible here.

    Returns:
        ``(keys, query)`` with shapes ``(3, 2, edk)`` and ``(2, edq)``.
    """
    seq_len, batch = 3, 2
    query_dim, key_dim = request.param
    # Keys are drawn before the query so the random stream is consumed in the
    # same order as before.
    keys = T(np.random.rand(seq_len, batch, key_dim))
    # query would be the hidden state of the decoder
    query = T(np.random.rand(batch, query_dim))
    return keys, query
def decoder_inputs_transformer():
    """Build dummy transformer-decoder inputs with fixed small dimensions.

    Returns:
        ``(batch_size, emb_size, nlayers, sl, vin, ven)``. ``vin`` is
        ``V(T(...))`` of an all-zero integer array of shape
        ``(1, batch_size)``; ``ven`` is ``V(T(...))`` of uniform random
        values of shape ``(nlayers, sl, batch_size, emb_size)``.
    """
    batch_size = 2
    emb_size = 12
    nlayers = 8
    sl = 3
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; ``dtype=int`` yields exactly the same array dtype.
    inputs = np.zeros(batch_size, dtype=int).reshape(1, batch_size)
    enc_inputs = np.random.rand(nlayers, sl, batch_size, emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return batch_size, emb_size, nlayers, sl, vin, ven
def test_select_hidden_by_index():
    """Check that select_hidden_by_index gathers hidden entries per beam."""
    bs, num_beams = 2, 3

    # Given a single hidden tensor laid out as [ndir, bs, hd] for bs=2, num_beams=3
    hidden_values = np.array([2, 3, 4, 10, 11, 12]).reshape(1, 6, 1)
    hidden = [V(T(hidden_values))]

    # and parent indices for every batch [bs, ndims]
    parent_indices = V(T(np.array([[0, 0, 1], [2, 2, 2]])))
    flat_parents = reshape_parent_indices(parent_indices.view(-1), bs=bs, num_beams=num_beams)

    # when the hidden state is selected by those indices
    results = select_hidden_by_index(hidden, flat_parents.view(-1))

    # then the expected selected hidden values come back
    expected = np.array([2, 2, 3, 12, 12, 12])
    assert_allclose(actual=to_np(results[0]).ravel(), desired=expected)
def attention_projection_setup(request):
    """Create random encoder/decoder outputs plus projection parameters.

    ``request.param`` supplies ``(edq, edk)``. Presumably this is a
    parametrized pytest fixture; the decorator is not visible here.

    Returns:
        ``(encoder_outputs, decoder_output, params)`` where the tensors have
        shapes ``(3, 2, edk)`` and ``(2, edq)`` and ``params`` is a keyword
        dictionary whose ``n_in`` equals ``edk``.
    """
    seq_len, batch = 3, 2
    query_dim, key_dim = request.param
    # Encoder outputs are drawn first to keep the random stream order.
    encoder_outputs = V(T(np.random.rand(seq_len, batch, key_dim)))
    # query would be the hidden state of the decoder
    decoder_output = V(T(np.random.rand(batch, query_dim)))
    params = {
        "n_out": 10,
        "n_in": key_dim,
        "dropout": 0.2,
        "att_nhid": 13,
    }
    return encoder_outputs, decoder_output, params
def generate_text(tokens, N=25):
    """Sample ``N`` additional token ids from the language model and print the text.

    On every step the model state is reset, the model is run on the whole
    ``tokens`` list, and the next id is sampled from the distribution of the
    last position. Ids ``<= 2`` are rejected and re-drawn.

    Args:
        tokens: list of token ids used as the prompt. It is extended in place
            with the sampled ids.
        N: number of ids to sample.

    Returns:
        None. The decoded text is printed via ``sp.DecodeIds``.
    """
    # NOTE(review): reconstructed from a whitespace-collapsed source; the
    # ``tokens.append`` below is assumed to be live code (not part of the
    # preceding comment) - confirm against the original notebook.
    preds = []
    for _step in range(N):
        learner.model[0].reset()
        logits, _, _ = learner.model(T(tokens).unsqueeze(1))
        # The ``[-1, :]`` indexing and 1-D sampling below imply 2-D logits,
        # for which the deprecated implicit softmax dim was the last one;
        # state it explicitly.
        probs = F.softmax(logits, dim=-1).data.cpu().numpy()[-1, :]
        candidates = np.argsort(probs)[::-1]
        while True:
            # Sampling
            candidate = np.random.choice(candidates, p=probs[candidates])
            if candidate > 2:
                print(probs[candidates][:3], probs[candidate])
                preds.append(candidate)
                break
        tokens.append(int(preds[-1]))
    print(sp.DecodeIds(tokens))
def test_transfomer_layer():
    """A TransformerLayer must keep the [sl, bs, in_features] shape."""
    sl, bs, in_features = 10, 2, 32
    raw_inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(raw_inputs)))
    layer = to_gpu(TransformerLayer(in_features=in_features, num_heads=8))
    outputs = layer(inputs)
    assert_dims(outputs, [sl, bs, in_features])
def predict(self, x):
    """Run ``self.m`` over ``x`` and return softmax-normalised predictions.

    ``x`` is wrapped in a ``TextDataset`` with all-zero placeholder labels and
    fed through a ``DataLoader`` (batch size 1000). The ``predict`` called
    below is the module-level function, not this method.
    """
    placeholder_labels = np.array([0] * len(x))
    dataset = TextDataset(x, placeholder_labels)
    loader = DataLoader(dataset, 1000, transpose=True, num_workers=1, pad_idx=1)
    raw_preds = predict(self.m, loader)
    softmax = Softmax()
    normalised = softmax(V(T(raw_preds)))
    return to_np(normalised)
def test_transfomer_layer_decoder():
    """Decoder layer keeps dims, and its first output step is the same when only
    the first decoder step is fed."""
    sl, bs, in_features = 10, 2, 32
    tr.random.manual_seed(0)
    # Encoder tensor is drawn before the decoder tensor (same RNG order).
    raw_encoder = tr.randn([sl, bs, in_features])
    raw_decoder = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(raw_encoder)))
    decoder_inputs = to_gpu(V(T(raw_decoder)))
    transformer = to_gpu(
        TransformerLayerDecoder(input_size=in_features, num_heads=8, nhid=64, dropout=0))

    full_outputs = transformer(encoder_inputs, decoder_inputs)
    assert_dims(full_outputs, [sl, bs, in_features])

    first_step_outputs = transformer(encoder_inputs, decoder_inputs[:1])
    assert_dims(first_step_outputs, [1, bs, in_features])

    difference = (full_outputs[0] - first_step_outputs[0]).abs()
    assert (difference < 1E-6).all()
def eval(texts):
    """Tabulate the model's top-3 next-piece predictions for ``texts``.

    The text is encoded with ``sp``, run through ``learner.model`` after a
    state reset, and the three highest-scoring ids per position are decoded
    back to pieces.

    Returns:
        ``pd.DataFrame`` with columns ``orig``, ``pred_1``, ``pred_2`` and
        ``pred_3``. Predictions are shifted down by one row relative to
        ``orig`` (padded with empty strings).
    """
    learner.model[0].reset()
    token_ids = sp.EncodeAsIds(texts)
    logits, _, _ = learner.model(T(token_ids).unsqueeze(1))
    ranked = np.argsort(logits.data.cpu().numpy(), 1)
    # ranked[:, -k] is the k-th best id at every position.
    top_pieces = [
        [sp.IdToPiece(piece_id) for piece_id in ranked[:, -rank].tolist()]
        for rank in range(1, 4)
    ]
    first, second, third = top_pieces
    return pd.DataFrame({
        "orig": sp.EncodeAsPieces(texts) + [""],
        "pred_1": [""] + first,
        "pred_2": [""] + second,
        "pred_3": [""] + third,
    })
def test_MultiHeadAttention_with_mask(self_attention_setup):
    """Masked multi-head self attention returns [slq, bs, num_heads * nhid]."""
    keys, query = self_attention_setup
    slk, _, ek = keys.size()
    slq, bs, eq = query.size()  # bs is taken from the query
    num_heads, nhid = 4, 10
    module = MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek,
                                query_dim=eq, values_dim=ek, dropout=0.3)
    attention = to_gpu(module)
    # Lower-triangular mask over the (slq, slk) plane for each batch and head.
    lower_triangle = np.tril(np.ones((bs, num_heads, slq, slk)))
    mask = T(lower_triangle).float()
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [slq, bs, num_heads * nhid])
def eval(texts):
    """Tabulate the model's top-3 next-symbol predictions for ``texts``.

    Every element of ``texts`` is looked up in ``mapping`` (unknown elements
    map to id 1), the ids are run through ``learner.model`` after a state
    reset, and the three highest-scoring ids per position are translated back
    through ``itos``.

    Returns:
        ``pd.DataFrame`` with columns ``orig``, ``pred_1``, ``pred_2`` and
        ``pred_3``. Predictions are shifted down by one row relative to
        ``orig``.
    """
    learner.model[0].reset()
    token_ids = [mapping.get(symbol, 1) for symbol in texts]
    logits, _, _ = learner.model(T(token_ids).unsqueeze(1))
    ranked = np.argsort(logits.data.cpu().numpy(), 1)
    # ranked[:, -k] is the k-th best id at every position.
    top_symbols = [
        [itos[idx] for idx in ranked[:, -rank]]
        for rank in range(1, 4)
    ]
    first, second, third = top_symbols
    return pd.DataFrame({
        "orig": list(texts) + [" "],
        "pred_1": [""] + first,
        "pred_2": [""] + second,
        "pred_3": [""] + third,
    })
def aucs(preds, targets):
    """Compute one ROC-AUC score per column of ``preds``.

    Both inputs are converted with ``to_np``. Column ``i`` of ``targets`` is
    scored against column ``i`` of ``preds`` with ``roc_auc_score``. The
    number of columns is printed, and the scores are returned wrapped in
    ``T``.
    """
    target_array = to_np(targets)
    pred_array = to_np(preds)
    n_columns = pred_array.shape[1]
    print(n_columns)
    column_scores = [
        roc_auc_score(target_array[:, col], pred_array[:, col])
        for col in range(n_columns)
    ]
    return T(column_scores)
def test_transformer_encoder():
    """Stacked encoder layers return one [sl, bs, in_features] output per layer."""
    sl, bs = 10, 2
    in_features, num_layers = 300, 5
    raw_inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(raw_inputs)))
    encoder = TransformerEncoderLayers(input_size=in_features, num_heads=8,
                                       nhid=512, num_layers=num_layers)
    transformer = to_gpu(encoder)
    layer_outputs = transformer(inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
def test_transformer_decoder_layers():
    """Decoder stack keeps dims, leaves ``hidden`` unset, and gives the same first
    step whether it is fed the full sequence or only the first step."""
    sl, bs = 10, 2
    in_features, num_layers = 32, 5
    # Decoder inputs are drawn before encoder inputs (same RNG order).
    raw_inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(tr.randn([num_layers, sl, bs, in_features]))))
    inputs = to_gpu(V(T(raw_inputs)))
    decoder = TransformerDecoderLayers(input_size=in_features, num_heads=8,
                                       nhid=512, nlayers=num_layers, dropout=0.0)
    transformer = to_gpu(decoder)
    assert transformer.hidden is None

    full_outputs = transformer(inputs, encoder_inputs)
    assert_dims(full_outputs, [num_layers, sl, bs, in_features])
    assert transformer.hidden is None

    # Feeding only the first step through the decoder layers must reproduce
    # the first step of the full run.
    first_step_outputs = transformer(inputs[:1], encoder_inputs)
    assert_dims(first_step_outputs, [num_layers, 1, bs, in_features])
    for full_layer, step_layer in zip(full_outputs, first_step_outputs):
        difference = (full_layer[0] - step_layer[0]).abs()
        assert (difference < 1E-6).all()
def eval_text(texts):
    """Tabulate the model's top-3 predictions for the encoded ``texts``.

    Only the first 100 encoded ids are fed to ``learner.model``. The returned
    table keeps the last 90 entries of the original pieces and of each
    prediction list. The number of predictions per rank is printed.

    Returns:
        ``pd.DataFrame`` with columns ``orig``, ``pred_1``, ``pred_2`` and
        ``pred_3``. Predictions are shifted down by one row relative to
        ``orig``.
    """
    token_ids = sp.EncodeAsIds(texts)[:100]
    logits = learner.model(T(token_ids).unsqueeze(0))
    ranked = np.argsort(logits.data.cpu().numpy(), 1)
    # ranked[:, -k] is the k-th best id at every position.
    top_pieces = [
        [sp.IdToPiece(piece_id) for piece_id in ranked[:, -rank].tolist()]
        for rank in range(1, 4)
    ]
    first, second, third = top_pieces
    print(len(first))
    return pd.DataFrame({
        "orig": sp.EncodeAsPieces(texts)[-90:] + [""],
        "pred_1": [""] + first[-90:],
        "pred_2": [""] + second[-90:],
        "pred_3": [""] + third[-90:]
    })
def neighbor_gen(at, distance_expansion=None, cutoff=5.0, n_gaussians=25,
                 trainable_gaussians=False,
                 environment_provider=ASEEnvironmentProvider(5.0),
                 collect_triples=False, pair_provider=None,
                 center_positions=True):
    """Build the model input dictionary for ``at`` and run the RBF representation.

    Args:
        at: atoms object exposing ``numbers``, ``positions``, ``cell`` and
            ``get_center_of_mass()``.
        distance_expansion, cutoff, n_gaussians, trainable_gaussians:
            forwarded unchanged to ``spk.custom.representation.RBF``.
        environment_provider: object whose ``get_environment(idx, at)`` returns
            ``(neighbor_indices, cell_offsets)``. The default instance is
            created once, when the function is defined, with the literal 5.0
            and is therefore not tied to the ``cutoff`` argument.
        collect_triples: not used by this function.
        pair_provider: optional object whose ``get_environment(nbh_idx)``
            returns the ``(j, k)`` neighbor pair indices.
        center_positions: subtract the center of mass from the positions.

    Returns:
        Tuple of two numpy arrays: the squeezed outputs ``r`` and ``f`` of the
        model's ``forward``.
    """
    # NOTE: every ``np.int`` below was replaced by the builtin ``int``. The
    # alias was removed in NumPy 1.24 and was identical to ``int``, so array
    # dtypes are unchanged.
    properties = {}
    properties[Structure.Z] = T(at.numbers.astype(int)).unsqueeze(0)

    # ``astype`` returns a new array, so the in-place centering below does not
    # modify ``at.positions``.
    positions = at.positions.astype(np.float32)
    if center_positions:
        positions -= at.get_center_of_mass()
    properties[Structure.R] = T(positions).unsqueeze(0)
    properties[Structure.cell] = T(at.cell.astype(np.float32)).unsqueeze(0)

    # get atom environment
    idx = 0
    nbh_idx, offsets = environment_provider.get_environment(idx, at)
    properties[Structure.neighbors] = T(nbh_idx.astype(int)).unsqueeze(0)
    properties[Structure.cell_offset] = T(offsets.astype(
        np.float32)).unsqueeze(0)
    properties[Structure.neighbor_mask] = None
    properties['_idx'] = T(np.array([idx], dtype=int)).unsqueeze(0)

    if pair_provider is not None:
        nbh_idx_j, nbh_idx_k = pair_provider.get_environment(nbh_idx)
        properties[Structure.neighbor_pairs_j] = T(nbh_idx_j.astype(int))
        properties[Structure.neighbor_pairs_k] = T(nbh_idx_k.astype(int))

    model = spk.custom.representation.RBF(
        distance_expansion=distance_expansion,
        cutoff=cutoff,
        n_gaussians=n_gaussians,
        trainable_gaussians=trainable_gaussians)
    model = to_gpu(model)
    r, f = model.forward(properties)
    return to_np(r.squeeze()), to_np(f.squeeze())
def test_MultiHeadAttention_with_mask(attention_setup):
    """Masked multi-head attention for one query per batch returns
    [bs, num_heads * nhid]."""
    keys, query = attention_setup
    bs, eq = query.size(0), query.size(1)
    sl, ed = keys.size(0), keys.size(2)
    num_heads, nhid = 4, 10
    module = MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ed,
                                query_dim=eq, values_dim=ed, dropout=0.3)
    attention = to_gpu(module)
    # All-zero mask with only the first sequence position set to one.
    mask = V(T(np.zeros((sl, bs, num_heads))))
    mask[0] = 1
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [bs, num_heads * nhid])
# Notebook export: install and import jieba, then show the model's top-3
# predictions for a pre-segmented (space separated) sample sentence.
get_ipython().system('pip install jieba')
import jieba

# In[21]:

texts = "德国 是 世界 大国 之一 , 其 国内 生产总值 以 国际 汇率 计"
# Words missing from ``mapping`` fall back to id 1.
tokens = [mapping.get(word, 1) for word in texts.split(" ")]
tokens

# In[22]:

logits, _, _ = learner.model(T(tokens).unsqueeze(1))
logits.shape

# In[23]:

sorted_idx = np.argsort(logits.data.cpu().numpy(), 1)
preds = []
for i in range(1, 4):
    # sorted_idx[:, -i] is the i-th best id at every position.
    preds.append([itos[idx] for idx in sorted_idx[:, -i]])
pd.DataFrame({
    "orig": texts.split(" ") + [" "],
    "pred_1": [""] + preds[0],
    "pred_2": [""] + preds[1],
    "pred_3": [""] + preds[2],
})
def predict(img):
    """Run ``SetupModel.model`` on a single transformed image.

    ``img`` is transformed with ``TFMS``, converted with ``T`` and stacked
    into a one-element batch. The model output is averaged over dimension 0.
    """
    transformed = T(TFMS(img))
    batch = torch.stack([transformed])
    model_input = VV_(batch)
    output = SetupModel.model(model_input)
    return output.mean(0)
def get_cvae_loss(pad_idx, tchebycheff=False, sigmoid=False, tbc_weights=None,
                  tbc_optimal_point=None, tbc_norm=2):
    """Return a CVAE loss callable closed over ``pad_idx`` and the options.

    Every returned callable takes ``(input, target, step=0, ...)`` where
    ``input`` is the 6-tuple ``(predictions, recog_mu, recog_log_var,
    prior_mu, prior_log_var, bow_logits)``. It combines three terms: the
    decoder cross-entropy, a bag-of-words loss and the Gaussian KL divergence
    between the recognition and prior distributions.

    Args:
        pad_idx: target index ignored by the cross-entropy terms (and masked
            out of the sigmoid bag-of-words loss).
        tchebycheff: if true, return the variant that combines the three
            losses with ``tchebycheff_objective``. Takes precedence over
            ``sigmoid``.
        sigmoid: if true (and ``tchebycheff`` is false), return the variant
            whose bag-of-words term is a binary cross-entropy with logits.
        tbc_weights: weights for the Tchebycheff objective; defaults to
            ``T([2., 1., 100.])``.
        tbc_optimal_point: optimal point for the Tchebycheff objective;
            defaults to ``0.``.
        tbc_norm: norm forwarded to ``tchebycheff_objective``.

    Returns:
        One of ``cvae_loss_tchebycheff``, ``cvae_loss_sigmoid`` or
        ``cvae_loss``.
    """
    # Counter shared by the returned closure; it throttles the loss printout.
    STEP = 0
    optimal_point = 0. if tbc_optimal_point is None else tbc_optimal_point
    weights = T([2., 1., 100.]) if tbc_weights is None else tbc_weights

    def _should_log(step):
        """Return True (and advance the counter) when ``step`` is ahead of it."""
        # NOTE(review): the original guard also contained
        # ``if step == 0: STEP = 0``, which could never run because STEP is
        # never negative. It was presumably meant to reset the counter for a
        # new run - confirm the intent. The effective behaviour is preserved.
        nonlocal STEP
        if step > STEP:
            STEP += 1
            return True
        return False

    def _kld_weight(step, max_kld_step):
        """Linear KL annealing factor, capped at 1 (1.0 when annealing is off)."""
        if max_kld_step is None:
            return 1.0
        return min((step + 1) / max_kld_step, 1)

    def _softmax_losses(input, target):
        """Return (decoder_loss, bow_loss, kld_loss) with a cross-entropy BOW term."""
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        _, bs, vocab = predictions.size()
        # dims are sq-1 times bs times vocab
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        slt = target.size(0)
        # Out-of-place ``unsqueeze``: the in-place form reshaped the caller's
        # ``bow_logits`` tensor, so evaluating the loss twice on the same
        # model output would see a different shape.
        bow_inputs = bow_logits.unsqueeze(0).repeat(slt, 1, 1)
        # targets are sq-1 times bs (one label for every word)
        target = target.view(-1).contiguous()
        bow_loss = F.cross_entropy(input=bow_inputs.view(-1, vocab),
                                   target=target,
                                   ignore_index=pad_idx,
                                   reduce=False).view(-1, bs)
        bow_loss = bow_loss.mean()
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu, prior_log_var)
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        return decoder_loss, bow_loss, kld_loss

    def cvae_loss(input, target, step=0, max_kld_step=None, **kwargs):
        """Sum of decoder, bag-of-words and (annealed) KL losses."""
        decoder_loss, bow_loss, kld_loss = _softmax_losses(input, target)
        kld_weight = _kld_weight(step, max_kld_step)
        if _should_log(step):
            print(
                f"\nlosses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}"
            )
        return decoder_loss + bow_loss + kld_loss * kld_weight

    def cvae_loss_tchebycheff(input, target, step=0, **kwargs):
        """Combine the three losses with the Tchebycheff objective."""
        decoder_loss, bow_loss, kld_loss = _softmax_losses(input, target)
        if _should_log(step):
            print(
                f"\nlosses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss}"
            )
        losses = torch.cat(
            [decoder_loss.view(1),
             bow_loss.view(1),
             kld_loss.view(1)])
        loss = tchebycheff_objective(losses,
                                     weights=weights,
                                     norm=tbc_norm,
                                     optimal_point=optimal_point)
        return loss

    def cvae_loss_sigmoid(input, target, step=0, max_kld_step=None, **kwargs):
        """Like ``cvae_loss`` but with a multi-label (sigmoid) bag-of-words term."""
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        vocab = predictions.size(-1)
        # dims are sq-1 times bs times vocab
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        # Multi-hot targets: 1 at every vocabulary index listed in ``target``.
        bow_targets = torch.zeros_like(bow_logits).scatter(
            1, target.transpose(1, 0), 1)
        # mask pad token
        pad_mask = to_gpu(V(torch.ones(bow_logits.size(-1)).unsqueeze_(0)))
        pad_mask[0, pad_idx] = 0
        bow_loss = F.binary_cross_entropy_with_logits(bow_logits,
                                                      bow_targets,
                                                      weight=pad_mask)
        # targets are sq-1 times bs (one label for every word)
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu, prior_log_var)
        target = target.view(-1).contiguous()
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        kld_weight = _kld_weight(step, max_kld_step)
        if _should_log(step):
            print(
                f"losses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}"
            )
        return decoder_loss + bow_loss + kld_loss * kld_weight

    if tchebycheff:
        return cvae_loss_tchebycheff
    elif sigmoid:
        return cvae_loss_sigmoid
    else:
        return cvae_loss
def get_prediction(texts):
    """Return the first output of ``learn.model`` for ``texts`` as a numpy array.

    The text is encoded with ``sp``, prefixed with id 1 and shaped into a
    single column before it is passed to the model.
    """
    token_ids = [1] + sp.EncodeAsIds(texts)
    input_tensor = T(np.array(token_ids)).unsqueeze(1)
    first_output = learn.model(input_tensor)[0]
    return first_output.data.cpu().numpy()
# Enable control on the drive interface, report the controlling state and
# wait for the operator before entering the loop.
drive.set_control(True)
print("Controlling: ", drive.is_controlling())
input("Press key to start...")
# Endless loop: whenever a new frame has been written, predict the indicators
# from it, pass them to the controller and refresh the visualizations.
while True:
    if drive.is_written():
        print("Reading img")
        img = drive.read_image() # HWC, BGR
        # Reversing the last axis turns the BGR channels into RGB.
        img_np = np.asarray(img).reshape(HEIGHT, WIDTH, 3)[:,:,::-1] #.astype('uint8') # HWC, RGB
        # import pdb; pdb.set_trace()
        #plt.imshow(img_np)
        #plt.show()
        # Scale the pixel values by 1/255 and store them as float32.
        img_np = (img_np/255).astype('float32')
        x = trn_tfms(img_np)[np.newaxis, ...] # shape (210, 280, 3) -> (3, 210, 210) -> (1, 3, 210, 210)
        x = Variable(T(x),requires_grad=False, volatile=True)
        output = learner.model(x) # shape (1, 14)
        pred_indicators = transform_range_output(to_np(output[0]), UNIT_RANGES, INDIC_RANGES)
        print("network raw output", output)
        print("pred_indicators", pred_indicators)
        indicators_formatted = format_indicators(pred_indicators)
        print("indicators_formatted", indicators_formatted)
        ground_truth = drive.read_indicators()
        print("ground_truth", ground_truth)
        drive.controller(indicators_formatted)
        drive.update_visualizations(indicators_formatted, ground_truth)
        drive.write(False) # Shared data read, and TORCS may continue
    # NOTE(review): the source was whitespace-collapsed; placing wait_key at
    # loop level (run on every iteration) is an assumption - confirm.
    drive.wait_key(1)
def print_all_metrics(preds, y):
    """Print the name and value of every metric in ``METRICS``.

    Both arguments are converted with ``T`` first. Each line shows the
    metric's ``__name__`` left-aligned in 12 columns, followed by its value
    with three decimals.
    """
    pred_tensor = T(preds)
    target_tensor = T(y)
    for metric in METRICS:
        value = metric(pred_tensor, target_tensor)
        print("%-12s %.3f" % (metric.__name__, value))
def test_BiRNNEncoder():
    """EmbeddingRNNEncoder starts from zero state and updates it on every pass.

    Checks the embedding weight shape, the zero-initialised hidden state
    after ``reset``, the embedding output shape, that raw outputs equal the
    outputs when all dropout is disabled, and that the per-layer states
    differ after each forward pass.
    """
    ntoken = 4
    emb_sz = 2
    nhid = 6
    # Given a birnnencoder
    encoder = EmbeddingRNNEncoder(ntoken, emb_sz, nhid=nhid, nlayers=2,
                                  pad_token=0, dropouth=0.0, dropouti=0.0,
                                  dropoute=0.0, wdrop=0.0)
    encoder = to_gpu(encoder)
    assert encoder is not None
    weight = encoder.encoder.weight
    assert (4, 2) == weight.shape
    sl = 2
    bs = 3
    np.random.seed(0)
    inputs = np.random.randint(0, ntoken, sl * bs).reshape(sl, bs)
    vin = V(T(inputs))

    # Then the initial output states should be zero
    encoder.reset(bs)
    initial_hidden = encoder.hidden
    h = []
    c = []
    for layer in initial_hidden:
        h.append(layer[0].data.cpu().numpy())
        c.append(layer[1].data.cpu().numpy())
        assert h[-1].sum() == 0
        assert c[-1].sum() == 0

    embeddings = encoder.encoder(vin)
    assert (2, 3, emb_sz) == embeddings.shape

    # Then the new states are different from before
    raw_outputs, outputs = encoder(vin)
    for r, o in zip(raw_outputs, outputs):
        assert np.allclose(to_np(r), to_np(o))
    initial_hidden = encoder.hidden
    h1 = []
    c1 = []
    for hl, cl, layer in zip(h, c, initial_hidden):
        h1.append(to_np(layer[0]))
        # The second state lives at index 1 (as in the initial-state loop
        # above); the original read index 0 twice.
        c1.append(to_np(layer[1]))
        # ``~`` on a Python bool gives -2 or -1, both truthy, so the original
        # ``assert ~np.allclose(...)`` could never fail. ``not`` performs the
        # intended check.
        assert not np.allclose(hl, h1[-1])
        assert not np.allclose(cl, c1[-1])

    # Then the new states are different from before
    raw_outputs, outputs = encoder(vin)
    for r, o in zip(raw_outputs, outputs):
        assert np.allclose(to_np(r), to_np(o))
    initial_hidden = encoder.hidden
    for hl, cl, layer in zip(h1, c1, initial_hidden):
        h_new = to_np(layer[0])
        c_new = to_np(layer[1])
        assert not np.allclose(hl, h_new)
        assert not np.allclose(cl, c_new)
new_matrix[i] = weights['0.embed.weight'][mapping_orig[w]] hits += 1 new_matrix[BEG, :] = 0 hits, hits *100 / len(itos[3:]) # In[20]: new_matrix[n_toks:, :] = weights['0.embed.weight'][-200:, :] # In[21]: weights['0.embed.weight'] = T(new_matrix) weights['1.weight'] = T(np.copy(new_matrix)[:-200, :]) weights['0.embed.weight'].shape # ## Language Model # In[11]: n_toks, tokens_train, tokens_val, tokens_test = joblib.load("../data/cache/rating_unigram_tokens.pkl") # In[12]:
class MyRnn(nn.Module):
    """Embedding -> single-layer ``nn.RNN`` -> linear -> softmax.

    Args:
        es: embedding size.
        hl: hidden size of the RNN.
        n_classes: vocabulary size and number of output classes.
    """

    def __init__(self, es, hl, n_classes):
        super().__init__()
        self.hl = hl
        # Sub-modules are created in the same order as before so parameter
        # initialisation consumes the random stream identically.
        self.embeddings = nn.Embedding(n_classes, es)
        self.rnn = nn.RNN(es, hl)
        self.linear = nn.Linear(hl, n_classes)

    def forward(self, *input):
        """Stack the given index tensors and return per-step class probabilities.

        The positional tensors are stacked along a new first dimension, and
        the batch size is read from the first tensor's dimension 0.
        """
        batch_size = input[0].size(0)
        initial_state = V(torch.zeros(1, batch_size, self.hl))
        stacked = V(torch.stack(input))
        embedded = self.embeddings(stacked)
        outputs, _final_state = self.rnn(embedded, initial_state)
        scores = self.linear(outputs)
        return F.softmax(scores, -1)


if __name__ == '__main__':
    # Smoke run: feed a tiny batch through EmbeddingDot, then through MyRnn.
    b = T([[1, 1], [0, 0]])
    dot = EmbeddingDot(2, 3)
    print(dot.forward(b, []))
    # print(dot.model)
    # print([a for a in dot.parameters()])
    #
    # print(dot.forward(b, []))
    # print([a for a in dot.parameters()])
    rnn = MyRnn(3, 10, 4)
    rnn.forward(T([1, 2]), T([0, 0]))
def predict(img):
    """Run ``SetupModel.model`` on a single transformed image.

    ``img`` is transformed with ``SetupModel.tfms``, converted with ``T`` and
    stacked into a one-element batch. The raw model output is returned.
    """
    transformed = T(SetupModel.tfms(img))
    batch = torch.stack([transformed])
    model_input = VV_(batch)
    return SetupModel.model(model_input)