Example no. 1
def decoder_inputs(decoder_params):
    batch_size = decoder_params.batch_size
    inputs = np.zeros(batch_size, dtype=np.int64).reshape(1, batch_size)
    enc_inputs = np.random.rand(1, decoder_params.batch_size, decoder_params.emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return vin, ven
Example no. 2
    def init_hidden(self):
        # print("hidden state initialized")
        return (
            Variable(T(torch.zeros(self.num_layers, self.bs,
                                   self.hidden_size))),
            Variable(T(torch.zeros(self.num_layers, self.bs,
                                   self.hidden_size))))  # T moves the tensors to the GPU
Example no. 3
def calc_all_metrics(preds, y):
    preds = T(preds)
    y = T(y)
    return [f(preds, y) for f in METRICS]
Example no. 4
def attention_setup(request):
    sl, bs = 3, 2
    edq, edk = request.param

    # query would be the hidden state of the decoder
    keys = T(np.random.rand(sl, bs, edk))
    query = T(np.random.rand(bs, edq))
    return keys, query
Example no. 5
def decoder_inputs_transformer():
    batch_size = 2
    emb_size = 12
    nlayers = 8
    sl = 3
    inputs = np.zeros(batch_size, dtype=np.int64).reshape(1, batch_size)
    enc_inputs = np.random.rand(nlayers, sl, batch_size, emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return batch_size, emb_size, nlayers, sl, vin, ven
Example no. 6
def test_select_hidden_by_index():
    bs, num_beams = 2, 3
    # when I pass inputs to the select_hidden_by_index function with bs=2, num_beams = 3
    inputs = np.array([2, 3, 4, 10, 11, 12]).reshape(1, 6, 1)  # [ndir, bs, hd]
    tr_inputs = [V(T(inputs))]
    # and  indices for every batch [bs, ndims]
    indices = np.array([[0, 0, 1], [2, 2, 2]])
    tr_indices = V(T(indices))
    tr_indices = reshape_parent_indices(tr_indices.view(-1), bs=bs, num_beams=num_beams)
    results = select_hidden_by_index(tr_inputs, tr_indices.view(-1))
    # then I get the expected selected hidden
    expected = np.array([2, 2, 3, 12, 12, 12])
    assert_allclose(actual=to_np(results[0]).ravel(), desired=expected)
Example no. 7
def attention_projection_setup(request):
    sl, bs = 3, 2
    edq, edk = request.param

    encoder_outputs = V(T(np.random.rand(sl, bs, edk)))
    # query would be the hidden state of the decoder
    decoder_output = V(T(np.random.rand(bs, edq)))
    params = {"n_out": 10,
              "n_in": edk,
              "dropout": 0.2,
              "att_nhid": 13
              }
    return encoder_outputs, decoder_output, params
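Example no. 8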
def generate_text(tokens, N=25):
    preds = []
    for i in range(N):
        learner.model[0].reset()
        logits, _, _ = learner.model(T(tokens).unsqueeze(1))
        probs = F.softmax(logits, dim=-1).data.cpu().numpy()[-1, :]
        candidates = np.argsort(probs)[::-1]
        while True:
            # Sampling
            candidate = np.random.choice(candidates, p=probs[candidates])
            # Greedy alternative:
            # candidate = np.argmax(probs[2:]) + 2
            if candidate > 2:
                print(probs[candidates][:3], probs[candidate])
                preds.append(candidate)
                break
        tokens.append(int(preds[-1]))
        print(sp.DecodeIds(tokens))
Example no. 9
def test_transformer_layer():
    sl = 10
    bs = 2
    in_features = 32
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(TransformerLayer(in_features=in_features, num_heads=8))
    outputs = transformer(inputs)
    assert_dims(outputs, [sl, bs, in_features])
Example no. 10
    def predict(self, x):
        fake_labels = np.array([0] * len(x))

        ds = TextDataset(x, fake_labels)
        dl = DataLoader(ds, 1000, transpose=True, num_workers=1, pad_idx=1)

        preds = predict(self.m, dl)
        sm = Softmax(dim=-1)
        return to_np(sm(V(T(preds))))
Example no. 11
def test_transformer_layer_decoder():
    sl = 10
    bs = 2
    in_features = 32
    tr.random.manual_seed(0)
    encoder_inputs = tr.randn([sl, bs, in_features])
    decoder_inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(encoder_inputs)))
    decoder_inputs = to_gpu(V(T(decoder_inputs)))
    transformer = to_gpu(
        TransformerLayerDecoder(input_size=in_features,
                                num_heads=8,
                                nhid=64,
                                dropout=0))
    outputs = transformer(encoder_inputs, decoder_inputs)
    assert_dims(outputs, [sl, bs, in_features])
    outputs1 = transformer(encoder_inputs, decoder_inputs[:1])
    assert_dims(outputs1, [1, bs, in_features])
    assert ((outputs[0] - outputs1[0]).abs() < 1E-6).all()
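Example no. 12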
def eval(texts):
    learner.model[0].reset()
    tokens = sp.EncodeAsIds(texts)
    logits, _, _ = learner.model(T(tokens).unsqueeze(1))
    sorted_idx = np.argsort(logits.data.cpu().numpy(), 1)
    preds = []
    for i in range(1, 4):
        preds.append([sp.IdToPiece(x) for x in sorted_idx[:, -i].tolist()])
    # preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))
    return pd.DataFrame({"orig": sp.EncodeAsPieces(texts) + [""],
                         "pred_1": [""] + preds[0], "pred_2": [""] + preds[1], "pred_3": [""] + preds[2]})
Example no. 13
def test_MultiHeadAttention_with_mask(self_attention_setup):
    keys, query = self_attention_setup
    slk, bs, ek = keys.size()
    slq, bs, eq = query.size()
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek, query_dim=eq, values_dim=ek, dropout=0.3))
    mask = T(np.tril(np.ones((bs, num_heads, slq, slk)))).float()
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [slq, bs, num_heads * nhid])
Example no. 14
def eval(texts):
    learner.model[0].reset()
    tokens = list(map(lambda x: mapping.get(x, 1), texts))
    logits, _, _ = learner.model(T(tokens).unsqueeze(1))
    sorted_idx = np.argsort(logits.data.cpu().numpy(), 1)
    preds = []
    for i in range(1, 4):
        preds.append(list(map(lambda x: itos[x], sorted_idx[:, -i])))
    # preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))
    return pd.DataFrame({"orig": list(texts) + [" "],
                         "pred_1": [""] + preds[0], "pred_2": [""] + preds[1], "pred_3": [""] + preds[2]})
Example no. 15
def aucs(preds, targets):
    targets = to_np(targets)
    preds = to_np(preds)

    aurocs = []
    n = preds.shape[1]
    print(n)
    # preds = np.nan_to_num(preds)  # some values were nan or inf
    for i in range(n):
        aurocs.append(roc_auc_score(targets[:, i], preds[:, i]))
    return T(aurocs)
Example no. 16
def test_transformer_encoder():
    sl = 10
    bs = 2
    in_features = 300
    num_layers = 5
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(
        TransformerEncoderLayers(input_size=in_features,
                                 num_heads=8,
                                 nhid=512,
                                 num_layers=num_layers))
    layer_outputs = transformer(inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
Example no. 17
def test_transformer_decoder_layers():
    sl = 10
    bs = 2
    in_features = 32
    num_layers = 5
    inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(tr.randn([num_layers, sl, bs, in_features]))))
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(
        TransformerDecoderLayers(input_size=in_features,
                                 num_heads=8,
                                 nhid=512,
                                 nlayers=num_layers,
                                 dropout=0.0))
    assert transformer.hidden is None
    layer_outputs = transformer(inputs, encoder_inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
    assert transformer.hidden is None
    # Passing only one timestep through the decoder layers should give the same output
    layer_outputs2 = transformer(inputs[:1], encoder_inputs)
    assert_dims(layer_outputs2, [num_layers, 1, bs, in_features])
    for layer1, layer2 in zip(layer_outputs, layer_outputs2):
        assert ((layer1[0] - layer2[0]).abs() < 1E-6).all()
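Example no. 18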
def eval_text(texts):
    tokens = sp.EncodeAsIds(texts)[:100]
    logits = learner.model(T(tokens).unsqueeze(0))
    sorted_idx = np.argsort(logits.data.cpu().numpy(), 1)
    preds = []
    for i in range(1, 4):
        preds.append([sp.IdToPiece(x) for x in sorted_idx[:, -i].tolist()])
    # preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))
    print(len(preds[0]))
    return pd.DataFrame({
        "orig": sp.EncodeAsPieces(texts)[-90:] + [""],
        "pred_1": [""] + preds[0][-90:],
        "pred_2": [""] + preds[1][-90:],
        "pred_3": [""] + preds[2][-90:]
    })
Example no. 19
def neighbor_gen(at,
                 distance_expansion=None,
                 cutoff=5.0,
                 n_gaussians=25,
                 trainable_gaussians=False,
                 environment_provider=ASEEnvironmentProvider(5.0),
                 collect_triples=False,
                 pair_provider=None,
                 center_positions=True):
    properties = {}
    properties[Structure.Z] = T(at.numbers.astype(np.int64)).unsqueeze(0)

    positions = at.positions.astype(np.float32)
    if center_positions:
        positions -= at.get_center_of_mass()
    properties[Structure.R] = T(positions).unsqueeze(0)

    properties[Structure.cell] = T(at.cell.astype(np.float32)).unsqueeze(0)

    # get atom environment
    idx = 0
    nbh_idx, offsets = environment_provider.get_environment(idx, at)

    properties[Structure.neighbors] = T(nbh_idx.astype(np.int64)).unsqueeze(0)
    properties[Structure.cell_offset] = T(offsets.astype(
        np.float32)).unsqueeze(0)
    properties[Structure.neighbor_mask] = None
    properties['_idx'] = T(np.array([idx], dtype=np.int64)).unsqueeze(0)

    if pair_provider is not None:
        nbh_idx_j, nbh_idx_k = pair_provider.get_environment(nbh_idx)
        properties[Structure.neighbor_pairs_j] = T(nbh_idx_j.astype(np.int64))
        properties[Structure.neighbor_pairs_k] = T(nbh_idx_k.astype(np.int64))

    model = spk.custom.representation.RBF(
        distance_expansion=distance_expansion,
        cutoff=cutoff,
        n_gaussians=n_gaussians,
        trainable_gaussians=trainable_gaussians)
    model = to_gpu(model)
    r, f = model.forward(properties)
    return to_np(r.squeeze()), to_np(f.squeeze())
Example no. 20
def test_MultiHeadAttention_with_mask(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    sl = keys.size(0)
    eq = query.size(1)
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads,
                           nhid=nhid,
                           keys_dim=ed,
                           query_dim=eq,
                           values_dim=ed,
                           dropout=0.3))
    mask = V(T(np.zeros((sl, bs, num_heads))))
    mask[0] = 1
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [bs, num_heads * nhid])
Example no. 21
get_ipython().system('pip install jieba')
import jieba


# In[21]:


texts = "德国 是 世界 大国 之一 , 其 国内 生产总值 以 国际 汇率 计"
tokens = list(map(lambda x: mapping.get(x, 1), texts.split(" ")))
tokens


# In[22]:


logits, _, _ = learner.model(T(tokens).unsqueeze(1))
logits.shape


# In[23]:


sorted_idx = np.argsort(logits.data.cpu().numpy(), 1)
preds = []
for i in range(1, 4):
    preds.append(list(map(lambda x: itos[x], sorted_idx[:, -i])))
# preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))
pd.DataFrame({"orig": list(texts.split(" ")) + [" "],
              "pred_1": [""] + preds[0], "pred_2": [""] + preds[1], "pred_3": [""] + preds[2]})

Example no. 22
def predict(img):
    batch = [T(TFMS(img))]
    inp = VV_(torch.stack(batch))
    return SetupModel.model(inp).mean(0)
Example no. 23
def get_cvae_loss(pad_idx,
                  tchebycheff=False,
                  sigmoid=False,
                  tbc_weights=None,
                  tbc_optimal_point=None,
                  tbc_norm=2):
    STEP = 0
    optimal_point = 0. if tbc_optimal_point is None else tbc_optimal_point
    weights = T([2., 1., 100.]) if tbc_weights is None else tbc_weights

    def cvae_loss(input, target, step=0, max_kld_step=None, **kwargs):
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        sl, bs, vocab = predictions.size()
        # dims are sq-1 times bs times vocab
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        slt = target.size(0)
        bow_targets = bow_logits.unsqueeze_(0).repeat(slt, 1, 1)
        target = target.view(-1).contiguous()
        bow_loss = F.cross_entropy(input=bow_targets.view(-1, vocab),
                                   target=target,
                                   ignore_index=pad_idx,
                                   reduce=False).view(-1, bs)
        bow_loss = bow_loss.mean()
        # targets are sq-1 times bs (one label for every word)
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu,
                                prior_log_var)
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        kld_weight = 1.0 if max_kld_step is None else min(
            (step + 1) / max_kld_step, 1)
        nonlocal STEP
        if step > STEP:
            if step == 0: STEP = 0
            print(
                f"\nlosses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}"
            )
            STEP += 1
        return decoder_loss + bow_loss + kld_loss * kld_weight

    def cvae_loss_tchebycheff(input, target, step=0, **kwargs):
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        sl, bs, vocab = predictions.size()
        # dims are sq-1 times bs times vocab
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        slt = target.size(0)
        bow_targets = bow_logits.unsqueeze_(0).repeat(slt, 1, 1)
        target = target.view(-1).contiguous()
        bow_loss = F.cross_entropy(input=bow_targets.view(-1, vocab),
                                   target=target,
                                   ignore_index=pad_idx,
                                   reduce=False).view(-1, bs)
        bow_loss = bow_loss.mean()
        # targets are sq-1 times bs (one label for every word)
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu,
                                prior_log_var)
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        # kld_weight = 1.0 if max_kld_step is None else min((step + 1) / max_kld_step, 1)
        nonlocal STEP
        if step > STEP:
            print(
                f"\nlosses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss}"
            )
            STEP += 1
        losses = torch.cat(
            [decoder_loss.view(1),
             bow_loss.view(1),
             kld_loss.view(1)])
        loss = tchebycheff_objective(losses,
                                     weights=weights,
                                     norm=tbc_norm,
                                     optimal_point=optimal_point)
        return loss

    def cvae_loss_sigmoid(input, target, step=0, max_kld_step=None, **kwargs):
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        vocab = predictions.size(-1)
        # dims are sq-1 times bs times vocab
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        bow_targets = torch.zeros_like(bow_logits).scatter(
            1, target.transpose(1, 0), 1)
        # mask pad token
        weights = to_gpu(V(torch.ones(bow_logits.size(-1)).unsqueeze_(0)))
        weights[0, pad_idx] = 0
        bow_loss = F.binary_cross_entropy_with_logits(bow_logits,
                                                      bow_targets,
                                                      weight=weights)

        # targets are sq-1 times bs (one label for every word)
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu,
                                prior_log_var)
        target = target.view(-1).contiguous()
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        kld_weight = 1.0 if max_kld_step is None else min(
            (step + 1) / max_kld_step, 1)
        nonlocal STEP
        if step > STEP:
            if step == 0: STEP = 0
            print(
                f"losses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}"
            )
            STEP += 1

        return decoder_loss + bow_loss + kld_loss * kld_weight

    if tchebycheff:
        return cvae_loss_tchebycheff
    elif sigmoid:
        return cvae_loss_sigmoid
    else:
        return cvae_loss
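Example no. 24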
def get_prediction(texts):
    input_tensor = T(np.array([1] + sp.EncodeAsIds(texts))).unsqueeze(1)
    return learn.model(input_tensor)[0].data.cpu().numpy()
Example no. 25
    drive.set_control(True)
    print("Controlling: ", drive.is_controlling())
    input("Press key to start...")
    while True:
        if drive.is_written():
            print("Reading img")
            img = drive.read_image()  # HWC, BGR
            img_np = np.asarray(img).reshape(HEIGHT, WIDTH, 3)[:, :, ::-1]  # .astype('uint8')  # HWC, RGB

            # import pdb; pdb.set_trace()
            # plt.imshow(img_np)
            # plt.show()

            img_np = (img_np / 255).astype('float32')
            x = trn_tfms(img_np)[np.newaxis, ...]  # shape (210, 280, 3) -> (3, 210, 210) -> (1, 3, 210, 210)
            x = Variable(T(x), requires_grad=False, volatile=True)
            output = learner.model(x) # shape (1, 14)
            pred_indicators = transform_range_output(to_np(output[0]), UNIT_RANGES, INDIC_RANGES)
            print("network raw output", output)
            print("pred_indicators", pred_indicators)

            indicators_formatted = format_indicators(pred_indicators)
            print("indicators_formatted", indicators_formatted)

            ground_truth = drive.read_indicators()
            print("ground_truth", ground_truth)

            drive.controller(indicators_formatted)
            drive.update_visualizations(indicators_formatted, ground_truth)
            drive.write(False) # Shared data read, and TORCS may continue
            drive.wait_key(1)
Example no. 26
def print_all_metrics(preds, y):
    preds = T(preds)
    y = T(y)
    for f in METRICS:
        print("%-12s %.3f" % (f.__name__, f(preds, y)))
Example no. 27
def test_BiRNNEncoder():
    ntoken = 4
    emb_sz = 2
    nhid = 6
    # Given a birnnencoder
    encoder = EmbeddingRNNEncoder(ntoken,
                                  emb_sz,
                                  nhid=nhid,
                                  nlayers=2,
                                  pad_token=0,
                                  dropouth=0.0,
                                  dropouti=0.0,
                                  dropoute=0.0,
                                  wdrop=0.0)

    encoder = to_gpu(encoder)
    assert encoder is not None

    weight = encoder.encoder.weight
    assert (4, 2) == weight.shape
    sl = 2
    bs = 3
    np.random.seed(0)
    inputs = np.random.randint(0, ntoken, sl * bs).reshape(sl, bs)
    vin = V(T(inputs))
    # Then the initial output states should be zero
    encoder.reset(bs)
    initial_hidden = encoder.hidden
    h = []
    c = []
    for layer in initial_hidden:
        h.append(layer[0].data.cpu().numpy())
        c.append(layer[1].data.cpu().numpy())
        assert h[-1].sum() == 0
        assert c[-1].sum() == 0
    embeddings = encoder.encoder(vin)
    assert (2, 3, emb_sz) == embeddings.shape

    # Then the new states are different from before
    raw_outputs, outputs = encoder(vin)
    for r, o in zip(raw_outputs, outputs):
        assert np.allclose(to_np(r), to_np(o))
    initial_hidden = encoder.hidden
    h1 = []
    c1 = []
    for hl, cl, layer in zip(h, c, initial_hidden):
        h1.append(to_np(layer[0]))
        c1.append(to_np(layer[1]))
        assert ~np.allclose(hl, h1[-1])
        assert ~np.allclose(cl, c1[-1])

    # Then the new states are different from before
    raw_outputs, outputs = encoder(vin)
    for r, o in zip(raw_outputs, outputs):
        assert np.allclose(to_np(r), to_np(o))
    initial_hidden = encoder.hidden

    for hl, cl, layer in zip(h1, c1, initial_hidden):
        h_new = to_np(layer[0])
        c_new = to_np(layer[1])
        assert ~np.allclose(hl, h_new)
        assert ~np.allclose(cl, c_new)
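Example no. 28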
        new_matrix[i] = weights['0.embed.weight'][mapping_orig[w]]
        hits += 1
new_matrix[BEG, :] = 0
hits, hits * 100 / len(itos[3:])


# In[20]:


new_matrix[n_toks:, :] = weights['0.embed.weight'][-200:, :]


# In[21]:


weights['0.embed.weight'] = T(new_matrix)
weights['1.weight'] = T(np.copy(new_matrix)[:-200, :])
weights['0.embed.weight'].shape


# ## Language Model

# In[11]:


n_toks, tokens_train, tokens_val, tokens_test = joblib.load("../data/cache/rating_unigram_tokens.pkl")


# In[12]:

Example no. 29
class MyRnn(nn.Module):
    def __init__(self, es, hl, n_classes):
        super().__init__()
        self.hl = hl
        self.embeddings = nn.Embedding(n_classes, es)
        self.rnn = nn.RNN(es, hl)
        self.linear = nn.Linear(hl, n_classes)

    def forward(self, *input):
        bs = input[0].size(0)
        hiddens = V(torch.zeros(1, bs, self.hl))
        x = self.embeddings(V(torch.stack(input)))
        outputs, hiddens = self.rnn(x, hiddens)
        return F.softmax(self.linear(outputs), -1)


if __name__ == '__main__':
    b = T([[1, 1], [0, 0]])

    dot = EmbeddingDot(2, 3)
    print(dot.forward(b, []))

    # print(dot.model)
    # print([a for a in dot.parameters()])
    #
    # print(dot.forward(b, []))
    # print([a for a in dot.parameters()])

    rnn = MyRnn(3, 10, 4)
    rnn.forward(T([1, 2]), T([0, 0]))
Example no. 30
def predict(img):
    batch = [T(SetupModel.tfms(img))]
    inp = VV_(torch.stack(batch))
    return SetupModel.model(inp)