def _greedy_forward(self, inputs, hidden=None, constraints=None):
        dec_inputs = inputs
        max_iterations = min(dec_inputs.size(0), self.MAX_STEPS_ALLOWED) if self.training else self.max_iterations
        inputs = V(inputs[:1].data)  # inputs should be only first token initially [1,bs]
        sl, bs = inputs.size()
        finished = to_gpu(torch.zeros(bs).byte())
        iteration = 0
        self.beam_outputs = inputs.clone()
        final_outputs = []
        while not finished.all() and iteration < max_iterations:
            # output is a list of [sl, bs, layer_dim] tensors; sl should be one
            if 0 < iteration and self.training and 0. < self.random() < self.pr_force:
                inputs = dec_inputs[iteration].unsqueeze(0)
            output = self.forward(inputs, hidden=hidden, num_beams=0, constraints=constraints)
            hidden = self.decoder_layer.hidden
            final_outputs.append(output)  # dim should be [sl=1, bs, nt]
            # inputs are the argmax indices, dims [1, bs]; repackage the Variable so no grad flows back
            inputs = assert_dims(V(output.data.max(dim=-1)[1]), [1, bs])
            iteration += 1
            self.beam_outputs = assert_dims(torch.cat([self.beam_outputs, inputs], dim=0), [iteration + 1, bs])
            new_finished = (inputs.data == self.eos_token).view(-1)  # [bs], matches finished
            finished = finished | new_finished
            # stop early so the output doesn't grow too big to fit in memory

        self.beam_outputs = self.beam_outputs.view(-1, bs, 1)
        # outputs should be [sl, bs, nt]
        outputs = torch.cat(final_outputs, dim=0)
        return outputs
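
The loop above mixes greedy decoding with scheduled teacher forcing (pr_force). A minimal, self-contained sketch of the greedy part, assuming a step(tokens, hidden) callable that returns one step of logits; the names are illustrative, not taken from the module above:

import torch

def greedy_decode(step, first_token, hidden, eos_token, max_steps=50):
    # step(tokens, hidden) -> (logits [1, bs, vocab], new hidden)
    tokens = first_token                               # [1, bs]
    finished = torch.zeros(tokens.size(1), dtype=torch.bool)
    logits_per_step = []
    for _ in range(max_steps):
        logits, hidden = step(tokens, hidden)
        logits_per_step.append(logits)
        tokens = logits.argmax(dim=-1)                 # greedy pick, [1, bs]
        finished |= (tokens[0] == eos_token)
        if finished.all():                             # every sequence emitted EOS
            break
    return torch.cat(logits_per_step, dim=0)           # [sl, bs, vocab]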
Example #2
def decoder_inputs(decoder_params):
    batch_size = decoder_params.batch_size
    inputs = np.zeros((1, batch_size), dtype=np.int64)  # np.int is deprecated
    enc_inputs = np.random.rand(1, batch_size, decoder_params.emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return vin, ven
Example #3
def attention_setup(request):
    sl, bs = 3, 2
    edq, edk = request.param

    # query would be the hidden state of the decoder
    keys = to_gpu(V(T(np.random.rand(sl, bs, edk))))
    query = to_gpu(V(T(np.random.rand(bs, edq))))
    return keys, query
Example #4
def test_MLPAttention(attention_setup):
    keys, query = attention_setup
    ed = keys.size(2)
    bs = query.size(0)
    in_features = keys.size(2) + query.size(1)
    attention = to_gpu(MLPAttention(in_features=in_features, nhid=200))
    result = attention(query=V(query), keys=V(keys), values=V(keys))
    assert (bs, ed) == result.shape
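
MLPAttention here is additive (Bahdanau-style) attention: the query and each key are concatenated, scored by a small MLP, and the scores weight the values. A minimal sketch of that scheme in modern PyTorch (an illustration, not the library's actual class):

import torch
import torch.nn as nn

class AdditiveAttention(nn.Module):
    # score(q, k) = v^T tanh(W [q; k])
    def __init__(self, in_features, nhid):
        super().__init__()
        self.proj = nn.Linear(in_features, nhid)
        self.v = nn.Linear(nhid, 1, bias=False)

    def forward(self, query, keys, values):
        # query: [bs, eq]; keys, values: [sl, bs, ek]; in_features = eq + ek
        sl = keys.size(0)
        q = query.unsqueeze(0).expand(sl, -1, -1)                 # [sl, bs, eq]
        scores = self.v(torch.tanh(self.proj(torch.cat([q, keys], dim=-1))))
        weights = torch.softmax(scores, dim=0)                    # over sl
        return (weights * values).sum(dim=0)                      # [bs, ek]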
Example #5
def decoder_inputs_transformer():
    batch_size = 2
    emb_size = 12
    nlayers = 8
    sl = 3
    inputs = np.zeros((1, batch_size), dtype=np.int64)  # np.int is deprecated
    enc_inputs = np.random.rand(nlayers, sl, batch_size, emb_size)
    vin = V(T(inputs))
    ven = V(T(enc_inputs))
    return batch_size, emb_size, nlayers, sl, vin, ven
Example #6
def test_MultiHeadAttention_with_mask(self_attention_setup):
    keys, query = self_attention_setup
    slk, bs, ek = keys.size()
    slq, bs, eq = query.size()
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek, query_dim=eq, values_dim=ek, dropout=0.3))
    mask = T(np.tril(np.ones((bs, num_heads, slq, slk)))).float()  # causal mask: query i attends to keys <= i
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [slq, bs, num_heads * nhid])
Example #7
def test_MultiHeadAttention(self_attention_setup):
    keys, query = self_attention_setup
    slk, bs, ek = keys.size()
    slq, bs, eq = query.size()
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek, query_dim=eq, values_dim=ek, dropout=0.3))

    result = attention(query=V(query), keys=V(keys), values=V(keys))
    assert_dims(result, [slq, bs, num_heads * nhid])
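
Each of the num_heads heads projects queries, keys, and values down to nhid dimensions, attends independently, and the head outputs are concatenated, which is why the result is [slq, bs, num_heads * nhid]. A compact sketch of that computation, assuming wq, wk, wv are nn.Linear maps to num_heads * nhid features (illustrative, not the library's implementation):

import torch

def multi_head(q, k, v, wq, wk, wv, num_heads, nhid):
    # q: [slq, bs, eq]; k, v: [slk, bs, ek]
    slq, bs, _ = q.shape

    def split(x, w):  # project, then reshape to [bs, heads, sl, nhid]
        return w(x).view(x.size(0), bs, num_heads, nhid).permute(1, 2, 0, 3)

    Q, K, Vv = split(q, wq), split(k, wk), split(v, wv)
    att = torch.softmax(Q @ K.transpose(-2, -1) / nhid ** 0.5, dim=-1)
    out = att @ Vv                                      # [bs, heads, slq, nhid]
    return out.permute(2, 0, 1, 3).reshape(slq, bs, num_heads * nhid)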
Example #8
def test_hred_training_parameters(model, hredmodel):
    *xs, y = next(iter(hredmodel.trn_dl))
    xs = V(xs)
    y = V(y)
    optimizer = Adam(model.parameters())
    output = model(*xs)
    optimizer.zero_grad()
    loss = decoder_loss(input=output[0], target=y, pad_idx=hredmodel.pad_idx)
    loss.backward()
    model_parameters = get_trainable_parameters(model)
    grad_flow_parameters = get_trainable_parameters(model, grad=True)
    assert set(model_parameters) == set(grad_flow_parameters)
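
get_trainable_parameters is not shown in these examples; the intent of the test is that every trainable parameter receives a gradient after backward(). A plausible minimal version of such a helper (an assumption, not the repo's code):

def get_trainable_parameters(model, grad=False):
    # names of parameters that require grad; with grad=True, keep only those
    # that actually received a gradient in the last backward pass
    return [name for name, p in model.named_parameters()
            if p.requires_grad and (not grad or p.grad is not None)]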
Example #9
def attention_projection_setup(request):
    sl, bs = 3, 2
    edq, edk = request.param

    encoder_outputs = V(T(np.random.rand(sl, bs, edk)))
    # query would be the hidden state of the decoder
    decoder_output = V(T(np.random.rand(bs, edq)))
    params = {"n_out": 10,
              "n_in": edk,
              "dropout": 0.2,
              "att_nhid": 13
              }
    return encoder_outputs, decoder_output, params
Example #10
def test_select_hidden_by_index():
    bs, num_beams = 2, 3
    # when I pass inputs to the select_hidden_by_index function with bs=2, num_beams = 3
    inputs = np.array([2, 3, 4, 10, 11, 12]).reshape(1, 6, 1)  # [ndir, bs * num_beams, hd]
    tr_inputs = [V(T(inputs))]
    # and  indices for every batch [bs, ndims]
    indices = np.array([[0, 0, 1], [2, 2, 2]])
    tr_indices = V(T(indices))
    tr_indices = reshape_parent_indices(tr_indices.view(-1), bs=bs, num_beams=num_beams)
    results = select_hidden_by_index(tr_inputs, tr_indices.view(-1))
    # then I get the expected selected hidden states
    expected = np.array([2, 2, 3, 12, 12, 12])
    assert_allclose(actual=to_np(results[0]).ravel(), desired=expected)
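
Neither helper's source is shown, but the expected values pin down the behaviour: beam indices are local to each batch, so reshape_parent_indices adds per-batch offsets before select_hidden_by_index gathers columns of the flattened [bs * num_beams] dimension. A minimal sketch consistent with the assertion above (assumed, not the repo's code):

import torch

def reshape_parent_indices(indices, bs, num_beams):
    # e.g. bs=2, num_beams=3: [0, 0, 1, 2, 2, 2] -> [0, 0, 1, 5, 5, 5]
    offsets = torch.arange(bs).repeat_interleave(num_beams) * num_beams
    return indices + offsets

def select_hidden_by_index(hidden_states, indices):
    # gather beam columns from each [ndir, bs * num_beams, hd] tensor
    return [h.index_select(1, indices) for h in hidden_states]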
Example #11
def test_cvae_training_parameters(model, hredmodel, tchebycheff, sigmoid):
    *xs, y = next(iter(hredmodel.trn_dl))
    xs = V(xs)
    y = V(y)
    optimizer = Adam(model.parameters())
    output = model(*xs)
    optimizer.zero_grad()
    cvae_loss = get_cvae_loss(pad_idx=hredmodel.pad_idx,
                              tchebycheff=tchebycheff,
                              sigmoid=sigmoid)
    loss = cvae_loss(input=output[0], target=y)
    loss.backward()
    model_parameters = get_trainable_parameters(model)
    grad_flow_parameters = get_trainable_parameters(model, grad=True)
    assert set(model_parameters) == set(grad_flow_parameters)
Example #12
 def reparameterize(self, mu, logvar):
     if self.training:
         std = torch.exp(0.5 * logvar)
         eps = to_gpu(V(torch.randn(self.latent_dim)))
         return mu + eps * std
     else:
         return mu
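
The reparameterization trick rewrites a sample from N(mu, sigma^2) as mu + eps * sigma with eps ~ N(0, I), keeping the sample differentiable with respect to mu and logvar. A quick self-contained check of that property (latent_dim = 4 is arbitrary):

import torch

mu = torch.zeros(4, requires_grad=True)
logvar = torch.zeros(4, requires_grad=True)
z = mu + torch.randn(4) * torch.exp(0.5 * logvar)  # reparameterized sample
z.sum().backward()
assert mu.grad is not None and logvar.grad is not None  # gradients flow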
Example #13
def test_MultiHeadAttention(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    eq = query.size(1)
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads,
                           nhid=nhid,
                           keys_dim=ed,
                           query_dim=eq,
                           values_dim=eq))

    result = attention(query=V(query), keys=V(keys), values=V(keys))
    assert_dims(result, [bs, num_heads * nhid])
Example #14
    def cvae_loss_sigmoid(input, target, step=0, max_kld_step=None, **kwargs):
        predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
        vocab = predictions.size(-1)
        # predictions dims: [sl-1, bs, vocab]
        dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
        bow_targets = torch.zeros_like(bow_logits).scatter(
            1, target.transpose(1, 0), 1)
        # mask pad token
        weights = to_gpu(V(torch.ones(bow_logits.size(-1)).unsqueeze_(0)))
        weights[0, pad_idx] = 0
        bow_loss = F.binary_cross_entropy_with_logits(bow_logits,
                                                      bow_targets,
                                                      weight=weights)

        # targets dims: [sl-1, bs] (one label per word)
        kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu,
                                prior_log_var)
        target = target.view(-1).contiguous()
        decoder_loss = F.cross_entropy(
            input=dec_input,
            target=target,
            ignore_index=pad_idx,
        )
        kld_weight = 1.0 if max_kld_step is None else min(
            (step + 1) / max_kld_step, 1)
        nonlocal STEP
        if step > STEP:
            if step == 0: STEP = 0
            print(
                f"losses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}"
            )
            STEP += 1

        return decoder_loss + bow_loss + kld_loss * kld_weight
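
gaussian_kld is not shown either; for two diagonal Gaussians the KL divergence has the standard closed form below, which is presumably what the loss uses (a sketch under that assumption):

import torch

def gaussian_kld(recog_mu, recog_log_var, prior_mu, prior_log_var):
    # KL( N(recog_mu, recog_var) || N(prior_mu, prior_var) ), diagonal covariance
    kld = -0.5 * torch.sum(
        1 + (recog_log_var - prior_log_var)
        - torch.exp(recog_log_var - prior_log_var)
        - (recog_mu - prior_mu).pow(2) / prior_log_var.exp(),
        dim=-1)
    return kld.mean()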
Example #15
def make_predictions():
    try:
        content = request.get_json(force=True)
    except HTTPException as e:
        return jsonify({'error': 'Request data invalid'}), 400

    img_str = base64.b64decode(str(content['image']))
    nparr = np.frombuffer(img_str, np.uint8)  # np.fromstring is deprecated

    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR).astype(np.float32) / 255
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    height, width, channels = img.shape

    im = val_tfms(img)
    output = model(V(im[None]))

    output = to_np(output)

    bb_i = expit(output[:, :4])
    y, x, y2, x2 = bb_i[0]
    bb_scaled = [y * height, x * width, y2 * height, x2 * width]
    bb_np = bb_hw(bb_scaled)

    c_i = output[:, 4:]

    class_pred = itoa[np.argmax(c_i)]
    return jsonify({'class': class_pred, 'bb': [int(b) for b in bb_np]})
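
A possible client for this endpoint, base64-encoding an image and posting it as JSON (the URL and route name are assumptions; they depend on how the Flask app registers make_predictions):

import base64
import requests

with open("image.jpg", "rb") as f:
    payload = {"image": base64.b64encode(f.read()).decode()}
resp = requests.post("http://localhost:5000/predict", json=payload)
print(resp.json())  # e.g. {'class': ..., 'bb': [x, y, w, h]}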
Example #16
def test_layer_norm():
    sl = 10
    bs = 2
    in_features = 32
    inputs = to_gpu(V(tr.randn([sl, bs, in_features])))
    layernorm = to_gpu(LayerNorm(in_features))
    outputs = layernorm(inputs)
    assert_dims(outputs, [sl, bs, in_features])
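
These examples predate nn.LayerNorm, so the library ships its own LayerNorm. A standard minimal implementation of layer normalization over the last dimension, for reference (not necessarily the library's exact code):

import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(features))   # learned scale
        self.beta = nn.Parameter(torch.zeros(features))   # learned shift
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.gamma * (x - mean) / (std + self.eps) + self.beta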
Example #17
def test_model(s2smodel):
    ntoken = [s2smodel.nt[name] for name in s2smodel.trn_dl.source_names]
    model = Transformer(ntoken=ntoken,
                        max_tokens=5,
                        eos_token=s2smodel.eos_idx)
    model = to_gpu(model)
    *xs, y = next(iter(s2smodel.trn_dl))
    xs = V(xs)
    y = V(y)
    optimizer = Adam(model.parameters())
    output = model(*xs)
    optimizer.zero_grad()
    loss = decoder_loss(input=output[0], target=y, pad_idx=s2smodel.pad_idx)
    loss.backward()
    model_parameters = get_trainable_parameters(model)
    grad_flow_parameters = get_trainable_parameters(model, grad=True)
    assert set(model_parameters) == set(grad_flow_parameters)
Example #18
def test_cell(cell_type, hidden_type):
    sl, bs, input_size, output_size = 8, 10, 12, 14
    cell = Cell(cell_type, input_size, output_size, dropout=0.0, wdrop=0.0)
    cell = to_gpu(cell)
    inputs = V(tr.rand(sl, bs, input_size))
    hidden = cell.hidden_state(bs)
    outputs, hidden = cell(inputs, hidden)
    assert (sl, bs, output_size) == outputs.shape
    assert isinstance(hidden, hidden_type)
Example #19
    def predict(self, x):
        fake_labels = np.array([0] * len(x))

        ds = TextDataset(x, fake_labels)
        dl = DataLoader(ds, 1000, transpose=True, num_workers=1, pad_idx=1)

        preds = predict(self.m, dl)
        sm = Softmax(dim=-1)  # an explicit dim avoids the deprecation warning
        return to_np(sm(V(T(preds))))
Example #20
def test_transformer_layer():
    sl = 10
    bs = 2
    in_features = 32
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(TransformerLayer(in_features=in_features, num_heads=8))
    outputs = transformer(inputs)
    assert_dims(outputs, [sl, bs, in_features])
Example #21
 def predict(self, image):
     """
     input: PIL image (w, h, c)
     output: prob np.array
     """
     image = V(self.tfm(image)[None])
     py = torch.sigmoid(self(image))
     prob = py.detach().cpu().numpy()[0]
     return prob
Example #22
def test_transformer_layer_decoder():
    sl = 10
    bs = 2
    in_features = 32
    tr.random.manual_seed(0)
    encoder_inputs = tr.randn([sl, bs, in_features])
    decoder_inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(encoder_inputs)))
    decoder_inputs = to_gpu(V(T(decoder_inputs)))
    transformer = to_gpu(
        TransformerLayerDecoder(input_size=in_features,
                                num_heads=8,
                                nhid=64,
                                dropout=0))
    outputs = transformer(encoder_inputs, decoder_inputs)
    assert_dims(outputs, [sl, bs, in_features])
    outputs1 = transformer(encoder_inputs, decoder_inputs[:1])
    assert_dims(outputs1, [1, bs, in_features])
    assert ((outputs[0] - outputs1[0]).abs() < 1E-6).all()
Example #23
def test_MultiHeadAttention_with_mask(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    sl = keys.size(0)
    eq = query.size(1)
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads,
                           nhid=nhid,
                           keys_dim=ed,
                           query_dim=eq,
                           values_dim=ed,
                           dropout=0.3))
    mask = V(T(np.zeros((sl, bs, num_heads))))
    mask[0] = 1
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [bs, num_heads * nhid])
Example #24
 def forward(self, inp):
     sl, bs = inp.size()
     # sl = sequence length, bs = batch size
     h = self.initHidden(bs)
     emb = self.emb_enc_drop(self.emb_enc(inp))
     enc_out, h = self.gru_enc(emb, h)
     h = h.view(2, 2, bs, -1).permute(0, 2, 1,
                                      3).contiguous().view(2, bs, -1)
     h = self.out_enc(self.drop_enc(h))
     # h = hidden state obtained from the encoder
     dec_inp = V(torch.zeros(bs).long())
     res = []
     # decoder loop
     for i in range(self.out_sl):
         emb = self.emb_dec(dec_inp).unsqueeze(0)
         outp, h = self.gru_dec(emb, h)
         outp = self.out(self.out_drop(outp[0]))
         res.append(outp)
         dec_inp = V(outp.data.max(1)[1])
         if (dec_inp == 1).all(): break
     return torch.stack(res)
Example #25
    def forward(self, inp, y=None):
        sl, bs = inp.size()
        h = self.initHidden(bs)

        # sl = sequence length, bs = batch size
        emb = self.emb_enc_drop(self.emb_enc(inp))
        enc_out, h = self.gru_enc(emb, h)
        h = h.view(2, 2, bs, -1).permute(0, 2, 1,
                                         3).contiguous().view(2, bs, -1)
        h = self.out_enc(self.drop_enc(h))

        # h = hidden state obtained from the encoder
        dec_inp = V(torch.zeros(bs).long())
        res, attns = [], []
        w1e = enc_out @ self.W1

        for i in range(self.out_sl):

            # additive attention: score each encoder output against the decoder state
            w2h = self.l2(h[-1])
            u = torch.tanh(w1e + w2h)  # torch.tanh replaces the deprecated F.tanh
            a = F.softmax(u @ self.V, 0)
            attns.append(a)
            Xa = (a.unsqueeze(2) * enc_out).sum(0)
            emb = self.emb_dec(dec_inp)

            # combine the embedding with the attention context before decoding
            wgt_enc = self.l3(torch.cat([emb, Xa], 1))

            outp, h = self.gru_dec(wgt_enc.unsqueeze(0), h)
            outp = self.out(self.out_drop(outp[0]))
            res.append(outp)
            dec_inp = V(outp.data.max(1)[1])
            if (dec_inp == 1).all(): break

            # teacher forcing: with probability pr_force, feed the ground-truth token
            if (y is not None) and (random.random() < self.pr_force):
                if i >= len(y): break
                dec_inp = y[i]
        return torch.stack(res)
Example #26
 def forward(self, input_tensor, keys_vector, values_vector, mask=False):
     self_attention_outputs = []
     sl, bs, _ = keys_vector.size()
     for index, input_step in enumerate(input_tensor, 1):
         if mask:
             mask_ = V(tr.zeros(sl, bs, self.num_heads))
             mask_[:index] = 1
         else:
             mask_ = None
         self_attention_outputs.append(
             self.attention(query=input_step, keys=keys_vector,
                            values=values_vector, mask=mask_))  # dims [bs, dims]
     return tr.stack(self_attention_outputs, dim=0)  # dims [sl, bs, dims]
Example #27
def make_predictions():
    try:
        content = request.data
    except HTTPException as e:
        return jsonify({'error': 'Request data invalid'}), 400

    # strip the data-URL prefix ("data:image/...;base64,") and decode
    content = content.decode().split(',')[1]
    img_str = base64.b64decode(str(content))
    nparr = np.frombuffer(img_str, np.uint8)  # np.fromstring is deprecated

    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR).astype(np.float32) / 255
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    height, width, channels = img.shape

    im = val_tfms(img)
    output = model(V(torch.from_numpy(im[None])))
    output = to_np(output)

    bb_i = expit(output[:, :4])
    y, x, y2, x2 = bb_i[0]
    bb_scaled = [y * height, x * width, y2 * height, x2 * width]
    bb_np = bb_hw(bb_scaled)

    c_i = output[:, 4:]
    class_pred = itoa[np.argmax(c_i)]
    return jsonify({'class': class_pred, 'bb': [int(b) for b in bb_np]})
Example #28
def test_transformer_decoder_layers():
    sl = 10
    bs = 2
    in_features = 32
    num_layers = 5
    inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(tr.randn([num_layers, sl, bs, in_features]))))
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(
        TransformerDecoderLayers(input_size=in_features,
                                 num_heads=8,
                                 nhid=512,
                                 nlayers=num_layers,
                                 dropout=0.0))
    assert transformer.hidden is None
    layer_outputs = transformer(inputs, encoder_inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
    assert transformer.hidden is None
    # passing only the first timestep through the decoder layers should yield the same first output
    layer_outputs2 = transformer(inputs[:1], encoder_inputs)
    assert_dims(layer_outputs2, [num_layers, 1, bs, in_features])
    for layer1, layer2 in zip(layer_outputs, layer_outputs2):
        assert ((layer1[0] - layer2[0]).abs() < 1E-6).all()
Example #29
def test_transformer_encoder():
    sl = 10
    bs = 2
    in_features = 300
    num_layers = 5
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(
        TransformerEncoderLayers(input_size=in_features,
                                 num_heads=8,
                                 nhid=512,
                                 num_layers=num_layers))
    layer_outputs = transformer(inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
Example #30
def test_SDPAttention(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    eq = query.size(1)
    attention = to_gpu(SDPAttention(in_features=ed))
    if ed != eq:
        with pytest.raises(RuntimeError):
            result = attention(query=V(query), keys=V(keys), values=V(keys))
    else:
        result = attention(query=V(query), keys=V(keys), values=V(keys))
        assert (bs, ed) == result.shape
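
Scaled dot-product attention scores each key by its dot product with the query, which is why SDPAttention raises a RuntimeError when the key and query dimensions differ. A minimal sketch for a single (non-multi-head) query (illustrative, not the library's code):

import math
import torch

def sdp_attention(query, keys, values):
    # query: [bs, e]; keys, values: [sl, bs, e]
    scores = (keys * query.unsqueeze(0)).sum(-1) / math.sqrt(query.size(-1))  # [sl, bs]
    weights = torch.softmax(scores, dim=0)          # normalize over the keys
    return (weights.unsqueeze(-1) * values).sum(0)  # [bs, e]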