Beispiel #1
0
def generation_argmax(rnn, X, eos, start="", maxlen=200):
    """Greedily generate a sequence with ``rnn``, seeded by ``start``.

    The prompt is encoded with ``string2code`` and run through
    ``rnn.forward`` from an all-zero hidden state. The highest-scoring symbol
    is then fed back through ``rnn.one_step`` until ``eos`` is produced or
    ``maxlen`` symbols have been generated.

    Args:
        rnn: model exposing ``dim_latent``, ``forward``, ``one_step`` and
            ``decode``.
        X: ignored -- the input is always rebuilt from ``start``. The
            parameter is kept so existing callers keep working.
        eos: code of the end-of-sequence symbol.
        start: prompt string.
        maxlen: upper bound on the number of generated symbols (at least one
            symbol is always produced).

    Returns:
        A tuple ``(sequence, logits)``: the generated codes converted by
        ``code2string``, and the decoder scores of the selected symbols.
        ``logits`` is returned as computed and is not moved to the CPU.
    """
    X = torch.unsqueeze(string2code(start), 1)
    with torch.no_grad():
        h0 = torch.zeros(1, rnn.dim_latent)
        ht1 = rnn.forward(X.to(device), h0.to(device))
        dt1 = rnn.decode(torch.unsqueeze(ht1[-1], 0))
        yt1 = F.softmax(dt1, dim=2).argmax(dim=2)
        # Only the last hidden state is needed to continue step by step.
        ht1 = ht1[-1]
        logits = dt1[0, 0, yt1]
        i = 1
        while yt1[-1] != eos and i < maxlen:
            ht1 = rnn.one_step(torch.unsqueeze(yt1[-1], 0), ht1)
            dt1 = rnn.decode(torch.unsqueeze(ht1, 0))
            y = F.softmax(dt1, dim=2).argmax(dim=2)
            yt1 = torch.cat((yt1, y))
            logits = torch.cat((logits, dt1[0, 0, yt1[-1]].unsqueeze(0)))
            i += 1

    # NOTE: no bare ``tensor.cpu()`` calls here. ``Tensor.cpu()`` returns a
    # copy instead of moving the tensor in place, so discarding its result
    # does nothing -- and calling it on ``y`` raised UnboundLocalError
    # whenever the loop above never executed.
    sequence = code2string(yt1)

    return sequence, logits
def generate_beam(model, eos, device, k, start="", maxlen=200):
    """Generate a sequence by following the single best beam at every step.

    At each step the model is advanced by one call, ``p_nucleus`` proposes
    ``(probabilities, indices)`` candidates from the new hidden state, and the
    candidate with the best cumulative log-probability is kept (the beam width
    is fixed to 1, so decoding is greedy over the proposed candidates).

    Args:
        model: network exposing ``initHidden``, ``decode`` and ``eval``;
            calling it as ``model(x, *h)`` must return the new hidden state(s).
        eos: code of the end-of-sequence symbol.
        device: device on which inputs and hidden states are placed.
        k: forwarded to ``p_nucleus(model.decode, k)``.
        start: prompt; it must be a key of ``lettre2id``.
        maxlen: maximum number of generation steps.

    Returns:
        The result of ``code2string`` applied to the prompt code followed by
        the generated codes (including ``eos`` when it was reached).
    """
    def extend_beams(beams, probs_idx, top_k):
        """Extend every beam by each candidate and keep the ``top_k`` best."""
        candidates = [
            (seq + [char_index.item()], score + np.log(p.item()))
            for seq, score in beams
            for p, char_index in zip(*probs_idx)
        ]
        # list.sort is stable, so ties keep candidate order.
        candidates.sort(key=lambda val: val[1], reverse=True)
        return candidates[:top_k]

    x = torch.tensor(string2code(start)).unsqueeze(-1).to(device)
    h = [v.to(device) for v in model.initHidden(1)]
    codes = [lettre2id[start]]
    model.eval()
    p = p_nucleus(model.decode, k)
    # Running beam state: extending it one step at a time yields exactly what
    # re-decoding the whole prediction history would, without the quadratic
    # recomputation.
    beams = [([], 0)]
    with torch.no_grad():
        for _ in range(maxlen):
            h = model(x, *h)
            beams = extend_beams(beams, p(h[0]), top_k=1)
            token = beams[0][0][-1]
            codes.append(token)
            if token == eos:
                break
            # Feed the chosen symbol back as the next input. It is shaped
            # (1, 1), which is the shape ``x`` has for a one-symbol prompt
            # (assuming string2code returns a 1-D sequence of codes --
            # TODO confirm against the model's expected input).
            x = torch.tensor([token]).unsqueeze(-1).to(device)
    return code2string(codes)
def nucleus_sampling(rnn, emb, decoder, latent_size, eos=1, k_max=10, start="", maxlen=30, dict_size=96, threshold=0.8):
    """Generate several continuations of ``start`` with a variable-width beam.

    The prompt is fed symbol by symbol, the ``k_max`` most probable first
    symbols open the beams, and each of the ``maxlen`` following steps keeps
    the smallest number of joint (beam, symbol) candidates (between 2 and
    ``k_max``) whose normalised scores sum to at least ``threshold``.

    Args:
        rnn: recurrent cell called as ``rnn(x, h)``; it must also expose a
            ``memory`` attribute that can be indexed per beam.
        emb: embedding applied to CPU tensors of symbol codes.
        decoder: maps hidden states to scores; a softmax is applied on dim 1.
        latent_size: size of the hidden state.
        eos: unused by this function (generation never stops early).
        k_max: number of initial beams and upper bound on candidates kept.
        start: prompt string.
        maxlen: number of generation steps after the first symbol.
        dict_size: used to split a flattened candidate index into
            (beam, symbol); presumably must equal the decoder's output
            size -- TODO confirm.
        threshold: cumulative score at which candidate selection stops.

    Returns:
        A list with one ``code2string`` result per surviving beam.

    Note:
        All tensors are placed on ``'cuda'``; the function needs a CUDA device.
    """

    # Zero initial hidden state, then consume the prompt one symbol at a time.
    h = torch.zeros(1, latent_size).to('cuda')
    for j in range(len(start)):
        h = rnn(emb(string2code(start[j])).to('cuda'), h)

    n = 0

    # The k_max most probable first symbols open the beams.
    maximum, index = torch.topk(torch.nn.functional.softmax(decoder(h), 1), k_max)
    probas = maximum.squeeze()
    # Replicate the prompt's hidden state once per beam.
    h = torch.stack([h.squeeze() for j in range(k_max)]).to('cuda')

    # One row per beam, holding the symbols generated so far.
    final_seq = index.transpose(0,1)

    # NOTE(review): `count` is never read.
    count = 0
    while n<maxlen:
        n+=1

        # Advance every beam with the symbol it selected last (embedded on CPU).
        h = rnn(emb(index.squeeze().to('cpu')).to('cuda'),h)

        # Grow the candidate count k until the kept mass p reaches `threshold`
        # or k exceeds k_max.
        # NOTE(review): if k_max < 2 this loop never runs and `maximum`/`index`
        # keep their values from the previous step.
        k = 2
        p = 0
        while k<=k_max and p<threshold:

            # First pass: only the sum of the top-k scaled probabilities
            # (sum_max) is used, as the normaliser of the second pass.
            s = torch.nn.functional.softmax(decoder(h), 1).flatten()/k
            maximum, index = torch.topk(s, k)
            sum_max = torch.sum(maximum)

            # Second pass: beam probability times next-symbol probability,
            # flattened over (beam, symbol) and divided by sum_max.
            s = (probas * (torch.nn.functional.softmax(decoder(h), 1).transpose(0, 1))).transpose(0, 1).flatten()/sum_max

            maximum, index = torch.topk(s, k)
            k+=1
            p = torch.sum(maximum)

        # index // dict_size is the parent beam of each kept candidate;
        # re-gather all per-beam state accordingly.
        probas = (probas[index//dict_size]*maximum)
        h = h[index//dict_size]

        final_seq = final_seq[index//dict_size]

        rnn.memory = rnn.memory[index//dict_size]

        # index % dict_size is the symbol chosen for each kept candidate.
        index = index%dict_size

        final_seq = torch.cat((final_seq, index.unsqueeze(1)), 1)

    # Decode every surviving beam.
    final_sentences = []
    for i in final_seq:
        final_sentences.append(code2string(i.to('cpu').numpy()))

    return final_sentences
def generate(rnn, device, eos, start="", maxlen=200):
    """Sample a sequence from ``rnn``, one symbol per step.

    At each step the model is advanced by one call, ``rnn.decode`` is applied
    to the first returned state, and the next symbol is drawn from a
    categorical distribution over the exponentiated decoder output
    (so ``decode`` presumably returns log-probabilities -- TODO confirm).

    Args:
        rnn: network exposing ``initHidden``, ``decode`` and ``eval``;
            calling it as ``rnn(x, *h)`` must return the new hidden state(s).
        device: device on which inputs and hidden states are placed.
        eos: code of the end-of-sequence symbol.
        start: prompt; it must be a key of ``lettre2id``.
        maxlen: maximum number of sampled symbols.

    Returns:
        The result of ``code2string`` applied to the prompt code followed by
        the sampled codes (including ``eos`` when it was drawn).
    """
    x = torch.tensor(string2code(start)).unsqueeze(-1).to(device)
    h = [v.to(device) for v in rnn.initHidden(1)]
    codes = [lettre2id[start]]
    rnn.eval()
    with torch.no_grad():
        for _ in range(maxlen):
            h = rnn(x, *h)
            d = rnn.decode(h[0])
            probs = torch.exp(d)
            sampled = torch.distributions.categorical.Categorical(probs).sample()
            codes.append(sampled.item())
            if sampled.item() == eos:
                break
            # Feed the sampled symbol back as the next input; previously it
            # was stored in an unused variable and the prompt was re-fed at
            # every step.
            x = sampled.unsqueeze(-1).to(device)
    return code2string(codes)
def generate(rnn, emb, decoder, latent_size, eos=1, start="a", maxlen=30):
    """Greedily generate a sequence from a recurrent cell and a decoder.

    The prompt is consumed symbol by symbol starting from an all-zero hidden
    state. The most likely symbol according to ``decoder`` (which returns
    logits, i.e. the values fed to the softmax) is then repeatedly fed back
    until ``eos`` is produced or ``maxlen`` further steps have been taken.

    Args:
        rnn: recurrent cell called as ``rnn(x, h)`` and returning the new
            hidden state.
        emb: embedding applied to CPU tensors of symbol codes.
        decoder: maps a hidden state to logits over the symbols.
        latent_size: size of the hidden state.
        eos: code of the end-of-sequence symbol.
        start: prompt string.
        maxlen: maximum number of feedback steps after the first symbol.

    Returns:
        The result of ``code2string`` applied to the generated codes.

    Note:
        Hidden states and embedded inputs are placed on ``'cuda'``; the
        function needs a CUDA device.
    """
    # Pure inference: without no_grad the chained hidden states would keep an
    # autograd graph alive for every generated step.
    with torch.no_grad():
        h = torch.zeros(1, latent_size).to('cuda')
        for char in start:
            h = rnn(emb(string2code(char)).to('cuda'), h)
        final_sequence = [torch.argmax(decoder(h)).item()]
        n = 0

        while final_sequence[-1] != eos and n < maxlen:
            last_code = torch.tensor(final_sequence[-1]).unsqueeze(0)
            h = rnn(emb(last_code).to('cuda'), h)
            final_sequence.append(torch.argmax(decoder(h)).item())
            n += 1

    return code2string(final_sequence)
def generate_beam(rnn, emb, decoder, latent_size, eos=1, k=10, start="", maxlen=30, dict_size=97):
    """Generate ``k`` continuations of ``start`` with a fixed-width beam search.

    The prompt is fed symbol by symbol, the ``k`` most probable first symbols
    open the beams, and every following step keeps the ``k`` best joint
    (beam, symbol) candidates. The search stops after ``maxlen`` steps or once
    ``k`` beam slots have been flagged as having emitted ``eos``.

    Args:
        rnn: recurrent cell called as ``rnn(x, h)``; it must also expose a
            ``memory`` attribute that can be indexed per beam.
        emb: embedding applied to CPU tensors of symbol codes.
        decoder: maps hidden states to scores; a softmax is applied on dim 1.
        latent_size: size of the hidden state.
        eos: code of the end-of-sequence symbol.
        k: beam width.
        start: prompt string.
        maxlen: maximum number of steps after the first symbol.
        dict_size: used to split a flattened candidate index into
            (beam, symbol); presumably must equal the decoder's output
            size -- TODO confirm.

    Returns:
        A list with one ``code2string`` result per beam.

    Note:
        All tensors are placed on ``'cuda'``; the function needs a CUDA device.
    """

    # Zero initial hidden state, then consume the prompt one symbol at a time.
    h = torch.zeros(1, latent_size).to('cuda')
    for j in range(len(start)):
        h = rnn(emb(string2code(start[j])).to('cuda'), h)

    n = 0

    # The k most probable first symbols open the beams.
    maximum, index = torch.topk(torch.nn.functional.softmax(decoder(h), 1), k)
    probas = maximum.squeeze()
    # Replicate the prompt's hidden state once per beam.
    h = torch.stack([h.squeeze() for j in range(k)]).to('cuda')

    # One row per beam, holding the symbols generated so far.
    final_seq = index.transpose(0,1)

    # 1 while a beam slot is still running, 0 once it has emitted eos.
    mask = torch.ones(k).long().to('cuda')

    # Number of beam slots flagged as finished.
    count = 0
    while n<maxlen and count<k:
        n+=1

        # Advance every beam with the symbol it selected last (embedded on CPU).
        h = rnn(emb(index.squeeze().to('cpu')).to('cuda'),h)

        # Score every (beam, symbol) pair as beam score times next-symbol
        # probability, flatten, and keep the k best.
        maximum, index = torch.topk((probas*(torch.nn.functional.softmax(decoder(h), 1).transpose(0,1))).transpose(0,1).flatten(), k)

        # NOTE(review): `maximum` already contains the parent beam's score, so
        # the parent score enters this product twice -- confirm this is intended.
        probas = (probas[index//dict_size]*maximum)/torch.sum(probas)
        # index // dict_size is the parent beam of each kept candidate;
        # re-gather all per-beam state accordingly.
        h = h[index//dict_size]

        final_seq = final_seq[index//dict_size]

        rnn.memory = rnn.memory[index//dict_size]

        # index % dict_size is the symbol chosen for each kept candidate.
        index = index%dict_size

        # Flag the slots whose new symbol is eos.
        # NOTE(review): `mask` is not re-gathered with index // dict_size when
        # the beams are reordered above -- confirm the flags still line up.
        mask = mask * (index != eos)
        count = k-torch.count_nonzero(mask)

        # Finished slots get a very large score, presumably so that they are
        # never dropped from the top-k -- TODO confirm.
        probas[torch.nonzero((mask == 0))] = 1000000

        final_seq = torch.cat((final_seq, index.unsqueeze(1)), 1)

    # Decode every beam.
    final_sentences = []
    for i in final_seq:
        final_sentences.append(code2string(i.to('cpu').numpy()))

    return final_sentences
Beispiel #7
0
def generate(rnn, emb, decoder, eos, start="", maxlen=200):
    """Sample a sequence from ``rnn``, seeded by ``start``.

    The prompt is encoded with ``string2code`` and run through
    ``rnn.forward`` from an all-zero hidden state. Each next symbol is drawn
    with ``torch.multinomial`` from the softmax of ``rnn.decode`` and fed back
    through ``rnn.one_step`` until ``eos`` is drawn or ``maxlen`` symbols have
    been generated.

    Args:
        rnn: model exposing ``dim_latent``, ``forward``, ``one_step`` and
            ``decode``.
        emb: unused; kept for interface compatibility.
        decoder: unused; ``rnn.decode`` provides the logits.
        eos: code of the end-of-sequence symbol.
        start: prompt string.
        maxlen: upper bound on the number of generated symbols (at least one
            symbol is always produced).

    Returns:
        A tuple ``(sequence, logits)``: the sampled codes converted by
        ``code2string``, and the decoder scores of the sampled symbols.
        ``logits`` is returned as computed and is not moved to the CPU.
    """
    X = torch.unsqueeze(string2code(start), 1)
    with torch.no_grad():
        h0 = torch.zeros(1, rnn.dim_latent)
        ht1 = rnn.forward(X.to(device), h0.to(device))
        dt1 = rnn.decode(torch.unsqueeze(ht1[-1], 0))
        yt1 = torch.multinomial(torch.flatten(F.softmax(dt1, dim=2)),
                                1,
                                replacement=True)

        # Only the last hidden state is needed to continue step by step.
        ht1 = ht1[-1]
        logits = dt1[0, 0, yt1]
        i = 1
        while yt1[-1] != eos and i < maxlen:
            ht1 = rnn.one_step(torch.unsqueeze(yt1[-1], 0), ht1)
            dt1 = rnn.decode(torch.unsqueeze(ht1, 0))
            y = torch.multinomial(torch.flatten(F.softmax(dt1, dim=2)),
                                  1,
                                  replacement=True)
            yt1 = torch.cat((yt1, y))
            logits = torch.cat((logits, dt1[0, 0, yt1[-1]].unsqueeze(0)))
            i += 1

    # NOTE: no bare ``tensor.cpu()`` calls here. ``Tensor.cpu()`` returns a
    # copy instead of moving the tensor in place, so discarding its result
    # does nothing -- and calling it on ``y`` raised UnboundLocalError
    # whenever the loop above never executed.
    sequence = code2string(yt1)

    return sequence, logits
Beispiel #8
0
def generate_beam(rnn,
                  emb,
                  decoder,
                  eos,
                  k,
                  start="",
                  maxlen=200,
                  nucleus=False):
    """Generate a sequence with a width-``k`` beam search.

    The prompt is encoded with ``string2code`` and run through
    ``rnn.forward`` from an all-zero hidden state. The ``k`` best first
    symbols open the beams. At every step each running beam proposes ``k``
    children, scored by cumulative log-probability, and the ``k`` best
    candidates overall survive. A beam whose last symbol is ``eos`` or ``0``
    is treated as finished: it is carried over unchanged and padded with
    ``0``.

    Args:
        rnn: model exposing ``dim_latent``, ``forward``, ``one_step`` and
            ``decode``.
        emb: unused; kept for interface compatibility.
        decoder: unused; ``rnn.decode`` provides the logits.
        eos: code of the end-of-sequence symbol.
        k: beam width.
        start: prompt string.
        maxlen: length (in symbols) of the generated sequences.
        nucleus: when true, candidates and their scores come from
            ``p_nucleus(logits, k)`` instead of the top-``k`` of the
            log-softmax.

    Returns:
        The result of ``code2string`` applied to the beam with the highest
        cumulative log-probability.
    """
    X = torch.unsqueeze(string2code(start), 1)
    with torch.no_grad():
        h0 = torch.zeros(1, rnn.dim_latent)
        ht1 = rnn.forward(X.to(device), h0.to(device))
        dt1 = rnn.decode(torch.unsqueeze(ht1[-1], 0))
        if nucleus:
            log_proba_t1, yt1 = p_nucleus(torch.flatten(dt1), k)
        else:
            log_proba_t1, yt1 = torch.topk(
                torch.flatten(F.log_softmax(dt1, dim=2)), k)

        # Per-beam state: hidden state, cumulative score, symbols so far.
        ht1 = ht1[-1].expand(k, ht1.shape[2])
        log_proba_t1 = log_proba_t1.unsqueeze(1)
        yt1 = yt1.unsqueeze(1)

        seq_len = 1
        # Scratch buffers holding the k children of each of the k beams.
        h_tmp = torch.zeros(k * k, ht1.shape[1], device=device)
        # Integer dtype: a float buffer would turn the whole symbol history
        # into floats on concatenation, and those values are fed back to
        # ``rnn.one_step`` as symbol indices.
        y_tmp = torch.zeros(k * k, 1, dtype=torch.long, device=device)
        log_proba_tmp = torch.zeros(k * k, 1, device=device)
        while seq_len < maxlen:
            # Reset the scratch buffers. Scores start at -inf so that unused
            # slots can never be selected (and no ``-inf * 0`` NaN can appear,
            # as it could with an in-place multiplication by zero).
            h_tmp.zero_()
            y_tmp.zero_()
            log_proba_tmp.fill_(float("-inf"))

            for rank in range(k):
                first = rank * k
                last_symbol = yt1[rank, seq_len - 1]
                if last_symbol != eos and last_symbol != 0:
                    h = rnn.one_step(last_symbol.unsqueeze(0),
                                     ht1[rank].unsqueeze(0))
                    dt1 = rnn.decode(torch.unsqueeze(h, 0))
                    if nucleus:
                        log_proba, y = p_nucleus(torch.flatten(dt1), k)
                    else:
                        log_proba, y = torch.topk(
                            torch.flatten(F.log_softmax(dt1, dim=2)), k)
                    h_tmp[first:first + k] = h.expand(k, h.shape[1])
                    y_tmp[first:first + k] = y.view(-1, 1)
                    log_proba_tmp[first:first + k] = log_proba.view(
                        -1, 1) + log_proba_t1[rank]
                else:
                    # A finished beam occupies exactly one slot. Copying its
                    # score into all k slots would let it crowd every other
                    # beam out of the top-k.
                    log_proba_tmp[first] = log_proba_t1[rank]

            # Keep the k best candidates overall.
            _, k_best_ind = torch.topk(torch.flatten(log_proba_tmp), k)
            ht1 = h_tmp[k_best_ind].clone()
            log_proba_t1 = log_proba_tmp[k_best_ind].clone()
            yt1 = torch.cat((yt1, y_tmp[k_best_ind].clone()), dim=1)

            seq_len += 1

    # Return the highest-scoring beam.
    ind = torch.flatten(log_proba_t1).argmax()
    best_seq = yt1[ind]
    sequence = code2string(best_seq)

    return sequence