コード例 #1
0
def apply_sampling(embeddings, hid, start='S', end='E', t=1.0, max_tokens=1000):
    """Sample a sequence of embedding indices from the trained network.

    Starting from the ``start`` symbol, repeatedly feeds the last sampled
    one-hot vector through the compiled ``sampling`` function and draws the
    next index from a temperature-scaled softmax over its output, until the
    ``end`` symbol is drawn or ``max_tokens`` indices have been sampled.

    Parameters:
        embeddings: mapping from symbol to its index (e.g. char -> int)
        hid: size of the recurrent hidden/cell state vectors
        start, end: start/end symbols, keys of ``embeddings``
        t: softmax temperature (lower -> greedier sampling)
        max_tokens: hard cap on the number of sampled indices
            (generalizes the original hard-coded 1000)

    Returns:
        list of sampled indices, excluding the start and end symbols.
    """
    x = embeddings[start]
    e = embeddings[end]

    S_x = helper.index_to_vec(start, embeddings)

    # Recurrent hidden and cell state start at all zeros.
    S_h = np.zeros(hid, dtype=theano.config.floatX)
    S_c = np.zeros(hid, dtype=theano.config.floatX)

    sampled = [x]

    counter = 0

    while x != e:
        # Bug fix: test the cap *before* sampling (the original tested it
        # after append and before increment, giving 1001 iterations), and
        # append the end index on timeout so the trailing [1:-1] slice
        # below never drops a genuinely sampled token.
        if counter == max_tokens:
            sampled.append(e)
            break

        S_x = np.reshape(S_x, (1, -1))
        S_h, S_c, S_y = sampling(S_x, S_h.flatten(), S_c.flatten())
        S_y = helper.t_softmax(S_y.flatten(), t=t)
        # Draw the next one-hot sample from the tempered distribution.
        S_x = np.random.multinomial(n=1, pvals=S_y)
        x = helper.vec_to_index(S_x)
        sampled.append(x)
        counter += 1

    # Strip the start symbol (front) and the end symbol (back).
    return sampled[1:-1]
コード例 #2
0
ファイル: __main__.py プロジェクト: Gnork/confusion-words
def apply_sampling(embeddings, hid, start='S', end='E', t=1.0, max_tokens=1000):
    """Sample a sequence of embedding indices from the trained network.

    Feeds the last sampled one-hot vector through the compiled ``sampling``
    function and draws the next index from a temperature-scaled softmax,
    stopping when the ``end`` symbol is drawn or ``max_tokens`` indices
    have been sampled.

    Parameters:
        embeddings: mapping from symbol to its index (e.g. char -> int)
        hid: size of the recurrent hidden/cell state vectors
        start, end: start/end symbols, keys of ``embeddings``
        t: softmax temperature (lower -> greedier sampling)
        max_tokens: hard cap on the number of sampled indices
            (generalizes the original hard-coded 1000)

    Returns:
        list of sampled indices, excluding the start and end symbols.
    """
    x = embeddings[start]
    e = embeddings[end]

    S_x = helper.index_to_vec(start, embeddings)

    # Recurrent hidden and cell state start at all zeros.
    S_h = np.zeros(hid, dtype=theano.config.floatX)
    S_c = np.zeros(hid, dtype=theano.config.floatX)

    sampled = [x]

    counter = 0

    while x != e:
        # Bug fix: test the cap *before* sampling (the original tested it
        # after append and before increment, giving 1001 iterations), and
        # append the end index on timeout so the trailing [1:-1] slice
        # below never drops a genuinely sampled token.
        if counter == max_tokens:
            sampled.append(e)
            break

        S_x = np.reshape(S_x, (1, -1))
        S_h, S_c, S_y = sampling(S_x, S_h.flatten(), S_c.flatten())
        S_y = helper.t_softmax(S_y.flatten(), t=t)
        # Draw the next one-hot sample from the tempered distribution.
        S_x = np.random.multinomial(n=1, pvals=S_y)
        x = helper.vec_to_index(S_x)
        sampled.append(x)
        counter += 1

    # Strip the start symbol (front) and the end symbol (back).
    return sampled[1:-1]
コード例 #3
0
ファイル: __main__.py プロジェクト: Gnork/confusion-words
def apply_sampling(token_embeddings, hid, start='S', end='E', t=1.0, max_tokens=50):
    """Sample a token sequence from the trained network.

    Repeatedly feeds the last sampled one-hot vector through the compiled
    ``sampling`` function and draws the next token from a temperature-t
    softmax over its output, stopping once the ``end`` token is produced
    or ``max_tokens`` tokens have been sampled (the end token is appended
    on timeout).

    Returns the sampled tokens without the start and end markers.
    """
    # NOTE(review): indexing into the key list by the sampled position
    # assumes the network's output ordering matches token_to_index's
    # key order — verify against how the model was built.
    tokens = list(token_embeddings.token_to_index.keys())

    vec = token_embeddings.token_to_vec(start)

    # Zero-initialized recurrent hidden and cell state.
    state_h = np.zeros(hid, dtype=theano.config.floatX)
    state_c = np.zeros(hid, dtype=theano.config.floatX)

    generated = [start]
    steps = 0

    while generated[-1] != end:
        if steps == max_tokens:
            generated.append(end)
            break

        vec = np.reshape(vec, (1, -1))
        state_h, state_c, raw = sampling(vec, state_h.flatten(), state_c.flatten())
        distribution = helper.t_softmax(raw.flatten(), t=t)
        # One multinomial draw gives the next one-hot token vector.
        vec = np.random.multinomial(n=1, pvals=distribution)
        generated.append(tokens[helper.vec_to_index(vec)])

        steps += 1

    return generated[1:-1]
コード例 #4
0
def char_sequence_probability(sentence, predictions, embeddings, t=1.0):
    """Return the log-probability of *sentence* under the model outputs.

    The sentence words are joined with spaces, split into characters, and
    terminated with the 'E' end marker; each character's probability is
    read from a temperature-t softmax over the matching prediction vector
    and the log-probabilities are accumulated.
    """
    chars = list(' '.join(sentence)) + ['E']
    total = 0
    for char, pred in zip(chars, predictions):
        dist = helper.t_softmax(pred, t=t)
        total += np.log(dist[embeddings[char]])
    return total
コード例 #5
0
ファイル: test.py プロジェクト: Gnork/confusion-words
def char_sequence_probability(sentence, predictions, embeddings, t=1.0):
    """Sum of per-character log-probabilities for *sentence*.

    Characters come from the space-joined sentence plus the 'E' end
    marker; each is scored by indexing a temperature-t softmax of the
    corresponding prediction vector with the character's embedding index.
    """
    sequence = list(' '.join(sentence)) + ['E']
    return sum(
        np.log(helper.t_softmax(p, t=t)[embeddings[c]])
        for c, p in zip(sequence, predictions)
    )