# Example no. 1
def plot_log_p(filename, codec, model):
    """Score every text in `datasets/<filename>.pkl` with the model's
    log-likelihood, dump the raw scores to `submit/<filename>_raw.pkl`,
    and save/show a histogram as `submit/<filename>.png`.

    :param filename: Dataset name without the `.pkl` extension.
    :param codec: Encoder whose `encode` maps a string to a token tensor.
    :param model: The language model passed through to `log_likelihood`.
    """
    from questions.likelihood import log_likelihood

    dataset_path = os.path.join('datasets', filename + '.pkl')
    with open(dataset_path, 'rb') as fin:
        texts = pkl.load(fin)
        # log_likelihood is implemented in questions.likelihood.
        scores = np.asarray([
            log_likelihood(model, codec.encode(texts[idx]).to(device))
            for idx in trange(len(texts))
        ])

    # Persist the raw scores for submission.
    raw_path = os.path.join('submit', filename + '_raw.pkl')
    with open(raw_path, 'wb') as fout:
        pkl.dump(scores, fout, protocol=pkl.HIGHEST_PROTOCOL)

    # Histogram of the per-text log-likelihoods.
    plt.figure()
    plt.hist(scores)
    plt.xlabel('Log-likelihood')
    plt.xlim([-600, 0])
    plt.ylabel('Counts')
    plt.title(filename)
    plt.savefig(os.path.join('submit', filename + '.png'), bbox_inches='tight')
    plt.show()
    plt.close()
    print("# Figure written to %s.png." % filename)
def classification(model, text, threshold=-300):
    """
    Classify whether the string `text` is randomly generated or not.

    :param model: The GPT-2 model
    :param text: A tensor of shape (1, T), where T is the length of the text
    :param threshold: Log-likelihood cutoff below which `text` is classified
        as random. Defaults to -300, matching the original hard-coded value.
    :return: True if `text` is a random string. Otherwise return False
    """
    with torch.no_grad():
        # Random character strings score far lower under the language model
        # than natural text, so a single threshold on the log-likelihood
        # separates the two classes.
        ll = log_likelihood(model, text)
        return ll < threshold