Example No. 1
import numpy as np
import os, sys
import review_proc as rp
import matplotlib.pyplot as plt

init_prob = []
final_prob = []

for filename in os.listdir('./probs/'):
    review_number = filename[0:-4]
    rv = rp.review('./aclImdb/test/posneg/' + review_number + '.txt')
    final_prob_scalar = np.load('./probs/' + review_number + '.npy')

    delta_matrix = np.load('./diffs/actual_diff_' + review_number + '.npy')
    if rv.sentiment == 'pos':
        # Most negative per-position probability change and its location.
        m = np.amin(delta_matrix, axis=0)
        delta = np.amin(m)
        i = np.argmin(m)
    elif rv.sentiment == 'neg':
        # Most positive per-position probability change and its location.
        m = np.amax(delta_matrix, axis=0)
        delta = np.amax(m)  # equivalent to np.amax(delta_matrix)
        i = np.argmax(m)
    #if final_prob_scalar < 0.5 and rv.sentiment == 'pos':
    #    print(i)
    if final_prob_scalar > 0.5 and rv.sentiment == 'neg':
        print(m)
    final_prob.append(final_prob_scalar)
    init_prob.append(final_prob_scalar - delta)

#plt.hist(final_prob,bins=50)
#plt.show()
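
The commented-out histogram suggests these probabilities were inspected visually. A minimal sketch that overlays the initial and final distributions (assuming the loop above has populated both lists):

# Sketch: compare probability distributions before and after the attack.
plt.hist(init_prob, bins=50, alpha=0.5, label='initial')
plt.hist(final_prob, bins=50, alpha=0.5, label='final')
plt.xlabel('classifier probability')
plt.ylabel('review count')
plt.legend()
plt.show()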
Example No. 2
try:
    word_embedding = np.load(word_embedding_filename)
    # allow_pickle=True is needed to load a pickled dict on newer NumPy.
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()
except FileNotFoundError:
    print('Word embedding not found, running word2vec')
    word2vec.w2v(corpus_filename='./corpus/imdb_train_corpus.txt')
    # Re-load the files w2v() is assumed to have just written; without this,
    # word_embedding is undefined below.
    word_embedding = np.load(word_embedding_filename)
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()

# Normalize each word vector to unit length.
embedding_norm = np.linalg.norm(word_embedding, axis=1)[:, np.newaxis]
normalized_word_embedding = word_embedding / embedding_norm
# Reverse dictionary to look up words from indices.
embedding_index_to_word = {v: k for k, v in word_to_embedding_index.items()}

root = './aclImdb/test/posneg/'
for filename in os.listdir('./ggs_results/diffs/'):
    rv = rp.review(root + filename[0:-4] + '.txt')
    diff = np.load('./ggs_results/diffs/' + filename)
    prob = np.load('./ggs_results/probs/' + filename)
    print('Filename: ', filename, 'Initial Probability: ', prob[0][0])
    if rv.sentiment == 'pos':
        m = np.amin(diff)
    else:
        m = np.amax(diff)
    prob_positive = prob[0, 0] + m
    # Skip reviews where a single-word replacement already flipped the
    # classifier's decision.
    if prob_positive < 0.5 and rv.sentiment == 'pos':
        continue
    elif prob_positive > 0.5 and rv.sentiment == 'neg':
        continue
    g = tf.Graph()
    tf.reset_default_graph()
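
Each example builds a fresh tf.Graph per review rather than reusing the default graph, which keeps the TF1 graph from growing across loop iterations. A minimal runnable sketch of that pattern (TF1 API):

g = tf.Graph()
with g.as_default():
    x = tf.constant(1.0)  # stand-in for the rnn.classifier build
    with tf.Session(graph=g) as sess:
        print(sess.run(x))
# Nodes added inside `with g.as_default()` never touch the default graph,
# so tf.reset_default_graph() is only needed when the default graph is used.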
Example No. 3
# Normalize each word vector to unit length.
embedding_norm = np.linalg.norm(word_embedding, axis=1)[:, np.newaxis]
normalized_word_embedding = word_embedding / embedding_norm
# Reverse dictionary to look up words from indices.
embedding_index_to_word = {v: k for k, v in word_to_embedding_index.items()}

c = [0, 0, 0]
t = [0, 0, 0]
for it, divs in enumerate([2, 4, 8]):
    res_dir = './minmax/' + str(divs) + '/ii/'
    for file_name in os.listdir(res_dir):
        ii = np.load(res_dir + file_name)
        # Swap the trailing 'ii/' for 'jj/' to load the matching positions.
        jj = np.load(res_dir[:-3] + 'jj/' + file_name)
        g = tf.Graph()
        rv = rp.review('./aclImdb/test/posneg/' + file_name[:-4] + '.txt')
        # Map the attack word indices ii to words and substitute them
        # at positions jj.
        w = [embedding_index_to_word[i] for i in ii]
        for n, j in enumerate(jj):
            rv.tokens[j] = w[n]
        with g.as_default():
            global_step_tensor = \
                tf.Variable(0, trainable=False, name='global_step')
            # Create RNN graph
            r = rnn.classifier(batch_size=1,
                               learning_rate=0.0,
                               hidden_size=16,
                               max_time=rv.length,
                               embeddings=word_embedding,
                               global_step=global_step_tensor)
            with tf.Session() as sess:
                tf.train.Saver().restore(
Example No. 4
try:
    word_embedding = np.load(word_embedding_filename)
    # allow_pickle=True is needed to load a pickled dict on newer NumPy.
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()
except FileNotFoundError:
    print('Word embedding not found, running word2vec')
    word2vec.w2v(corpus_filename='./corpus/imdb_train_corpus.txt')
    # Re-load the files w2v() is assumed to have just written.
    word_embedding = np.load(word_embedding_filename)
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()

# Normalize each word vector to unit length.
embedding_norm = np.linalg.norm(word_embedding, axis=1)[:, np.newaxis]
normalized_word_embedding = word_embedding / embedding_norm
# Reverse dictionary to look up words from indices.
embedding_index_to_word = {v: k for k, v in word_to_embedding_index.items()}

g = tf.Graph()
rv = rp.review('./aclImdb/test/posneg/5164_10.txt')
#ii = [242,242,837,348]
#jj = [11,215,467,680]
#ii = [369,369,837]
#jj = [72,292,495]
ii = [882, 348]
jj = [165, 495]
w = [embedding_index_to_word[i] for i in ii]

print(w)
# Insert the attack words at positions jj; note that each insert shifts
# every later position up by one.
for n, j in enumerate(jj):
    rv.tokens.insert(j, w[n])
rv.length += len(ii)
with g.as_default():
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    # Create RNN graph
Example No. 5
t = []
f = 0
root_dir = './aclImdb/test/posneg/'
restore_name = '4786_9.txt'
flag = True
for file_name in os.listdir(root_dir):
    # Resume: skip every file up to and including restore_name.
    if file_name != restore_name and flag:
        print(file_name)
        continue
    elif flag:
        flag = False
        continue
    word_count = 0
    g = tf.Graph()
    print('Running attack on: ' + file_name)
    rvo = rp.review(root_dir + file_name)
    rvo.translate(rvo.length, word_to_embedding_index, embedding_index_to_word)
    rvo.vec(word_embedding)
    # Actual Neural Network
    with g.as_default():
        global_step_tensor = tf.Variable(0,
                                         trainable=False,
                                         name='global_step')
        r = rnn.classifier(batch_size=1,
                           learning_rate=0.0,
                           hidden_size=8,
                           max_time=rvo.length,
                           embeddings=word_embedding,
                           global_step=global_step_tensor)
        with tf.Session() as sess:
            # Note: restore_name is reused here for the checkpoint path.
            restore_name = './ckpts/gridckpt_8_10/imdb-rnn-e15.ckpt'
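
The flag-based skip above restarts a long run after the last processed file. An equivalent, more compact sketch using itertools.dropwhile (a hypothetical refactor, not the original code):

import itertools

files = os.listdir(root_dir)
remaining = itertools.dropwhile(lambda f: f != '4786_9.txt', files)
next(remaining, None)  # drop '4786_9.txt' itself, resume after it
for file_name in remaining:
    print('Running attack on: ' + file_name)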
Example No. 6
try:
    word_embedding = np.load(word_embedding_filename)
    # allow_pickle=True is needed to load a pickled dict on newer NumPy.
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()
except FileNotFoundError:
    print('Word embedding not found, running word2vec')
    word2vec.w2v(corpus_filename='./corpus/imdb_train_corpus.txt')
    # Re-load the files w2v() is assumed to have just written.
    word_embedding = np.load(word_embedding_filename)
    word_to_embedding_index = np.load(word_to_embedding_index_filename, allow_pickle=True).item()

# Normalize each word vector to unit length.
embedding_norm = np.linalg.norm(word_embedding, axis=1)[:, np.newaxis]
normalized_word_embedding = word_embedding / embedding_norm
# Reverse dictionary to look up words from indices.
embedding_index_to_word = {v: k for k, v in word_to_embedding_index.items()}

g = tf.Graph()
rv = rp.review('./aclImdb/test/posneg/9999_10.txt')
with g.as_default():
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    # Create RNN graph
    r = rnn.classifier(batch_size=1,
                       learning_rate=0.0,
                       hidden_size=16,
                       max_time=1024,
                       embeddings=word_embedding,
                       global_step=global_step_tensor)
    with tf.Session() as sess:
        tf.train.Saver().restore(sess,
                                 './ckpts/gridckpt_16_10/imdb-rnn-e15.ckpt')
        print(rv.tokens)
        rv.translate(r.max_time, word_to_embedding_index,
                     embedding_index_to_word)
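
normalized_word_embedding is computed in each of these examples but never used in the excerpts shown. A sketch of the cosine-similarity lookup it presumably supports (the query word 'good' is an assumption, chosen only for illustration):

# Sketch: nearest vocabulary words to a query word by cosine similarity.
query = normalized_word_embedding[word_to_embedding_index['good']]
similarity = normalized_word_embedding @ query      # shape (10000,)
nearest = np.argsort(similarity)[-5:][::-1]         # top-5, best first
print([embedding_index_to_word[i] for i in nearest])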
Example No. 7
test_file_list = os.listdir('./aclImdb/test/posneg/')
# g is not created in this excerpt; build a fresh graph as in the others.
g = tf.Graph()
with g.as_default():
    global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    # Create RNN graph
    r = rnn.classifier(batch_size=num_top * per_batch,
                       learning_rate=0.0,
                       hidden_size=16,
                       max_time=1024,
                       embeddings=word_embedding,
                       global_step=global_step_tensor)
    with tf.Session() as sess:
        tf.train.Saver().restore(sess,
                                 './ckpts/gridckpt_16_10/imdb-rnn-e15.ckpt')
        for test_file in test_file_list:
            print('Processing ' + test_file)
            rv = rp.review('./aclImdb/test/posneg/' + test_file)
            rv.translate(r.max_time, word_to_embedding_index,
                         embedding_index_to_word)
            rv.vec(word_embedding)
            decision, probability, batch_grad = r.infer_batched_prob(
                sess, rv, rv.index_vector[0][0], 50, list(range(10)))
            rnn_sentiment = 'pos' if not decision[0] else 'neg'
            print('Neural Net Decision: ', rnn_sentiment, ' Actual: ',
                  rv.sentiment)
            # Only attack reviews the network already classifies correctly.
            if rnn_sentiment != rv.sentiment:
                continue
            grad = batch_grad[0][0, 0:rv.length, :]
            W = word_embedding
            G = grad
            # D[v, j] = <embedding of vocab word v, gradient at position j>;
            # c[j]    = <current word vector at position j, its gradient>.
            D = W @ G.T
            c = np.sum(np.multiply(rv.vector_list, G), axis=1)
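
D and c feed the first-order replacement score d = D - c used in the next example: d[v, j] approximates g_j . (w_v - w_j), the change in classifier output if the word at position j is swapped for vocabulary word v. A sketch of reading off the strongest single replacement (shapes assumed from the code above; the sign convention is an assumption):

# Sketch: strongest single-word replacement under the linear approximation.
d = D - c                    # shape (vocab, positions); c broadcasts per row
v, j = np.unravel_index(np.argmin(d), d.shape)   # most negative estimate
print('replace position', j, 'with', embedding_index_to_word[v],
      'estimated change', d[v, j])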
Example No. 8
global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
# Create RNN graph
r = rnn.classifier(batch_size=1,
                   learning_rate=0.0,
                   hidden_size=16,
                   max_time=1024,
                   embeddings=word_embedding,
                   global_step=global_step_tensor)
succ = 0
n = 0
with tf.Session() as sess:
    tf.train.Saver().restore(sess,
                             './ckpts/gridckpt_16_10/imdb-rnn-e15.ckpt')
    for test_file in test_file_list:
        review_name = test_file[12:-4] + '.txt'
        rv = rp.review('./aclImdb/test/posneg/' + review_name)
        rv.translate(r.max_time, word_to_embedding_index,
                     embedding_index_to_word)
        rv.vec(word_embedding)
        decision, probability, batch_grad = r.infer_dpg(sess, rv)
        rnn_sentiment = 'pos' if not decision[0] else 'neg'
        # Only attack reviews the network already classifies correctly.
        if rnn_sentiment != rv.sentiment:
            continue
        grad = batch_grad[0][0, 0:rv.length, :]
        W = word_embedding
        G = grad
        # First-order replacement scores, as in the previous example.
        D = W @ G.T
        c = np.sum(np.multiply(rv.vector_list, G), axis=1)
        d = D - c
        x = np.load('./diffs/' + test_file)
        x.shape = (100000, 1)
Example No. 9
import numpy as np
import matplotlib.pyplot as plt
import os, sys, re
import review_proc as rp

V = 10000
num_top = 10

root = './ggs_results/'
for filename in os.listdir(root + 'diffs/'):
    rv = rp.review('aclImdb/test/posneg/' + filename[0:-4] + '.txt')
    grad = np.load(root + 'grads/' + filename)
    diff = np.load(root + 'diffs/' + filename)
    # Gradient norm per word position; the largest norms mark the words
    # the classifier is most sensitive to.
    n = np.linalg.norm(grad, axis=1)
    i = np.argsort(n)[-num_top:]
    for index in i:
        top_word = rv.tokens[index]
        print(top_word)
    print('_' * 20)
    #n = n[i]; n.shape = (10,1)
    #n = np.tile(n.T,(V,1))
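
The commented-out lines hint at scaling per-candidate scores by the gradient norms at the top positions. A sketch of that normalization, assuming diff has shape (V, num_top) (the shape is inferred from the np.tile target, and whether the original divided or multiplied is not shown):

# Sketch: scale each column of diff by the gradient norm at its position.
top_norms = n[i]                     # shape (num_top,)
normalized_diff = diff / top_norms   # broadcasts across the vocabulary axis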