Code example #1
File: converter.py  Project: seanpmcn/madlibs
def main():
    text = 'Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in ' \
           'Liberty, and dedicated to the proposition that all men are created equal. Now we are engaged in a great ' \
           'civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are ' \
           'met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final ' \
           'resting place for those who here gave their lives that that nation might live. It is altogether fitting and ' \
           'proper that we should do this. But, in a larger sense, we can not dedicate -- we can not consecrate -- we ' \
           'can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far ' \
           'above our poor power to add or detract. The world will little note, nor long remember what we say here, but ' \
           'it can never forget what they did here. It is for us the living, rather, to be dedicated here to the ' \
           'unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here ' \
           'dedicated to the great task remaining before us -- that from these honored dead we take increased devotion ' \
           'to that cause for which they gave the last full measure of devotion -- that we here highly resolve that ' \
           'these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- ' \
           'and that government of the people, by the people, for the people, shall not perish from the earth.'
    text_tok = tok(text)
    text_pos = pd.DataFrame(tag(text_tok), columns=['words', 'pos tags'])
    text_pos.insert(0, 'key', list(range(len(text_pos.index))))
    print(text_pos)

    repl_pos = {
        'pos tags': [
            'CD', 'JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS', 'RB', 'RBR',
            'RBS'
        ],
        'pos names': [
            'number', 'adjective', 'comparative adjective',
            'superlative adjective', 'singular noun', 'plural noun',
            'proper noun', 'plural proper noun', 'adverb',
            'comparative adverb', 'superlative adverb'
        ]
    }
    repl_pos = pd.DataFrame(data=repl_pos)

    print(repl_pos)

    text_repl = pd.merge(text_pos, repl_pos, on='pos tags', how='inner')
    text_repl['key'] = text_repl['key'].astype(int)

    print(text_repl)

    sparsity = 7
    repl_num = floor(len(text_repl.index) / sparsity)

    replace = text_repl.sample(n=repl_num)
    new_words = ['_____'] * repl_num
    replace.insert(4, 'new words', new_words)

    print(replace)

    output = ''
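The excerpt above stops mid-function and omits the file's imports. Judging from the names it uses (tok, tag, pd, floor), a minimal header that would let the shown code run might look like the sketch below; the exact aliases are assumptions, not copied from the madlibs project.

# Assumed imports for the converter.py excerpt above (a sketch, not the project's actual header)
from math import floor
import pandas as pd
from nltk import word_tokenize as tok
from nltk import pos_tag as tag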
Code example #2
def returnNames(url):
        
    theurl = "https://raw.githubusercontent.com/robincamille/replacethechar/master/texts/biblekjv.txt"
    #raw_input("URL to .txt file: ")
    sourcefile = urllib2.urlopen(theurl)
    source = sourcefile.read()
    
    #Tokenize
    sourcetok = tok(source[:partition])
    
    #Tag POS
    sourcetag = postag(sourcetok)
    
    #Outputs POS-tagged text
    sourcene = ne(sourcetag, binary=False)
    
    charsall = []
    for n in sourcene:
        if type(n) == tree.Tree:
            if n.label() == 'PERSON':
                for m in n:
                    charsall.append(m[0])
    
    honorifics = ['Mr.', 'Mrs.', 'Ms.', 'Miss', 'Dr.', 'Prof.', 'Professor', 'Lord', 'Lady', 'Sir', 'Madam', 'Dame', 'Rev.', 'Rabbi']
    
    charsallnames = []
    for s in charsall:
        if s in honorifics:
            pass
        else:
            charsallnames.append(s)
    
    counted = (word for word in charsallnames if word[:1].isupper())
    c = Counter(counted)
    charscommon = c.most_common(5)
    
    chars = []
    for s in charscommon:
        chars.append(s[0])
    
    print '\nMost common names:'
    print '\t'.join(chars)
    return chars,source 
Code example #3
File: start.py  Project: robincamille/nondescript2
def my_form_post():

    # Nondescript UI input page: left box, writing sample
    # same as output page: invisible
    corpus = request.form['corpus']

    # Nondescript UI input page: right box, message
    # Output page: 3 tabs at the bottom
    if request.form['whichmessage'] == 'choosesuggestmessage':
        message = request.form['suggestmessage']
    elif request.form['whichmessage'] == 'chooseluckymessage':
        message = request.form['luckymessage']
    elif request.form['whichmessage'] == 'chooseorigmessage':
        message = request.form['origmessage']

    docraw = corpus + ' ' + message  #Analyze writing overall
    #doc = docraw.split()
    doc = tok(docraw)
    printcompare = []  #things to print: style vs. all background documents
    printoverall = []  #things to print: overall style
    printunusualwords = []
    unusualwordsonly = []
    printclassify = []  #things to print: classifier output
    advice = []  #tips such as use shorter sentences

    #Document length
    #s.append('Document length: %d words' % len(doc))

    #Return message forms: synonym-suggestion, -replacement, original
    origmessage = message
    anonmessage = changewords(message)
    suggestmessage = anonmessage[0]  #includes synonym suggestions in parens
    luckymessage = anonmessage[1]  #randomly replaces some words with synonyms

    #Cosine similarity in vocabularies of 100, 1000, 10000 words
    printcompare.append('Similarity between this message and original writing sample: %.3f'\
                        % (sim(toponly.top(corpus,10000),toponly.top(message,10000))[0,1]))
    # printcompare.append('Similarity between this message and original writing sample (10k words): %.3f'\
    #                     % (sim(toponly.top(corpus,10000),toponly.top(message,10000))[0,1]))
    # printcompare.append('Similarity between this message and original writing sample (1k words): %.3f' \
    #                     % (sim(toponly.top(corpus,1000),toponly.top(message,1000))[0,1]))
    # printcompare.append('Similarity between this message and original writing sample (100 words): %.3f'   \
    #                     % (sim(toponly.top(corpus,100),toponly.top(message,100))[0,1]))

    #Average word lengths
    printcompare.append("Your message's word length is {:.2f}x \
        your average".format(
        avgwordlength(message.split()) / avgwordlength(corpus.split())))
    #totwlavg = mean(totwl)
    word_compare = avgwordlength(doc) / backgroundcorpusWL
    if word_compare > 1.2:
        advice.append("Try using shorter words.")
    elif word_compare < 0.9:
        advice.append("Try using longer words.")
    printoverall.append("Your overall word length is {:.2f}x \
        everyone else's average.".format(word_compare))

    #Average sent lengths
    printcompare.append("Your message's sentence length is {:.2f}x \
        your average".format(avgsentlength(message) / avgsentlength(corpus)))
    #totslavg = mean(totsl)
    sent_compare = avgsentlength(doc) / backgroundcorpusSL
    if sent_compare > 1.2:
        advice.append("Use shorter sentences.")
    elif sent_compare < 0.9:
        advice.append("Use longer sentences.")
    printoverall.append("Your overall sentence length is {:.2f}x \
        everyone else's average.".format(sent_compare))

    advice.append(
        "Focus on changing the highlighted and red-underlined words.")

    #Top unusual words

    #Set up word frequency comparison
    with open(bcfreqs) as infile:  #from sources.py
        allfreqraw = [l[1:] for l in infile]
    allfreq = {}
    for row in allfreqraw:
        row = row.split(',')
        allfreq[row[0][:-1]] = float(row[1])

    # with open('allfreq.csv','w') as allfreqfile:
    # 	allfreqfile.write(str(allfreq))

    doccount = defaultdict(int)
    docfreq = defaultdict(int)

    for word in doc:
        doccount[word.lower()] += 1  #term count

    for word in doccount:
        docfreq[word] = doccount[word] / float(len(doccount))  #term frequency

    # with open('docfreq.csv','w') as docfreqfile:
    # 	docfreqfile.write(str(docfreq))

    #Compare word frequencies
    compfreq = defaultdict(list)
    for word in docfreq:
        if word in allfreq.keys():
            compfreq[word] = [docfreq[word], allfreq[word]]
        else:
            pass

    compwords = []
    for word in compfreq:
        if doccount[word] > 1:
            if compfreq[word][0] > compfreq[word][1]:
                # if compfreq[word][1] == 0:
                #     v = compfreq[word][1] / minfreq #min freq from train/
                # else:
                v = compfreq[word][0] / float(compfreq[word][1])
                compwords.append([v, word, doccount[word]
                                  ])  #currently 0 words? fix this
            else:
                pass
        else:
            pass

    compwordssort = sorted(compwords, reverse=True)

    for i in compwordssort[:10]:
        unusualwordsonly.append(i[1])
        printunusualwords.append(
            '{}: {:.1f}x more frequent (used {} times in sample and message)'.
            format(i[1], i[0], i[2]))
    unusualwordsonly = ' '.join(unusualwordsonly)

    # The important bit:

    #Compare to n random authors in background corpus
    #Run through classifier: train & test
    #backgroundcorpus directory & filelist .txt file specified
    #in sources.py
    classifieroutcome = [i for i in classifydocs(backgroundcorpus,\
                                             filelist,\
                                             docraw,\
                                             message,\
                                             1000)] #vocab of n words

    #Output to send to compare-output-simple.html
    return render_template("compare-output-simple.html", \
                           compareoverall = printoverall, \
                           unusualwordsonly = unusualwordsonly, \
                           unusualwords = printunusualwords, \
                           advice = advice, \
                           corpus = corpus, \
                           repeatdoc = message, \
                           suggestdoc = suggestmessage, \
                           luckydoc = luckymessage, \
                           origdoc = origmessage, \
                           comparestats = printcompare, \
                           classifieroutcome = classifieroutcome[0], \
                           classifierscore = classifieroutcome[1])
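The route above leans on two project helpers, avgwordlength and avgsentlength, whose definitions are not shown. Judging from how they are called (a list of word tokens in one case, raw text in the other), minimal stand-ins might look like the sketch below; these are hypothetical placeholders, not the functions from nondescript2.

from nltk import sent_tokenize, word_tokenize

def avgwordlength(words):
    # mean character length over a list of word tokens
    return sum(len(w) for w in words) / float(len(words))

def avgsentlength(text):
    # mean number of word tokens per sentence in a raw string
    sentences = sent_tokenize(text)
    return sum(len(word_tokenize(s)) for s in sentences) / float(len(sentences))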
Code example #4
##Caveats: first/last names and most honorifics not considered

from nltk import tree
from nltk import word_tokenize as tok
from nltk import pos_tag as postag
from nltk import ne_chunk as ne
from collections import Counter

infile = open('data_columns/nussbaum/nuss01.txt',
              'r')  # Put your filename here
source = infile.read()
source = source.decode('utf-8')
infile.close()

print 'Tokenizing'
sourcetok = tok(source)

print 'Tagging Part Of Speech (POS)...'
sourcetag = postag(sourcetok)

print 'Running POS-tagged text through Named Entity chunker...'
sourcene = ne(sourcetag, binary=False)

# Find just the Named Entities that we want
charsall = []
for n in sourcene:
    if type(n) == tree.Tree:
        #if n.label() == 'PERSON':

        if n.node == 'PERSON':  #Options: PERSON, ORGANIZATION, LOCATION
            for m in n:
                charsall.append(m[0])
Code example #5
def readbook():
    bk = pglist[pickbook()]  #pick #2000 from list
    number = bk[1]
    codetitle = bk[2]
    title = bk[3]

    intro = []
    intro.append(foundit[randint(0, len(foundit) - 1)] % title)
    intro.append('\n\nYou flip to a random page and begin to read...\n\n')

    urll = 'https://raw.githubusercontent.com/GITenberg/' + codetitle + \
           '/master/' + number + '.txt'
    guturl = 'http://www.gutenberg.org/ebooks/' + number

    time.sleep(3)  # politeness
    req = urllib2.Request(urll)
    response = urllib2.urlopen(req)
    the_page = response.read()

    ex = the_page[10000:11000]

    if "Project Gutenberg" in ex:
        ex = the_page[30000:31000]
    elif "PROJECT GUTENBERG" in ex:
        ex = the_page[30000:31000]
    else:
        pass

    # stitch together line breaks
    ex = ex + '\n\r'  #hacky way to make sure below splits happen
    ex = ex.split('\n')
    ex = ' '.join(ex)
    ex = ex.split('\r')
    ex = ' '.join(ex)

    # get rid of double spaces, brackets, and asterisky section breaks
    ex = ex + '  '
    ex = ex.split('  ')
    exfin = []
    for w in ex:
        if w == '':
            pass
        elif w == '*':
            pass
        elif w[0] == ' ':
            w = w[1:]
            exfin.append(w)
        elif w[0] == '[':
            w = w[1:]
            exfin.append(w)
        elif w[-1] == ']':
            w = w[:-1]
            exfin.append(w)
        else:
            exfin.append(w)
    ex = ' '.join(exfin)

    # split into sentences
    exs = tok(ex)

    # start with second sentence, end with second-to-last
    if exs[1][:2] == '" ':  # skip initial quotation mark if any
        exs[1] = exs[1][2:]
    blurb = '> ... ' + (' '.join(exs[1:-1])) + ' ...'

    outtro = (leave[randint(0, len(leave) - 1)])

    usedbooktitles.append(title)
    titlelink = '[' + title + '](' + guturl + ')'
    usedbooktitlesandlinks.append(titlelink)

    return ' '.join(intro)[1:], blurb, outtro
Code example #6
File: ner.py  Project: robincamille/replacethechar
import urllib2
from nltk import tree
from nltk import word_tokenize as tok
from nltk import pos_tag as postag
from nltk import ne_chunk as ne
from nltk.corpus import gutenberg as gb
from collections import Counter

theurl = raw_input("URL to .txt file: ")
sourcefile = urllib2.urlopen(theurl)
source = sourcefile.read()


# Tokenize
sourcetok = tok(source)

# Tag POS
sourcetag = postag(sourcetok)

# Outputs POS-tagged text
sourcene = ne(sourcetag, binary=False)

charsall = []
for n in sourcene:
    if type(n) == tree.Tree:
        if n.label() == "PERSON":
            for m in n:
                charsall.append(m[0])

honorifics = [
Code example #7
def prepare_for_bert(folder,
                     number,
                     filename,
                     write_to_file=False,
                     wiki_novel=False):

    ####### STEP 1: setting up the folders' path variables

    book_nlp_output_folder = '{}/book_nlp_output'.format(folder)
    temp_folder = '{}/temp'.format(folder)
    processed_novel_folder = '{}/processed_novel'.format(folder)

    if wiki_novel == False:
        filename = filename.replace('_clean.txt_', '_')
    else:
        filename = '{}/original_wikipedia_page/{}.txt'.format(folder, number)

    if wiki_novel == False:
        transform_characters_list(book_nlp_output_folder, folder, number)

    ########   STEP 3: creating a list of characters from that file, by using a function in nonce2vec.utils.novels_utilities

    char_list = get_characters_list(folder, number)

    if wiki_novel == False:
        genders_dict = get_characters_gender(folder, number, char_list)
    else:
        genders_dict = {}

    ########    STEP 5: creating the final version of the txt to be used for training on N2V. Main features are 1) one sentence per line, 2) different names for the same character are substituted with one single name, 3) punctuation is removed and double/triple backspaces, common within Gutenberg files, are removed

    print('Creating the final version of novel {} for version: {}'.format(
        number, filename))
    if wiki_novel == False:
        f = open('{}'.format(filename)).read()
        out = open('{}_bert'.format(filename), 'w')
        lines = tok(f)
    else:
        lines = open('{}'.format(filename)).readlines()
        out = open('{}_bert'.format(filename), 'w')

    if wiki_novel == False:
        current_char_list, full_novel = cleanup_novels(char_list, lines, out,
                                                       write_to_file)
    else:
        current_char_list, full_novel = cleanup_novels(char_list,
                                                       lines,
                                                       out,
                                                       write_to_file,
                                                       wiki_novel=True)

    if wiki_novel == False:
        novel_versions = {}
        mid_novel = int(len(full_novel) / 2)

        novel_versions['{}_part_a'.format(filename)] = full_novel[:mid_novel]
        novel_versions['{}_part_b'.format(filename)] = full_novel[mid_novel:]
    elif wiki_novel == True:
        novel_versions = full_novel

    return novel_versions, current_char_list, genders_dict
Code example #8
processed_novel_folder = '{}/processed_novel'.format(base_folder)
os.makedirs('{}'.format(temp_folder), exist_ok=True)
os.makedirs('{}'.format(booknlp_folder), exist_ok=True)
os.makedirs('{}'.format(processed_novel_folder), exist_ok=True)

number = sys.argv[2]

f = open('{}/{}.txt'.format(folder, number)).readlines()
out_clean = open('{}/{}_clean.txt'.format(temp_folder, number), 'w')
clean_list = []
for v, i in enumerate(f):
    if '***START' not in str(i) and v != len(f) - 1:
        pass
    elif v == len(f) - 1:
        malandrino = f
    else:
        malandrino = f[(v + 1):]
        break
for i in malandrino:
    if '***END' not in str(i):
        c = i.replace('_', ' ').strip(' ').strip('\n').replace('\r', '')
        clean_list.append('{}'.format(c))
    else:
        break
clean_book = ' '.join(clean_list)
sent_book = tok(clean_book)
for i in sent_book:
    out_clean.write('{}\n'.format(i))

out_clean.close()
Code example #9
import numpy as np
import matplotlib.pyplot as plt
from nltk import word_tokenize as tok
from scipy import linalg as SPLA
import utils

testFile = r"C:\Users\Carl Wilhjelm\PycharmProjects\ICSProject\logs\syslogTest.txt"

# create tokenized list of all logs
syslog = []
with open(testFile, 'r') as f:
    syslogText = f.readlines()

for line in syslogText:
    syslog.append(tok(line))
n = len(syslog)
print(n)
opNumber = int(n * (0.01))

# create naive distance matrix for all logs
distanceMatrix = [[0 for x in range(n)] for y in range(n)]
for i in range(n):
    for j in range(i, n):
        distance = 0
        k = min(len(syslog[i]), len(syslog[j]))
        for e in range(k):
            if syslog[i][e] != syslog[j][e]:
                distance += 1
        distanceMatrix[i][j] = distance / k if k else 0.0  # guard against empty log lines
        distanceMatrix[j][i] = distanceMatrix[i][j]
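numpy is imported above but unused in the excerpt. Once distanceMatrix is filled in, a short follow-on sketch like the one below (hypothetical, not part of the original script) could turn it into an array and pull out the most similar pair of log lines.

import numpy as np

D = np.array(distanceMatrix, dtype=float)
np.fill_diagonal(D, np.inf)  # ignore self-comparisons
i, j = np.unravel_index(np.argmin(D), D.shape)
print('closest pair of log lines: {} and {} (distance {:.3f})'.format(i, j, D[i, j]))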
Code example #10
def prepare_for_n2v(folder, number, filename, w2v_model):

    #######  STEP 1: from the output of booknlp to a file containing the list of characters and the number of times they occur

    f = open('{}/book_nlp_output/book.id.html'.format(folder)).read().split(
        '<br />')
    out = open('{}/characters_{}.txt'.format(folder, number), 'w')
    for i in f:
        if '<h1>Text' in i:
            break
        else:
            i2 = sub('.*Characters</h1>', '', i)
            i3 = i2.replace('-- ', '')
            i4 = sub('\([0-9]*\)', '_', i3)
            i5 = i4.replace(' _ ', '_').strip('_')
            i6 = i5.replace('\t ', '\t')
            i7 = sub(r'[^\w\s]', '', i6)
            if 'Gutenberg' not in i7:
                out.write('{}\n'.format(i7.lower()))
    out.close()

    ########   STEP 2: creating a list of characters from that file, by using a function in nonce2vec.utils.novels_utilities

    char_list = get_characters_list(folder, number)

    gender_list = get_characters_gender(folder, number, char_list)

    print(gender_list)

    ########    STEP 3: creating the final version of the txt to be used for training on N2V. Main features are 1) one sentence per line, 2) different names for the same character are substituted with one single name, 3) punctuation is removed and double/triple backspaces, common within Gutenberg files, are removed

    files = [
        '{}'.format(filename), '{}_part_a'.format(filename),
        '{}_part_b'.format(filename)
    ]
    add_to_char_list = 1
    char_dict = {}
    for i in files:
        char_dict_part = {}
        #    f=open('../{}'.format(i)).read()
        f = open('{}'.format(i)).read()

        out_filename = i.replace('_clean.txt_',
                                 '_').replace('/temp', '/processed_novel')
        #    out=open('../{}_n2v'.format(out_filename),'w')
        out = open('{}_n2v'.format(out_filename), 'w')

        lines = tok(f)

        for line in lines:
            line = line.strip('\n')
            for alias in char_list:
                if type(alias) == list:
                    for a in alias:
                        a = sub(r'\W+', ' ', a)
                    first_name = alias[0]
                    if ' ' in first_name:
                        name_parts = first_name.split(' ')
                        character = name_parts[1]
                    else:
                        character = first_name
                    aliases = alias[1:]
                    for name in aliases:
                        if name in line:
                            char_dict_part[character] = char_dict_part.get(character, 0) + 1
                            line = line.replace(str(name), str(character))
                else:
                    if alias in line:
                        alias = sub(r'\W+', ' ', alias)
                        if ' ' in alias:
                            name_parts = alias.split(' ')
                            character = name_parts[1]
                        else:
                            character = alias  # single-word alias: use it as the canonical name
                        line = line.replace(alias, character)
                        char_dict_part[character] = char_dict_part.get(character, 0) + 1
                    else:
                        pass

            line2 = re.sub(r'\W+', r' ', line)
            line3 = line2.strip(' ')
            out.write('{}\n'.format(line3.lower()))
        add_to_char_list_final = False
Code example #11
File: wc.py  Project: WestbrookT/HackPredict
import os
from nltk import word_tokenize as tok
pos = os.listdir('winners')
neg = os.listdir('losers')

total = 0
cnt = 0
for p in pos:
    cnt += 1
    with open('winners/{}'.format(p), 'r') as f:
        total += len(tok(f.read()))

for p in neg:
    cnt += 1
    with open('losers/{}'.format(p), 'r') as f:
        total += len(tok(f.read()))

print(total, cnt, total // cnt)
Code example #12
def returnNames(url):

    theurl = "http://www.ccel.org/ccel/bible/kjv.txt"
    # raw_input("URL to .txt file: ")
    sourcefile = urllib2.urlopen(theurl)
    source = sourcefile.read()

    # Tokenize
    sourcetok = tok(source[:partition])

    # Tag POS
    sourcetag = postag(sourcetok)

    # Outputs POS-tagged text
    sourcene = ne(sourcetag, binary=False)

    charsall = []
    for n in sourcene:
        if type(n) == tree.Tree:
            if n.label() == "PERSON":
                for m in n:
                    charsall.append(m[0])

    # exclude from names:
    honorifics = [
        "Mr.",
        "Mrs.",
        "Ms.",
        "Miss",
        "Dr.",
        "Prof.",
        "Professor",
        "Lord",
        "Lady",
        "Sir",
        "Madam",
        "Dame",
        "Rev.",
        "Rabbi",
        "Version",
        "Gutenberg",
    ]

    charsallnames = []
    for s in charsall:
        if s in honorifics:
            pass
        else:
            charsallnames.append(s)

    counted = (word for word in charsallnames if word[:1].isupper())
    c = Counter(counted)
    charscommon = c.most_common(5)

    chars = []
    for s in charscommon:
        chars.append(s[0])

    # print '\nMost common names:'
    # print '\t'.join(chars)
    return chars, source
Code example #13
def main():
    """Amplifies the affect of a given text. Adverbs and adjectives are altered."""

    if len(sys.argv) == 2:
        f = sys.argv[1]
        if f[len(f) - 4:] == '.txt':
            usefile = f  #must be a .txt file
        else:
            usefile = 'great_expectations.txt'
            print("This script requires .txt files only. The file you\n\
            specified was not a .txt file. Instead, we'll use\n\
            Great Expectations as an example...")
    else:
        usefile = 'great_expectations.txt'
        print("You can define which .txt file to use like so: \n\
        python textillating.py [filename you want to use.txt]\n\
        You didn't specify a .txt file to use, so in the meantime,\n\
        we'll use Great Expectations as an example...")

    print('Processing... This may take a minute...')

    filename = open(usefile, 'r')
    text = filename.readlines()  #readlines in order to preserve line breaks
    filename.close()

    outfile = open('extremely_' + usefile, 'w')

    raw_text = []
    new_text = []

    for line in text:
        line = pos(tok(line))  #('excellent', 'JJ')
        raw_text.append(line)

    modifiers = [
        'WAY', 'ABSOLUTELY', 'ACTUALLY', 'ACUTELY', 'ALMIGHTY', 'AMPLY',
        'ASSUREDLY', 'ASTONISHINGLY', 'AWFULLY', 'CATEGORICALLY', 'CERTAINLY',
        'CLEARLY', 'CONSIDERABLY', 'DECIDEDLY', 'DEEPLY', 'DRASTICALLY',
        'EMINENTLY', 'EMPHATICALLY', 'EXAGGERATEDLY', 'EXCEEDINGLY',
        'EXCEPTIONALLY', 'EXCESSIVELY', 'EXORBITANTLY', 'EXPLICITLY',
        'EXTENSIVELY', 'EXTRAORDINARILY', 'EXTREMELY', 'FOR REAL', 'GENUINELY',
        'GREATLY', 'HIGHLY', 'HUGELY', 'IMMENSELY', 'IMMODERATELY',
        'INCREDIBLY', 'INDUBITABLY', 'INORDINATELY', 'INTENSELY', 'LARGELY',
        'LEGITIMATELY', 'LITERALLY', 'MARKEDLY', 'NOTABLY', 'NOTICEABLY',
        'OBVIOUSLY', 'OVERLY', 'PARTICULARLY', 'PLENTY', 'POSITIVELY',
        'POWERFULLY', 'PRODIGIOUSLY', 'PROFOUNDLY', 'PROHIBITIVELY', 'QUITE',
        'RADICALLY', 'REALLY', 'REAL', 'REMARKABLY', 'SEVERELY', 'STRIKINGLY',
        'SUBSTANTIALLY', 'SUPER', 'SUPERLATIVELY', 'SURPASSINGLY',
        'SURPRISINGLY', 'TERRIBLY', 'TERRIFICALLY', 'TOO', 'TOTALLY', 'TRULY',
        'ULTRA', 'UNCOMMONLY', 'UNDENIABLY', 'UNDOUBTEDLY', 'UNEQUIVOCALLY',
        'UNMISTAKABLY', 'UNQUESTIONABLY', 'UTTERLY', 'VASTLY', 'VERILY',
        'VERY', 'VIOLENTLY', 'VITALLY', 'WONDERFULLY'
    ]

    for line in raw_text:  #goes line by line to preserve line breaks
        for word in line:
            word_score = sid.polarity_scores(word[0])['compound']
            use_synonym = word[0]  #updates later
            possible_synonyms = []
            if wf.blacklisted(word[0]):
                pass
            elif word[0].lower() in ignore:
                pass
            elif word[1] == 'JJ':  #adjectives only; adverbs don't quite work well here
                for syn in wn.synsets(word[0]):
                    for lemma in syn.lemmas():
                        syn_meta = str(syn).split('.')  #match part of speech
                        if syn_meta[1] in ('a', 's'):  #adjective and satellite-adjective synsets
                            possible_synonyms.append(lemma.name())
                all_synonyms = set(possible_synonyms)  #de-dupe
                for synonym in all_synonyms:
                    syn_score = sid.polarity_scores(synonym)['compound']
                    #scores range from -1 to 1, 1 being positive affect
                    if wf.blacklisted(synonym):
                        pass
                    elif word_score == 0:
                        if abs(syn_score) > abs(
                                sid.polarity_scores(use_synonym)['compound']):
                            use_synonym = synonym  #choose most xtreme synonym (either pos or neg)
                    elif word_score > 0:
                        if syn_score > sid.polarity_scores(
                                use_synonym)['compound']:
                            use_synonym = synonym  #choose most xtreme synonym (positive)
                    elif word_score < 0:
                        if syn_score < sid.polarity_scores(
                                use_synonym)['compound']:
                            use_synonym = synonym  #choose most xtreme synonym (negative)
                if use_synonym == word[0]:
                    use_synonym = modifiers[randint(
                        0,
                        len(modifiers) - 1)] + ' ' + word[0]  #VERY neutral
                use_synonym = use_synonym.replace("_", " ").upper()
            elif word[0] == '.':
                use_synonym = '!'
            elif word[0] == '!':
                use_synonym = '!!!!!!!!!!!!!'
            elif word[0] == '?':
                use_synonym = '??!!'
            else:
                use_synonym = word[0]
            new_text.append(use_synonym)
        new_text.append('\n')  #preserve line breaks

    outfile.write(detok.detokenize(new_text))
    #Does not deal with quotation marks well. Adds a space before/after them

    outfile.close()

    print('All done! See extremely_' + usefile +
          ' for your newly exciting text.')
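This excerpt also omits its imports. From the names it uses (tok, pos, wn, sid, detok, randint, wf, ignore), a plausible header for the script would be roughly the sketch below; sid and detok are guesses inferred from the polarity_scores and detokenize calls, and wf and ignore are project-local helpers that are only stubbed here.

# Assumed imports and stubs for the excerpt above (a sketch, not the project's actual header)
import sys
from random import randint
from nltk import word_tokenize as tok
from nltk import pos_tag as pos
from nltk.corpus import wordnet as wn
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize.treebank import TreebankWordDetokenizer

sid = SentimentIntensityAnalyzer()    # provides polarity_scores()
detok = TreebankWordDetokenizer()     # provides detokenize()
ignore = set()                        # placeholder for the project's ignore list

class wf:                             # placeholder for the project's word-filter helper
    @staticmethod
    def blacklisted(word):
        return False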