def test_set_url(self):
    """A scheme-less host:port is persisted with an 'http://' prefix."""
    helpers.set_corenlp_url('some-url.org:1234')
    # The URL is written to disk; read it back to verify what was stored.
    with open(CORENLP_SERVER_URL, 'r') as url_file:
        stored_url = url_file.read().strip('\n')
    self.assertEqual('http://some-url.org:1234', stored_url)
def test_url_default(self):
    """A URL that already carries a scheme is stored unchanged."""
    helpers.set_corenlp_url('http://0.0.0.0:0000')
    # Read the persisted value back to confirm no second prefix was added.
    with open(CORENLP_SERVER_URL, 'r') as url_file:
        stored_url = url_file.read().strip('\n')
    self.assertEqual('http://0.0.0.0:0000', stored_url)
def test_predictions(self):
    """Classifier.predict yields per-sentence scores plus a 'document' total.

    Exercises both accepted input forms: a raw multi-sentence string
    (sentence-split and parsed via the CoreNLP server configured below)
    and a dict that already carries a sentence with its dependency parses.

    NOTE(review): integration test — requires the CoreNLP server at the
    address below, and exact float equality assumes a deterministic,
    fixed model; confirm this holds across platforms.
    """
    helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194')
    data = "Have you found the answer for your question? If yes would you" \
        " please share it? Sorry :) I dont want to hack the system!! :" \
        ") is there another way? What are you trying to do? Why can't" \
        " you just store the \"Range\"? This was supposed to have been" \
        " moved to <url> per the cfd. why wasn't it moved?"
    # One dict per detected sentence mapping it to a pair of class
    # confidences (presumably [polite, impolite] — confirm against the
    # classifier), followed by an aggregate entry keyed 'document'.
    expected = [{
        'Have you found the answer for your question?':
        [0.45793466358055329, 0.54206533641944665]
    }, {
        'If yes would you please share it?':
        [0.47243183562775615, 0.52756816437224363]
    }, {
        'Sorry :) I dont want to hack the system!!':
        [0.54823398057393613, 0.45176601942606376]
    }, {
        ':) is there another way?':
        [0.54151149263615428, 0.45848850736384572]
    }, {
        'What are you trying to do?':
        [0.25075788316047232, 0.74924211683952746]
    }, {
        'Why can\'t you just store the "Range"?':
        [0.10255615890730475, 0.89744384109269537]
    }, {
        'This was supposed to have been moved to <url> per the cfd.':
        [0.38666486673559936, 0.61333513326440048]
    }, {
        "why wasn't it moved?":
        [0.29890263769016051, 0.70109736230983943]
    }, {
        'document': [0.38237418986399213, 0.61762581013600781]
    }]
    actual = self.classifier.predict(data)
    self.assertEqual(expected, actual)
    # Pre-parsed form: the caller supplies the sentence and its dependency
    # parses directly, so no CoreNLP round trip should be needed.
    data = {
        'sentence': 'If yes would you please share it?',
        'parses': [
            'ROOT(root-0, please-5)', 'dep(please-5, If-1)',
            'dep(please-5, yes-2)', 'aux(please-5, would-3)',
            'nsubj(please-5, you-4)', 'dobj(please-5, share-6)',
            'dep(please-5, it-7)', 'punct(please-5, ?-8)'
        ]
    }
    # With a single sentence, its scores double as the document scores.
    expected = [{
        'If yes would you please share it?':
        [0.47243183562775615, 0.52756816437224363]
    }, {
        'document': [0.47243183562775615, 0.52756816437224363]
    }]
    actual = self.classifier.predict(data)
    self.assertEqual(expected, actual)
def test_format_doc(self):
    """A plain string becomes one chunk of sentences plus their parses.

    Integration test: parsing goes through the CoreNLP server
    configured below.
    """
    helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')
    walrus_parses = [
        'ROOT(root-0, walrus-4)', 'nsubj(walrus-4, I-1)',
        'cop(walrus-4, am-2)', 'det(walrus-4, the-3)',
        'punct(walrus-4, .-5)'
    ]
    actual = helpers.format_doc('I am the walrus.')
    self.assertEqual(
        [{'sentences': ['I am the walrus.'], 'parses': walrus_parses}],
        actual)
def test_format_doc_dict(self):
    """Caller-supplied parses are passed straight through by format_doc."""
    helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')
    sentence = 'If yes would you please share it?'
    # Shared by both the input mapping and the expected output.
    parse_tree = [
        'ROOT(root-0, please-5)', 'dep(please-5, If-1)',
        'dep(please-5, yes-2)', 'aux(please-5, would-3)',
        'nsubj(please-5, you-4)', 'dobj(please-5, share-6)',
        'dep(please-5, it-7)', 'punct(please-5, ?-8)'
    ]
    expected = {'sentences': [sentence], 'parses': parse_tree}
    for sent, deps in {sentence: parse_tree}.items():
        actual = helpers.format_doc(sent, deps)
        self.assertEqual(expected, actual)
def write(args):
    """Score each post in a CSV dump for politeness and write an augmented CSV.

    Reads ``args.read_file`` (a Stack-Exchange-style ``Posts`` export),
    strips HTML and ``<code>`` snippets from each body, runs the
    politeness classifier on the remaining text, and writes every input
    column plus the cleaned body and the two confidence scores to
    ``args.write_file``.  Rows that fail to parse or classify are
    skipped, best effort, with a running count printed.

    Fixes over the previous version: the output file is closed via
    ``with``; the bare ``except:`` no longer swallows ``KeyboardInterrupt``;
    the skip counter is incremented before it is printed (it used to
    report one less than the true total); an unused local was removed.
    """
    # The classifier delegates parsing to a running CoreNLP server.
    set_corenlp_url("http://localhost:9000")
    cls = Classifier()

    data = pandas.read_csv(args.read_file, sep=',', error_bad_lines=False)

    # Input column index for every pass-through field, in output order.
    column_map = [
        ("Id", 0), ("PostTypeId", 1), ("AcceptedAnswerId", 2),
        ("ParentId", 3), ("CreationDate", 4), ("DeletionDate", 5),
        ("Score", 6), ("ViewCount", 7), ("Body", 8),
        ("OwnerUserId", 9), ("OwnerDisplayName", 10),
        ("LastEditorUserId", 11), ("LastEditorDisplayName", 12),
        ("LastEditDate", 13), ("LastActivityDate", 14), ("Title", 15),
        ("Tags", 16), ("AnswerCount", 17), ("CommentCount", 18),
        ("FavoriteCount", 19), ("ClosedDate", 20),
        ("CommunityOwnedDate", 21),
    ]
    fieldnames = [
        "Id", "PostTypeId", "AcceptedAnswerId", "ParentId", "CreationDate",
        "DeletionDate", "Score", "ViewCount", "Body", "BodyNOHTML",
        "PolitenessConfidence", "ImpolitenessConfidence", "OwnerUserId",
        "OwnerDisplayName", "LastEditorUserId", "LastEditorDisplayName",
        "LastEditDate", "LastActivityDate", "Title", "Tags", "AnswerCount",
        "CommentCount", "FavoriteCount", "ClosedDate", "CommunityOwnedDate"
    ]

    skips = 0
    # ``with`` guarantees the output file is flushed and closed even if
    # the run dies part-way through.
    with open(args.write_file, 'w', newline='') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()

        for i in range(data.shape[0]):
            try:
                row = {name: str(data.iloc[i, col])
                       for name, col in column_map}

                # Strip <code> snippets, then all remaining HTML markup.
                soup = BeautifulSoup(str(data.iloc[i, 8]), "lxml")
                for snippet in soup.find_all('code'):
                    snippet.decompose()
                text = soup.get_text()
                # Flatten structural whitespace so the classifier sees a
                # single-line string.
                text = text.replace('\n', ' ').replace('\r', '') \
                           .replace('\t', '')

                # The last entry of the prediction output holds the score
                # pair for the entire document.
                doc_scores = list(cls.predict(text)[-1].values())
                row["BodyNOHTML"] = text
                row["PolitenessConfidence"] = str(doc_scores[0][0])
                row["ImpolitenessConfidence"] = str(doc_scores[0][1])

                if i % 1000 == 0:
                    print("entry: " + str(i) + " of " + str(data.shape[0]))
                writer.writerow(row)
            except Exception:
                # Best effort: a malformed row must not kill the run.
                # Increment first so the printed total is accurate.
                skips += 1
                print("Total skips = " + str(skips))
                continue
def test_format_doc_str(self):
    """format_doc turns each raw line of a fixture file into parse chunks.

    Every line read from TEST_DOCUMENT_PATH is formatted independently;
    each result is a list of dicts pairing a 'sentences' list with its
    dependency 'parses'.

    NOTE(review): integration test — sentence splitting and parsing go
    through the CoreNLP server configured below, and the expected parses
    assume a specific CoreNLP model version; confirm before re-pinning.
    """
    helpers.set_corenlp_url('artifacts.gccis.rit.edu:41194/')
    # Expected output per fixture line, in file order.
    expected = [
        [{
            'sentences': ['Have you found the answer for your question?'],
            'parses': [
                'ROOT(root-0, found-3)', 'aux(found-3, Have-1)',
                'nsubj(found-3, you-2)', 'det(answer-5, the-4)',
                'dobj(found-3, answer-5)', 'case(question-8, for-6)',
                'nmod:poss(question-8, your-7)',
                'nmod:for(answer-5, question-8)', 'punct(found-3, ?-9)'
            ]
        }, {
            'sentences': ['If yes would you please share it?'],
            'parses': [
                'ROOT(root-0, please-5)', 'dep(please-5, If-1)',
                'dep(please-5, yes-2)', 'aux(please-5, would-3)',
                'nsubj(please-5, you-4)', 'dobj(please-5, share-6)',
                'dep(please-5, it-7)', 'punct(please-5, ?-8)'
            ]
        }],
        [{
            'sentences': ['Sorry :) I dont want to hack the system!!'],
            'parses': [
                'ROOT(root-0, :-RRB--2)', 'amod(:-rrb--2, Sorry-1)',
                'nsubj(want-5, I-3)', 'nsubj:xsubj(hack-7, I-3)',
                'aux(want-5, dont-4)', 'acl:relcl(:-rrb--2, want-5)',
                'mark(hack-7, to-6)', 'xcomp(want-5, hack-7)',
                'det(system-9, the-8)', 'dobj(hack-7, system-9)',
                'nummod(:-rrb--2, !!-10)'
            ]
        }, {
            'sentences': [':) is there another way?'],
            'parses': [
                'ROOT(root-0, :-RRB--1)', 'acl(:-rrb--1, is-2)',
                'expl(is-2, there-3)', 'det(way-5, another-4)',
                'nsubj(is-2, way-5)', 'punct(:-rrb--1, ?-6)'
            ]
        }],
        [{
            'sentences': ['What are you trying to do?'],
            'parses': [
                'ROOT(root-0, trying-4)', 'dep(trying-4, What-1)',
                'aux(trying-4, are-2)', 'nsubj(trying-4, you-3)',
                'nsubj:xsubj(do-6, you-3)', 'mark(do-6, to-5)',
                'xcomp(trying-4, do-6)', 'punct(trying-4, ?-7)'
            ]
        }, {
            'sentences': ['Why can\'t you just store the "Range"?'],
            'parses': [
                'ROOT(root-0, store-6)', 'advmod(store-6, Why-1)',
                'aux(store-6, ca-2)', "neg(store-6, n't-3)",
                'nsubj(store-6, you-4)', 'advmod(store-6, just-5)',
                'det(range-9, the-7)', 'punct(range-9, ``-8)',
                'dobj(store-6, Range-9)', "punct(range-9, ''-10)",
                'punct(store-6, ?-11)'
            ]
        }],
        [{
            'sentences': [
                'This was supposed to have been moved to <url> per '
                'the cfd.'
            ],
            'parses': [
                'ROOT(root-0, supposed-3)', 'nsubjpass(supposed-3, This-1)',
                'nsubjpass:xsubj(moved-7, This-1)',
                'auxpass(supposed-3, was-2)', 'mark(moved-7, to-4)',
                'aux(moved-7, have-5)', 'auxpass(moved-7, been-6)',
                'xcomp(supposed-3, moved-7)', 'mark(<url>-9, to-8)',
                'xcomp(moved-7, <url>-9)', 'case(cfd-12, per-10)',
                'det(cfd-12, the-11)', 'nmod:per(<url>-9, cfd-12)',
                'punct(supposed-3, .-13)'
            ]
        }, {
            'sentences': ["why wasn't it moved?"],
            'parses': [
                'ROOT(root-0, moved-5)', 'advmod(moved-5, why-1)',
                'auxpass(moved-5, was-2)', "neg(moved-5, n't-3)",
                'nsubjpass(moved-5, it-4)', 'punct(moved-5, ?-6)'
            ]
        }]
    ]
    # Load the fixture lines, dropping only the trailing newline.
    data = []
    with open(TEST_DOCUMENT_PATH, 'r') as f:
        for line in f.readlines():
            data.append(line.strip('\n'))
    # Each fixture line must format to its matching expected chunk list.
    for i, line in enumerate(data):
        actual = helpers.format_doc(line)
        self.assertEqual(expected[i], actual)
"""Flask front end for the politeness classifier."""
import os
import math

from flask import Flask
from flask import render_template, request, jsonify

import politeness
from politeness.classifier import Classifier
from politeness.helpers import set_corenlp_url

# Parsing is delegated to a CoreNLP server expected at this address.
set_corenlp_url('http://127.0.0.1:5000/')
cls = Classifier()

app = Flask(__name__)
# TODO: load settings with app.config.from_object(os.environ['APP_SETTINGS'])


@app.route("/")
def text_input_form():
    """Serve the HTML form used to submit text for classification."""
    return render_template("politeness-form.html")
def _set_corenlp_url(args):
    """CLI handler: show the configured CoreNLP URL or store a new one.

    With ``args.list`` truthy the current URL is printed; otherwise
    ``args.url`` becomes the new server address.
    """
    # Imported lazily so the CLI parser loads without the package.
    from politeness import helpers

    if not args.list:
        helpers.set_corenlp_url(args.url)
    else:
        print(helpers.get_corenlp_url())