def getDuffyScore(sent1, sent2):
    """Score two sentences against each other via ``CollinsDuffy``.

    Each sentence is run through ``getNLPToks``; the ``'parse'`` entry of the
    result is loaded into a fresh ``tree`` by ``generateTree`` and the tree is
    then passed through ``flipTree`` before the two trees are compared.

    Args:
        sent1: First sentence, in whatever form ``getNLPToks`` accepts.
        sent2: Second sentence.

    Returns:
        The ``(rscore_st, nscore_st)`` pair produced by ``CollinsDuffy``
        (presumably raw and normalised scores -- confirm against its
        definition).
    """
    trees = (tree(), tree())
    parses = [getNLPToks(sentence) for sentence in (sent1, sent2)]

    for parsed, target in zip(parses, trees):
        generateTree(parsed['parse'], target)
    for target in trees:
        flipTree(target)

    # NOTE(review): 0.8, 1, 1 are forwarded as-is; their meaning is defined by
    # CollinsDuffy (0.8 looks like a decay factor -- confirm).
    first_score, second_score = CollinsDuffy(trees[0], trees[1], 0.8, 1, 1)
    return first_score, second_score
def getMoschittiScore(sent1, sent2):
    """Score two sentences against each other via ``MoschittiPT``.

    Each sentence is run through ``getNLPToks``; the ``'parse'`` entry of the
    result is loaded into a fresh ``tree`` by ``generateTree`` and the tree is
    then passed through ``flipTree`` before the two trees are compared.

    Args:
        sent1: First sentence, in whatever form ``getNLPToks`` accepts.
        sent2: Second sentence.

    Returns:
        Only the second value of the pair returned by ``MoschittiPT``
        (presumably the normalised score -- confirm against its definition).
        The first value is discarded.
    """
    trees = (tree(), tree())
    parses = [getNLPToks(sentence) for sentence in (sent1, sent2)]

    for parsed, target in zip(parses, trees):
        generateTree(parsed['parse'], target)
    for target in trees:
        flipTree(target)

    # NOTE(review): 0.8, 1, 1 are forwarded as-is; their meaning is defined by
    # MoschittiPT -- confirm.
    _unused_first, second_score = MoschittiPT(trees[0], trees[1], 0.8, 1, 1)
    return second_score
def parseCandidateBooks(candidate):
    """Parse every sentence of a candidate into trees, keeping the raw parses.

    For each sentence two independent trees are built from the same
    ``getNLPToks(...)['parse']`` value and both are passed through
    ``flipTree``. The first tree is kept as-is; the second is handed to
    ``removeTokens`` together with the sentence and that call's result is kept.

    NOTE(review): a second ``parseCandidateBooks`` (paragraph-based, returning
    a 2-tuple) appears later in this file; if both live in the same module
    the later one shadows this one -- confirm which is intended.

    Args:
        candidate: Iterable of sentences accepted by ``getNLPToks``.

    Returns:
        A 3-tuple ``(trees, parses, token_free_trees)`` of parallel lists,
        one entry per sentence.
    """
    trees, parses, token_free_trees = [], [], []

    for sentence in candidate:
        parse_value = getNLPToks(sentence)['parse']
        # Two separate trees are built because one of them is handed to
        # removeTokens while the other is kept untouched.
        full_tree, scratch_tree = tree(), tree()
        for target in (full_tree, scratch_tree):
            generateTree(parse_value, target)
        parses.append(parse_value)
        for target in (full_tree, scratch_tree):
            flipTree(target)
        trees.append(full_tree)
        token_free_trees.append(removeTokens(scratch_tree, sentence))

    print('candidate')
    return (trees, parses, token_free_trees)
def parseBook(candidate):
    """Parse every sentence of a book into a flipped tree plus its raw parse.

    NOTE(review): a second ``parseBook`` (paragraph-based, returning only the
    trees) appears later in this file; if both live in the same module the
    later one shadows this one -- confirm which is intended.

    Args:
        candidate: Iterable of sentences accepted by ``getNLPToks``.

    Returns:
        A 2-tuple ``(trees, parses)`` of parallel lists: the tree built by
        ``generateTree`` and passed through ``flipTree``, and the ``'parse'``
        value it was built from.
    """
    trees, parses = [], []

    for sentence in candidate:
        parse_value = getNLPToks(sentence)['parse']
        sentence_tree = tree()
        generateTree(parse_value, sentence_tree)
        flipTree(sentence_tree)
        parses.append(parse_value)
        trees.append(sentence_tree)

    return (trees, parses)
def parseNewText(chunk):
    """Parse every sentence of a text chunk into trees, keeping the raw parses.

    For each sentence two independent trees are built from the same
    ``getNLPToks(...)['parse']`` value and both are passed through
    ``flipTree``. The first tree is kept as-is; the second is handed to
    ``removeTokens`` together with the sentence and that call's result is kept.

    NOTE(review): further ``parseNewText`` definitions (paragraph-based, with
    different return shapes) appear later in this file; if they live in the
    same module the last one shadows this one -- confirm which is intended.

    Args:
        chunk: Iterable of sentences accepted by ``getNLPToks``.

    Returns:
        A 3-tuple ``(trees, parses, token_free_trees)`` of parallel lists,
        one entry per sentence.
    """
    trees, parses, token_free_trees = [], [], []

    for sentence in chunk:
        parse_value = getNLPToks(sentence)['parse']
        # Two separate trees are built because one of them is handed to
        # removeTokens while the other is kept untouched.
        full_tree, scratch_tree = tree(), tree()
        for target in (full_tree, scratch_tree):
            generateTree(parse_value, target)
        parses.append(parse_value)
        for target in (full_tree, scratch_tree):
            flipTree(target)
        trees.append(full_tree)
        token_free_trees.append(removeTokens(scratch_tree, sentence))

    print('over')
    return (trees, parses, token_free_trees)
def parseCandidateBooks(candidate):
    """Parse a candidate, paragraph by paragraph, into per-sentence trees.

    Every paragraph is split with ``sent_tokenize``. For each sentence two
    independent trees are built from the same ``getNLPToks(...)['parse']``
    value and both are passed through ``flipTree``. The first tree is kept
    as-is; the second is handed to ``removeTokens`` together with the sentence
    and that call's result is kept.

    NOTE(review): an earlier ``parseCandidateBooks`` (sentence-based,
    returning a 3-tuple) appears in this file; if both live in the same
    module this definition replaces it -- confirm that is intended.

    Args:
        candidate: Iterable of paragraph strings.

    Returns:
        A 2-tuple ``(trees, token_free_trees)``. Each element is a list with
        one inner list per paragraph, holding one entry per sentence.
    """
    trees_by_para = []
    token_free_by_para = []

    for paragraph in candidate:
        para_trees = []
        para_token_free = []
        for sentence in sent_tokenize(paragraph):
            parse_value = getNLPToks(sentence)['parse']
            # Two separate trees are built because one of them is handed to
            # removeTokens while the other is kept untouched.
            full_tree, scratch_tree = tree(), tree()
            for target in (full_tree, scratch_tree):
                generateTree(parse_value, target)
            for target in (full_tree, scratch_tree):
                flipTree(target)
            para_trees.append(full_tree)
            para_token_free.append(removeTokens(scratch_tree, sentence))
        trees_by_para.append(para_trees)
        token_free_by_para.append(para_token_free)

    print('candidate')
    return trees_by_para, token_free_by_para
def parseNewText(paraChunk):
    """Parse a chunk of paragraphs into per-sentence trees.

    Every paragraph is split with ``sent_tokenize``. For each sentence two
    independent trees are built from the same ``getNLPToks(...)['parse']``
    value and both are passed through ``flipTree``. The first tree is kept
    as-is; the second is handed to ``removeTokens`` together with the sentence
    and that call's result is kept.

    NOTE(review): other ``parseNewText`` definitions with different return
    shapes appear in this file; if they live in the same module only the last
    one is effective -- confirm which is intended.

    Args:
        paraChunk: Iterable of paragraph strings.

    Returns:
        A 2-tuple ``(trees, token_free_trees)``. Each element is a list with
        one inner list per paragraph, holding one entry per sentence.
    """
    trees_by_para = []
    token_free_by_para = []

    for paragraph in paraChunk:
        para_trees = []
        para_token_free = []
        for sentence in sent_tokenize(paragraph):
            parse_value = getNLPToks(sentence)['parse']
            # Two separate trees are built because one of them is handed to
            # removeTokens while the other is kept untouched.
            full_tree, scratch_tree = tree(), tree()
            for target in (full_tree, scratch_tree):
                generateTree(parse_value, target)
            for target in (full_tree, scratch_tree):
                flipTree(target)
            para_trees.append(full_tree)
            para_token_free.append(removeTokens(scratch_tree, sentence))
        trees_by_para.append(para_trees)
        token_free_by_para.append(para_token_free)

    print('over')
    return trees_by_para, token_free_by_para
def parseNewText(paraChunk):
    """Parse a chunk of paragraphs into one flipped tree per sentence.

    Prints ``'Parsing chunk'`` once on entry. Every paragraph is split with
    ``sent_tokenize``; each sentence is run through ``getNLPToks``, its
    ``'parse'`` value is loaded into a fresh ``tree`` by ``generateTree`` and
    the tree is passed through ``flipTree``.

    NOTE(review): earlier ``parseNewText`` definitions with different return
    shapes appear in this file; if they live in the same module this
    definition replaces them -- confirm that is intended.

    Args:
        paraChunk: Iterable of paragraph strings.

    Returns:
        A list with one inner list per paragraph, each holding one tree per
        sentence.
    """
    def _sentence_tree(sentence):
        # Build and flip the tree for a single sentence.
        parsed = getNLPToks(sentence)
        sentence_tree = tree()
        generateTree(parsed['parse'], sentence_tree)
        flipTree(sentence_tree)
        return sentence_tree

    print('Parsing chunk')
    return [
        [_sentence_tree(sentence) for sentence in sent_tokenize(paragraph)]
        for paragraph in paraChunk
    ]
def parseBook(candidate):
    """Parse a book, paragraph by paragraph, into one flipped tree per sentence.

    Every paragraph is split with ``sent_tokenize``; each sentence is run
    through ``getNLPToks``, its ``'parse'`` value is loaded into a fresh
    ``tree`` by ``generateTree`` and the tree is passed through ``flipTree``.

    NOTE(review): an earlier ``parseBook`` (sentence-based, returning a
    ``(trees, parses)`` tuple) appears in this file; if both live in the same
    module this definition replaces it -- confirm that is intended.

    Args:
        candidate: Iterable of paragraph strings.

    Returns:
        A list with one inner list per paragraph, each holding one tree per
        sentence.
    """
    def _sentence_tree(sentence):
        # Build and flip the tree for a single sentence.
        parsed = getNLPToks(sentence)
        sentence_tree = tree()
        generateTree(parsed['parse'], sentence_tree)
        flipTree(sentence_tree)
        return sentence_tree

    return [
        [_sentence_tree(sentence) for sentence in sent_tokenize(paragraph)]
        for paragraph in candidate
    ]
for book in booksList: for sent in reducedSentences[book]: reducedBooks[book].append(books[book][sent]) pickling_on = open("./bible/reducedBooks.pickle", "wb") pickle.dump(reducedBooks, pickling_on) i = 0 parseTrees = list() parsedSentences = list() for sent in text: if i % 10 == 0: print(i) sentParse = getNLPToks(sent) tempTree = tree() generateTree(sentParse['parse'], tempTree) parsedSentences.append(sentParse['parse']) flipTree(tempTree) parseTrees.append(tempTree) i = i + 1 pickling_on = open("./bible/parseTrees.pickle", "wb") pickle.dump(parseTrees, pickling_on) potentialParseTrees = dict() potentialParsedSentences = dict() for book in booksList: print(book) candidate = reducedBooks[book]