예제 #1
0
def parseContents(contentList, parser=None):
    """Extract typed-dependency argument pairs from a list of text items.

    All items are joined with '. ' into one string so that the parser is
    invoked exactly once for the whole input.

    Args:
        contentList: iterable of strings to be parsed together.
        parser: optional object exposing ``parse(text)`` that returns the
            typed-dependency output as a string.  Passing one in lets the
            caller reuse an already initialised parser; when omitted a
            ``StanfordParser`` is created from the default install path.

    Returns:
        A one-element list.  Its single item is the list of
        ``(first, second)`` tuples captured from every ``(first, second)``
        group found in the cleaned parser output.
    """
    if parser is None:
        parser = StanfordParser("/home/rohith/stanford-parser")

    # Single parse call over the joined text; initialising/invoking the
    # parser per item would be far more expensive.
    typedDep = parser.parse('. '.join(contentList))

    # Drop every run of digits and '-' characters from the parser output.
    typedDep = re.sub('[0-9-]+', "", typedDep)

    # Raw string: '\(' in a plain literal is an invalid escape sequence.
    # '.' does not match newlines, so each match stays within one line.
    rx = re.compile(r"\((.+), (.+)\)")
    return [rx.findall(typedDep)]
예제 #2
0
def parseSenses(senseList, parser=None):
    """Extract typed-dependency argument pairs for each sense in a list.

    The senses are joined with '. ' into one string, passed through
    ``stemWords(..., rmStopWords=True)`` (defined elsewhere; presumably
    stems the words and removes stop words) and parsed in a single call.
    The parser output is then split on blank lines so that each chunk
    yields its own list of pairs.

    Args:
        senseList: iterable of sense strings to be parsed together.
        parser: optional object exposing ``parse(text)`` that returns the
            typed-dependency output as a string.  Passing one in lets the
            caller reuse an already initialised parser; when omitted a
            ``StanfordParser`` is created from the default install path.

    Returns:
        A list with one entry per blank-line-separated chunk of the cleaned
        parser output; each entry is the list of ``(first, second)`` tuples
        captured from that chunk.
    """
    # Join first so the (expensive) parser only has to run once.
    joined = stemWords('. '.join(senseList), rmStopWords=True)

    if parser is None:
        parser = StanfordParser("/home/rohith/stanford-parser")

    # Drop every run of digits and '-' characters from the parser output.
    typedDep = re.sub('[0-9-]+', "", parser.parse(joined))

    # Chunks are separated by a blank line.
    typedDepList = typedDep.split("\n\n")
    # A trailing separator leaves an empty last chunk; discard only that.
    # Popping unconditionally would lose the final chunk whenever the
    # output does not end with a blank line.
    if not typedDepList[-1].strip():
        typedDepList.pop()

    # Raw string: '\(' in a plain literal is an invalid escape sequence.
    rx = re.compile(r"\((.+), (.+)\)")
    return [rx.findall(dep) for dep in typedDepList]