Python lemmatizeToken Examples

Programming Language: Python

Namespace/Package Name: lemmatizeabstracts

Method/Function: lemmatizeToken

Examples at hotexamples.com: 4

Python lemmatizeToken - 4 examples found. These are the top-rated real-world Python examples of lemmatizeabstracts.lemmatizeToken, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: filterabstracts.py Project: olabknbit/acres

def keepForDiabetesCorpus(xmldoc):
    """ Decide whether an abstract should be kept for the diabetes corpus.

        Only the first 'Abstract' element of xmldoc is examined. Each token found in its
        'AbstractText' children is passed to lemmatizeabstracts.lemmatizeToken and then checked.

        The abstract is kept when BOTH of the following hold:
          * it contains more than 100 tokens, and
          * at least one token is a cost term (its lemma is in the cue set, or its text
            contains the substring 'cost') or is reported as a cost value by cvFinder.

        Returns False when the document has no 'Abstract' or no 'AbstractText' element.
    """
    abstracts = xmldoc.getElementsByTagName('Abstract')
    if not abstracts:
        return False

    abstractTexts = abstracts[0].getElementsByTagName('AbstractText')
    if not abstractTexts:
        return False

    costCues = {"cost", "QALY", "QALYs"}
    totalTokens = 0
    costTermHits = 0
    costValueHits = 0

    for node in abstractTexts:
        for sentenceText in sentenceSplitter.tokenize(xmlutil.getText(node)):
            words = tokenizer.tokenize(sentenceText)
            tokens = tokenlist.TokenList()
            tokens.convertStringList(words)
            for token in sentence.Sentence(tokens):
                totalTokens += 1
                # Lemmatize before reading token.lemma below
                # (presumably this call is what fills it in -- confirm in lemmatizeabstracts).
                lemmatizeabstracts.lemmatizeToken(token)
                if token.lemma in costCues or 'cost' in token.text:
                    costTermHits += 1
                if cvFinder.tokenIsCostValue(token):
                    costValueHits += 1

    hasCostEvidence = costValueHits > 0 or costTermHits > 0
    return hasCostEvidence and totalTokens > 100

Example #2

Show file

File: filterabstracts.py Project: rlsummerscales/acres

def keepForDiabetesCorpus(xmldoc):
    """ Return True if this abstract qualifies for the diabetes corpus.

        An abstract qualifies when it has more than 100 tokens and at least one of them is
        either a cost term (lemma in {"cost", "QALY", "QALYs"} or text containing 'cost')
        or a cost value according to cvFinder.tokenIsCostValue.

        Only the 'AbstractText' elements under the first 'Abstract' element are read.
        A document lacking either element yields False.
    """
    def lemmatizedTokens(nodes):
        """ Yield each token under the given text nodes, in document order, after lemmatizing it. """
        for node in nodes:
            rawText = xmlutil.getText(node)
            for sentenceText in sentenceSplitter.tokenize(rawText):
                pieces = tokenizer.tokenize(sentenceText)
                converted = tokenlist.TokenList()
                converted.convertStringList(pieces)
                for tok in sentence.Sentence(converted):
                    lemmatizeabstracts.lemmatizeToken(tok)
                    yield tok

    abstractElements = xmldoc.getElementsByTagName('Abstract')
    hasAbstract = abstractElements is not None and len(abstractElements) > 0
    if not hasAbstract:
        return False

    textElements = abstractElements[0].getElementsByTagName('AbstractText')
    hasText = textElements is not None and len(textElements) > 0
    if not hasText:
        return False

    cueLemmas = {"cost", "QALY", "QALYs"}
    seenTokens = 0
    sawCostTerm = False
    sawCostValue = False

    # Every token is visited (no early exit) so the token count covers the whole abstract.
    for tok in lemmatizedTokens(textElements):
        seenTokens += 1
        if tok.lemma in cueLemmas or tok.text.find('cost') != -1:
            sawCostTerm = True
        if cvFinder.tokenIsCostValue(tok):
            sawCostValue = True

    return (sawCostValue or sawCostTerm) and seenTokens > 100

Example #3

Show file

File: filterabstracts.py Project: olabknbit/acres

def keepForDiabetesCorpusCostValue(xmldoc):
    """ Decide whether an abstract should be kept for the diabetes corpus, using cost values only.

        Every 'AbstractText' element in xmldoc is split into sentences and tokens; each token is
        lemmatized and then tested with cvFinder.tokenIsCostValue.

        Returns True if at least one token is reported as a cost value, otherwise False
        (including when the document has no 'AbstractText' element).
    """
    sawCostValue = False
    for node in xmldoc.getElementsByTagName('AbstractText'):
        for sentenceText in sentenceSplitter.tokenize(xmlutil.getText(node)):
            words = tokenizer.tokenize(sentenceText)
            tokens = tokenlist.TokenList()
            tokens.convertStringList(words)
            for token in sentence.Sentence(tokens):
                lemmatizeabstracts.lemmatizeToken(token)
                # Keep scanning after a hit: all tokens are still lemmatized and tested.
                if cvFinder.tokenIsCostValue(token):
                    sawCostValue = True

    return sawCostValue

Example #4

Show file

File: filterabstracts.py Project: rlsummerscales/acres

def keepForDiabetesCorpusCostValue(xmldoc):
    """ Return True if the abstract contains at least one cost value.

        All 'AbstractText' elements of xmldoc are tokenized sentence by sentence. Each token is
        lemmatized and then checked with cvFinder.tokenIsCostValue; the matches are counted.
        A document without any 'AbstractText' element yields False.
    """
    def lemmatizedTokens(nodes):
        """ Yield each token under the given text nodes, in document order, after lemmatizing it. """
        for node in nodes:
            rawText = xmlutil.getText(node)
            for sentenceText in sentenceSplitter.tokenize(rawText):
                pieces = tokenizer.tokenize(sentenceText)
                converted = tokenlist.TokenList()
                converted.convertStringList(pieces)
                for tok in sentence.Sentence(converted):
                    lemmatizeabstracts.lemmatizeToken(tok)
                    yield tok

    textElements = xmldoc.getElementsByTagName('AbstractText')
    # sum() consumes the whole stream, so every token is processed even after the first match.
    costValueCount = sum(
        1 for tok in lemmatizedTokens(textElements) if cvFinder.tokenIsCostValue(tok)
    )
    return costValueCount > 0