Python Preprocessor.rawText Examples

Programming Language: Python

Namespace/Package Name: Preprocessing.Preprocessor

Class/Type: Preprocessor

Method/Function: rawText

Examples at hotexamples.com: 2

Python Preprocessor.rawText – 2 examples found. These are the top-rated real-world Python examples of Preprocessing.Preprocessor.Preprocessor.rawText, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Preprocessor(18)

getMetaMapConcepts(2)

offsetParse(2)

rawText(2)

getParseTree(1)

getRoot(1)

posTaggedText(1)

process(1)

timexTagAndTokenizeText(1)

timexTagText(1)

wordTokenizeText(1)

Example #1

Show file

File: DrugnameMetamapExtractor.py Project: tkakar/FDA-Textmining

class DrugnameMetamapExtractor(object):
    """Extract drug-name mentions from a text file using MetaMap.

    The raw text is loaded through ``Preprocessor`` when the extractor is
    constructed. ``findEntity`` sends that text to MetaMap and keeps the
    concepts whose semantic types include one of ``_DRUG_SEMTYPES``.
    """

    # Path of the MetaMap binary installed on the local machine. Kept as a
    # class attribute so a subclass can point at a different installation.
    _METAMAP_BINARY = '/work/tkakar/public_mm/bin/metamap14'

    # UMLS semantic-type abbreviations treated as drug mentions
    # (phsu = Pharmacologic Substance, orch = Organic Chemical).
    _DRUG_SEMTYPES = frozenset(['phsu', 'orch'])

    def __init__(self, rawTextFileName, intermediateXMLFileName):
        """Create the preprocessor and cache the raw text it returns.

        Args:
            rawTextFileName: passed through to ``Preprocessor``.
            intermediateXMLFileName: passed through to ``Preprocessor``.
        """
        self.preprocess = Preprocessor(rawTextFileName,
                                       intermediateXMLFileName)
        self.Text = self.preprocess.rawText()

    def findEntity(self):
        """Run MetaMap over the cached text and collect drug mentions.

        Returns:
            A list of ``DrugnameElement`` objects, one per distinct offset
            entry reported for a concept with a drug semantic type.
        """
        mm = MetaMap.get_instance(self._METAMAP_BINARY)

        # NOTE(review): the second return value is not inspected, matching
        # the previous behaviour -- confirm whether failures should surface.
        concepts, _error = mm.extract_concepts(
            [self.Text], word_sense_disambiguation=True)

        offset_list = []
        drugs_list = []
        for concept in concepts:
            # semtypes arrives as a bracketed, comma-separated string.
            raw_types = concept.semtypes.replace("[", "").replace("]", "")
            sem_types = [code.strip() for code in raw_types.split(",")]

            # Handle each concept once, even when it carries several drug
            # semantic types; repeating the offset work per matching type
            # could never add anything new.
            if self._DRUG_SEMTYPES.isdisjoint(sem_types):
                continue

            # Keep the part of the trigger before the first "-" and drop
            # the bracket / quote characters around it.
            token = concept.trigger.strip().split("-")[0]
            token = token.replace("[", "").replace('"', "")

            for item in self.preprocess.offsetParse(concept.pos_info, ';'):
                # Presumably turns [start, length] into [start, end];
                # verify against Preprocessor.offsetParse.
                item[1] = item[0] + item[1]

                # Skip offsets already recorded so each span is reported once.
                if item not in offset_list:
                    offset_list.append(item)
                    drugs_list.append(token)

        return [
            DrugnameElement(drug, [offset], "DrugnameMetamapExtractor",
                            "DRUGNAME")
            for drug, offset in zip(drugs_list, offset_list)
        ]

Example #2

Show file

 def __init__(self, rawTextFileName, intermediateXMLFileName):
     """Build a ``Preprocessor`` for the two files and keep its raw text.

     The preprocessor itself is not retained; only the value returned by
     ``rawText()`` is stored, on ``self.Text``.
     """
     self.Text = Preprocessor(rawTextFileName,
                              intermediateXMLFileName).rawText()