Beispiel #1
0
def generateXML(offloadFile, DEID):
    """Parse an offload text file and save its entries as an XML file.

    The entries come from ``organizeDATA.organizeANDparse(offloadFile)`` and
    are handed to ``save2XML`` together with the output path.

    Args:
        offloadFile: Path of the offload file; normally ends in ``.txt``.
        DEID: Truthy to de-identify every entry before saving, falsy to save
            the entries as parsed.

    Returns:
        The output path passed to ``save2XML``: ``<stem>.DEID.xml`` when
        ``DEID`` is truthy, ``<stem>.noDEID.xml`` otherwise.

    The output name is derived by swapping only a *trailing* ``.txt``.  A
    plain ``str.replace`` would also rewrite ``.txt`` inside directory names
    and, for a name without ``.txt``, would yield the input path itself as
    the output path.  Names without the suffix get the new suffix appended.
    """
    txtSuffix = ".txt"
    if offloadFile.endswith(txtSuffix):
        stem = offloadFile[:-len(txtSuffix)]
    else:
        stem = offloadFile

    entries = organizeDATA.organizeANDparse(offloadFile)

    if not DEID:
        XMLfile = stem + ".noDEID.xml"
        save2XML(entries, XMLfile)
        return XMLfile

    # Imported only on this path, so the non-DEID path never needs the module.
    import importDEIDresources
    DEIDlists = importDEIDresources.getGazetteers()
    # Single-argument parenthesised form prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print("DEID resources imported")

    DEIDentries = []
    for entry in entries:
        # Separate local name: the DEID flag parameter is not rebound.
        deidentifier = DeIdentifier(entry["contents"], DEIDlists)
        entry["contentsDEID"] = resetControlCodes(deidentifier.deidentifyRAW())
        # Drop the original text so only the processed copy is saved.
        del entry["contents"]
        DEIDentries.append(entry)

    DEIDXMLfile = stem + ".DEID.xml"
    save2XML(DEIDentries, DEIDXMLfile)
    return DEIDXMLfile
Beispiel #2
0
    basket=["","","",words[index],"","",""]
    i=0
    for x in xrange(index-3,index+4):
        if x >= 0:
            try:
                if words[x] != '':
                    basket[i]=words[x].lower()
            except:
                basket[i]=""
        i+=1
    print "===", basket

if __name__ == "__main__":
    # Usage banner, printed on every command-line run before any work starts.
    print(
        '\n'
        '###\n'
        'Commandline usage:\n'
        '\n'
        'echo "This is my text with a name, Marc, in it." | python DeIdentifier.py\n'
        '###\n'
        '\t'
    )

    # Collect and read in all resources.
    import importDEIDresources
    DEIDlists = importDEIDresources.getGazetteers()

    # The text to process is whatever arrives on standard input.
    import sys
    dataPoint = sys.stdin.read()

    DEID = DeIdentifier(dataPoint, DEIDlists)
    DEIDtext = DEID.deidentifyRAW()
    # One pre-formatted argument keeps the printed line identical under both
    # the print statement and the print function.
    print("output: %s" % (DEIDtext,))