def __init__(self, options): """create an instance of a criticalFinder object associated with the SQLite database. dbname: name of SQLite database """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT %s,%s FROM %s'''%(options.id,options.report_text,options.table) self.conn = sqlite.connect(options.dbname) self.cursor = self.conn.cursor() self.cursor.execute(self.query1) self.reports = self.cursor.fetchall() print "number of reports to process",len(self.reports) self.context = pyConText.pyConText() mods = itemData.instantiateFromCSV(options.lexical_kb) trgs = itemData.instantiateFromCSV(options.domain_kb) self.modifiers = itemData.itemData() for key in mods.keys(): self.modifiers.prepend(mods[key]) self.targets = itemData.itemData() for key in trgs.keys(): self.targets.prepend(trgs[key])
def markup_report(
        self,
        report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right base, suggestive of post-inflammatory changes. ''',
        modifiers=None,
        targets=None):
    """Mark up a single report with ConText and return it as HTML.

    Parameters:
        report: the report text to process (defaults to a sample
            impression).
        modifiers: optional JSON-encoded modifier rules; when None the
            instance default ``self.mod`` is used.
        targets: optional JSON-encoded target rules; when None the
            instance default ``self.tar`` is used.

    Returns:
        The report rendered as an HTML string with targets/modifiers
        color-marked.
    """
    # BUG FIX: the original called len(modifiers) and iterated modifiers
    # for debug output BEFORE the None check, so invoking the method with
    # the default arguments raised TypeError.  Debug output is now guarded.
    if modifiers is not None:
        print("type of modifiers", type(modifiers))
        print("len of modifiers", len(modifiers))
        print(modifiers)
        for m in modifiers:
            print(m)
    # resolve the modifier set: instance default, or decode the JSON payload
    if modifiers is None:
        _modifiers = self.mod
    else:
        _modifiers = itemData.itemData()
        _modifiers.extend(json.loads(modifiers))
    # resolve the target set the same way
    if targets is None:
        _targets = self.tar
    else:
        _targets = itemData.itemData()
        _targets.extend(json.loads(targets))
    # sentence-split and mark up, then render with a per-category color map
    context = self.split_sentences(report, _modifiers, _targets)
    clrs = self.get_colors_dict(_modifiers, _targets)
    return html.mark_document_with_html(context, colors=clrs)
def markup_report(self,
                  report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of bowel obstruction or mass identified within the abdomen or pelvis. Non-specific interstitial opacities and bronchiectasis seen at the right base, suggestive of post-inflammatory changes. ''',
                  modifiers=None,
                  targets=None):
    """Mark up a single report with ConText and return it as HTML.

    Parameters:
        report: the report text to process (defaults to a sample
            impression).
        modifiers: optional JSON-encoded modifier rules; when None the
            instance default ``self.mod`` is used.
        targets: optional JSON-encoded target rules; when None the
            instance default ``self.tar`` is used.

    Returns:
        The report rendered as an HTML string with targets/modifiers
        color-marked.
    """
    # BUG FIX: the original called len(modifiers) and iterated modifiers
    # for debug output BEFORE the None check, so invoking the method with
    # the default arguments raised TypeError.  Debug output is now guarded.
    if modifiers is not None:
        print("type of modifiers", type(modifiers))
        print("len of modifiers", len(modifiers))
        print(modifiers)
        for m in modifiers:
            print(m)
    # resolve the modifier set: instance default, or decode the JSON payload
    if modifiers is None:
        _modifiers = self.mod
    else:
        _modifiers = itemData.itemData()
        _modifiers.extend(json.loads(modifiers))
    # resolve the target set the same way
    if targets is None:
        _targets = self.tar
    else:
        _targets = itemData.itemData()
        _targets.extend(json.loads(targets))
    # sentence-split and mark up, then render with a per-category color map
    context = self.split_sentences(report, _modifiers, _targets)
    clrs = self.get_colors_dict(_modifiers, _targets)
    return html.mark_document_with_html(context, colors=clrs)
def get_target_phrases_item_data(target_phrases):
    """Build an itemData collection from custom target phrases.

    Each phrase is used for all four contextItem fields (literal,
    category, regex, rule), producing one contextItem per phrase.
    """
    collected = itemData.itemData()
    for phrase in target_phrases:
        # the phrase doubles as literal, category, regex, and rule
        collected.append(itemData.contextItem([phrase] * 4))
    return collected
def setUp(self):
    """Prepare test fixtures: a ConTextMarkup, a sentence splitter,
    sample sentences exercising Unicode and numeric sentence boundaries,
    and a small modifier/target rule set.

    NOTE(review): Python 2 code (``u""``/``ur""`` literals).
    """
    # create a sample image in memory
    self.context = pyConText.ConTextMarkup()
    self.splitter = helpers.sentenceSplitter()
    # Swedish text with embedded markup and non-ASCII escapes
    self.su1 = u"kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?"
    # numbered-list style impression ("1.") that must not split wrongly
    self.su2 = u"IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM."
    # sentence ending in a bare number followed by a new sentence
    self.su3 = u"This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence."
    # decimal value mid-sentence that must NOT trigger a split
    self.su4 = u"This is a sentence with a numeric value equal to 1.43 and should not be split into two parts."
    # rule rows: [literal, category, regex, rule-direction]
    self.items = [
        [u"pulmonary embolism", u"PULMONARY_EXISTENCE" if False else u"PULMONARY_EMBOLISM", ur"""pulmonary\s(artery )?(embol[a-z]+)""", ""],
        ["no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward"],
    ]
    self.itemData = itemData.itemData()
    for i in self.items:
        cit = itemData.contextItem
        # NOTE(review): source appears truncated here — contextItem is
        # assigned, not called; expected ``cit = itemData.contextItem(i)``
        # followed by an append.  Confirm against the original file.
def setUp(self):
    """Prepare test fixtures: a ConTextMarkup, a sentence splitter,
    sample sentences exercising Unicode and numeric sentence boundaries,
    and a small modifier/target rule set.

    NOTE(review): Python 2 code (``u''``/``ur""`` literals).
    """
    # create a sample image in memory
    self.context = pyConText.ConTextMarkup()
    self.splitter = helpers.sentenceSplitter()
    # Swedish text with embedded markup and non-ASCII escapes
    self.su1 = u'kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?'
    # numbered-list style impression ("1.") that must not split wrongly
    self.su2 = u'IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.'
    # sentence ending in a bare number followed by a new sentence
    self.su3 = u'This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence.'
    # decimal value mid-sentence that must NOT trigger a split
    self.su4 = u'This is a sentence with a numeric value equal to 1.43 and should not be split into two parts.'
    # rule rows: [literal, category, regex, rule-direction]
    self.items = [[
        u"pulmonary embolism", u"PULMONARY_EMBOLISM",
        ur"""pulmonary\s(artery )?(embol[a-z]+)""", ""
    ], [
        "no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward"
    ]]
    self.itemData = itemData.itemData()
    for i in self.items:
        cit = itemData.contextItem
        # NOTE(review): source appears truncated here — contextItem is
        # assigned, not called; expected ``cit = itemData.contextItem(i)``
        # followed by an append.  Confirm against the original file.
def convertCSVtoitemData(csvFile,
                         encoding='utf-8',
                         delimiter="\t",
                         headerRows=1,
                         literalColumn=0,
                         categoryColumn=1,
                         regexColumn=2,
                         ruleColumn=3):
    """Read a CSV/TSV file of itemData rules into a single itemData instance.

    csvFile: name of file to read items from
    encoding: unicode encoding to use; default = 'utf-8'
        (NOTE(review): currently unused — the file is read with the
        platform default encoding; confirm whether decoding is needed)
    delimiter: field delimiter passed to csv.reader; default = tab
    headerRows: number of header rows in file; default = 1
    literalColumn: column from which to read the literal; default = 0
    categoryColumn: column from which to read the category; default = 1
    regexColumn: column from which to read the regular expression; default = 2
    ruleColumn: column from which to read the rule; default = 3

    Returns an itemData populated with one contextItem per data row.
    """
    items = itemData.itemData()  # itemData to be returned to the user
    header = []
    # FIX: use a context manager so the file handle is closed even when a
    # malformed row raises; the original open()/close() pair leaked the
    # handle on any exception.  (Docstring typo "enocidng" also fixed.)
    with open(csvFile, 'rU') as f:
        reader = csv.reader(f, delimiter=delimiter)
        # first grab the number of specified header rows
        for _ in range(headerRows):
            header.append(next(reader))
        # now grab each itemData row
        for row in reader:
            tmp = [
                row[literalColumn],
                row[categoryColumn],
                row[regexColumn],
                row[ruleColumn],
            ]
            # NOTE: formatting into an r'''...''' template does not make the
            # value a "raw" string — the regex text is used exactly as read.
            tmp[2] = r'''{0}'''.format(tmp[2])
            items.append(itemData.contextItem(tmp))
    return items
# NOTE(review): fragment of an __init__-style body — the enclosing ``def``
# line is not visible in this chunk.  Python 2 (print statements).
self.result_label = result_label
# query built by %-interpolation; rid/column/table are attributes set earlier
self.query1 = '''SELECT %s,%s FROM %s'''%(self.rid,self.column,self.table)
print self.query1
self.mode = mode
self.dbname = dbname
self.getDBConnection(self.dbname)
# get reports to process
self.cursor.execute(self.query1)
# fetchall() materializes every report in memory at once
self.reports = self.cursor.fetchall()
print "number of reports to process",len(self.reports)
# Create the pyConTextNLP ConTextDocument. This is the container for all the markups
self.document = pyConText.ConTextDocument()
# modifiers come from the lexical knowledge bases, targets from the domain ones
self.modifiers = itemData.itemData()
self.targets = itemData.itemData()
for kb in lexical_kb:
    self.modifiers.extend( itemData.instantiateFromCSVtoitemData(kb) )
for kb in domain_kb:
    self.targets.extend( itemData.instantiateFromCSVtoitemData(kb) )
self.debug = debug
if( self.debug ):
    print "debug set to True"
    # debug artifacts go into a sibling "<dbname>_debug_dir" directory
    tmp = os.path.splitext(self.dbname)
    self.debugDir = tmp[0]+"_debug_dir"
    if( not os.path.exists(self.debugDir) ):
        os.mkdir(self.debugDir)
else:
    # NOTE(review): source is truncated here — the else branch body is
    # missing in this chunk (the L11 variant sets self.debugDir = '').
def __init__(self, options): """create an instance of a criticalFinder object associated with the SQLite database. dbname: name of SQLite database """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT %s,%s FROM %s'''%(options.id,options.report_text,options.table) t = time.localtime() self.save_dir = options.save_dir#+"-%s-%s-%s"%(t[0],t[1],t[2]) count = 1 if( not os.path.exists(self.save_dir) ): os.mkdir(self.save_dir) self.html_dir=self.save_dir+"/html/" if( not os.path.exists(self.html_dir) ): os.mkdir(self.html_dir) print options.dbname self.doGraphs = options.doGraphs self.allow_uncertainty = options.allow_uncertainty self.proc_category = options.category self.conn = sqlite.connect(options.dbname+".db") print options.dbname+".db" self.cursor = self.conn.cursor() print self.query1 self.cursor.execute(self.query1) self.reports = self.cursor.fetchall() print "number of reports to process",len(self.reports) #raw_input('continue') tmp = os.path.splitext(options.odbname) outfile = tmp[0]+self.proc_category+"_%s.db"%(self.allow_uncertainty) rsltsDB = os.path.join(self.save_dir,outfile) if( os.path.exists(rsltsDB) ): os.remove(rsltsDB) #old database output by DM self.resultsConn = sqlite.connect(rsltsDB) self.resultsCursor = self.resultsConn.cursor() # self.resultsCursor.execute("""CREATE TABLE alerts ( reportid TEXT, smokingStatus TEXT, report TEXT)""") # Create the itemData object to store the modifiers for the analysis # starts with definitions defined in pyConText and then adds # definitions specific for peFinder #DM - addition self.context=pyConText.ConTextDocument() mods=itemData.instantiateFromCSV(options.lexical_kb) trgs=itemData.instantiateFromCSV(options.Hx_kb) self.modifiers = itemData.itemData() for mod in mods.keys(): self.modifiers.prepend(mods[mod]) self.targets = itemData.itemData() for trg in trgs.keys(): self.targets.prepend(trgs[trg])
def test_instantiate_itemData(self):
    """An itemData holding a single contextItem should be truthy."""
    container = itemData.itemData()
    container.append(itemData.contextItem(self.items[0]))
    assert container
# NOTE(review): fragment of an __init__-style body — the enclosing ``def``
# line is not visible in this chunk.  Python 2 (print statements).
# query built by %-interpolation; rid/column/table are attributes set earlier
self.query1 = '''SELECT %s,%s FROM %s''' % (self.rid, self.column, self.table)
print self.query1
self.mode = mode
self.dbname = dbname
self.getDBConnection(self.dbname)
# get reports to process
self.cursor.execute(self.query1)
# fetchall() materializes every report in memory at once
self.reports = self.cursor.fetchall()
print "number of reports to process", len(self.reports)
# Create the pyConTextNLP ConTextDocument. This is the container for all the markups
self.document = pyConText.ConTextDocument()
# modifiers come from the lexical knowledge bases, targets from the domain ones
self.modifiers = itemData.itemData()
self.targets = itemData.itemData()
for kb in lexical_kb:
    self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
for kb in domain_kb:
    self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))
self.debug = debug
if (self.debug):
    print "debug set to True"
    # debug artifacts go into a sibling "<dbname>_debug_dir" directory
    tmp = os.path.splitext(self.dbname)
    self.debugDir = tmp[0] + "_debug_dir"
    if (not os.path.exists(self.debugDir)):
        os.mkdir(self.debugDir)
else:
    # empty string means "no debug directory"
    self.debugDir = ''