def __init__(self, dbname, runMode = 'all' ): """create an instance of a PEContext object associated with the SQLite database. dbname: name of SQLite database test: If True run against the test set, otherwise run against the training set """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT id,impression FROM pesubject''' # get the consensus labels for the disease and quality states self.query2 = '''SELECT diseaseState,qualityState,CDS,diseaseUncertainty,CQS FROM consensus_states WHERE psid==?''' # get the userannotations from the three raters we are using for this # study self.query3 = """SELECT usrname,diseaseState,qualityState,qualitylimit,usrname FROM userannotations where psid_id == ? AND (usrname=='aaron' OR usrname=='hyun' OR usrname=='sean')""" # create query to get temporal annotations self.query4 = """SELECT usrname FROM AND (usrname == 'jd' OR usrname =='sean' OR usrname=='peter')""" self.conn = sqlite.connect(dbname) self.cursor = self.conn.cursor() self.cursor.execute(self.query1) # get the training set reports. The first 20 reports were used to train # Aaron and Hyun and so can be skipped reports = self.cursor.fetchall() if( runMode == 'full' ): self.reports = reports[:] elif( 'test' ): self.reports = reports[270:] else: self.reports = (reports[20:])[:250] print "number of reports to process",len(self.reports) raw_input('continue') t = time.localtime() d = datetime.datetime(t[0],t[1],t[2]) # create dictionaries to store the agreement matricies for the disease, # quality and uncertainty states self.usMatrix = {('Yes','Yes'):0,('Yes','No'):0,('No','Yes'):0,('No','No'):0} self.dsMatrix = {('Pos','Pos'):0,('Pos','Neg'):0,('Neg','Pos'):0,('Neg','Neg'):0} self.qsMatrix = {('Diagnostic','Diagnostic'):0,('Diagnostic','Not Diagnostic'):0, ('Not Diagnostic','Diagnostic'):0,('Not Diagnostic','Not Diagnostic'):0} self.hsMatrix = {('Old','Old'):0,('Old','New'):0,('New','Old'):0,('New','New'):0} # create context objects for each of the questions we want to be answering self.context = {"disease":pycontext.pycontext(), "quality":pycontext.pycontext(), "quality2":pycontext.pycontext()} # Create files for storing problem cases suffix = runMode self.f1 = open(dbname+".ds%s.txt"%suffix,'w') self.f2 = open(dbname+".qs%s.txt"%suffix,'w') self.f3 = open(dbname+".us%s.txt"%suffix,'w') self.f4 = open(dbname+".all%s.txt"%suffix,"w") self.f5 = open(dbname+".hs%s.txt"%suffix,"w") self.f6 = open(dbname+".disagreements%s.pckle"%suffix,"wb") rsltsDB = dbname+".OriginalResults.db" if( os.path.exists(rsltsDB) ): os.remove(rsltsDB) self.resultsConn = sqlite.connect(rsltsDB) self.resultsCursor = self.resultsConn.cursor() self.resultsCursor.execute("""CREATE TABLE results ( id INT PRIMARY KEY, disease TEXT, uncertainty TEXT, historical TEXT, quality TEXT)""") # Create the itemData object to store the modifiers for the analysis # starts with definitions defined in pyConText and then adds # definitions specific for peFinder self.modifiers = {"disease":itemData.itemData()} self.modifiers["disease"].extend(pseudoNegations) self.modifiers["disease"].extend(definiteNegations) self.modifiers["disease"].extend(probableNegations) self.modifiers["disease"].extend(probables) self.modifiers["disease"].extend(definites) self.modifiers["disease"].extend(indications) self.modifiers["disease"].extend(historicals) self.modifiers["disease"].extend(conjugates) # Create a seperate itemData for the quality modifiers self.modifiers["quality"] = itemData.itemData() self.modifiers["quality"].extend(pseudoNegations) self.modifiers["quality"].extend(definiteNegations) self.modifiers["quality"].extend(probableNegations) self.modifiers["quality"].extend(probables) self.modifiers["quality"].extend(historicals) self.modifiers["quality"].extend(conjugates) self.modifiers["quality"].extend(qualities) self.modifiers["quality"].extend([['limited dataset compliant','EXCLUSION','','']]) # Quality targets self.targets = {"disease":peItems} self.targets["quality"] = itemData.itemData() #self.targets["quality"].extend(qualities) self.targets["quality"].extend(examFeatures) self.targets["quality"].extend([['limited dataset compliant','EXCLUSION','','']]) self.targets["quality2"] = itemData.itemData() self.targets["quality2"].extend(artifacts) self.temporalCount = 0 self.breakCases = set([320]) self.models = {} self.problemCases = [] self.disagreements = {"ds":[],"qs":[],"us":[],"hs":[]}
def __init__(self, dbname, outfile): """create an instance of a PEContext object associated with the SQLite database. dbname: name of SQLite database """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT id,impression FROM pesubject''' self.conn = sqlite.connect(dbname) self.cursor = self.conn.cursor() self.cursor.execute(self.query1) self.reports = self.cursor.fetchall() #conn = sqlite.connect("nx_o_context_differences.db") #curs = conn.cursor() #curs.execute("""select id from disease_diff""") #es = curs.fetchall() #errors = [e[0] for e in es] # #tmp = [] #for r in self.reports: # if r[0] in errors: # print "removed" # tmp.append(r) #self.reports = tmp print "number of reports to process",len(self.reports) t = time.localtime() d = datetime.datetime(t[0],t[1],t[2]) # create context objects for each of the questions we want to be answering self.context = {"disease":pycontext.pycontext(), "quality":pycontext.pycontext(), "quality2":pycontext.pycontext()} rsltsDB = outfile if( os.path.exists(rsltsDB) ): os.remove(rsltsDB) self.resultsConn = sqlite.connect(rsltsDB) self.resultsCursor = self.resultsConn.cursor() self.resultsCursor.execute("""CREATE TABLE results ( id INT PRIMARY KEY, disease TEXT, uncertainty TEXT, historical TEXT, quality TEXT)""") # Create the itemData object to store the modifiers for the analysis # starts with definitions defined in pyConText and then adds # definitions specific for peFinder self.modifiers = {"disease":itemData.itemData()} self.modifiers["disease"].extend(pseudoNegations) self.modifiers["disease"].extend(definiteNegations) self.modifiers["disease"].extend(probableNegations) self.modifiers["disease"].extend(probables) self.modifiers["disease"].extend(definites) self.modifiers["disease"].extend(indications) self.modifiers["disease"].extend(historicals) self.modifiers["disease"].extend(conjugates) # Create a seperate itemData for the quality modifiers self.modifiers["quality"] = itemData.itemData() self.modifiers["quality"].extend(pseudoNegations) self.modifiers["quality"].extend(definiteNegations) self.modifiers["quality"].extend(probableNegations) self.modifiers["quality"].extend(probables) self.modifiers["quality"].extend(historicals) self.modifiers["quality"].extend(conjugates) self.modifiers["quality"].extend(qualities) self.modifiers["quality"].extend([['limited dataset compliant','EXCLUSION','','']]) # Quality targets self.targets = {"disease":peItems} self.targets["quality"] = itemData.itemData() #self.targets["quality"].extend(qualities) self.targets["quality"].extend(examFeatures) self.targets["quality"].extend([['limited dataset compliant','EXCLUSION','','']]) self.targets["quality2"] = itemData.itemData() self.targets["quality2"].extend(artifacts) self.temporalCount = 0 self.models = {}
def __init__(self, dbname, runMode='all'): """create an instance of a PEContext object associated with the SQLite database. dbname: name of SQLite database test: If True run against the test set, otherwise run against the training set """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT id,impression FROM pesubject''' # get the consensus labels for the disease and quality states self.query2 = '''SELECT diseaseState,qualityState,CDS,diseaseUncertainty,CQS FROM consensus_states WHERE psid==?''' # get the userannotations from the three raters we are using for this # study self.query3 = """SELECT usrname,diseaseState,qualityState,qualitylimit,usrname FROM userannotations where psid_id == ? AND (usrname=='aaron' OR usrname=='hyun' OR usrname=='sean')""" # create query to get temporal annotations self.query4 = """SELECT usrname FROM AND (usrname == 'jd' OR usrname =='sean' OR usrname=='peter')""" self.conn = sqlite.connect(dbname) self.cursor = self.conn.cursor() self.cursor.execute(self.query1) # get the training set reports. The first 20 reports were used to train # Aaron and Hyun and so can be skipped reports = self.cursor.fetchall() if (runMode == 'full'): self.reports = reports[:] elif ('test'): self.reports = reports[270:] else: self.reports = (reports[20:])[:250] print "number of reports to process", len(self.reports) raw_input('continue') t = time.localtime() d = datetime.datetime(t[0], t[1], t[2]) # create dictionaries to store the agreement matricies for the disease, # quality and uncertainty states self.usMatrix = { ('Yes', 'Yes'): 0, ('Yes', 'No'): 0, ('No', 'Yes'): 0, ('No', 'No'): 0 } self.dsMatrix = { ('Pos', 'Pos'): 0, ('Pos', 'Neg'): 0, ('Neg', 'Pos'): 0, ('Neg', 'Neg'): 0 } self.qsMatrix = { ('Diagnostic', 'Diagnostic'): 0, ('Diagnostic', 'Not Diagnostic'): 0, ('Not Diagnostic', 'Diagnostic'): 0, ('Not Diagnostic', 'Not Diagnostic'): 0 } self.hsMatrix = { ('Old', 'Old'): 0, ('Old', 'New'): 0, ('New', 'Old'): 0, ('New', 'New'): 0 } # create context objects for each of the questions we want to be answering self.context = { "disease": pycontext.pycontext(), "quality": pycontext.pycontext(), "quality2": pycontext.pycontext() } # Create files for storing problem cases suffix = runMode self.f1 = open(dbname + ".ds%s.txt" % suffix, 'w') self.f2 = open(dbname + ".qs%s.txt" % suffix, 'w') self.f3 = open(dbname + ".us%s.txt" % suffix, 'w') self.f4 = open(dbname + ".all%s.txt" % suffix, "w") self.f5 = open(dbname + ".hs%s.txt" % suffix, "w") self.f6 = open(dbname + ".disagreements%s.pckle" % suffix, "wb") rsltsDB = dbname + ".OriginalResults.db" if (os.path.exists(rsltsDB)): os.remove(rsltsDB) self.resultsConn = sqlite.connect(rsltsDB) self.resultsCursor = self.resultsConn.cursor() self.resultsCursor.execute("""CREATE TABLE results ( id INT PRIMARY KEY, disease TEXT, uncertainty TEXT, historical TEXT, quality TEXT)""") # Create the itemData object to store the modifiers for the analysis # starts with definitions defined in pyConText and then adds # definitions specific for peFinder self.modifiers = {"disease": itemData.itemData()} self.modifiers["disease"].extend(pseudoNegations) self.modifiers["disease"].extend(definiteNegations) self.modifiers["disease"].extend(probableNegations) self.modifiers["disease"].extend(probables) self.modifiers["disease"].extend(definites) self.modifiers["disease"].extend(indications) self.modifiers["disease"].extend(historicals) self.modifiers["disease"].extend(conjugates) # Create a seperate itemData for the quality modifiers self.modifiers["quality"] = itemData.itemData() self.modifiers["quality"].extend(pseudoNegations) self.modifiers["quality"].extend(definiteNegations) self.modifiers["quality"].extend(probableNegations) self.modifiers["quality"].extend(probables) self.modifiers["quality"].extend(historicals) self.modifiers["quality"].extend(conjugates) self.modifiers["quality"].extend(qualities) self.modifiers["quality"].extend( [['limited dataset compliant', 'EXCLUSION', '', '']]) # Quality targets self.targets = {"disease": peItems} self.targets["quality"] = itemData.itemData() #self.targets["quality"].extend(qualities) self.targets["quality"].extend(examFeatures) self.targets["quality"].extend( [['limited dataset compliant', 'EXCLUSION', '', '']]) self.targets["quality2"] = itemData.itemData() self.targets["quality2"].extend(artifacts) self.temporalCount = 0 self.breakCases = set([320]) self.models = {} self.problemCases = [] self.disagreements = {"ds": [], "qs": [], "us": [], "hs": []}
def __init__(self, dbname, outfile): """create an instance of a PEContext object associated with the SQLite database. dbname: name of SQLite database """ # Define queries to select data from the SQLite database # this gets the reports we will process self.query1 = '''SELECT id,impression FROM pesubject''' self.conn = sqlite.connect(dbname) self.cursor = self.conn.cursor() self.cursor.execute(self.query1) self.reports = self.cursor.fetchall() #conn = sqlite.connect("nx_o_context_differences.db") #curs = conn.cursor() #curs.execute("""select id from disease_diff""") #es = curs.fetchall() #errors = [e[0] for e in es] # #tmp = [] #for r in self.reports: # if r[0] in errors: # print "removed" # tmp.append(r) #self.reports = tmp print "number of reports to process", len(self.reports) t = time.localtime() d = datetime.datetime(t[0], t[1], t[2]) # create context objects for each of the questions we want to be answering self.context = { "disease": pycontext.pycontext(), "quality": pycontext.pycontext(), "quality2": pycontext.pycontext() } rsltsDB = outfile if (os.path.exists(rsltsDB)): os.remove(rsltsDB) self.resultsConn = sqlite.connect(rsltsDB) self.resultsCursor = self.resultsConn.cursor() self.resultsCursor.execute("""CREATE TABLE results ( id INT PRIMARY KEY, disease TEXT, uncertainty TEXT, historical TEXT, quality TEXT)""") # Create the itemData object to store the modifiers for the analysis # starts with definitions defined in pyConText and then adds # definitions specific for peFinder self.modifiers = {"disease": itemData.itemData()} self.modifiers["disease"].extend(pseudoNegations) self.modifiers["disease"].extend(definiteNegations) self.modifiers["disease"].extend(probableNegations) self.modifiers["disease"].extend(probables) self.modifiers["disease"].extend(definites) self.modifiers["disease"].extend(indications) self.modifiers["disease"].extend(historicals) self.modifiers["disease"].extend(conjugates) # Create a seperate itemData for the quality modifiers self.modifiers["quality"] = itemData.itemData() self.modifiers["quality"].extend(pseudoNegations) self.modifiers["quality"].extend(definiteNegations) self.modifiers["quality"].extend(probableNegations) self.modifiers["quality"].extend(probables) self.modifiers["quality"].extend(historicals) self.modifiers["quality"].extend(conjugates) self.modifiers["quality"].extend(qualities) self.modifiers["quality"].extend( [['limited dataset compliant', 'EXCLUSION', '', '']]) # Quality targets self.targets = {"disease": peItems} self.targets["quality"] = itemData.itemData() #self.targets["quality"].extend(qualities) self.targets["quality"].extend(examFeatures) self.targets["quality"].extend( [['limited dataset compliant', 'EXCLUSION', '', '']]) self.targets["quality2"] = itemData.itemData() self.targets["quality2"].extend(artifacts) self.temporalCount = 0 self.models = {}