Ejemplo n.º 1
0
    def __init__(self, dbname, runMode = 'all' ):
        """create an instance of a PEContext object associated with the SQLite
        database.
        dbname: name of SQLite database
        test: If True run against the test set, otherwise run against the training set
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT id,impression FROM pesubject''' 
        # get the consensus labels for the disease and quality states
        self.query2 = '''SELECT diseaseState,qualityState,CDS,diseaseUncertainty,CQS FROM consensus_states WHERE psid==?'''
        # get the userannotations from the three raters we are using for this
        # study
        self.query3 = """SELECT usrname,diseaseState,qualityState,qualitylimit,usrname FROM userannotations where psid_id == ? AND (usrname=='aaron' OR usrname=='hyun' OR usrname=='sean')"""
        # create query to get temporal annotations
        self.query4 = """SELECT usrname FROM AND (usrname == 'jd' OR usrname =='sean' OR usrname=='peter')"""


        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        # get the training set reports. The first 20 reports were used to train
        # Aaron and Hyun and so can be skipped
        reports = self.cursor.fetchall()
        if( runMode == 'full' ):
            self.reports = reports[:]
        elif( 'test' ):
            self.reports = reports[270:]
        else:
            self.reports = (reports[20:])[:250]
        
            
        print "number of reports to process",len(self.reports)
        raw_input('continue')
        t = time.localtime()
        d = datetime.datetime(t[0],t[1],t[2])

        # create dictionaries to store the agreement matricies for the disease,
        # quality and uncertainty states
        self.usMatrix = {('Yes','Yes'):0,('Yes','No'):0,('No','Yes'):0,('No','No'):0}
        self.dsMatrix = {('Pos','Pos'):0,('Pos','Neg'):0,('Neg','Pos'):0,('Neg','Neg'):0}
        self.qsMatrix = {('Diagnostic','Diagnostic'):0,('Diagnostic','Not Diagnostic'):0,
                         ('Not Diagnostic','Diagnostic'):0,('Not Diagnostic','Not Diagnostic'):0}
        self.hsMatrix = {('Old','Old'):0,('Old','New'):0,('New','Old'):0,('New','New'):0}
        
        # create context objects for each of the questions we want to be answering
        self.context = {"disease":pycontext.pycontext(),
                        "quality":pycontext.pycontext(),
                        "quality2":pycontext.pycontext()}

        # Create files for storing problem cases
       
        suffix = runMode
        self.f1 = open(dbname+".ds%s.txt"%suffix,'w')
        self.f2 = open(dbname+".qs%s.txt"%suffix,'w')
        self.f3 = open(dbname+".us%s.txt"%suffix,'w')
        self.f4 = open(dbname+".all%s.txt"%suffix,"w")
        self.f5 = open(dbname+".hs%s.txt"%suffix,"w")
        self.f6 = open(dbname+".disagreements%s.pckle"%suffix,"wb")
        rsltsDB = dbname+".OriginalResults.db"
        if( os.path.exists(rsltsDB) ):
            os.remove(rsltsDB)
        
        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")


        # Create the itemData object to store the modifiers for the  analysis
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder
        self.modifiers = {"disease":itemData.itemData()}
        self.modifiers["disease"].extend(pseudoNegations)
        self.modifiers["disease"].extend(definiteNegations)
        self.modifiers["disease"].extend(probableNegations)
        self.modifiers["disease"].extend(probables)
        self.modifiers["disease"].extend(definites)
        self.modifiers["disease"].extend(indications)
        self.modifiers["disease"].extend(historicals)
        self.modifiers["disease"].extend(conjugates)

        # Create a seperate itemData for the quality modifiers
        self.modifiers["quality"] = itemData.itemData()
        self.modifiers["quality"].extend(pseudoNegations)
        self.modifiers["quality"].extend(definiteNegations)
        self.modifiers["quality"].extend(probableNegations)
        self.modifiers["quality"].extend(probables)
        self.modifiers["quality"].extend(historicals)
        self.modifiers["quality"].extend(conjugates)
        self.modifiers["quality"].extend(qualities)
        self.modifiers["quality"].extend([['limited dataset compliant','EXCLUSION','','']])
        # Quality targets
        self.targets = {"disease":peItems}
        self.targets["quality"] = itemData.itemData()
        #self.targets["quality"].extend(qualities)
        self.targets["quality"].extend(examFeatures)
        self.targets["quality"].extend([['limited dataset compliant','EXCLUSION','','']])

        

        self.targets["quality2"] = itemData.itemData()
        self.targets["quality2"].extend(artifacts)
        self.temporalCount = 0
        self.breakCases = set([320])
        self.models = {}
        self.problemCases = []
        self.disagreements = {"ds":[],"qs":[],"us":[],"hs":[]}
Ejemplo n.º 2
0
    def __init__(self, dbname, outfile):
        """create an instance of a PEContext object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT id,impression FROM pesubject''' 



        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        #conn = sqlite.connect("nx_o_context_differences.db")
        #curs = conn.cursor()
        #curs.execute("""select id from disease_diff""")
        #es = curs.fetchall()
        #errors = [e[0] for e in es]
        #
        #tmp = []
        #for r in self.reports:
        #    if r[0] in errors:
        #        print "removed"
        #        tmp.append(r)
        #self.reports = tmp
        
        
        print "number of reports to process",len(self.reports)
        t = time.localtime()
        d = datetime.datetime(t[0],t[1],t[2])
        
        # create context objects for each of the questions we want to be answering
        self.context = {"disease":pycontext.pycontext(),
                        "quality":pycontext.pycontext(),
                        "quality2":pycontext.pycontext()}


        rsltsDB = outfile
        if( os.path.exists(rsltsDB) ):
            os.remove(rsltsDB)
        
        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")


        # Create the itemData object to store the modifiers for the  analysis
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder
        self.modifiers = {"disease":itemData.itemData()}
        self.modifiers["disease"].extend(pseudoNegations)
        self.modifiers["disease"].extend(definiteNegations)
        self.modifiers["disease"].extend(probableNegations)
        self.modifiers["disease"].extend(probables)
        self.modifiers["disease"].extend(definites)
        self.modifiers["disease"].extend(indications)
        self.modifiers["disease"].extend(historicals)
        self.modifiers["disease"].extend(conjugates)

        # Create a seperate itemData for the quality modifiers
        self.modifiers["quality"] = itemData.itemData()
        self.modifiers["quality"].extend(pseudoNegations)
        self.modifiers["quality"].extend(definiteNegations)
        self.modifiers["quality"].extend(probableNegations)
        self.modifiers["quality"].extend(probables)
        self.modifiers["quality"].extend(historicals)
        self.modifiers["quality"].extend(conjugates)
        self.modifiers["quality"].extend(qualities)
        self.modifiers["quality"].extend([['limited dataset compliant','EXCLUSION','','']])
        # Quality targets
        self.targets = {"disease":peItems}
        self.targets["quality"] = itemData.itemData()
        #self.targets["quality"].extend(qualities)
        self.targets["quality"].extend(examFeatures)
        self.targets["quality"].extend([['limited dataset compliant','EXCLUSION','','']])

        

        self.targets["quality2"] = itemData.itemData()
        self.targets["quality2"].extend(artifacts)
        self.temporalCount = 0
        self.models = {}
Ejemplo n.º 3
0
    def __init__(self, dbname, runMode='all'):
        """create an instance of a PEContext object associated with the SQLite
        database.
        dbname: name of SQLite database
        test: If True run against the test set, otherwise run against the training set
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT id,impression FROM pesubject'''
        # get the consensus labels for the disease and quality states
        self.query2 = '''SELECT diseaseState,qualityState,CDS,diseaseUncertainty,CQS FROM consensus_states WHERE psid==?'''
        # get the userannotations from the three raters we are using for this
        # study
        self.query3 = """SELECT usrname,diseaseState,qualityState,qualitylimit,usrname FROM userannotations where psid_id == ? AND (usrname=='aaron' OR usrname=='hyun' OR usrname=='sean')"""
        # create query to get temporal annotations
        self.query4 = """SELECT usrname FROM AND (usrname == 'jd' OR usrname =='sean' OR usrname=='peter')"""

        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        # get the training set reports. The first 20 reports were used to train
        # Aaron and Hyun and so can be skipped
        reports = self.cursor.fetchall()
        if (runMode == 'full'):
            self.reports = reports[:]
        elif ('test'):
            self.reports = reports[270:]
        else:
            self.reports = (reports[20:])[:250]

        print "number of reports to process", len(self.reports)
        raw_input('continue')
        t = time.localtime()
        d = datetime.datetime(t[0], t[1], t[2])

        # create dictionaries to store the agreement matricies for the disease,
        # quality and uncertainty states
        self.usMatrix = {
            ('Yes', 'Yes'): 0,
            ('Yes', 'No'): 0,
            ('No', 'Yes'): 0,
            ('No', 'No'): 0
        }
        self.dsMatrix = {
            ('Pos', 'Pos'): 0,
            ('Pos', 'Neg'): 0,
            ('Neg', 'Pos'): 0,
            ('Neg', 'Neg'): 0
        }
        self.qsMatrix = {
            ('Diagnostic', 'Diagnostic'): 0,
            ('Diagnostic', 'Not Diagnostic'): 0,
            ('Not Diagnostic', 'Diagnostic'): 0,
            ('Not Diagnostic', 'Not Diagnostic'): 0
        }
        self.hsMatrix = {
            ('Old', 'Old'): 0,
            ('Old', 'New'): 0,
            ('New', 'Old'): 0,
            ('New', 'New'): 0
        }

        # create context objects for each of the questions we want to be answering
        self.context = {
            "disease": pycontext.pycontext(),
            "quality": pycontext.pycontext(),
            "quality2": pycontext.pycontext()
        }

        # Create files for storing problem cases

        suffix = runMode
        self.f1 = open(dbname + ".ds%s.txt" % suffix, 'w')
        self.f2 = open(dbname + ".qs%s.txt" % suffix, 'w')
        self.f3 = open(dbname + ".us%s.txt" % suffix, 'w')
        self.f4 = open(dbname + ".all%s.txt" % suffix, "w")
        self.f5 = open(dbname + ".hs%s.txt" % suffix, "w")
        self.f6 = open(dbname + ".disagreements%s.pckle" % suffix, "wb")
        rsltsDB = dbname + ".OriginalResults.db"
        if (os.path.exists(rsltsDB)):
            os.remove(rsltsDB)

        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")

        # Create the itemData object to store the modifiers for the  analysis
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder
        self.modifiers = {"disease": itemData.itemData()}
        self.modifiers["disease"].extend(pseudoNegations)
        self.modifiers["disease"].extend(definiteNegations)
        self.modifiers["disease"].extend(probableNegations)
        self.modifiers["disease"].extend(probables)
        self.modifiers["disease"].extend(definites)
        self.modifiers["disease"].extend(indications)
        self.modifiers["disease"].extend(historicals)
        self.modifiers["disease"].extend(conjugates)

        # Create a seperate itemData for the quality modifiers
        self.modifiers["quality"] = itemData.itemData()
        self.modifiers["quality"].extend(pseudoNegations)
        self.modifiers["quality"].extend(definiteNegations)
        self.modifiers["quality"].extend(probableNegations)
        self.modifiers["quality"].extend(probables)
        self.modifiers["quality"].extend(historicals)
        self.modifiers["quality"].extend(conjugates)
        self.modifiers["quality"].extend(qualities)
        self.modifiers["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])
        # Quality targets
        self.targets = {"disease": peItems}
        self.targets["quality"] = itemData.itemData()
        #self.targets["quality"].extend(qualities)
        self.targets["quality"].extend(examFeatures)
        self.targets["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        self.targets["quality2"] = itemData.itemData()
        self.targets["quality2"].extend(artifacts)
        self.temporalCount = 0
        self.breakCases = set([320])
        self.models = {}
        self.problemCases = []
        self.disagreements = {"ds": [], "qs": [], "us": [], "hs": []}
Ejemplo n.º 4
0
    def __init__(self, dbname, outfile):
        """create an instance of a PEContext object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT id,impression FROM pesubject'''

        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        #conn = sqlite.connect("nx_o_context_differences.db")
        #curs = conn.cursor()
        #curs.execute("""select id from disease_diff""")
        #es = curs.fetchall()
        #errors = [e[0] for e in es]
        #
        #tmp = []
        #for r in self.reports:
        #    if r[0] in errors:
        #        print "removed"
        #        tmp.append(r)
        #self.reports = tmp

        print "number of reports to process", len(self.reports)
        t = time.localtime()
        d = datetime.datetime(t[0], t[1], t[2])

        # create context objects for each of the questions we want to be answering
        self.context = {
            "disease": pycontext.pycontext(),
            "quality": pycontext.pycontext(),
            "quality2": pycontext.pycontext()
        }

        rsltsDB = outfile
        if (os.path.exists(rsltsDB)):
            os.remove(rsltsDB)

        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")

        # Create the itemData object to store the modifiers for the  analysis
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder
        self.modifiers = {"disease": itemData.itemData()}
        self.modifiers["disease"].extend(pseudoNegations)
        self.modifiers["disease"].extend(definiteNegations)
        self.modifiers["disease"].extend(probableNegations)
        self.modifiers["disease"].extend(probables)
        self.modifiers["disease"].extend(definites)
        self.modifiers["disease"].extend(indications)
        self.modifiers["disease"].extend(historicals)
        self.modifiers["disease"].extend(conjugates)

        # Create a seperate itemData for the quality modifiers
        self.modifiers["quality"] = itemData.itemData()
        self.modifiers["quality"].extend(pseudoNegations)
        self.modifiers["quality"].extend(definiteNegations)
        self.modifiers["quality"].extend(probableNegations)
        self.modifiers["quality"].extend(probables)
        self.modifiers["quality"].extend(historicals)
        self.modifiers["quality"].extend(conjugates)
        self.modifiers["quality"].extend(qualities)
        self.modifiers["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])
        # Quality targets
        self.targets = {"disease": peItems}
        self.targets["quality"] = itemData.itemData()
        #self.targets["quality"].extend(qualities)
        self.targets["quality"].extend(examFeatures)
        self.targets["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        self.targets["quality2"] = itemData.itemData()
        self.targets["quality2"].extend(artifacts)
        self.temporalCount = 0
        self.models = {}