def openShell(self, trans):
    """ Called when a shell is opened by a user logging in via SSH or similar. """
    # Obtain a protocol instance.  This is our custom Network.SSHServerProtocol.
    # The protocol controls the way that data is sent and received down the connection.
    # In our case, it presents a TTY-based user interface to the user, while all we care
    # about is sending lines to the user and receiving lines from them.
    from Network import SSHServerProtocol

    # Get the protocol instance.  The protocol is also our transport.
    # Note that the Twisted networking model is a stack of protocols,
    # where lower level protocols transport higher level ones.
    self.transport = proto = SSHServerProtocol(self, *self.savedSize)

    # Connect the protocol and the transport together in both directions;
    # session.wrapProtocol adapts our protocol so the SSH session transport can drive it.
    proto.makeConnection(trans)
    trans.makeConnection(session.wrapProtocol(proto))
    #self.send_message("Hi there!")

    # Obtain the Player object from the database
    player_id = self.world.db.get_player_id(self.username, self._charname)
    log.debug("Username: {0}, character: {1}, id: {2}".format(self.username, self._charname, player_id))
    self.player = self.world.get_thing(player_id)

    # Finish the login sequence
    self.complete_login()
def cost(self, fDictList, problemArr):
    """Evaluate the error function.

    This is based off of a sigmoid over the difference from the left-right outputs.
    Assume that the lArr is always 'preferred' over the rArr.
    """
    theCost = 0
    for lSubData, rSubData, subProbArr in self.gradChunkDataIterator(fDictList, problemArr):
        #lTargs = ones(problemArr.shape[0])
        # Here we simply need to calc the cost
        lOut = self.layerModel.fprop(lSubData)
        rOut = self.layerModel.fprop(rSubData)

        outputs = sigmoid(lOut - rOut)
        outputs = where(outputs < OFFSET_EPSILON, OFFSET_EPSILON, outputs)
        outputs = where(outputs > 1 - OFFSET_EPSILON, 1 - OFFSET_EPSILON, outputs)

        # Cross-Entropy
        # NOTE here that all targs are 1.
        error = log(outputs)
        newCostContrib = error.sum()
        theCost -= newCostContrib

    decayContribution = self.l2decay * (self.params ** 2).sum() * problemArr.shape[0]

    if self.chunklog:
        myLog.debug('decayContribution : %.4f, cost : %.4f' % (decayContribution, theCost))

    theCost += decayContribution
    return theCost
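# Illustrative sketch (not part of the original module): a standalone NumPy version of the
# pairwise cost above, using hypothetical left/right score arrays in place of the
# self.layerModel.fprop outputs, and a small epsilon standing in for OFFSET_EPSILON.
import numpy as np

def pairwise_ranking_cost(lScores, rScores, params, l2decay=1e-4, eps=1e-10):
    """Negative log-likelihood that each 'left' item beats its 'right' partner,
    plus an L2 penalty scaled by the number of problem rows."""
    outputs = 1.0 / (1.0 + np.exp(-(lScores - rScores)))   # sigmoid of the score difference
    outputs = np.clip(outputs, eps, 1 - eps)               # same effect as the where() clamps
    cost = -np.log(outputs).sum()                          # all targets are implicitly 1
    cost += l2decay * (params ** 2).sum() * len(lScores)   # decay term scaled by row count
    return cost

# Example: the preferred (left) items mostly score higher, so the cost is small.
lScores = np.array([2.0, 1.5, 0.3])
rScores = np.array([0.5, 1.0, 0.4])
print pairwise_ranking_cost(lScores, rScores, params=np.array([0.1, -0.2]))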
def runDBScript( scriptFile, skipErrors = False ):
    """Given a DB script file object (caller should handle the opening by filename or other method),
    run each command as a SQL statement, delimited by semicolons (;) at the end of a line.

    If there are any errors running a command in the file and the skipErrors parameter is True,
    then this will continue to run the rest of the script, just logging the error message.
    Otherwise, if skipErrors is False, the exception will be raised out of this method.
    """
    conn = connection()
    cur = conn.cursor()
    try:
        sqlLines = []  # As list of lines (strings) until meet semicolon terminator (more efficient than string concatenation)
        for line in scriptFile:
            if not line.startswith(COMMENT_TAG):  # Note, standard SQL comments are auto-ignored ("--" and "/* */")
                sqlLines.append(line)
                if line.strip().endswith(SQL_DELIM):
                    sql = str.join("", sqlLines)
                    log.debug("Executing in Script: " + sql)
                    try:
                        cur.execute( sql )
                        # Need to "auto-commit" after each command,
                        # otherwise a skipped error will rollback
                        # any previous commands as well
                        if skipErrors:
                            conn.commit()
                    except Exception, err:
                        conn.rollback()  # Reset changes and connection state
                        log.warning("Error Executing in Script: " + sql)
                        log.warning(err)
                        if not skipErrors:
                            raise err
                    sqlLines = []
        conn.commit()
    finally:
        # Close cursor and connection when done
        cur.close()
        conn.close()
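# Hypothetical usage sketch for runDBScript (the script file name below is made up):
# open a semicolon-delimited SQL script and run it, tolerating individual statement errors.
if __name__ == "__main__":
    scriptFile = open("createSchema.sql")   # assumed example script path
    try:
        runDBScript(scriptFile, skipErrors=True)   # log and continue past failing statements
    finally:
        scriptFile.close()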
def grad(self, dataArr, targetArr, onlineAdjustmentFactor=None):
    """Evaluate the gradient of the error function wrt the params"""
    currGrad = zeros(self.params.shape)

    meanDOut = []
    minDOut = []
    maxDOut = []

    for subData, subTarg in self.gradChunkDataIterator(dataArr, targetArr):
        actual_out = self.layerModel.fprop(subData)
        d_outputs = actual_out - subTarg

        meanDOut.append(mean(abs(d_outputs)))
        minDOut.append(min(d_outputs))
        maxDOut.append(max(d_outputs))

        self.layerModel.bprop(d_outputs, subData)
        currGradContrib = self.layerModel.grad(d_outputs, subData)
        currGradContrib = currGradContrib.sum(1)
        currGrad += currGradContrib

    decayContribution = 2 * self.l2decay * self.params
    if onlineAdjustmentFactor is not None:
        decayContribution *= onlineAdjustmentFactor
    currGrad += decayContribution

    if self.chunklog:
        myLog.debug('||currGrad||^1 : %.4f, ||decayContribution|| : %.4f, mean(currGrad) : %.4f, max(currGrad) : %.4f' % \
                    (abs(currGrad).sum(), self.l2decay * (self.params**2).sum(), mean(currGrad), max(abs(currGrad))))

    return currGrad
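# Optional sanity check (not in the original code): compare an analytic gradient like the one
# above against a central finite difference of the matching cost function.  costFunc/gradFunc
# are assumed to be bound methods such as self.cost / self.grad, and params is assumed to be
# the live parameter array the model reads (e.g., self.params).
import numpy as np

def finite_difference_check(costFunc, gradFunc, params, dataArr, targetArr, eps=1e-5, nProbe=5):
    """Probe a few random parameter coordinates and report analytic vs numeric gradient."""
    analytic = gradFunc(dataArr, targetArr)
    for i in np.random.choice(len(params), size=min(nProbe, len(params)), replace=False):
        orig = params[i]
        params[i] = orig + eps
        costPlus = costFunc(dataArr, targetArr)
        params[i] = orig - eps
        costMinus = costFunc(dataArr, targetArr)
        params[i] = orig                       # restore the parameter
        numeric = (costPlus - costMinus) / (2 * eps)
        print "param %d: analytic %.6f, numeric %.6f" % (i, analytic[i], numeric)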
def cost(self, dataArr, targetArr, onlineAdjustmentFactor=None, dolog=False):
    """Evaluate the error function.

    Even in batch mode, we iterate over the data in smaller chunks.
    """
    theCost = 0
    for subData, subTarg in self.gradChunkDataIterator(dataArr, targetArr):
        outputs = self.layerModel.fprop(subData)
        outputs = where(outputs < OFFSET_EPSILON, OFFSET_EPSILON, outputs)
        outputs = where(outputs > 1 - OFFSET_EPSILON, 1 - OFFSET_EPSILON, outputs)

        # Cross-Entropy
        error = multiply(subTarg, log(outputs)) + multiply(1 - subTarg, log(1 - outputs))
        newCostContrib = error.sum()
        theCost -= newCostContrib

    decayContribution = self.l2decay * (self.params**2).sum()
    if onlineAdjustmentFactor is not None:
        # Basically a way to make the cost at each small step be a little smaller (cause
        # we end up taking more gradient steps)
        decayContribution *= onlineAdjustmentFactor

    if dolog:
        myLog.debug('decayContribution : %.4f, cost : %.4f' % (decayContribution, theCost))

    theCost += decayContribution
    return theCost
def cost(self, fDictList, problemArr):
    """Evaluate the error function.

    This is based off of a sigmoid over the difference from the left-right outputs.
    Assume that the lArr is always 'preferred' over the rArr.
    """
    theCost = 0
    for lSubData, rSubData, subProbArr in self.gradChunkDataIterator(fDictList, problemArr):
        #lTargs = ones(problemArr.shape[0])
        # Here we simply need to calc the cost
        lOut = self.layerModel.fprop(lSubData)
        rOut = self.layerModel.fprop(rSubData)

        outputs = sigmoid(lOut - rOut)
        outputs = where(outputs < OFFSET_EPSILON, OFFSET_EPSILON, outputs)
        outputs = where(outputs > 1 - OFFSET_EPSILON, 1 - OFFSET_EPSILON, outputs)

        # Cross-Entropy
        # NOTE here that all targs are 1.
        error = log(outputs)
        newCostContrib = error.sum()
        theCost -= newCostContrib

    decayContribution = self.l2decay * (self.params**2).sum() * problemArr.shape[0]

    if self.chunklog:
        myLog.debug('decayContribution : %.4f, cost : %.4f' % (decayContribution, theCost))

    theCost += decayContribution
    return theCost
def cost(self, fDictList, targetArr, idxArr):
    """Evaluate the error function.

    Even in batch mode, we iterate over the data in smaller chunks.
    """
    theCost = 0
    for subData, subTarg in self.gradChunkDataIterator(fDictList, targetArr, idxArr):
        outputs = self.layerModel.fprop(subData)
        outputs = where(outputs < OFFSET_EPSILON, OFFSET_EPSILON, outputs)
        outputs = where(outputs > 1 - OFFSET_EPSILON, 1 - OFFSET_EPSILON, outputs)

        # Cross-Entropy
        error = multiply(subTarg, log(outputs)) + multiply(1 - subTarg, log(1 - outputs))
        newCostContrib = error.sum()
        theCost -= newCostContrib

    decayContribution = self.l2decay * (self.params**2).sum() * len(idxArr)

    if self.chunklog:
        myLog.debug('decayContribution : %.4f, cost : %.4f' % (decayContribution, theCost))

    theCost += decayContribution
    return theCost
def test_analyzePatientItems(self): # Run the association analysis against the mock test data above and verify # expected stats afterwards. associationQuery = \ """ select clinical_item_id, subsequent_item_id, count_0, count_3600, count_86400, count_604800, count_2592000, count_7776000, count_31536000, count_any, time_diff_sum, time_diff_sum_squares from clinical_item_association where clinical_item_id < 0 and count_any > 0 order by clinical_item_id, subsequent_item_id """ log.debug( "Use incremental update, only doing the update based on a part of the data." ) self.analyzer.analyzePatientItems([-11111], (-15, -14), -16) # Count associations that result in given sequence of items expectedAssociationStats = \ [ [-16,-16, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0], # Need virtual item base counts as well [-12,-16, 0, 0, 0, 0, 1, 1, 1, 1, 2509200.0, 2509200.0**2], [-11,-16, 0, 0, 0, 0, 1, 1, 1, 1, 2422800.0, 2422800.0**2], [-10,-16, 0, 0, 0, 0, 0, 2, 2, 2, 5101200.0+5187600.0, 5101200.0**2+5187600.0**2], [ -8,-16, 0, 0, 0, 0, 0, 1, 1, 1, 5180400.0, 5180400.0**2], ] associationStats = DBUtil.execute(associationQuery) self.assertEqualTable(expectedAssociationStats, associationStats, precision=3) # Should record links between surrogate triple items and the sequential items it is based upon itemLinkQuery = \ """ select clinical_item_id, linked_item_id from clinical_item_link where clinical_item_id < 0 order by clinical_item_id, linked_item_id """ expectedItemLinks = \ [ [-16, -15], [-16, -14], ] itemLinks = DBUtil.execute(itemLinkQuery) self.assertEqualTable(expectedItemLinks, itemLinks)
def analyzePatientItems(self, analysisOptions): """Primary run function to analyze patient clinical item data and record updated stats to the respective database tables. Does the analysis only for records pertaining to the given patient IDs (provides a way to limit the extent of analysis depending on params). Will also record analyze_date timestamp on any records analyzed, so that analysis will not be repeated if called again on the same records. """ progress = ProgressDots() conn = self.connFactory.connection() try: # Preload lookup data to facilitate rapid checks and filters later linkedItemIdsByBaseId = self.dataManager.loadLinkedItemIdsByBaseId( conn=conn) # Keep an in memory buffer of the updates to be done so can stall and submit them # to the database in batch to minimize inefficient DB hits updateBuffer = self.makeUpdateBuffer() log.info("Main patient item query...") for iPatient, patientItemList in enumerate( self.queryPatientItemsPerPatient(analysisOptions, progress=progress, conn=conn)): log.debug( "Calculate associations for Patient %d's %d patient items. %d associations in buffer." % (iPatient, len(patientItemList), updateBuffer["nAssociations"])) self.updateItemAssociationsBuffer(patientItemList, updateBuffer, analysisOptions, linkedItemIdsByBaseId, progress=progress) if self.readyForIntervalCommit(iPatient, updateBuffer, analysisOptions): log.info("Commit after %s patients" % (iPatient + 1)) self.persistUpdateBuffer(updateBuffer, linkedItemIdsByBaseId, analysisOptions, iPatient, conn=conn) # Periodically commit update buffer else: # If not committing, still send a quick arbitrary query to DB, # otherwise connection may get recycled because DB thinks timeout with no interaction DBUtil.execute("select 1+1", conn=conn) log.info("Final commit / persist") self.persistUpdateBuffer(updateBuffer, linkedItemIdsByBaseId, analysisOptions, -1, conn=conn) # Final update buffer commit. Don't use iPatient here, as may collide if interval commit happened to land on last patient finally: conn.close()
def test_dataConversion(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process...") self.converter.convertSourceItems(TEST_START_DATE) # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.external_id, ci.name, ci.description, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = '%s' order by pi.patient_id desc, ci.name, pi.item_date """ % TEST_SOURCE_TABLE expectedData = \ [ # Expected data should be updated once we have ICD9 - Name conversion tables [None, -126268, -131015534571, "Diagnosis (PROBLEM_LIST)", None, "ICD9.-0285", "Diagnosis 1", datetime(2111,5,4)], [None, -126268, -131015534571, "Diagnosis (ADMIT_DX)", None, "ICD9.-785", "Diagnosis 4", datetime(2111,5,18)], [None, -126500, -131017780655, "Diagnosis (PROBLEM_LIST)", None, "ICD9.-431.0", "Diagnosis 2b", datetime(2111,10,14)], [None, -126500, -131017780655, "Diagnosis (PROBLEM_LIST)", None, "ICD9.-431.00", "Diagnosis 2", datetime(2111,10,14)], [None, -126798, -131014753610, "Diagnosis (ADMIT_DX)", None, "ICD9.-780", "Diagnosis 5", datetime(2111,3,8)], [None, -126798, -131016557370, "Diagnosis (ADMIT_DX)", None, "ICD9.-780", "Diagnosis 5", datetime(2111,7,26)], [None, -126798, -131016557370, "Diagnosis (ADMIT_DX)", None, "ICD9.-780.9", "Diagnosis 6a", datetime(2111,7,26)], [None, -126798, -131016557370, "Diagnosis (ADMIT_DX)", None, "ICD9.-780.97", "Diagnosis 6", datetime(2111,7,26)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD10.-10431.0', 'Diagnosis 2b Full', datetime(2111, 10, 14, 0, 0)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD10.-10431.00', 'Diagnosis 2 Full', datetime(2111, 10, 14, 0, 0)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD10.-10432', '-10432', datetime(2111, 10, 14, 0, 0)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD9.-431.0', 'Diagnosis 2b', datetime(2111, 10, 14, 0, 0)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD9.-431.00', 'Diagnosis 2', datetime(2111, 10, 14, 0, 0)], [None, -2126500L, -135000000000L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD9.-432', '-432', datetime(2111, 10, 14, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD10.-10482.9', '-10482.9', datetime(2111, 6, 6, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD10.-10483', '-10483', datetime(2111, 6, 6, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (ADMIT_DX)', None, 'ICD10.-10780', 'Diagnosis 5 Full', datetime(2111, 3, 8, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD9.-482.9', '-482.9', datetime(2111, 6, 6, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (PROBLEM_LIST)', None, 'ICD9.-483', '-483', datetime(2111, 6, 6, 0, 0)], [None, -2126798L, -135014753610L, 'Diagnosis (ADMIT_DX)', None, 'ICD9.-780', 'Diagnosis 5', datetime(2111, 3, 8, 0, 0)], ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData)
def train(self): """Method to run through all the data and train away""" self.costTrajectory = []; # Set some things up if doing online c1Idx = None; c0Idx = None; if not self.batch: shuffIdx = arange(0, len(self.idxArr)); c1Idx = shuffIdx[self.targetArr == 1]; c0Idx = shuffIdx[self.targetArr == 0]; #myLog.debug('len(c1Idx) : %d, len(c0Idx) : %d' % (len(c1Idx), len(c0Idx))); numOnlineRuns = int(len(self.idxArr)/float(self.onlineChunkSize)) + 1; c1Step = int(len(c1Idx) / numOnlineRuns) + 1; c0Step = int(len(c0Idx) / numOnlineRuns) + 1; try: for iEpoch in range(self.numEpochs): if self.batch: self.trainer.step(self.fDictList, self.targetArr, self.idxArr); else: # Want to balance each of the chunks used in the online learning. shuffle(c1Idx); shuffle(c0Idx); for iOnlineRun in range(numOnlineRuns): c1RowStart = iOnlineRun * c1Step; c1RowEnd = c1RowStart + c1Step; c0RowStart = iOnlineRun * c0Step; c0RowEnd = c0RowStart + c0Step; theInds = concatenate((c1Idx[c1RowStart:c1RowEnd], c0Idx[c0RowStart:c0RowEnd])) shuffle(theInds) #myLog.debug('minshuffidx : %d, maxshuffidx: %d' % (min(theInds), max(theInds))) subTargets = self.targetArr[theInds]; subIdx = self.idxArr[theInds]; self.trainer.step(self.fDictList, subTargets, subIdx); if self.epochlog: myLog.debug('About to call cost in postEpoch call') self.postEpochCall(iEpoch); # Test for convergence if self.checkconverge and len(self.costTrajectory) > self.convergeEpochs + 1: if std(self.costTrajectory[-self.convergeEpochs:]) < self.costEpsilon: myLog.critical('Convergence after Epoch %d!!' % iEpoch); return self.costTrajectory; if self.callback is not None: self.callback(self); myLog.critical('Never completely converged after %d epochs!' % self.numEpochs); except KeyboardInterrupt, e: myLog.critical('Interrupted with Keyboard after %d epochs, stopping here, currCost = %f' % (iEpoch, self.costTrajectory[-1])) return self.costTrajectory;
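# Illustrative sketch (assumed names, not part of the class): how the online loop above carves
# class-balanced chunks out of the index array, keeping roughly the same 1/0 ratio per step.
import numpy as np

def balanced_chunks(targetArr, chunkSize):
    """Yield index arrays mixing positive and negative examples in proportion to the data."""
    allIdx = np.arange(len(targetArr))
    c1Idx = allIdx[targetArr == 1]
    c0Idx = allIdx[targetArr == 0]
    nRuns = int(len(allIdx) / float(chunkSize)) + 1
    c1Step = int(len(c1Idx) / nRuns) + 1
    c0Step = int(len(c0Idx) / nRuns) + 1
    np.random.shuffle(c1Idx)
    np.random.shuffle(c0Idx)
    for iRun in range(nRuns):
        chunk = np.concatenate((c1Idx[iRun * c1Step:(iRun + 1) * c1Step],
                                c0Idx[iRun * c0Step:(iRun + 1) * c0Step]))
        np.random.shuffle(chunk)
        yield chunk

# Example: targets with a 1:3 class imbalance still appear mixed within each chunk.
targets = np.array([1, 0, 0, 0] * 25)
for chunk in balanced_chunks(targets, chunkSize=20):
    print len(chunk), targets[chunk].mean()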
def askQuestions(self, questions):
    resp = []
    for message, isPassword in questions:
        resp.append((message, 0 if isPassword else 1))

    packet = NS('') + NS('') + NS('')
    packet += struct.pack('>L', len(resp))
    for prompt, echo in resp:
        packet += NS(prompt)
        packet += chr(echo)
    self.transport.sendPacket(userauth.MSG_USERAUTH_INFO_REQUEST, packet)
    log.debug("Asked the user questions")
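# For reference, a self-contained sketch of the SSH2_MSG_USERAUTH_INFO_REQUEST payload layout
# (RFC 4256) that askQuestions builds above: empty name, instruction, and language strings,
# a uint32 prompt count, then each prompt string followed by an echo flag byte.
# ns() below is a stand-in for twisted.conch.ssh.common.NS.
import struct

def ns(s):
    """SSH 'string': uint32 length prefix followed by the bytes."""
    return struct.pack('>L', len(s)) + s

def build_info_request(questions):
    """questions: list of (message, isPassword); echo is suppressed for passwords."""
    packet = ns('') + ns('') + ns('')             # name, instruction, language tag
    packet += struct.pack('>L', len(questions))   # number of prompts
    for message, isPassword in questions:
        packet += ns(message)
        packet += chr(0 if isPassword else 1)     # echo flag
    return packet

# Example: one visible prompt and one hidden (password-style) prompt.
payload = build_info_request([("Username: ", False), ("Password: ", True)])
print repr(payload)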
def store_pubkey(self, pubkey):
    """
    Temporarily store a publickey in the auth session state.
    These can later be retrieved and stored permanently if an
    account is created.
    """
    algo, blob, rest = getNS(pubkey[1:], 2)
    self.state.add_key(blob)
    log.debug( self.key2str(algo, blob) )
    # Tell client that this key didn't auth them
    self.send_authFail()
def analyzePatientItems(self, patientIds, itemIdSequence, virtualItemId): """Primary run function to analyze patient clinical item data and record updated stats to the respective database tables. Does the analysis only for records pertaining to the given patient IDs (provides a way to limit the extent of analysis depending on params). Note that this does NOT record analyze_date timestamp on any records analyzed, as would collide with AssociationAnalysis primary timestamping, thus it is the caller's responsibility to be careful not to repeat this analysis redundantly and generating duplicated statistics. """ progress = ProgressDots() conn = self.connFactory.connection() try: # Preload lookup data to facilitate rapid checks and filters later linkedItemIdsByBaseId = self.dataManager.loadLinkedItemIdsByBaseId( conn=conn) self.verifyVirtualItemLinked(itemIdSequence, virtualItemId, linkedItemIdsByBaseId, conn=conn) # Keep an in memory buffer of the updates to be done so can stall and submit them # to the database in batch to minimize inefficient DB hits updateBuffer = dict() log.info("Main patient item query...") analysisOptions = AnalysisOptions() analysisOptions.patientIds = patientIds for iPatient, patientItemList in enumerate( self.queryPatientItemsPerPatient(analysisOptions, progress=progress, conn=conn)): log.debug( "Calculate associations for Patient %d's %d patient items" % (iPatient, len(patientItemList))) self.updateItemAssociationsBuffer(itemIdSequence, virtualItemId, patientItemList, updateBuffer, linkedItemIdsByBaseId, progress=progress) # Periodically send a quick arbitrary query to DB, otherwise connection may get recycled because DB thinks timeout with no interaction DBUtil.execute("select 1+1", conn=conn) log.info("Final commit") self.commitUpdateBuffer(updateBuffer, linkedItemIdsByBaseId, conn=conn) # Final update buffer commit finally: conn.close()
def standardDecay(self, decayAnalysisOptions): conn = self.connFactory.connection() prefixes = ['', 'patient_', 'encounter_'] times = [ '0', '3600', '7200', '21600', '43200', '86400', '172800', '345600', '604800', '1209600', '2592000', '7776000', '15552000', '31536000', '63072000', '126144000', 'any' ] try: log.debug("Connected to datbase") curs = conn.cursor() fields = list() for prefix in prefixes: for time in times: fieldName = prefix + "count_" + str(time) fields.append(fieldName + '=' + fieldName + "*" + str(decayAnalysisOptions.decay)) """log.debug("starting to drop indices"); sqlQuery = "ALTER TABLE clinical_item_association drop CONSTRAINT clinical_item_association_pkey;" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association drop CONSTRAINT clinical_item_association_clinical_item_fkey;" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association drop CONSTRAINT clinical_item_association_subsequent_item_fkey;" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association drop CONSTRAINT clinical_item_association_composite_key;" curs.execute(sqlQuery) sqlQuery = "DROP INDEX clinical_item_association_clinical_item_id;" curs.execute(sqlQuery) sqlQuery = "DROP INDEX clinical_item_association_subsequent_item_id;" curs.execute(sqlQuery) log.debug( "finished dropping indices" ); """ log.debug("starting decay") sqlQuery = "UPDATE clinical_item_association SET " + str.join( ',', fields) + ";" curs.execute(sqlQuery) log.debug("finished decay") """log.debug("starting to add indices"); sqlQuery = "ALTER TABLE clinical_item_association ADD CONSTRAINT clinical_item_association_pkey PRIMARY KEY (clinical_item_association_id);" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association ADD CONSTRAINT clinical_item_association_clinical_item_fkey FOREIGN KEY (clinical_item_id) REFERENCES clinical_item(clinical_item_id);" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association ADD CONSTRAINT clinical_item_association_subsequent_item_fkey FOREIGN KEY (subsequent_item_id) REFERENCES clinical_item(clinical_item_id);" curs.execute(sqlQuery) sqlQuery = "ALTER TABLE clinical_item_association ADD CONSTRAINT clinical_item_association_composite_key UNIQUE (clinical_item_id, subsequent_item_id);" curs.execute(sqlQuery) sqlQuery = "CREATE INDEX clinical_item_association_clinical_item_id ON clinical_item_association(clinical_item_id, subsequent_item_id);" curs.execute(sqlQuery) sqlQuery = "CREATE INDEX clinical_item_association_subsequent_item_id ON clinical_item_association(subsequent_item_id, clinical_item_id);" curs.execute(sqlQuery) log.debug("finished adding indices"); """ conn.commit() log.debug("finished commit") finally: curs.close() conn.close()
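# Small sketch (standalone, illustrative only) of how the UPDATE statement above is assembled:
# every prefixed count column is multiplied in place by the decay factor.
prefixes = ['', 'patient_', 'encounter_']
times = ['0', '3600', '86400', 'any']   # abbreviated list of the time-bin suffixes
decay = 0.9

fields = []
for prefix in prefixes:
    for time in times:
        fieldName = prefix + "count_" + time
        fields.append(fieldName + "=" + fieldName + "*" + str(decay))

sqlQuery = "UPDATE clinical_item_association SET " + ",".join(fields) + ";"
print sqlQuery
# e.g. UPDATE clinical_item_association SET count_0=count_0*0.9,count_3600=count_3600*0.9,...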
def prepareItemAssociations(self, itemIdPairs, linkedItemIdsByBaseId, conn): """Make sure all pair-wise item association records are ready / initialized so that subsequent queries don't have to pause to check for their existence. Should help greatly to reduce number of queries and execution time. """ clinicalItemIdSet = set() #Do the below to convert the list of strings into a list of pairs, which is needed for the rest of this function for index, pair in enumerate(itemIdPairs): itemIdPairs[index] = eval(pair) for (itemId1, itemId2) in itemIdPairs: clinicalItemIdSet.add(itemId1) clinicalItemIdSet.add(itemId2) nItems = len(clinicalItemIdSet) # Now go through all needed item pairs and create default records as needed log.debug("Ensure %d baseline records ready" % (nItems * nItems)) for itemId1 in clinicalItemIdSet: # Query to see which ones already exist in the database # Do this for each source clinical item instead of all combinations to avoid excessive in memory tracking query = SQLQuery() query.addSelect("clinical_item_id") query.addSelect("subsequent_item_id") query.addFrom("clinical_item_association") query.addWhereEqual("clinical_item_id", itemId1) query.addWhereIn("subsequent_item_id", clinicalItemIdSet) associationTable = DBUtil.execute(query, conn=conn) # Keep track in memory temporarily for rapid lookup existingItemIdPairs = set() for row in associationTable: existingItemIdPairs.add(tuple(row)) for itemId2 in clinicalItemIdSet: itemIdPair = (itemId1, itemId2) if itemIdPair not in existingItemIdPairs and self.acceptableClinicalItemIdPair( itemId1, itemId2, linkedItemIdsByBaseId): defaultAssociation = RowItemModel( itemIdPair, ("clinical_item_id", "subsequent_item_id")) try: # Optimistic insert of a new item pair, should be safe since just checked above, but parallel processes may collide DBUtil.insertRow("clinical_item_association", defaultAssociation, conn=conn) except conn.IntegrityError, err: log.warning(err) pass
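# Side note sketch: the eval(pair) call above assumes each pair arrives as a string like
# "(-1, -2)".  ast.literal_eval is a safer drop-in for that specific parsing step, since it
# only accepts Python literals; shown here under that assumption about the input format.
import ast

itemIdPairs = ["(-1, -2)", "(-1, -3)"]
parsedPairs = [ast.literal_eval(pair) for pair in itemIdPairs]
print parsedPairs   # [(-1, -2), (-1, -3)]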
def handle_known_user(self, method, rest):
    if method == "publickey":
        #TODO: Do public key auth for the user.
        algo, blob, rest = getNS(rest[1:], 2)
        log.trace( self.key2str(algo, blob) )
        self.send_authFail()
    elif method == "keyboard-interactive":
        log.debug( "Interactive attempt" )
        # Start up the keyboard-interactive state machine.
        # This will take care of asking questions.
        self.state.begin_interactive()
    elif method == "password":
        #TODO: Do password auth for the user.
        self.send_authFail()
    else:
        # No idea what this is, but we don't support it.
        log.debug( "Unknown {0} attempt".format(method) )
        self.send_authFail()
def test_recommender_stats_commandline(self): # Run the recommender against the mock test data above and verify expected stats calculations log.debug("Query with single item not perturbed by others.") headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher" ] expectedData = \ [ RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers ), RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers ), ] sys.stdout = StringIO() # Redirect stdout output to collect test results argv = [ "ItemRecommender.py", "maxRecommendedId=0&queryItemIds=-6&countPrefix=patient_&resultCount=3&sortField=P-Fisher", "-" ] self.recommender.main(argv) textOutput = StringIO(sys.stdout.getvalue()) self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers) log.debug("Query for non-unique counts.") headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio" ] expectedData = \ [ RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers ), RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers ), ] sys.stdout = StringIO() # Redirect stdout output to collect test results argv = [ "ItemRecommender.py", "maxRecommendedId=0&queryItemIds=-6&countPrefix=&resultCount=3&sortField=oddsRatio", "-" ] self.recommender.main(argv) textOutput = StringIO(sys.stdout.getvalue()) self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers)
def test_recommender_stats(self): # Run the recommender against the mock test data above and verify expected stats calculations query = RecommenderQuery() query.parseParams \ ( { "countPrefix": "patient_", "queryItemIds": "-6", "resultCount": "3", # Just get top 3 ranks for simplicity "maxRecommendedId": "0", # Artificial constraint to focus only on test data "sortField": "P-Fisher", # Specifically request derived expected vs. observed stats } ) log.debug("Query with single item not perturbed by others.") headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher" ] expectedData = \ [ RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers ), RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers) log.debug("Query for non-unique counts.") query.parseParams \ ( { "countPrefix": "", "sortField": "oddsRatio", } ) headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio" ] expectedData = \ [ RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers ), RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)
def ssh_USERAUTH_REQUEST(self, packet): """ This method is called when a packet is received. The client has requested authentication. Payload: string user string next service string method [authentication specific data] """ self.packet_count += 1 user, nextService, method, rest = getNS(packet, 3) if self.isBadUsername(user): return first = False if self.state is None or self.state.is_invalid( user, nextService ): # If username or desired service has changed during auth, # the RFC says we must discard all state. self.state = UserAuthState( self, user, nextService ) # We do keep track of how many state changes there have been. # This is used to thwart bots. self.state_changes += 1 #log.debug(dir(self.transport.factory.portal)) self.firstContact() first = True log.debug( "Auth request for user {0}, service {1}, method {2}.".format(user, nextService, method) ) if self.state_changes > 3 or self.packet_count > 20: log.info("Disconnecting user: too many attempts") self.disconnect_hostNotAllowed("You are doing that too much!") if first and self.state.user_is_known: self.supportedAuthentications.append("password") if method == "none": # We want to push the user through keyboard-interactive. # This lets the client know what methods we do support. return self.send_authFail() if self.state.user_is_known: # Username is known to us! Do normal login. return self.handle_known_user( method, rest ) else: # This user is not known to us. return self.handle_new_user(method, rest)
def insertRow(tableName, insertDict, conn=None, cursor=None):
    """Insert a record into the named table based on the contents
    of the provided row dictionary (RowItemModel)"""
    extConn = ( conn is not None )
    if not extConn:
        conn = connection()
    extCursor = ( cursor is not None )
    if cursor is None:
        cursor = conn.cursor()
    try:
        insertQuery = buildInsertQuery( tableName, insertDict.keys() )
        insertParams = insertDict.values()

        # Convert component list into string
        log.debug( parameterizeQueryString( insertQuery, insertParams ) )

        cursor.execute( insertQuery, insertParams )
    finally:
        if not extCursor:
            cursor.close()
        if not extConn:
            conn.close()
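# Hypothetical usage sketch for insertRow: the table and column names below are examples only.
rowData = {"clinical_item_id": -1, "name": "TestItem", "description": "Example row"}
conn = connection()
try:
    insertRow("clinical_item", rowData, conn=conn)
    conn.commit()   # insertRow does not issue a commit itself
finally:
    conn.close()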
def test_dataConversion(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process...") convOptions = ConversionOptions() convOptions.startDate = TEST_START_DATE self.converter.convertSourceItems(convOptions) # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.name, ci.description, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_culture_micro' order by pi.external_id desc """ expectedData = \ [ ##### CHANGE to the actual expected data [-10, 1, 2, "Microculture Susceptibility General", "Clindamycin:Not_Susceptible", "Not_Susceptible TO Clindamycin", DBUtil.parseDateValue("9/10/2111 13:15"),], [-11, 2, 3, "Microculture Susceptibility General", "Vancomycin:Susceptible", "Susceptible TO Vancomycin", DBUtil.parseDateValue("4/26/2109 9:49"),], [-12, 3, 4, "Microculture Susceptibility General", "Oxacillin:Not_Susceptible", "Not_Susceptible TO Oxacillin", DBUtil.parseDateValue("4/18/2109 4:48"),], [-13, 4, 5, "Microculture Susceptibility General", "Vancomycin:Susceptible", "Susceptible TO Vancomycin", DBUtil.parseDateValue("3/28/2109 23:21"),], [-14, 5, 6, "Microculture Susceptibility General", "Amoxicillin-Clavulanic Acid:Susceptible", "Susceptible TO Amoxicillin-Clavulanic Acid", DBUtil.parseDateValue("6/3/2109 17:07")], [-15, 6, 7, "Microculture Susceptibility General", "Negative Culture", "Microculture Grew No Bacteria", DBUtil.parseDateValue("6/4/2109 17:07")], [-18, 11, 11, "Microculture Susceptibility General", "Amikacin:Susceptible", "Susceptible TO Amikacin", DBUtil.parseDateValue("6/11/2111 18:07")] ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData)
def resetAssociationModel(self, conn=None): extConn = True if conn is None: conn = self.connFactory.connection() extConn = False try: log.debug("Connected to database for reseting purposes") result = DBUtil.execute("DELETE FROM clinical_item_association;", conn=conn) log.debug("Training table cleared items: %s" % result) # Droppings constraints can greatly speed up the next step of updating analyze dates #curs.execute("ALTER TABLE backup_link_patient_item drop constraint backup_link_patient_item_patient_item_fkey;") #curs.execute("ALTER TABLE patient_item_collection_link drop constraint patient_item_collection_link_patient_fkey;") #curs.execute("ALTER TABLE patient_item drop constraint patient_item_pkey;") #curs.execute("ALTER TABLE patient_item drop constraint patient_item_clinical_item_fkey;") #curs.execute("drop index index_patient_item_clinical_item_id_date;") #curs.execute("drop index index_patient_item_patient_id_date;") #curs.execute("drop index index_patient_item_external_id;") #curs.execute("drop index index_patient_item_encounter_id_date;") #curs.execute("ALTER TABLE patient_item drop constraint patient_item_composite;") result = DBUtil.execute( "UPDATE patient_item SET analyze_date = NULL where analyze_date is not NULL;", conn=conn) log.debug("Analyze_date set to NULL: %s" % result) # Add back constraints #curs.execute("ALTER TABLE patient_item ADD CONSTRAINT patient_item_pkey PRIMARY KEY (patient_item_id);") #curs.execute("ALTER TABLE patient_item ADD CONSTRAINT patient_item_clinical_item_fkey FOREIGN KEY (clinical_item_id) REFERENCES clinical_item(clinical_item_id);") #curs.execute("CREATE INDEX index_patient_item_clinical_item_id_date ON patient_item(clinical_item_id, item_date);") #curs.execute("CREATE INDEX index_patient_item_patient_id_date ON patient_item(patient_id, item_date);") #curs.execute("CREATE INDEX index_patient_item_external_id ON patient_item(external_id, clinical_item_id);") #curs.execute("CREATE INDEX index_patient_item_encounter_id_date ON patient_item(encounter_id, item_date);") #curs.execute("ALTER TABLE patient_item ADD CONSTRAINT patient_item_composite UNIQUE (patient_id, clinical_item_id, item_date);") #curs.execute("ALTER TABLE backup_link_patient_item ADD CONSTRAINT backup_link_patient_item_patient_item_fkey FOREIGN KEY (patient_item_id) REFERENCES patient_item(patient_item_id);") #curs.execute("ALTER TABLE patient_item_collection_link ADD CONSTRAINT patient_item_collection_link_patient_fkey FOREIGN KEY (patient_item_id) REFERENCES patient_item(patient_item_id);") # Flag that any cached association metrics will be out of date self.clearCacheData("analyzedPatientCount", conn=conn) # Reset clinical_item denormalized counts self.updateClinicalItemCounts(conn=conn) conn.commit() log.debug("Connection committed") finally: if not extConn: conn.close()
def extract_argvDict(self, commentLines):
    argvDict = dict()
    for line in commentLines:
        if "argv[0]" not in argvDict:  # Only use the first one found
            commentStr = line[1:].strip()  # Remove comment tag and any flanking whitespace
            try:
                jsonData = json.loads(commentStr)
                argv = jsonData
                if type(jsonData) == dict:
                    argv = jsonData["argv"]

                # Simple parse through argv to turn into dictionary of key-value pairs
                lastKey = None
                iArg = 0
                for i in xrange(len(argv)):
                    if i == 0:
                        argvDict["argv[0]"] = argv[i]
                    else:
                        if lastKey is not None:
                            # Already have an option key, see if next item is the option value,
                            # or another option (meaning the last one was just a set/present flag)
                            if argv[i].startswith("-"):
                                # Two option keys in a row, the former was apparently a set/present flag
                                argvDict[lastKey] = lastKey
                                lastKey = argv[i]
                            else:
                                argvDict[lastKey] = argv[i]
                                lastKey = None
                        else:
                            if argv[i].startswith("-"):
                                lastKey = argv[i]
                            else:
                                # Moved on to general arguments
                                argvDict["args[%d]" % iArg] = argv[i]
                                iArg += 1
            except ValueError, exc:
                # Not a JSON parsable string, ignore it then
                log.debug(exc)
                pass
    return argvDict
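# Worked example (illustrative values): extract_argvDict expects comment lines that carry a JSON
# record with an "argv" list, like the one below, and flattens it into option key-value pairs.
exampleCommentLines = ['# {"argv": ["AssociationAnalysis.py", "-i", "patientIds.txt", "-u", "500", "outputFile.txt"]}']
# Feeding these lines through extract_argvDict should produce a dictionary equivalent to:
expectedArgvDict = \
    {   "argv[0]": "AssociationAnalysis.py",
        "-i": "patientIds.txt",
        "-u": "500",
        "args[0]": "outputFile.txt",
    }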
def cost(self, fDictList, targetArr, idxArr):
    """Evaluate the error function.

    Even in batch mode, we iterate over the data in smaller chunks.
    """
    theCost = 0
    for subData, subTarg in self.gradChunkDataIterator(fDictList, targetArr, idxArr):
        outputs = self.layerModel.fprop(subData)
        outputs = where(outputs < OFFSET_EPSILON, OFFSET_EPSILON, outputs)
        outputs = where(outputs > 1 - OFFSET_EPSILON, 1 - OFFSET_EPSILON, outputs)

        # Cross-Entropy
        error = multiply(subTarg, log(outputs)) + multiply(1 - subTarg, log(1 - outputs))
        newCostContrib = error.sum()
        theCost -= newCostContrib

    decayContribution = self.l2decay * (self.params**2).sum() * len(idxArr)

    if self.chunklog:
        myLog.debug('decayContribution : %.4f, cost : %.4f' % (decayContribution, theCost))

    theCost += decayContribution
    return theCost
def main(self, argv):
    """Main method, callable from command line"""
    usageStr = "usage: %prog [options] <patientIds>\n"+\
                "   <patientIds>    Comma-separated list of patient IDs to run the analysis on, or use option to specify a file.\n"
    parser = OptionParser(usage=usageStr)
    parser.add_option("-i", "--idFile", dest="idFile", help="If provided, look for patient IDs in the named file, one ID per line, in the format '/Users/Muthu/Desktop/JonathanChen/patientlist.txt'")
    parser.add_option("-s", "--startDate", dest="startDate", metavar="<startDate>", help="Date string (e.g., 2011-12-15), must be provided, will start analysis on items occurring on or after this date.")
    parser.add_option("-e", "--endDate", dest="endDate", metavar="<endDate>", help="Date string (e.g., 2011-12-15), must be provided, will stop analysis on items occurring before this date.")
    parser.add_option("-w", "--window", type="int", dest="window", metavar="<window>", help="Window integer (e.g., 36), (unit is deltas, i.e. a window of 36 and a delta of 4 weeks means that after 36 x 4 weeks, the data is decayed ~1/e ~ 0.37). More precisely, the window x delta is how long it will take for the data to decay to 38 percent of its original worth. Higher delta means it takes longer to decay. This number must be provided.")
    parser.add_option("-d", "--delta", type="int", dest="delta", metavar="<delta>", help="Delta integer (e.g., 4), (unit of time is weeks, defaults to 4 weeks), define in what increments you want to read in the data. After each increment/delta, it performs a decay.")
    parser.add_option("-a", "--associationsPerCommit", type="int", dest="associationsPerCommit", help="If provided, will commit incremental analysis results to the database when it accrues this many association items. Can help to avoid accruing too many buffered items whose runtime memory will exceed the 32bit 2GB program limit.")
    parser.add_option("-u", "--itemsPerUpdate", type="int", dest="itemsPerUpdate", help="If provided, when updating patient_item analyze_dates, will only update this many items at a time to avoid overloading MySQL query.")
    parser.add_option("-o", "--outputFile", dest="outputFile", help="If provided, send buffer to output file rather than committing to database")
    (options, args) = parser.parse_args(argv[1:])

    decayAnalysisOptions = DecayAnalysisOptions()

    log.debug("starting process")

    # Set start and end dates, item length (delta), and decay rate
    decayAnalysisOptions.startD = datetime.strptime(options.startDate, DATE_FORMAT)  # Makes a datetime object for the start date
    decayAnalysisOptions.endD = datetime.strptime(options.endDate, DATE_FORMAT)      # ... and the end date
    decayAnalysisOptions.windowLength = options.window  # How many deltas in your window
    if options.delta is not None:
        decayAnalysisOptions.delta = timedelta(weeks=options.delta)  # Length of one decay increment

    if options.associationsPerCommit is not None:
        decayAnalysisOptions.associationsPerCommit = options.associationsPerCommit
    if options.itemsPerUpdate is not None:
        decayAnalysisOptions.itemsPerUpdate = options.itemsPerUpdate
    if options.outputFile is not None:
        decayAnalysisOptions.outputFile = options.outputFile

    # Set patientIds based on either a file input or args
    decayAnalysisOptions.patientIds = list()
    if len(args) > 0:
        decayAnalysisOptions.patientIds.extend(args[0].split(","))
    if options.idFile is not None:
        idFile = stdOpen(options.idFile)
        for line in idFile:
            decayAnalysisOptions.patientIds.append(line.strip())

    # Quit if invalid parameters
    if decayAnalysisOptions.startD is None or decayAnalysisOptions.endD is None or options.window is None or options.window == 0 or decayAnalysisOptions.patientIds is None:
        parser.print_help()
        sys.exit(0)

    log.debug("global start and end date")
    log.debug("%s %s %s" % (decayAnalysisOptions.startD, decayAnalysisOptions.endD, decayAnalysisOptions.windowLength))

    self.decayAnalyzePatientItems(decayAnalysisOptions)
def handle_new_user(self, method, rest):
    """
    Handles incoming auth from a new, unknown username.
    """
    if method == "publickey":
        # Store their pubkeys so they can use one to register with us.
        log.debug( "Pubkey attempt" )
        self.store_pubkey( rest )
    elif method == "keyboard-interactive":
        log.debug( "Interactive attempt" )
        # Start up the keyboard-interactive state machine.
        # This will take care of asking questions.
        self.state.begin_interactive()
    elif method == "password":
        # We told this client we don't support passwords
        # but they are ignoring us. Probably a bot.
        log.info("Disconnecting user: illegal password attempt")
        self.disconnect_noAuthAllowed("This auth method is not allowed")
        self.transport.factory.banHost(self.ip)
    else:
        # No idea what this is, but we don't support it.
        log.debug( "Unknown {0} attempt".format(method) )
        self.send_authFail()
def test_recommender(self): # Run the recommender against the mock test data above and verify expected stats afterwards. query = RecommenderQuery() #query.queryItemIds = set(); #query.excludeItemIds = set(); #query.categoryIds = set(); #query.timeDeltaMax = None; # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent. If left blank, will just consider all items within a given patient as co-occurrent. query.sortField = "tf" query.limit = 16 # Go ahead and query for all since short list and can get expected calculation results for all query.maxRecommendedId = 0 # Artificial constraint to focus only on test data log.debug( "Query with no item key input, just return ranks by general likelihood then." ) headers = ["clinical_item_id", "score"] expectedData = \ [ RowItemModel( [-2, 2.0/13], headers ), RowItemModel( [-5, 2.0/13], headers ), RowItemModel( [-6, 2.0/13], headers ), RowItemModel( [-1, 1.0/13], headers ), RowItemModel( [-3, 1.0/13], headers ), RowItemModel( [-7, 1.0/13], headers ), RowItemModel( [-8, 1.0/13], headers ), RowItemModel( [-10,1.0/13], headers ), RowItemModel( [-11,1.0/13], headers ), RowItemModel( [-12,1.0/13], headers ), RowItemModel( [-13,1.0/13], headers ), RowItemModel( [-14,1.0/13], headers ), RowItemModel( [-15,1.0/13], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "Query with key item inputs for which no data exists. Effecitvely ignore it then, so just return ranks by general likelihood." ) query.queryItemIds = set([-100]) expectedData = \ [ RowItemModel( [-2, 2.0/13], headers ), RowItemModel( [-5, 2.0/13], headers ), RowItemModel( [-6, 2.0/13], headers ), RowItemModel( [-1, 1.0/13], headers ), RowItemModel( [-3, 1.0/13], headers ), RowItemModel( [-7, 1.0/13], headers ), RowItemModel( [-8, 1.0/13], headers ), RowItemModel( [-10,1.0/13], headers ), RowItemModel( [-11,1.0/13], headers ), RowItemModel( [-12,1.0/13], headers ), RowItemModel( [-13,1.0/13], headers ), RowItemModel( [-14,1.0/13], headers ), RowItemModel( [-15,1.0/13], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("Query with category filter on recommended results.") query.queryItemIds = set([-100]) query.excludeCategoryIds = set([-1, -4, -5, -6]) expectedData = \ [ #RowItemModel( [-2, 2.0/13], headers ), RowItemModel( [-5, 2.0/13], headers ), RowItemModel( [-6, 2.0/13], headers ), #RowItemModel( [-1, 1.0/13], headers ), #RowItemModel( [-3, 1.0/13], headers ), RowItemModel( [-7, 1.0/13], headers ), RowItemModel( [-8, 1.0/13], headers ), RowItemModel( [-10,1.0/13], headers ), RowItemModel( [-11,1.0/13], headers ), RowItemModel( [-12,1.0/13], headers ), RowItemModel( [-13,1.0/13], headers ), #RowItemModel( [-14,1.0/13], headers ), #RowItemModel( [-15,1.0/13], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "Query with category filter and specific exclusion filter on recommended results." 
) query.queryItemIds = set([-100]) query.excludeItemIds = set([-6, -10]) query.excludeCategoryIds = set([-1, -4, -5, -6]) expectedData = \ [ #RowItemModel( [-2, 2.0/13], headers ), RowItemModel( [-5, 2.0/13], headers ), #RowItemModel( [-6, 2.0/13], headers ), #RowItemModel( [-1, 1.0/13], headers ), #RowItemModel( [-3, 1.0/13], headers ), RowItemModel( [-7, 1.0/13], headers ), RowItemModel( [-8, 1.0/13], headers ), #RowItemModel( [-10,1.0/13], headers ), RowItemModel( [-11,1.0/13], headers ), RowItemModel( [-12,1.0/13], headers ), RowItemModel( [-13,1.0/13], headers ), #RowItemModel( [-14,1.0/13], headers ), #RowItemModel( [-15,1.0/13], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "General query with a couple of input clinical items + one with no association data (should effectively be ignored)." ) query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set() expectedData = \ [ RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ), RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ), RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ), RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ), RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("General query with category limit") query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set([-2, -4, -5, -6]) expectedData = \ [ #RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ), RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ), #RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ), #RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ), #RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ), RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ), RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("General query with specific exclusion") query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set([-4, -3, -2]) query.excludeCategoryIds = set() expectedData = \ [ RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ), #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ), #RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ), RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ), RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ), RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ), RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ), 
RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ), RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("General query, sort by TF*IDF lift.") query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set() query.sortField = "lift" expectedData = \ [ #RowItemModel( [-5, (13.0/2)*((1.0/6)*(2.0/2)+(1.0/4)*(1.0/2))], headers ), #RowItemModel( [-2, (13.0/2)*((1.0/6)*(1.0/2)+(1.0/6)*(2.0/2))], headers ), RowItemModel( [-3, (13.0/1)*((1.0/6)*(2.0/2))], headers ), RowItemModel( [-7, (13.0/1)*((1.0/6)*(2.0/2))], headers ), RowItemModel( [-8, (13.0/1)*((1.0/6)*(2.0/2))], headers ), RowItemModel( [-6, (13.0/2)*((1.0/6)*(2.0/2)+(1.0/4)*(1.0/2))], headers ), RowItemModel( [-14,(13.0/1)*((1.0/4)*(1.0/2))], headers ), RowItemModel( [-15,(13.0/1)*((1.0/4)*(1.0/2))], headers ), RowItemModel( [-1, (13.0/1)*((1.0/6)*(1.0/2))], headers ), RowItemModel( [-10,(13.0/1)*((1.0/6)*(1.0/2))], headers ), RowItemModel( [-11,(13.0/1)*((1.0/6)*(1.0/2))], headers ), RowItemModel( [-12,(13.0/1)*((1.0/6)*(1.0/2))], headers ), RowItemModel( [-13,(13.0/1)*((1.0/6)*(1.0/2))], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query)
        sigOut = sigmoid(rOut - lOut)
        d_outputs = sigOut

        if len(d_outputs) > 0 and d_outputs.shape[1] > 0:
            self.layerModel.bprop(d_outputs, rd)
            currGradContrib = self.layerModel.grad(d_outputs, rd)
            currGradContrib = currGradContrib.sum(1)
            currGrad += currGradContrib
            #myLog.info('problemArr.shape[0]: %d' % problemArr.shape[0])

    decayContribution = 2 * self.l2decay * self.params * problemArr.shape[0]
    #myLog.info('min, max decayContribution: %0.4f %0.4f' % (min(decayContribution), max(decayContribution)))
    currGrad += decayContribution

    if self.chunklog:
        myLog.debug('||currGrad||^1 : %.4f, ||decayContribution|| : %.4f, mean(currGrad) : %.4f, max(currGrad) : %.4f' \
            % (abs(currGrad).sum(), self.l2decay * (self.params**2).sum() * problemArr.shape[0], mean(currGrad), max(abs(currGrad))))
        myLog.debug('max(currGrad) : %.4f, min(currGrad) : %.4f' % (max(currGrad), min(currGrad)))
        myLog.debug('max(params) : %.4f, min(params) : %.4f' % (max(self.params), min(self.params)))
        myLog.debug('max(decayContribution) : %.4f, min(decayContribution) : %.4f' % (max(decayContribution), min(decayContribution)))

    return currGrad

def apply(self, newData):
    """Apply the trained neural net to this new data"""
    outData = zeros(len(newData))
    currIdx = 0
    for lDataChunk in self.singleSideGradChunkDataIterator(newData):
        actOut = self.layerModel.fprop(lDataChunk)
        actOut = actOut.flatten()
def test_findOrInsertItem(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) searchDict = {} insertDict = {} searchDict["TestTypes_id"] = +123 log.debug("Insert a new item using default params") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(True, isNew) log.debug("Find the existing item") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) insertDict["TestTypes_id"] = +456 log.debug("Find existing item, with optional insert data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) searchDict["TestTypes_id"] = +789 insertDict["TestTypes_id"] = +789 insertDict["MyInteger"] = 123 log.debug("Insert a new item with actual data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+789, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +234 insertDict["TestTypes_id"] = +234 log.debug("Retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual(None, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +345 insertDict["TestTypes_id"] = +345 insertDict["MyText"] = "testText" log.debug("Insert and retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(True, isNew) insertDict["MyText"] = "newText" log.debug( "Try inserting a different value under an existing row. Should NOT work" ) (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(False, isNew) log.debug( "Try inserting a different value under an existing row, but force the update" ) insertDict["MyText"] = "newText" (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText", forceUpdate=True) self.assertEqual("newText", data) self.assertEqual(False, isNew)
def updateFromFile( sourceFile, tableName, columnNames=None, nIdCols=1, delim=None, skipErrors=False, connFactory=None ): """Update the database with the contents of a whitespace-delimited text file. Updates the contents of the <tableName> with the data from the <sourceFile>. One line is expected in the <sourceFile> per row in the database, with each item delimited by the <delim> character (specify None for any whitespace). These items will be inserted under the respective order of the given list of <columnNames>. If the columnNames parameter is not provided, assume the first line of the <sourceFile> contains the column names. To know which rows to update, assume the FIRST column listed in <columnNames> is the ID column to identify rows by. In that case, the data value there from the <sourceFile> will not be used to update the row, but will instead be used to identify the row to update the rest of the data by. If more than one column is necessary to identify a row (composite key), indicate how many of the first columns in <columnNames> should be used with <nIdCols>. Note that these key ID values must not be None / null. The query looks for rows where columnname = value, and the = operator always returns false when the value is null. Returns the total number of rows successfully updated. """ if columnNames is None or len(columnNames) < 1: headerLine = sourceFile.readline(); columnNames = headerLine.split(delim); conn = None; if connFactory is not None: conn = connFactory.connection(); else: conn = connection() cur = conn.cursor() nCols = len(columnNames); try: # Prepare the SQL Statement sql = []; sql.append("update"); sql.append( tableName ); sql.append("set"); # Data Columns for i in xrange(nIdCols,nCols): sql.append(columnNames[i]); sql.append("="); sql.append(Env.SQL_PLACEHOLDER); sql.append(","); sql.pop(); # Remove extra comma at end # ID Columns sql.append("where") for i in xrange(nIdCols): sql.append(columnNames[i]); sql.append("="); sql.append(Env.SQL_PLACEHOLDER); sql.append("and"); sql.pop(); # Remove extra comma at end sql = str.join(" ",sql); log.debug(sql) # Loop through file and execute update statement for every line progress = ProgressDots() for iLine, line in enumerate(sourceFile): if not line.startswith(COMMENT_TAG): try: line = line[:-1]; # Strip the newline character params = line.split(delim); # Special handling for null / None string for iParam in xrange(len(params)): if params[iParam] == "" or params[iParam] == NULL_STRING: # Treat blank strings as NULL params[iParam] = None; # Reposition ID columns to end of parameter list idParams = params[:nIdCols]; dataParams = params[nIdCols:]; paramTuple = dataParams; paramTuple.extend( idParams ); paramTuple = tuple(paramTuple); cur.execute(sql, paramTuple); # Need to "auto-commit" after each command, # otherwise a skipped error will rollback # any previous commands as well if skipErrors: conn.commit() progress.Update() except Exception, err: conn.rollback(); # Reset changes and connection state log.critical(sql); log.critical(paramTuple); log.warning("Error Executing in Script: %s", parameterizeQueryString(sql,paramTuple) ); if skipErrors: log.warning(err) else: raise err conn.commit() return progress.GetCounts();
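# Hypothetical usage sketch for updateFromFile: the file name and column list are examples only.
# The first column (clinical_item_id) identifies the row; the remaining columns are updated.
sourceFile = open("clinical_item_updates.tab")   # assumed tab-delimited input file
try:
    nUpdated = updateFromFile(sourceFile, "clinical_item",
                              columnNames=["clinical_item_id", "name", "description"],
                              nIdCols=1, delim="\t", skipErrors=True)
    print "Updated %s rows" % str(nUpdated)
finally:
    sourceFile.close()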
def findOrInsertItem(tableName, searchDict, insertDict=None, retrieveCol=None, forceUpdate=False, autoCommit=True, conn=None, connFactory=None): """Search the named table in database for a row whose attributes match the key-value pairs specified in searchDict. If one exists, then return the column (probably the primary key) named by retrieveCol. Otherwise, insert a row into the table with the data specified in the insertDict key-value pairs and try accessing the retrieveCol again (presumably the one just inserted). If forceUpdate is specified as True, then, even if the row already exists in the database, update the row to match the contents of the insertDict. The connection object to the database (conn) can be specified, otherwise it will just default to that returned by the connection() method. If no insertDict is specified, use the searchDict as necessary. If no retrieveCol is specified, then will attempt to find the default primary key column based on the table name. Returns a tuple (col, isNew) where col is the value of the retrieveCol and isNew is a boolean indicating if this came from a new row just inserted or if it was just taken from an existing record. """ extConn = ( conn is not None ); if insertDict == None: insertDict = searchDict if conn == None: # If no specific connection object provided, look for a connection factory to produce one if connFactory is not None: conn = connFactory.connection(); else: # No connection or factory specified, just fall back on default connection then conn = connection(); try: cur = conn.cursor() # Create the query for checking if it's already in the database searchQuery = SQLQuery(); if retrieveCol == None: searchQuery.addSelect(defaultIDColumn(tableName)); else: searchQuery.addSelect(retrieveCol); searchQuery.addFrom(tableName) for i, (col, value) in enumerate(searchDict.iteritems()): if value is not None: searchQuery.addWhereEqual(col, value); else: # Equals operator doesn't work for null values searchQuery.addWhereOp(col,"is",value); # Convert query as a model into a single string searchParams= searchQuery.params; searchQuery = str(searchQuery); log.debug("Before Select Query: "+ parameterizeQueryString( searchQuery, searchParams ) ); # Check if the retrieveCol is already in the database, # by these search criteria cur.execute( searchQuery, searchParams ); result = cur.fetchone() log.debug("After Select/fetchone Query: "+ parameterizeQueryString( searchQuery, searchParams ) ); rowExisted = result is not None; if ( rowExisted ): if forceUpdate: # Item already exists, but want to force an update with the insertDict contents updateRow( tableName, insertDict, searchDict.values(), searchDict.keys(), conn=conn ); cur.execute( searchQuery, searchParams ); result = cur.fetchone() return (result[0], not rowExisted) else: # Item does not yet exist. Insert it, then get the retrieveCol again. insertRow( tableName, insertDict, conn=conn, cursor=cur ); # allow user to not commit when providing his/her own connection if not extConn or autoCommit: conn.commit(); # Now that insert or update has completed, try to retrieve the data again, # using sequences if possible #if retrieveCol is None: #cur.execute(identityQuery(tableName)); #else: # comment out the above because it wasn't working for some tables. 
cur.execute( searchQuery, searchParams ); result = cur.fetchone() if (result != None): # Returning data from the just inserted item return (result[0], not rowExisted) else: log.warning("For table "+tableName+", could not find "+ str(searchDict) +" even after inserting "+ str(insertDict) ) return (None, None) finally: if not extConn: conn.close(); # If we opened the connection ourselves, then close it ourselves
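# Illustrative usage sketch for findOrInsertItem (not part of the original module).
#   The clinical_item table and its name/description columns appear elsewhere in
#   this module's tests; the specific values here are made up for demonstration.
def _exampleFindOrInsertItem():
    searchDict = {"name": "LABMETB"};
    insertDict = {"name": "LABMETB", "description": "METABOLIC PANEL"};
    # Returns (value of the retrieveCol / default ID column, isNew flag).
    #   The insertDict is only written if no row matches the searchDict criteria.
    (itemId, isNew) = findOrInsertItem("clinical_item", searchDict, insertDict);
    return (itemId, isNew);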
def train(self): """Method to run through all the data and train away""" # Set some things up if doing online # will only calculate the gradient on the left hand side at a time # So make a target array and make sure we look at each ordered pair in both orders numOnlineRuns = int((self.problemArr.shape[0])/float(self.onlineChunkSize)) + 1; theIdx = arange(0, self.problemArr.shape[0]) theStep = int(len(theIdx) / numOnlineRuns) + 1; ## Check if we have some null rows. If we do, then stratify the rows trained on in each ## online step ## We don't want a complete step of left -1's or right -1's. isNullRows = (self.problemArr[:,0] == -1) | (self.problemArr[:, 1] == -1) isNullRows = any(isNullRows) if isNullRows: myLog.info('Have -1 in problemArr, stratifying the online step data') nNullIdx = theIdx[(self.problemArr[:,0] != -1) & (self.problemArr[:,1] != -1)] lNullIdx = theIdx[self.problemArr[:,0] == -1] rNullIdx = theIdx[self.problemArr[:,1] == -1] nNullStep = int(len(nNullIdx)/numOnlineRuns) + 1 lNullStep = int(len(lNullIdx)/numOnlineRuns) + 1 rNullStep = int(len(rNullIdx)/numOnlineRuns) + 1 try: for iEpoch in range(self.numEpochs): if self.batch: self.trainer.step(self.fDictList, self.problemArr); else: # Want to balance each of the chunks used in the online learning. if isNullRows: shuffle(nNullIdx); shuffle(lNullIdx) shuffle(rNullIdx) else: shuffle(theIdx); for iOnlineRun in range(numOnlineRuns): if isNullRows: nNullStart = iOnlineRun * nNullStep nNullEnd = nNullStart + nNullStep lNullStart = iOnlineRun * lNullStep lNullEnd = lNullStart + lNullStep rNullStart = iOnlineRun * rNullStep rNullEnd = rNullStart + rNullStep subProbArr = concatenate((self.problemArr[nNullIdx[nNullStart:nNullEnd], :], self.problemArr[lNullIdx[lNullStart:lNullEnd], :], self.problemArr[rNullIdx[rNullStart:rNullEnd], :])) else: rowStart = iOnlineRun * theStep; rowEnd = rowStart + theStep; subIdx = theIdx[rowStart:rowEnd]; subProbArr = self.problemArr[subIdx, :] self.trainer.step(self.fDictList, subProbArr); myLog.debug('About to call cost in postEpoch call') self.postEpochCall(iEpoch) # Test for convergence if self.checkconverge and len(self.costTrajectory) > self.nconvergesteps: if std(self.costTrajectory[-self.nconvergesteps:]) < self.costEpsilon: myLog.critical('Convergence after Epoch %d!!' % iEpoch); return self.costTrajectory; if self.callback is not None: self.callback(self); myLog.critical('Never completely converged after %d epochs!' % self.numEpochs); except KeyboardInterrupt, e: myLog.critical('Interrupted with Keyboard after %d epochs, stopping here, currCost = %f' % \ (iEpoch, self.costTrajectory[-1])) return self.costTrajectory;
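# Minimal sketch (not part of the original class) of the convergence test used in
#   train() above: training stops once the standard deviation of the most recent
#   nconvergesteps epoch costs drops below costEpsilon.  The parameter names mirror
#   the attributes referenced above; this standalone signature is an assumption.
def _exampleHasConverged(costTrajectory, nConvergeSteps=10, costEpsilon=1e-4):
    from numpy import std
    if len(costTrajectory) > nConvergeSteps:
        return std(costTrajectory[-nConvergeSteps:]) < costEpsilon;
    return False;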
def insertFile( sourceFile, tableName, columnNames=None, delim=None, idFile=None, skipErrors=False, dateColFormats=None, escapeStrings=False, estInput=None, connFactory=None ):
    """Insert the contents of a whitespace-delimited text file into the database.

    For PostgreSQL specifically, consider the alternative direct COPY command, which can run ~10x faster.
    E.g.,
        gzip -d -c TTeam_2014.tsv.gz | psql -U jonc101 -c "COPY tteamx ( pat_deid, enc_deid, relation, prov_id, prov_name, start_date, end_date ) FROM STDIN WITH (FORMAT csv, DELIMITER E'\t', HEADER, NULL 'None');" resident-access-log-2017

    Inserts the contents of the <sourceFile> into the database under the <tableName>.
    One line is expected in the <sourceFile> per row in the database, with each item
    delimited by the <delim> character.  These items will be inserted under the
    respective order of the given list of columnNames.

    Use the built-in csv module for parsing out lines and managing quotes, etc.
    If the delimiter is not specified (None), default to tab-delimited.

    If idFile is provided, then will try to run SQL from the identityQuery method
    after each insert, and write out the contents, one per line, to the idFile.
    This step is bypassed if an insert column matching the expected default ID
    column ("tableName_id") can be found.

    If dateColFormats is provided, expect a dictionary keyed by the names of columns
    that should be interpreted as date strings, with values equal to the Python date
    format string to parse them by.  If a format string is not provided, a series of
    standard date format strings will be attempted (but this is inefficient for
    repeated date text parsing and error handling).

    Returns the total number of rows successfully inserted.
    """
    if columnNames is not None and len(columnNames) < 1:
        columnNames = None; # If empty columnNames list, then reset to null and look for it in first line of data

    reader = TabDictReader(sourceFile, fieldnames=columnNames, delimiter=delim);
    columnNames = reader.fieldnames;

    idCol = defaultIDColumn(tableName);
    iIdCol = None;  # Index of manually specified ID column.  May be null
    for iCol, colName in enumerate(columnNames):
        if colName == idCol:
            iIdCol = iCol;

    if dateColFormats is not None:
        # Ensure column keys are normalized
        dateCols = dateColFormats.keys();
        for dateCol in dateCols:
            normalCol = normalizeColName(dateCol);
            dateColFormats[normalCol] = dateColFormats[dateCol];

    conn = None;
    if connFactory is not None:
        conn = connFactory.connection();
    else:
        conn = connection()
    cur = conn.cursor()

    try:
        # Prepare the SQL Statement
        sqlParts = []
        sqlParts.append("insert into")
        sqlParts.append( tableName )

        sqlParts.append("(")
        sqlParts.append( str.join(",", columnNames) );
        sqlParts.append(")")

        sqlParts.append("values")
        sqlParts.append("(")
        for i in range(len(columnNames)):
            sqlParts.append( Env.SQL_PLACEHOLDER )  # Parameter placeholder, depends on DB-API
            sqlParts.append(",")
        sqlParts.pop();  # Remove extra end comma
        sqlParts.append(")")

        sql = str.join(" ",sqlParts)

        log.debug(sql)

        # Loop through the file and execute an insert statement every time enough delimited parameters are found
        nInserts = 0
        nCols = len(columnNames)
        params = list();
        progress = ProgressDots(total=estInput);
        for iLine, rowModel in enumerate(reader):
            # Parse out data values from strings
            for iCol, colName in enumerate(columnNames):
                value = parseValue(rowModel[colName], colName, dateColFormats, escapeStrings);
                params.append(value);

            log.debug(params)
            try:
                cur.execute(sql,tuple(params))
                nInserts += cur.rowcount

                if idFile != None:
                    rowId = None;
                    if iIdCol is not None:  # Look for manually assigned ID value first
                        rowId = params[iIdCol];
                    else:
                        cur.execute(identityQuery(tableName));
                        rowId = cur.fetchone()[0];
                    print >> idFile, rowId;

                # Need to "auto-commit" after each command,
                #   otherwise a skipped error will rollback
                #   any previous commands as well
                if skipErrors:
                    conn.commit()

                progress.Update()

            except Exception, err:
                log.info(sql);
                log.info(tuple(params))
                conn.rollback();    # Reset any changes since the last commit
                if skipErrors:
                    log.warning("Error Executing in Script: "+ sql )
                    log.warning(err)
                else:
                    raise;
            params = list();

        conn.commit()

        return nInserts
    finally:
        conn.close();   # Close the DB connection opened here
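# Illustrative usage sketch for insertFile (not part of the original module).
#   The table name, column names, and date format here are hypothetical
#   assumptions used only to show the calling convention; adjust to the actual schema.
def _exampleInsertFile():
    from StringIO import StringIO

    sampleFile = StringIO(
        "name\tdescription\titem_date\n"
        "CBC\tComplete Blood Count\t2000-01-01\n"
        "BMP\tBasic Metabolic Panel\t2000-01-02\n");
    # First line supplies the column names since columnNames is left as None
    nInserted = insertFile( sampleFile, "sample_item", delim="\t", dateColFormats={"item_date": "%Y-%m-%d"} );
    return nInserted;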
def grad(self, fDictList, targetArr, idxArr): """Evaluate the gradient of the error function wrt the params""" currGrad = zeros(self.params.shape); meanDOut = []; minDOut = []; maxDOut = []; for subData, subTarg in self.gradChunkDataIterator(fDictList, targetArr, idxArr): actual_out = self.layerModel.fprop(subData); d_outputs = actual_out - subTarg; meanDOut.append(mean(abs(d_outputs))); minDOut.append(min(d_outputs)); maxDOut.append(max(d_outputs)); self.layerModel.bprop(d_outputs, subData); currGradContrib = self.layerModel.grad(d_outputs, subData) currGradContrib = currGradContrib.sum(1); currGrad += currGradContrib; decayContribution = 2 * self.l2decay * self.params * len(idxArr); currGrad += decayContribution; if self.chunklog and len(meanDOut) > 0: myLog.debug('mean(abs(d_outputs)) : %.4f, min(d_outputs): %.4f, max(d_outputs) : %.4f' % \ (mean(meanDOut), min(minDOut), max(maxDOut))) myLog.debug('||currGrad||^1 : %.4f, ||decayContribution|| : %.4f, mean(currGrad) : %.4f, max(currGrad) : %.4f' % \ (abs(currGrad).sum(), self.l2decay * (self.params**2).sum() * len(idxArr), mean(currGrad), max(abs(currGrad)))); myLog.debug('max(currGrad) : %.4f, min(currGrad) : %.4f' % (max(currGrad), min(currGrad))) myLog.debug('max(params) : %.4f, min(params) : %.4f' % (max(self.params), min(self.params))) myLog.debug('max(decayContribution) : %.4f, min(decayContribution) : %.4f' % (max(decayContribution), min(decayContribution))) myLog.debug('len(idxArr) = %d' % len(idxArr)) return currGrad;
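# Minimal finite-difference check sketch (not part of the original class) for the
#   analytic gradient computed in grad() above.  It assumes the model object also
#   exposes a cost(fDictList, targetArr, idxArr) method with a signature matching
#   grad(); that signature is an assumption here, not taken from the original code.
def _exampleGradientCheck(model, fDictList, targetArr, idxArr, epsilon=1e-5):
    from numpy import zeros
    analytic = model.grad(fDictList, targetArr, idxArr);
    numeric = zeros(model.params.shape);
    for i in xrange(len(model.params)):
        origVal = model.params[i];
        model.params[i] = origVal + epsilon;
        costPlus = model.cost(fDictList, targetArr, idxArr);
        model.params[i] = origVal - epsilon;
        costMinus = model.cost(fDictList, targetArr, idxArr);
        model.params[i] = origVal;  # Restore the original parameter value
        # Central difference approximation of the partial derivative
        numeric[i] = (costPlus - costMinus) / (2 * epsilon);
    # Largest absolute discrepancy between analytic and numeric gradients
    return abs(analytic - numeric).max();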
def execute( query, parameters=None, includeColumnNames=False, incTypeCodes=False, formatter=None, conn=None, connFactory=None, autoCommit=True):
    """Execute a single SQL query / command against the database.
    If the description attribute is not None, this implies
    this was a select statement that produced a result set which
    will be returned by the fetchall() method.

    If the description is null, then at least return the rowcount
    affected by the query.  This may be -1 or None still
    if it is a non-row affecting command (e.g. create / drop).

    If includeColumnNames is true and the query yields a result set,
    then one row (list) will be added to the beginning which contains
    the names of each column as extracted from the cursor.description.

    If incTypeCodes is true and the query yields a result set, a row (list)
    will be added to the beginning (but after column names if those are
    included as well), which contains the numerical type codes of each
    column as extracted from the cursor.description.

    This method is probably not terribly efficient and should
    only be used for prototype testing and short command line functions.
    For retrieving data to send to stdout or some other stream,
    add the formatter parameter as an instance of a ResultFormatter object
    to pipe the data through one fetch at a time.  In that case, the
    full results (which are presumably large) will NOT be returned
    by the method.

    If the query object is actually a SQLQuery object, then will
    use the SQLQuery.getParams() as the params, and str(SQLQuery) as the query string.

    If autoCommit is True, the connection will be committed after the query runs.
    The method will also commit when an external connection is NOT supplied
    (since it opened the connection itself).
    """
    # Look for an explicitly specified external connection
    extConn = conn is not None
    if conn is None:
        # If no specific connection object provided, look for a connection factory
        #   to produce one
        if connFactory is not None:
            conn = connFactory.connection()
        else:
            # No connection or factory specified, just fall back on default connection then
            conn = connection()

    cur = conn.cursor()

    if isinstance(query, SQLQuery):
        if parameters is None:
            parameters = tuple(query.getParams())
        else:
            parameters = tuple(parameters)
        query = str(query)
    elif parameters is None:
        parameters = ()

    #log.debug(parameterizeQueryString(query,parameters));

    returnValue = None
    try:
        timer = time.time();
        try:
            cur.execute( query, parameters )
        except Exception, err:
            log.error(err);
            #log.error(parameterizeQueryString(query,parameters));
            if (not extConn) or autoCommit:
                conn.rollback();
            raise;
        timer = time.time() - timer;
        log.debug("Query Time: (%1.3f sec)" % timer );

        if cur.description != None:
            returnValue = []

            colNames = None;
            if includeColumnNames:
                colNames = columnNamesFromCursor(cur);
                returnValue.append(colNames)

            if incTypeCodes:
                typeCodes = typeCodesFromCursor(cur);
                returnValue.append(typeCodes);

            if formatter != None:
                # An output formatter was specified, pipe the data out one row at a time
                if includeColumnNames:
                    formatter.formatTuple(colNames)

                progress = ProgressDots();
                row = cur.fetchone()
                while row != None:
                    formatter.formatTuple(row)
                    row = cur.fetchone()
                    progress.Update();
                log.info("%d Rows Completed",progress.GetCounts());

                returnValue = cur.rowcount
            else:
                # No formatter specified, just return the entire result set
                dataTable = list(cur.fetchall());
                for i, row in enumerate(dataTable):
                    dataTable[i] = list(row);
                returnValue.extend(dataTable);
        else:
            returnValue = cur.rowcount
        if (not extConn) or autoCommit:
            conn.commit()
    finally:
        if not extConn:
            conn.close()    # If we opened the connection ourselves, then close it ourselves

    return returnValue
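# Illustrative usage sketch for execute() (not part of the original module).
#   The clinical_item table appears in this module's tests; the analysis_status
#   filter column here is a hypothetical example, not a confirmed schema detail.
def _exampleExecute():
    sql = "select clinical_item_id, name from clinical_item where analysis_status = " + Env.SQL_PLACEHOLDER;
    results = execute( sql, (1,), includeColumnNames=True );
    # With includeColumnNames=True, the first row holds the column names
    header = results[0];
    dataRows = results[1:];
    return (header, dataRows);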
def test_dataConversion_maxMixtureCount(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process..."); convOptions = ConversionOptions(); convOptions.startDate = TEST_START_DATE; convOptions.normalizeMixtures = False; convOptions.maxMixtureCount = 2; convOptions.doseCountLimit = 5; self.converter.convertSourceItems(convOptions); # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.external_id, ci.name, ci.description, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_med' order by pi.external_id, ci.external_id """; expectedData = \ [ [ -421032269, 2968778, 333, "Med (IV)", -96559, "MED-96559", "Piperacillin-Tazobactam (IV)", datetime(2112,2,8,11,17) ], #[ -418436155, 1607844, "Med (PR)", -3001, "RXCUI-3001", "Bisacodyl (PR)", datetime(2111,12,20,0,40) ], # Skip PRNs [ -418436145, 1607844, 222, "Med (PR)", -3001, "RXCUI-3001", "Bisacodyl (PR)", datetime(2111,12,22,0,40) ], [ -418063010, 3032765, 111, "Med (PO)", -4001, "RXCUI-4001", "Prednisone (PO)", datetime(2111,12,12,8,56) ], [ -418062652, 3036488, 555, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,12,8,30) ], [ -418062352, 3016488, 666, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,13,8,30) ], [ -418013851, -2429057, 444, "Med (PO)", -2001, "RXCUI-2001", "Famotidine (PO)", datetime(2111,12,10,19,10) ], [ -418011851, -2429057, 444, "Med (PO)", -2001, "RXCUI-2001", "Famotidine (PO)", datetime(2111,12,10,19,9) ], [ -414321352, 3036588, 777, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,14,8,30) ], # Simple mixture with different dosing counts [ -395900000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481 (<5)", "Cefepime (IV) (<5 doses)", datetime(2111, 1, 2, 3, 0)], [ -395800000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481", "Cefepime (IV)", datetime(2111, 1,10, 3, 0)], [ -395700000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481", "Cefepime (IV)", datetime(2111, 3,10, 3, 0)], # IVF Mixture, composite ingredient description [ -392000000, 1234567, 888, "Med (IV)", -530000, "MED-530000 (<5)", "IVF Mix (<5 doses)", datetime(2111, 4, 1, 3, 0)], # Mini mixture. Too many components, just use summary description [ -391000000, 1234567, 888, "Med (IV)", -540000, "MED-540000 (<5)", "Mini TPN (<5 doses)", datetime(2111, 4, 2, 3, 0)], # Still aggregated because breaking up into component amino acids results in too many # Complex mixture [ -390000000, 1234567, 888, "Med (IV)", -550000, "MED-550000", "TPN Adult", datetime(2111, 5, 2, 3, 0)], ]; actualData = DBUtil.execute(testQuery); self.assertEqualTable( expectedData, actualData );
def decayAnalyzePatientItems(self, decayAnalysisOptions):
    log.debug("delta = %s" % decayAnalysisOptions.delta);

    # Derive decay scalar based on window length if not directly set
    if decayAnalysisOptions.decay is None:
        decayAnalysisOptions.decay = 1-(1.0/decayAnalysisOptions.windowLength)  # decay rate = (1 - (1/c)), where c = window length

    currentBuffer = None;   # In memory buffer if using temp files.  Otherwise, use the database as the data cache
    if decayAnalysisOptions.outputFile is not None:
        currentBuffer = dict();

    #####
    # Step one delta (e.g., month) at a time until end date
    #####
    currentItemStart = decayAnalysisOptions.startD
    currentItemEnd = currentItemStart + decayAnalysisOptions.delta

    # Keep running the analysis until you reach the end date
    while currentItemStart < decayAnalysisOptions.endD:
        log.debug(currentItemStart);
        log.debug(currentItemEnd);

        instance = AssociationAnalysis.AssociationAnalysis()
        analysisOptions = AssociationAnalysis.AnalysisOptions()

        if decayAnalysisOptions.outputFile is not None:
            analysisOptions.bufferFile = decayAnalysisOptions.outputFile

        # Decay any existing stats before learning new ones to increment
        if currentBuffer is None:
            self.standardDecay(decayAnalysisOptions)
        else:
            log.debug("buffer decay");
            currentBuffer = instance.bufferDecay(currentBuffer, decayAnalysisOptions.decay)

        self.decayCount += 1

        # Add in a new delta worth of training
        instance.associationsPerCommit = decayAnalysisOptions.associationsPerCommit
        instance.itemsPerUpdate = decayAnalysisOptions.itemsPerUpdate
        log.debug(instance.associationsPerCommit);
        log.debug(instance.itemsPerUpdate);
        analysisOptions.patientIds = decayAnalysisOptions.patientIds
        analysisOptions.startDate = currentItemStart;
        analysisOptions.endDate = currentItemEnd

        # Decide which count fields to update
        if decayAnalysisOptions.skipLargerCountWindows:
            deltaSeconds = decayAnalysisOptions.delta.total_seconds();

            # Look for the smallest option that is still larger than the given delta
            smallestLargerDeltaOption = sys.maxint;
            for secondsOption in DELTA_NAME_BY_SECONDS.iterkeys():
                if secondsOption > deltaSeconds:
                    smallestLargerDeltaOption = min(secondsOption, smallestLargerDeltaOption);

            # Tell the AssociationAnalysis to only accrue data for time ranges within the size of the delta period specified
            analysisOptions.deltaSecondsOptions = list();
            for secondsOption in DELTA_NAME_BY_SECONDS.iterkeys():
                if secondsOption <= smallestLargerDeltaOption:
                    analysisOptions.deltaSecondsOptions.append(secondsOption);

        log.debug("starting new delta");
        log.debug(analysisOptions.startDate);
        log.debug(analysisOptions.endDate);
        instance.analyzePatientItems(analysisOptions)   # If an outputFile has been set, it will automatically output the buffer to the file
        log.debug("finished new delta");

        # If everything has been done in memory, load the latest buffer from this analysis run and merge it with the current buffer
        if currentBuffer is not None:
            bufferOneDelta = instance.loadUpdateBufferFromFile(decayAnalysisOptions.outputFile)
            currentBuffer = instance.mergeBuffers(currentBuffer, bufferOneDelta)
            log.debug("finished merge");

        # Increment dates to the next delta period
        currentItemStart = currentItemEnd
        currentItemEnd = currentItemStart + decayAnalysisOptions.delta

    log.debug("Total number of decays: " + str(self.decayCount) );

    # Commit to database if everything has been done in memory.
    #   (If not, then changes have already been committed to the database incrementally)
    if currentBuffer is not None:
        finalCommitBufferFileName = "finalCommitBuffer.txt"
        if os.path.exists(decayAnalysisOptions.outputFile):
            os.remove(decayAnalysisOptions.outputFile)
        instance.saveBufferToFile(finalCommitBufferFileName, currentBuffer)
        del currentBuffer   # Delete the buffer to save memory, because commitUpdateBufferFromFile will need the memory equivalent of one new buffer
        instance.commitUpdateBufferFromFile(str(finalCommitBufferFileName))
        # Comment out if you want the file to remain so you can commit separately or for debugging
        if os.path.exists(finalCommitBufferFileName):
            os.remove(finalCommitBufferFileName)

    log.debug("finished process");
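# Minimal sketch (not part of the original class) of the decay scalar derived above:
#   decay = 1 - 1/windowLength, so statistics learned k delta periods ago end up
#   weighted by decay**k after k subsequent decay steps.  For example,
#   windowLength=4 gives decay=0.75 and weights 1.0, 0.75, 0.5625, ...
def _exampleDecayWeights(windowLength=4, nDeltas=6):
    decay = 1 - (1.0 / windowLength);
    return [decay ** k for k in xrange(nDeltas)];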
def test_dataConversion_normalized(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process..."); convOptions = ConversionOptions(); convOptions.startDate = TEST_START_DATE; convOptions.normalizeMixtures = True; convOptions.doseCountLimit = 5; self.converter.convertSourceItems(convOptions); # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.external_id, ci.name, ci.description, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_med' order by pi.external_id, ci.external_id """; expectedData = \ [ [ -421032269, 2968778, 333, "Med (IV)", -1002, "RXCUI-1002", "Tazobactam (IV)", datetime(2112,2,8,11,17) ], [ -421032269, 2968778, 333, "Med (IV)", -1001, "RXCUI-1001", "Piperacillin (IV)", datetime(2112,2,8,11,17) ], #[ -418436155, 1607844, "Med (PR)", -3001, "RXCUI-3001", "Bisacodyl (PR)", datetime(2111,12,20,0,40) ], # Skip PRNs [ -418436145, 1607844, 222, "Med (PR)", -3001, "RXCUI-3001", "Bisacodyl (PR)", datetime(2111,12,22,0,40) ], [ -418063010, 3032765, 111, "Med (PO)", -4001, "RXCUI-4001", "Prednisone (PO)", datetime(2111,12,12,8,56) ], [ -418062652, 3036488, 555, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,12,8,30) ], [ -418062352, 3016488, 666, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,13,8,30) ], [ -418013851, -2429057, 444, "Med (PO)", -2001, "RXCUI-2001", "Famotidine (PO)", datetime(2111,12,10,19,10) ], [ -418011851, -2429057, 444, "Med (PO)", -2001, "RXCUI-2001", "Famotidine (PO)", datetime(2111,12,10,19,9) ], [ -414321352, 3036588, 777, "Med (IV)", -5007, "MED-5007 (<5)", "METOPROLOL TARTRATE 5 MG/5 ML IV SOLN (<5 doses)", datetime(2111,12,14,8,30) ], # Simple mixture with different dosing counts [ -395900000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481 (<5)", "Cefepime (IV) (<5 doses)", datetime(2111, 1, 2, 3, 0)], [ -395800000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481", "Cefepime (IV)", datetime(2111, 1,10, 3, 0)], [ -395700000, 1234567, 888, "Med (IV)", -20481, "RXCUI-20481", "Cefepime (IV)", datetime(2111, 3,10, 3, 0)], # IVF Mix [ -392000000, 1234567, 888, "Med (IV)", -9863, 'RXCUI-9863 (<5)', 'Sodium Chloride (IV) (<5 doses)', datetime(2111, 4, 1, 3, 0)], [ -392000000, 1234567, 888, "Med (IV)", -8591, 'RXCUI-8591 (<5)', 'Potassium Chloride (IV) (<5 doses)', datetime(2111, 4, 1, 3, 0)], [ -392000000, 1234567, 888, "Med (IV)", -4850, 'RXCUI-4850 (<5)', 'Glucose (IV) (<5 doses)', datetime(2111, 4, 1, 3, 0)], # Mini-Mix [ -391000000, 1234567, 888, "Med (IV)", -236719, 'RXCUI-236719 (<5)', 'Sodium Phosphate, Dibasic (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -235496, 'RXCUI-235496 (<5)', 'Sodium Phosphate, Monobasic (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -11115, 'RXCUI-11115 (<5)', 'Valine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -10962, 'RXCUI-10962 (<5)', 'Tyrosine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -10898, 'RXCUI-10898 (<5)', 'Tryptophan (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ 
-391000000, 1234567, 888, "Med (IV)", -10524, 'RXCUI-10524 (<5)', 'Threonine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -9863, 'RXCUI-9863 (<5)', 'Sodium Chloride (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -9671, 'RXCUI-9671 (<5)', 'Serine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -8737, 'RXCUI-8737 (<5)', 'Proline (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -8156, 'RXCUI-8156 (<5)', 'Phenylalanine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -6837, 'RXCUI-6837 (<5)', 'Methionine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -6536, 'RXCUI-6536 (<5)', 'Lysine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -6308, 'RXCUI-6308 (<5)', 'Leucine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -6033, 'RXCUI-6033 (<5)', 'Isoleucine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -5340, 'RXCUI-5340 (<5)', 'Histidine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -4919, 'RXCUI-4919 (<5)', 'Glycine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -4850, 'RXCUI-4850 (<5)', 'Glucose (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -1091, 'RXCUI-1091 (<5)', 'Arginine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], [ -391000000, 1234567, 888, "Med (IV)", -426, 'RXCUI-426 (<5)', 'Alanine (IV) (<5 doses)', datetime(2111, 4, 2, 3, 0)], # Complex mixture [ -390000000, 1234567, 888, "Med (IV)", -253182, 'RXCUI-253182', 'Regular Insulin, Human (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -236719, 'RXCUI-236719', 'Sodium Phosphate, Dibasic (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -235496, 'RXCUI-235496', 'Sodium Phosphate, Monobasic (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -89905, 'RXCUI-89905', 'Multivitamin Preparation (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -39937, 'RXCUI-39937', 'Zinc Chloride (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -29261, 'RXCUI-29261', 'Manganese Chloride (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -21579, 'RXCUI-21579', 'Copper Sulfate (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -21032, 'RXCUI-21032', 'Chromous Chloride (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -11115, 'RXCUI-11115', 'Valine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -10962, 'RXCUI-10962', 'Tyrosine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -10898, 'RXCUI-10898', 'Tryptophan (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -10524, 'RXCUI-10524', 'Threonine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -9863, 'RXCUI-9863', 'Sodium Chloride (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -9671, 'RXCUI-9671', 'Serine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -8737, 'RXCUI-8737', 'Proline (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -8591, 'RXCUI-8591', 'Potassium 
Chloride (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -8308, 'RXCUI-8308', 'Vitamin K 1 (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -8156, 'RXCUI-8156', 'Phenylalanine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -6837, 'RXCUI-6837', 'Methionine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -6585, 'RXCUI-6585', 'Magnesium Sulfate (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -6536, 'RXCUI-6536', 'Lysine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -6308, 'RXCUI-6308', 'Leucine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -6033, 'RXCUI-6033', 'Isoleucine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -5340, 'RXCUI-5340', 'Histidine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -4919, 'RXCUI-4919', 'Glycine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -4850, 'RXCUI-4850', 'Glucose (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -1908, 'RXCUI-1908', 'Calcium Gluconate (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -1091, 'RXCUI-1091', 'Arginine (IV)', datetime(2111, 5, 2, 3, 0)], [ -390000000, 1234567, 888, "Med (IV)", -426, 'RXCUI-426', 'Alanine (IV)', datetime(2111, 5, 2, 3, 0)], ]; actualData = DBUtil.execute(testQuery); self.assertEqualTable( expectedData, actualData ); # Query for orderset links testQuery = \ """ select pi.external_id, ci.description, ic.external_id, ic.name, ic.section, ic.subgroup from patient_item as pi, clinical_item as ci, clinical_item_category as cic, patient_item_collection_link as picl, item_collection_item as ici, item_collection as ic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_med' and pi.patient_item_id = picl.patient_item_id and picl.item_collection_item_id = ici.item_collection_item_id and ici.item_collection_id = ic.item_collection_id order by pi.external_id, ci.external_id """; expectedData = \ [ [ -421032269, "Tazobactam (IV)", -111,"General Admit","Medications","Antibiotics"], [ -421032269, "Piperacillin (IV)", -111,"General Admit","Medications","Antibiotics"], [ -418436145, "Bisacodyl (PR)", -111,"General Admit","Medications","Stool Softeners"], # Mini-Mix [ -391000000, 'Sodium Phosphate, Dibasic (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Sodium Phosphate, Monobasic (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Valine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Tyrosine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Tryptophan (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Threonine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Sodium Chloride (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Serine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Proline (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Phenylalanine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Methionine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Lysine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Leucine (IV) (<5 doses)', -222, 
"Nutrition","Infusions","TPN"], [ -391000000, 'Isoleucine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Histidine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Glycine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Glucose (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Arginine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], [ -391000000, 'Alanine (IV) (<5 doses)', -222, "Nutrition","Infusions","TPN"], ]; actualData = DBUtil.execute(testQuery); self.assertEqualTable( expectedData, actualData );
def test_dataConversion(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process...") self.converter.convertSourceItems(TEST_START_DATE) # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.external_id, ci.name, ci.description, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_proc' order by pi.external_id """ expectedData = \ [ [ -419697343, 3042640, 222, "Point of Care Testing", 1001, "LABPOCGLU", "GLUCOSE BY METER", datetime(2112,01,13) ], [ -419268937, 3039254, 666, "Lab", 1721, "LABPTT", "PTT PARTIAL THROMBOPLASTIN TIME", datetime(2112,01,05) ], [ -419268931, 3039254, 666, "Lab", 1721, "LABPTT", "PTT PARTIAL THROMBOPLASTIN TIME", datetime(2112,01,04) ], [ -418928388, -1612899, 333, "Point of Care Testing", 1001, "LABPOCGLU", "GLUCOSE BY METER", datetime(2111,12,28) ], [ -418045499, 2087083, 444, "Nursing", 1428, "NUR1018", "MONITOR INTAKE AND OUTPUT", datetime(2111,12,11) ], [ -417974686, 380873, 111, "Nursing", 1453, "NUR1043", "NURSING PULSE OXIMETRY", datetime(2111,12,10) ], [ -417843774, 2648748, 555, "Nursing", 1508, "NUR1068", "WEIGHT", datetime(2111,12,8) ], ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData) # Query for order set links testQuery = \ """ select pi.external_id, ci.description, ic.external_id, ic.name, ic.section, ic.subgroup from patient_item as pi, clinical_item as ci, clinical_item_category as cic, patient_item_collection_link as picl, item_collection_item as ici, item_collection as ic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_proc' and pi.patient_item_id = picl.patient_item_id and picl.item_collection_item_id = ici.item_collection_item_id and ici.item_collection_id = ic.item_collection_id order by pi.external_id """ expectedData = \ [ [ -419268931, "PTT PARTIAL THROMBOPLASTIN TIME", -111,"General Admit","Lab","Coag"], [ -418928388, "GLUCOSE BY METER", -222,"ER General","Testing","PoC" ], [ -418045499, "MONITOR INTAKE AND OUTPUT", -111,"General Admit","Nursing","Monitoring" ], ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData)
def test_compositeRelated(self): # Simulate command-line execution self.analyzer.main([ "medinfo/cpoe/DataManager.py", "-c", "-2,-4,-8|NewComposite|New Composite Item|-1|-100" ]) #compositeId = self.analyzer.compositeRelated( (-2,-4,-8), "NewComposite","New Composite Item", -1, -100 ); # Revise the new item ID to a sentinel test value expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], [-100,"NewComposite", 1, 0], # Remove from default recommend list ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records linkQuery = \ """ select clinical_item_id, linked_item_id from clinical_item_link where clinical_item_id < 0 order by clinical_item_id desc, linked_item_id desc """ expectedItems = \ [ [-100,-2], [-100,-4], [-100,-8], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems) log.debug("Test incremental update via command-line") self.analyzer.main(["medinfo/cpoe/DataManager.py", "-g", "-6|-100"]) #self.analyzer.generatePatientItemsForCompositeId( (-6,), -100 ); expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. 
Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [None,-22222,-100, datetime(2000, 1, 9, 0), None], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2, 9, 0), None], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records expectedItems = \ [ [-100,-2], [-100,-4], [-100,-6], [-100,-8], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems) log.debug("Test inherited update") self.analyzer.main([ "medinfo/cpoe/DataManager.py", "-c", "-7,-100|InheritingComposite|Inheriting Composite Item|-1|-101" ]) #compositeId = self.analyzer.compositeRelated( (-7,-100), "InheritingComposite","Inheriting Composite Item", -1, -101 ); # Revise the new item ID to a sentinel test value expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], [-100,"NewComposite", 1, 0], [-101,"InheritingComposite", 1, 0], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. 
Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-101, datetime(2000, 1, 1, 0), None], [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-101, datetime(2000, 1, 1, 2), None], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [None,-22222,-101, datetime(2000, 1, 5, 0), None], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [None,-22222,-101, datetime(2000, 1, 9, 0), None], [None,-22222,-100, datetime(2000, 1, 9, 0), None], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [None,-33333,-101, datetime(2000, 2, 9, 0), None], [None,-33333,-100, datetime(2000, 2, 9, 0), None], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-101, datetime(2000, 2,11, 0), None], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records expectedItems = \ [ [-100,-2], [-100,-4], [-100,-6], [-100,-8], [-101,-7], [-101,-100], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems)
def test_dataConversion(self): # Run the data conversion on the same data and look for expected records log.debug("Run the conversion process...") self.converter.convertSourceItems(TEST_START_DATE) # Just query back for the same data, de-normalizing the data back to a general table testQuery = \ """ select pi.external_id, pi.patient_id, pi.encounter_id, cic.description, ci.external_id, ci.name, ci.description, pi.num_value, pi.text_value, pi.item_date from patient_item as pi, clinical_item as ci, clinical_item_category as cic where pi.clinical_item_id = ci.clinical_item_id and ci.clinical_item_category_id = cic.clinical_item_category_id and cic.source_table = 'stride_order_results' order by pi.external_id desc, ci.name """ expectedData = \ [ [-1748206, -1099, -9890, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 50, None, DBUtil.parseDateValue('7/3/2111 14:21'),], [-2658433, -6894, -211, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 45, None, DBUtil.parseDateValue('7/5/2111 0:28'),], [-2794591, -4038, -6687, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 70, None, DBUtil.parseDateValue('3/19/2113 19:26'),], [-3347071, -6139, -7104, 'Lab Result', None, '25OHD3(Low)', '25-HYDROXY D3 (Low)', 2, None, DBUtil.parseDateValue('9/8/2113 22:10'),], [-3393444, -5157, -5537, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 65, None, DBUtil.parseDateValue('10/9/2113 5:03'),], [-3580354, -2795, -752, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 49, None, DBUtil.parseDateValue('12/17/2113 0:40'),], [-4464954, -4591, -1383, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 55, None, DBUtil.parseDateValue('5/28/2113 23:28'),], [-19007449, -9542, -4105, 'Lab Result', None, 'NA(High)', 'SODIUM, SER/PLAS (High)', 157, None, DBUtil.parseDateValue('9/13/2109 11:55'),], [-19231504, -1518, -3744, 'Lab Result', None, 'NA(Low)', 'SODIUM, SER/PLAS (Low)', 134, None, DBUtil.parseDateValue('8/20/2109 12:22'),], [-21479311, -9844, -5135, 'Lab Result', None, 'NA(InRange)', 'SODIUM, SER/PLAS (InRange)', 142, None, DBUtil.parseDateValue('8/31/2109 15:42'),], [-22793877, -3261, -4837, 'Lab Result', None, 'HCT(LowPanic)', 'HEMATOCRIT(HCT) (Low Panic)', 19.7, None, DBUtil.parseDateValue('11/29/2111 14:36'),], [-22793877, -3261, -4837, 'Lab Result', None, 'HGB(LowPanic)', 'HEMOGLOBIN(HGB) (Low Panic)', 7, None, DBUtil.parseDateValue('11/30/2111 7:36'),], [-22793877, -3261, -4837, 'Lab Result', None, 'MCH(InRange)', 'MCH(MCH) (InRange)', 31.7, None, DBUtil.parseDateValue('10/17/2112 1:09'),], [-22793877, -3261, -4837, 'Lab Result', None, 'MCHC(InRange)', 'MCHC(MCHC) (InRange)', 35.4, None, DBUtil.parseDateValue('12/13/2112 2:54'),], [-22793877, -3261, -4837, 'Lab Result', None, 'MCV(InRange)', 'MCV(MCV) (InRange)', 89.7, None, DBUtil.parseDateValue('11/11/2112 2:54'),], [-22793877, -3261, -4837, 'Lab Result', None, 'PLT(Low)', 'PLATELET COUNT(PLT) (Low)', 11, None, DBUtil.parseDateValue('1/30/2113 13:28'),], [-22793877, -3261, -4837, 'Lab Result', None, 'RBC(Low)', 'RBC(RBC) (Low)', 2.2, None, DBUtil.parseDateValue('7/11/2113 23:24'),], [-22793877, -3261, -4837, 'Lab Result', None, 'RDW(High)', 'RDW(RDW) (High)', 33.3, None, DBUtil.parseDateValue('1/27/2113 14:44'),], [-22793877, -3261, -4837, 'Lab Result', None, 'WBC(LowPanic)', 'WBC(WBC) (Low Panic)', 0.2, None, DBUtil.parseDateValue('9/25/2109 16:10'),], [-22910018, -1862, -621, 'Lab Result', None, 'MG(InRange)', 'MAGNESIUM, SER/PLAS(MGN) (InRange)', 2.1, None, 
DBUtil.parseDateValue('11/13/2112 8:18'),], [-29501223, -9860, -1772, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 5.1, None, DBUtil.parseDateValue('11/29/2111 0:15'),], [-29966444, -5690, -1150, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 4.2, None, DBUtil.parseDateValue('11/29/2111 2:27'),], [-30560253, -7803, -1772, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 2.6, None, DBUtil.parseDateValue('11/29/2111 16:13'),], [-31237072, -124, -8391, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 50.6, None, DBUtil.parseDateValue('11/29/2111 5:45'),], [-31300455, -2168, -261, 'Lab Result', None, 'ACETA(High)', 'ACETAMINOPHEN(ACETA) (High)', 270.7, None, DBUtil.parseDateValue('11/29/2111 18:58'),], [-31823670, -2130, -3897, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 5.4, None, DBUtil.parseDateValue('11/29/2111 14:08'),], [-33197720, -9926, -4898, 'Lab Result', None, 'ACETA(Result)', 'ACETAMINOPHEN(ACETA) (Result)', None, None, DBUtil.parseDateValue('11/29/2111 15:22'),], [-33280031, -3858, -6463, 'Lab Result', None, 'ACETA(Result)', 'ACETAMINOPHEN(ACETA) (Result)', 9999999, None, DBUtil.parseDateValue('11/29/2111 7:41'),], [-33765278, -4255, -622, 'Lab Result', None, '9374R(Result)', 'INTERPRETATION/ COMMENTS CLASS II 9374R (Result)', 9999999, None, DBUtil.parseDateValue('9/22/2112 20:26'),], [-35954787, -7074, -6965, 'Lab Result', None, 'GTP53(Result)', 'TP53(GTP53) (Result)', 9999999, None, DBUtil.parseDateValue('8/19/2109 16:39'),], [-36668349, -9815, -3658, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 7.7, None, DBUtil.parseDateValue('10/30/2111 7:23'),], [-38543619, -6562, -4489, 'Lab Result', None, 'GTP53(Result)', 'TP53(GTP53) (Result)', 9999999, None, DBUtil.parseDateValue('10/23/2109 14:30'),], [-39004110, -5750, -4953, 'Lab Result', None, 'YLEPT1(InRange)', 'LEPTIN (InRange)', 20, None, DBUtil.parseDateValue('8/26/2112 15:07'),], [-40604146, -7480, -8730, 'Lab Result', None, '9374R(Result)', 'INTERPRETATION/ COMMENTS CLASS II 9374R (Result)', None, None, DBUtil.parseDateValue('12/13/2111 18:12'),], ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData) # Query back for stat data testQuery = \ """ select base_name, max_result_flag, max_result_in_range from order_result_stat where base_name not like 'PreTest_%%' order by base_name """ # Don't necessarily expect stats for all items if always get a usable result_flag, result_in_range_yn, or sentinel result value expectedData = \ [ ["25OHD3",None,None], #["9374R",None,None], ["ACETA",None,None], #["GTP53",None,None], #["HCT","Low Panic",None], #["HGB","Low Panic",None], #["MCH",None,"Y"], #["MCHC",None,"Y"], #["MCV",None,"Y"], #["MG",None,"Y"], ["NA","Low",None], #["PLT","Low",None], #["RBC","Low",None], #["RDW","High",None], #["WBC","Low Panic",None], ["YLEPT1",None,None], ] actualData = DBUtil.execute(testQuery) self.assertEqualTable(expectedData, actualData)
def serviceStopped(self): log.debug("Auth Service for {0} stopping".format(self.ip))