Exemple #1
0
    def buildModel(self, corpusBOWGenerator, numTopics):
        """Train the topic model incrementally from a streamed bag-of-words corpus.

        While streaming, also tally in how many documents each word ID occurs
        (document frequency) to support later Term Frequency * Inverse Document
        Frequency calculations.  In the clinical context, document = patient.

        corpusBOWGenerator: Iterable of documents, each one an iterable of
            (wordId, wordCount) pairs.  Word IDs are assumed to be unique
            within a document.
        numTopics: Handed to self.updateModel unchanged.

        Returns (model, docCountByWordId).  Both are also stored on self.
        The None key of docCountByWordId holds the total number of documents.
        """
        # Item records keyed by ID, used to translate item IDs to descriptions
        itemsById = DBUtil.loadTableAsDict("clinical_item")

        # Models judge vocabulary size by the length of this dictionary rather
        # than by the maximum ID value, so every ID from 0 up to the maximum
        # needs an entry.  IDs without a record fall back to the ID as a string.
        highestItemId = max(itemsById.keys())
        id2word = {
            itemId: (itemsById[itemId]["description"]
                     if itemId in itemsById else str(itemId))
            for itemId in range(highestItemId + 1)
        }

        progress = ProgressDots()
        self.model = None
        # The None key represents the count of all documents
        self.docCountByWordId = {None: 0}

        # Documents are committed to the model in batches: the corpus is never
        # held in memory as a whole, yet updating one document at a time
        # would be very slow.
        pendingDocs = []
        for docIndex, document in enumerate(corpusBOWGenerator):
            for wordId, _wordCount in document:
                # One increment per document containing the word
                self.docCountByWordId[wordId] = (
                    self.docCountByWordId.get(wordId, 0) + 1)
            self.docCountByWordId[None] += 1

            pendingDocs.append(document)
            bufferIsFull = (
                docIndex % BUFFER_UPDATE_SIZE == BUFFER_UPDATE_SIZE - 1)
            if bufferIsFull:
                self.model = self.updateModel(
                    self.model, pendingDocs, id2word, numTopics)
                # Start a fresh buffer now that this one is committed
                pendingDocs = []
            progress.update()

        # Final update for the documents still waiting in the buffer
        self.model = self.updateModel(
            self.model, pendingDocs, id2word, numTopics)

        return (self.model, self.docCountByWordId)
Exemple #2
0
 def initItemLookups(self, query):
     """Load clinical item lookup tables and cache them on the instance.

     Populates:
       self.itemsById: The "clinical_item" table loaded as a dictionary.
       self.categoryIdByItemId: Item ID -> "clinical_item_category_id" value
           of that item.
       self.candidateItemIds: Keys of self.docCountByWordId that
           self.isItemRecommendable accepts for this query, checked
           against an empty set of query items.

     query: Handed to self.isItemRecommendable unchanged.
     """
     self.itemsById = DBUtil.loadTableAsDict("clinical_item")
     # items() instead of iteritems(): the latter exists only on Python 2
     # dictionaries, while items() works on Python 2 and Python 3 alike.
     self.categoryIdByItemId = {
         itemId: item["clinical_item_category_id"]
         for itemId, item in self.itemsById.items()
     }
     self.candidateItemIds = set()
     emptyQuerySet = set()
     # NOTE(review): if docCountByWordId carries a None key for the total
     # document count, that key is offered here as an item ID as well --
     # confirm isItemRecommendable handles it as intended.
     for itemId in self.docCountByWordId.keys():
         if self.isItemRecommendable(itemId, emptyQuerySet, query,
                                     self.categoryIdByItemId):
             self.candidateItemIds.add(itemId)
Exemple #3
0
    def action_default(self):
        """Present the set of notes for a simulated patient.

        Reads "sim_patient_id" and "sim_time" from self.requestData (both
        converted to int) and loads the matching notes through SimManager.

        Stores into self.requestData:
          "detailTable": One LINE_TEMPLATE-formatted line per loaded note,
              joined with newlines.
          "initialNoteContent": The "content" of the last loaded note.
              Only set when at least one note was loaded.
        """
        simPatientId = int(self.requestData["sim_patient_id"])
        simulationTime = int(self.requestData["sim_time"])

        # Lookup table to translate note type IDs into description strings
        noteTypesById = DBUtil.loadTableAsDict("sim_note_type")

        notes = SimManager().loadNotes(simPatientId, simulationTime)

        tableRows = []
        for note in notes:
            # Each note goes through formatDataModel before it is
            # interpolated into the line template
            self.formatDataModel(note, noteTypesById)
            tableRows.append(LINE_TEMPLATE % note)
        self.requestData["detailTable"] = "\n".join(tableRows)

        if len(notes) == 0:
            # Nothing to preselect without any notes
            return
        self.requestData["initialNoteContent"] = notes[-1]["content"]
Exemple #4
0
    def initItemLookups(self, query):
        """Cache order set and clinical item lookups as member variables.

        Loading these once avoids wasteful repeat DB lookups when serving
        a series of queries.

        Populates:
          self.itemIdsByOrderSetId: external_id of an item collection ->
              set of clinical item IDs it contains.
          self.orderSetIdsByItemId: The reverse mapping, clinical item ID ->
              set of external_ids.
          self.itemsById: The "clinical_item" table loaded as a dictionary.
          self.categoryIdByItemId: Item ID -> "clinical_item_category_id".
          self.patientCountByItemId: Item ID -> "patient_count".
          self.candidateItemIds: Items appearing in some order set that
              self.isItemRecommendable accepts for this query, checked
              against an empty set of query items.

        query: Handed to self.isItemRecommendable unchanged.
        """
        self.itemIdsByOrderSetId = {}
        self.orderSetIdsByItemId = {}

        # Collection items whose section differs from AD_HOC_SECTION and
        # whose collection type is COLLECTION_TYPE_ORDER_SET
        orderSetItemSql = """select ic.external_id, ici.clinical_item_id
                from item_collection_item as ici, item_collection as ic
                where ic.item_collection_id = ici.item_collection_id
                and ic.section <> %(p)s
                and ici.collection_type_id = %(p)s
                """ % {"p": DBUtil.SQL_PLACEHOLDER}
        orderSetItemParams = (AD_HOC_SECTION, COLLECTION_TYPE_ORDER_SET)

        # Record every (order set, item) pair in both directions
        for orderSetId, itemId in DBUtil.execute(orderSetItemSql,
                                                 orderSetItemParams):
            self.itemIdsByOrderSetId.setdefault(orderSetId, set()).add(itemId)
            self.orderSetIdsByItemId.setdefault(itemId, set()).add(orderSetId)

        self.itemsById = DBUtil.loadTableAsDict("clinical_item")
        self.categoryIdByItemId = {
            itemId: item["clinical_item_category_id"]
            for itemId, item in self.itemsById.items()
        }
        self.patientCountByItemId = {
            itemId: item["patient_count"]
            for itemId, item in self.itemsById.items()
        }

        self.candidateItemIds = set()
        noQueryItems = set()
        # Iterate over a snapshot of the item IDs found in order sets
        for itemId in tuple(self.orderSetIdsByItemId):
            if self.isItemRecommendable(itemId, noQueryItems, query,
                                        self.categoryIdByItemId):
                self.candidateItemIds.add(itemId)
    results = recommender(recQuery)
    for result in results:
        itemIds.add(result["clinical_item_id"])

    print >> sys.stderr, admitDxId, len(itemIds)

    # Top results by baseline prevalence
    recQuery.sortField = "prevalence"
    results = recommender(recQuery)
    for result in results:
        itemIds.add(result["clinical_item_id"])

    print >> sys.stderr, admitDxId, len(itemIds)

# Load clinicalItem models for quick lookups
clinicalItemById = DBUtil.loadTableAsDict("clinical_item")

print str.join("\t", [
    "Admit Dx ID", "Section", "Guideline", "item_collection_id",
    "collection_type_id", "value", "comment", "clinical_item_id", "Name",
    "Description"
])

for (admitDxId, sectionName,
     guidelineName) in admitDxIdSectionGuidelineNameTuples:
    itemIds = itemIdsByAdmitDxId[admitDxId]

    for itemId in itemIds:
        clinicalItem = clinicalItemById[itemId]
        # Note just printing blank spaces for values in middle
        print "%s\t%s\t%s\t\t1\t\t\t%s\t%s\t%s" % (