コード例 #1
0
ファイル: TopicModel.py プロジェクト: xxxx3/CDSS
    def buildModel(self, corpusBOWGenerator, numTopics):
        """Train the topic model from a streamed bag-of-words corpus.

        While streaming, also tally in how many documents each word ID occurs,
        so that later TF*IDF (inverse DOCUMENT frequency) calculations have the
        counts they need.  In the clinical context one document = one patient.

        corpusBOWGenerator: iterable of documents, each an iterable of
            (wordId, wordCount) pairs.  Each wordId is assumed to appear at
            most once per document.
        numTopics: passed through unchanged to self.updateModel.

        Returns (model, docCountByWordId).  Both are also stored on self.
        The None key of docCountByWordId holds the total number of documents.
        """
        # Translate item IDs to descriptions.  Models expect an entry for every
        # possible item ID and judge vocabulary size by the length of this
        # dictionary, so IDs absent from the table are filled in with their
        # own ID string as a placeholder description.
        itemsById = DBUtil.loadTableAsDict("clinical_item")
        highestItemId = max(itemsById)
        id2word = {
            itemId: (itemsById[itemId]["description"]
                     if itemId in itemsById else str(itemId))
            for itemId in range(highestItemId + 1)
        }

        progress = ProgressDots()
        self.model = None
        self.docCountByWordId = {None: 0}

        # Feed the model in batches: keeps memory bounded without paying the
        # cost of a model update per single document.
        pendingDocs = []
        for docIndex, document in enumerate(corpusBOWGenerator):
            for wordId, _wordCount in document:
                self.docCountByWordId[wordId] = (
                    self.docCountByWordId.get(wordId, 0) + 1)
            self.docCountByWordId[None] += 1

            pendingDocs.append(document)
            batchIsFull = (
                docIndex % BUFFER_UPDATE_SIZE == BUFFER_UPDATE_SIZE - 1)
            if batchIsFull:
                self.model = self.updateModel(
                    self.model, pendingDocs, id2word, numTopics)
                pendingDocs = []  # Start a fresh batch
            progress.update()

        # Final update for whatever is left over in the last partial batch
        self.model = self.updateModel(
            self.model, pendingDocs, id2word, numTopics)

        return (self.model, self.docCountByWordId)
コード例 #2
0
 def initItemLookups(self, query):
     """Load clinical item lookup tables and cache them on the instance.

     Sets:
       self.itemsById          - "clinical_item" table as loaded by
                                 DBUtil.loadTableAsDict
       self.categoryIdByItemId - item ID -> clinical_item_category_id
       self.candidateItemIds   - the keys of self.docCountByWordId for which
                                 self.isItemRecommendable(...) is true when
                                 evaluated against an empty query item set

     query: passed through unchanged to self.isItemRecommendable.
     """
     self.itemsById = DBUtil.loadTableAsDict("clinical_item")
     # dict.items() works on both Python 2 and 3; iteritems() no longer
     # exists on Python 3 dictionaries.
     self.categoryIdByItemId = {
         itemId: item["clinical_item_category_id"]
         for itemId, item in self.itemsById.items()
     }
     self.candidateItemIds = set()
     emptyQuerySet = set()
     for itemId in self.docCountByWordId.keys():
         if self.isItemRecommendable(itemId, emptyQuerySet, query,
                                     self.categoryIdByItemId):
             self.candidateItemIds.add(itemId)
コード例 #3
0
    def action_default(self):
        """Render the notes of a simulated patient into the request data.

        Reads "sim_patient_id" and "sim_time" from self.requestData, loads the
        matching notes through SimManager and stores:
          requestData["detailTable"]        - one LINE_TEMPLATE row per note,
                                              joined by newlines
          requestData["initialNoteContent"] - content of the last note loaded
                                              (only set when there are notes)
        """
        simPatientId = int(self.requestData["sim_patient_id"])
        simulationTime = int(self.requestData["sim_time"])

        # Lookup table used by formatDataModel, keyed by note type ID
        noteTypeById = DBUtil.loadTableAsDict("sim_note_type")

        notes = SimManager().loadNotes(simPatientId, simulationTime)

        rows = []
        for note in notes:
            # Format first: the template below is filled from the note itself
            self.formatDataModel(note, noteTypeById)
            rows.append(LINE_TEMPLATE % note)
        self.requestData["detailTable"] = "\n".join(rows)

        if notes:
            self.requestData["initialNoteContent"] = notes[-1]["content"]
コード例 #4
0
ファイル: OrderSetRecommender.py プロジェクト: xxxx3/CDSS
    def initItemLookups(self, query):
        """Populate the lookup tables cached on this instance.

        Loading them once up front avoids repeating the same DB lookups for
        every query in a series.  Sets:
          self.itemIdsByOrderSetId  - order set external_id -> set of item IDs
          self.orderSetIdsByItemId  - item ID -> set of order set external_ids
          self.itemsById            - "clinical_item" table as a dictionary
          self.categoryIdByItemId   - item ID -> clinical_item_category_id
          self.patientCountByItemId - item ID -> patient_count
          self.candidateItemIds     - items found in some order set for which
                                      self.isItemRecommendable(...) is true
                                      against an empty query item set

        query: passed through unchanged to self.isItemRecommendable.
        """
        # Order set <-> clinical item membership, excluding the ad hoc section
        sqlTemplate = """select ic.external_id, ici.clinical_item_id
                from item_collection_item as ici, item_collection as ic
                where ic.item_collection_id = ici.item_collection_id
                and ic.section <> %(p)s
                and ici.collection_type_id = %(p)s
                """
        sqlParams = (AD_HOC_SECTION, COLLECTION_TYPE_ORDER_SET)
        membershipRows = DBUtil.execute(
            sqlTemplate % {"p": DBUtil.SQL_PLACEHOLDER}, sqlParams)

        self.itemIdsByOrderSetId = dict()
        self.orderSetIdsByItemId = dict()
        for orderSetId, itemId in membershipRows:
            self.itemIdsByOrderSetId.setdefault(orderSetId, set()).add(itemId)
            self.orderSetIdsByItemId.setdefault(itemId, set()).add(orderSetId)

        # Per-item attributes pulled out of the full clinical_item table
        self.itemsById = DBUtil.loadTableAsDict("clinical_item")
        self.categoryIdByItemId = {
            itemId: item["clinical_item_category_id"]
            for itemId, item in self.itemsById.items()
        }
        self.patientCountByItemId = {
            itemId: item["patient_count"]
            for itemId, item in self.itemsById.items()
        }

        # Only items that belong to at least one order set are candidates
        self.candidateItemIds = set()
        noQueryItems = set()
        for itemId in self.orderSetIdsByItemId:
            recommendable = self.isItemRecommendable(
                itemId, noQueryItems, query, self.categoryIdByItemId)
            if recommendable:
                self.candidateItemIds.add(itemId)
コード例 #5
0
    results = recommender(recQuery)
    for result in results:
        itemIds.add(result["clinical_item_id"])

    print >> sys.stderr, admitDxId, len(itemIds)

    # Top results by baseline prevalence
    recQuery.sortField = "prevalence"
    results = recommender(recQuery)
    for result in results:
        itemIds.add(result["clinical_item_id"])

    print >> sys.stderr, admitDxId, len(itemIds)

# Load clinicalItem models for quick lookups
clinicalItemById = DBUtil.loadTableAsDict("clinical_item")

print str.join("\t", [
    "Admit Dx ID", "Section", "Guideline", "item_collection_id",
    "collection_type_id", "value", "comment", "clinical_item_id", "Name",
    "Description"
])

for (admitDxId, sectionName,
     guidelineName) in admitDxIdSectionGuidelineNameTuples:
    itemIds = itemIdsByAdmitDxId[admitDxId]

    for itemId in itemIds:
        clinicalItem = clinicalItemById[itemId]
        # Note just printing blank spaces for values in middle
        print "%s\t%s\t%s\t\t1\t\t\t%s\t%s\t%s" % (