Esempio n. 1
0
def main_buildTopicModel(argv):
    bowInputFilename = SOURCE_DATA_DIR + INPUT_FILENAME

    mod = TopicModel()
    for numTopics in numTopicsOptions:
        subargv = ["TopicModel", "-n", str(numTopics)]
        subargv.extend([
            bowInputFilename,
            MODEL_DIR + "/topicModel." + os.path.basename(bowInputFilename) +
            ".%dTopic.model" % (numTopics),
        ])
        mod.main(subargv)
    return mod.model
Esempio n. 2
0
    def __init__(self, model, docCountByWordId=None):
        """Initialize module with prior generated model and word document counts from TopicModel module.
        """
        BaseItemRecommender.__init__(self)
        self.modeler = TopicModel()
        # Utility instance to run off of

        if docCountByWordId:  # Specified both options
            self.model = model
            self.docCountByWordId = docCountByWordId
        else:  # If only the first one specified, interpret it as a base filename to load the objects from
            filename = model
            (self.model, self.docCountByWordId
             ) = self.modeler.loadModelAndDocCounts(filename)

        # Cached lookup data.  Don't repeat work for serial queries
        self.itemsById = None
        self.categoryIdByItemId = None
        self.candidateItemIds = None
        self.weightByItemIdByTopicId = None
Esempio n. 3
0
def main_quickTest(argv):
    modelFilename = argv[1]
    modeler = TopicModel()

    timer = time.time()
    (model, docCountByWordId) = modeler.loadModelAndDocCounts(modelFilename)
    timer = time.time() - timer
    log.info("%.2f seconds to load", timer)

    timer = time.time()
    weightByItemIdByTopicId = modeler.generateWeightByItemIdByTopicId(
        model, 100)
    timer = time.time() - timer
    log.info("%.2f seconds to generate weights", timer)

    for i in xrange(3):
        prog = ProgressDots()
        for (topicId, weightByItemId) in weightByItemIdByTopicId.iteritems():
            for (itemId, itemWeight) in weightByItemId.iteritems():
                prog.update()
        prog.printStatus()
    """
Esempio n. 4
0
    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [
                RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId,
             isNew) = DBUtil.findOrInsertItem("clinical_item_category",
                                              dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = [
            "clinical_item_id", "clinical_item_category_id", "name",
            "analysis_status"
        ]
        dataModels = \
            [
                RowItemModel( [1, -1, "CBC",1], headers ),
                RowItemModel( [2, -1, "BMP",0], headers ), # Clear analysis status, so this will be ignored unless changed
                RowItemModel( [3, -1, "Hepatic Panel",1], headers ),
                RowItemModel( [4, -1, "Cardiac Enzymes",1], headers ),
                RowItemModel( [5, -2, "CXR",1], headers ),
                RowItemModel( [6, -2, "RUQ Ultrasound",1], headers ),
                RowItemModel( [7, -2, "CT Abdomen/Pelvis",1], headers ),
                RowItemModel( [8, -2, "CT PE Thorax",1], headers ),
                RowItemModel( [9, -3, "Acetaminophen",1], headers ),
                RowItemModel( [10, -3, "Carvedilol",1], headers ),
                RowItemModel( [11, -3, "Enoxaparin",1], headers ),
                RowItemModel( [12, -3, "Warfarin",1], headers ),
                RowItemModel( [13, -3, "Ceftriaxone",1], headers ),
                RowItemModel( [14, -4, "Foley Catheter",1], headers ),
                RowItemModel( [15, -4, "Strict I&O",1], headers ),
                RowItemModel( [16, -4, "Fall Precautions",1], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId,
             isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        # Input file contents in Bag-of-Words formats
        # Specifically avoid the use of items 6 or 7 in the training data
        self.inputBOWFileStr = \
"""[[1,1],[2,2],[3,1],[4,4],[5,10],[8,5]]
[[3,4],[4,4],[9,3],[10,2],[12,6],[13,3],[15,5],[16,8]]
[[1,1],[2,2],[3,1],[4,4],[5,10],[8,5],[9,1],[10,2],[11,1],[12,4],[13,10],[14,1],[15,3],[16,5]]
[[1,4],[2,9],[9,1],[10,2],[11,7],[12,4],[13,2],[16,6]]
[[4,3],[5,31],[8,5],[12,6],[13,8],[16,5]]
"""
        self.instance = TopicModel()