Example #1
0
    def action_default(self):
        """Execute the submitted query and store formatted results in requestData.

        Reads the query text from the "input" form field and the optional
        "incCols" checkbox field, runs the query through DBUtil.execute using
        the connection factory from self.connectionFactory(), and populates
        self.requestData with:

        - "incCols": the submitted checkbox value, or "" when unchecked
        - "resultsText": text rendering of the result set, or a
          "rows affected" message when the query did not return a list
        - "resultsHtml": HTML rendering (only when a list was returned)
        - "resultsInfo": row count (result sets only) plus elapsed seconds
        """
        # Checkboxes are not submitted at all when unchecked, so reset the
        # stored value first to make sure an uncheck is persisted.
        self.requestData["incCols"] = ""
        incCols = "incCols" in self.mForm
        if incCols:
            self.requestData["incCols"] = self.mForm["incCols"].value

        # Point to the specified database
        connFactory = self.connectionFactory()

        startTime = time.time()
        # Just execute a normal query, possibly with a result set
        results = DBUtil.execute(self.mForm["input"].value,
                                 includeColumnNames=incCols,
                                 connFactory=connFactory)
        if isinstance(results, list):  # Result set, format as table
            textFormatter = TextResultsFormatter(StringIO())
            textFormatter.formatResultSet(results)
            self.requestData["resultsText"] = textFormatter.getOutFile().getvalue()

            # Render the first row as table headers only if column names were requested
            headerRowFormat = "th" if incCols else None
            htmlFormatter = HtmlResultsFormatter(StringIO(), headerRowFormat)
            htmlFormatter.formatResultSet(results)
            self.requestData["resultsHtml"] = htmlFormatter.getOutFile().getvalue()

            resultsInfo = "(%d rows) " % len(results)
        else:
            self.requestData[
                "resultsText"] = "%d rows affected (or other return code)" % results
            # No row count to report here.  Keep whatever info text is already
            # present, but do not fail with a KeyError when none was ever set.
            resultsInfo = self.requestData.get("resultsInfo", "")

        # Elapsed time covers both query execution and result formatting
        elapsed = time.time() - startTime
        self.requestData["resultsInfo"] = resultsInfo + "(%1.3f seconds)" % elapsed
    def main(self, argv):
        """Main method, callable from command line.

        Parses ``argv`` (program name first), builds an AnalysisQuery from the
        options, runs the analysis by calling ``self(query)`` and writes the
        results as a text table.

        Positional arguments:
            <patientIds>  Name of a file with whitespace-separated patient IDs,
                          or, if it cannot be opened, a comma-separated list.
            <outputFile>  Optional.  Passed to stdOpen for writing; None is
                          passed when the argument is omitted.

        Prints the usage help and exits with status -1 when <patientIds> is
        missing.
        """
        usageStr = ("usage: %prog [options] <patientIds> [<outputFile>]\n"
                    "   <patientIds>    Patient ID file or Comma-separated list of test Patient IDs to run analysis against\n"
                    "   <outputFile>    If query yields a result set, then that will be output\n"
                    "                       to the named file.  Leave blank or specify \"-\" to send to stdout.\n")
        parser = OptionParser(usage=usageStr)
        parser.add_option("-R", "--recommender",  dest="recommender",  help="Name of the recommender to run the analysis against.  Options: %s" % RECOMMENDER_CLASS_BY_NAME.keys())
        parser.add_option("-s", "--sortField",  dest="sortField",  help="Allow overriding of default sort field when returning ranked results")
        parser.add_option("-f", "--fieldFilters",  dest="fieldFilters",  help="Filters to exclude results.  Comma-separated separated list of field-op:value exclusions where op is either < or > like, conditionalFreq<:0.1,frqeRatio<:1")
        parser.add_option("-t", "--timeDeltaMax",  dest="timeDeltaMax",  help="If set, represents a time delta in seconds maximum by which recommendations should be based on.  Defaults to recommending items that occur at ANY time after the key orders.  If provided, will apply limits to only orders placed within 0 seconds, 1 hour (3600), 1 day (86400), or 1 week (604800) of the key orders / items.")
        parser.add_option("-a", "--aggregationMethod",  dest="aggregationMethod",  help="Aggregation method to use for recommendations based off multiple query items.  Options: %s." % list(AGGREGATOR_OPTIONS) )
        parser.add_option("-p", "--countPrefix",  dest="countPrefix",  help="Prefix for how to do counts.  Blank for default item counting allowing repeats, otherwise ignore repeats for patient_ or encounter_")
        parser.add_option("-q", "--queryItemMax",  dest="queryItemMax",  help="If set, specifies a maximum number of query items to use when analyzing serial recommendations.  Will stop analyzing further for a patient once reach this limit.")
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: "+str.join(" ", argv))
        timer = time.time()

        # Only <patientIds> is mandatory; <outputFile> is optional per the usage text
        if len(args) < 1:
            parser.print_help()
            sys.exit(-1)

        # Parse out the query parameters
        query = AnalysisQuery()
        query.recommender = RECOMMENDER_CLASS_BY_NAME[options.recommender]()
        query.recommender.dataManager.dataCache = dict()   # Use a local cache to speed up repeat queries

        patientIdsParam = args[0]
        try:
            # Try to open patient IDs as a file
            patientIdFile = stdOpen(patientIdsParam)
            try:
                query.patientIds = set(patientIdFile.read().split())
            finally:
                # Release the handle once fully read, but never close the process's stdin
                if patientIdFile is not sys.stdin:
                    patientIdFile.close()
        except IOError:
            # Unable to open as a filename, then interpret as simple comma-separated list
            query.patientIds = set(patientIdsParam.split(","))

        query.baseRecQuery = RecommenderQuery()
        query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds()
        query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds()
        if options.sortField is not None:
            query.baseRecQuery.sortField = options.sortField
        if options.fieldFilters is not None:
            # Each filter looks like "<field><op>:<value>", e.g. "conditionalFreq<:0.1"
            for fieldFilterStr in options.fieldFilters.split(","):
                (fieldOp, valueStr) = fieldFilterStr.split(":")
                query.baseRecQuery.fieldFilters[fieldOp] = float(valueStr)
        if options.timeDeltaMax is not None and len(options.timeDeltaMax) > 0:
            # Option value is a number of seconds
            query.baseRecQuery.timeDeltaMax = timedelta(0, int(options.timeDeltaMax))
        if options.aggregationMethod is not None:
            query.baseRecQuery.aggregationMethod = options.aggregationMethod
        if options.countPrefix is not None:
            query.baseRecQuery.countPrefix = options.countPrefix

        if options.queryItemMax is not None:
            query.queryItemMax = int(options.queryItemMax)

        # Run the actual analysis
        analysisResults = self(query)

        # Format the results for output
        outputFilename = None
        if len(args) > 1:
            outputFilename = args[1]
        outputFile = stdOpen(outputFilename, "w")

        print >> outputFile, "#", argv  # Print comment line with analysis arguments to allow for deconstruction later

        colNames = ["patientId", "clinicalItemId", "iItem", "iRecItem", "recRank", "recScore"]
        analysisResults.insert(0, colNames)    # Insert a mock record to get a header / label row

        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultSet(analysisResults)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Example #3
0
    def main(self, argv):
        """Main method, callable from command line.

        Parses ``argv`` (program name first), runs the analysis by calling
        ``self(inputFile, colOutcome, colScore, options)``, optionally
        generates a figure, and writes a comment line with the JSON-encoded
        summary data followed by the formatted analysis table.

        Positional arguments:
            <inputFile>   Passed to stdOpen for reading.
            <outputFile>  Optional.  Passed to stdOpen for writing; None is
                          passed when the argument is omitted.

        Prints the usage help and exits with status -1 when <inputFile> is
        missing.
        """
        usageStr =  "usage: %prog [options] <inputFile> [<outputFile>]\n"+\
                    "   <inputFile> Tab-delimited file, first two labeled columns expected to represent labeled outcome (0 and non-zero) and score/probability of outcome\n"+\
                    "   <outputFile>    Tab-delimited table specifying TPR (sensitivity) and FPR (1-specificity) for components of a ROC plot.  Comment / header line will be a JSON parseable dictionary of additional summary stats, including the ROC AUC.\n"+\
                    "                       Leave blank or specify \"-\" to send to stdout.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-f",
            "--figure",
            dest="figure",
            help=
            "If set, will also try to auto-generate an example figure and store to a file here"
        )
        parser.add_option("-t",
                          "--title",
                          dest="title",
                          help="Title caption to apply to generated figure")
        parser.add_option(
            "-r",
            "--rcParams",
            dest="rcParams",
            help=
            "JSON dictionary format string specifying any MatPlotLib RC Params to use when generating figure.  For example: \"{\\\"axes.titlesize\\\":20,'legend.fontsize':20}\".  For more info, see http://matplotlib.org/users/customizing.html "
        )
        parser.add_option(
            "-n",
            "--nSamples",
            dest="nSamples",
            help=
            "If set, bootstrap this many samples of data to calculate multiple ROC AUC c-statistics to produce a distribution and confidence interval"
        )
        parser.add_option(
            "-b",
            "--baseScoreCol",
            dest="baseScoreCol",
            help=
            "Name of the base scoring method against which to compare all others when calculating contingency statistics as below."
        )
        parser.add_option(
            "-c",
            "--contingencyStats",
            dest="contingencyStats",
            help=
            "Comma-separated list of contingency stat IDs (see medinfo.common.StatsUtil.ContingencyStats) to calculate for different scoring methods against the specified base scoring method.  For example, 'P-Fisher,P-YatesChi2'"
        )
        parser.add_option(
            "-l",
            "--logScores",
            dest="logScores",
            action="store_true",
            help=
            "If set, will do analysis on the natural log / ln of the scores, which can help accomodate extremely large or small scores that disrupt result with loss of numerical precision"
        )
        parser.add_option(
            "-o",
            "--colOutcome",
            dest="colOutcome",
            help=
            "Index of column to expect outcome values in.  Defaults to 0.  Can specify a string to identify a column header."
        )
        parser.add_option(
            "-s",
            "--colScore",
            dest="colScore",
            help=
            "Index of column to expect score values in.  Defaults to 1.  Can specify strings and comma-separated lists to plot multiple curves."
        )

        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()

        # Only <inputFile> is mandatory; <outputFile> is optional per the usage text
        if len(args) < 1:
            parser.print_help()
            sys.exit(-1)

        inputFilename = args[0]
        inputFile = stdOpen(inputFilename)

        self.logScores = options.logScores

        # Run the actual analysis
        (analysisResultsByScoreId,
         summaryData) = self(inputFile, options.colOutcome,
                             options.colScore, options)

        # Generate plot figure
        if options.figure is not None:
            rcParams = None
            if options.rcParams is not None:
                # rcParams option is supplied as a JSON dictionary string
                rcParams = json.loads(options.rcParams)
            self.generateFigure(analysisResultsByScoreId, summaryData,
                                options.figure, options.title, rcParams,
                                options.colScore)

        # Format the results for output
        outputFilename = None
        if len(args) > 1:
            outputFilename = args[1]
        outputFile = stdOpen(outputFilename, "w")

        # Print comment line with arguments to allow for deconstruction later as well as extra results
        summaryData["argv"] = argv
        print >> outputFile, COMMENT_TAG, json.dumps(summaryData)

        outputTable = self.formatAnalysisTable(analysisResultsByScoreId)

        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultSet(outputTable)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)