Ejemplo n.º 1
0
def boxplot(phenotype):
    """Draw a boxplot for the given continuous phenotype.

    :param phenotype: Display a boxplot for the provided continuous phenotype.
    :type phenotype: str

    :returns: The result of ``_response_from_img_filename`` for the saved
              image ("cohort_plot.png").

    :raises REPLException: If the requested phenotype is not continuous.

    """
    data, meta = _get_data_meta(phenotype)

    # Validate the variable type *before* touching the values: the plot is
    # only meaningful for continuous variables, and the NaN filter below is
    # a numeric operation.
    _type = types.type_str(meta["variable_type"])
    if not _type.subtype_of(types.Continuous):
        raise REPLException("Can't draw boxplot for non-continuous variable "
                            "('{}').".format(phenotype))

    # Drop missing values so that they are not passed to the plot.
    data = data[~np.isnan(data)]

    filename = "cohort_plot.png"
    fig, ax = plt.subplots(1, 1)
    try:
        # Horizontal boxplot: the phenotype values are on the x axis and the
        # y axis carries no information, so its labels and ticks are hidden.
        ax.boxplot(data, vert=False)
        ax.set_xlabel(phenotype)
        ax.set_yticklabels([])
        ax.yaxis.set_ticks_position("none")

        fig.savefig(filename, dpi=300)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # release it so repeated calls don't accumulate open figures.
        plt.close(fig)

    return _response_from_img_filename(filename)
Ejemplo n.º 2
0
def info(phen_or_command, drug_code=None):
    """Get information and summary statistics on a phenotype.

    :param phen_or_command: The name of the phenotype to get summary
                            information on, or the literal string "drug" to
                            get information on a drug instead.
    :type phen_or_command: str

    :param drug_code: The code of the drug to describe. Only used when
                      ``phen_or_command`` is "drug".

    :returns: For a phenotype, a dict with the keys "success" (True) and
              "message" (the formatted report). For a drug, whatever
              ``_info_drug`` returns.

    Use 'list' to see all the available phenotypes for this command.

    This command can also be used to get information on a drug:

    info drug 12345 or C05 (ATC code)

    """
    if phen_or_command == "drug":
        return _info_drug(drug_code)

    phenotype = phen_or_command
    message = StringIO()
    data, meta = _get_data_meta(phenotype)

    print("Phenotype meta data:", file=message)
    for k, v in meta.items():
        # Pad before coloring: the color escape sequences would otherwise be
        # counted by ljust() and the value column would be misaligned.
        label = k.ljust(30)
        if COLOR:
            label = colored(label, "green")
        print("\t{}{}".format(label, v), file=message)

    print("\nSummary statistics:", file=message)

    n_missing = STATE["manager"].get_number_missing(phenotype)
    n_total = data.shape[0]

    # Explicit float conversion keeps the percentage a true division, and the
    # guard avoids dividing by zero when there are no observations at all.
    if n_total:
        pct_missing = float(n_missing) / n_total * 100
    else:
        pct_missing = float("nan")

    print("\t{} / {} missing values ({:.3f}%)".format(
        n_missing, n_total, pct_missing
    ), file=message)

    t = types.type_str(meta["variable_type"])

    if t.subtype_of(types.Discrete):
        # Show information on prevalence (cases are coded 1, controls 0;
        # any other value, including missing ones, is ignored).
        n_cases = np.sum(data == 1)
        n_controls = np.sum(data == 0)
        n_known = n_cases + n_controls
        if n_known:
            prevalence = float(n_cases) / n_known * 100
        else:
            prevalence = float("nan")

        print("\t{} cases, {} controls; prevalence: {:.3f}%".format(
            n_cases, n_controls, prevalence
        ), file=message)

    elif t.subtype_of(types.Continuous):
        # NaN-aware statistics so that missing values are ignored.
        mean = np.nanmean(data)
        std = np.nanstd(data)
        print(u"\tµ = {}, σ = {}".format(mean, std), file=message)
        print("\tmin = {}, max = {}".format(np.nanmin(data), np.nanmax(data)),
              file=message)

    elif t.subtype_of(types.Factor):
        print("\nCounts (rate):", file=message)
        # The rate is relative to the total number of rows of ``data``.
        n = data.shape[0]
        # Series.items() replaces iteritems(), which was removed in pandas 2.
        for name, count in data.value_counts().items():
            print("\t{}: {} ({:.3f}%)".format(
                name, count, float(count) / n * 100
            ), file=message)

    return {"success": True, "message": message.getvalue()}
Ejemplo n.º 3
0
def normal_qq_plot(phenotype):
    """Plot the Normal QQ plot of the observations.

    :param phenotype: The phenotype for which to draw the QQ plot.
    :type phenotype: str

    :returns: The result of ``_response_from_img_filename`` for the saved
              image ("cohort_plot.png").

    :raises REPLException: If the phenotype is not continuous.

    This function is only available for continuous phenotypes.

    """
    data, meta = _get_data_meta(phenotype)

    # Validate the variable type before touching the values: the QQ plot is
    # only defined for continuous variables and the NaN filter below is a
    # numeric operation.
    variable_type = types.type_str(meta["variable_type"])
    if not variable_type.subtype_of(types.Continuous):
        raise REPLException(
            "Could not create QQ plot for {} variable '{}'.".format(
                meta["variable_type"], phenotype
            )
        )

    # Drop missing values and sort to get the observed quantiles.
    data = np.sort(data[~np.isnan(data)])
    n = data.shape[0]

    # Expected quantiles of a normal distribution with the sample mean and
    # standard deviation, evaluated at the positions i / (n + 1), i = 1..n.
    # The float denominator guarantees a true division.
    expected = scipy.stats.norm.ppf(
        np.arange(1, n + 1) / (n + 1.0),
        loc=np.mean(data),
        scale=np.std(data)
    )

    plt.scatter(expected, data, color="black", marker="o", s=10)

    # Remember the limits chosen for the scatter so that drawing the
    # regression line does not rescale the axes.
    x_min, x_max = plt.xlim()
    y_min, y_max = plt.ylim()

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        expected, data
    )
    plt.plot(
        [x_min, x_max],
        [slope * x_min + intercept, slope * x_max + intercept],
        "--", color="black",
        label="$R^2 = {:.4f}$".format(r_value ** 2)
    )
    plt.legend(loc="lower right")

    plt.xlabel("Expected quantiles")
    plt.ylabel("Observed quantiles")

    plt.xlim([x_min, x_max])
    plt.ylim([y_min, y_max])

    filename = "cohort_plot.png"
    # The figure has to be written to disk before the response is built from
    # the file name (same resolution as the other plotting commands).
    plt.savefig(filename, dpi=300)
    return _response_from_img_filename(filename)
Ejemplo n.º 4
0
def histogram(phenotype, nbins=None):
    """Draw a histogram (or a bar plot for discrete variables) of the data.

    :param phenotype: The phenotype for which to draw the histogram.
    :type phenotype: str

    :param nbins: The number of bins for the histogram (optional).
    :type nbins: int

    :returns: The result of ``_response_from_img_filename`` for the saved
              image ("cohort_plot.png").

    :raises REPLException: If the variable is neither continuous nor
                           discrete.

    This function will work on both continuous and discrete variables (but
    not factors).

    """
    data, meta = _get_data_meta(phenotype)
    t = types.type_str(meta["variable_type"])

    # Reject unsupported variable types up front, before the numeric NaN
    # filter below gets a chance to fail on data it can't handle.
    is_continuous = t.subtype_of(types.Continuous)
    if not is_continuous and not t.subtype_of(types.Discrete):
        raise REPLException("Could not generate histogram for '{}' variable."
                            "".format(meta["variable_type"]))

    # Missing values are not plotted.
    data = data[~np.isnan(data)]

    if is_continuous:
        # Histogram.
        if nbins:
            plt.hist(data, bins=nbins)
        else:
            plt.hist(data)

        plt.xlabel(phenotype)

    else:
        # Bar plot of the number of controls (0) and cases (1).
        # align="edge" makes the bars span [0.1, 0.2] and [0.4, 0.5] so the
        # tick labels at 0.15 and 0.45 sit under the middle of each bar
        # regardless of matplotlib's default alignment.
        plt.bar((0.1, 0.4), (np.sum(data == 0), np.sum(data == 1)),
                width=0.1, align="edge")
        plt.xticks((0.15, 0.45), ("control", "case"))
        plt.xlim((0, 0.6))

    filename = "cohort_plot.png"
    plt.savefig(filename, dpi=300)

    return _response_from_img_filename(filename)