Beispiel #1
0
    def test_relative_abundance(self):
        """
        Testing relative abundance() function of biom.calc.py.

        Every hand calculated value is compared, per sample ID and OTU ID,
        with the output of bc.relative_abundance(). The test also checks that
        passing sample IDs which are not expected to match raises a
        ValueError carrying the documented message.

        :return: Returns OK, if testing goal is achieved, otherwise raises error.
        """
        self.result = bc.relative_abundance(self.biomf)

        # Manual calculations, keyed on sample ID and then on OTU ID
        hand_calc = {"S1": {"GG_OTU_1": 0.192307692, "GG_OTU_2": 0.076923077,
                            "GG_OTU_3": 0.192307692, "GG_OTU_4": 0.346153846,
                            "GG_OTU_5": 0.192307692},
                     "S10": {"GG_OTU_1": 0.083333333, "GG_OTU_2": 0.125,
                             "GG_OTU_3": 0.166666667, "GG_OTU_4": 0.333333333,
                             "GG_OTU_5": 0.291666667},
                     "S2": {"GG_OTU_1": 0.161290323, "GG_OTU_2": 0.258064516,
                            "GG_OTU_3": 0.258064516, "GG_OTU_4": 0.258064516,
                            "GG_OTU_5": 0.064516129},
                     "S3": {"GG_OTU_1": 0.111111111, "GG_OTU_2": 0.222222222,
                            "GG_OTU_3": 0.0, "GG_OTU_4": 0.277777778,
                            "GG_OTU_5": 0.388888889},
                     "S4": {"GG_OTU_1": 0.181818182, "GG_OTU_2": 0.0,
                            "GG_OTU_3": 0.545454545, "GG_OTU_4": 0.272727273,
                            "GG_OTU_5": 0.0},
                     "S5": {"GG_OTU_1": 0.086956522, "GG_OTU_2": 0.260869565,
                            "GG_OTU_3": 0.304347826, "GG_OTU_4": 0.217391304,
                            "GG_OTU_5": 0.130434783},
                     "S6": {"GG_OTU_1": 0.333333333, "GG_OTU_2": 0.148148148,
                            "GG_OTU_3": 0.296296296, "GG_OTU_4": 0.185185185,
                            "GG_OTU_5": 0.037037037},
                     "S7": {"GG_OTU_1": 0.071428571, "GG_OTU_2": 0.178571429,
                            "GG_OTU_3": 0.142857143, "GG_OTU_4": 0.285714286,
                            "GG_OTU_5": 0.321428571},
                     "S8": {"GG_OTU_1": 0.230769231, "GG_OTU_2": 0.192307692,
                            "GG_OTU_3": 0.307692308, "GG_OTU_4": 0.115384615,
                            "GG_OTU_5": 0.153846154},
                     "S9": {"GG_OTU_1": 0.0, "GG_OTU_2": 0.111111111,
                            "GG_OTU_3": 0.444444444, "GG_OTU_4": 0.0,
                            "GG_OTU_5": 0.444444444}}

        # Testing the validity of relative_abundance() function.
        for sid in sorted(hand_calc):
            for otuid in sorted(hand_calc[sid]):
                self.assertAlmostEqual(
                    hand_calc[sid][otuid], self.result[sid][otuid],
                    msg="Relative abundances not calculated accurately."
                )

        # assertRaisesRegexp was renamed to assertRaisesRegex in Python 3.2 and
        # the old alias was removed in Python 3.12, while Python 2.7 only knows
        # the old name. Pick whichever one this TestCase provides.
        assert_raises_regex = (getattr(self, "assertRaisesRegex", None) or
                               self.assertRaisesRegexp)

        # Test for valid sample IDs passed into function
        with assert_raises_regex(ValueError, "\nError while calculating relative "
                                 "abundances: The sampleIDs provided do not match "
                                 "the sampleIDs in biom file. Please double check "
                                 "the sampleIDs provided.\n"):
            bc.relative_abundance(self.biomf, sampleIDs=["NS01", "NS02", "NS03"])
    def test_MRA(self):
        """
        Testing mean relative abundance calculation, MRA() function
        of biom_calc.py.

        The values returned by MRA() are compared with the values returned by
        mean_otu_pct_abundance() when it is given the relative abundances of
        the same BIOM table.

        :return: Returns OK, if testing goal was achieved, otherwise
            raises error.
        """
        otu_ids = ['GG_OTU_1', 'GG_OTU_2', 'GG_OTU_3', 'GG_OTU_4', 'GG_OTU_5']

        self.result = bc.MRA(self.biom)
        rel_abd = bc.relative_abundance(self.biom)
        self.mean_otu = bc.mean_otu_pct_abundance(rel_abd, otu_ids)

        # Pair reference values with MRA() values in their iteration order
        # and check each pair.
        value_pairs = zip(self.mean_otu.values(), self.result.values())
        for expected, actual in value_pairs:
            self.assertAlmostEqual(
                expected, actual,
                msg='Mean OTU not calculated accurately.'
                )
Beispiel #3
0
    def test_mean_otu_pct_abundance(self):
        """
        Testing mean_otu_pct_abundance() function of biom_calc.py.

        The hand calculated means are fractions; they are scaled by 100
        before being compared with the values returned by the function.

        :return: Returns OK, if testing goal was achieved, otherwise raises
                error.
        """
        otu_ids = ["GG_OTU_1", "GG_OTU_2"]
        self.rel_a = bc.relative_abundance(self.biomf)
        self.result = bc.mean_otu_pct_abundance(self.rel_a, otu_ids)

        # Hand calculated mean relative abundances
        first_mean = (0 + 0.0153846153846 + 0.0285714285714 + 0.04 + 0.05 +
                      0.0588235294118)/6
        second_mean = (0.1 + 0.107692307692 + 0.114285714286 + 0.12 + 0.125 +
                       0.129411764706)/6
        expected_fractions = [first_mean, second_mean]

        # Testing the validity of the calculations of mean_otu_pct_abundance().
        for fraction, actual in zip(expected_fractions, self.result.values()):
            self.assertAlmostEqual(
                fraction*100, actual,
                msg="Mean OTU not calculated accurately."
                )
    def test_relative_abundance(self):
        """
        Testing relative abundance() function of biom.calc.py.

        The relative abundances calculated for a single sample are compared
        with hand calculated values.

        :return: Returns OK, if testing goal is achieved, otherwise raises
                error.
        """
        sample = 'Sample3'
        self.result = bc.relative_abundance(self.biom)

        # List containing manual calculations
        hand_calc = [1/4.0, 1/4.0, 1/4.0, 1/4.0]

        # Calculated relative abundance values for the sample under test
        func_calc = self.result[sample].values()

        # Testing the validity of relative_abundance() function.
        for hand, res in zip(hand_calc, func_calc):
            self.assertAlmostEqual(
                hand, res,
                msg='Relative abundances not calculated accurately.'
                )
    def test_mean_otu_pct_abundance(self):
        """
        Testing mean_otu_pct_abundance() function of biom_calc.py.

        The hand calculated means are fractions; they are scaled by 100
        before being compared with the values returned by the function.

        :return: Returns OK, if testing goal was achieved, otherwise raises
                error.
        """
        self.rel_a = bc.relative_abundance(self.biom)

        self.result = bc.mean_otu_pct_abundance(
            self.rel_a, ['GG_OTU_1', 'GG_OTU_2']
            )

        # Values calculated by the function under test
        func_calc = self.result.values()

        # list containing hand calculated relative abundance values
        hand_calc = [
            0.25/6,
            (1.0+0.3333333333333333+0.25+0.7142857142857143+0.3333333333333333)/6,
        ]

        # Testing the validity of the calculations of mean_otu_pct_abundance().
        for hand, res in zip(hand_calc, func_calc):
            self.assertAlmostEqual(
                hand*100, res,
                msg='Mean OTU not calculated accurately.'
                )
Beispiel #6
0
    def test_arcsine_sqrt_transform(self):
        """
        Testing arcsine_sqrt_transform() function of biom_calc.py.

        The relative abundances of the BIOM table are transformed and every
        transformed value is compared, per sample ID and OTU ID, with a
        reference value.

        :return: Returns OK if testing goal is achieved, otherwise raises
                 error.
        """
        self.result1 = bc.relative_abundance(self.biomf)
        self.result2 = bc.arcsine_sqrt_transform(self.result1)

        # Reference values, keyed on sample ID and then on OTU ID
        expected = {
            "S1": {"GG_OTU_1": 0.453961252, "GG_OTU_2": 0.281034902,
                   "GG_OTU_3": 0.453961252, "GG_OTU_4": 0.629014802,
                   "GG_OTU_5": 0.453961252},
            "S10": {"GG_OTU_1": 0.292842772, "GG_OTU_2": 0.361367124,
                    "GG_OTU_3": 0.420534335, "GG_OTU_4": 0.615479709,
                    "GG_OTU_5": 0.570510448},
            "S2": {"GG_OTU_1": 0.413273808, "GG_OTU_2": 0.532861869,
                   "GG_OTU_3": 0.532861869, "GG_OTU_4": 0.532861869,
                   "GG_OTU_5": 0.256813917},
            "S3": {"GG_OTU_1": 0.339836909, "GG_OTU_2": 0.490882678,
                   "GG_OTU_3": 0, "GG_OTU_4": 0.555121168,
                   "GG_OTU_5": 0.673351617},
            "S4": {"GG_OTU_1": 0.440510663, "GG_OTU_2": 0,
                   "GG_OTU_3": 0.830915552, "GG_OTU_4": 0.549467245,
                   "GG_OTU_5": 0},
            "S5": {"GG_OTU_1": 0.299334026, "GG_OTU_2": 0.53606149,
                   "GG_OTU_3": 0.584373897, "GG_OTU_4": 0.485049787,
                   "GG_OTU_5": 0.36950894},
            "S6": {"GG_OTU_1": 0.615479709, "GG_OTU_2": 0.395099667,
                   "GG_OTU_3": 0.575591472, "GG_OTU_4": 0.444859969,
                   "GG_OTU_5": 0.1936583},
            "S7": {"GG_OTU_1": 0.270549763, "GG_OTU_2": 0.436286927,
                   "GG_OTU_3": 0.387596687, "GG_OTU_4": 0.563942641,
                   "GG_OTU_5": 0.602794553},
            "S8": {"GG_OTU_1": 0.501093013, "GG_OTU_2": 0.453961252,
                   "GG_OTU_3": 0.588002604, "GG_OTU_4": 0.346579954,
                   "GG_OTU_5": 0.403057074},
            "S9": {"GG_OTU_1": 0, "GG_OTU_2": 0.339836909,
                   "GG_OTU_3": 0.729727656, "GG_OTU_4": 0,
                   "GG_OTU_5": 0.729727656},
        }

        # Walk samples and OTUs in sorted order so that a failure is always
        # reported for the same pair.
        for sample_id in sorted(expected):
            sample_expected = expected[sample_id]
            for otu_id in sorted(sample_expected):
                self.assertAlmostEqual(
                    sample_expected[otu_id], self.result2[sample_id][otu_id],
                    msg="Arcsine squareroot transformation was not accurate."
                )
Beispiel #7
0
def main():
    """
    Write the relative abundances of a BIOM table to a TSV file.

    Exits with an error message if the input BIOM file cannot be opened.
    When the stabilize_variance option is set, the relative abundances are
    passed through bc.arcsine_sqrt_transform() before being written.
    """
    opts = handle_program_options()
    biom_fp = opts.input_biom_fp

    # Opening the file is only a readability check; nothing is read here.
    try:
        with open(biom_fp):
            pass
    except IOError as err:
        sys.exit('\nError in BIOM file path: {}\n'.format(err))

    table = biom.load_table(biom_fp)
    abundances = bc.relative_abundance(table)
    if opts.stabilize_variance:
        abundances = bc.arcsine_sqrt_transform(abundances)

    write_relative_abundance(abundances, table, opts.output_tsv_fp)
def main():
    """
    Calculate relative abundances for a BIOM table and write them out.

    The input path is first opened to verify it is readable; on failure the
    program exits with an error message. The abundances are optionally
    arcsine square-root transformed (stabilize_variance option) and then
    handed to write_relative_abundance() together with the table and the
    output path.
    """
    options = handle_program_options()

    # Readability check for the input file; its content is loaded below.
    try:
        with open(options.input_biom_fp):
            pass
    except IOError as open_err:
        sys.exit('\nError in BIOM file path: {}\n'.format(open_err))

    biom_table = biom.load_table(options.input_biom_fp)
    relative = bc.relative_abundance(biom_table)
    output = bc.arcsine_sqrt_transform(relative) if options.stabilize_variance else relative

    write_relative_abundance(output, biom_table, options.output_tsv_fp)
def assign_otu_membership(biomfile):
    """
    Determines the OTUIDs present in each sample.

    An OTU counts as present in a sample when its relative abundance, as
    returned by bc.relative_abundance(), is greater than zero.

    :type biomfile: biom.table.Table
    :param biomfile: BIOM table object from the biom-format library.

    :rtype: dict
    :return: Returns a dictionary keyed on Sample ID with sets containing
    the IDs of OTUIDs found in each sample.
    """
    membership = defaultdict(set)
    abundances = bc.relative_abundance(biomfile)
    for sample_id in abundances:
        present = {otu_id for otu_id, abd in abundances[sample_id].items()
                   if abd > 0}
        # Touching membership[sample_id] registers the sample even when no
        # OTU has a positive abundance.
        membership[sample_id] |= present
    return membership
Beispiel #10
0
    def test_arcsine_sqrt_transform(self):
        """
        Testing arcsine_sqrt_transform() function of biom_calc.py.

        NOTE(review): the sample under test is selected by position (index 3)
        in the result dictionary, so this test relies on the dictionary's
        iteration order -- confirm that the order is deterministic.

        :return: Returns OK if testing goal is achieved, otherwise raises
                 error.
        """
        self.result1 = bc.relative_abundance(self.biomf)
        self.result2 = bc.arcsine_sqrt_transform(self.result1)

        # Obtaining results to compare.
        hand_calc = [0, 0.32175055439, 0.463647609, 0.57963974036, 0.684719203]
        # list() is required because dictionary views cannot be indexed on
        # Python 3; on Python 2 it is a harmless copy.
        func_calc = list(self.result2.values())[3].values()

        # Testing validity of the transforms.
        for hand, func in zip(hand_calc, func_calc):
            self.assertAlmostEqual(
                hand, func, places=7,
                msg="Function did not calculate transformation accurately."
            )
    def test_assign_otu_membership(self):
        """
        Testing assign_otu_membership() function of otu_calc.py.

        NOTE(review): the assertions below only compare the output of
        bc.relative_abundance() for 'Sample1' with hard-coded values;
        self.result from assign_otu_membership() is stored but never asserted
        on -- confirm whether that is intended.

        :return: Returns OK if the test goals were achieved, otherwise
                 raises error.
        """
        self.result = oc.assign_otu_membership(self.biom)

        # Obtaining the values to be tested
        result1 = bc.relative_abundance(self.biom, ['Sample1'])
        # list() is required because dictionary views cannot be indexed on
        # Python 3; on Python 2 it is a harmless copy.
        hand_calc = list(result1.values())[0].values()
        func_calc = [0.714286, 0.285714]

        # Testing the validity of assign_otu_membership() function
        for hand, func in zip(hand_calc, func_calc):
            self.assertAlmostEqual(
                hand, func, places=5,
                msg='Error! OTU membership calculations are inaccurate!'
            )
    def test_arcsine_sqrt_transform(self):
        """
        Testing arcsine_sqrt_transform() function of biom_calc.py.

        NOTE(review): the sample under test is selected by position (index 3)
        in the result dictionary, so this test relies on the dictionary's
        iteration order -- confirm that the order is deterministic.

        :return: Returns OK if testing goal is achieved, otherwise raises
                 error.
        """
        self.result1 = bc.relative_abundance(self.biom)
        self.result2 = bc.arcsine_sqrt_transform(self.result1)

        # Obtaining results to compare.
        hand_calc = [1.00685369, 0.563942641]
        # list() is required because dictionary views cannot be indexed on
        # Python 3; on Python 2 it is a harmless copy.
        func_calc = list(self.result2.values())[3].values()

        # Testing validity of the transforms.
        for hand, func in zip(hand_calc, func_calc):
            self.assertAlmostEqual(
                hand, func, places=7,
                msg='Function did not calculate trnasformation accurately.'
            )
Beispiel #13
0
    def test_mean_otu_pct_abundance(self):
        """
        Testing mean_otu_pct_abundance() function of biom_calc.py.

        Only the OTU IDs requested from the function are checked against the
        hand calculated percentages.

        :return: Returns OK, if testing goal was achieved, otherwise raises error.
        """
        requested_otus = ("GG_OTU_1", "GG_OTU_2")

        self.rel_a = bc.relative_abundance(self.biomf)
        self.result = bc.mean_otu_pct_abundance(self.rel_a, list(requested_otus))

        # Hand calculated mean percent abundance for every OTU in the table
        expected_pct = {
            "GG_OTU_1": 14.52348298,
            "GG_OTU_2": 15.73217761,
            "GG_OTU_3": 26.58131438,
            "GG_OTU_4": 22.91732137,
            "GG_OTU_5": 20.24570366,
        }

        # Testing the validity of the calculations of mean_otu_pct_abundance().
        for otu_id in requested_otus:
            self.assertAlmostEqual(
                expected_pct[otu_id], self.result[otu_id],
                msg="Mean OTU percent abundance not calculated accurately."
            )
Beispiel #14
0
    def test_relative_abundance(self):
        """
        Testing relative abundance() function of biom.calc.py.

        The values calculated for "Sample3" are compared, in iteration
        order, with hand calculated relative abundances.

        :return: Returns OK, if testing goal is achieved, otherwise raises
                error.
        """
        self.result = bc.relative_abundance(self.biomf)

        # Hand calculated relative abundances
        expected = [0.02857142857, 0.11428571429, 0.2, 0.28571428571,
                    0.37142857143]

        # Relative abundances calculated by the function for the sample
        observed = self.result["Sample3"].values()

        # Testing the validity of relative_abundance() function.
        for exp_val, obs_val in zip(expected, observed):
            self.assertAlmostEqual(
                exp_val, obs_val,
                msg="Relative abundances not calculated accurately."
                )
Beispiel #15
0
def main():
    """
    Run LDA on the input data and plot the result.

    The mapping file supplies the category (args.group_by column) of every
    sample. The LDA input is either the distance matrix given by
    args.dist_matrix_file or, when that option is not set, the arcsine
    square-root transformed relative abundances of the BIOM table given by
    args.otu_table. The input table can optionally be saved
    (args.save_lda_input) before run_LDA() is called, and the result is
    drawn with plot_LDA() in 2D or, when args.dimensions is 3, in 3D.

    Exits with an error message if the mapping file, the distance matrix or
    the BIOM file cannot be read, or if args.group_by is not a column of the
    mapping file.
    """
    args = handle_program_options()

    # Parse and read mapping file
    try:
        header, imap = util.parse_map_file(args.map_fp)
    except IOError as ioe:
        err_msg = "\nError in metadata mapping filepath (-m): {}\n"
        sys.exit(err_msg.format(ioe))
    # list.index() raises ValueError for an unknown column; report it instead
    # of failing with a traceback.
    try:
        category_idx = header.index(args.group_by)
    except ValueError:
        err_msg = "\nError: Specified mapping category '{}' not found.\n"
        sys.exit(err_msg.format(args.group_by))

    # Obtain group colors. An explicit test is used rather than an assert,
    # because asserts are stripped when Python runs with -O.
    if args.colors is None:
        categories = {v[category_idx] for v in imap.values()}
        color_cycle = cycle(Set3_12.hex_colors)
        # next() works on both Python 2.6+ and Python 3
        class_colors = {c: next(color_cycle) for c in categories}
    else:
        class_colors = util.color_mapping(imap, header, args.group_by, args.colors)

    if args.dist_matrix_file:
        try:
            dm_data = pd.read_csv(args.dist_matrix_file, sep="\t", index_col=0)
        except IOError as ioe:
            err_msg = "\nError with unifrac distance matrix file (-d): {}\n"
            sys.exit(err_msg.format(ioe))
        # Sample IDs are converted to str before the mapping file lookup
        dm_data.insert(0, "Condition", [imap[str(sid)][category_idx] for sid in dm_data.index])
        if args.annotate_points:
            sampleids = [str(sid) for sid in dm_data.index]
        else:
            sampleids = None
        if args.save_lda_input:
            dm_data.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(dm_data)
    else:
        # Load biom file and calculate relative abundance
        try:
            biomf = biom.load_table(args.otu_table)
        except IOError as ioe:
            # NOTE(review): this message names "-d", the same flag as the
            # distance matrix message above -- confirm the intended flag.
            err_msg = "\nError with biom format file (-d): {}\n"
            sys.exit(err_msg.format(ioe))
        # Get normalized relative abundances
        rel_abd = bc.relative_abundance(biomf)
        rel_abd = bc.arcsine_sqrt_transform(rel_abd)
        # Transposed so that the sample IDs form the index
        df_rel_abd = pd.DataFrame(rel_abd).T
        df_rel_abd.insert(0, "Condition", [imap[sid][category_idx]
                                           for sid in df_rel_abd.index])
        if args.annotate_points:
            sampleids = df_rel_abd.index
        else:
            sampleids = None
        if args.save_lda_input:
            df_rel_abd.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(df_rel_abd)

    # Plot LDA: the keyword arguments shared by the 2D and 3D plots are built
    # once, and the 3D-only ones are added on demand.
    plot_kwargs = dict(style=args.ggplot2_style, fig_size=args.figsize,
                       label_pad=args.label_padding, font_size=args.font_size,
                       sids=sampleids, pt_size=args.point_size,
                       out_fp=args.out_fp)
    if args.dimensions == 3:
        plot_kwargs.update(dim=3, zangles=args.z_angles)
    plot_LDA(X_lda, y_lda, class_colors, exp_var, **plot_kwargs)
Beispiel #16
0
def main():
    """
    Run LDA on the input data and plot the result.

    The mapping file supplies the category (args.group_by column) of every
    sample. The LDA input is either the distance matrix given by
    args.dist_matrix_file or, when that option is not set, the arcsine
    square-root transformed relative abundances of the BIOM table given by
    args.otu_table. The input table can optionally be saved
    (args.save_lda_input) before run_LDA() is called, and the result is
    drawn with plot_LDA() in 2D or, when args.dimensions is 3, in 3D.

    Exits with an error message if the mapping file, the distance matrix or
    the BIOM file cannot be read, or if args.group_by is not a column of the
    mapping file.
    """
    args = handle_program_options()

    # Parse and read mapping file
    try:
        header, imap = util.parse_map_file(args.map_fp)
    except IOError as ioe:
        err_msg = "\nError in metadata mapping filepath (-m): {}\n"
        sys.exit(err_msg.format(ioe))
    # list.index() raises ValueError for an unknown column; report it instead
    # of failing with a traceback.
    try:
        category_idx = header.index(args.group_by)
    except ValueError:
        err_msg = "\nError: Specified mapping category '{}' not found.\n"
        sys.exit(err_msg.format(args.group_by))

    # Obtain group colors. An explicit test is used rather than an assert,
    # because asserts are stripped when Python runs with -O.
    if args.colors is None:
        categories = {v[category_idx] for v in imap.values()}
        color_cycle = cycle(Set3_12.hex_colors)
        # next() works on both Python 2.6+ and Python 3
        class_colors = {c: next(color_cycle) for c in categories}
    else:
        class_colors = util.color_mapping(imap, header, args.group_by,
                                          args.colors)

    if args.dist_matrix_file:
        try:
            dm_data = pd.read_csv(args.dist_matrix_file, sep="\t", index_col=0)
        except IOError as ioe:
            err_msg = "\nError with unifrac distance matrix file (-d): {}\n"
            sys.exit(err_msg.format(ioe))
        # Sample IDs are converted to str before the mapping file lookup
        dm_data.insert(0, "Condition",
                       [imap[str(sid)][category_idx] for sid in dm_data.index])
        if args.annotate_points:
            sampleids = [str(sid) for sid in dm_data.index]
        else:
            sampleids = None
        if args.save_lda_input:
            dm_data.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(dm_data)
    else:
        # Load biom file and calculate relative abundance
        try:
            biomf = biom.load_table(args.otu_table)
        except IOError as ioe:
            # NOTE(review): this message names "-d", the same flag as the
            # distance matrix message above -- confirm the intended flag.
            err_msg = "\nError with biom format file (-d): {}\n"
            sys.exit(err_msg.format(ioe))
        # Get normalized relative abundances
        rel_abd = bc.relative_abundance(biomf)
        rel_abd = bc.arcsine_sqrt_transform(rel_abd)
        # Transposed so that the sample IDs form the index
        df_rel_abd = pd.DataFrame(rel_abd).T
        df_rel_abd.insert(
            0, "Condition",
            [imap[sid][category_idx] for sid in df_rel_abd.index])
        if args.annotate_points:
            sampleids = df_rel_abd.index
        else:
            sampleids = None
        if args.save_lda_input:
            df_rel_abd.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(df_rel_abd)

    # Plot LDA: the keyword arguments shared by the 2D and 3D plots are built
    # once, and the 3D-only ones are added on demand.
    plot_kwargs = {
        "style": args.ggplot2_style,
        "fig_size": args.figsize,
        "label_pad": args.label_padding,
        "font_size": args.font_size,
        "sids": sampleids,
        "pt_size": args.point_size,
        "out_fp": args.out_fp,
    }
    if args.dimensions == 3:
        plot_kwargs["dim"] = 3
        plot_kwargs["zangles"] = args.z_angles
    plot_LDA(X_lda, y_lda, class_colors, exp_var, **plot_kwargs)
def main():
    """
    Create one LDA bubble plot per requested OTU.

    The LDA input is either the distance matrix given by
    args.dist_matrix_file or, when that option is not set, the arcsine
    square-root transformed relative abundances of the BIOM table. For each
    OTU ID read from args.otu_ids_fp a scatter plot of the first two LDA
    components is drawn, in which every sample's marker size is its
    transformed abundance for that OTU multiplied by args.scale_by. Each
    figure is saved in args.output_dir under a name derived from the OTU
    name, with args.save_as as the file extension.

    Exits with an error message if the mapping file, the OTU ID file, the
    BIOM file or the distance matrix cannot be read.
    """
    args = handle_program_options()

    # Parse and read mapping file
    try:
        header, imap = util.parse_map_file(args.map_fp)
        category_idx = header.index(args.group_by)
    except IOError as ioe:
        err_msg = "\nError in metadata mapping filepath (-m): {}\n"
        sys.exit(err_msg.format(ioe))
    # Obtain group colors
    class_colors = util.color_mapping(imap, header, args.group_by, args.color_by)

    # Get otus for LDA bubble plots
    try:
        bubble_otus = set(pd.read_csv(args.otu_ids_fp, sep="\n", header=None)[0])
    except IOError as ioe:
        err_msg = "\nError in OTU IDs file (--bubble): {}\n"
        sys.exit(err_msg.format(ioe))

    # Load biom file and calculate relative abundance
    try:
        biomf = biom.load_table(args.otu_table)
    except IOError as ioe:
        err_msg = "\nError with biom format file (-d): {}\n"
        sys.exit(err_msg.format(ioe))

    # Get normalized relative abundances
    rel_abd = bc.relative_abundance(biomf)
    rel_abd = bc.arcsine_sqrt_transform(rel_abd)
    # Distinct positive abundances; they define the bubble-size legend range
    abd_val = {abd for otu_abd in rel_abd.values()
               for abd in otu_abd.values() if abd > 0}
    bubble_range = np.linspace(min(abd_val), max(abd_val), num=5) * args.scale_by
    # Get abundance to the nearest 50
    bubble_range = [int(50 * round(float(abd)/50)) for abd in bubble_range[1:]]

    # Set up input for LDA calc and get LDA transformed data
    if args.dist_matrix_file:
        try:
            uf_data = pd.read_csv(args.dist_matrix_file, sep="\t", index_col=0)
        except IOError as ioe:
            err_msg = "\nError with unifrac distance matrix file (-d): {}\n"
            sys.exit(err_msg.format(ioe))
        uf_data.insert(0, "Condition", [imap[sid][category_idx] for sid in uf_data.index])
        sampleids = uf_data.index
        if args.save_lda_input:
            uf_data.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(uf_data)
    else:
        # Transposed so that the sample IDs form the index
        df_rel_abd = pd.DataFrame(rel_abd).T
        df_rel_abd.insert(0, "Condition", [imap[sid][category_idx]
                                           for sid in df_rel_abd.index])
        sampleids = df_rel_abd.index
        if args.save_lda_input:
            df_rel_abd.to_csv(args.save_lda_input, sep="\t")
        # Run LDA
        X_lda, y_lda, exp_var = run_LDA(df_rel_abd)

    # Calculate position and size of SampleIDs to plot for each OTU
    for otuid in bubble_otus:
        otuname = oc.otu_name(biomf.metadata(otuid, axis="observation")["taxonomy"])
        plot_data = {cat: {"x": [], "y": [], "size": [], "label": []}
                     for cat in class_colors.keys()}
        for sid, data in zip(sampleids, X_lda):
            category = plot_data[imap[sid][category_idx]]
            try:
                size = rel_abd[sid][otuid] * args.scale_by
            except KeyError as ke:
                # Samples lacking this OTU (or missing from the abundance
                # data) are reported and left out of the plot.
                print("{} not found in {} sample.".format(ke, sid))
                continue
            category["x"].append(float(data[0]))
            category["y"].append(float(data[1]))
            category["size"].append(size)

        # Plot LDA bubble for each OTU
        fig = plt.figure(figsize=args.figsize)
        ax = fig.add_subplot(111)
        for cat in plot_data:
            plt.scatter(plot_data[cat]["x"], plot_data[cat]["y"],
                        s=plot_data[cat]["size"], label=cat, color=class_colors[cat],
                        alpha=0.85, edgecolors="k")
        if X_lda.shape[1] == 1:
            plt.ylim((0.5, 2.5))
        plt.title(" ".join(otuname.split("_")), style="italic", fontsize=13)
        # Fall back to a plain axis label when the explained variance for a
        # component is unavailable. Only lookup/type failures are caught so
        # that KeyboardInterrupt and unrelated errors are no longer swallowed.
        try:
            plt.xlabel("LD1 (Percent Explained Variance: {:.3f}%)".format(exp_var[0]*100),
                       fontsize=13, labelpad=15)
        except (LookupError, TypeError):
            plt.xlabel("LD1", fontsize=13, labelpad=15)
        try:
            plt.ylabel("LD2 (Percent Explained Variance: {:.3f}%)".format(exp_var[1]*100),
                       fontsize=13, labelpad=15)
        except (LookupError, TypeError):
            plt.ylabel("LD2", fontsize=13, labelpad=15)

        lgnd1 = plt.legend(loc="best", scatterpoints=3, fontsize=13)
        for i in range(len(class_colors)):
            lgnd1.legendHandles[i]._sizes = [80]  # Change the legend marker size manually
        # Add the legend manually to the current plot, because the second
        # plt.legend() call below would otherwise replace it
        plt.gca().add_artist(lgnd1)

        # Empty scatter plots serve as handles for the bubble-size legend
        c = [plt.scatter([], [], c="w", edgecolors="k", s=s1) for s1 in bubble_range]
        plt.legend(c, ["{}".format(s2) for s2 in bubble_range],
                   title="Scaled Bubble\n       Sizes", frameon=True, labelspacing=2,
                   fontsize=13, loc=4, scatterpoints=1, borderpad=1.1)

        # Set style for LDA bubble plots
        if args.ggplot2_style:
            gu.ggplot2_style(ax)
            fc = "0.8"
        else:
            fc = "none"

        # Save LDA bubble plots to output directory
        if args.verbose:
            print("Saving chart for {}".format(" ".join(otuname.split("_"))))
        fig.savefig(pj(args.output_dir, "_".join(otuname.split())) + "." + args.save_as,
                    facecolor=fc, edgecolor="none", dpi=300,
                    bbox_inches="tight", pad_inches=0.2)
        plt.close(fig)
def main():
    """
    Create one PCoA plot per requested OTU.

    For every OTU ID listed in args.otu_ids_fp, the samples found in the
    principal coordinates file are grouped by their args.group_by category
    and plotted with plot_PCoA(); each sample's size value is its arcsine
    square-root transformed relative abundance for that OTU multiplied by
    args.scale_by.

    Exits with an error message if the BIOM file, the principal coordinates
    file or the mapping file cannot be opened, or if args.group_by is not a
    column of the mapping file.
    """
    args = handle_program_options()

    # Opening each input file is only a readability check.
    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit("\nError with BIOM format file:{}\n".format(ioe))

    try:
        with open(args.pcoa_fp):
            pass
    except IOError as ioe:
        sys.exit("\nError with principal coordinates file:{}\n".format(ioe))

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit("\nError with mapping file:{}\n".format(ioe))

    # check that the output dir exists, create it if not
    util.ensure_dir(args.output_dir)

    # load the BIOM table
    biomtbl = biom.load_table(args.otu_table)

    # Read unifrac principal coordinates file
    unifrac = util.parse_unifrac(args.pcoa_fp)

    # Read otu data file. Mode "rU" was removed in Python 3.11, so the file
    # is opened with plain "r"; str.splitlines() still accepts every newline
    # convention. Blank lines are skipped so that no empty OTU ID is looked
    # up in the BIOM table.
    with open(args.otu_ids_fp, "r") as nciF:
        stripped = (line.strip() for line in nciF.read().splitlines())
        otus = {otu for otu in stripped if otu}

    # Gather categories from mapping file
    header, imap = util.parse_map_file(args.mapping)
    try:
        category_idx = header.index(args.group_by)
    except ValueError:
        msg = "Error: Specified mapping category '{}' not found."
        sys.exit(msg.format(args.group_by))
    category_ids = util.gather_categories(imap, header, [args.group_by])
    color_map = util.color_mapping(imap, header, args.group_by, args.colors)
    rel_abd = bc.relative_abundance(biomtbl)
    rel_abd = bc.arcsine_sqrt_transform(rel_abd)

    # plot samples based on relative abundance of some OTU ID
    for otuid in otus:
        otuname = oc.otu_name(
            biomtbl.metadata(otuid, axis="observation")["taxonomy"])
        cat_data = {
            cat: {
                "pc1": [],
                "pc2": [],
                "size": []
            }
            for cat in category_ids
        }

        for sid in unifrac["pcd"]:
            category = cat_data[imap[sid][category_idx]]
            try:
                size = rel_abd[sid][otuid] * args.scale_by
            except KeyError as ke:
                # Samples lacking this OTU (or missing from the abundance
                # data) are reported and left out of the plot.
                print("{} not found in {} sample.".format(ke, sid))
                continue
            category["pc1"].append(float(unifrac["pcd"][sid][0]))
            category["pc2"].append(float(unifrac["pcd"][sid][1]))
            category["size"].append(size)

        if args.verbose:
            print("Saving chart for {}".format(" ".join(otuname.split("_"))))
        xr, yr = calculate_xy_range(cat_data)
        plot_PCoA(cat_data, otuname, unifrac, color_map.keys(), color_map, xr,
                  yr, args.output_dir, args.save_as, args.ggplot2_style)
def main():
    """
    Create one PCoA plot per requested OTU.

    For every OTU ID listed in args.otu_ids_fp, the samples found in the
    principal coordinates file are grouped by their args.group_by category
    and plotted with plot_PCoA(); each sample's size value is its arcsine
    square-root transformed relative abundance for that OTU multiplied by
    args.scale_by.

    Exits with an error message if the BIOM file, the principal coordinates
    file or the mapping file cannot be opened, or if args.group_by is not a
    column of the mapping file.
    """
    args = handle_program_options()

    # Opening each input file is only a readability check.
    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit("\nError with BIOM format file:{}\n".format(ioe))

    try:
        with open(args.pcoa_fp):
            pass
    except IOError as ioe:
        sys.exit("\nError with principal coordinates file:{}\n".format(ioe))

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit("\nError with mapping file:{}\n".format(ioe))

    # check that the output dir exists, create it if not
    util.ensure_dir(args.output_dir)

    # load the BIOM table
    biomtbl = biom.load_table(args.otu_table)

    # Read unifrac principal coordinates file
    unifrac = util.parse_unifrac(args.pcoa_fp)

    # Read otu data file. Mode "rU" was removed in Python 3.11, so the file
    # is opened with plain "r"; str.splitlines() still accepts every newline
    # convention. Blank lines are skipped so that no empty OTU ID is looked
    # up in the BIOM table.
    otus = set()
    with open(args.otu_ids_fp, "r") as nciF:
        for line in nciF.read().splitlines():
            line = line.strip()
            if line:
                otus.add(line)

    # Gather categories from mapping file
    header, imap = util.parse_map_file(args.mapping)
    try:
        category_idx = header.index(args.group_by)
    except ValueError:
        msg = "Error: Specified mapping category '{}' not found."
        sys.exit(msg.format(args.group_by))
    category_ids = util.gather_categories(imap, header, [args.group_by])
    color_map = util.color_mapping(imap, header, args.group_by, args.colors)
    rel_abd = bc.relative_abundance(biomtbl)
    rel_abd = bc.arcsine_sqrt_transform(rel_abd)

    # plot samples based on relative abundance of some OTU ID
    for otuid in otus:
        otuname = oc.otu_name(biomtbl.metadata(otuid, axis="observation")["taxonomy"])
        cat_data = {cat: {"pc1": [], "pc2": [], "size": []}
                    for cat in category_ids}

        for sid in unifrac["pcd"]:
            category = cat_data[imap[sid][category_idx]]
            try:
                size = rel_abd[sid][otuid] * args.scale_by
            except KeyError as ke:
                # Samples lacking this OTU (or missing from the abundance
                # data) are reported and left out of the plot.
                print("{} not found in {} sample.".format(ke, sid))
                continue
            category["pc1"].append(float(unifrac["pcd"][sid][0]))
            category["pc2"].append(float(unifrac["pcd"][sid][1]))
            category["size"].append(size)

        if args.verbose:
            print("Saving chart for {}".format(" ".join(otuname.split("_"))))
        xr, yr = calculate_xy_range(cat_data)
        plot_PCoA(cat_data, otuname, unifrac, color_map.keys(),
                  color_map, xr, yr, args.output_dir,
                  args.save_as, args.ggplot2_style)
Beispiel #20
0
def main():
    """
    Calculate pairwise OTU correlations and write the consolidated results.

    Relative abundances of the input BIOM table are transformed with ast()
    and every pairwise combination of OTU IDs is split into chunks that are
    handed to calc_corr_helper() through a multiprocessing pool. Results
    whose first field is "Spearman" are collected separately from all other
    results (treated as Kendall Tau), and both collections are passed
    through run_fdr(). Only Spearman results whose (field 2, field 3) pair
    is also present in the Kendall Tau results are written to args.out_fnh
    as tab-separated text.

    Exits with an error message if the BIOM file or the mapping file cannot
    be read, or if the correlation calculation raises an exception.
    """
    args = program_options()

    try:
        biomf = biom.load_table(args.in_biomf)
    except IOError as ioe:
        sys.exit("Error with input BIOM format file: {}".format(ioe))
    else:
        rel_abd = relative_abundance(biomf)
        ast_rel_abd = ast(rel_abd)
        # Get pairwise combinations of OTUs
        otu_combos = list(combinations(biomf.ids("observation"), 2))

    try:
        mheader, mdata = parse_map_file(args.map_fnh)
    except IOError as ioe:
        sys.exit("Error with input mapping file: {}".format(ioe))
    else:
        # Gather sampleID categories
        sid_cat = gather_categories(mdata, mheader, [args.category_column])

    # Create arguments for helper function to be supplied to multiprocessing pool.map()
    chunksize = 10000
    # range() is used instead of the Python 2-only xrange(); only one value
    # per chunk is generated, so the extra list on Python 2 is negligible.
    jobs = [(otu_combos[x:x + chunksize], sid_cat, ast_rel_abd)
            for x in range(0, len(otu_combos), chunksize)]
    print("{0} jobs created.".format(len(jobs)))

    # Start multiprocessing jobs
    try:
        print("Starting map_async()...")
        pool = Pool()
        res = pool.map_async(calc_corr_helper, jobs)
        pool.close()
        pool.join()
    except Exception:
        sys.exit("Error while calculating correlations\n{}".format(
            format_exc()))
    else:
        # Split the results by the test name stored in their first field
        s_rho_calc = []
        k_tau_calc = []
        for r in res.get():
            for s in r:
                if s[0] == "Spearman":
                    s_rho_calc.append(s)
                else:
                    k_tau_calc.append(s)

    # Get FDR corrected correlation results
    print("Running FDR correction on {} Spearman's Rho.".format(
        len(s_rho_calc)))
    fdr_corr_s_rho = run_fdr(s_rho_calc)
    print("Running FDR correction on {} Kendall Tau.".format(len(k_tau_calc)))
    fdr_corr_k_tau = run_fdr(k_tau_calc)

    # Consolidate correlation results: keep the pairs reported by both tests
    k_kos = {(e[2], e[3]) for e in fdr_corr_k_tau}
    s_kos = {(f[2], f[3]) for f in fdr_corr_s_rho}
    final_kos = s_kos & k_kos
    print(
        "{0} elements from KendallTau\n{1} elements from SpearmanRho\n{2} elements are "
        "common to both.".format(len(k_kos), len(s_kos), len(final_kos)))
    # The first field (the test name) is dropped from the written rows
    final_fdr_corr_results = [
        cdata[1:] for cdata in fdr_corr_s_rho
        if (cdata[2], cdata[3]) in final_kos
    ]

    # Write our results to file
    with open(args.out_fnh, "w") as outf:
        outf.write("Category\tVariable\tby Variable\tCorrelation\tp value\n")
        for k in final_fdr_corr_results:
            outf.write("{0}\t{1}\t{2}\t{3}\t{4}\n".format(
                k[0], k[1], k[2], k[3], k[4]))