Example #1
0
    def test_transform_raw_abundance(self):
        """
        Check bc.transform_raw_abundance() against hand-computed values.

        Two calls are exercised on ``self.biomf``: one with
        ``sample_abd=False`` and no explicit transform (checked per OTU ID),
        and one with ``fn=math.sqrt`` and the default ``sample_abd`` (checked
        per sample ID).

        :return: Returns OK if testing goal is achieved, otherwise raises error.
        """
        self.result = bc.transform_raw_abundance(self.biomf, sample_abd=False)
        self.result1 = bc.transform_raw_abundance(self.biomf, fn=math.sqrt)

        # Reference values worked out by hand, keyed by OTU ID
        expected_by_otu = {
            "GG_OTU_1": 1.544068044,
            "GG_OTU_2": 1.579783597,
            "GG_OTU_3": 1.73239376,
            "GG_OTU_4": 1.73239376,
            "GG_OTU_5": 1.62324929,
        }
        # Reference values worked out by hand, keyed by sample ID
        expected_by_sample = {
            "S1": 5.099019514, "S2": 5.567764363,
            "S3": 4.242640687, "S4": 3.31662479,
            "S5": 4.795831523, "S6": 5.196152423,
            "S7": 5.291502622, "S8": 5.099019514,
            "S9": 3, "S10": 4.898979486,
        }

        # Each entry pairs a reference table with the result it validates and
        # the message reported when a value is off.
        checks = (
            (expected_by_otu, self.result,
             "Raw abundance transformation not computed accurately (Test1)."),
            (expected_by_sample, self.result1,
             "Raw abundance transformation not computed accurately (Test2)."),
        )
        for expected, actual, failure_msg in checks:
            for key, value in expected.items():
                self.assertAlmostEqual(value, actual[key], msg=failure_msg)
    def test_transform_raw_abundance(self):
        """
        Testing transform_raw_abundance() function of biom_calc.py.

        The expected values are obtained by applying math.log10 to the totals
        returned by raw_abundance() for the same table; every one of them is
        then compared with the matching entry of the transformed result.

        :return: Returns OK if testing goal is achieved, otherwise raises
                 error.
        """
        self.result = bc.transform_raw_abundance(
            self.biom, sample_abd=False
        )
        self.result1 = bc.raw_abundance(self.biom, sample_abd=False)

        # Both results have to describe the same IDs before their values can
        # be compared entry by entry.
        self.assertEqual(
            sorted(self.result), sorted(self.result1),
            msg='Transformed and raw results do not cover the same IDs.'
        )

        # Testing the validity of transform function.
        # assertAlmostEqual() only works on numbers: when it is handed two
        # whole sequences, `places` is never applied and the subtraction it
        # performs raises TypeError, so the values are compared one by one.
        for key, raw_total in self.result1.items():
            self.assertAlmostEqual(
                self.result[key], math.log10(float(raw_total)), places=10,
                msg='Function did not calculate the transformation accurately.'
            )
Example #3
0
    def test_transform_raw_abundance(self):
        """
        Testing transform_raw_abundance() function of biom_calc.py.

        The transformed values are compared with a hand-computed list. The
        list carries no IDs, so both sides are sorted before pairing, which
        keeps the check independent of dictionary iteration order.

        :return: Returns OK if testing goal is achieved, otherwise raises
                 error.
        """
        self.result = bc.transform_raw_abundance(
            self.biomf, sample_abd=False
            )
        # Not asserted on below; kept so raw_abundance() is still exercised.
        self.result1 = bc.raw_abundance(self.biomf, sample_abd=False)

        # Obtaining manual calculations for comparison testing
        hand_calc = sorted([1.17609125906, 1.7075701761, 1.93951925262,
                            2.08990511144, 2.20139712432])
        computed = sorted(self.result.values())

        # zip() stops at the shorter input, so a result with missing entries
        # would otherwise pass unnoticed.
        self.assertEqual(
            len(hand_calc), len(computed),
            msg="Function did not return the expected number of values."
        )

        # Testing the validity of transform function
        for hand, func in zip(hand_calc, computed):
            self.assertAlmostEqual(
                hand, func,
                msg="Function did not calculate the transformation accurately."
            )
Example #4
0
def main():
    """
    Write an iTol data set file (and optionally a relabelled tree) from an
    OTU table and a mapping file.

    Steps, in order:
      1. Read the command-line options and exit with a message if the OTU
         table or the mapping file cannot be opened.
      2. Load the OTU table as JSON and parse the mapping file.
      3. If an input tree was given, write a copy of its first line with
         single quotes removed and passed through newick_replace_otuids().
      4. For every group from util.gather_categories(), compute results with
         bc.MRA() ('MRA' / 'NMRA') or bc.transform_raw_abundance() ('raw')
         and store them under the OTU name.
      5. Write a LABELS line, a COLORS line and one tab-separated row per
         OTU name (values with five decimals, 0.0 where a group has no
         entry). For 'NMRA' each row is divided by its sum when that sum is
         positive.

    :return: None. Exits through sys.exit() on unreadable input files or an
             unsupported analysis metric.
    """
    args = handle_program_options()

    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError with OTU_Sample abundance data file:{}\n'
            .format(ioe)
        )

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit(
            '\nError with mapping file:{}\n'
            .format(ioe)
        )

    # input data
    # NOTE(review): only the first line of the table is parsed, so the JSON
    # document is presumably stored on a single line -- confirm.
    with open(args.otu_table) as bF:
        biom = json.loads(bF.readline())
    map_header, imap = util.parse_map_file(args.mapping)

    # rewrite tree file with otu names
    if args.input_tree:
        with open(args.input_tree) as treF, open(args.output_tre, 'w') as outF:
            tree = treF.readline()
            if "'" in tree:
                tree = tree.replace("'", '')
            outF.write(newick_replace_otuids(tree, biom))

    oid_rows = {row['id']: row for row in biom['rows']}

    # calculate analysis results
    categories = None
    if args.map_categories is not None:
        categories = args.map_categories.split(',')

    # set transform if --stabilize_variance is specfied
    tform = bc.arcsine_sqrt_transform if args.stabilize_variance else None

    groups = util.gather_categories(imap, map_header, categories)
    for group in groups.values():
        if args.analysis_metric in ['MRA', 'NMRA']:
            results = bc.MRA(biom, group.sids, transform=tform)
        elif args.analysis_metric == 'raw':
            results = bc.transform_raw_abundance(biom, sampleIDs=group.sids,
                                                 sample_abd=False)
        else:
            # Without this branch `results` would be unbound below.
            sys.exit(
                '\nError: unsupported analysis metric:{}\n'
                .format(args.analysis_metric)
            )

        group.results.update({oc.otu_name_biom(oid_rows[oid]): results[oid]
                             for oid in results})

    # Snapshot the (name, group) pairs once so the header and every data row
    # use the same column order; items() works on Python 2 and 3.
    columns = list(groups.items())

    # write iTol data set file
    with open(args.output_itol_table, 'w') as itolF:
        itolF.write(
            'LABELS\t' + '\t'.join([name for name, _ in columns]) + '\n')
        itolF.write(
            'COLORS\t{}\n'.format('\t'.join(['#ff0000'] * len(columns))))
        # Sorted so the row order is reproducible between runs.
        all_otus = sorted({oc.otu_name_biom(row) for row in biom['rows']})

        for oname in all_otus:
            values = [
                group.results[oname] if oname in group.results else 0.0
                for _, group in columns
            ]
            msum = sum(values)
            # normalize avg relative abundance data
            if args.analysis_metric == 'NMRA' and msum > 0:
                values = [value / msum for value in values]

            # Each field is formatted on its own: group names never become
            # str.format field names, so names containing '.', '[', ':' or
            # braces (or equal to 'name') cannot break the row.
            fields = ['{}'.format(oname)]
            fields.extend('{:.5f}'.format(value) for value in values)
            itolF.write('\t'.join(fields) + '\n')
Example #5
0
def main():
    """
    Write an iTol data set file (and optionally a relabelled tree) from a
    BIOM table and a mapping file.

    Steps, in order:
      1. Read the command-line options and exit with a message if the OTU
         table or the mapping file cannot be opened.
      2. Load the table with biom.load_table() and parse the mapping file.
      3. If an input tree was given and args.keep_otuids is not set, write a
         copy of its first line with single quotes removed and passed
         through newick_replace_otuids().
      4. For every group from util.gather_categories(), compute results with
         bc.MRA() ("MRA" / "NMRA") or bc.transform_raw_abundance() ("raw").
         Mapping categories are ignored for "raw". Results are stored under
         the OTU ID when args.keep_otuids is set, otherwise under the OTU
         name derived from the taxonomy metadata.
      5. Write a DATASET_GRADIENT header ("raw") or a DATASET_MULTIBAR
         header (other metrics), followed by one tab-separated row per OTU
         (values with five decimals, 0.0 where a group has no entry). For
         "NMRA" each row is divided by its sum when that sum is positive.

    :return: None. Exits through sys.exit() on unreadable input files or an
             unsupported analysis metric.
    """
    args = handle_program_options()

    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit(
            "\nError with OTU_Sample abundance data file:{}\n".format(ioe))

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit("\nError with mapping file:{}\n".format(ioe))

    # input data
    biomf = biom.load_table(args.otu_table)
    map_header, imap = util.parse_map_file(args.mapping)

    # rewrite tree file with otu names, skip if keep_otuids specified
    if args.input_tree and not args.keep_otuids:
        with open(args.input_tree) as treF, open(args.output_tre, "w") as outF:
            tree = treF.readline()
            if "'" in tree:
                tree = tree.replace("'", '')
            outF.write(newick_replace_otuids(tree, biomf))

    # The ID -> taxonomy lookup is only needed when IDs are replaced by names.
    if not args.keep_otuids:
        oid_rows = {
            id_: md["taxonomy"]
            for val, id_, md in biomf.iter(axis="observation")
        }

    # calculate analysis results
    categories = None
    if args.map_categories is not None and args.analysis_metric != "raw":
        categories = args.map_categories.split(",")

    # set transform if --stabilize_variance is specfied
    tform = bc.arcsine_sqrt_transform if args.stabilize_variance else None

    groups = util.gather_categories(imap, map_header, categories)
    for group in groups.values():
        if args.analysis_metric in ["MRA", "NMRA"]:
            results = bc.MRA(biomf, group.sids, transform=tform)
        elif args.analysis_metric == "raw":
            results = bc.transform_raw_abundance(biomf,
                                                 sampleIDs=group.sids,
                                                 sample_abd=False)
        else:
            # Without this branch `results` would be unbound below.
            sys.exit("\nError: unsupported analysis metric:{}\n".format(
                args.analysis_metric))
        if args.keep_otuids:
            group.results.update({oid: results[oid] for oid in results})
        else:
            group.results.update(
                {oc.otu_name(oid_rows[oid]): results[oid]
                 for oid in results})

    # Snapshot the (name, group) pairs once so the header and every data row
    # use the same column order; items() works on Python 2 and 3.
    columns = list(groups.items())
    group_names = [name for name, _ in columns]

    # write iTol data set file
    with open(args.output_itol_table, "w") as itolF:
        if args.analysis_metric == "raw":
            itolF.write("DATASET_GRADIENT\nSEPARATOR TAB\n")
            itolF.write("DATASET_LABEL\tLog Total Abundance\n")
            itolF.write("COLOR\t#000000\n")
            itolF.write("LEGEND_TITLE\tLog Total Abundance\n")
            itolF.write("LEGEND_SHAPES\t1\n")
            itolF.write("LEGEND_COLORS\t#000000\n")
            itolF.write("LEGEND_LABELS\tLog Total Abundance\n")
            itolF.write("COLOR_MIN\t#FFFFFF\n")
            itolF.write("COLOR_MAX\t#000000\n")
        else:
            # One colour / shape / label entry per group column.
            per_group_colors = "\t".join(["#ff0000"] * len(columns))
            per_group_shapes = "\t".join(["1"] * len(columns))
            itolF.write("DATASET_MULTIBAR\nSEPARATOR TAB\n")
            itolF.write("DATASET_LABEL\t{}\n".format(args.analysis_metric))
            itolF.write("FIELD_COLORS\t{}\n".format(per_group_colors))
            itolF.write("FIELD_LABELS\t" + "\t".join(group_names) + "\n")
            itolF.write("LEGEND_TITLE\t{}\n".format(args.analysis_metric))
            itolF.write("LEGEND_SHAPES\t{}\n".format(per_group_shapes))
            itolF.write("LEGEND_COLORS\t{}\n".format(per_group_colors))
            itolF.write("LEGEND_LABELS\t" + "\t".join(group_names) + "\n")
            itolF.write("WIDTH\t300\n")
        itolF.write("DATA\n")

        # Sorted so the row order is reproducible between runs.
        if args.keep_otuids:
            all_otus = sorted(set(biomf.ids(axis="observation")))
        else:
            all_otus = sorted({
                oc.otu_name(md["taxonomy"])
                for val, id_, md in biomf.iter(axis="observation")
            })

        for oname in all_otus:
            values = [
                group.results[oname] if oname in group.results else 0.0
                for _, group in columns
            ]
            msum = sum(values)
            # normalize avg relative abundance data
            if args.analysis_metric == "NMRA" and msum > 0:
                values = [value / msum for value in values]

            # Each field is formatted on its own: group names never become
            # str.format field names, so names containing '.', '[', ':' or
            # braces (or equal to "name") cannot break the row.
            fields = ["{}".format(oname)]
            fields.extend("{:.5f}".format(value) for value in values)
            itolF.write("\t".join(fields) + "\n")
Example #6
0
def main():
    """
    Write an iTol data set file (and optionally a relabelled tree) from a
    BIOM table and a mapping file.

    Steps, in order:
      1. Read the command-line options and exit with a message if the OTU
         table or the mapping file cannot be opened.
      2. Load the table with biom.load_table() and parse the mapping file.
      3. If an input tree was given, write a copy of its first line with
         single quotes removed and passed through newick_replace_otuids().
      4. For every group from util.gather_categories(), compute results with
         bc.MRA() ("MRA" / "NMRA") or bc.transform_raw_abundance() ("raw")
         and store them under the OTU name derived from the taxonomy
         metadata.
      5. Write a DATASET_GRADIENT header ("raw") or a DATASET_MULTIBAR
         header (other metrics), followed by one tab-separated row per OTU
         name (values with five decimals, 0.0 where a group has no entry).
         For "NMRA" each row is divided by its sum when that sum is
         positive.

    :return: None. Exits through sys.exit() on unreadable input files or an
             unsupported analysis metric.
    """
    args = handle_program_options()

    try:
        with open(args.otu_table):
            pass
    except IOError as ioe:
        sys.exit(
            "\nError with OTU_Sample abundance data file:{}\n"
            .format(ioe)
        )

    try:
        with open(args.mapping):
            pass
    except IOError as ioe:
        sys.exit(
            "\nError with mapping file:{}\n"
            .format(ioe)
        )

    # input data
    biomf = biom.load_table(args.otu_table)
    map_header, imap = util.parse_map_file(args.mapping)

    # rewrite tree file with otu names
    if args.input_tree:
        with open(args.input_tree) as treF, open(args.output_tre, "w") as outF:
            tree = treF.readline()
            if "'" in tree:
                tree = tree.replace("'", '')
            outF.write(newick_replace_otuids(tree, biomf))

    oid_rows = {id_: md["taxonomy"]
                for val, id_, md in biomf.iter(axis="observation")}

    # calculate analysis results
    categories = None
    if args.map_categories is not None:
        categories = args.map_categories.split(",")

    # set transform if --stabilize_variance is specfied
    tform = bc.arcsine_sqrt_transform if args.stabilize_variance else None

    groups = util.gather_categories(imap, map_header, categories)
    for group in groups.values():
        if args.analysis_metric in ["MRA", "NMRA"]:
            results = bc.MRA(biomf, group.sids, transform=tform)
        elif args.analysis_metric == "raw":
            results = bc.transform_raw_abundance(biomf, sampleIDs=group.sids,
                                                 sample_abd=False)
        else:
            # Without this branch `results` would be unbound below.
            sys.exit(
                "\nError: unsupported analysis metric:{}\n"
                .format(args.analysis_metric)
            )
        group.results.update({oc.otu_name(oid_rows[oid]): results[oid]
                             for oid in results})

    # Snapshot the (name, group) pairs once so the header and every data row
    # use the same column order; items() works on Python 2 and 3.
    columns = list(groups.items())
    group_names = [name for name, _ in columns]

    # write iTol data set file
    with open(args.output_itol_table, "w") as itolF:
        if args.analysis_metric == "raw":
            itolF.write("DATASET_GRADIENT\nSEPARATOR TAB\n")
            itolF.write("DATASET_LABEL\tLog Total Abundance\n")
            itolF.write("COLOR\t#000000\n")
            itolF.write("LEGEND_TITLE\tLog Total Abundance\n")
            itolF.write("LEGEND_SHAPES\t1\n")
            itolF.write("LEGEND_COLORS\t#000000\n")
            itolF.write("LEGEND_LABELS\tLog Total Abundance\n")
            itolF.write("COLOR_MIN\t#FFFFFF\n")
            itolF.write("COLOR_MAX\t#000000\n")
        else:
            # One colour / shape / label entry per group column.
            per_group_colors = "\t".join(["#ff0000"] * len(columns))
            per_group_shapes = "\t".join(["1"] * len(columns))
            itolF.write("DATASET_MULTIBAR\nSEPARATOR TAB\n")
            # Label with the metric actually computed, so an "MRA" run is
            # not presented as "NMRA".
            itolF.write("DATASET_LABEL\t{}\n".format(args.analysis_metric))
            itolF.write("FIELD_COLORS\t{}\n".format(per_group_colors))
            itolF.write("FIELD_LABELS\t" + "\t".join(group_names) + "\n")
            itolF.write("LEGEND_TITLE\t{}\n".format(args.analysis_metric))
            itolF.write("LEGEND_SHAPES\t{}\n".format(per_group_shapes))
            itolF.write("LEGEND_COLORS\t{}\n".format(per_group_colors))
            itolF.write("LEGEND_LABELS\t" + "\t".join(group_names) + "\n")
            itolF.write("WIDTH\t300\n")
        itolF.write("DATA\n")
        # Sorted so the row order is reproducible between runs.
        all_otus = sorted({oc.otu_name(md["taxonomy"])
                           for val, id_, md in
                           biomf.iter(axis="observation")})

        for oname in all_otus:
            values = [
                group.results[oname] if oname in group.results else 0.0
                for _, group in columns
            ]
            msum = sum(values)
            # normalize avg relative abundance data
            if args.analysis_metric == "NMRA" and msum > 0:
                values = [value / msum for value in values]

            # Each field is formatted on its own: group names never become
            # str.format field names, so names containing '.', '[', ':' or
            # braces (or equal to "name") cannot break the row.
            fields = ["{}".format(oname)]
            fields.extend("{:.5f}".format(value) for value in values)
            itolF.write("\t".join(fields) + "\n")