Example #1
def main():
    args = get_args()
    dna = util.Table(args.input_dna)
    rna = util.Table(args.input_rna)
    method = {"laplace": laplace, "witten_bell": witten_bell}[args.method]
    assert dna.is_stratified == rna.is_stratified, \
        "FAILED: Tables have nonequal stratification status."
    strat_mode = dna.is_stratified
    all_features = sorted(set(dna.rowheads) | set(rna.rowheads))
    for t in dna, rna:
        if strat_mode:
            remove_totals(t)
            all_features = [k for k in all_features if util.c_strat_delim in k]
        method(t, all_features)
        if strat_mode:
            hsum(t)
    # write out dna/rna
    dna.write(args.output_basename + c_new_dna_extension, unfloat=True)
    rna.write(args.output_basename + c_new_rna_extension, unfloat=True)
    # normalize rna by dna (account for seq depth [scale]), then write
    scale = [d / r for r, d in zip(rna.colsums, dna.colsums)]
    for i in range(len(dna.data)):
        rna.data[i] = [
            s * r / d for s, r, d in zip(scale, rna.data[i], dna.data[i])
        ]
        if args.log_transform:
            divisor = log(args.log_base)
            rna.data[i] = [log(x) / divisor for x in rna.data[i]]
    rna.write(args.output_basename + c_norm_rna_extension, unfloat=True)
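
The depth-scaling step above reduces to a small amount of per-sample arithmetic. Below is a minimal standalone sketch of it, using plain nested lists in place of util.Table; the two-feature, two-sample values and the log base are invented for illustration.

# Minimal sketch of the depth-scaling step above; the tables and the
# log base are made up for illustration.
from math import log

dna = [[10.0, 20.0], [30.0, 40.0]]   # rows = features, columns = samples
rna = [[ 5.0, 10.0], [15.0, 10.0]]

dna_colsums = [sum(col) for col in zip(*dna)]
rna_colsums = [sum(col) for col in zip(*rna)]

# per-sample scale factor: total DNA depth over total RNA depth
scale = [d / r for r, d in zip(rna_colsums, dna_colsums)]

# depth-corrected RNA/DNA ratios, as computed row by row above
norm = [[s * r / d for s, r, d in zip(scale, rna_row, dna_row)]
        for rna_row, dna_row in zip(rna, dna)]

# optional log transform, mirroring the --log_transform branch
divisor = log(2)
norm_log = [[log(x) / divisor for x in row] for row in norm]
print(norm)      # [[1.0, 1.5], [1.0, 0.75]]
print(norm_log)
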
Example #2
def main( ):
    args = get_args()
    # load the table; find unique feature ids (no names)
    table = util.Table( args.input )
    features = {k for k in table.rowheads}
    features = {k.split( util.c_strat_delim )[0] for k in features}
    features = {k.split( util.c_name_delim )[0] for k in features}
    # decide what grouping file to load and how
    if args.custom is not None:
        print( "Loading custom groups file: {}".format( args.custom ), file=sys.stderr )
        p_groups, start, skip = args.custom, 0, []
    elif args.groups is not None:
        p_groups, start, skip = c_default_groups[args.groups]
    else:
        sys.exit( "Must specify either 1) built-in groups option [--groups] or 2) custom groups file [--custom]" )
    # load the grouping file
    map_group_features = util.load_polymap( 
        p_groups, start=start, skip=skip, allowed_values=features )
    # coerce to features-first format (unless explicitly reversed)
    if not args.reversed:
        map_feature_groups = {}
        for group, fdict in map_group_features.items():
            for feature in fdict:
                map_feature_groups.setdefault( feature, {} )[group] = 1
    else:
        map_feature_groups = map_group_features
    # add protected cases to mapping?
    if args.protected == "Y":
        for feature in c_protected:
            map_feature_groups.setdefault( feature, {} )[feature] = 1
    # perform the table regrouping
    regroup( table, map_feature_groups, args.function, args.precision, ungrouped=args.ungrouped=="Y" )
    table.write( args.output )
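
The nested setdefault loop above simply inverts a two-level mapping, turning group -> {feature: 1} into feature -> {group: 1}. A toy version of that inversion, with invented group and feature identifiers, is sketched below.

# Toy inversion of a group -> {feature: 1} polymap into feature -> {group: 1}
# form, mirroring the loop above; the identifiers are invented.
map_group_features = {
    "GROUP1": {"FEATURE_A": 1, "FEATURE_B": 1},
    "GROUP2": {"FEATURE_B": 1},
}
map_feature_groups = {}
for group, fdict in map_group_features.items():
    for feature in fdict:
        map_feature_groups.setdefault(feature, {})[group] = 1
print(map_feature_groups)
# {'FEATURE_A': {'GROUP1': 1}, 'FEATURE_B': {'GROUP1': 1, 'GROUP2': 1}}
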
Example #3
def main():
    args = get_args()
    table = util.Table(args.input)
    partitions = partition_table(
        table,
        args.critical_mean,
        args.critical_count,
        args.pinterval,
    )
    for name, partition in partitions.items():
        if len( partition.get_cols() ) >= args.critical_samples and \
                len( partition.get_rows() ) >= 1 and \
                ( args.limit is None or args.limit in name ):
            write_partition(table, partition,
                            name + c_strain_profile_extension)
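
The write condition above combines three filters: a minimum number of samples, at least one row, and an optional name substring. The self-contained sketch below repeats the same predicate; the stand-in partition class and the example values are invented.

# Stand-in sketch of the partition filter above; DummyPartition and the
# example values are invented, only the filtering predicate is mirrored.
class DummyPartition:
    def __init__(self, cols, rows):
        self.cols, self.rows = cols, rows
    def get_cols(self):
        return self.cols
    def get_rows(self):
        return self.rows

partitions = {
    "s__Species_A": DummyPartition(["S1", "S2", "S3"], ["gene1", "gene2"]),
    "s__Species_B": DummyPartition(["S1"], ["gene1"]),
}
critical_samples, limit = 2, None
for name, partition in partitions.items():
    if len(partition.get_cols()) >= critical_samples and \
            len(partition.get_rows()) >= 1 and \
            (limit is None or limit in name):
        print("would write:", name)   # only s__Species_A passes
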
Example #4
def main():
    args = get_args()
    table = util.Table(args.input)
    normalize(
        table,
        cpm=args.units == "cpm",
        levelwise=args.mode == "levelwise",
        special=args.special == "y",
    )
    if args.update_snames:
        for i, colhead in enumerate(table.colheads):
            if re.search(c_default_suffix + "$", colhead):
                table.colheads[i] = re.sub(c_default_suffix + "$",
                                           "-" + args.units.upper(), colhead)
            else:
                table.colheads[i] += "-" + args.units.upper()
    table.write(args.output)
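
The --update_snames branch above swaps a trailing default suffix for the new units, or appends the units when the suffix is absent. Below is a toy run of that logic; the "-RPKs" value for c_default_suffix is assumed here for illustration only.

# Toy run of the sample-name update above; c_default_suffix is assumed
# to be "-RPKs" for this illustration.
import re

c_default_suffix = "-RPKs"
units = "cpm"
colheads = ["Sample1-RPKs", "Sample2"]
for i, colhead in enumerate(colheads):
    if re.search(c_default_suffix + "$", colhead):
        colheads[i] = re.sub(c_default_suffix + "$", "-" + units.upper(), colhead)
    else:
        colheads[i] += "-" + units.upper()
print(colheads)  # ['Sample1-CPM', 'Sample2-CPM']
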
Example #5
def main():
    args = get_args()
    table = util.Table(args.input)
    allowed_keys = {k.split(util.c_strat_delim)[0]: 1 for k in table.rowheads}
    if args.custom is not None:
        polymap = util.load_polymap(args.custom, allowed_keys=allowed_keys)
    elif args.names is not None:
        polymap = util.load_polymap(c_default_names[args.names].path,
                                    allowed_keys=allowed_keys)
    else:
        sys.exit("Must (i) choose names option or (ii) provide names file")
    if args.simplify:
        for c, ndict in polymap.items():
            ndict = {re.sub("[^A-Za-z0-9]+", "_", n): 1 for n in ndict}
            polymap[c] = ndict
    rename(table, polymap)
    table.write(args.output)
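
The --simplify branch above collapses runs of non-alphanumeric characters in the candidate names to underscores. A toy version with an invented feature id and name:

# Toy version of the --simplify step above; the feature id and name
# are invented for illustration.
import re

polymap = {"FEATURE1": {"DNA polymerase III, beta subunit": 1}}
for c, ndict in polymap.items():
    polymap[c] = {re.sub("[^A-Za-z0-9]+", "_", n): 1 for n in ndict}
print(polymap)  # {'FEATURE1': {'DNA_polymerase_III_beta_subunit': 1}}
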
Example #6
def main():

    args = get_args()
    T = util.Table(args.input)

    # build the taxmap (uniref -> taxon mapping)
    print("Building taxonomic map for input table", file=sys.stderr)
    if args.devdb is not None:
        p_datafile = args.devdb
    elif args.database in databases:
        p_datafile = databases[args.database]
    else:
        sys.exit(
            "Must specify a valid database (from utility mapping or --devdb)")
    taxmap = build_taxmap(T.rowheads, args.level, args.lca_choice, p_datafile)

    # refine the taxmap (remove rare taxa)
    counts = Counter(taxmap.values())
    total = float(sum(counts.values()))
    counts = {k: v / total for k, v in counts.items()}
    taxmap = {
        old: new
        for old, new in taxmap.items() if counts[new] >= args.threshold
    }

    # reindex the table (which rows to keep, which rows to pseudo-stratify)
    print("Reindexing the input table", file=sys.stderr)
    ticker = util.Ticker(T.rowheads)
    index = {}
    for i, rowhead in enumerate(T.rowheads):
        ticker.tick()
        feature, name, stratum = util.fsplit(rowhead)
        new_rowhead = tax_connect(rowhead, taxmap)
        # unmapped is never stratified
        if feature == util.c_unmapped:
            index.setdefault(rowhead, []).append(i)
        # outside of unclassified mode, keep totals
        elif stratum is None and args.mode != c_umode:
            index.setdefault(rowhead, []).append(i)
            # in totals mode, guess at taxonomy from uniref name
            if args.mode == c_tmode:
                index.setdefault(new_rowhead, []).append(i)
        # in unclassified mode, make a new row for the total...
        elif stratum == c_unclassified and args.mode == c_umode:
            index.setdefault(util.fjoin(feature, name, None), []).append(i)
            # ...then replace "unclassified" with inferred taxonomy
            index.setdefault(new_rowhead, []).append(i)
        # update strata in stratified mode
        elif stratum is not None and args.mode == c_smode:
            index.setdefault(new_rowhead, []).append(i)

    # rebuild the table
    print("Rebuilding the input table", file=sys.stderr)
    rowheads2, data2 = [], []
    ticker = util.Ticker(index)
    for rowhead in util.fsort(index):
        ticker.tick()
        rowheads2.append(rowhead)
        newrow = [0 for k in T.colheads]
        for i in index[rowhead]:
            oldrow = map(float, T.data[i])
            newrow = [a + b for a, b in zip(newrow, oldrow)]
        data2.append(newrow)
    T.rowheads = rowheads2
    T.data = data2
    print("Writing new table", file=sys.stderr)
    T.write(args.output, unfloat=True)

    # report on performance
    success, total = 0, 0
    for rowhead in T.rowheads:
        feature, name, stratum = util.fsplit(rowhead)
        if stratum is not None:
            total += 1
            if stratum != c_unclassified:
                success += 1
    print(
        "Summary: Of {TOTAL} stratifications, {SUCCESS} mapped at {TARGET} level ({PERCENT}%)"
        .format(
            TOTAL=total,
            SUCCESS=success,
            TARGET=args.level,
            PERCENT=round(100 * success / float(total), 1),
        ),
        file=sys.stderr,
    )
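
The reindex-and-rebuild pattern above maps each surviving rowhead to the list of original row indices that feed it, then sums those rows into the new table. A minimal standalone sketch follows; the row names, values, and rowhead mapping are invented, and sorted() stands in for util.fsort.

# Minimal sketch of the reindex-and-rebuild step above: several original
# rows can land on the same new rowhead, and their values are summed.
old_rowheads = ["geneA|g__Bug1", "geneA|g__Bug2", "geneB|unclassified"]
old_data = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
new_names = {"geneA|g__Bug1": "geneA|g__Genus1",
             "geneA|g__Bug2": "geneA|g__Genus1",
             "geneB|unclassified": "geneB|g__Genus2"}
index = {}
for i, rowhead in enumerate(old_rowheads):
    index.setdefault(new_names[rowhead], []).append(i)
rowheads2, data2 = [], []
for rowhead in sorted(index):
    newrow = [0.0] * len(old_data[0])
    for i in index[rowhead]:
        newrow = [a + b for a, b in zip(newrow, old_data[i])]
    rowheads2.append(rowhead)
    data2.append(newrow)
print(rowheads2)  # ['geneA|g__Genus1', 'geneB|g__Genus2']
print(data2)      # [[4.0, 6.0], [5.0, 6.0]]
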
Example #7
def main():
    args = get_args()
    tbl = util.Table(args.input)
    # build the taxmap
    print("Building taxonomic map for input table", file=sys.stderr)
    p_datafile = args.dev if args.dev is not None else databases[
        args.resolution]
    taxmap = build_taxmap(tbl.rowheads, args.level, p_datafile)
    # refine the taxmap
    counts = {}
    for old, new in taxmap.items():
        counts[new] = counts.get(new, 0) + 1
    total = float(sum(counts.values()))
    count = {k: v / total for k, v in counts.items()}
    taxmap = {
        old: new
        for old, new in taxmap.items() if count[new] >= args.threshold
    }
    # reindex the table
    print("Reindexing the input table", file=sys.stderr)
    ticker = util.Ticker(tbl.rowheads)
    index = {}
    for i, rowhead in enumerate(tbl.rowheads):
        ticker.tick()
        feature, name, stratum = util.fsplit(rowhead)
        new_rowhead = tax_connect(rowhead, taxmap)
        # unmapped is never stratified
        if feature == util.c_unmapped:
            index.setdefault(rowhead, []).append(i)
        # outside of unclassified mode, keep totals
        elif stratum is None and args.mode != c_umode:
            index.setdefault(rowhead, []).append(i)
            # in totals mode, guess at taxonomy from uniref name
            if args.mode == c_tmode:
                index.setdefault(new_rowhead, []).append(i)
        elif stratum == c_unclassified and args.mode == c_umode:
            # in unclassified mode, make a new row for the total...
            index.setdefault(util.fjoin(feature, name, None), []).append(i)
            # ...then replace "unclassified" with inferred taxonomy
            index.setdefault(new_rowhead, []).append(i)
        elif stratum is not None and args.mode == c_smode:
            index.setdefault(new_rowhead, []).append(i)
    # rebuild the table
    print("Rebuilding the input table", file=sys.stderr)
    rowheads2, data2 = [], []
    ticker = util.Ticker(index)
    for rowhead in util.fsort(index):
        ticker.tick()
        rowheads2.append(rowhead)
        newrow = [0 for k in tbl.colheads]
        for i in index[rowhead]:
            oldrow = map(float, tbl.data[i])
            newrow = [a + b for a, b in zip(newrow, oldrow)]
        data2.append(newrow)
    tbl.rowheads = rowheads2
    tbl.data = data2
    # output
    print("Writing new table", file=sys.stderr)
    tbl.write(args.output, unfloat=True)
    # report on performance
    success, total = 0, 0
    for rowhead in tbl.rowheads:
        feature, name, stratum = util.fsplit(rowhead)
        if stratum is not None:
            total += 1
            if stratum != c_unclassified:
                success += 1
    print(
        "Summary: Of {TOTAL} stratifications, {SUCCESS} mapped at {TARGET} level ({PERCENT}%)"
        .format(
            TOTAL=total,
            SUCCESS=success,
            TARGET=args.level,
            PERCENT=round(100 * success / float(total), 1),
        ),
        file=sys.stderr,
    )
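
The taxmap refinement above keeps only features whose inferred taxon accounts for at least the threshold fraction of all assignments. A toy illustration with invented identifiers and a threshold of 0.5:

# Toy illustration of the taxmap refinement above; the identifiers and
# the 0.5 threshold are invented.
taxmap = {"FEATURE_A": "g__Genus1", "FEATURE_B": "g__Genus1",
          "FEATURE_C": "g__Genus1", "FEATURE_D": "g__Genus2"}
threshold = 0.5
counts = {}
for old, new in taxmap.items():
    counts[new] = counts.get(new, 0) + 1
total = float(sum(counts.values()))
count = {k: v / total for k, v in counts.items()}
taxmap = {old: new for old, new in taxmap.items() if count[new] >= threshold}
print(taxmap)  # only the g__Genus1 features survive (3/4 >= 0.5; 1/4 < 0.5)
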