Beispiel #1
0
def write_smc(filename, smc):
    """
    Writes a SMC file.

    filename: destination handed to argweaver.open_stream in "w" mode.
    smc: iterable of item dicts.  Each item has a "tag" key; items tagged
        "NAMES", "REGION", "TREE" or "SPR" are written as one row each,
        items with any other tag are skipped.

    A "TREE" item may hold its tree either as a string (written as-is)
    or as any other object (converted with format_tree first).
    """

    out = argweaver.open_stream(filename, "w")

    # Guarantee the stream is closed even if an item is malformed or a
    # write fails part-way through; otherwise the handle would leak.
    try:
        for item in smc:
            if item["tag"] == "NAMES":
                util.print_row("NAMES", *item["names"], out=out)

            elif item["tag"] == "REGION":
                util.print_row("REGION",
                               item["chrom"],
                               item["start"],
                               item["end"],
                               out=out)

            elif item["tag"] == "TREE":
                # Trees that are not already text are serialized first.
                if not isinstance(item["tree"], basestring):
                    tree = format_tree(item["tree"])
                else:
                    tree = item["tree"]

                util.print_row("TREE", item["start"], item["end"], tree,
                               out=out)

            elif item["tag"] == "SPR":
                util.print_row("SPR",
                               item["pos"],
                               item["recomb_node"],
                               item["recomb_time"],
                               item["coal_node"],
                               item["coal_time"],
                               out=out)
    finally:
        out.close()
Beispiel #2
0
def write_smc(filename, smc):
    """
    Writes a SMC file.

    filename: output target, opened via argweaver.open_stream(filename, "w").
    smc: iterable of dicts, each carrying a "tag" of "NAMES", "REGION",
        "TREE" or "SPR".  One row is printed per recognized item; items
        with an unrecognized tag produce no output.

    For "TREE" items the "tree" value is written directly when it is a
    string, and passed through format_tree otherwise.
    """

    out = argweaver.open_stream(filename, "w")

    # The try/finally ensures the output stream is released when any
    # row fails to format or write, instead of leaking the open handle.
    try:
        for item in smc:
            if item["tag"] == "NAMES":
                util.print_row("NAMES", *item["names"], out=out)

            elif item["tag"] == "REGION":
                util.print_row("REGION",
                               item["chrom"], item["start"], item["end"],
                               out=out)

            elif item["tag"] == "TREE":
                # Only non-string trees need to be converted to text.
                if not isinstance(item["tree"], basestring):
                    tree = format_tree(item["tree"])
                else:
                    tree = item["tree"]

                util.print_row("TREE", item["start"], item["end"], tree,
                               out=out)

            elif item["tag"] == "SPR":
                util.print_row("SPR", item["pos"],
                               item["recomb_node"], item["recomb_time"],
                               item["coal_node"], item["coal_time"],
                               out=out)
    finally:
        out.close()
Beispiel #3
0
def iter_arg_layout(filename):
    """
    Iterate through an ARG layout file.

    Every line is split on tabs and yielded as a pair (block, leaf_layout):
        block: [column 0, int(column 1), int(column 2)]
        leaf_layout: dict built from the remaining columns taken two at
            a time, mapping the first of each pair to float(second).
    """
    stream = argweaver.open_stream(filename, compress='bgzip')
    with closing(stream) as infile:
        for row in infile:
            fields = row.rstrip().split("\t")
            block = [fields[0], int(fields[1]), int(fields[2])]

            # Columns from index 3 onward alternate key, value.
            leaf_layout = {
                fields[k]: float(fields[k + 1])
                for k in range(3, len(fields), 2)
            }
            yield block, leaf_layout
Beispiel #4
0
def iter_arg_layout(filename):
    """
    Iterate through an ARG layout file.

    Yields (block, leaf_layout) for each tab-separated line, where block
    is [first column, second column as int, third column as int] and
    leaf_layout maps each following key column to the float parsed from
    the column right after it.
    """
    with closing(argweaver.open_stream(filename, compress='bgzip')) as infile:
        for record in infile:
            cols = record.rstrip().split("\t")
            name = cols[0]
            block = [name, int(cols[1]), int(cols[2])]

            # Walk the trailing columns in (key, value) steps of two.
            leaf_layout = {}
            cursor = 3
            ncols = len(cols)
            while cursor < ncols:
                value = float(cols[cursor + 1])
                leaf_layout[cols[cursor]] = value
                cursor += 2

            yield block, leaf_layout
Beispiel #5
0
def iter_smc_file(filename, parse_trees=False, apply_spr=False, region=None):
    """
    Iterates through a SMC file.

    parse_trees: If True, parses local trees.
    apply_spr: If True, avoids reading each tree by applying the SPR
        operation to the current tree.  Only consulted when parse_trees
        is True; the same tree object is then yielded again after
        smc_apply_spr(tree, spr) has been called on it.
    region: If given, returns only trees and SPRs within region=(start, end).

    Raises Exception when a line starting with "RANGE" is encountered.

    Yields item, where item can be one of the following:
        {'tag': 'NAMES',
         'names': names_of_sequences}

        {'tag': 'REGION',
         'chrom': name_of_chromosome,
         'start': start_coordinate_of_region,
         'end': end_coordinate_of_region}

        {'tag': 'TREE',
         'start': start_coordinate_of_local_region,
         'end': end_coordinate_of_local_region,
         'tree': local_tree}

        {'tag': 'SPR',
         'pos': coordinate of recombination point,
         'recomb_node': name_of_recombination_node,
         'recomb_time': time_of_recombination,
         'coal_node': name_of_branch_with_recoalescence,
         'coal_time': time_of_recoalescence}
    """

    current_tree = None
    last_spr = None

    if region:
        # Read the raw (unparsed) items, restrict them to the region, and
        # only then parse trees or apply SPRs.
        for record in iter_subsmc(iter_smc_file(filename), region):
            kind = record["tag"]

            if kind == "SPR":
                last_spr = record
            elif kind == "TREE" and parse_trees:
                reuse = (apply_spr and
                         current_tree is not None and
                         last_spr is not None)
                if reuse:
                    smc_apply_spr(current_tree, last_spr)
                else:
                    current_tree = parse_tree(record["tree"])
                record["tree"] = current_tree

            yield record
        return

    with closing(argweaver.open_stream(filename)) as infile:
        for raw in infile:
            fields = raw.rstrip().split("\t")
            kind = fields[0]

            if kind == "NAMES":
                yield {"tag": "NAMES", "names": fields[1:]}

            elif kind == "REGION":
                chrom = fields[1]
                region_start = int(fields[2])
                region_end = int(fields[3])
                yield {"tag": "REGION",
                       "chrom": chrom,
                       "start": region_start,
                       "end": region_end}

            elif kind == "RANGE":
                raise Exception("deprecated RANGE line, use REGION instead")

            elif kind == "TREE":
                tree_text = fields[3]
                if not parse_trees:
                    current_tree = tree_text
                elif (apply_spr and current_tree is not None
                        and last_spr is not None):
                    # Update the previous tree instead of parsing anew.
                    smc_apply_spr(current_tree, last_spr)
                else:
                    current_tree = parse_tree(tree_text)

                tree_start = int(fields[1])
                tree_end = int(fields[2])
                yield {"tag": "TREE",
                       "start": tree_start,
                       "end": tree_end,
                       "tree": current_tree}

            elif kind == "SPR":
                pos = int(fields[1])
                recomb_node = int(fields[2])
                recomb_time = float(fields[3])
                coal_node = int(fields[4])
                coal_time = float(fields[5])
                last_spr = {"tag": "SPR",
                            "pos": pos,
                            "recomb_node": recomb_node,
                            "recomb_time": recomb_time,
                            "coal_node": coal_node,
                            "coal_time": coal_time}
                yield last_spr
Beispiel #6
0
def iter_smc_file(filename, parse_trees=False, apply_spr=False,
                  region=None):
    """
    Iterates through a SMC file.

    parse_trees: If True, parses local trees.
    apply_spr: If True, avoids reading each tree by applying the SPR
        operation to the current tree.  Has an effect only together
        with parse_trees.
    region: If given, returns only trees and SPRs within region=(start, end).

    A line whose first field is "RANGE" raises Exception.

    Yields item, where item can be one of the following:
        {'tag': 'NAMES',
         'names': names_of_sequences}

        {'tag': 'REGION',
         'chrom': name_of_chromosome,
         'start': start_coordinate_of_region,
         'end': end_coordinate_of_region}

        {'tag': 'TREE',
         'start': start_coordinate_of_local_region,
         'end': end_coordinate_of_local_region,
         'tree': local_tree}

        {'tag': 'SPR',
         'pos': coordinate of recombination point,
         'recomb_node': name_of_recombination_node,
         'recomb_time': time_of_recombination,
         'coal_node': name_of_branch_with_recoalescence,
         'coal_time': time_of_recoalescence}
    """

    if region:
        tree = spr = None

        # Items come from an unparsed pass over the file, filtered to
        # the requested region by iter_subsmc.
        for entry in iter_subsmc(iter_smc_file(filename), region):
            if entry["tag"] == "SPR":
                spr = entry
            elif entry["tag"] == "TREE" and parse_trees:
                if not apply_spr or tree is None or spr is None:
                    tree = parse_tree(entry["tree"])
                else:
                    smc_apply_spr(tree, spr)
                entry["tree"] = tree
            yield entry
        return

    with closing(argweaver.open_stream(filename)) as infile:
        tree = spr = None

        for text in infile:
            cols = text.rstrip().split("\t")
            tag = cols[0]

            if tag == "RANGE":
                raise Exception("deprecated RANGE line, use REGION instead")

            if tag == "NAMES":
                yield {"tag": "NAMES", "names": cols[1:]}
                continue

            if tag == "REGION":
                yield {"tag": "REGION",
                       "chrom": cols[1],
                       "start": int(cols[2]),
                       "end": int(cols[3])}
                continue

            if tag == "SPR":
                spr = {"tag": "SPR",
                       "pos": int(cols[1]),
                       "recomb_node": int(cols[2]),
                       "recomb_time": float(cols[3]),
                       "coal_node": int(cols[4]),
                       "coal_time": float(cols[5])}
                yield spr
                continue

            if tag == "TREE":
                newick = cols[3]
                if not parse_trees:
                    tree = newick
                elif not apply_spr or tree is None or spr is None:
                    tree = parse_tree(newick)
                else:
                    # Reuse the previous tree by applying the latest SPR.
                    smc_apply_spr(tree, spr)

                yield {"tag": "TREE",
                       "start": int(cols[1]),
                       "end": int(cols[2]),
                       "tree": tree}