def get_ebola_consensus_tree(p: float, stop: float, output_dir_name: str) -> Tuple[Poagraph, AffinityTree]:
    """Build the Ebola poagraph and an affinity tree computed on it.

    The MAF multialignment, the CSV metadata and the BLOSUM80 matrix are
    looked up relative to the directory that contains this file.

    Args:
        p: Value wrapped in ``P`` and handed to the tree builder.
        stop: Value wrapped in ``Stop`` and handed to the tree builder.
        output_dir_name: Name of the directory requested from
            ``pathtools.get_child_dir`` next to this file; its "consensus"
            child directory is passed to the tree builder.

    Returns:
        The poagraph together with the result of
        ``atree_builders.get_affinity_tree``.
    """
    base_dir = Path(os.path.abspath(__file__)).resolve().parent
    results_dir = pathtools.get_child_dir(base_dir, output_dir_name)
    tree_dir = pathtools.get_child_dir(results_dir, "consensus")
    maf_file = base_dir.joinpath("../data/Ebola/genome_whole/input/multialignment.maf")
    metadata_file = base_dir.joinpath("../data/Ebola/genome_whole/input/metadata.csv")
    blosum_file = base_dir.joinpath("../bin/blosum80.mat")

    ncbi_provider = fp_ncbi.FromNCBI(use_cache=True)

    maf = Maf(file_content=pathtools.get_file_content_stringio(maf_file),
              filename=maf_file)
    metadata = MetadataCSV(filecontent=pathtools.get_file_content_stringio(metadata_file),
                           filename=metadata_file)

    # Only the graph is needed here; the second element of the pair is dropped.
    graph, _dagmaf = Poagraph.build_from_dagmaf(maf, ncbi_provider, metadata)

    substitution_matrix = Blosum(pathtools.get_file_content_stringio(path=blosum_file),
                                 blosum_file)

    # NOTE(review): the trailing False is presumably a verbosity flag - confirm
    # against atree_builders.get_affinity_tree.
    tree = atree_builders.get_affinity_tree(graph,
                                            substitution_matrix,
                                            tree_dir,
                                            Stop(stop),
                                            P(p),
                                            False)
    return graph, tree
def get_ebola_affinity_tree(
        p: float, stop: float,
        output_dir_name: str) -> Tuple[Poagraph, AffinityTree]:
    """Build the Ebola poagraph and an affinity tree computed on it.

    Input files (MAF multialignment, CSV metadata, BLOSUM80 matrix) are
    resolved relative to the directory that contains this file.

    Args:
        p: Value wrapped in ``P`` and handed to the tree builder.
        stop: Value wrapped in ``Stop`` and handed to the tree builder.
        output_dir_name: Name of the directory requested from
            ``pathtools.get_child_dir`` next to this file; its "consensus"
            child directory is passed to the tree builder.

    Returns:
        The poagraph together with the result of
        ``atree_builders.get_affinity_tree``.
    """
    script_dir = Path(os.path.abspath(__file__)).resolve().parent
    results_dir = pathtools.get_child_dir(script_dir, output_dir_name)
    tree_dir = pathtools.get_child_dir(results_dir, "consensus")
    maf_file = script_dir.joinpath("../data/Ebola/multialignment.maf")
    metadata_file = script_dir.joinpath("../data/Ebola/metadata.csv")
    blosum_file = script_dir.joinpath("../bin/blosum80.mat")

    # NOTE(review): this object is constructed but never read again in this
    # function; kept because the constructor's side effects are not visible
    # from here.
    _task_parameters = TaskParameters(
        running_time="",
        multialignment_file_path=maf_file,
        multialignment_format="MAF",
        datatype="N",
        metadata_file_path=metadata_file,
        blosum_file_path=blosum_file,
        output_path=results_dir,
        output_po=False,
        output_fasta=False,
        output_with_nodes=False,
        verbose=False,
        raw_maf=False,
        fasta_provider='FromNCBI',
        cache=True,
        missing_base_symbol="",
        fasta_source_file=None,
        consensus_type="",
        hbmin=0.8,
        stop=stop,
        p=p,
    )

    ncbi_provider = fp_ncbi.FromNCBI(use_cache=True)

    maf = Maf(file_content=pathtools.get_file_content_stringio(maf_file),
              filename=maf_file)
    metadata = MetadataCSV(
        filecontent=pathtools.get_file_content_stringio(metadata_file),
        filename=metadata_file)

    # Only the graph is needed here; the second element of the pair is dropped.
    graph, _dagmaf = Poagraph.build_from_dagmaf(maf, ncbi_provider, metadata)

    substitution_matrix = Blosum(
        pathtools.get_file_content_stringio(path=blosum_file), blosum_file)

    tree = atree_builders.get_affinity_tree(graph, substitution_matrix,
                                            tree_dir, Stop(stop), P(p), False)
    return graph, tree
Example #3
0
def get_ebola_consensus_tree(p: float, stop: float, output_dir_name: str) -> Tuple[Poagraph, ConsensusTree]:
    """Build the Ebola poagraph and a consensus tree computed on it.

    Input files (MAF multialignment, CSV metadata, BLOSUM80 matrix) are
    resolved relative to the directory that contains this file. The tree is
    generated with the ``MAX2`` and ``NODE3`` strategies.

    Args:
        p: Value wrapped in ``cinp.P`` and handed to the tree generator.
        stop: Value wrapped in ``cinp.Stop`` and handed to the tree generator.
        output_dir_name: Name of the directory requested from
            ``pathtools.get_child_dir`` next to this file; its "consensus"
            child directory is passed to the tree generator.

    Returns:
        The poagraph together with the result of
        ``tree_generator.get_consensus_tree``.
    """
    script_dir = Path(os.path.abspath(__file__)).resolve().parent
    results_dir = pathtools.get_child_dir(script_dir, output_dir_name)
    tree_dir = pathtools.get_child_dir(results_dir, "consensus")
    maf_file = script_dir.joinpath("../data/Ebola/genome_whole/input/multialignment.maf")
    metadata_file = script_dir.joinpath("../data/Ebola/genome_whole/input/metadata.csv")
    blosum_file = script_dir.joinpath("../bin/blosum80.mat")

    # NOTE(review): this object is constructed but never read again in this
    # function; kept because the constructor's side effects are not visible
    # from here.
    _task_parameters = TaskParameters(
        running_time="",
        multialignment_file_path=maf_file,
        multialignment_format="MAF",
        datatype="N",
        metadata_file_path=metadata_file,
        blosum_file_path=blosum_file,
        output_path=results_dir,
        output_po=False,
        output_fasta=False,
        output_with_nodes=False,
        verbose=False,
        raw_maf=False,
        fasta_provider='FromNCBI',
        cache=True,
        missing_base_symbol="",
        fasta_source_file=None,
        consensus_type="",
        hbmin=0.8,
        max_cutoff_option="MAX2",
        search_range=None,
        node_cutoff_option="NODE3",
        multiplier=None,
        stop=stop,
        p=p,
    )

    ncbi_provider = fp_ncbi.FromNCBI(use_cache=True)

    maf = inp.Maf(file_content=pathtools.get_file_content_stringio(maf_file),
                  filename=maf_file)
    metadata = inp.MetadataCSV(filecontent=pathtools.get_file_content_stringio(metadata_file),
                               filename=metadata_file)

    # Only the graph is needed here; the second element of the pair is dropped.
    graph, _dagmaf = Poagraph.build_from_dagmaf(maf, ncbi_provider, metadata)

    substitution_matrix = cinp.Blosum(pathtools.get_file_content_stringio(path=blosum_file),
                                      blosum_file)

    tree = tree_generator.get_consensus_tree(graph,
                                             substitution_matrix,
                                             tree_dir,
                                             cinp.Stop(stop),
                                             cinp.P(p),
                                             MAX2(),
                                             NODE3(),
                                             False)
    return graph, tree
Example #4
0
def run_pangtree(maf_path: Path, fasta_path: Path, output_dir: Path,
                 po_output: bool) -> None:
    """Build one poagraph and write an affinity tree for every value of p.

    A child directory named after the current time is requested under
    ``output_dir``. For each ``p`` in ``p_values`` (a name defined outside
    this function) a further child directory receives
    "affinity_tree.newick", "pangenome.json" and, when ``po_output`` is
    true, "poagraph.po".

    Args:
        maf_path: Path of the MAF multialignment to read.
        fasta_path: Path handed to ``missings.FromFile``.
        output_dir: Parent directory for the results.
        po_output: Whether to also save the poagraph in PO format.
    """
    output_dir = pathtools.get_child_dir(output_dir,
                                         pathtools.get_current_time())
    print(f"Runing pangtree for maf: {maf_path} and fasta: {fasta_path} "
          f"Output in: {output_dir}, include po file: {po_output}.")

    fasta_provider = missings.FromFile(fasta_path)
    alignment = msa.Maf(pathtools.get_file_content_stringio(maf_path),
                        maf_path)
    poagraph, dagmaf = builder.build_from_dagmaf(alignment, fasta_provider)

    for p in p_values:
        # Directory name is the p value with the decimal point replaced.
        p_dir = pathtools.get_child_dir(output_dir, str(p).replace(".", "_"))
        stop = at_params.Stop(0.99)
        tree = at_builders.build_affinity_tree(poagraph, None, p_dir, stop,
                                               at_params.P(p), True)

        newick = tree.as_newick(None, separate_leaves=True)
        pathtools.save_to_file(
            newick, pathtools.get_child_path(p_dir, "affinity_tree.newick"))

        if po_output:
            pathtools.save_to_file(
                po.poagraph_to_PangenomePO(poagraph),
                pathtools.get_child_path(p_dir, "poagraph.po"))

        # Here `json` refers to whatever the file bound to that name; it
        # must provide TaskParameters, to_PangenomeJSON and to_json.
        parameters = json.TaskParameters(
            multialignment_file_path=str(maf_path),
            multialignment_format="maf",
            datatype="nucleotides",
            blosum_file_path="",
            output_path=p_dir,
            fasta_provider=fasta_provider,
            fasta_source_file=fasta_path,
            consensus_type="tree",
            stop=str(stop),
            p=str(p),
            output_with_nodes=False)
        pangenome = json.to_PangenomeJSON(task_parameters=parameters,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=tree)

        pathtools.save_to_file(
            json.to_json(pangenome),
            pathtools.get_child_path(p_dir, "pangenome.json"))
def local_compatibilities_analysis_consensus_coordinates(
        poagraph: Poagraph, consensus_tree: ConsensusTree,
        groups: List[List[int]]) -> None:
    """Plot sliding-window compatibility between consensus paths.

    For every group of consensus node ids one joint figure is saved in the
    "charts_ebola_200_200" directory next to this file. Each member of the
    group gets its own subplot in which, window by window along that
    member's consensus path, the share of window nodes that also occur in
    each other member's consensus path is drawn.

    Args:
        poagraph: Not read by this function.
        consensus_tree: Tree whose ``nodes[i].consensus_path`` are compared.
        groups: Groups of consensus node ids; one figure is saved per group.
    """
    def produce_chart(x, ys, labels, chart_path):
        # Single-consensus chart; not called anywhere in this function.
        fig, ax = plt.subplots()
        for i, y in enumerate(ys):
            ax.plot(x, y, label=labels[i])
        for r in [[469, 2689], [3128, 4151], [4478, 5459], [6038, 8069],
                  [6038, 7133], [6038, 6933], [8508, 9375], [10344, 11100],
                  [11580, 18219]]:
            ax.plot([r[0], r[1]], [1, 1], color="green")
        ax.set(xlabel='POA graph columns IDs',
               ylabel='Local compatibility to other consensus',
               title=f"Base consensus: {labels[-1]}")
        ax.legend(loc=4)
        ax.grid()

        fig.savefig(chart_path)

    class Chart:
        """Data of one subplot: x positions and one y series per label."""

        def __init__(self, x, consensus, ys, labels):
            self.x = x
            self.consensus = consensus
            self.ys = ys
            self.labels = labels

    def produce_joint_chart(chart_datas: List[Chart], chart_path):
        # One row per consensus plus two extra rows; the first extra row
        # carries the gene annotations, and both have their ticks and spines
        # removed further down.
        fig, axs = plt.subplots(len(chart_datas) + 2, 1)
        if len(chart_datas) == 5:
            fig.set_size_inches(14.5, 8)
        elif len(chart_datas) == 3:
            fig.set_size_inches(18.5, 6.5)
        line_objects = []
        line_labels = []
        for i, cd in enumerate(chart_datas):
            for j, y in enumerate(cd.ys):
                # The same series is also drawn in white on the annotation
                # row (presumably so that row shares the data's x range).
                axs[len(chart_datas)].plot(cd.x, y, color='white')
                c_label = ebola_consensus_labels[cd.labels[j]][0]
                lo = axs[i].plot(cd.x,
                                 y,
                                 label=c_label,
                                 color=ebola_consensus_labels[cd.labels[j]][1])
                line_objects.append(lo)
                if c_label not in line_labels:
                    line_labels.append(c_label)
            axs[i].set_xlabel(
                f'{ebola_consensus_labels[str(cd.consensus)][0]}')
            axs[i].set_ylim(0, 1)

        fig.legend(
            line_objects,  # The line objects
            labels=line_labels,  # The labels for each line
            loc="lower center",  # Position of legend
            borderaxespad=0.1,  # Small spacing around legend box
            title="Legend Title",  # Title for the legend
            ncol=2)

        # (start, end, y level, name) of each segment drawn on the
        # annotation row.
        for r in [(469, 2689, 1, "NP"), (3128, 4151, 1, "VP35"),
                  (4478, 5459, 1, "VP40"), (6038, 8069, 1, "GP"),
                  (6038, 7133, 2, "ssGP"), (6038, 6933, 3, "sGP"),
                  (8508, 9375, 1, "VP30"), (10344, 11100, 1, "VP24"),
                  (11580, 18219, 1, "L")]:
            axs[len(chart_datas)].plot([r[0], r[1]], [r[2], r[2]],
                                       color="green")

            axs[len(chart_datas)].annotate(r[3], (r[0], r[2] + 0.1))

        # Strip ticks, tick labels and the frame from the two extra rows.
        for k in [len(chart_datas), len(chart_datas) + 1]:
            axs[k].tick_params(
                axis='x',  # changes apply to the x-axis
                which='both',  # both major and minor ticks are affected
                bottom=False,  # ticks along the bottom edge are off
                top=False,  # ticks along the top edge are off
                labelbottom=False)  # labels along the bottom edge are off
            axs[k].tick_params(
                axis='y',  # changes apply to the y-axis
                which='both',  # both major and minor ticks are affected
                left=False,  # ticks along the left edge are off
                right=False,  # ticks along the right edge are off
                labelleft=False)  # labels along the left edge are off
            axs[k].spines['top'].set_visible(False)
            axs[k].spines['right'].set_visible(False)
            axs[k].spines['bottom'].set_visible(False)
            axs[k].spines['left'].set_visible(False)

        fig.text(0.005,
                 0.6,
                 'Compatibility',
                 ha='center',
                 va='center',
                 rotation='vertical')

        fig.tight_layout()
        fig.savefig(chart_path, dpi=100)

    def produce_local_compatibility_chart(consensuses_group: List[int]):
        frame_size = 200
        frame_step = 200
        joint_chart_name = "_".join([str(c) for c in consensuses_group])
        joint_chart_path = output_dir_path.joinpath(f"{joint_chart_name}.png")
        chart_datas = []
        for consensus in consensuses_group:
            consensus_path = consensus_tree.nodes[consensus].consensus_path
            consensus_length = len(consensus_path)
            ys = []
            labels = []
            # Defined up front so a single-member group (no comparisons)
            # still yields a chart instead of an unbound name.
            x = []
            for consensus_to_compare in set(consensuses_group) - {consensus}:
                y = []
                consensus_to_compare_path = consensus_tree.nodes[
                    consensus_to_compare].consensus_path
                frame_start = 0
                # Clamp the first window too: a path shorter than frame_size
                # must not be indexed past its end.
                frame_end = min(frame_start + frame_size, consensus_length)
                x = []
                # Strict comparison: a window starting exactly at the end of
                # the path is empty and would divide by zero.
                while frame_start < consensus_length:
                    frame_nodes = {
                        consensus_path[node_index]
                        for node_index in range(frame_start, frame_end)
                    }
                    # Share of this window's nodes that also lie on the
                    # other consensus path.
                    comp = len(
                        frame_nodes.intersection(
                            consensus_to_compare_path)) / len(frame_nodes)
                    y.append(comp)
                    x.append(frame_start)
                    frame_start += frame_step
                    frame_end = min(frame_end + frame_step, consensus_length)

                ys.append(y)
                labels.append(str(consensus_to_compare))
            chart_datas.append(Chart(x, consensus, ys, labels))
        produce_joint_chart(chart_datas, joint_chart_path)

    current_path = Path(os.path.abspath(__file__)).resolve()
    output_dir_path = pathtools.get_child_dir(current_path.parent,
                                              "charts_ebola_200_200")

    for g in groups:
        produce_local_compatibility_chart(g)
def local_compatibilities_analysis_poagraph_coordinates(
        poagraph: Poagraph, consensus_tree: ConsensusTree,
        groups: List[List[int]]) -> None:
    """Plot sliding-window compatibility of consensuses over graph columns.

    For every consensus id in every group a chart "<consensus>.png" is saved
    in the "charts" directory next to this file. A window of columns slides
    over the poagraph; for each window the chart shows which share of the
    base consensus' nodes in that window also belongs to each other
    consensus of the group.

    Args:
        poagraph: Graph whose nodes provide ``column_id`` and ``node_id``.
        consensus_tree: Tree whose ``nodes[i].consensus_path`` are compared.
        groups: Groups of consensus node ids to compare with each other.
    """
    def produce_chart(x, ys, labels, chart_path):
        # The last label names the base consensus and is used in the title
        # only; the preceding labels match the series in `ys`.
        fig, ax = plt.subplots()
        for i, y in enumerate(ys):
            ax.plot(x, y, label=labels[i])

        ax.set(xlabel='POA graph columns IDs',
               ylabel='Local compatibility to other consensus',
               title=f"Base consensus: {labels[-1]}")
        ax.legend(loc=1)
        ax.grid()

        fig.savefig(chart_path)

    def produce_local_compatibility_chart(consensuses_group: List[int],
                                          column_to_nodes: Dict[ColumnID,
                                                                List[NodeID]]):
        columns_count = max(column_to_nodes.keys())
        frame_size = 1000
        frame_step = 500

        # One x position per window start; the loop below produces exactly
        # one y value for each of them.
        x = list(range(0, columns_count, frame_step))

        for consensus in consensuses_group:
            chart_path = output_dir_path.joinpath(f"{consensus}.png")
            ys = []
            labels = []
            for consensus_to_compare in set(consensuses_group) - {consensus}:
                # The path sets do not depend on the window, so build them
                # once per comparison rather than once per window.
                base_nodes = set(consensus_tree.nodes[consensus].consensus_path)
                other_nodes = set(
                    consensus_tree.nodes[consensus_to_compare].consensus_path)
                y = []

                frame_start = 0
                # Clamp the first window as well, so graphs with fewer than
                # frame_size columns do not look up missing columns.
                frame_end = min(frame_start + frame_size, columns_count)

                # Strict comparison keeps len(y) == len(x); with "<=" an
                # extra, empty window was appended whenever columns_count
                # was a multiple of frame_step.
                while frame_start < columns_count:
                    # NOTE(review): assumes every column id inside the
                    # window is a key of column_to_nodes - confirm.
                    frame_nodes = {
                        node_id
                        for col_id in range(frame_start, frame_end)
                        for node_id in column_to_nodes[col_id]
                    }
                    c_nodes_in_frame = frame_nodes & base_nodes
                    other_nodes_in_frame = frame_nodes & other_nodes
                    if c_nodes_in_frame:
                        comp = len(c_nodes_in_frame & other_nodes_in_frame
                                   ) / len(c_nodes_in_frame)
                    elif not other_nodes_in_frame:
                        # Neither consensus passes through this window.
                        comp = 1
                    else:
                        comp = 0
                    y.append(comp)
                    frame_start += frame_step
                    frame_end = min(frame_end + frame_step, columns_count)

                ys.append(y)
                labels.append(str(consensus_to_compare))
            labels.append(consensus)
            produce_chart(x, ys, labels, chart_path)

    # Group node ids by the column they belong to.
    column_to_nodes = {node.column_id: [] for node in poagraph.nodes}
    for node in poagraph.nodes:
        column_to_nodes[node.column_id].append(node.node_id)

    current_path = Path(os.path.abspath(__file__)).resolve()
    output_dir_path = pathtools.get_child_dir(current_path.parent, "charts")

    for g in groups:
        produce_local_compatibility_chart(g, column_to_nodes)
def local_compatibilities_analysis_consensus_coordinates(
        poagraph: Poagraph, affinity_tree: AffinityTree,
        groups: List[List[int]]) -> None:
    """Plot sliding-window compatibility between consensus sequences.

    For every group of affinity-tree node ids one joint figure is saved in
    the "charts_ebola_400_200_middle_prostokaty" directory next to this
    file. Each member of the group gets its own subplot in which, window by
    window along that member's consensus, the share of window nodes that
    also occur in each other member's consensus is drawn. Gene regions are
    shaded on every subplot and named on an extra row below them.

    Args:
        poagraph: Not read by this function.
        affinity_tree: Tree whose ``nodes[i].consensus`` are compared.
        groups: Groups of tree node ids; one figure is saved per group.
    """
    def produce_chart(x, ys, labels, chart_path):
        # Single-consensus chart; not called anywhere in this function.
        fig, ax = plt.subplots()
        for i, y in enumerate(ys):
            ax.plot(x, y, label=labels[i])
        for r in [[469, 2689], [3128, 4151], [4478, 5459], [6038, 8069],
                  [6038, 7133], [6038, 6933], [8508, 9375], [10344, 11100],
                  [11580, 18219]]:
            ax.plot([r[0], r[1]], [1, 1], color="green")
        ax.set(xlabel='POA graph columns IDs',
               ylabel='Local compatibility',
               title=f"Base consensus: {labels[-1]}")
        ax.legend(loc=4)
        ax.grid()

        fig.savefig(chart_path)

    class Chart:
        """Data of one subplot: x positions and one y series per label."""

        def __init__(self, x, consensus, ys, labels):
            self.x = x
            self.consensus = consensus
            self.ys = ys
            self.labels = labels

    def produce_joint_chart(chart_datas: List[Chart], chart_path):
        # Switches on LaTeX text rendering for matplotlib globally.
        matplotlib.rc('text', usetex=True)
        # One row per consensus plus one extra row for the gene names.
        fig, axs = plt.subplots(len(chart_datas) + 1, 1)
        if len(chart_datas) == 5:
            fig.set_size_inches(14.5, 8)
        elif len(chart_datas) == 3:
            fig.set_size_inches(18.5, 6.5)

        # (start, end, name) of each gene region.
        genes = [(469, 2689, "NP"), (3128, 4151, "VP35"), (4478, 5459, "VP40"),
                 (6038, 8069, "GP"), (8508, 9375, "VP30"),
                 (10344, 11100, "VP24"), (11580, 18219, "L")]

        annotation_ax = axs[len(chart_datas)]
        line_labels = []
        for i, cd in enumerate(chart_datas):
            for j, y in enumerate(cd.ys):
                # A white, all-zero series on the annotation row (presumably
                # so that row shares the data's x range).
                annotation_ax.plot(cd.x, [0 for _ in y], color='white')
                c_label = ebola_consensus_labels[cd.labels[j]][0]
                color = ebola_consensus_labels[cd.labels[j]][1]
                c_label = r"\textit{" + c_label + "}"
                # A label already used is prefixed with an underscore, which
                # keeps the repeated line out of the automatic legend.
                if c_label in line_labels:
                    c_label = '_' + c_label
                line_labels.append(c_label)

                axs[i].plot(cd.x, y, label=c_label, color=color)

            # Shade the gene regions on this subplot.
            for g in genes:
                axs[i].add_patch(
                    Rectangle((g[0], 0),
                              g[1] - g[0],
                              1,
                              color=(0.1, 0.2, 0.5, 0.3)))
            x_label = r"\textit{" + f'{ebola_consensus_labels[str(cd.consensus)][0]}' + "}"
            axs[i].set_xlabel(x_label)
            axs[i].set_ylim(0, 1)

        fig.legend(
            loc="lower right",  # Position of legend
            borderaxespad=1,  # Spacing around legend box
            title="Consensus sequence",  # Title for the legend
            ncol=2,
        )

        # Name every gene on the annotation row; the white segment itself is
        # not visible.
        for r in genes:
            annotation_ax.plot([r[0], r[1]], [1, 1], color="white")
            annotation_ax.annotate(r[2], (r[0], 1))

        # Strip ticks, tick labels and the frame from the annotation row.
        annotation_ax.tick_params(
            axis='x',  # changes apply to the x-axis
            which='both',  # both major and minor ticks are affected
            bottom=False,  # ticks along the bottom edge are off
            top=False,  # ticks along the top edge are off
            labelbottom=False)  # labels along the bottom edge are off
        annotation_ax.tick_params(
            axis='y',  # changes apply to the y-axis
            which='both',  # both major and minor ticks are affected
            left=False,  # ticks along the left edge are off
            right=False,  # ticks along the right edge are off
            labelleft=False)  # labels along the left edge are off
        annotation_ax.spines['top'].set_visible(False)
        annotation_ax.spines['right'].set_visible(False)
        annotation_ax.spines['bottom'].set_visible(False)
        annotation_ax.spines['left'].set_visible(False)

        fig.text(0.005,
                 0.6,
                 'Local compatibility',
                 ha='center',
                 va='center',
                 rotation='vertical')

        fig.tight_layout()
        fig.savefig(chart_path, dpi=100)

    def produce_local_compatibility_chart(consensuses_group: List[int]):
        frame_size = 400
        frame_step = 200
        joint_chart_name = "_".join([str(c) for c in consensuses_group])
        joint_chart_path = output_dir_path.joinpath(f"{joint_chart_name}.png")
        chart_datas = []
        for consensus in consensuses_group:
            # The tree is the `affinity_tree` parameter; the previous code
            # read an undefined name `consensus_tree` here.
            consensus_path = affinity_tree.nodes[consensus].consensus
            consensus_length = len(consensus_path)
            ys = []
            labels = []
            # Defined up front so a single-member group (no comparisons)
            # still yields a chart instead of an unbound name.
            x = []
            for consensus_to_compare in set(consensuses_group) - {consensus}:
                y = []
                consensus_to_compare_path = affinity_tree.nodes[
                    consensus_to_compare].consensus
                frame_start = 0
                # Clamp the first window too: a consensus shorter than
                # frame_size must not be indexed past its end.
                frame_end = min(frame_start + frame_size, consensus_length)
                x = []
                # Strict comparison: a window starting exactly at the end of
                # the consensus is empty and would divide by zero.
                while frame_start < consensus_length:
                    frame_nodes = {
                        consensus_path[node_index]
                        for node_index in range(frame_start, frame_end)
                    }
                    # Share of this window's nodes that also lie on the
                    # other consensus.
                    comp = len(
                        frame_nodes.intersection(
                            consensus_to_compare_path)) / len(frame_nodes)
                    y.append(comp)
                    # The point is placed half a step after the window start.
                    x.append(frame_start + frame_step / 2)
                    frame_start += frame_step
                    frame_end = min(frame_end + frame_step, consensus_length)

                ys.append(y)
                labels.append(str(consensus_to_compare))
            chart_datas.append(Chart(x, consensus, ys, labels))
        produce_joint_chart(chart_datas, joint_chart_path)

    current_path = Path(os.path.abspath(__file__)).resolve()
    output_dir_path = pathtools.get_child_dir(
        current_path.parent, "charts_ebola_400_200_middle_prostokaty")

    for g in groups:
        produce_local_compatibility_chart(g)
Example #8
0
def main():
    """Run the command-line pipeline: build a poagraph and export results.

    Steps, driven by the parsed command-line arguments:

    1. Optionally attach file log handlers or disable logging.
    2. Build the poagraph from a MAF (raw or via DAG) or a PO input.
    3. If a consensus mode was requested, generate the consensus tree and
       save it as "consensus_tree.newick".
    4. Optionally save the poagraph as PO and the sequences (and
       consensuses) as FASTA.
    5. Always save "pangenome.json" with the run summary.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    start = datetime.datetime.now()
    if not args.quiet and args.verbose:
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "details",
                                              "details.log",
                                              propagate=False)
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "",
                                              "details.log",
                                              propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph, dagmaf, fasta_provider = None, None, None
    if isinstance(args.multialignment, Maf) and args.raw_maf:
        poagraph = Poagraph.build_from_maf(args.multialignment, args.metadata)
    elif isinstance(args.multialignment, Maf) and not args.raw_maf:
        fasta_provider = cli.resolve_fasta_provider(args)
        poagraph, dagmaf = Poagraph.build_from_dagmaf(args.multialignment,
                                                      fasta_provider,
                                                      args.metadata)
    elif isinstance(args.multialignment, Po):
        poagraph = Poagraph.build_from_po(args.multialignment, args.metadata)

    consensus_tree = None
    if args.consensus is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        # When missing symbols are filled with a constant, that symbol is
        # checked against the substitution matrix.
        if fasta_provider is not None and isinstance(fasta_provider,
                                                     ConstSymbolProvider):
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_symbol.as_str())

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "consensus")

        if args.consensus == 'poa':
            consensus_tree = simple_tree_generator.get_simple_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.hbmin,
                args.verbose)
        elif args.consensus == 'tree':
            max_strategy = cli.resolve_max_strategy(args)
            node_strategy = cli.resolve_node_strategy(args)
            consensus_tree = tree_generator.get_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.stop, args.p,
                max_strategy, node_strategy, args.verbose)

        # Best effort: sequence names are optional. Only the failures the
        # lookup itself can produce are tolerated, so that interrupts and
        # unrelated errors are no longer swallowed.
        try:
            seq_id_to_name = {
                seq_id: seq.seqmetadata["name"]
                for seq_id, seq in poagraph.sequences.items()
            }
        except (AttributeError, KeyError, TypeError):
            seq_id_to_name = None

        newick_consensus_tree = consensus_tree.as_newick(seq_id_to_name)

        pathtools.save_to_file(
            newick_consensus_tree,
            pathtools.get_child_path(args.output_dir, "consensus_tree.newick"))

    if args.output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        pathtools.save_to_file(
            pangenome_po,
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        pathtools.save_to_file(
            sequences_fasta,
            pathtools.get_child_path(args.output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = consensuses_tree_to_fasta(
                poagraph, consensus_tree)
            pathtools.save_to_file(
                consensuses_fasta,
                pathtools.get_child_path(args.output_dir, "consensuses.fasta"))

    end = datetime.datetime.now()
    pangenomejson = to_PangenomeJSON(task_parameters=cli.get_task_parameters(
        args, running_time=f"{end-start}s"),
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)

    pangenome_json_str = to_json(pangenomejson)
    pathtools.save_to_file(
        pangenome_json_str,
        pathtools.get_child_path(args.output_dir, "pangenome.json"))
Example #9
0
def main():
    """Run the command-line pipeline: build a poagraph and export results.

    Driven by the parsed command-line arguments, this optionally configures
    logging, builds the poagraph from a MAF (raw or via DAG) or a PO input,
    optionally builds an affinity tree and saves it as
    "affinity_tree.newick", optionally exports PO and FASTA files, and
    always saves "pangenome.json" with the run summary.
    """
    args = cli.get_parser().parse_args()
    start = datetime.datetime.now()

    if args.verbose and not args.quiet:
        # Both the "details" logger and the "" logger write to details.log.
        for logger_name in ("details", ""):
            logprocess.add_file_handler_to_logger(args.output_dir,
                                                  logger_name,
                                                  "details.log",
                                                  propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph = None
    dagmaf = None
    fasta_provider = None
    alignment = args.multialignment
    if isinstance(alignment, msa.Maf):
        if args.raw_maf:
            poagraph = builder.build_from_maf(alignment, args.metadata)
        else:
            fasta_provider = cli.resolve_fasta_provider(args)
            poagraph, dagmaf = builder.build_from_dagmaf(
                alignment, fasta_provider, args.metadata)
    elif isinstance(alignment, msa.Po):
        poagraph = builder.build_from_po(alignment, args.metadata)

    affinity_tree = None
    if args.affinity is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        # When missing bases are filled with a constant, that symbol is
        # checked against the substitution matrix.
        uses_const_base = (fasta_provider is not None and isinstance(
            fasta_provider, missings.ConstBaseProvider))
        if uses_const_base:
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_base.as_str())

        tree_dir = pathtools.get_child_dir(args.output_dir, "affinitytree")

        mode = args.affinity
        if mode == 'poa':
            affinity_tree = at_builders.build_poa_affinity_tree(
                poagraph, blosum, tree_dir, args.hbmin, args.verbose)
        elif mode == 'tree':
            affinity_tree = at_builders.build_affinity_tree(
                poagraph, blosum, tree_dir, args.stop, args.p, args.verbose)

        # Sequence metadata is handed to the Newick export only when a
        # metadata input was given.
        seq_id_to_metadata = None
        if args.metadata is not None:
            seq_id_to_metadata = {}
            for seq_id, seq in poagraph.sequences.items():
                seq_id_to_metadata[seq_id] = seq.seqmetadata

        newick = affinity_tree.as_newick(seq_id_to_metadata,
                                         separate_leaves=True)
        pathtools.save_to_file(
            newick, pathtools.get_child_path(tree_dir,
                                             "affinity_tree.newick"))

    if args.output_po:
        pathtools.save_to_file(
            po.poagraph_to_PangenomePO(poagraph),
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        pathtools.save_to_file(
            fasta.poagraph_to_fasta(poagraph),
            pathtools.get_child_path(args.output_dir, "_sequences.fasta"))
        if affinity_tree:
            pathtools.save_to_file(
                fasta.affinity_tree_to_fasta(poagraph, affinity_tree),
                pathtools.get_child_path(args.output_dir,
                                         "affinitytree.fasta"))

    end = datetime.datetime.now()
    # Here `json` refers to whatever the file bound to that name; it must
    # provide to_PangenomeJSON and to_json.
    summary = json.to_PangenomeJSON(
        task_parameters=cli.get_task_parameters(args,
                                                running_time=f"{end-start}s"),
        poagraph=poagraph,
        dagmaf=dagmaf,
        affinity_tree=affinity_tree)

    pathtools.save_to_file(
        json.to_json(summary),
        pathtools.get_child_path(args.output_dir, "pangenome.json"))