def test_6_repeated_sequences(self):
        """Check that FromFile raises the expected message for the
        ``test_6_repeated_sequences.fasta`` fixture."""
        source = self.fasta_dir.joinpath("test_6_repeated_sequences.fasta")
        expected_message = "Incorrect fasta provided: sequences IDs are not unique."

        with self.assertRaises(Exception) as raised:
            missings.FromFile(Path(source))

        # Compare the full exception text, not just the exception type.
        self.assertEqual(expected_message, str(raised.exception))
    def test_4_empty_sequence(self):
        """Check that FromFile raises the expected message for the
        ``test_4_empty_sequence.fasta`` fixture."""
        source = self.fasta_dir.joinpath("test_4_empty_sequence.fasta")
        expected_message = "Empty sequence in FASTA. Provide the sequence or remove its header."

        with self.assertRaises(Exception) as raised:
            missings.FromFile(Path(source))

        # Compare the full exception text, not just the exception type.
        self.assertEqual(expected_message, str(raised.exception))
    def test_5_empty_fasta(self):
        """Check that FromFile raises the expected message for the
        ``test_5_empty_fasta.fasta`` fixture."""
        source = self.fasta_dir.joinpath("test_5_empty_fasta.fasta")
        expected_message = "No sequences in zipped fastas or incorrect zipped files."

        with self.assertRaises(Exception) as raised:
            missings.FromFile(Path(source))

        # Compare the full exception text, not just the exception type.
        self.assertEqual(expected_message, str(raised.exception))
    def test_6_empty_zip(self):
        """Check that FromFile raises the expected message for the
        ``test_6_empty_zip.zip`` fixture."""
        source = self.fasta_dir.joinpath("test_6_empty_zip.zip")
        expected_message = "Incorrect zip fasta source."

        with self.assertRaises(Exception) as raised:
            missings.FromFile(Path(source))

        # Compare the full exception text, not just the exception type.
        self.assertEqual(expected_message, str(raised.exception))
    def test_1_one_sequence(self):
        """Check the provider built from ``test_1_one_sequence.fasta``
        against the sequence read back by ``self.read_sequence``."""
        source = self.fasta_dir.joinpath("test_1_one_sequence.fasta")
        provider = missings.FromFile(Path(source))

        # The comparison itself is delegated to the class helper.
        self.raise_error_if_unequal(msa.SequenceID("seq1"),
                                    self.read_sequence(source),
                                    provider)
    def test_1_one_sequence_one_file_in_zip(self):
        """Check ``seq1`` served by a provider built from the
        ``test_1_one_sequence_one_file_in_zip.zip`` fixture."""
        archive = self.fasta_dir.joinpath(
            "test_1_one_sequence_one_file_in_zip.zip")
        provider = missings.FromFile(Path(archive))

        # The comparison itself is delegated to the class helper.
        self.raise_error_if_unequal(msa.SequenceID("seq1"),
                                    "ACTGGGTGGGA",
                                    provider)
Beispiel #7
0
def run_pangtree(maf_path: Path, fasta_path: Path, output_dir: Path,
                 po_output: bool) -> None:
    """Build a poagraph from a MAF file and save results for each P value.

    The poagraph is built once. Then, for every value in ``p_values``, an
    affinity tree is built and saved as Newick, optionally together with
    the PO representation of the poagraph, followed by the pangenome JSON.

    NOTE(review): ``p_values`` is neither a parameter nor a local here; it
    is presumably defined at module level - confirm.

    Args:
        maf_path: Path to the MAF multialignment file.
        fasta_path: Path handed to ``missings.FromFile``.
        output_dir: Parent directory; results go into a child directory
            named by ``pathtools.get_current_time()``.
        po_output: When true, ``poagraph.po`` is also saved for each P.
    """
    output_dir = pathtools.get_child_dir(output_dir,
                                         pathtools.get_current_time())
    print(f"Runing pangtree for maf: {maf_path} and fasta: {fasta_path} "
          f"Output in: {output_dir}, include po file: {po_output}.")

    # Inputs shared by every P iteration.
    fasta_provider = missings.FromFile(fasta_path)
    maf = msa.Maf(pathtools.get_file_content_stringio(maf_path), maf_path)
    poagraph, dagmaf = builder.build_from_dagmaf(maf, fasta_provider)

    for p_value in p_values:
        # One sub-directory per P; dots are replaced with underscores.
        p_dir_name = str(p_value).replace(".", "_")
        p_dir = pathtools.get_child_dir(output_dir, p_dir_name)

        stop = at_params.Stop(0.99)
        tree = at_builders.build_affinity_tree(poagraph, None, p_dir, stop,
                                               at_params.P(p_value), True)

        newick = tree.as_newick(None, separate_leaves=True)
        newick_path = pathtools.get_child_path(p_dir, "affinity_tree.newick")
        pathtools.save_to_file(newick, newick_path)

        if po_output:
            po_content = po.poagraph_to_PangenomePO(poagraph)
            po_path = pathtools.get_child_path(p_dir, "poagraph.po")
            pathtools.save_to_file(po_content, po_path)

        parameters = json.TaskParameters(
            multialignment_file_path=str(maf_path),
            multialignment_format="maf",
            datatype="nucleotides",
            blosum_file_path="",
            output_path=p_dir,
            fasta_provider=fasta_provider,
            fasta_source_file=fasta_path,
            consensus_type="tree",
            stop=str(stop),
            p=str(p_value),
            output_with_nodes=False)
        pangenome = json.to_PangenomeJSON(task_parameters=parameters,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=tree)

        serialized = json.to_json(pangenome)
        json_path = pathtools.get_child_path(p_dir, "pangenome.json")
        pathtools.save_to_file(serialized, json_path)
    def test_2_three_sequences_in_two_files_in_zip(self):
        """Check ``seq1``..``seq3`` served by a provider built from the
        ``test_2_three_sequences_in_two_files_in_zip.zip`` fixture."""
        archive = self.fasta_dir.joinpath(
            "test_2_three_sequences_in_two_files_in_zip.zip")
        provider = missings.FromFile(Path(archive))

        # Sequence name paired with the bases expected for it.
        expectations = (
            ("seq1", "ACTGGGTGGGA"),
            ("seq2", "AA"),
            ("seq3", "GT"),
        )
        for name, bases in expectations:
            self.raise_error_if_unequal(msa.SequenceID(name), bases,
                                        provider)
Beispiel #9
0
def resolve_fasta_provider(args: argparse.Namespace) -> \
        missings.FastaProvider:
    """Return the fasta provider selected by the parsed arguments.

    Args:
        args: Parsed command-line namespace. ``fasta_provider`` is always
            read; ``missing_symbol``, ``cache`` or ``fasta_path`` is read
            depending on its value.

    Returns:
        A ``missings.ConstBaseProvider`` when ``fasta_provider`` is None,
        a ``missings.FromNCBI`` for ``'ncbi'`` and a ``missings.FromFile``
        for ``'file'``.

    Raises:
        Exception: If ``fasta_provider`` is ``'file'`` but ``fasta_path``
            is None, or if ``fasta_provider`` has any other value.
    """

    if args.fasta_provider is None:
        if args.missing_symbol is None:
            return missings.ConstBaseProvider(missings.MissingBase())
        return missings.ConstBaseProvider(args.missing_symbol)
    elif args.fasta_provider == 'ncbi':
        # Any falsy value (e.g. None) is passed on as False.
        use_cache = args.cache or False
        return missings.FromNCBI(use_cache)
    elif args.fasta_provider == 'file':
        if args.fasta_path is None:
            # The message names 'file', the value that selects this branch
            # (it previously said 'local', which is not an accepted value).
            raise Exception("""Fasta file source must be specified.
                               It must be provided when fasta source
                               is 'file'.""")
        return missings.FromFile(args.fasta_path)
    else:
        raise Exception("""Not known fasta provider.
                           Should be \'ncbi\' or \'file\' or None.
                           Cannot build graph.""")