Esempio n. 1
0
def test_count(assembly, tmp_path):
    """Count 5-mers for the ``assembly`` fixture and check the result.

    Verifies the number of columns in the returned table, the name of its
    index, and that the requested output file was written under ``tmp_path``.
    """
    kmer_size = 5
    table_path = tmp_path / "kmers.tsv"
    table = kmers.count(
        assembly=assembly,
        size=kmer_size,
        out=table_path,
        force=False,
    )
    n_columns = table.shape[1]
    # Expected column count is half of the 4**size possible k-mers.
    assert n_columns == 4**kmer_size / 2
    assert table.index.name == "contig"
    assert table_path.exists()
Esempio n. 2
0
def test_count_out_exists(assembly, counts, force, tmp_path):
    """Run ``kmers.count`` when the output path already holds a counts table.

    The ``counts`` fixture is written to the output path first, then
    ``kmers.count`` is called with the ``force`` fixture value. The returned
    table's column count and index name are checked, along with the
    continued existence of the output file.
    """
    kmer_size = 5
    existing_table = tmp_path / "kmers.tsv"
    # Pre-populate the output path so the call sees an existing file.
    counts.to_csv(existing_table, sep="\t", index=True, header=True)
    table = kmers.count(
        assembly=assembly,
        size=kmer_size,
        out=existing_table,
        force=force,
    )
    n_columns = table.shape[1]
    # Expected column count is half of the 4**size possible k-mers.
    assert n_columns == 4**kmer_size / 2
    assert table.index.name == "contig"
    assert existing_table.exists()
Esempio n. 3
0
    def get_kmers(self, num_records: int = 5):
        """Build k-mer test data and store it in ``self.data["kmers"]``.

        Counts k-mers (size 5) for ``self.metagenome``, samples
        ``num_records`` rows with a fixed ``random_state`` of 42, and
        normalizes the sample with the ``am_clr`` method. Both frames have
        their index reset and are stored as JSON strings under the keys
        ``"counts"`` and ``"am_clr_normalized_counts"``.

        Parameters
        ----------
        num_records : int, optional
            Number of rows to sample from the counts table (default 5).

        Raises
        ------
        ValueError
            If ``num_records`` is less than 5.
        """
        if num_records < 5:
            message = (
                f"At least 5 records are required for embedding tests! provided: {num_records}"
            )
            raise ValueError(message)
        logger.info("Preparing kmer counts test data...")
        # size=5 is passed explicitly (noted in this file as the default k-mer size).
        all_counts = kmers.count(assembly=self.metagenome, size=5)
        # Keep only `num_records` rows; the fixed seed makes the sample repeatable.
        sampled_counts = all_counts.sample(n=num_records, random_state=42)
        # method="am_clr" is passed explicitly (noted in this file as the default method).
        normalized_counts = kmers.normalize(df=sampled_counts, method="am_clr")

        # Move the index back into a regular column on both frames before serializing.
        sampled_counts.reset_index(inplace=True)
        normalized_counts.reset_index(inplace=True)

        counts_json = sampled_counts.to_json()
        normalized_json = normalized_counts.to_json()
        self.data["kmers"] = {
            "counts": counts_json,
            "am_clr_normalized_counts": normalized_json,
        }
Esempio n. 4
0
def test_count_wrong_size(assembly):
    """Check that ``kmers.count`` raises ``TypeError`` for a non-integer size (5.5)."""
    with pytest.raises(TypeError):
        kmers.count(assembly=assembly, size=5.5)