def test_count(assembly, tmp_path):
    """Count 5-mers for ``assembly`` and check the returned table and output file.

    Verifies that the number of columns equals ``4**size / 2``, that the
    index is named ``"contig"``, and that the output path exists afterwards.
    """
    kmer_size = 5
    table_path = tmp_path / "kmers.tsv"
    result = kmers.count(
        assembly=assembly,
        size=kmer_size,
        out=table_path,
        force=False,
    )
    n_columns = result.shape[1]
    assert n_columns == 4**kmer_size / 2
    assert result.index.name == "contig"
    assert table_path.exists()
def test_count_out_exists(assembly, counts, force, tmp_path):
    """Run ``kmers.count`` when the output table is already on disk.

    The ``counts`` table is written to the output path first, then counting
    is invoked with the supplied ``force`` value. The same shape, index-name
    and file-existence checks are applied to the result.
    """
    kmer_size = 5
    table_path = tmp_path / "kmers.tsv"
    # Pre-populate the output path so the call sees an existing file.
    counts.to_csv(table_path, sep="\t", index=True, header=True)
    result = kmers.count(
        assembly=assembly,
        size=kmer_size,
        out=table_path,
        force=force,
    )
    n_columns = result.shape[1]
    assert n_columns == 4**kmer_size / 2
    assert result.index.name == "contig"
    assert table_path.exists()
def get_kmers(self, num_records: int = 5):
    """Prepare k-mer count test data and store it in ``self.data["kmers"]``.

    Counts 5-mers for ``self.metagenome``, samples ``num_records`` rows with
    a fixed random state, normalizes the sample with the ``"am_clr"`` method,
    and stores both tables as JSON strings after resetting their indices.

    Parameters
    ----------
    num_records : int, optional
        Number of rows to sample from the counts table. Must be at least 5.

    Raises
    ------
    ValueError
        If ``num_records`` is less than 5.
    """
    if num_records < 5:
        message = f"At least 5 records are required for embedding tests! provided: {num_records}"
        raise ValueError(message)

    logger.info("Preparing kmer counts test data...")

    # A k-mer size of 5 is used (noted in the original as the default).
    all_counts = kmers.count(assembly=self.metagenome, size=5)
    # Keep only `num_records` rows; the fixed seed keeps the subset reproducible.
    sampled_counts = all_counts.sample(n=num_records, random_state=42)
    # "am_clr" is the normalization method (noted in the original as the default).
    normalized_counts = kmers.normalize(df=sampled_counts, method="am_clr")

    # Move the index into a regular column on both tables before serializing.
    sampled_counts.reset_index(inplace=True)
    normalized_counts.reset_index(inplace=True)

    counts_json = sampled_counts.to_json()
    normalized_json = normalized_counts.to_json()
    self.data["kmers"] = {
        "counts": counts_json,
        "am_clr_normalized_counts": normalized_json,
    }
def test_count_wrong_size(assembly):
    """Check that a non-integer k-mer size makes ``kmers.count`` raise ``TypeError``."""
    fractional_size = 5.5
    with pytest.raises(TypeError):
        kmers.count(assembly=assembly, size=fractional_size)