def test_get_genome(tmpdir):
    """Request the phage genome from an empty collection and check the file.

    The genomic FASTA must be absent from the fresh data directory, the
    path returned by the collection must match the advertised data-file
    path, and the resulting file must have a plausible size.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_fasta = genomes.datafile_path(
        PHAGE_TAXID, data_type="genomic_fasta"
    )
    # Nothing has been requested yet, so the file must not be there.
    assert not os.path.exists(expected_fasta)

    obtained_fasta = genomes.get_taxid_genome_data_path(PHAGE_TAXID)
    assert expected_fasta == obtained_fasta

    # Loose size bounds: guards against an empty or truncated file.
    fasta_bytes = os.path.getsize(obtained_fasta)
    assert 150_000 < fasta_bytes < 200_000
def test_taxid_with_nonunique_genome(tmpdir):
    """Check the workflow for a taxid whose infos cannot be fetched directly.

    Asking for the taxid's infos must raise ``OSError`` with a message
    telling the user to download them; after the infos are downloaded with
    an explicit ``assembly_id``, the genome data file can be obtained.
    """
    taxid = 10710  # LAMBDA PHAGE - has 2 associated assemblies on NCBI
    collection = GenomeCollection(data_dir=str(tmpdir))

    # The call is expected to raise, so its result is deliberately not bound.
    with pytest.raises(OSError) as excinfo:
        collection.get_taxid_infos(taxid)
    # Must stay outside the ``with`` block: statements placed after the
    # raising call inside the block would never execute.
    assert "You will need to download" in str(excinfo.value)

    # Choose an assembly explicitly, then the genome can be retrieved.
    collection.download_taxid_genome_infos_from_ncbi(taxid, assembly_id="#1")
    path = collection.datafile_path(taxid, data_type="genomic_fasta")
    assert not os.path.exists(path)
    collection.get_taxid_genome_data_path(taxid)
    assert os.path.exists(path)
def test_get_blast_database(tmpdir):
    """Obtain nucleotide and protein BLAST databases for the phage taxid.

    The nucleotide database must be absent at first and be produced at the
    advertised data-file path; both databases are then sanity-checked via
    the size of their sequence file (``.nsq`` / ``.psq``).
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_nucl_db = genomes.datafile_path(
        PHAGE_TAXID, data_type="blast_nucl"
    )
    assert not os.path.exists(expected_nucl_db + ".nsq")

    # Test nucleotide database
    nucl_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="nucl")
    assert expected_nucl_db == nucl_db
    nsq_bytes = os.path.getsize(nucl_db + ".nsq")
    assert 30_000 < nsq_bytes < 50_000

    # Test protein database
    prot_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="prot")
    psq_bytes = os.path.getsize(prot_db + ".psq")
    assert 40_000 < psq_bytes < 60_000