Example #1
0
def test_get_genome(tmpdir):
    """Check that requesting a genome yields the expected FASTA file.

    The genomic FASTA path for ``PHAGE_TAXID`` must not exist before the
    call to ``get_taxid_genome_data_path``; afterwards the returned path
    must equal that expected path and the file size must lie strictly
    between 150,000 and 200,000 bytes.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_path = genomes.datafile_path(
        PHAGE_TAXID, data_type="genomic_fasta"
    )

    # Nothing has been requested yet, so the file must be absent.
    assert not os.path.exists(expected_path)

    # Presumably this fetches the genome on demand -- confirm in the library.
    returned_path = genomes.get_taxid_genome_data_path(PHAGE_TAXID)
    assert expected_path == returned_path

    size_in_bytes = os.path.getsize(returned_path)
    assert 150_000 < size_in_bytes < 200_000
Example #2
0
def test_taxid_with_nonunique_genome(tmpdir):
    """Check the workflow for a taxid whose genome assembly is ambiguous.

    Steps exercised:

    1. ``get_taxid_infos`` must raise ``OSError`` with a message containing
       "You will need to download".
    2. After ``download_taxid_genome_infos_from_ncbi`` is called with an
       explicit ``assembly_id``, the genomic FASTA file must still be
       absent.
    3. ``get_taxid_genome_data_path`` must then create that file.
    """
    taxid = 10710  # LAMBDA PHAGE - has 2 associated assemblies on NCBI
    collection = GenomeCollection(data_dir=str(tmpdir))

    # The call is expected to raise, so its return value is deliberately
    # not bound to a name (it would never be assigned anyway).
    with pytest.raises(OSError) as excinfo:
        collection.get_taxid_infos(taxid)
    assert "You will need to download" in str(excinfo.value)

    # Select one assembly explicitly; this step alone must not produce the
    # FASTA file.
    collection.download_taxid_genome_infos_from_ncbi(taxid, assembly_id="#1")
    path = collection.datafile_path(taxid, data_type="genomic_fasta")
    assert not os.path.exists(path)

    collection.get_taxid_genome_data_path(taxid)
    assert os.path.exists(path)
def test_get_blast_database(tmpdir):
    """Check that BLAST databases are produced for ``PHAGE_TAXID``.

    For the nucleotide database, the ``.nsq`` file must be absent before
    the request, the returned path must equal the ``blast_nucl`` data-file
    path, and the ``.nsq`` size must lie strictly between 30,000 and
    50,000 bytes. For the protein database, only the ``.psq`` size is
    checked (strictly between 40,000 and 60,000 bytes).
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_nucl_db = genomes.datafile_path(
        PHAGE_TAXID, data_type="blast_nucl"
    )
    assert not os.path.exists(expected_nucl_db + ".nsq")

    # Nucleotide database: path and size of the .nsq file are verified.
    nucl_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="nucl")
    assert expected_nucl_db == nucl_db
    nsq_size = os.path.getsize(nucl_db + ".nsq")
    assert 30_000 < nsq_size < 50_000

    # Protein database: only the size of the .psq file is verified.
    prot_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="prot")
    psq_size = os.path.getsize(prot_db + ".psq")
    assert 40_000 < psq_size < 60_000