def test_get_blast_database(tmpdir):
    """Check the nucleotide and protein BLAST databases for the phage taxid.

    A collection rooted in the temporary directory must start without a
    nucleotide sequence file (``.nsq``). After requesting each database, the
    corresponding sequence file (``.nsq`` / ``.psq``) must exist with a size
    inside the expected range.
    """
    genomes = GenomeCollection(data_dir=str(tmpdir))
    expected_nucl_path = genomes.datafile_path(
        PHAGE_TAXID, data_type="blast_nucl"
    )

    # Nothing should be present yet in the fresh data directory.
    assert not os.path.exists(expected_nucl_path + ".nsq")

    # Nucleotide database: the returned path must be the expected data-file
    # path, and the ".nsq" file must now be there with a plausible size.
    nucl_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="nucl")
    assert expected_nucl_path == nucl_db
    nucl_size = os.path.getsize(nucl_db + ".nsq")
    assert 30_000 < nucl_size < 50_000

    # Protein database: only the size of the ".psq" file is checked.
    prot_db = genomes.get_taxid_blastdb_path(PHAGE_TAXID, db_type="prot")
    prot_size = os.path.getsize(prot_db + ".psq")
    assert 40_000 < prot_size < 60_000
def test_avoid_phage_blast_matches():
    """Check that AvoidBlastMatches fails on a random sequence, then resolves.

    A 30-nucleotide random sequence (seed 123) is constrained against the
    nucleotide BLAST database of taxid 697289. The constraint must fail
    before ``resolve_constraints()`` and pass afterwards.
    """
    phage_taxid = "697289"
    db_path = GenomeCollection().get_taxid_blastdb_path(
        phage_taxid, db_type="nucl"
    )

    start_sequence = random_dna_sequence(30, seed=123)
    no_phage_match = AvoidBlastMatches(
        blast_db=db_path,
        min_align_length=10,
        word_size=7,
    )
    optimization = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[no_phage_match],
        logger=None,
    )

    # The untouched random sequence is expected to violate the constraint...
    assert not optimization.all_constraints_pass()
    # ...and resolving must leave every constraint satisfied.
    optimization.resolve_constraints()
    assert optimization.all_constraints_pass()
In this example we create a 1000bp random sequence, then edit out every match with E. coli that is 14bp or longer. """ import os from genome_collector import GenomeCollection from dnachisel import ( DnaOptimizationProblem, random_dna_sequence, AvoidBlastMatches, ) # THIS CREATES THE ECOLI BLAST DATABASE ON YOUR MACHINE IF NOT ALREADY HERE collection = GenomeCollection() ecoli_blastdb = collection.get_taxid_blastdb_path(511145, db_type="nucl") # DEFINE AND SOLVE THE PROBLEM problem = DnaOptimizationProblem( sequence=random_dna_sequence(500, seed=123), constraints=[ AvoidBlastMatches( blast_db=ecoli_blastdb, min_align_length=13, perc_identity=100, word_size=5, # The bigger the word size, the faster e_value=1e20, # ungapped=False ) ],
# This is the basic example of the README to get you started.
#
# It asks genome_collector for the path of the nucleotide BLAST database of
# taxid 511145, then runs the "blastn" command-line program against it with
# the queries in "basic_example_queries.fa", writing the results to
# "basic_example_results.txt".

from genome_collector import GenomeCollection
import subprocess

# GET A BLAST PATH
collection = GenomeCollection()
db_path = collection.get_taxid_blastdb_path(taxid=511145, db_type="nucl")

# RUN BLASTN (stderr is captured so it can be reported on failure)
process = subprocess.run(
    [
        "blastn",
        "-db", db_path,
        "-query", "basic_example_queries.fa",
        "-out", "basic_example_results.txt",
    ],
    stderr=subprocess.PIPE,
)

# A non-zero return code means blastn failed.
if process.returncode:
    # The captured stderr is bytes: decode it so the message shows readable
    # text instead of a b'...' repr with escaped newlines. Undecodable bytes
    # are replaced rather than raising a second, unrelated error.
    error_text = process.stderr.decode(errors="replace")
    raise OSError("BLAST failed: %s" % error_text)

print("All good! see basic_example_results.txt for results.")