def test_avoid_blast_matches():
    """AvoidBlastMatches built from a list of sequences fails at first,
    reports 10 locations when evaluated, and passes once the problem's
    constraints have been resolved."""
    forbidden_sequences = [
        "GTCCTCATGCGAAAGCTACGATCGCCAACCCTGT",
        "ACCCACCTCGTTACGTCCACGGCACGAGGAATGATCTCGAGTTGCTTT",
    ]
    blast_constraint = AvoidBlastMatches(
        sequences=forbidden_sequences,
        min_align_length=8,
    )
    # NOTE(review): `sequence` is not defined in this function; it is
    # presumably a module-level fixture -- confirm it is in scope.
    problem = DnaOptimizationProblem(
        sequence=sequence,
        constraints=[blast_constraint],
    )

    # Before resolution: the constraint is violated at 10 locations.
    assert not problem.all_constraints_pass()
    evaluation = blast_constraint.evaluate(problem)
    assert len(evaluation.locations) == 10

    # After resolution: every constraint passes.
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_phage_blast_matches():
    """AvoidBlastMatches backed by a BLAST database path obtained from a
    GenomeCollection fails on a seeded random sequence, then passes once
    the problem's constraints have been resolved."""
    phage_taxid = "697289"
    blast_db_path = GenomeCollection().get_taxid_blastdb_path(
        phage_taxid, db_type="nucl"
    )
    start_sequence = random_dna_sequence(30, seed=123)
    phage_constraint = AvoidBlastMatches(
        blast_db=blast_db_path,
        min_align_length=10,
        word_size=7,
    )
    problem = DnaOptimizationProblem(
        sequence=start_sequence,
        constraints=[phage_constraint],
        logger=None,
    )

    assert not problem.all_constraints_pass()
    problem.resolve_constraints()
    assert problem.all_constraints_pass()
Beispiel #3
0
# Create the local data folder and, on first use, build the BLAST database
# that the optimization problem below queries.
if not os.path.exists('downloaded_data'):
    os.mkdir('downloaded_data')
# The ".nsq" file is used as the marker that the database already exists
# (presumably one of the files written by makeblastdb -- confirm).
if not os.path.exists(os.path.join('downloaded_data', 'ecoli_genome.nsq')):

    # Download the gzip-compressed genome.
    urlretrieve(ecoli_genome_url, genome_gz)

    # Decompress it into a FASTA file that makeblastdb reads.
    with open(genome_fasta, 'wb') as f_fasta:
        with gzip.open(genome_gz, 'rb') as f_gz:
            shutil.copyfileobj(f_gz, f_fasta)

    # Run makeblastdb to completion. The database is queried right after this
    # block, and Popen alone would return while the build is still running.
    # check_call waits for the process and raises CalledProcessError if
    # makeblastdb exits with a non-zero status.
    subprocess.check_call([
        "makeblastdb", "-in", genome_fasta, "-dbtype", "nucl", "-out",
        genome_blastdb
    ])

# DEFINE THE PROBLEM, THEN RESOLVE ITS CONSTRAINTS

# Seeded random starting sequence and the BLAST-avoidance constraint.
initial_sequence = random_dna_sequence(1000, seed=123)
blast_constraint = AvoidBlastMatches(
    blast_db=genome_blastdb,
    min_align_length=13,
    perc_identity=95,
)
problem = DnaOptimizationProblem(
    sequence=initial_sequence,
    constraints=[blast_constraint],
)

# Print the constraints summary before and after resolution.
print("\nConstraints validity before optimization\n")
summary_before = problem.constraints_text_summary()
print(summary_before)

print("\nLet's resolve these problems:\n")
problem.resolve_constraints()

print("\nAfter optimization\n")
summary_after = problem.constraints_text_summary()
print(summary_after)
Beispiel #4
0
from dnachisel import (
    DnaOptimizationProblem,
    random_dna_sequence,
    AvoidBlastMatches,
)

# CREATE THE ECOLI BLAST DATABASE ON THIS MACHINE IF IT IS NOT THERE YET
download_ecoli_genome_if_not_already_there()

# DEFINE THE PROBLEM, THEN RESOLVE ITS CONSTRAINTS

initial_sequence = random_dna_sequence(1000, seed=123)
ecoli_db_path = os.path.join("downloaded_data", "ecoli_genome")
blast_constraint = AvoidBlastMatches(
    blast_db=ecoli_db_path,
    min_align_length=13,
    perc_identity=95,
)
problem = DnaOptimizationProblem(
    sequence=initial_sequence,
    constraints=[blast_constraint],
)

print("\n\nWarning! BLASTing can take a long time! Be patient.", end="\n\n")

# Constraints summary before resolution.
summary_before = problem.constraints_text_summary()
print("Constraints validity before optimization\n", summary_before)

print("\nLet's resolve these problems:\n")
problem.resolve_constraints()

# Constraints summary after resolution.
print("\nAfter optimization\n")
summary_after = problem.constraints_text_summary()
print(summary_after)
Beispiel #5
0
)

# GET THE PATH TO THE ECOLI BLAST DATABASE FROM A GENOME COLLECTION
# (per the original note, this creates the database on your machine if it is
# not already there)

ecoli_taxid = 511145
collection = GenomeCollection()
ecoli_blastdb = collection.get_taxid_blastdb_path(ecoli_taxid, db_type="nucl")

# DEFINE THE PROBLEM, THEN RESOLVE ITS CONSTRAINTS

initial_sequence = random_dna_sequence(500, seed=123)
blast_constraint = AvoidBlastMatches(
    blast_db=ecoli_blastdb,
    min_align_length=13,
    perc_identity=100,
    word_size=5,  # The bigger the word size, the faster
    e_value=1e20,
    # ungapped=False
)
problem = DnaOptimizationProblem(
    sequence=initial_sequence,
    constraints=[blast_constraint],
)

# Constraints summary before resolution.
summary_before = problem.constraints_text_summary()
print("Constraints validity before optimization\n", summary_before)

print("\nNow resolving the problems\n")
problem.resolve_constraints(final_check=True)

print(