def test_EnforceRegionsCompatibility():
    """Check that EnforceRegionsCompatibility can be resolved with GC bounds.

    Four 4bp regions of a random 200bp sequence must pairwise differ by at
    least 2 nucleotides, while an EnforceGCContent constraint (40-60% over
    40bp windows) is enforced at the same time.
    """
    numpy.random.seed(123)

    def compatibility_condition(location1, location2, problem):
        """Return True if the two regions differ by at least 2 nucleotides."""
        seq1 = location1.extract_sequence(problem.sequence)
        seq2 = location2.extract_sequence(problem.sequence)
        return sequences_differences(seq1, seq2) >= 2

    locations = [(0, 4), (50, 54), (100, 104), (150, 154)]
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(200, seed=123),
        constraints=[
            EnforceRegionsCompatibility(
                locations=locations,
                compatibility_condition=compatibility_condition,
                condition_label="2bp difference",
            ),
            EnforceGCContent(mini=0.4, maxi=0.6, window=40),
        ],
        logger=None,
    )

    # Before resolution, neither constraint is expected to pass.
    assert not any([e.passes for e in problem.constraints_evaluations()])
    problem.resolve_constraints()
    assert problem.all_constraints_pass()

    # Independently re-check the pairwise condition on the final sequence.
    # This must be wrapped in all(): asserting the bare list would always
    # succeed, because a non-empty list is truthy whatever it contains.
    seq = problem.sequence
    assert all(
        sequences_differences(seq[s1:e1], seq[s2:e2]) >= 2
        for (s1, e1), (s2, e2) in itertools.combinations(locations, 2)
    )
def test_avoid_hairpins_on_extremities():
    """Regression test for hairpin breaches reported at the sequence ends.

    See https://github.com/Edinburgh-Genome-Foundry/DnaChisel/issues/37
    """
    hairpin_constraint = AvoidHairpins(stem_size=3, hairpin_window=8)
    problem = DnaOptimizationProblem(
        sequence="attcaatgggggggggggggggggggggggggtagccta",
        constraints=[hairpin_constraint],
    )

    # The single constraint's evaluation should report both extremities.
    first_evaluation = problem.constraints_evaluations().evaluations[0]
    reported_locations = str(first_evaluation.locations)
    assert reported_locations == "[0-6, 32-39]"

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_AvoidPattern_with_jaspar_motifs():
    """Check AvoidPattern with PSSM motif patterns loaded from JASPAR data."""
    # Parse the in-memory JASPAR content as if it were a file.
    jaspar_handle = StringIO(JASPAR_CONTENT)
    motif_patterns = MotifPssmPattern.list_from_file(
        jaspar_handle,
        file_format="jaspar",
        relative_threshold=0.9,
    )

    # One AvoidPattern constraint per motif read from the file.
    avoid_constraints = [AvoidPattern(pattern) for pattern in motif_patterns]
    problem = DnaOptimizationProblem(
        sequence="GGGGGGGGGGTGCGTGATTAAAGGGGG",
        constraints=avoid_constraints,
    )

    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 2

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def test_avoid_matches_with_phage():
    """Check AvoidMatches against a bowtie index built for a phage taxid."""
    phage_taxid = "697289"
    genomes = GenomeCollection()
    bowtie_index_path = genomes.get_taxid_bowtie_index_path(
        phage_taxid, version="1"
    )

    matches_constraint = AvoidMatches(
        bowtie_index=bowtie_index_path, match_length=10
    )
    problem = DnaOptimizationProblem(
        sequence=random_dna_sequence(30, seed=123),
        constraints=[matches_constraint],
        logger=None,
    )

    # The seeded random sequence is expected to yield exactly 5 breaches.
    breach_locations = problem.constraints_evaluations().all_locations()
    assert len(breach_locations) == 5

    problem.resolve_constraints()
    assert problem.all_constraints_pass()
def compute_forbidden_patterns_locations(self, record):
    """Return an array flagging positions covered by a forbidden pattern.

    ``arr[i] == 1`` means that position ``i`` is surrounded by a forbidden
    pattern. The patterns checked are built with ``homopolymer_pattern``
    (each of A, T, G, C with parameter 5) and ``repeated_kmers`` (with
    parameters (4, 2), (3, 3) and (2, 4)). The returned array is sized
    with ``len(record)``.
    """
    homopolymer_constraints = [
        AvoidPattern(homopolymer_pattern(nucleotide, 5))
        for nucleotide in 'ATGC'
    ]
    repeat_constraints = [
        AvoidPattern(repeated_kmers(kmer_size, repeats))
        for kmer_size, repeats in [(4, 2), (3, 3), (2, 4)]
    ]
    problem = DnaOptimizationProblem(
        sequence=record,
        constraints=homopolymer_constraints + repeat_constraints,
    )

    # Collect the (start, end) span of every feature reported by a
    # failing constraint evaluation.
    breach_spans = []
    for evaluation in problem.constraints_evaluations():
        for feature in evaluation.locations_to_biopython_features():
            if not evaluation.passes:
                breach_spans.append(
                    (feature.location.start, feature.location.end)
                )

    # Group overlapping spans before rasterizing them into an array.
    grouped_breaches = group_overlapping_segments(breach_spans)
    return segments_to_array(grouped_breaches, len(record))