def get_disruption_position_in_guide_donor_alignment(PAM, guide_with_full_PAM,
                                                     donor):
    '''
    fits the guide (with its full PAM) against the donor on both strands and
    reports how the better-scoring alignment is disrupted

    the donor and its reverse complement are each scored with
    build_backtrack_for_fitting_alignment; the sense strand is used only when
    its score is strictly greater, otherwise the reverse complement is used

    returns disruptions, query, subject, alignment
    where disruptions comes from process_disruption_positions_for_snps_indels
    and the other three values come from fitting_alignment

    NOTE(review): PAM is accepted but never used here, while global_alignment
    passes PAM as the first argument to
    process_disruption_positions_for_snps_indels - confirm which call
    signature is current
    '''
    fwd_i, fwd_j, fwd_backtrack, fwd_score = build_backtrack_for_fitting_alignment(
        donor, guide_with_full_PAM)
    donor_rc = bedgraph_computation.rev_comp(donor)
    rev_i, rev_j, rev_backtrack, rev_score = build_backtrack_for_fitting_alignment(
        donor_rc, guide_with_full_PAM)

    ## ties go to the antisense strand, exactly as a strict > comparison implies
    if fwd_score > rev_score:
        end_i, end_j, backtrack, template = fwd_i, fwd_j, fwd_backtrack, donor
    else:
        end_i, end_j, backtrack, template = rev_i, rev_j, rev_backtrack, donor_rc

    query, subject, alignment, snp_positions, indel_positions = fitting_alignment(
        end_i, end_j, backtrack, template, guide_with_full_PAM)
    disruptions = process_disruption_positions_for_snps_indels(
        snp_positions, indel_positions)
    return disruptions, query, subject, alignment
def _guide_with_NGG_on_either_strand(guide_seq, sequence):
    '''
    returns True if guide_seq immediately followed by an NGG PAM (N being any
    of A, C, T, G) occurs in sequence or in its reverse complement
    '''
    strands = (sequence, bedgraph_computation.rev_comp(sequence))
    return any((guide_seq + nt + 'GG') in strand
               for nt in 'ACTG'
               for strand in strands)


def check_guide_disruption(log_outfile, individual_variants, variant_type,
                           disruption_position, m0, PAM_chrom, PAM_strand,
                           PAM_coord, guide_seq, donor_shift, donor_start,
                           donor_end, donor_strand, left_side_donor,
                           right_side_donor, ref_seq, variant_seq, donor,
                           genomic_target):
    '''
    confirms that a donor really disrupts its guide and that the guide is
    present in the genomic target

    returns False (after writing a record to log_outfile) when either
      - guide + NGG is still intact on either strand of the donor, or
      - guide + NGG cannot be found on either strand of genomic_target
    returns True otherwise

    the log record is one tab-joined line whose fields include literal newline
    fields, so it spans several physical lines in the log
    every field is passed through str() before joining, so non-string values
    (e.g. a list of variants or an integer coordinate) no longer raise
    TypeError in str.join

    m0 is accepted for interface compatibility and is not used
    '''

    def log_failure(reason):
        ## field order and separators are kept exactly as in the log format
        info = (reason, variant_type, individual_variants, '\n',
                str(disruption_position), '\n', guide_seq, '\n',
                bedgraph_computation.rev_comp(guide_seq), '\n',
                left_side_donor + ' ' + ref_seq + ' ' + right_side_donor, '\n',
                left_side_donor + ' ' + variant_seq + ' ' + right_side_donor,
                '\n', donor, '\n', genomic_target, '\n', PAM_chrom, PAM_strand,
                str(PAM_coord), 'donor_shift_' + str(donor_shift),
                str(donor_start), str(donor_end), donor_strand)
        log_outfile.write('\t'.join(str(field) for field in info) + '\n')

    if _guide_with_NGG_on_either_strand(guide_seq, donor):
        log_failure('guide not disrupted')
        return False

    if not _guide_with_NGG_on_either_strand(guide_seq, genomic_target):
        log_failure('guide not in target')
        return False

    return True
def assemble_mutated_donor_sequence(ORF_strand, left_donor, right_donor,
                                    temp_ORF_seq, aa_num,
                                    synonymous_codons_to_mutate, upstream):
    '''
    joins the homology arms around the mutated codon window and returns the
    full donor, with the mutated window in lower case

    temp_ORF_seq is an indexable sequence of codon strings; aa_num is 1-based
    the window is the target codon plus synonymous_codons_to_mutate codons on
    its upstream side (upstream truthy) or downstream side (upstream falsy)
    for ORF_strand == '-' the joined window is reverse complemented before it
    is placed between left_donor and right_donor

    if the joined window is not a multiple of three long, the codons that were
    actually joined are printed as a diagnostic (previously the upstream slice
    was printed even when the downstream window was in use)

    NOTE(review): a second function with this name but an
    (ORF_seq, aa_range, mutated_region_lowercase) signature also appears in
    this source; if both live in one module the later definition replaces the
    earlier one - confirm which one callers get
    '''
    aa_idx = aa_num - 1
    if upstream:
        codon_window = temp_ORF_seq[aa_idx - synonymous_codons_to_mutate:aa_idx + 1]
    else:
        codon_window = temp_ORF_seq[aa_idx:aa_idx + synonymous_codons_to_mutate + 1]

    mutated_region = ''.join(codon_window)
    if ORF_strand == '-':
        mutated_region = bedgraph_computation.rev_comp(mutated_region)

    if len(mutated_region) % 3 != 0:
        ## out-of-frame window: show the codons that produced it
        print(codon_window)

    return left_donor + mutated_region.lower() + right_donor
def global_alignment(v, w, PAM):
    '''
    despite the name, this performs a fitting alignment: all of w is aligned
    against the best-matching part of v, trying v and its reverse complement

    both inputs are upper-cased first; the sense strand of v is used only when
    its score is strictly greater than the antisense score

    returns disruptions, query, subject, alignment
    where disruptions comes from
    process_disruption_positions_for_snps_indels( PAM, snps, indels )
    '''
    target = v.upper()
    pattern = w.upper()

    fwd_i, fwd_j, fwd_backtrack, fwd_score = build_backtrack_for_fitting_alignment(
        target, pattern)
    target_rc = bedgraph_computation.rev_comp(target)
    rev_i, rev_j, rev_backtrack, rev_score = build_backtrack_for_fitting_alignment(
        target_rc, pattern)

    if fwd_score > rev_score:
        chosen = (fwd_i, fwd_j, fwd_backtrack, target)
    else:
        chosen = (rev_i, rev_j, rev_backtrack, target_rc)
    end_i, end_j, backtrack, template = chosen

    query, subject, alignment, snp_positions, indel_positions = fitting_alignment(
        end_i, end_j, backtrack, template, pattern)
    disruptions = process_disruption_positions_for_snps_indels(
        PAM, snp_positions, indel_positions)
    return disruptions, query, subject, alignment
def assemble_mutated_donor_sequence(ORF_strand,
                                    left_donor,
                                    right_donor,
                                    ORF_seq,
                                    aa_range,
                                    mutated_region_lowercase=False):
    '''
    builds a complete donor: left_donor + codons of aa_range + right_donor

    ORF_seq is an indexable sequence of codon strings and aa_range is a
    ( first_aa_num, last_aa_num ) pair, 1-based and inclusive
    for ORF_strand == '-' the joined codons are reverse complemented
    the middle region is lower-cased only when mutated_region_lowercase is set
    a diagnostic is printed when the middle region is not a multiple of three
    '''
    first_aa_num, last_aa_num = aa_range
    codons = ORF_seq[first_aa_num - 1:last_aa_num]

    middle = ''.join(codons)
    if ORF_strand == '-':
        middle = bedgraph_computation.rev_comp(middle)

    if len(middle) % 3:
        print(aa_range, codons, 'is not a multiple of three')

    if mutated_region_lowercase:
        middle = middle.lower()
    return left_donor + middle + right_donor
def get_proximal_sense_and_antisense_PAMs(genome_seq, ORF_chrom, ORF_strand,
                                          ORF_exon_coords, coord, upstream):
    '''
    walks away from coord one base at a time and collects PAM sites on both
    strands

    the walk starts GUIDE_LENGTH bases from coord and moves in a direction set
    by ORF_strand and upstream; each strand is scanned independently until 10
    PAMs are found or DONOR_LENGTH - MINIMUM_HOMOLOGY * 2 + GUIDE_LENGTH
    positions have been examined
    a site is kept when hamming_distance( site, PAM ) is 0 and the guide has
    no run of NUM_CONSECUTIVE_T_DISALLOWED T's

    returns a list of tuples, plus strand hits first then minus strand hits:
    ( strand, PAM_seq, PAM_coord, guide_seq, guide_seq_with_PAM )
    minus strand sequences are reverse complemented

    ORF_exon_coords is not used
    '''
    if (ORF_strand == '+' and upstream) or (ORF_strand == '-'
                                            and not upstream):
        scan_start = coord + GUIDE_LENGTH
    else:
        scan_start = coord - GUIDE_LENGTH

    ## direction of travel per iteration; any strand other than '+' is
    ## treated like '-'
    if ORF_strand == '+':
        step = -1 if upstream else 1
    else:
        step = 1 if upstream else -1

    poly_T = NUM_CONSECUTIVE_T_DISALLOWED * 'T'

    plus_strand_PAMs = []
    position = scan_start
    shift = -GUIDE_LENGTH
    while len(plus_strand_PAMs
              ) < 10 and shift < DONOR_LENGTH - MINIMUM_HOMOLOGY * 2:
        chrom_seq = genome_seq[ORF_chrom]
        guide_start = position - GUIDE_LENGTH
        pam_end = position + PAM_length
        pam_seq = chrom_seq[position:pam_end]
        guide = chrom_seq[guide_start:position]
        guide_with_PAM = chrom_seq[guide_start:pam_end]
        if bedgraph_computation.hamming_distance(
                pam_seq, PAM) == 0 and poly_T not in guide:
            plus_strand_PAMs.append(
                ('+', pam_seq, position, guide, guide_with_PAM))
        position += step
        shift += 1

    minus_strand_PAMs = []
    position = scan_start
    shift = -GUIDE_LENGTH
    while len(minus_strand_PAMs
              ) < 10 and shift < DONOR_LENGTH - MINIMUM_HOMOLOGY * 2:
        chrom_seq = genome_seq[ORF_chrom]
        pam_end = position + PAM_length
        guide_end = pam_end + GUIDE_LENGTH
        ## on the minus strand the guide lies to the right of the PAM in
        ## chromosome coordinates, so everything is reverse complemented
        pam_seq = bedgraph_computation.rev_comp(chrom_seq[position:pam_end])
        guide = bedgraph_computation.rev_comp(chrom_seq[pam_end:guide_end])
        guide_with_PAM = bedgraph_computation.rev_comp(
            chrom_seq[position:guide_end])
        if bedgraph_computation.hamming_distance(
                pam_seq, PAM) == 0 and poly_T not in guide:
            minus_strand_PAMs.append(
                ('-', pam_seq, position, guide, guide_with_PAM))
        position += step
        shift += 1

    return plus_strand_PAMs + minus_strand_PAMs
def mutate_synonymous_codons(ORF_chrom, ORF_strand,
                             synonymous_codons_to_mutate, aa_num, aa_to_codon,
                             suboptimal_removed_aa_to_codon, ORF_info, ORF_seq,
                             upstream):
    '''
    recodes the synonymous_codons_to_mutate codons next to a target amino
    acid (upstream of it, or downstream when upstream = False) and builds the
    donors for every codon at the target position
    does not mutate the actual target amino acid in the recoded ORF copy

    for each neighbouring codon, the synonymous codon from
    suboptimal_removed_aa_to_codon with the largest hamming distance to the
    original is chosen, provided that
      - the resulting donor does not contain INTERNAL_RESTRICTION_SITE
        (either orientation), and
      - no codon in codon_to_aa placed at the target position creates that
        site together with the immediately flanking donor sequence
    if no candidate qualifies, the original codon is kept

    ORF_info[aa_num] = ( codon, aa_coords, aa )
    ORF_seq is copied with ORF_seq[:] and the copy is edited by index, so it
    must be a mutable sequence of codons

    returns control_donor, donor_library, mutated_region_chromosomal_range
      control_donor: recoded neighbours with the unmodified target codon,
        flanked by arms extended 200 bp on each side
      donor_library: the dictionary returned by generate_donors
      mutated_region_chromosomal_range: ( start, start + window length )

    reads genome_seq, codon_to_aa, ORF, DONOR_LENGTH,
    INTERNAL_RESTRICTION_SITE, rc_INTERNAL_RESTRICTION_SITE and
    INTERNAL_RESTRICTION_SITE_LENGTH from module scope
    aa_to_codon is accepted but not used

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source - confirm the indentation of the two
    statements that follow the target_codon loop
    '''
    aa_idx = aa_num - 1
    ## chromosomal start of the mutated window; plus-strand ORFs use element 0
    ## of the codon coordinates, all other strands use element 2
    ## (presumably the lowest coordinate of the window in both cases - verify)
    if ORF_strand == '+':
        if upstream:
            ref_allele_start_coord = ORF_info[
                aa_num - synonymous_codons_to_mutate][1][0]
        else:
            ref_allele_start_coord = ORF_info[aa_num][1][0]
    else:
        if upstream:
            ref_allele_start_coord = ORF_info[aa_num][1][2]
        else:
            ref_allele_start_coord = ORF_info[
                aa_num + synonymous_codons_to_mutate][1][2]
    mutated_region_length = 3 + synonymous_codons_to_mutate * 3  ## includes the amino acid to be mutated
    ## whatever is left of the donor is split between the two homology arms,
    ## the right arm taking the extra base when the remainder is odd
    homologous_arm_length = DONOR_LENGTH - mutated_region_length
    left_donor_length = homologous_arm_length // 2
    right_donor_length = homologous_arm_length - left_donor_length
    left_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord - left_donor_length:ref_allele_start_coord]
    right_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord +
        mutated_region_length:ref_allele_start_coord +
        mutated_region_length + right_donor_length]
    ## same arms extended by 200 bp outward, used only for the control donor
    extended_left_donor = genome_seq[ORF_chrom][ref_allele_start_coord -
                                                left_donor_length -
                                                200:ref_allele_start_coord]
    extended_right_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord +
        mutated_region_length:ref_allele_start_coord +
        mutated_region_length + right_donor_length + 200]
    ## working copy of the ORF codons; ORF_seq itself is left untouched
    temp_ORF_seq = ORF_seq[:]
    for idx in range(1, synonymous_codons_to_mutate + 1):
        if upstream:
            current_aa_num = aa_num - idx
            current_aa_idx = current_aa_num - 1
        else:
            current_aa_num = aa_num + idx
            current_aa_idx = current_aa_num - 1
        codon, aa_coords, aa = ORF_info[current_aa_num]
        ## default to the original codon when no candidate is accepted
        synonymous_codon_with_largest_hamming_dist = codon
        largest_hamming_dist_for_this_aa = 0
        for other_codon in suboptimal_removed_aa_to_codon[aa]:
            ## the candidate is written into the working copy so the donor
            ## built below contains it
            temp_ORF_seq[current_aa_idx] = other_codon
            hamming_distance_for_this_codon = bedgraph_computation.hamming_distance(
                other_codon, codon)
            query_donor = assemble_mutated_donor_sequence(
                ORF_strand, left_donor, right_donor, temp_ORF_seq, aa_num,
                synonymous_codons_to_mutate, upstream)
            ## checking for internal restriction sites with synonymous changes
            if hamming_distance_for_this_codon > largest_hamming_dist_for_this_aa and INTERNAL_RESTRICTION_SITE not in query_donor and rc_INTERNAL_RESTRICTION_SITE not in query_donor:
                temp_ORF_seq_check_all_target_codons = temp_ORF_seq[:]
                all_target_codons_clear_of_BspQI_introduction = True
                for target_codon in codon_to_aa:
                    temp_ORF_seq_check_all_target_codons[aa_idx] = target_codon
                    ## only the arm bases adjacent to the window are kept, so
                    ## a site found here must overlap the mutated window
                    check_all_target_codons_query_donor = assemble_mutated_donor_sequence(
                        ORF_strand,
                        left_donor[-(INTERNAL_RESTRICTION_SITE_LENGTH - 4):],
                        right_donor[:INTERNAL_RESTRICTION_SITE_LENGTH - 4],
                        temp_ORF_seq_check_all_target_codons, aa_num,
                        synonymous_codons_to_mutate, upstream)
                    ## if possible, remove synonymous codon changes that would generate a restriction site with the target codon and proximal upstream/downstream sequence
                    if INTERNAL_RESTRICTION_SITE in check_all_target_codons_query_donor or rc_INTERNAL_RESTRICTION_SITE in check_all_target_codons_query_donor:
                        all_target_codons_clear_of_BspQI_introduction = False
                if not all_target_codons_clear_of_BspQI_introduction:
                    print('restriction site prevented')
                if all_target_codons_clear_of_BspQI_introduction:
                    largest_hamming_dist_for_this_aa = hamming_distance_for_this_codon
                    synonymous_codon_with_largest_hamming_dist = other_codon
        ## commit the winner, overwriting whichever candidate was tried last
        temp_ORF_seq[
            current_aa_idx] = synonymous_codon_with_largest_hamming_dist
    ## the target codon in temp_ORF_seq was never changed, so this donor
    ## carries only the synonymous changes
    control_donor = assemble_mutated_donor_sequence(
        ORF_strand, extended_left_donor, extended_right_donor, temp_ORF_seq,
        aa_num, synonymous_codons_to_mutate, upstream)
    ## NOTE(review): mutated_region is computed here but not used afterwards
    if upstream:
        mutated_region = ''.join(
            temp_ORF_seq[aa_idx - synonymous_codons_to_mutate:aa_idx + 1])
    else:
        mutated_region = ''.join(temp_ORF_seq[aa_idx:aa_idx +
                                              synonymous_codons_to_mutate + 1])
    if ORF_strand == '-':
        mutated_region = bedgraph_computation.rev_comp(mutated_region)
    mutated_region_chromosomal_range = ref_allele_start_coord, ref_allele_start_coord + mutated_region_length
    donor_library = generate_donors(left_donor, right_donor, ORF, ORF_strand,
                                    synonymous_codons_to_mutate, aa_num,
                                    codon_to_aa, ORF_seq, temp_ORF_seq,
                                    ORF_info, ORF_chrom,
                                    ref_allele_start_coord, upstream)
    return control_donor, donor_library, mutated_region_chromosomal_range
def generate_donors(left_donor, right_donor, ORF, ORF_strand,
                    synonymous_codons_to_mutate, aa_num, codon_to_aa, ORF_seq,
                    temp_ORF_seq, ORF_info, ORF_chrom, ref_allele_start_coord,
                    upstream):
    '''
    builds one donor per codon in codon_to_aa at the target position aa_num,
    on top of the already recoded neighbouring codons in temp_ORF_seq

    returns a dictionary
      donor_library[ donor_name ] = codon_pool, full_donor_seq, vcf_fields
    with
      donor_name = '_'-joined ORF, aa change, 'control' or 'variant',
        codon change, spreading direction label, synonymous aa changes and
        synonymous codon changes
      vcf_fields = ( ORF_chrom, ref_allele_start_coord, WT_region,
        mutated_region, donor_name )
      codon_pool = 0 for variant codons found in highest_frequency_codons
        (a module-level name), 1 for all other variants and for the control

    the donor whose target codon equals the wild type codon is the control
    WT_region and mutated_region are reverse complemented for ORF_strand '-'

    NOTE(review): the control entry was originally written twice under the
    same key, first with pool 0 and then with pool 1, so only the pool 1 entry
    ever survived; that effective result is kept here - confirm whether the
    control was meant to appear in both pools
    '''
    target_idx = aa_num - 1
    wt_codon, wt_coords, wt_aa = ORF_info[aa_num]

    if upstream:
        neighbour_aa_nums = range(aa_num - synonymous_codons_to_mutate, aa_num)
        window = slice(target_idx - synonymous_codons_to_mutate,
                       target_idx + 1)
        spreading_direction = 'upstream_synonymous_changes:'
    else:
        neighbour_aa_nums = range(aa_num + 1,
                                  aa_num + synonymous_codons_to_mutate + 1)
        window = slice(target_idx,
                       target_idx + synonymous_codons_to_mutate + 1)
        spreading_direction = 'downstream_synonymous_changes:'

    ## labels describing each recoded neighbour, e.g. CCG5CCA and P5P
    codon_labels = []
    aa_labels = []
    for neighbour_num in neighbour_aa_nums:
        ref_codon, neighbour_coords, residue = ORF_info[neighbour_num]
        recoded = temp_ORF_seq[neighbour_num - 1]
        codon_labels.append(ref_codon + str(neighbour_num) + recoded)
        aa_labels.append(residue + str(neighbour_num) + residue)
    synonymous_codons_name = ','.join(codon_labels)
    synonymous_aa_name = ','.join(aa_labels)

    ## generate donor and donor names
    donor_library = {}
    for mutant_target_codon in codon_to_aa:
        candidate_ORF_seq = temp_ORF_seq[:]
        candidate_ORF_seq[target_idx] = mutant_target_codon

        mutated_region = ''.join(candidate_ORF_seq[window])
        WT_region = ''.join(ORF_seq[window])
        if ORF_strand == '-':
            mutated_region = bedgraph_computation.rev_comp(mutated_region)
            WT_region = bedgraph_computation.rev_comp(WT_region)

        is_control = mutant_target_codon == wt_codon
        donor_type = 'control' if is_control else 'variant'
        aa_change = wt_aa + str(aa_num) + codon_to_aa[mutant_target_codon]
        codon_change = wt_codon + str(aa_num) + mutant_target_codon
        donor_name = '_'.join([
            ORF, aa_change, donor_type, codon_change, spreading_direction,
            synonymous_aa_name, synonymous_codons_name
        ])

        full_donor_seq = assemble_mutated_donor_sequence(
            ORF_strand, left_donor, right_donor, candidate_ORF_seq, aa_num,
            synonymous_codons_to_mutate, upstream)
        vcf_fields = (ORF_chrom, ref_allele_start_coord, WT_region,
                      mutated_region, donor_name)

        if is_control:
            codon_pool = 1
        elif mutant_target_codon in highest_frequency_codons:
            codon_pool = 0
        else:
            codon_pool = 1
        donor_library[donor_name] = codon_pool, full_donor_seq, vcf_fields

    return donor_library
## machine-specific absolute paths
REF_DIR = '/Users/kevinroy/Dropbox/'
DIR = '/Users/kevinroy/Dropbox/steinmetz_lab/CRISPR_saturation_genome_editing/'

## design switches
STOP_CODONS_INCLUDED = True
MUTATE_INITIATING_METHIONINE = False
MUTATE_STOP_CODON = True
FOLD_OVERREPRESENTATION_OF_PSEUDO_WT_CONTROLS = 10

## oligo, guide and window sizes
OLIGO_LENGTH = 210
GUIDE_LENGTH = 20
SUBPOOL_WINDOW_SIZE = 110
NUM_CONSECUTIVE_T_DISALLOWED = 6
MIN_GUIDE_DONOR_DISRUPTION_SCORE = 6

## PAM and its reverse complement
PAM = 'NGG'
rc_PAM = bedgraph_computation.rev_comp(PAM)
PAM_length = len(PAM)

## restriction sites
END_RESTRICTION_SITES = ('GGCGCGCC', 'GCGGCCGC')
## a second internal site, 'GGTCTC', is not listed: the BsaI site in the
## AmpR ORF would need to be removed first
INTERNAL_RESTRICTION_SITE = 'GCTCTTC'
rc_INTERNAL_RESTRICTION_SITE = bedgraph_computation.rev_comp(
    INTERNAL_RESTRICTION_SITE)
INTERNAL_RESTRICTION_SITE_LENGTH = len(INTERNAL_RESTRICTION_SITE)

MINIMUM_HOMOLOGY = 20

## fwd priming sequence: GGACTTTggcgcgcc
FWD_PRIMING_SEQUENCE = 'GGACTTTggcgcgcc'.upper()

## internal cloning site is the BspQI cloning site 'GTTTGAAGAGC'
## ( 'gtttGAAGAGCGCTCTTCacga' ); a backup for regions that contain the BspQI
## site, and a future alternative, would be the BsaI site GGTCTC, giving the
## internal cloning site gtttaGAGACC (BsaI cuts 1 and 5 nt away from its
## recognition site)
INTERNAL_CLONING_SITE = 'gtttgaagagc'.upper()
sequence_type, region_type, gene_id, gene_name ] feature_annotation = '_'.join( [str(e) for e in feature_annotation]) complete_info = feature_info + seq_info + [ pA_annotation ] + [feature_annotation] coord_to_info[pA_coord] = complete_info output = [reads] + complete_info output = '\t'.join([str(e) for e in output]) + '\n' annotated_pA_sites.write(output) strand = '-' for chrom in minus_strand: for coord in sorted(list(minus_strand[chrom])): if coord > 51 and coord < (chrom_lengths[chrom] - 51): US50 = bedgraph_computation.rev_comp( R64_genome[chrom][coord:coord + 50]) DS50 = bedgraph_computation.rev_comp( R64_genome[chrom][coord - 50:coord]) reads = minus_strand[chrom][coord] pA_coord = (chrom, coord, strand) if pA_coord in coord_to_info: complete_info = coord_to_info[pA_coord] else: downstream_19 = bedgraph_computation.rev_comp( R64_genome[chrom][coord - 19:coord]) downstream_6 = bedgraph_computation.rev_comp( R64_genome[chrom][coord - 6:coord]) A19 = downstream_19.count('A') G19 = downstream_19.count('G') A6 = downstream_6.count('A') G6 = downstream_6.count('G')
def _guide_passes_sequence_filters(guide_seq, forbidden_sites, fwd_fragment,
                                   poly_T, most_T_in_last_x_bp):
    '''
    returns True when the guide, placed between fwd_fragment and 'GTTT',
    contains none of forbidden_sites, has no poly_T run, and has at most
    most_T_in_last_x_bp[0] T's in its last most_T_in_last_x_bp[1] bases
    '''
    cloning_context = fwd_fragment + guide_seq + 'GTTT'
    if any(site in cloning_context for site in forbidden_sites):
        return False
    if poly_T in guide_seq:
        return False
    return guide_seq[-most_T_in_last_x_bp[1]:].count(
        'T') <= most_T_in_last_x_bp[0]


def get_sense_and_antisense_PAMs_for_ORF(
        genome_seq,
        ORF_chrom,
        ORF_exon_coords,
        GUIDE_LENGTH,
        PAM_length,
        NUM_CONSECUTIVE_T_DISALLOWED,
        MOST_T_IN_LAST_X_BP,
        azimuth_target_seq_to_BLAST_mismatch_counts,
        max_m0=0,
        max_m1=0,
        max_m2=0,
        BP_FLANKING_ORF=20,
        RESTRICTION_SITES_DISALLOWED=('GGCGCGCC', 'GCGGCCGC', 'GCTCTTC'),
        FWD_RESTRICTION_SITE_FRAGMENT='CGCC'):
    '''
    scans an ORF plus flanking sequence for usable PAM sites on both strands

    every coordinate from
      min( first, last exon coord ) - GUIDE_LENGTH - PAM_length - BP_FLANKING_ORF
    up to (not including)
      max( first, last exon coord ) + GUIDE_LENGTH + PAM_length + BP_FLANKING_ORF
    is tested as a PAM start on the plus strand and, reverse complemented, on
    the minus strand
    a guide is kept when
      - its PAM has hamming distance 0 to the module-level PAM
      - FWD_RESTRICTION_SITE_FRAGMENT + guide + 'GTTT' contains none of
        RESTRICTION_SITES_DISALLOWED in either orientation
      - it has no run of NUM_CONSECUTIVE_T_DISALLOWED T's
      - its last MOST_T_IN_LAST_X_BP[1] bases hold at most
        MOST_T_IN_LAST_X_BP[0] T's
      - the first three mismatch counts looked up for its context sequence
        (guide plus PAM with 4 nt on the guide side and 3 nt on the PAM side)
        do not exceed max_m0, max_m1, max_m2

    a candidate is skipped when its PAM, guide or context window would extend
    beyond either end of the chromosome; without this, negative slice indices
    wrap around and yield sequences unrelated to the site

    returns plus_strand_PAMs, minus_strand_PAMs
    both are dictionaries keyed by the chromosomal PAM coordinate with values
    ( strand, PAM_seq, PAM_coord, guide_seq, guide_seq_with_PAM )

    raises KeyError if a retained context sequence is missing from
    azimuth_target_seq_to_BLAST_mismatch_counts
    '''
    start_coord = ORF_exon_coords[0]
    end_coord = ORF_exon_coords[-1]
    if end_coord < start_coord:
        start_coord, end_coord = end_coord, start_coord

    chrom_seq = genome_seq[ORF_chrom]
    chrom_length = len(chrom_seq)

    ## both orientations of every disallowed site, computed once
    forbidden_sites = []
    for site in RESTRICTION_SITES_DISALLOWED:
        forbidden_sites.append(site)
        forbidden_sites.append(bedgraph_computation.rev_comp(site))
    poly_T = NUM_CONSECUTIVE_T_DISALLOWED * 'T'

    plus_strand_PAMs = {}
    minus_strand_PAMs = {}
    scan_margin = GUIDE_LENGTH + PAM_length + BP_FLANKING_ORF
    for coord in range(start_coord - scan_margin, end_coord + scan_margin):
        if coord < 0 or coord + PAM_length > chrom_length:
            continue
        query_seq = chrom_seq[coord:coord + PAM_length]

        ## plus strand: the guide lies immediately to the left of the PAM
        plus_context_start = coord - GUIDE_LENGTH - 4
        plus_context_end = coord + PAM_length + 3
        if plus_context_start >= 0 and plus_context_end <= chrom_length:
            plus_strand_PAM_guide_seq = chrom_seq[coord - GUIDE_LENGTH:coord]
            if bedgraph_computation.hamming_distance(
                    query_seq, PAM) == 0 and _guide_passes_sequence_filters(
                        plus_strand_PAM_guide_seq, forbidden_sites,
                        FWD_RESTRICTION_SITE_FRAGMENT, poly_T,
                        MOST_T_IN_LAST_X_BP):
                plus_strand_azimuth_seq = chrom_seq[
                    plus_context_start:plus_context_end]
                m0, m1, m2, _m3, _m4, _m5 = azimuth_target_seq_to_BLAST_mismatch_counts[
                    plus_strand_azimuth_seq]
                if m0 <= max_m0 and m1 <= max_m1 and m2 <= max_m2:
                    plus_strand_PAMs[coord] = (
                        '+', query_seq, coord, plus_strand_PAM_guide_seq,
                        chrom_seq[coord - GUIDE_LENGTH:coord + PAM_length])

        ## minus strand: the guide lies to the right of the PAM in chromosome
        ## coordinates and every sequence is reverse complemented
        minus_context_start = coord - 3
        minus_context_end = coord + PAM_length + GUIDE_LENGTH + 4
        if minus_context_start >= 0 and minus_context_end <= chrom_length:
            rc_query_seq = bedgraph_computation.rev_comp(query_seq)
            minus_strand_PAM_guide_seq = bedgraph_computation.rev_comp(
                chrom_seq[coord + PAM_length:coord + PAM_length +
                          GUIDE_LENGTH])
            if bedgraph_computation.hamming_distance(
                    rc_query_seq, PAM) == 0 and _guide_passes_sequence_filters(
                        minus_strand_PAM_guide_seq, forbidden_sites,
                        FWD_RESTRICTION_SITE_FRAGMENT, poly_T,
                        MOST_T_IN_LAST_X_BP):
                minus_strand_azimuth_seq = bedgraph_computation.rev_comp(
                    chrom_seq[minus_context_start:minus_context_end])
                m0, m1, m2, _m3, _m4, _m5 = azimuth_target_seq_to_BLAST_mismatch_counts[
                    minus_strand_azimuth_seq]
                if m0 <= max_m0 and m1 <= max_m1 and m2 <= max_m2:
                    minus_strand_PAMs[coord] = (
                        '-', rc_query_seq, coord, minus_strand_PAM_guide_seq,
                        bedgraph_computation.rev_comp(
                            chrom_seq[coord:coord + PAM_length +
                                      GUIDE_LENGTH]))

    return plus_strand_PAMs, minus_strand_PAMs