def configure_primer_designers_from_docopt():
    """
    Yield one configured ``PrimerDesigner`` per construct on the command line.

    The command line is parsed with docopt against this module's docstring.
    Every ``<constructs>`` argument is then split with ``shlex`` and parsed a
    second time as its own small command line, so a single (quoted) argument
    may carry its own options.  Any option such an argument leaves unset falls
    back to the value given on the top-level command line.
    """
    import docopt
    import shlex
    import sgrna_sensor

    top = docopt.docopt(__doc__)

    # Top-level values, used whenever a per-construct option is not given.
    fallback_backbone = top['--backbone'] or 'on'
    fallback_spacer = top['--spacer'] or 'none'
    fallback_quikchange = top['--quikchange']
    fallback_cut = top['--cut']
    fallback_tm = top['--tm']
    fallback_verbose = top['--verbose']

    for group in top['<constructs>']:
        opts = docopt.docopt(__doc__, shlex.split(group))

        for construct_name in opts['<constructs>']:
            designer = PrimerDesigner()
            designer.name = construct_name
            designer.spacer = opts['--spacer'] or fallback_spacer
            designer.quikchange = opts['--quikchange'] or fallback_quikchange
            designer.cut = int_or_none(opts['--cut'] or fallback_cut)

            # With no Tm given anywhere, use 78 for quikchange designs and 60
            # otherwise.
            requested_tm = opts['--tm'] or fallback_tm
            if not requested_tm:
                requested_tm = 78 if designer.quikchange else 60
            designer.tm = float(requested_tm)

            designer.verbose = opts['--verbose'] or fallback_verbose

            construct = sgrna_sensor.from_name(
                construct_name, target=designer.spacer)
            designer.name = construct.underscore_name
            designer.construct = construct.dna

            backbone_name = opts['--backbone'] or fallback_backbone
            designer.backbone = sgrna_sensor.from_name(
                backbone_name, target=designer.spacer).dna

            yield designer
def design_exists(construct):
    """
    Report whether ``sgrna_sensor.from_name`` accepts the given name.

    Returns False if the lookup raises ``ValueError`` and True otherwise; any
    other exception propagates to the caller.
    """
    try:
        sgrna_sensor.from_name(construct)
    except ValueError:
        return False
    return True
def name_to_sequence(row):
    """
    Return the RNA sequence for the design named in ``row.design``.

    Designs are looked up with the 'aavs' target.  For the ``IND_DIM``
    strategy the design name has the form ``<name>/<n>``; the ``<name>/5/<n>``
    and ``<name>/3/<n>`` constructs are looked up separately and their RNA
    sequences are joined with '&'.
    """
    if row.strategy != Strategy.IND_DIM.value:
        return sgrna_sensor.from_name(row.design, target='aavs').rna

    name, n = row.design.split('/')
    half_5 = sgrna_sensor.from_name(f'{name}/5/{n}', target='aavs')
    half_3 = sgrna_sensor.from_name(f'{name}/3/{n}', target='aavs')
    return '{}&{}'.format(half_5.rna, half_3.rna)
def name_to_pretty_sequence(row):
    """
    Return the design's RNA sequence as a LaTeX verbatim snippet with color
    markers inserted at the start of each domain.

    A mapping from sequence index to color name is built for the design, and
    ``insert_color`` is then applied once per index.  A color of ``None``
    (used for the end of the aptamer) means "reuse the color found two entries
    further along the descending list of indices".
    """
    is_control = row.strategy in (
        Strategy.POS_CTL.value, Strategy.NEG_CTL.value)

    if not is_control and row.strategy == Strategy.IND_DIM.value:
        name, n = row.design.split('/')
        half_5 = sgrna_sensor.from_name(f'{name}/5/{n}', target='aavs')
        half_3 = sgrna_sensor.from_name(f'{name}/3/{n}')

        # Offset of the 3' half inside the joined string: the length of the 5'
        # half plus the three characters of the ' & ' separator.
        offset = len(half_5) + 3
        sequence = f'{half_5.rna} & {half_3.rna}'
        palette = {
            half_5.index_from_domain('stem', 0): 'ucsfblue',
            offset + half_3.index_from_domain('nexus', 0): 'ucsfnavy',
            offset + half_3.index_from_domain('hairpins', 0): 'ucsfteal',
            offset + half_3.index_from_domain('tail', 0): 'ucsfblack',
            len(half_5): 'ucsflightgrey',
            len(half_5) + 3: 'ucsforange',
            get_aptamer_start(half_5): 'ucsforange',
            offset + get_aptamer_end(half_3): 'ucsfblue',
        }
    else:
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna
        palette = {
            sgrna.index_from_domain('stem', 0): 'ucsfblue',
            sgrna.index_from_domain('nexus', 0): 'ucsfnavy',
            sgrna.index_from_domain('hairpins', 0): 'ucsfteal',
            sgrna.index_from_domain('tail', 0): 'ucsfblack',
        }
        # Only non-control designs get their aptamer highlighted.
        if not is_control:
            palette[get_aptamer_start(sgrna)] = 'ucsforange'
            palette[get_aptamer_end(sgrna)] = None

    # Work from the highest index down -- presumably so that text inserted by
    # ``insert_color`` does not shift the indices still to be processed.
    positions = sorted(palette, reverse=True)
    for rank, position in enumerate(positions):
        color = palette[position] or palette[positions[rank + 2]]
        sequence = insert_color(sequence, position, color)

    return r'{ \verb|' + sequence + '| }'
def num_items(self):
    """
    Resolve ``self._num_items`` to a number of items.

    The underlying attribute is interpreted by trying, in order:

    1. ``None``: the number of items wasn't specified, so ``None`` is
       returned.
    2. An object with a ``unique_items`` attribute (e.g. a previous selection
       step): that attribute is returned.
    3. The name of an sgRNA design known to ``sgrna_sensor``: the value of
       ``sgrna_sensor.library_size`` for that design's sequence is returned.
    4. Anything else: the attribute is passed through ``cast_to_number``.
    """
    # If the specific number of items wasn't specified, return None.
    if self._num_items is None:
        return None

    # If there was a selection step before this one, see how many unique
    # items it yielded.
    try:
        return self._num_items.unique_items
    except AttributeError:
        pass

    # If the number of items is the name of an sgRNA design, report the
    # theoretical size of that library.  This is a best-effort lookup: any
    # ordinary failure (module missing, unknown name, wrong type, ...) just
    # means "not a design name".  ``Exception`` rather than a bare ``except``
    # so that KeyboardInterrupt and SystemExit still propagate.
    try:
        import sgrna_sensor
        design = sgrna_sensor.from_name(self._num_items)
        return sgrna_sensor.library_size(design.seq)
    except Exception:
        pass

    # If none of these conditions apply, return the underlying attribute,
    # converted to a number if necessary.
    return cast_to_number(self._num_items)
def tabulate_sequences():
    """
    Build a data frame with one row per entry of the manual-alignment file.

    The YAML file at ``alignment_path`` maps design names to manually aligned
    sequences.  For each name the row records a 1-based id, the name, its
    domain (``name_to_domain``), the design's DNA sequence, the manual
    alignment, the library complexity reported by
    ``sgrna_sensor.library_size`` and the base-4 logarithm of that complexity.

    Raises:
        ValueError: If the letters of a manual alignment (everything in
            'ACGTN') do not spell out the design's actual DNA sequence.
    """
    with alignment_path.open() as file:
        # ``yaml.load`` without a Loader is rejected by PyYAML >= 6 and is
        # unsafe on older versions; this file is plain data, so use the safe
        # loader.  An empty file loads as None, which is treated as "no rows".
        manual_alignment = yaml.safe_load(file) or {}

    rows = []

    for row_id, name in enumerate(manual_alignment, 1):
        domain = name_to_domain(name)
        sequence = sgrna_sensor.from_name(name).dna
        alignment = manual_alignment.get(name)
        complexity = sgrna_sensor.library_size(sequence)

        rows.append({
            'id': row_id,
            'name': name,
            'domain': domain,
            'sequence': sequence,
            'manual_alignment': alignment,
            'complexity': complexity,
            'log4_complexity': math.log(complexity, 4),
        })

        # Only entries that actually have an alignment can be validated.  (The
        # guard has to look at this entry's alignment, not at the mapping as a
        # whole, which is never empty inside this loop.)
        if alignment:
            real_seq = sequence
            aligned_seq = ''.join(x for x in alignment if x in 'ACGTN')

            if real_seq != aligned_seq:
                raise ValueError(f"""\
Alignment for {name} has the wrong sequence:
> {real_seq}
> {aligned_seq}
""")

    return pd.DataFrame(rows)
def is_splitter(row):
    """
    Report whether the design in ``row`` has a non-default aptamer splitter.

    Returns False for the ``IND_DIM`` strategy and for designs without an
    'aptamer/splitter' domain; otherwise returns True exactly when that domain
    is something other than 'GAAA'.
    """
    if row.strategy == Strategy.IND_DIM.value:
        return False

    sgrna = sgrna_sensor.from_name(row.design, target='aavs')

    try:
        splitter = sgrna['aptamer/splitter']
    except KeyError:
        return False

    return splitter != 'GAAA'
def parse_library(library, size=None):
    """
    Normalize a library specification into a ``(name, size)`` pair.

    Args:
        library: Either the name of an sgRNA design (a string), or an
            iterable of exactly two items ``(name, size)``.
        size: If given (not None), it is paired with ``library`` as-is and no
            lookup is performed.

    Returns:
        A 2-tuple.  When ``size`` is given this is ``(library, size)``.  When
        ``library`` is a string, the size is computed by
        ``sgrna_sensor.library_size`` from the named design's sequence.
        Otherwise ``library`` is unpacked into the pair.
    """
    if size is not None:
        return library, size

    if isinstance(library, str):
        # Imported only on the path that needs it, so that callers who already
        # know the size do not depend on (or pay for) sgrna_sensor.
        import sgrna_sensor
        design = sgrna_sensor.from_name(library)
        return library, sgrna_sensor.library_size(design.seq)

    library_name, library_size = library
    return library_name, library_size
def load_manual_alignments():
    """
    Load 'manual_alignments.yml' and validate the entry for every hit.

    For each name in the module-level ``hits``, the letters of its manual
    alignment (everything in 'ACGU') must spell out the RNA sequence that
    ``sgrna_sensor`` reports for that name.

    Returns:
        The mapping loaded from the YAML file.

    Raises:
        ValueError: If an alignment does not match its design's sequence.
    """
    yml_path = Path('manual_alignments.yml')

    with yml_path.open() as file:
        # ``yaml.load`` without a Loader is rejected by PyYAML >= 6 and is
        # unsafe on older versions; this file is plain data, so use the safe
        # loader.
        manual_alignments = yaml.safe_load(file)

    for hit in hits:
        real_seq = sgrna_sensor.from_name(hit).rna
        aligned_seq = ''.join(
            x for x in manual_alignments[hit]
            if x in 'ACGU'
        )
        if real_seq != aligned_seq:
            raise ValueError(f"""\
The manual alignment for {hit} has the wrong sequence!
> {real_seq}
> {aligned_seq}""")

    return manual_alignments
def calculate_fold(name):
    """
    Print folding energies for the named design and the resulting estimate of
    how much more probable the expected fold is in the "on" ensemble than in
    the "off" ensemble.

    The design is looked up with ``target=None``.  Four values are obtained
    from the ``RNA`` module (presumably the ViennaRNA bindings -- confirm) and
    printed: an ensemble value and a single-structure value, each computed
    once for the "off" case and once for the "on" case.  The "on" structure
    is the expected fold with every unpaired position ('.') replaced by the
    corresponding character of the design's constraint string.

    Output is written with single-argument ``print(...)`` calls, which behave
    the same under Python 2 and Python 3.
    """
    print(name)

    design = sgrna_sensor.from_name(name, target=None)
    design.show()
    print(design.expected_fold)

    tot_e_off = RNA.pf_fold_par(
        design.seq, design.constraints, None, False, False, False)
    min_e_off = RNA.fold_par(
        design.seq, design.expected_fold, None, True, False)

    # Overlay the expected fold on the constraints.  Compare characters with
    # ``!=``: ``is not`` tests object identity, which only happens to work for
    # short strings as an interpreter implementation detail.
    ex = ''.join(
        f if f != '.' else c
        for f, c in zip(design.expected_fold, design.constraints)
    )

    tot_e_on = RNA.pf_fold_par(
        design.seq, design.constraints, None, False, True, False)
    min_e_on = RNA.fold_par(design.seq, ex, None, True, False)

    print(tot_e_off)
    print(min_e_off)
    print(tot_e_on)
    print(min_e_on)

    kT = 0.593  # kcal/mol

    prob_off = math.exp((tot_e_off - min_e_off) / kT)
    prob_on = math.exp((tot_e_on - min_e_on) / kT)

    print('')
    print("Probability of active conformation in:")
    print("off ensemble: {}".format(prob_off))
    print("on ensemble: {}".format(prob_on))
    print('')
    print("Fold increase of active conformation:")
    print(prob_on / prob_off)
    print('')
    print(79 * '*')
    print('')
print "on ensemble:", prob_on print print "Fold increase of active conformation:" print prob_on / prob_off print print 79 * '*' print if __name__ == '__main__': args = docopt.docopt(__doc__) scorefxn = locals()[args['--scorefxn']] print args['--scorefxn'] for name in args['<names>']: design = sgrna_sensor.from_name(name, target=None) x_off = scorefxn(design, False) #x_on = scorefxn(design, True) #x_ratio = x_on / x_off #print '{name:10s} {x_ratio:.2f} ({x_on:.2f} / {x_off:.2f})'.format(**locals()) print '{name:10s} {x_off:.2f}'.format(**locals()) #print """\ #This doesn't do what I want. What I want is to get the base-pairing #probability matrix (BPPM) and to calculate the probability of any structure #that makes the wildtype base pairs. This calculates the equilibrium population #of the single lowest scoring structure that makes the wildtype base pairs. #"""
yml_path = Path('manual_alignments.yml') with yml_path.open() as file: manual_alignments = yaml.load(file) for hit in hits: real_seq = sgrna_sensor.from_name(hit).rna aligned_seq = ''.join( x for x in manual_alignments[hit] if x in 'ACGU' ) if real_seq != aligned_seq: raise ValueError(f"""\ The manual alignment for {hit} has the wrong sequence! > {real_seq} > {aligned_seq}""") return manual_alignments if __name__ == '__main__': args = docopt.docopt(__doc__) fold_changes = calc_fold_changes(args['--force']) fold_change_strs = { k: fr"{v[0]:.1f}\textsuperscript{{{'−+'[v[1]]}}}" for k,v in fold_changes.items() } manual_alignments = load_manual_alignments() ligands = {k: sgrna_sensor.from_name(k).ligand or 'theo' for k in hits} sgrna_sensor.render_latex_table('library_hits.tex', locals())
def get_molecular_weight(name):
    """
    Return the molecular weight to use for the named species.

    Names beginning with 'pcr21' get a fixed value; every other name is
    lower-cased, looked up with ``sgrna_sensor.from_name``, and the construct's
    ``mass('rna')`` is returned.
    """
    if not name.startswith('pcr21'):
        return sgrna_sensor.from_name(name.lower()).mass('rna')

    return 2686183.94
def get_pretty_domains(row):
    """
    Split the design's RNA sequence into consecutive colored segments.

    A list of ``(color, start index)`` pairs is built for the design, sorted
    by index, and used to cut the sequence into one ``Domain`` per segment.
    Everything before the first index is colored 'ucsfblack'.  A color of
    ``None`` (used for the end of the aptamer) means "reuse the color from two
    segments earlier".

    Returns:
        A list of ``Domain`` objects (each with ``seq`` and ``color``
        attributes) that together cover the whole sequence in order.
    """

    def aptamer_start(sgrna):
        # Prefer the 5' half of a split aptamer; fall back to the whole one.
        try:
            return sgrna.index_from_domain("aptamer/5'", 0)
        except KeyError:
            return sgrna.index_from_domain("aptamer", 0)

    def aptamer_end(sgrna):
        # Prefer the 3' half of a split aptamer; fall back to the whole one.
        try:
            key = "aptamer/3'"
            end = sgrna.index_from_domain(key, 0) + len(sgrna[key])
        except KeyError:
            key = "aptamer"
            end = sgrna.index_from_domain(key, 0) + len(sgrna[key])
        return end

    def backbone_entries(sgrna):
        return [
            ('ucsfblue', sgrna.index_from_domain('stem', 0)),
            ('ucsfnavy', sgrna.index_from_domain('nexus', 0)),
            ('ucsfteal', sgrna.index_from_domain('hairpins', 0)),
            ('ucsfblack', sgrna.index_from_domain('tail', 0)),
        ]

    class Domain:

        def __init__(self, seq, color):
            self.seq = seq
            self.color = color

        def __repr__(self):
            return f"Domain('{self.seq}', '{self.color}')"

    is_control = row.algorithm == Algorithm.CONTROL.value
    is_dimer = (not is_control) and row.strategy == Strategy.IND_DIM.value

    if is_dimer:
        name, n = row.design.split('/')
        half_5 = sgrna_sensor.from_name(f'{name}/5/{n}', target='aavs')
        half_3 = sgrna_sensor.from_name(f'{name}/3/{n}')

        # Indices into the 3' half are shifted past the 5' half and the
        # divider placed between the two halves.
        divider = ' '
        offset = len(half_5) + len(divider)
        sequence = f'{half_5.rna}{divider}{half_3.rna}'

        domain_map = [
            ('ucsfpurple', aptamer_start(half_5)),
            (None, offset + aptamer_end(half_3)),
            ('ucsfblue', half_5.index_from_domain('stem', 0)),
            ('ucsfnavy', offset + half_3.index_from_domain('nexus', 0)),
            ('ucsfteal', offset + half_3.index_from_domain('hairpins', 0)),
            ('ucsfblack', offset + half_3.index_from_domain('tail', 0)),
        ]
    else:
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna

        # Controls show only the backbone domains; everything else also gets
        # its aptamer highlighted.
        if is_control:
            aptamer_entries = []
        else:
            aptamer_entries = [
                ('ucsfpurple', aptamer_start(sgrna)),
                (None, aptamer_end(sgrna)),
            ]
        domain_map = aptamer_entries + backbone_entries(sgrna)

    ordered = sorted(domain_map, key=lambda entry: entry[1])
    colors = ['ucsfblack'] + [color for color, _ in ordered]
    cuts = [index for _, index in ordered]

    starts = [0] + cuts
    stops = cuts + [len(sequence)]

    return [
        Domain(sequence[a:b], colors[i] or colors[i - 2])
        for i, (a, b) in enumerate(zip(starts, stops))
    ]
default_cut = args['--cut'] default_tm = args['--tm'] default_verbose = args['--verbose'] primers = {} for name in args['<constructs>']: sub_cli = shlex.split(name) sub_args = docopt.docopt(__doc__, sub_cli) for sub_name in sub_args['<constructs>']: designer = mut.PrimerDesigner() designer.name = sub_name designer.spacer = sub_args['--spacer'] or default_spacer designer.quikchange = sub_args['--quikchange'] or default_quikchange designer.cut = mut.int_or_none(sub_args['--cut'] or default_cut) designer.tm = mut.float_or_none(sub_args['--tm'] or default_tm) designer.verbose = sub_args['--verbose'] or default_verbose sgrna = sgrna_sensor.from_name(sub_name, target=designer.spacer) designer.name = sgrna.underscore_name designer.construct = sgrna.dna designer.backbone = sgrna_sensor.from_name(sub_args['--backbone'] or default_backbone_name, target=designer.spacer).dna primers.update(designer.design_primers()) primers = mut.consolidate_duplicate_primers(primers) mut.report_primers_for_elim(primers)
import yaml from sgrna_sensor import from_name, t7_promoter as t7, spacer, aptamer from sgrna_sensor import render_latex_table components = [ ('T7 promoter', t7()), ('AAVS spacer', spacer('aavs')), ('sgG1 spacer', spacer('gfp1')), ('sgR1 spacer', spacer('rfp1')), ('sgG2 spacer', spacer('gfp2')), ('sgR2 spacer', spacer('rfp2')), ('folA spacer', spacer('fol1')), ('Theophylline (theo) aptamer', aptamer('theo')), ('3-Methylxanthine (3mx) aptamer', aptamer('3mx')), ('Thiamine pyrophosphate (tpp) aptamer', aptamer('tpp')), ('Positive control', from_name('on')), (r'Negative control (G63C, G64C)', from_name('off')), (r'\ligrnaF{}', from_name('mhf/30')), (r'\ligrnaF[2]{}', from_name('mhf/37')), (r'\ligrnaF[3]{}', from_name('w30/65')), (r'\ligrnaF[4]{}', from_name('w30/64/1')), (r'\ligrnaB{}', from_name('rxb/11/1')), (r'\ligrnaB[2]{}', from_name('w11/2')), (r'\ligrnaB[3]{}', from_name('m11/ga')), ] with open('manual_alignments.yml') as file: manual_alignments = yaml.load(file) # Make sure there aren't any typos. for name, sgrna in components:
'w30 64', 'w30 64/1', 'w30 65', 'w30 65/1', 'w30 77', 'w30 77/1', 'w30 77/2', 'w30 78', 'w30 78/1', 'w30 78/2', 'w30 79', 'w30 79/1', 'w30 80', 'w30 80/1', ] prefolded = '............................................................(.........................).........................' print(' ' * (9 + 1 + 6 + 3) + prefolded) for name in designs: sgrna = sgrna_sensor.from_name(name) struct = vrna.fold_compound(sgrna.rna) q_tot = struct.pf()[1] struct.constraints_add(prefolded, vrna.CONSTRAINT_DB_DEFAULT) fold_prefolded, q_prefolded = struct.pf() rt_37 = 1.987203611e-3 * 310 # RT in kcal/mol at 37°C f_prefolded = exp(-(q_prefolded - q_tot) / rt_37) print(f"{sgrna.slash_name:9s} {100 * f_prefolded:6.3f}% {fold_prefolded}")
#!/usr/bin/env python3

"""\
Usage:
    sgrna_to_300nM.py <name> <ng_uL>
"""

# Convert a mass concentration (ng/uL) of the named sgRNA into a molar
# concentration (nM), using the RNA mass reported by sgrna_sensor.
if __name__ == '__main__':
    import docopt
    import sgrna_sensor

    cli = docopt.docopt(__doc__)
    construct = sgrna_sensor.from_name(cli['<name>'])
    mass_conc = float(cli['<ng_uL>'])

    molar_conc = mass_conc * 1e6 / construct.mass('rna')
    print(f'{molar_conc:.2f} nM')
from itertools import product

# Build one Gibson-assembly insert per (spacer, design) combination: the sgRNA
# DNA flanked by the two fixed overlap sequences.  The longest insert length
# is printed and all inserts are written to 'gibson_inserts.tsv' as
# "<label>\t<sequence>" lines.

spacers = 'fol1', 'fol2', 'fol3', 'fol4'
names = 'on', 'off', 'rxb/11/1', 'mhf/30'
overlap_5 = 'tgcgactactcttgcctactacctatcgactgagctgaaagaattccggttctggcaaatattctgaaatgagctgttgacaattaatcatccggctcgtataattctagt'
overlap_3 = 'tttgaattcatgtggctgaccgttctgttgtctctcgctcttccgagta'
inserts = []
longest_insert = 0

# Labels used in the output file; names not listed here are used as-is.
pretty_names = {
    'on': 'pos',
    'off': 'neg',
    'rxb/11/1': 'ligRNA⁻',
    'mhf/30': 'ligRNA⁺',
}

for spacer, name in product(spacers, names):
    sgrna = from_name(name, target=spacer)
    insert = overlap_5 + sgrna.dna + overlap_3
    longest_insert = max(longest_insert, len(insert))

    pretty_name = f'{spacer} {pretty_names.get(name, name)}'
    inserts.append((pretty_name, insert))

print(f'Longest insert: {longest_insert} bp')

# The labels contain non-ASCII characters (the superscript plus and minus), so
# the encoding is stated explicitly rather than left to the platform default,
# which cannot encode them everywhere.
with open('gibson_inserts.tsv', 'w', encoding='utf-8') as file:
    for insert in inserts:
        file.write('\t'.join(insert) + '\n')