Esempio n. 1
0
def configure_primer_designers_from_docopt():
    """Yield one configured ``PrimerDesigner`` per construct on the command line.

    The command line is parsed with docopt against the module docstring.
    Every positional ``<constructs>`` argument is itself split with
    ``shlex`` and parsed again with docopt, so a single quoted argument can
    carry its own options.  An option given inside such an argument takes
    precedence over the same option given at the top level.

    Yields:
        PrimerDesigner: with ``name``, ``spacer``, ``quikchange``, ``cut``,
        ``tm``, ``verbose``, ``construct`` and ``backbone`` filled in.
    """
    import docopt
    import shlex
    import sgrna_sensor

    top_level = docopt.docopt(__doc__)

    # Top-level values act as fallbacks for the per-construct options.
    fallback = {
        '--backbone': top_level['--backbone'] or 'on',
        '--spacer': top_level['--spacer'] or 'none',
        '--quikchange': top_level['--quikchange'],
        '--cut': top_level['--cut'],
        '--tm': top_level['--tm'],
        '--verbose': top_level['--verbose'],
    }

    for entry in top_level['<constructs>']:
        local = docopt.docopt(__doc__, shlex.split(entry))

        for construct_name in local['<constructs>']:
            designer = PrimerDesigner()
            designer.name = construct_name
            designer.spacer = local['--spacer'] or fallback['--spacer']
            designer.quikchange = (
                local['--quikchange'] or fallback['--quikchange'])
            designer.cut = int_or_none(local['--cut'] or fallback['--cut'])

            # When no Tm is given anywhere, the default depends on whether
            # this designer is in quikchange mode.
            tm = local['--tm'] or fallback['--tm']
            if not tm:
                tm = 78 if designer.quikchange else 60
            designer.tm = float(tm)

            designer.verbose = local['--verbose'] or fallback['--verbose']

            # The construct and the backbone are both built against the same
            # spacer.
            sgrna = sgrna_sensor.from_name(
                construct_name, target=designer.spacer)
            designer.name = sgrna.underscore_name
            designer.construct = sgrna.dna

            backbone_name = local['--backbone'] or fallback['--backbone']
            backbone = sgrna_sensor.from_name(
                backbone_name, target=designer.spacer)
            designer.backbone = backbone.dna

            yield designer
Esempio n. 2
0
def design_exists(construct):
    """Return whether ``sgrna_sensor.from_name`` accepts *construct*.

    A ``ValueError`` from ``from_name`` is reported as ``False``; any other
    exception propagates to the caller.
    """
    try:
        sgrna_sensor.from_name(construct)
        return True
    except ValueError:
        return False
Esempio n. 3
0
def name_to_sequence(row):
    """Return the RNA sequence(s) for the design named in *row*.

    Rows whose strategy is ``Strategy.IND_DIM`` name a two-part design
    written as ``<name>/<n>``; the 5' and 3' parts are looked up separately
    and joined with ``&``.  Every other row maps to a single sequence.  All
    lookups use the ``aavs`` target.
    """
    if row.strategy != Strategy.IND_DIM.value:
        return sgrna_sensor.from_name(row.design, target='aavs').rna

    name, n = row.design.split('/')
    halves = [
        sgrna_sensor.from_name(f'{name}/{end}/{n}', target='aavs')
        for end in ('5', '3')
    ]
    return '&'.join(half.rna for half in halves)
Esempio n. 4
0
def name_to_pretty_sequence(row):
    """Return the design's RNA sequence wrapped in LaTeX ``\\verb`` markup
    with colour changes inserted at the domain boundaries.

    Three layouts are handled:

    - positive/negative controls: stem, nexus, hairpins and tail only;
    - ``IND_DIM`` designs: two separate RNAs joined by ``' & '``, with the
      indices of the 3' part shifted past the 5' part and the separator;
    - everything else: the four scaffold domains plus the aptamer.

    A colour of ``None`` means "fall back to the colour two entries further
    along the descending index list" (see the final loop).
    """

    def scaffold_colors(sgrna):
        # Start index -> colour for the four scaffold domains of one sgRNA.
        return {
            sgrna.index_from_domain('stem', 0): 'ucsfblue',
            sgrna.index_from_domain('nexus', 0): 'ucsfnavy',
            sgrna.index_from_domain('hairpins', 0): 'ucsfteal',
            sgrna.index_from_domain('tail', 0): 'ucsfblack',
        }

    if row.strategy in (Strategy.POS_CTL.value, Strategy.NEG_CTL.value):
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna
        colors_from_indices = scaffold_colors(sgrna)

    elif row.strategy == Strategy.IND_DIM.value:
        name, n = row.design.split('/')
        sgrna_5 = sgrna_sensor.from_name(f'{name}/5/{n}', target='aavs')
        sgrna_3 = sgrna_sensor.from_name(f'{name}/3/{n}')

        # Indices into the 3' part are shifted by the length of the 5' part
        # plus the three characters of the ' & ' separator.
        separator_at = len(sgrna_5)
        shift = separator_at + 3

        sequence = f'{sgrna_5.rna} & {sgrna_3.rna}'

        colors_from_indices = {
            sgrna_5.index_from_domain('stem', 0): 'ucsfblue',
            shift + sgrna_3.index_from_domain('nexus', 0): 'ucsfnavy',
            shift + sgrna_3.index_from_domain('hairpins', 0): 'ucsfteal',
            shift + sgrna_3.index_from_domain('tail', 0): 'ucsfblack',
            separator_at: 'ucsflightgrey',
            shift: 'ucsforange',
            get_aptamer_start(sgrna_5): 'ucsforange',
            shift + get_aptamer_end(sgrna_3): 'ucsfblue',
        }

    else:
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna

        colors_from_indices = {
            **scaffold_colors(sgrna),
            get_aptamer_start(sgrna): 'ucsforange',
            get_aptamer_end(sgrna): None,
        }

    # Insert from the highest index down, so earlier insertions do not move
    # the positions that are still to be processed.
    indices = sorted(colors_from_indices, reverse=True)
    colors = [colors_from_indices[i] for i in indices]

    for x, index in enumerate(indices):
        color = colors[x] or colors[x + 2]
        sequence = insert_color(sequence, index, color)

    return r'{ \verb|' + sequence + '| }'
    def num_items(self):
        """Return the number of items, resolving ``self._num_items``.

        ``self._num_items`` may be, in order of precedence:

        - ``None``: nothing was specified, so ``None`` is returned;
        - an object with a ``unique_items`` attribute (e.g. a previous
          selection step): that attribute is returned;
        - the name of an sgRNA design known to ``sgrna_sensor``: the library
          size of that design's sequence is returned;
        - anything else: the value passed through ``cast_to_number``.
        """
        # If the specific number of items wasn't specified, return None.
        if self._num_items is None:
            return None

        # If there was a selection step before this one, see how many unique
        # items it yielded.
        try:
            return self._num_items.unique_items
        except AttributeError:
            pass

        # If the number of items is the name of an sgRNA design, count the
        # number of variable positions in that design and raise 4 to that power
        # to get the number of sequences theoretically in that library.
        #
        # This is deliberately best-effort (the package may be missing, or the
        # value may not be a design name at all), but only ordinary errors are
        # swallowed: KeyboardInterrupt and SystemExit must still propagate.
        try:
            import sgrna_sensor
            design = sgrna_sensor.from_name(self._num_items)
            return sgrna_sensor.library_size(design.seq)
        except Exception:
            pass

        # If none of these conditions apply, return the underlying attribute,
        # converted to a number (e.g. via ``eval`` for strings) if necessary.

        return cast_to_number(self._num_items)
Esempio n. 6
0
def tabulate_sequences():
    """Build a table with one row per manually aligned design.

    The alignments are read from ``alignment_path`` (a YAML mapping of
    design name to aligned sequence).  For each name the DNA sequence is
    looked up via ``sgrna_sensor`` and checked against the alignment with
    every character outside ``ACGTN`` removed.

    Returns:
        pandas.DataFrame: columns ``id`` (1-based), ``name``, ``domain``,
        ``sequence``, ``manual_alignment``, ``complexity`` and
        ``log4_complexity``.  Empty if the alignment file is empty.

    Raises:
        ValueError: if an alignment does not spell out the design's actual
            sequence (a missing alignment counts as an empty one).
    """
    with alignment_path.open() as file:
        # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6.
        # The file holds plain data, so the safe loader is sufficient.  An
        # empty file parses to None, which is treated as "no alignments".
        manual_alignment = yaml.safe_load(file) or {}

    rows = []

    for row_id, name in enumerate(manual_alignment, 1):
        row = {}
        row['id'] = row_id
        row['name'] = name
        row['domain'] = name_to_domain(name)
        row['sequence'] = sgrna_sensor.from_name(name).dna
        row['manual_alignment'] = manual_alignment.get(name)
        row['complexity'] = sgrna_sensor.library_size(row['sequence'])
        row['log4_complexity'] = math.log(row['complexity'], 4)
        rows.append(row)

        # Strip gaps and other padding from the alignment and make sure what
        # is left is exactly the real sequence.
        real_seq = row['sequence']
        aligned_seq = ''.join(
                x for x in (row['manual_alignment'] or '')
                if x in 'ACGTN')

        if real_seq != aligned_seq:
            raise ValueError(f"""\
Alignment for {name} has the wrong sequence:

> {real_seq}
> {aligned_seq} """)

    return pd.DataFrame(rows)
Esempio n. 7
0
    def is_splitter(row):
        """Return True when the row's design has an ``aptamer/splitter``
        domain whose sequence is not ``GAAA``.

        ``IND_DIM`` rows, and designs without that domain, give False.
        """
        if row.strategy != Strategy.IND_DIM.value:
            sgrna = sgrna_sensor.from_name(row.design, target='aavs')
            try:
                splitter = sgrna['aptamer/splitter']
            except KeyError:
                return False
            return splitter != 'GAAA'

        return False
Esempio n. 8
0
def parse_library(library, size=None):
    """Resolve a library specification into a ``(name, size)`` pair.

    Args:
        library: either the name of an sgRNA design (``str``) or an
            already resolved ``(name, size)`` pair.
        size: explicit library size.  When given it always wins and
            *library* is passed through unchanged.

    Returns:
        tuple: ``(library, size)`` when *size* is given; otherwise the
        library name together with either the size computed by
        ``sgrna_sensor.library_size`` (for a design name) or the size
        contained in the pair.
    """
    if size is not None:
        return library, size

    if isinstance(library, str):
        # Imported only on the path that needs it, so callers that supply
        # their own sizes do not require sgrna_sensor to be installed.
        import sgrna_sensor

        library_seq = sgrna_sensor.from_name(library).seq
        return library, sgrna_sensor.library_size(library_seq)

    library_name, library_size = library
    return library_name, library_size
Esempio n. 9
0
def load_manual_alignments():
    """Load ``manual_alignments.yml`` and validate it against ``hits``.

    For every name in the module-level ``hits`` collection, the alignment
    with all characters outside ``ACGU`` removed must equal the RNA sequence
    that ``sgrna_sensor`` reports for that name.

    Returns:
        The parsed contents of the YAML file.

    Raises:
        ValueError: if an alignment does not match the real sequence.
    """
    yml_path = Path('manual_alignments.yml')
    with yml_path.open() as file:
        # ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6.
        # The file holds plain data, so the safe loader is sufficient.
        manual_alignments = yaml.safe_load(file)

    for hit in hits:
        real_seq = sgrna_sensor.from_name(hit).rna
        aligned_seq = ''.join(
                x for x in manual_alignments[hit]
                if x in 'ACGU'
        )
        if real_seq != aligned_seq:
            raise ValueError(f"""\
The manual alignment for {hit} has the wrong sequence!

> {real_seq}
> {aligned_seq}""")

    return manual_alignments
Esempio n. 10
0
def calculate_fold(name):
    print name

    design = sgrna_sensor.from_name(name, target=None)
    design.show()
    print design.expected_fold

    tot_e_off = RNA.pf_fold_par(design.seq, design.constraints, None, False,
                                False, False)
    min_e_off = RNA.fold_par(design.seq, design.expected_fold, None, True,
                             False)

    ex = ''
    for f, c in zip(design.expected_fold, design.constraints):
        ex += f if f is not '.' else c

    tot_e_on = RNA.pf_fold_par(design.seq, design.constraints, None, False,
                               True, False)
    min_e_on = RNA.fold_par(design.seq, ex, None, True, False)

    print tot_e_off
    print min_e_off
    print tot_e_on
    print min_e_on

    kT = 0.593  # kcal/mol
    prob_off = math.exp((tot_e_off - min_e_off) / kT)
    prob_on = math.exp((tot_e_on - min_e_on) / kT)

    print
    print "Probability of active conformation in:"
    print "off ensemble:", prob_off
    print "on ensemble:", prob_on
    print
    print "Fold increase of active conformation:"
    print prob_on / prob_off
    print
    print 79 * '*'
    print
Esempio n. 11
0
    print "on ensemble:", prob_on
    print
    print "Fold increase of active conformation:"
    print prob_on / prob_off
    print
    print 79 * '*'
    print


# Script entry point: print one score per design named on the command line.
if __name__ == '__main__':
    # The usage text is the module docstring.
    args = docopt.docopt(__doc__)
    # Look the score function up by name.  At module level ``locals()`` is the
    # module namespace, so any module-level callable can be selected.
    scorefxn = locals()[args['--scorefxn']]
    print args['--scorefxn']

    for name in args['<names>']:
        design = sgrna_sensor.from_name(name, target=None)

        # Only the call with ``False`` is active; the ``True`` call and the
        # ratio are commented out below.
        x_off = scorefxn(design, False)
        #x_on = scorefxn(design, True)
        #x_ratio = x_on / x_off

        #print '{name:10s} {x_ratio:.2f} ({x_on:.2f} / {x_off:.2f})'.format(**locals())
        # The format fields are filled from the variables above by name, so
        # renaming ``name`` or ``x_off`` would break this line.
        print '{name:10s} {x_off:.2f}'.format(**locals())

    #print """\
    #This doesn't do what I want.  What I want is to get the base-pairing
    #probability matrix (BPPM) and to calculate the probability of any structure
    #that makes the wildtype base pairs.  This calculates the equilibrium population
    #of the single lowest scoring structure that makes the wildtype base pairs.
    #"""
Esempio n. 12
0
    yml_path = Path('manual_alignments.yml')
    with yml_path.open() as file:
        manual_alignments = yaml.load(file)

    for hit in hits:
        real_seq = sgrna_sensor.from_name(hit).rna
        aligned_seq = ''.join(
                x for x in manual_alignments[hit]
                if x in 'ACGU'
        )
        if real_seq != aligned_seq:
            raise ValueError(f"""\
The manual alignment for {hit} has the wrong sequence!

> {real_seq}
> {aligned_seq}""")

    return manual_alignments

# Script entry point: gather the per-hit data and render ``library_hits.tex``.
if __name__ == '__main__':
    # The usage text is the module docstring.
    args = docopt.docopt(__doc__)

    fold_changes = calc_fold_changes(args['--force'])
    # Format each value as a LaTeX string: v[0] with one decimal place,
    # followed by a superscript sign.  v[1] indexes into '−+', so 0/False
    # selects '−' and 1/True selects '+'.
    fold_change_strs = {
            k: fr"{v[0]:.1f}\textsuperscript{{{'−+'[v[1]]}}}"
            for k,v in fold_changes.items()
    }
    manual_alignments = load_manual_alignments()
    # Hits whose design reports no ligand fall back to 'theo'.
    ligands = {k: sgrna_sensor.from_name(k).ligand or 'theo' for k in hits}
    # ``locals()`` is passed as-is, so every name defined above is handed to
    # the renderer; presumably the template refers to them by name, so do
    # not rename them without checking the template.
    sgrna_sensor.render_latex_table('library_hits.tex', locals())
Esempio n. 13
0
def get_molecular_weight(name):
    """Return the molecular weight for the construct called *name*.

    Names starting with ``pcr21`` get a fixed, hard-coded value.  Any other
    name is lower-cased, resolved with ``sgrna_sensor.from_name`` and
    reported as that sgRNA's ``mass('rna')``.
    """
    if not name.startswith('pcr21'):
        return sgrna_sensor.from_name(name.lower()).mass('rna')

    return 2686183.94
Esempio n. 14
0
def get_pretty_domains(row):
    """Split the design's RNA sequence into consecutive coloured domains.

    Returns a list of ``Domain`` objects (each with ``seq`` and ``color``)
    that together cover the whole sequence from left to right.  Three
    layouts are handled:

    - control designs (``Algorithm.CONTROL``): scaffold domains only;
    - ``IND_DIM`` designs: a 5' and a 3' RNA joined by a four-space divider,
      with the indices of the 3' part shifted accordingly;
    - everything else: aptamer start/end plus the scaffold domains.

    A colour of ``None`` means "reuse the colour two positions earlier in
    the sorted colour list".
    """

    def aptamer_start(guide):
        # Prefer the "aptamer/5'" domain; fall back to "aptamer".
        try:
            return guide.index_from_domain("aptamer/5'", 0)
        except KeyError:
            return guide.index_from_domain("aptamer", 0)

    def aptamer_end(guide):
        # One past the last aptamer position; prefer the "aptamer/3'" domain.
        try:
            begin = guide.index_from_domain("aptamer/3'", 0)
            return begin + len(guide["aptamer/3'"])
        except KeyError:
            begin = guide.index_from_domain("aptamer", 0)
            return begin + len(guide["aptamer"])

    def scaffold(stem_guide, rest_guide, shift=0):
        # (colour, start index) for stem, nexus, hairpins and tail.  The stem
        # index is taken from ``stem_guide`` unshifted; the others come from
        # ``rest_guide`` and are moved by ``shift``.
        return [
            ('ucsfblue', stem_guide.index_from_domain('stem', 0)),
            ('ucsfnavy', shift + rest_guide.index_from_domain('nexus', 0)),
            ('ucsfteal', shift + rest_guide.index_from_domain('hairpins', 0)),
            ('ucsfblack', shift + rest_guide.index_from_domain('tail', 0)),
        ]

    if row.algorithm == Algorithm.CONTROL.value:
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna
        domain_map = scaffold(sgrna, sgrna)

    elif row.strategy == Strategy.IND_DIM.value:
        name, n = row.design.split('/')
        sgrna_5 = sgrna_sensor.from_name(f'{name}/5/{n}', target='aavs')
        sgrna_3 = sgrna_sensor.from_name(f'{name}/3/{n}')
        divider = '    '
        shift = len(sgrna_5) + len(divider)

        sequence = f'{sgrna_5.rna}{divider}{sgrna_3.rna}'
        domain_map = [
            ('ucsfpurple', aptamer_start(sgrna_5)),
            (None, shift + aptamer_end(sgrna_3)),
        ] + scaffold(sgrna_5, sgrna_3, shift)

    else:
        sgrna = sgrna_sensor.from_name(row.design, target='aavs')
        sequence = sgrna.rna
        domain_map = [
            ('ucsfpurple', aptamer_start(sgrna)),
            (None, aptamer_end(sgrna)),
        ] + scaffold(sgrna, sgrna)

    class Domain:
        """A stretch of sequence together with the colour to draw it in."""

        def __init__(self, seq, color):
            self.seq = seq
            self.color = color

        def __repr__(self):
            return f"Domain('{self.seq}', '{self.color}')"

    # Order the boundaries left to right.  The sort is stable, so entries
    # sharing an index keep their order from ``domain_map``.
    ordered = sorted(domain_map, key=lambda entry: entry[1])
    indices = [index for _, index in ordered]

    # Whatever precedes the first boundary is drawn in black.
    colors = ['ucsfblack'] + [color for color, _ in ordered]

    # Consecutive (start, stop) pairs covering the whole sequence.
    bounds = list(zip([0] + indices, indices + [len(sequence)]))

    return [
        Domain(sequence[start:stop], colors[i] or colors[i - 2])
        for i, (start, stop) in enumerate(bounds)
    ]
Esempio n. 15
0
# Top-level option values; used below as fallbacks for any option that a
# per-construct argument does not set itself.
default_cut = args['--cut']
default_tm = args['--tm']
default_verbose = args['--verbose']

# Accumulates the primers designed for every construct.
primers = {}

for name in args['<constructs>']:
    # Each positional argument is itself a small command line: split it like
    # a shell would and parse it with the same usage text.
    sub_cli = shlex.split(name)
    sub_args = docopt.docopt(__doc__, sub_cli)

    for sub_name in sub_args['<constructs>']:
        designer = mut.PrimerDesigner()
        designer.name = sub_name
        # Per-construct options take precedence over the top-level ones.
        designer.spacer = sub_args['--spacer'] or default_spacer
        designer.quikchange = sub_args['--quikchange'] or default_quikchange
        designer.cut = mut.int_or_none(sub_args['--cut'] or default_cut)
        designer.tm = mut.float_or_none(sub_args['--tm'] or default_tm)
        designer.verbose = sub_args['--verbose'] or default_verbose

        # Resolve the construct and the backbone against the same spacer; the
        # designer's name is replaced by the sgRNA's underscore name.
        sgrna = sgrna_sensor.from_name(sub_name, target=designer.spacer)
        designer.name = sgrna.underscore_name
        designer.construct = sgrna.dna
        designer.backbone = sgrna_sensor.from_name(sub_args['--backbone']
                                                   or default_backbone_name,
                                                   target=designer.spacer).dna

        # ``design_primers()`` returns something ``dict.update`` accepts; a
        # later construct overwrites an earlier entry with the same key.
        primers.update(designer.design_primers())

# Merge duplicates across constructs, then print the report.
primers = mut.consolidate_duplicate_primers(primers)
mut.report_primers_for_elim(primers)
Esempio n. 16
0
import yaml
from sgrna_sensor import from_name, t7_promoter as t7, spacer, aptamer
from sgrna_sensor import render_latex_table

# (label, value) pairs in display order.  The value is whatever the
# corresponding ``sgrna_sensor`` constructor returns.  Labels that contain
# LaTeX macros are raw strings, so the backslashes are kept literally.
components = [
        ('T7 promoter', t7()),
        ('AAVS spacer', spacer('aavs')),
        ('sgG1 spacer', spacer('gfp1')),
        ('sgR1 spacer', spacer('rfp1')),
        ('sgG2 spacer', spacer('gfp2')),
        ('sgR2 spacer', spacer('rfp2')),
        ('folA spacer', spacer('fol1')),
        ('Theophylline (theo) aptamer', aptamer('theo')),
        ('3-Methylxanthine (3mx) aptamer', aptamer('3mx')),
        ('Thiamine pyrophosphate (tpp) aptamer', aptamer('tpp')),
        ('Positive control', from_name('on')),
        (r'Negative control (G63C, G64C)', from_name('off')),
        (r'\ligrnaF{}', from_name('mhf/30')),
        (r'\ligrnaF[2]{}', from_name('mhf/37')),
        (r'\ligrnaF[3]{}', from_name('w30/65')),
        (r'\ligrnaF[4]{}', from_name('w30/64/1')),
        (r'\ligrnaB{}', from_name('rxb/11/1')),
        (r'\ligrnaB[2]{}', from_name('w11/2')),
        (r'\ligrnaB[3]{}', from_name('m11/ga')),
]

# ``yaml.load`` without an explicit Loader is rejected by PyYAML >= 6.  The
# file holds plain data, so the safe loader is sufficient.
with open('manual_alignments.yml') as file:
    manual_alignments = yaml.safe_load(file)

# Make sure there aren't any typos.
for name, sgrna in components:
Esempio n. 17
0
        'w30 64',
        'w30 64/1',
        'w30 65',
        'w30 65/1',
        'w30 77',
        'w30 77/1',
        'w30 77/2',
        'w30 78',
        'w30 78/1',
        'w30 78/2',
        'w30 79',
        'w30 79/1',
        'w30 80',
        'w30 80/1',
]
# Structure constraint in dot-bracket notation; it is applied to every design
# below.
prefolded = '............................................................(.........................).........................'

# RT in kcal/mol at 37°C.  It is a constant, so it is computed once instead of
# on every loop iteration.
rt_37 = 1.987203611e-3 * 310

# Header line: pad by the width of the columns printed before the structure
# (9-character name, a space, 6-character percentage, "%  ") so the constraint
# lines up with the structures below it.
print(' ' * (9 + 1 + 6 + 3) + prefolded)
for name in designs:
    sgrna = sgrna_sensor.from_name(name)
    struct = vrna.fold_compound(sgrna.rna)

    # Second element of the unconstrained partition-function result.
    q_tot = struct.pf()[1]

    # Same calculation again, restricted by the constraint.
    struct.constraints_add(prefolded, vrna.CONSTRAINT_DB_DEFAULT)
    fold_prefolded, q_prefolded = struct.pf()

    # Boltzmann factor of the difference between the constrained and the
    # unconstrained value, printed below as a percentage.
    f_prefolded = exp(-(q_prefolded - q_tot) / rt_37)

    print(f"{sgrna.slash_name:9s} {100 * f_prefolded:6.3f}%  {fold_prefolded}")
Esempio n. 18
0
#!/usr/bin/env python3
"""\
Usage: sgrna_to_300nM.py <name> <ng_uL>
"""

if __name__ == '__main__':
    import docopt
    import sgrna_sensor

    # Arguments: the design name and a concentration in ng/uL.
    cli = docopt.docopt(__doc__)
    design = sgrna_sensor.from_name(cli['<name>'])
    mass_concentration = float(cli['<ng_uL>'])

    # Scale by 1e6 and divide by the RNA mass to get the value reported in nM.
    molar_concentration = mass_concentration * 1e6 / design.mass('rna')

    print(f'{molar_concentration:.2f} nM')
Esempio n. 19
0
from itertools import product

# Every spacer is combined with every design name below.
spacers = ('fol1', 'fol2', 'fol3', 'fol4')
names = ('on', 'off', 'rxb/11/1', 'mhf/30')

# Fixed sequences placed before and after each sgRNA's DNA sequence.
overlap_5 = 'tgcgactactcttgcctactacctatcgactgagctgaaagaattccggttctggcaaatattctgaaatgagctgttgacaattaatcatccggctcgtataattctagt'
overlap_3 = 'tttgaattcatgtggctgaccgttctgttgtctctcgctcttccgagta'

# Display labels for the design names; unlisted names are shown unchanged.
pretty_names = {
        'on': 'pos',
        'off': 'neg',
        'rxb/11/1': 'ligRNA⁻',
        'mhf/30':   'ligRNA⁺',
}

# One (label, sequence) record per spacer/design combination.
inserts = []
for spacer, name in product(spacers, names):
    label = pretty_names.get(name, name)
    sequence = overlap_5 + from_name(name, target=spacer).dna + overlap_3
    inserts.append((f'{spacer} {label}', sequence))

longest_insert = max((len(sequence) for _, sequence in inserts), default=0)
print(f'Longest insert: {longest_insert} bp')

# Tab-separated output, one record per line.
with open('gibson_inserts.tsv', 'w') as file:
    file.writelines('\t'.join(record) + '\n' for record in inserts)