Example #1
0
def _remove_rec_sites(seq, enzymes=None):
    '''Replace every recognition site found in seq with a domesticated one.

    The sites to look for are the ones get_ret_sites gives for enzymes
    (MANDATORY_DOMEST_ENZYMES when enzymes is None); they are searched
    ignoring case.

    It returns a tuple with the new sequence, a list of dicts holding the
    'original' and 'modified' version of each site, and the list of
    fragments found between the sites.

    It raises ValueError if the translation of the new sequence differs
    from the translation of the given one, or if any site is still found
    in the new sequence.
    '''
    if enzymes is None:
        enzymes = MANDATORY_DOMEST_ENZYMES
    # a single capturing group, so split() also yields the matched sites
    sites_pattern = re.compile('(' + '|'.join(get_ret_sites(enzymes)) + ')',
                               flags=re.IGNORECASE)

    fragments = []
    found_sites = []
    for piece in sites_pattern.split(str(seq)):
        bucket = found_sites if sites_pattern.match(piece) else fragments
        bucket.append(piece)

    new_seq = Seq('', alphabet=generic_dna)
    # original sequence read so far, up to and including the current site;
    # it is only needed to know the frame
    seen_so_far = ''
    rec_site_pairs = []
    for fragment, site in izip_longest(fragments, found_sites):
        new_seq += fragment
        if site is None:
            # last fragment, there is no site after it
            continue
        seen_so_far = seen_so_far + fragment + site
        # the pattern is passed along because a site can not be converted
        # into another site
        domesticated = _domesticate_rec_site(site, seen_so_far, sites_pattern)
        rec_site_pairs.append(dict(original=site, modified=domesticated))
        new_seq += domesticated

    # the peptide has to be the same before and after the changes
    before = Seq(_get_upper_nucls(seq))
    after = Seq(_get_upper_nucls(new_seq))
    peptide_before = str(before.translate())
    peptide_after = str(after.translate())
    if peptide_before != peptide_after:
        msg = 'The generated sequence does not produce the same peptide'
        raise ValueError(msg)

    # and no site should be left in the result
    if sites_pattern.search(str(new_seq)):
        msg = 'Not all rec_sites modified'
        raise ValueError(msg)

    return new_seq, rec_site_pairs, fragments
Example #2
0
 def num_rec_sites(self):
     '''Return the number of recognition sites found in self.residues.

     The sites are the ones get_ret_sites gives for
     ENZYMES_USED_IN_GOLDENBRAID. They are matched ignoring case, so a
     site is counted no matter if it is written in upper or lower case.
     Only non-overlapping matches are counted, as re.findall reports them.
     '''
     rec_sites = get_ret_sites(ENZYMES_USED_IN_GOLDENBRAID)
     # one regex with all the sites as alternatives
     rec_sites_regex = re.compile('(' + '|'.join(rec_sites) + ')',
                                  flags=re.IGNORECASE)
     return len(rec_sites_regex.findall(self.residues))