def command(args):
    """
    List restriction enzymes on stdout, one per line, sorted by name.

    The set to list is chosen from ``args``:

    * ``args.sup`` is not None: a ``RestrictionBatch`` built with
      ``suppliers=args.sup``;
    * otherwise, ``args.all`` truthy: ``r.AllEnzymes``;
    * otherwise: ``r.CommOnly``.

    Each line holds the enzyme name (padded to 16 columns) and its
    ``site`` attribute. A summary line with the number of listed enzymes
    is written last.
    """
    if args.sup is not None:
        selected = r.RestrictionBatch(first=[], suppliers=args.sup)
    else:
        selected = r.AllEnzymes if args.all else r.CommOnly

    # Sort on the textual enzyme name.
    by_name = sorted(selected, key=str)

    write = sys.stdout.write
    for enzyme in by_name:
        write("{:16} {}\n".format(str(enzyme), enzyme.site))
    write("------\n{} restriction enzimes listed\n\n".format(len(by_name)))
def annotate_digestion_bands(record, enzymes, ladder):
    """Return a deep copy of ``record`` annotated with its digestion bands.

    The record's sequence is searched with a ``RestrictionBatch`` built
    from ``enzymes``. The cut positions, together with 0 and
    ``len(record)``, delimit the bands. When the record is not linear
    (``record.linear`` missing or falsy) and there is more than one band,
    the last band is merged into the first one, whose start becomes
    negative so that ``end - start`` is the merged size.

    Bands are processed from largest to smallest and labelled with the
    letters ``a`` to ``l``, so at most twelve bands are annotated. Each
    band is passed to ``annotate_record`` on the copy.

    ``ladder`` is accepted but not used by this function.
    """
    is_linear = getattr(record, 'linear', False)
    cuts_by_enzyme = Restriction.RestrictionBatch(enzymes).search(record.seq)

    # Unique band boundaries: both sequence ends plus every cut position.
    boundaries = {0, len(record)}
    for positions in cuts_by_enzyme.values():
        boundaries.update(positions)
    boundaries = sorted(boundaries)

    bands = list(zip(boundaries, boundaries[1:]))
    if len(bands) > 1 and not is_linear:
        # Wrap around the origin: fold the last band into the first one.
        last_start, last_end = bands.pop()
        bands[0] = [last_start - last_end, bands[0][1]]

    # start - end is the negated size, so the largest band sorts first.
    largest_first = sorted(bands, key=lambda b: b[0] - b[1])

    new_record = deepcopy(record)
    for band, label in zip(largest_first, "abcdefghijkl"):
        band_size = abs(band[1] - band[0])
        annotate_record(
            new_record,
            location=band,
            label="%s - %s" % (label, bw.Band._format_dna_size(band_size)),
            feature_type="misc_feature",
            band_label=label,
            band_size=band_size,
        )
    return new_record
def get_caps_result(cls, seq_target, enzyme_list):
    """Search ``seq_target`` for the restriction sites of ``enzyme_list``.

    Returns a pair ``(by_enzyme, by_name)``: the dict returned by
    ``RestrictionBatch.search`` and a second dict holding the same values
    keyed by ``str(enzyme)`` instead of the enzyme object.
    """
    # See the Biopython Restriction cookbook,
    # http://biopython.org/DIST/docs/cookbook/Restriction.html
    # section 2.6 "Analysing sequences with a RestrictionBatch".
    # (20201130: the Seq is built without an IUPACAmbiguousDNA alphabet.)
    target = Seq(seq_target)
    batch = Restriction.RestrictionBatch(enzyme_list)

    # If linear is False, the restriction sites that span over
    # the boundaries will be included.
    caps_result_dict = batch.search(target, linear=True)

    # Same results, keyed by the enzyme name as a plain string.
    caps_result_dict_str = {
        str(enzyme): sites for enzyme, sites in caps_result_dict.items()
    }
    return caps_result_dict, caps_result_dict_str
def restrict_function(in_IUPAC):
    """Build a ``RestrictionBatch`` for a fixed list of supplier codes.

    ``in_IUPAC`` is not used and the batch is not kept: the function
    always returns ``None``.
    """
    supplier_codes = [
        "C", "B", "E", "I", "K", "J", "M", "O",
        "N", "Q", "S", "R", "V", "Y", "X",
    ]
    Restriction.RestrictionBatch([], supplier_codes)
    return None
def find_cuts(sequence, enzymes, linear=True):
    """Return every cut position of ``enzymes`` in ``sequence`` as one list.

    ``linear`` is forwarded to ``RestrictionBatch.search``. Positions are
    concatenated enzyme by enzyme, in the iteration order of the search
    result; the list is neither sorted nor de-duplicated.
    """
    hits = Restriction.RestrictionBatch(enzymes).search(sequence, linear=linear)
    positions = []
    for enzyme_cuts in hits.values():
        positions.extend(enzyme_cuts)
    return positions
def number_enzyme(record):
    """Return the per-enzyme site counts for ``record.seq``, ascending.

    The sequence is searched with ``linear=False`` using the batch of
    enzymes for supplier code ``"N"``. The result is a list with one
    integer per enzyme in the batch: the number of sites found.
    """
    batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    hits = batch.search(record.seq, linear=False)
    return sorted(len(sites) for sites in hits.values())
def rebasecuts(Enzyme, Strand):
    """Return the result of searching ``Strand`` with a single enzyme.

    ``Enzyme`` is added to an empty ``RestrictionBatch`` and fetched back
    with ``get``; the resulting enzyme object does the search.
    """
    single = Restriction.RestrictionBatch()
    single.add(Enzyme)
    return single.get(Enzyme).search(Strand)
def _look_for_site(site, re_name, should_match=False):
    """Assert whether removing ``re_name`` sites leaves the test DNA unchanged.

    ``site`` is prepended to ``self.test_dna`` (``self`` comes from the
    enclosing scope). The sequence is then passed through
    ``seq_opt.remove_restriction_sites`` with a batch holding only the
    enzyme named ``re_name``. The assertion passes when "input equals
    output" agrees with ``should_match``.
    """
    dna = Seq(site + str(self.test_dna), IUPAC.unambiguous_dna)
    enzyme = Restriction.AllEnzymes.get(re_name)
    batch = Restriction.RestrictionBatch([enzyme])
    proc_dna = seq_opt.remove_restriction_sites(
        dna, self.codon_use_table, batch
    )
    unchanged = dna == proc_dna
    assert unchanged == should_match
def number_enzyme2(record):
    """Return the supplier-``"N"`` enzymes ordered by site count, ascending.

    ``record.seq`` is searched with ``linear=False``. The number of
    enzymes in the result is printed, then the enzymes are returned as a
    list, fewest sites first. Ties keep the order of the search result.
    """
    batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    hits = batch.search(record.seq, linear=False)
    print(len(hits))
    return sorted(hits, key=lambda enzyme: len(hits[enzyme]))
def filter_re_sites(candidates):
    """Return the candidates whose sequence has no restriction-site hit.

    Each candidate is a mapping with a ``'seqrec'`` entry; its ``.seq`` is
    searched with a batch built from ``restriction_enzymes`` (a name taken
    from the enclosing scope). A candidate is dropped as soon as any
    reported position is truthy.
    """
    batch = Restriction.RestrictionBatch(restriction_enzymes)
    kept = []
    for candidate in candidates:
        hits = batch.search(candidate['seqrec'].seq)
        if any(pos for sites in hits.values() for pos in sites):
            continue
        kept.append(candidate)
    return kept
def digest(fasta_records, enzyme):
    """
    Divide a genome into restriction fragments.

    Support Arima-HiC enzyme cocktail which digest chromatin at ^GATC and
    G^ANTC.

    Parameters
    ----------
    fasta_records : OrderedDict
        Dictionary of chromosome names to sequence records.
    enzyme: str
        Name of restriction enzyme, or ``'arima'`` (case-insensitive) for
        the MboI + HinfI cocktail.

    Returns
    -------
    Dataframe with columns: 'chrom', 'start', 'end'.

    Raises
    ------
    ValueError
        If ``enzyme`` cannot be resolved to a searchable enzyme.

    """
    import Bio.Restriction as biorst
    import Bio.Seq as bioseq

    # http://biopython.org/DIST/docs/cookbook/Restriction.html#mozTocId447698
    chroms = fasta_records.keys()
    try:
        if enzyme.lower() == 'arima':
            cocktail = biorst.RestrictionBatch(['MboI', 'HinfI'])
            cut_finder = cocktail.search
        else:
            cut_finder = getattr(biorst, enzyme).search
    except AttributeError:
        raise ValueError('Unknown enzyme name: {}'.format(enzyme))

    def _each(chrom):
        """Build the fragment table of a single chromosome."""
        seq = bioseq.Seq(str(fasta_records[chrom]))
        found = cut_finder(seq)
        if isinstance(found, dict):
            # A batch search maps each enzyme to its own list of positions:
            # pool them and sort so the fragments come out in order.
            cut_sites = sorted(
                pos for positions in found.values() for pos in positions
            )
        else:
            # A single-enzyme search yields a flat sequence of positions.
            # Accept any sequence type instead of exactly `list`, so an
            # unexpected type can no longer leave `cut_sites` unbound.
            cut_sites = list(found)
        cuts = np.r_[0, np.array(cut_sites) + 1, len(seq)].astype(int)
        n_frags = len(cuts) - 1
        return pd.DataFrame(
            {
                'chrom': [chrom] * n_frags,
                'start': cuts[:-1],
                'end': cuts[1:],
            },
            columns=['chrom', 'start', 'end'])

    return pd.concat([_each(chrom) for chrom in chroms],
                     axis=0, ignore_index=True)
def _check_effect_of_enzyme(self, seq_target, enzyme_name_list):
    '''
    Run a restriction analysis of ``seq_target`` for ``enzyme_name_list``.

    http://biopython.org/DIST/docs/cookbook/Restriction.html
    biopython <= 1.76 for IUPACAmbiguousDNA()

    Returns ``(caps_check_dict, enzyme_map_txt)``:

    * ``caps_check_dict`` maps the name of every enzyme that has at least
      one site to ``{'ResType': <enzyme object>, 'res_list': <sites>}``;
    * ``enzyme_map_txt`` is an empty string unless
      ``glv.conf.analyse_caps == True``, in which case it holds the
      restriction map text (cut before the first line containing
      " Enzymes which ") followed by a dump of ``caps_check_dict``.
    '''
    enzyme_map_txt = ""

    # Cookbook 4.1 "Setting up an Analysis" / 4.2 "Full restriction analysis"
    target = Seq(seq_target, IUPACAmbiguousDNA())
    batch = Restriction.RestrictionBatch(enzyme_name_list)
    analysis = Restriction.Analysis(batch, target)

    # Cookbook 4.5 "Fancier restriction analysis":
    #   full()       -> every enzyme of the batch, e.g.
    #                   {KpnI: [], EcoRV: [], EcoRI: [33]}
    #   with_sites() -> only the enzymes which have a site, e.g.
    #                   {EcoRI: [33]}
    sites_by_enzyme = analysis.with_sites()

    # Re-key the result by enzyme name, keeping the enzyme object alongside.
    caps_check_dict = {
        str(enzyme): {'ResType': enzyme, 'res_list': sites}
        for enzyme, sites in sites_by_enzyme.items()
    }

    # Detail information: make a restriction map of the sequence.
    if glv.conf.analyse_caps == True:
        analysis.print_as('map')
        pieces = []
        for line in analysis.format_output().split('\n'):
            if " Enzymes which " in line:
                break
            pieces.append("{}\n".format(line))
        pieces.append("caps_check_dict={}".format(caps_check_dict))
        enzyme_map_txt = "".join(pieces)

    return caps_check_dict, enzyme_map_txt
def changerestrictionsites(seqrecords, codons, revcodons):
    """Remove the sites of a fixed set of restriction enzymes from records.

    Every record's sequence is searched for AscI, BspQI, PstI, EcoRI,
    NotI, BtsI and BsaI sites. For each hit found by that initial search,
    ``removesite`` is called once with the record, the enzyme's
    recognition site as a ``Seq``, and the two codon tables.

    Args:
        seqrecords: iterable of records exposing a ``.seq`` attribute.
        codons: passed through to ``removesite``.
        revcodons: passed through to ``removesite``.

    Returns:
        The ``seqrecords`` object that was passed in.
    """
    # The batch does not depend on the record: build it once instead of
    # once per record.
    rb = Restriction.RestrictionBatch([
        Restriction.AscI, Restriction.BspQI, Restriction.PstI,
        Restriction.EcoRI, Restriction.NotI, Restriction.BtsI,
        Restriction.BsaI
    ])
    for seqrecord in seqrecords:
        reanalysis = rb.search(seqrecord.seq)
        for enzyme, positions in reanalysis.items():
            seqkey = Seq(enzyme.site, generic_dna)
            # One removal attempt per site reported by the initial search.
            for _ in positions:
                removesite(seqrecord, seqkey, codons, revcodons)
    return seqrecords
def restriction_digest():
    """Controller for the restriction-digest form.

    Redirects to the log-in page when no user is logged in. Otherwise
    shows a form asking for a DNA sequence, its topology (linear or
    circular) and whether cut positions should be reported. On a valid
    submission, every enzyme of suppliers 'F', 'N' and 'R' is searched
    against the sequence, the results are stored in ``session['result']``
    and the request is redirected to ``restriction_digest_output``.

    Returns:
        ``dict(form=form)`` for rendering when the form was not accepted.
    """
    if session.username is None:
        redirect(URL(r=request, f='../account/log_in'))
    form = FORM(TABLE(TR("Sequence: ",
                        TEXTAREA(_type="text",
                                 _value="Enter your DNA sequence in plain form",
                                 _name="sequence",
                                 requires=IS_NOT_EMPTY())),
                     TR("DNA Type: ",
                        SELECT("Linear", "Circular", _name="dna_type")),
                     TR("Show Fragments: ",
                        SELECT("No", "Yes", _name="show_frag")),
                     TR("", INPUT(_type="submit", _value="Digest DNA"))))
    if form.accepts(request.vars, session):
        from Bio import Restriction as R
        from Bio.Seq import Seq
        from Bio.Alphabet import IUPAC
        # Use a real boolean. The previous code passed the strings 'True'
        # and 'False' to `linear=`; both are truthy, so circular DNA was
        # always searched as if it were linear.
        is_linear = request.vars.dna_type == 'Linear'
        seq = Seq(request.vars.sequence, IUPAC.unambiguous_dna)
        results = {'sequence': seq}
        nocut = []
        for enzyme in R.RestrictionBatch([], suppliers=['F', 'N', 'R']):
            digest = sorted(enzyme.search(seq, linear=is_linear))
            if not digest:
                nocut.append(str(enzyme))
                continue
            # n cuts give n + 1 fragments on linear DNA, n on circular DNA.
            n_fragments = len(digest) + 1 if is_linear else len(digest)
            d = {
                'Restriction site': enzyme.site,
                'Number of fragments': str(n_fragments),
            }
            if request.vars.show_frag == 'Yes':
                d['Cut positions'] = str(digest)
            results[str(enzyme)] = d
        results['Enzymes that do not cut'] = nocut
        session['result'] = results
        redirect(URL(r=request, f='restriction_digest_output'))
    return dict(form=form)
def RestrictionEnzymes(restriction_enzymes=None):
    """Create a RestrictionBatch instance to search for sites for a supplied
    list of restriction enzymes.

    Args:
        restriction_enzymes (list[str], optional): List of restriction
            enzymes to consider. Defaults to ["NdeI", "XhoI", "HpaI", "PstI",
            "EcoRV", "NcoI", "BamHI"].

    Returns:
        Bio.Restriction.Restriction.RestrictionBatch: RestrictionBatch instance
        configured with the input restriction enzymes.
    """
    # The documented default was previously missing from the signature,
    # which made the argument mandatory. `None` is used as the sentinel so
    # the default list is never shared between calls.
    if restriction_enzymes is None:
        restriction_enzymes = [
            "NdeI", "XhoI", "HpaI", "PstI", "EcoRV", "NcoI", "BamHI",
        ]
    return Restriction.RestrictionBatch(
        [Restriction.AllEnzymes.get(enz) for enz in restriction_enzymes])
def main():
    """Print a tab-separated table of enzymes for a fixed list of suppliers.

    One row is printed per enzyme: name, ``site``, ``elucidate()`` and
    ``len()`` of the enzyme. Rows are sorted as strings, and a "#"
    separator (preceded by a blank line) is printed whenever the
    upper-cased first character of the row changes. The number of enzymes
    is written to stderr at the end.
    """
    # Supplier codes, as listed by RestrictionBatch.show_codes()
    # (May 2016 REBASE release):
    #   C = Minotech Biotechnology
    #   B = Life Technologies
    #   E = Agilent Technologies
    #   I = SibEnzyme Ltd.
    #   K = Takara Bio Inc.
    #   J = Nippon Gene Co., Ltd.
    #   M = Roche Applied Science
    #   O = Toyobo Biochemicals
    #   N = New England Biolabs
    #   Q = Molecular Biology Resources - CHIMERx
    #   S = Sigma Chemical Corporation
    #   R = Promega Corporation
    #   V = Vivantis Technologies
    #   Y = SinaClon BioScience Co.
    #   X = EURx Ltd.
    supplier_codes = [
        'C', 'B', 'E', 'I', 'K', 'J', 'M',
        'O', 'N', 'Q', 'S', 'R', 'V', 'Y', 'X',
    ]
    rb_supp = Restriction.RestrictionBatch(first=[], suppliers=supplier_codes)
    enz_cnt = len(rb_supp)

    rows = sorted(
        "{}\t{}\t{}\t{}".format(enz, enz.site, enz.elucidate(), len(enz))
        for enz in rb_supp
    )

    previous_initial = ""
    for row in rows:
        initial = row[0:1].upper()
        if initial != previous_initial:
            # New leading letter: start a new group.
            print("\n#")
            previous_initial = initial
        print(row)

    print("{}".format(enz_cnt), file=sys.stderr)
def getrestrictionmatches(seq, starting, enz):
    """Find enzymes whose last site in ``seq`` lies at or after ``starting``.

    ``seq`` is wrapped in a ``Seq`` with the ``IUPACAmbiguousDNA`` alphabet
    and searched with a batch built from ``enz``.

    Returns a pair of parallel lists ``(hits, sites)``: the enzymes that
    qualify and, for each of them, its largest reported position.
    """
    from Bio import Restriction
    from Bio import Seq
    from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA

    batch = Restriction.RestrictionBatch(enz)
    found = batch.search(Seq.Seq(seq, IUPACAmbiguousDNA()))

    hits, sites = [], []
    for enzyme, positions in found.items():
        if not positions:
            continue
        last = max(positions)
        if last >= starting:
            hits.append(enzyme)
            sites.append(last)
    return (hits, sites)
def __init__(self, orig, output):
    """
    Set up the instance around the original, unmutated sequence.

    Starts with no recorded shifts and no removed sites, and prepares a
    restriction batch for supplier code 'N'.

    @arg orig: The original sequence before mutation.
    @type orig: Bio.Seq.Seq
    @arg output: The output object.
    @type output: mutalyzer.Output.Output
    """
    self._shifts = defaultdict(int)
    self._removed_sites = set()
    self._restriction_batch = Restriction.RestrictionBatch(
        first=[], suppliers=['N'])
    self._output = output

    # `mutated` starts out as the very same object as `orig`: no copy is
    # needed because mutation operations are not in place
    # (`self._mutate`).
    self.orig = orig
    self.mutated = orig
def warnRestrictionSites(sequence,name,sites):
    """Warn on stderr when ``sequence`` contains any of the given sites.

    ``sites`` is a comma-separated string of enzyme names. ``sequence`` is
    searched as ambiguous DNA; when at least one site is found, a warning
    mentioning ``name`` is written to stderr and the full search result
    is passed to ``pp``. Nothing is printed when there is no hit.
    """
    sites = sites.split(",")
    rb = Restriction.RestrictionBatch(sites)

    # Get Bio.Seq object
    amb = IUPACAmbiguousDNA()
    tmpSeq = Seq(sequence,amb)

    # Search for sites
    res = rb.search(tmpSeq)

    # Sum hits over all enzymes
    totalSites = sum(len(v) for v in res.values())
    if totalSites > 0:
        # Written with sys.stderr.write rather than the Python 2 only
        # `print >>sys.stderr, ...` statement, which raises TypeError on
        # Python 3. The emitted text, including the newline, is the same.
        sys.stderr.write("Warning: The following positions in '%s' will be masked from tiles due to incompatible restictions sites:" % (name) + "\n")
        pp(res)
def hasRestrictionSites(sequence,sites):
    """Return True when ``sequence`` contains at least one of the sites.

    ``sites`` is a comma-separated string of enzyme names; ``sequence`` is
    searched as ambiguous DNA.
    """
    # Parse the comma-separated enzyme names into a batch
    batch = Restriction.RestrictionBatch(sites.split(","))
    # Search the sequence, wrapped as an ambiguous-DNA Bio.Seq object
    hits = batch.search(Seq(sequence, IUPACAmbiguousDNA()))
    # Any hit at all, for any enzyme, is enough
    return sum(len(positions) for positions in hits.values()) > 0
def find_restriction_sites(fasta_file, enzyme_list: list) -> dict:
    """
    Locate the restriction sites of one or several enzymes in every
    record of a FASTA file.

    Returns a dict mapping each record id to a two-item list: the length
    of the record's sequence and the sorted, de-duplicated positions
    found for all enzymes together.
    """
    # One batch, reused for all the records
    batch = Restriction.RestrictionBatch(enzyme_list)

    restrict_dict = {}
    for record in SeqIO.parse(fasta_file, "fasta"):
        hits = batch.search(record.seq)
        # Pool the positions of all the enzymes
        positions = [pos for enzyme in batch for pos in hits[enzyme]]
        unique_positions = list(np.unique(sorted(positions)))
        restrict_dict[record.id] = [len(record.seq), unique_positions]
    return restrict_dict
def command(args):
    """
    Show the supplier codes by calling ``show_codes()`` on an empty
    ``RestrictionBatch``. ``args`` is not used.
    """
    batch = r.RestrictionBatch()
    batch.show_codes()
def write_a_file(out_file):
    """Search ``record.seq`` with the supplier-"N" batch and create ``out_file``.

    The search result is not used and nothing is written: ``out_file`` is
    only created, or truncated if it already exists.

    NOTE(review): ``record`` is not a parameter; it is resolved from the
    enclosing or module scope — confirm this is intended.
    """
    my_batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    my_batch.search(record.seq)
    # Open through a context manager so the handle is closed here rather
    # than left open until garbage collection.
    with open(out_file, 'w'):
        pass
def get_restriction_enzymes(restriction_enzymes=_restriction_enzymes):
    """Build a ``RestrictionBatch`` from a list of enzyme names.

    Names that are not attributes of the ``Restriction`` module are
    silently skipped.
    """
    known = []
    for name in restriction_enzymes:
        if hasattr(Restriction, name):
            known.append(getattr(Restriction, name))
    return Restriction.RestrictionBatch(known)
def restriction_select(self):
    """Interactively pick two single-cutter enzymes for the coding vector.

    Stores a ``RestrictionBatch`` for supplier code 'B' in ``self.rb``,
    prints the enzymes that have exactly one site in
    ``self.codingvector``, then prompts for two enzyme names. A name is
    accepted only if it matches an enzyme of the batch that has exactly
    one site. The chosen pair and their positions are shown for
    confirmation: 'y' returns the pair, 'n' restarts the whole selection,
    'q' quits.

    Returns:
        tuple: ``(firstEnzyme, secondEnzyme)`` enzyme objects.

    Raises:
        ExitError: when the user enters 'q' ('q' or 'Q' at a name prompt).
    """
    self.rb = Restriction.RestrictionBatch([], ['B'])
    codingStrandAna = Restriction.Analysis(self.rb, self.codingvector)
    codingStrandAna.print_as('number')
    codingStrandAna.print_that(codingStrandAna.with_N_sites(1))
    print()

    def _ask_for_single_cutter(ordinal):
        """Prompt until the user names an enzyme with exactly one site."""
        while True:
            print("Enzyme names are case sensitive.")
            name = str(
                input(
                    "Enter the name of the {} restriction enzyme you want to use (q to quit): "
                    .format(ordinal)
                ))
            if name == 'q' or name == 'Q':
                raise ExitError
            sites = codingStrandAna.full()
            for enzyme in self.rb:
                if str(enzyme) == name and len(sites[enzyme]) == 1:
                    return enzyme
            print(
                'That is not a valid restriction enzyme for this vector. Did you misspell the name?'
            )

    firstEnzyme = _ask_for_single_cutter("first")
    print()
    secondEnzyme = _ask_for_single_cutter("second")

    while True:
        cuts = self.rb.search(self.codingvector)
        print(
            f'RestrictionEnzymes are: \n\t{str(firstEnzyme)} at locus: {cuts[firstEnzyme][0]}'
        )
        print(
            f'\t{str(secondEnzyme)} at locus: {cuts[secondEnzyme][0]}'
        )
        answer = str(input(
            'Does this look correct? (y or n, q to quit): ')).lower()
        # startswith() instead of answer[0]: an empty answer is reported
        # as invalid input instead of raising IndexError.
        if answer.startswith('y'):
            return firstEnzyme, secondEnzyme
        elif answer.startswith('n'):
            return self.restriction_select()
        elif answer == 'q':
            raise ExitError
        else:
            print('Invalid Input.')
def predict_sequence_digestions(
    sequence, enzymes, linear=True, max_enzymes_per_digestion=1
):
    """Return a dict giving bands sizes pattern for all possible digestions.

    The digestions, double-digestions, etc. are listed and for each the
    sequence band sizes are computed.

    The result is of the form ``{digestion: {'cuts': [], 'bands': []}}``
    where ``digestion`` is a tuple of enzyme names e.g. ``('EcoRI', 'XbaI')``,
    'cuts' is a list of cuts locations, 'bands' is a list of bands sizes.
    Each entry also carries a ``'same_as'`` key naming a digestion with the
    same pattern. The returned ``OrderedDict`` is sorted by number of
    enzymes, then by number of cuts.

    Parameters
    ----------

    sequence
      The sequence to be digested

    enzymes
      List of all enzymes to be considered. Each name is looked up as an
      attribute of the ``Restriction`` module.

    linear
      Forwarded to ``_compute_bands_from_cuts`` when bands are computed.
      It is not passed to the restriction-site search itself.

    max_enzymes_per_digestion
      Maximum number of enzymes allowed in one digestion

    Notes
    -----
    Earlier documentation listed a ``bands_to_migration`` parameter and a
    ``'migration'`` field; neither exists in this function.
    """
    restriction_batch = Restriction.RestrictionBatch(enzymes)
    # All the enzymes are searched once; the combinations below only merge
    # these per-enzyme cut lists.
    cuts_dict = restriction_batch.search(Seq(sequence))

    def get_cuts(enzyme_name):
        """Return ``{'cuts': [...]}`` for a single enzyme name."""
        return {"cuts": cuts_dict[Restriction.__dict__[enzyme_name]]}

    def _merge_digestions(digestion1, digestion2, sequence_length, linear):
        """Merge and sort the cuts from two different digestions."""
        all_cuts = sorted(list(set(digestion1["cuts"] + digestion2["cuts"])))
        return {
            "cuts": all_cuts,
            "bands": _compute_bands_from_cuts(
                cuts=all_cuts, sequence_length=sequence_length, linear=linear
            ),
        }

    # Seed with the "no enzyme" digestion: no cut, one band of full length.
    # It is only a starting point and is removed before returning.
    empty_digestion = ((), {"cuts": [], "bands": [len(sequence)]})
    digestions_dict = OrderedDict([empty_digestion])
    # Pass n extends every known n-enzyme digestion by one more enzyme.
    for n_enzymes in range(max_enzymes_per_digestion):
        sub_enzymes = [
            enzs for enzs in digestions_dict.keys() if len(enzs) == n_enzymes
        ]
        for enzyme in enzymes:
            sub_sub_enzymes = [
                enzs for enzs in sub_enzymes if enzyme not in enzs
            ]
            for enzs in sub_sub_enzymes:
                # Sorted tuple, so the key does not depend on the order in
                # which the enzymes were added.
                digestion = tuple(sorted(enzs + (enzyme,)))
                if digestion not in digestions_dict:
                    no_enzyme_band = len(get_cuts(enzyme)["cuts"]) == 0
                    no_enzs_band = len(digestions_dict[enzs]["cuts"]) == 0
                    one_no_bands = no_enzs_band or no_enzyme_band
                    if ((enzyme,) in digestions_dict) and one_no_bands:
                        # One side contributes no cut: reuse the other
                        # side's entry instead of recomputing the bands.
                        # NOTE: the SAME dict object ends up stored under
                        # both keys, so the "same_as" assignment is also
                        # visible through the original key.
                        if no_enzyme_band:
                            digestions_dict[digestion] = digestions_dict[enzs]
                            digestions_dict[digestion]["same_as"] = enzs
                        elif no_enzs_band:
                            dig = (enzyme,)
                            digestions_dict[digestion] = digestions_dict[dig]
                            digestions_dict[digestion]["same_as"] = dig
                    else:
                        # Also reached for every single-enzyme digestion,
                        # since (enzyme,) is not in the dict yet.
                        digestions_dict[digestion] = _merge_digestions(
                            digestion1=get_cuts(enzyme),
                            digestion2=digestions_dict[enzs],
                            sequence_length=len(sequence),
                            linear=linear,
                        )
                        digestions_dict[digestion]["same_as"] = digestion
    digestions_dict.pop(())
    # Reordering the dictionary makes the computation of scores faster
    # using 'same_as' to avoid recomputing scores involving similar patterns
    digestions_dict = OrderedDict(
        sorted(
            digestions_dict.items(),
            key=lambda item: (len(item[0]), len(item[1]["cuts"])),
        )
    )
    return digestions_dict