Ejemplo n.º 1
0
    def _check_effect_of_enzyme(self, seq_target, enzyme_name_list):
        """Analyse ``seq_target`` against the enzymes in ``enzyme_name_list``.

        Follows the Biopython Restriction cookbook
        (http://biopython.org/DIST/docs/cookbook/Restriction.html).
        Needs biopython <= 1.76 because of ``IUPACAmbiguousDNA()``.

        Returns a 2-tuple ``(caps_check_dict, enzyme_map_txt)``:

        * ``caps_check_dict`` maps each enzyme name (``str``) that has at
          least one site to ``{'ResType': <enzyme>, 'res_list': <sites>}``.
        * ``enzyme_map_txt`` is ``""`` unless ``glv.conf.analyse_caps`` is
          ``True``; then it holds the restriction map text (up to, but not
          including, the first line containing " Enzymes which ") followed
          by a dump of ``caps_check_dict``.
        """
        target = Seq(seq_target, IUPACAmbiguousDNA())
        batch = Restriction.RestrictionBatch(enzyme_name_list)
        analysis = Restriction.Analysis(batch, target)

        # with_sites() reports only the enzymes that have a site, e.g.
        # {EcoRI: [33]}; full() would also list the enzymes without one.
        sites_by_enzyme = analysis.with_sites()

        # Re-key the result by enzyme name while keeping the enzyme object.
        caps_check_dict = {
            str(enzyme): {'ResType': enzyme, 'res_list': positions}
            for enzyme, positions in sites_by_enzyme.items()
        }

        enzyme_map_txt = ""
        if glv.conf.analyse_caps == True:
            # Detailed report: restriction map of the sequence.
            analysis.print_as('map')
            pieces = []
            for map_line in analysis.format_output().split('\n'):
                if " Enzymes which " in map_line:
                    break
                pieces.append("{}\n".format(map_line))
            pieces.append("caps_check_dict={}".format(caps_check_dict))
            enzyme_map_txt = "".join(pieces)

        return caps_check_dict, enzyme_map_txt
Ejemplo n.º 2
0
def main(argv: list) -> None:
    """Print the enzymes whose site counts differ between FASTA records.

    ``argv[1]`` is the path of the input FASTA file. Every record is
    analysed (as linear DNA) against ``Restriction.AllEnzymes``; an enzyme
    is reported when the number of sites is not the same in all records.
    The report is a table with one column per record id.

    Exits with status 1 when no input file is given.
    """
    if len(argv) < 2:
        print('Please specify the input fasta file.')
        # SystemExit is what exit() raises, without relying on the
        # interactive helper installed by the ``site`` module.
        raise SystemExit(1)

    ### Prepare for the analysis
    amb = IUPACAmbiguousDNA()
    seq_ids = []
    sites_results = []

    ### Read the fasta file
    ### Each fasta sequence should have a fasta description
    for seq_item in SeqIO.parse(argv[1], 'fasta', alphabet=amb):
        seq_ids.append(seq_item.id)
        ana = Restriction.Analysis(Restriction.AllEnzymes,
                                   seq_item.seq,
                                   linear=True)
        sites_results.append(ana.full())

    output = {}
    for enzyme in Restriction.AllEnzymes:
        nub_sites = [len(result[enzyme]) for result in sites_results]
        ### Report only when the counts differ. Using a set also copes with
        ### an input file without records (no first element to compare to).
        if len(set(nub_sites)) > 1:
            output[str(enzyme)] = nub_sites

    print_fmt = '{:>15}' * (len(seq_ids) + 1)
    print(print_fmt.format('Enzyme Name', *seq_ids))
    for name, counts in sorted(output.items()):
        print(print_fmt.format(name, *counts))
Ejemplo n.º 3
0
    def restrict_function(in_IUPAC):
        """Build a RestrictionBatch for a fixed list of supplier codes.

        The batch is created and then discarded; the function returns
        ``None`` and does not use ``in_IUPAC``.
        """
        supplier_codes = [
            "C", "B", "E", "I", "K", "J", "M", "O", "N", "Q", "S", "R", "V",
            "Y", "X"
        ]
        Restriction.RestrictionBatch([], supplier_codes)
        return None
def find_cuts(sequence, enzymes, linear=True):
    """Return one flat list with the cut positions of all ``enzymes``.

    ``linear`` is forwarded to ``RestrictionBatch.search``. Positions are
    concatenated enzyme by enzyme, in the order of the search result.
    """
    hits_by_enzyme = Restriction.RestrictionBatch(enzymes).search(
        sequence, linear=linear)
    all_cuts = []
    for positions in hits_by_enzyme.values():
        all_cuts.extend(positions)
    return all_cuts
Ejemplo n.º 5
0
def command(args):
    """
    Lists the available restriction enzymes

    The set is chosen from ``args``: the enzymes of the suppliers in
    ``args.sup`` when that is not None, else all enzymes when ``args.all``
    is set, else the commercially available ones. Each enzyme is written
    to stdout as ``name site``, sorted by name, followed by a count line.
    """
    if args.sup is not None:
        selected = r.RestrictionBatch(first=[], suppliers=args.sup)
    else:
        selected = r.AllEnzymes if args.all else r.CommOnly

    # Sort by enzyme name so the listing is stable.
    ordered = sorted(selected, key=str)

    for enzyme in ordered:
        sys.stdout.write("{:16} {}\n".format(str(enzyme), enzyme.site))

    sys.stdout.write(
        "------\n{} restriction enzimes listed\n\n".format(len(ordered)))
Ejemplo n.º 6
0
    def get_caps_result(cls, seq_target, enzyme_list):
        """Search ``seq_target`` for the sites of the enzymes in ``enzyme_list``.

        See "Analysing sequences with a RestrictionBatch" in
        http://biopython.org/DIST/docs/cookbook/Restriction.html

        Returns ``(caps_result_dict, caps_result_dict_str)``: the raw search
        result keyed by enzyme object, and the same site lists keyed by the
        enzyme name as a string.
        """
        target = Seq(seq_target)
        batch = Restriction.RestrictionBatch(enzyme_list)

        # linear=True: sites spanning the sequence boundaries are not
        # included (they would be with linear=False).
        caps_result_dict = batch.search(target, linear=True)

        # Same site lists, keyed by enzyme name instead of RestrictionType.
        caps_result_dict_str = {
            str(enzyme): positions
            for enzyme, positions in caps_result_dict.items()
        }

        return caps_result_dict, caps_result_dict_str
Ejemplo n.º 7
0
def annotate_digestion_bands(record, enzymes, ladder):
    """Return a copy of ``record`` annotated with its digestion bands.

    The record is digested with ``enzymes`` and every resulting band is
    added as a ``misc_feature`` labelled ``"<letter> - <size>"``. Bands are
    labelled from the largest to the smallest with the letters ``a``..``l``,
    so at most 12 bands are annotated.

    Parameters
    ----------
    record
      Sequence record to digest. Treated as circular unless it has a
      truthy ``linear`` attribute.

    enzymes
      Enzymes passed to ``Restriction.RestrictionBatch``.

    ladder
      Not used by this function.

    Returns
    -------
    A deep copy of ``record`` carrying the band annotations; ``record``
    itself is left untouched.
    """
    linear = getattr(record, 'linear', False)
    batch = Restriction.RestrictionBatch(enzymes)
    # Search with the record's own topology so that, on a circular record,
    # sites spanning the origin are found too.
    cuts_dict = batch.search(record.seq, linear=linear)
    all_cuts = sorted({0, len(record)}.union(*cuts_dict.values()))
    bands = list(zip(all_cuts, all_cuts[1:]))
    if (not linear) and len(bands) > 1:
        # On a circular record the last and first fragments are one band
        # wrapping over the origin: give it a negative start coordinate.
        start, end = bands.pop()
        bands[0] = [-(end - start), bands[0][1]]
    # start - end is the negated size, so this sorts largest band first.
    sorted_bands = sorted(bands, key=lambda b: b[0] - b[1])
    new_record = deepcopy(record)
    for (band, label) in zip(sorted_bands, "abcdefghijkl"):
        band_size = abs(band[1] - band[0])
        formatted_size = bw.Band._format_dna_size(band_size)
        annotate_record(new_record,
                        location=band,
                        label="%s - %s" % (label, formatted_size),
                        feature_type="misc_feature",
                        band_label=label,
                        band_size=band_size)
    return new_record
Ejemplo n.º 8
0
def rebasecuts(Enzyme, Strand):
    """Return the sites that ``Enzyme`` finds in ``Strand``.

    ``Enzyme`` is added to an empty RestrictionBatch and fetched back from
    it; the search is then run on the fetched enzyme.
    """
    single_batch = Restriction.RestrictionBatch()
    single_batch.add(Enzyme)
    return single_batch.get(Enzyme).search(Strand)
Ejemplo n.º 9
0
def check_restriction(seq, batch_list, to_print=True):
    """Count the enzymes of ``batch_list`` that have a site in ``seq``.

    The sequence is analysed as circular (``linear=False``). When
    ``to_print`` is true the analysis is also printed in "map" format.
    """
    analysis = Restriction.Analysis(batch_list, seq, linear=False)
    analysis.full()
    cutter_count = len(analysis.with_sites())
    if not to_print:
        return cutter_count
    analysis.print_as("map")
    analysis.print_that()
    return cutter_count
Ejemplo n.º 10
0
def number_enzyme(record):
    """Return the per-enzyme site counts of ``record.seq`` in ascending order.

    The batch is built with ``suppliers="N"`` and the sequence is searched
    as circular (``linear=False``).
    """
    batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    hits = batch.search(record.seq, linear=False)
    return sorted(len(positions) for positions in hits.values())
 def _look_for_site(site, re_name, should_match=False):
     """Assert whether removing the ``re_name`` site changes the test DNA.

     ``site`` is prepended to ``self.test_dna`` and the result is run
     through ``seq_opt.remove_restriction_sites`` with a batch holding only
     the enzyme ``re_name``. The processed sequence must equal the input
     exactly when ``should_match`` is true.
     """
     dna = Seq(site + str(self.test_dna), IUPAC.unambiguous_dna)
     single_enzyme_batch = Restriction.RestrictionBatch(
         [Restriction.AllEnzymes.get(re_name)])
     proc_dna = seq_opt.remove_restriction_sites(
         dna, self.codon_use_table, single_enzyme_batch)
     unchanged = dna == proc_dna
     assert unchanged == should_match
Ejemplo n.º 12
0
def number_enzyme2(record):
    """Return the enzymes ordered by their number of sites in ``record.seq``.

    The batch is built with ``suppliers="N"`` and the sequence is searched
    as circular (``linear=False``). Enzymes with fewer sites come first.
    The number of enzymes is printed as a side effect.
    """
    batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    hits = batch.search(record.seq, linear=False)
    ranked = sorted(hits.items(), key=lambda pair: len(pair[1]))
    print(len(ranked))
    return [enzyme for enzyme, _ in ranked]
def filter_re_sites(candidates):
    """Return the candidates whose sequence has no restriction site match.

    Each candidate is a mapping whose ``'seqrec'`` entry has a ``seq``
    attribute; it is searched with the module-level
    ``restriction_enzymes``. Input order is preserved.
    """
    batch = Restriction.RestrictionBatch(restriction_enzymes)

    def _has_match(candidate):
        hits = batch.search(candidate['seqrec'].seq)
        return any(
            position for positions in hits.values() for position in positions)

    return [candidate for candidate in candidates
            if not _has_match(candidate)]
def fragmentSeq(seq: SeqRecord, rb: RestrictionBatch, ren: str) -> str:
    """Run an in-silico restriction analysis on one sequence record.

    Prints a progress line, analyses ``seq.seq`` with the enzymes in ``rb``
    and looks up the sites reported for ``ren``.

    NOTE(review): the visible body stops after ``resites`` is assigned and
    returns nothing, although the signature is annotated ``-> str``; the
    remainder of the function appears to be missing from this excerpt --
    confirm against the original source.
    """
    sid = seq.id
    slen = len(seq.seq)

    print(f'Running insilico digest on {sid} of length {slen}')
    # Running an analysis on this sequence
    analysis = res.Analysis(rb, seq.seq)

    # locations of cut sites for this particular restriction enzyme
    # NOTE(review): assumes ``ren`` is usable as a key of the mapping
    # returned by full() -- TODO confirm, it is annotated as ``str``.
    resites = analysis.full()[ren]
Ejemplo n.º 15
0
def digest(fasta_records, enzyme):
    """
    Divide a genome into restriction fragments. Support Arima-HiC enzyme cocktail
    which digest chromatin at ^GATC and G^ANTC.

    Parameters
    ----------
    fasta_records : OrderedDict
        Dictionary of chromosome names to sequence records.

    enzyme: str
        Name of restriction enzyme. The special name 'arima' (any case)
        selects the MboI + HinfI cocktail.

    Returns
    -------
    Dataframe with columns: 'chrom', 'start', 'end'.

    Raises
    ------
    ValueError
        If ``enzyme`` cannot be resolved to a Bio.Restriction enzyme.

    """
    import Bio.Restriction as biorst
    import Bio.Seq as bioseq
    # http://biopython.org/DIST/docs/cookbook/Restriction.html#mozTocId447698
    chroms = fasta_records.keys()
    try:
        if enzyme.lower() == 'arima':
            cocktail = biorst.RestrictionBatch(['MboI', 'HinfI'])
            cut_finder = cocktail.search
        else:
            cut_finder = getattr(biorst, enzyme).search
    except AttributeError:
        raise ValueError('Unknown enzyme name: {}'.format(enzyme))

    def _each(chrom):
        """Return the fragment table of one chromosome."""
        seq = bioseq.Seq(str(fasta_records[chrom]))
        found = cut_finder(seq)
        if isinstance(found, dict):
            # A batch search maps every enzyme to its own site list:
            # merge them into one sorted list.
            cut_sites = sorted(
                site for sites in found.values() for site in sites)
        else:
            # A single-enzyme search already yields the site positions.
            cut_sites = list(found)
        cuts = np.r_[0, np.array(cut_sites) + 1, len(seq)].astype(int)
        n_frags = len(cuts) - 1

        frags = pd.DataFrame(
            {
                'chrom': [chrom] * n_frags,
                'start': cuts[:-1],
                'end': cuts[1:]
            },
            columns=['chrom', 'start', 'end'])
        return frags

    return pd.concat(map(_each, chroms), axis=0, ignore_index=True)
Ejemplo n.º 16
0
def changerestrictionsites(seqrecords, codons, revcodons):
    """Call ``removesite`` once for every restriction site hit in each record.

    Every record is searched for AscI, BspQI, PstI, EcoRI, NotI, BtsI and
    BsaI sites. For each hit, ``removesite`` receives the record, the
    enzyme's recognition site as a ``Seq``, and the two codon tables.
    Returns the ``seqrecords`` object that was passed in.
    """
    for record in seqrecords:
        batch = Restriction.RestrictionBatch([
            Restriction.AscI, Restriction.BspQI, Restriction.PstI,
            Restriction.EcoRI, Restriction.NotI, Restriction.BtsI,
            Restriction.BsaI
        ])
        for enzyme, positions in batch.search(record.seq).items():
            # One removal attempt per occurrence of the site.
            for _ in positions:
                site_seq = Seq(enzyme.site, generic_dna)
                removesite(record, site_seq, codons, revcodons)
    return seqrecords
Ejemplo n.º 17
0
    def _restriction_count(self, sequence):
        """
        Count, per restriction enzyme, the sites found in a sequence.

        Only enzymes of ``self._restriction_batch`` with at least one site
        are reported.

        @arg sequence: The sequence to be analysed
        @type sequence: str

        @return: A mapping of restriction enzyme names to site counts.
        @rtype: dict
        """
        analysis = Restriction.Analysis(self._restriction_batch, sequence)
        sites_by_enzyme = analysis.with_sites()
        return {
            unicode(enzyme): len(sites)
            for enzyme, sites in sites_by_enzyme.items()
        }
Ejemplo n.º 18
0
def restriction_digest():
    """Show the restriction-digest form and process a submitted sequence.

    Redirects to the log-in page when ``session.username`` is None. When
    the form is accepted, the sequence is searched with every enzyme of
    suppliers 'F', 'N' and 'R', as linear or circular DNA according to the
    "DNA Type" field. The result is stored in ``session['result']`` and the
    request is redirected to ``restriction_digest_output``.

    Stored result: ``'sequence'`` (the Seq), ``'Enzymes that do not cut'``
    (list of names) and, per cutting enzyme name, a dict with
    'Restriction site', 'Number of fragments' and, if requested,
    'Cut positions'.

    Returns ``dict(form=form)`` for rendering when nothing was submitted.
    """
    if session.username is None:
        redirect(URL(r=request, f='../account/log_in'))
    form = FORM(TABLE(TR("Sequence:  ", 
                        TEXTAREA(_type="text",
                                 _value="Enter your DNA sequence in plain form",
                                 _name="sequence",
                                 requires=IS_NOT_EMPTY())),
                      TR("DNA Type: ", 
                        SELECT("Linear", "Circular",
                               _name="dna_type")),
                      TR("Show Fragments: ", 
                        SELECT("No", "Yes",
                               _name="show_frag")),
                      TR("", INPUT(_type="submit", _value="Digest DNA"))))
    if form.accepts(request.vars,session):
        from Bio import Restriction as R
        from Bio.Seq import Seq
        from Bio.Alphabet import IUPAC
        # Must be a real boolean: the search only tests truthiness, and the
        # string 'False' is truthy, which would digest circular DNA as linear.
        is_linear = request.vars.dna_type == 'Linear'
        seq = Seq(request.vars.sequence, IUPAC.unambiguous_dna)
        results = {}
        nocut = []
        results['sequence'] = seq
        for enzyme in R.RestrictionBatch([], suppliers = ['F', 'N', 'R']):
            digest = sorted(enzyme.search(seq, linear=is_linear))
            if not digest:
                nocut.append(str(enzyme))
                continue
            d = {'Restriction site': enzyme.site}
            # n cuts give n + 1 fragments on linear DNA, n on circular DNA.
            n_fragments = len(digest) + 1 if is_linear else len(digest)
            d['Number of fragments'] = str(n_fragments)
            if request.vars.show_frag == 'Yes':
                d['Cut positions'] = str(digest)
            results[str(enzyme)] = d
        results['Enzymes that do not cut'] = nocut
        session['result'] = results
        redirect(URL(r=request, f='restriction_digest_output'))
    return dict(form=form)
Ejemplo n.º 19
0
def check_restriction(seq, batch_list, to_print=False):
    """
    Report, using biopython, which of the given restriction enzymes cut a sequence.

    The sequence is analysed as circular (``linear=False``).

    :param seq: the given dna sequence
    :param batch_list: the RestrictionBatch object containing the restriction enzymes
    :param to_print: when true, also print the analysis in "map" format
    :return: a dictionary of the enzymes that cut the sequence and where
    """
    analysis = Restriction.Analysis(batch_list, seq, linear=False)
    analysis.full()
    cutters = analysis.with_sites()
    if not to_print:
        return cutters
    analysis.print_as("map")
    analysis.print_that()
    return cutters
Ejemplo n.º 20
0
def RestrictionEnzymes(restriction_enzymes):
    """Create a RestrictionBatch for a supplied list of restriction enzymes.

    Args:
        restriction_enzymes (list[str]): Names of the restriction enzymes
            to include, e.g. ["NdeI", "XhoI"]. Each name is looked up in
            ``Restriction.AllEnzymes``. The argument is required.

    Returns:
        Bio.Restriction.Restriction.RestrictionBatch: RestrictionBatch instance
        configured with the input restriction enzymes.
    """
    lookup = Restriction.AllEnzymes.get
    selected = [lookup(name) for name in restriction_enzymes]
    return Restriction.RestrictionBatch(selected)
Ejemplo n.º 21
0
def main():
    """Print a sorted table of the enzymes sold by the listed suppliers.

    Each row is ``name<TAB>site<TAB>elucidated site<TAB>len(enzyme)``. Rows
    are sorted as strings, and a ``#`` separator line is printed whenever
    the upper-cased first character changes. The number of enzymes in the
    batch is written to stderr at the end.
    """
    # Supplier codes as listed by RestrictionBatch.show_codes()
    # (May 2016 REBASE release):
    #   C = Minotech Biotechnology      B = Life Technologies
    #   E = Agilent Technologies        I = SibEnzyme Ltd.
    #   K = Takara Bio Inc.             J = Nippon Gene Co., Ltd.
    #   M = Roche Applied Science       O = Toyobo Biochemicals
    #   N = New England Biolabs         Q = Molecular Biology Resources - CHIMERx
    #   S = Sigma Chemical Corporation  R = Promega Corporation
    #   V = Vivantis Technologies       Y = SinaClon BioScience Co.
    #   X = EURx Ltd.
    supplier_codes = [
        'C', 'B', 'E', 'I', 'K', 'J', 'M',
        'O', 'N', 'Q', 'S', 'R', 'V', 'Y', 'X',
    ]
    rb_supp = Restriction.RestrictionBatch(first=[], suppliers=supplier_codes)
    enz_cnt = len(rb_supp)

    rows = sorted(
        "{}\t{}\t{}\t{}".format(enz, enz.site, enz.elucidate(), len(enz))
        for enz in rb_supp
    )

    previous_initial = ""
    for row in rows:
        initial = row[0:1].upper()
        # New group of enzymes: emit a separator first.
        if initial != previous_initial:
            print("\n#")
        print(row)
        previous_initial = initial

    print("{}".format(enz_cnt), file=sys.stderr)
Ejemplo n.º 22
0
 def getrestrictionmatches(seq, starting, enz):
     """Return the enzymes whose last site in ``seq`` is at or after ``starting``.

     ``enz`` is passed to ``RestrictionBatch``. The result is a tuple
     ``(hits, sites)`` of parallel lists: the matching enzymes and, for
     each, its highest site position.
     """
     from Bio import Restriction
     from Bio import Seq
     from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA

     batch = Restriction.RestrictionBatch(enz)
     target = Seq.Seq(seq, IUPACAmbiguousDNA())

     hits, sites = [], []
     for enzyme, positions in batch.search(target).items():
         if not positions:
             continue
         last_site = max(positions)
         if last_site >= starting:
             hits.append(enzyme)
             sites.append(last_site)
     return (hits, sites)
Ejemplo n.º 23
0
    def __init__(self, orig, output):
        """
        Set up the instance around the original, unmutated sequence.

        @arg orig: The original sequence before mutation.
        @type orig: Bio.Seq.Seq
        @arg output: The output object.
        @type output: mutalyzer.Output.Output
        """
        self._output = output
        self.orig = orig

        # No copy is needed: mutation operations are not in place (see
        # `self._mutate`), so both names may refer to the same object.
        self.mutated = orig

        self._shifts = defaultdict(int)
        self._removed_sites = set()
        self._restriction_batch = Restriction.RestrictionBatch(
            first=[], suppliers=['N'])
Ejemplo n.º 24
0
def warnRestrictionSites(sequence,name,sites):
    """Warn when ``sequence`` contains any of the given restriction sites.

    ``sites`` is a comma-separated string of enzyme names. When at least
    one site is found, a warning naming ``name`` is written to stderr and
    the search result is printed with ``pp``. Nothing is returned.
    """
    batch = Restriction.RestrictionBatch(sites.split(","))

    # Get Bio.Seq object
    tmpSeq = Seq(sequence, IUPACAmbiguousDNA())

    # Search for sites
    res = batch.search(tmpSeq)

    # Sum hits over all enzymes
    totalSites = sum(len(v) for v in res.values())

    if totalSites > 0:
        # sys.stderr.write works on both Python 2 and 3, unlike the
        # ``print >>sys.stderr`` statement; the 1-tuple keeps the
        # formatting safe even if ``name`` is itself a tuple.
        sys.stderr.write(
            "Warning: The following positions in '%s' will be masked from tiles due to incompatible restictions sites:\n" % (name,))
        pp(res)
Ejemplo n.º 25
0
def hasRestrictionSites(sequence,sites):
    """Return True when ``sequence`` has a site for any of the given enzymes.

    ``sites`` is a comma-separated string of enzyme names.
    """
    batch = Restriction.RestrictionBatch(sites.split(","))

    # Get Bio.Seq object
    target = Seq(sequence, IUPACAmbiguousDNA())

    # One hit for any enzyme is enough.
    hits_by_enzyme = batch.search(target)
    return any(len(hits) > 0 for hits in hits_by_enzyme.values())
Ejemplo n.º 26
0
def find_restriction_sites(fasta_file, enzyme_list: list) -> dict:
    """
    Find the restriction sites of one or several enzymes in each sequence.

    Returns a dict mapping every record id of ``fasta_file`` to
    ``[sequence length, sorted unique site positions]``, where the sites of
    all enzymes in ``enzyme_list`` are pooled.
    """
    batch = Restriction.RestrictionBatch(enzyme_list)

    restrict_dict = {}
    for record in SeqIO.parse(fasta_file, "fasta"):
        hits = batch.search(record.seq)
        pooled = [site for enz in batch for site in hits[enz]]
        # np.unique returns the distinct positions in ascending order.
        restrict_dict[record.id] = [
            len(record.seq), list(np.unique(pooled))
        ]

    return restrict_dict
Ejemplo n.º 27
0
def command(args):
    """
    Lists all suppliers

    Delegates to ``RestrictionBatch.show_codes()``; ``args`` is not used.
    """
    empty_batch = r.RestrictionBatch()
    empty_batch.show_codes()
Ejemplo n.º 28
0
def write_a_file(out_file):
    """Search ``record.seq`` with the supplier-"N" batch and open ``out_file``.

    NOTE(review): the visible body ends right after the file is opened;
    nothing is written and the handle is never closed here. The rest of the
    function appears to be missing from this excerpt -- confirm.
    """
    my_batch = Restriction.RestrictionBatch(first=[], suppliers="N")
    # NOTE(review): `record` is neither a parameter nor a local; presumably
    # a module-level name -- verify. The search result is discarded.
    my_batch.search(record.seq)
    f = open(out_file, 'w')
Ejemplo n.º 29
0
def get_restriction_enzymes(restriction_enzymes=_restriction_enzymes):
    """Build a RestrictionBatch from the given enzyme names.

    Names that are not attributes of the ``Restriction`` module are
    silently skipped.
    """
    known = []
    for name in restriction_enzymes:
        if hasattr(Restriction, name):
            known.append(getattr(Restriction, name))
    return Restriction.RestrictionBatch(known)
def predict_sequence_digestions(
    sequence, enzymes, linear=True, max_enzymes_per_digestion=1
):
    """Return a dict giving bands sizes pattern for all possible digestions.

    The digestions, double-digestions, etc. are listed and for each the
    sequence band sizes are computed.

    The result is of the form
    ``{digestion: {'cuts': [], 'bands': [], 'same_as': digestion}}``
    where ``digestion`` is a tuple of enzyme names e.g. ``('EcoRI', 'XbaI')``,
    'cuts' is a list of cuts locations, 'bands' is a list of bands sizes and
    'same_as' names a digestion with the same pattern. When adding an
    enzyme does not add cuts, the new entry is the very same dict object as
    the entry it duplicates.

    The returned OrderedDict is sorted by number of enzymes, then by number
    of cuts.

    Parameters
    ----------
    sequence
      The sequence to be digested

    enzymes
      List of all enzymes to be considered

    linear
      Whether the sequence is linear (True) or circular (False). Used both
      for the restriction site search and for computing the bands.

    max_enzymes_per_digestion
      Maximum number of enzymes allowed in one digestion

    """
    restriction_batch = Restriction.RestrictionBatch(enzymes)
    # Search with the requested topology so that, on circular sequences,
    # sites spanning the origin are found as well.
    cuts_dict = restriction_batch.search(Seq(sequence), linear=linear)

    def get_cuts(enzyme_name):
        return {"cuts": cuts_dict[Restriction.__dict__[enzyme_name]]}

    def _merge_digestions(digestion1, digestion2, sequence_length, linear):
        """Merge and sort the cuts from two different digestions."""
        all_cuts = sorted(list(set(digestion1["cuts"] + digestion2["cuts"])))
        return {
            "cuts": all_cuts,
            "bands": _compute_bands_from_cuts(
                cuts=all_cuts, sequence_length=sequence_length, linear=linear
            ),
        }

    # Seed with the "no enzyme" digestion; it is removed before returning.
    empty_digestion = ((), {"cuts": [], "bands": [len(sequence)]})
    digestions_dict = OrderedDict([empty_digestion])
    for n_enzymes in range(max_enzymes_per_digestion):
        # Extend every digestion of exactly n_enzymes by one more enzyme.
        sub_enzymes = [
            enzs for enzs in digestions_dict.keys() if len(enzs) == n_enzymes
        ]
        for enzyme in enzymes:
            sub_sub_enzymes = [
                enzs for enzs in sub_enzymes if enzyme not in enzs
            ]
            for enzs in sub_sub_enzymes:
                digestion = tuple(sorted(enzs + (enzyme,)))
                if digestion not in digestions_dict:
                    no_enzyme_band = len(get_cuts(enzyme)["cuts"]) == 0
                    no_enzs_band = len(digestions_dict[enzs]["cuts"]) == 0
                    one_no_bands = no_enzs_band or no_enzyme_band
                    if ((enzyme,) in digestions_dict) and one_no_bands:
                        # One side contributes no cut: reuse the other
                        # side's entry instead of recomputing the bands.
                        if no_enzyme_band:
                            digestions_dict[digestion] = digestions_dict[enzs]
                            digestions_dict[digestion]["same_as"] = enzs
                        elif no_enzs_band:
                            dig = (enzyme,)
                            digestions_dict[digestion] = digestions_dict[dig]
                            digestions_dict[digestion]["same_as"] = dig
                    else:
                        digestions_dict[digestion] = _merge_digestions(
                            digestion1=get_cuts(enzyme),
                            digestion2=digestions_dict[enzs],
                            sequence_length=len(sequence),
                            linear=linear,
                        )
                        digestions_dict[digestion]["same_as"] = digestion
    digestions_dict.pop(())

    # Reordering the dictionary makes the computation of scores faster
    # using 'same_as' to avoid recomputing scores involving similar patterns
    digestions_dict = OrderedDict(
        sorted(
            digestions_dict.items(),
            key=lambda item: (len(item[0]), len(item[1]["cuts"])),
        )
    )
    return digestions_dict