Ejemplo n.º 1
0
def main(argv: list) -> None:
    """Report enzymes whose number of sites differs between FASTA records.

    Every record of the FASTA file named by ``argv[1]`` is analysed as a
    linear sequence against ``Restriction.AllEnzymes``. A table is then
    printed with one row per enzyme whose site count is not the same in all
    records, and one column per record.

    Args:
        argv: Command-line style argument list; ``argv[1]`` is the path of
            the input FASTA file.

    Raises:
        SystemExit: With status 1 when no input file is given.
    """
    # Local import keeps this block self-contained; ``sys.exit`` is used
    # because the bare ``exit`` builtin is only injected by the ``site``
    # module and is not guaranteed to exist.
    import sys

    if len(argv) < 2:
        print('Please specify the input fasta file.')
        sys.exit(1)

    ### Prepare for the analysis
    # NOTE(review): ``IUPACAmbiguousDNA`` / the ``alphabet=`` argument come
    # from ``Bio.Alphabet``, which newer Biopython releases no longer ship --
    # confirm the pinned Biopython version.
    amb = IUPACAmbiguousDNA()
    seq_ids = []
    sites_results = []

    ### Read the fasta file
    ### Each fasta sequence should have a fasta description
    for seq_item in SeqIO.parse(argv[1], 'fasta', alphabet=amb):
        seq_ids.append(seq_item.id)
        ana = Restriction.Analysis(Restriction.AllEnzymes,
                                   seq_item.seq,
                                   linear=True)
        sites_results.append(ana.full())

    output = {}
    for enzyme in Restriction.AllEnzymes:
        nub_sites = [len(result[enzyme]) for result in sites_results]
        ### Keep the enzyme only if the site counts are not all the same.
        ### Using a set (instead of indexing nub_sites[0]) also copes with
        ### an input file that contains no records at all.
        if len(set(nub_sites)) > 1:
            output[str(enzyme)] = nub_sites

    print_fmt = '{:>15}' * (len(seq_ids) + 1)
    print(print_fmt.format('Enzyme Name', *seq_ids))
    for name, counts in sorted(output.items()):
        print(print_fmt.format(name, *counts))
Ejemplo n.º 2
0
def check_restriction(seq, batch_list, to_print = True):
    """Count the enzymes of ``batch_list`` that have a site in ``seq``.

    The sequence is analysed with ``linear=False``.

    :param seq: the DNA sequence to analyse
    :param batch_list: the enzymes to test, passed to ``Restriction.Analysis``
    :param to_print: when true, print the analysis in "map" format
    :return: the number of enzymes reported by ``with_sites()``
    """
    analysis = Restriction.Analysis(batch_list, seq, linear=False)
    analysis.full()
    enzymes_with_sites = analysis.with_sites()
    if to_print:
        analysis.print_as("map")
        analysis.print_that()
    return len(enzymes_with_sites)
def fragmentSeq(seq: SeqRecord, rb: RestrictionBatch, ren: str) -> str:
    """Run an in-silico restriction analysis on one sequence record.

    Args:
        seq: Record whose ``id`` and ``seq`` attributes are read.
        rb: Enzyme batch handed to ``res.Analysis``.
        ren: Key used to pick one enzyme's entry out of the full analysis
            result.

    NOTE(review): the visible body stops after ``resites`` is computed and
    never returns a value although the signature is annotated ``-> str``;
    the remainder of this function appears to be missing from this excerpt.
    """
    sid = seq.id
    slen = len(seq.seq)

    # Progress message for the user.
    print(f'Running insilico digest on {sid} of length {slen}')
    # Running an analysis on this sequence
    analysis = res.Analysis(rb, seq.seq)

    # locations of cut sites for this particular restriction enzyme
    # NOTE(review): ``ren`` is annotated as ``str`` but is used as a key of
    # the mapping returned by ``full()`` -- confirm the expected key type.
    resites = analysis.full()[ren]
Ejemplo n.º 4
0
    def _check_effect_of_enzyme(self, seq_target, enzyme_name_list):
        ''' Find which of the given enzymes have a site in seq_target.

        Follows the Biopython Restriction cookbook
        (http://biopython.org/DIST/docs/cookbook/Restriction.html).
        Original author's note: biopython <= 1.76 for IUPACAmbiguousDNA().

        Returns a 2-tuple:
          * dict keyed by enzyme name (str); each value is a dict with
            'ResType' (the enzyme object) and 'res_list' (the value that
            with_sites() reported for that enzyme).
          * str holding the "map" formatted analysis output up to (not
            including) the first line containing " Enzymes which ",
            followed by a dump of the dict above; the empty string when
            glv.conf.analyse_caps is not equal to True.
        '''

        # Wrap the target and analyse it against the requested enzymes.
        target = Seq(seq_target, IUPACAmbiguousDNA())
        batch = Restriction.RestrictionBatch(enzyme_name_list)
        analysis = Restriction.Analysis(batch, target)

        # with_sites() keeps only enzymes that have a site,
        # e.g. {EcoRI: [33]} rather than {KpnI: [], EcoRV: [], EcoRI: [33]}.
        # Re-key that result by the enzyme's string name.
        caps_check_dict = {
            str(enzyme): {
                'ResType': enzyme,
                'res_list': hits,
            }
            for enzyme, hits in analysis.with_sites().items()
        }

        enzyme_map_txt = ""

        # Detail information: restriction map of the sequence.
        # The explicit comparison with True is kept on purpose so the
        # condition behaves exactly as before for non-bool settings.
        if glv.conf.analyse_caps == True:
            analysis.print_as('map')

            kept_lines = []
            for line in analysis.format_output().split('\n'):
                # Everything from the first such line onwards is dropped.
                if " Enzymes which " in line:
                    break
                kept_lines.append("{}\n".format(line))

            kept_lines.append("caps_check_dict={}".format(caps_check_dict))
            enzyme_map_txt = "".join(kept_lines)

        return caps_check_dict, enzyme_map_txt
Ejemplo n.º 5
0
    def _restriction_count(self, sequence):
        """
        Count, per restriction enzyme, the sites found in a sequence.

        The sequence is analysed against C{self._restriction_batch}; only
        enzymes reported by C{with_sites()} appear in the result.

        @arg sequence: The sequence to be analysed
        @type sequence: str

        @return: A mapping of restriction enzyme names to site counts.
        @rtype: dict
        """
        sites_by_enzyme = Restriction.Analysis(
            self._restriction_batch, sequence).with_sites()

        counts = {}
        for enzyme, positions in sites_by_enzyme.items():
            counts[unicode(enzyme)] = len(positions)
        return counts
Ejemplo n.º 6
0
def check_restriction(seq, batch_list, to_print=False):
    """
    Report which of the given restriction enzymes have a site in a sequence.

    The analysis is run with ``linear=False``.

    :param seq: the DNA sequence to analyse
    :param batch_list: the RestrictionBatch object containing the restriction enzymes
    :param to_print: when true, also print the analysis in "map" format
    :return: the mapping produced by ``with_sites()`` (enzymes that have a
        site, with the positions reported for each)
    """
    analysis = Restriction.Analysis(batch_list, seq, linear=False)
    analysis.full()
    enzymes_with_sites = analysis.with_sites()
    if not to_print:
        return enzymes_with_sites
    analysis.print_as("map")
    analysis.print_that()
    return enzymes_with_sites
Ejemplo n.º 7
0
    def restriction_select(self):
        """Interactively choose two restriction enzymes for the coding vector.

        Rebuilds ``self.rb`` as ``Restriction.RestrictionBatch([], ['B'])``,
        analyses ``self.codingvector`` with it and prints the enzymes that
        have exactly one site. The user is then asked for two enzyme names;
        a name is accepted only when it matches an enzyme of the batch that
        has exactly one site in the vector. Finally the choice is shown for
        confirmation: ``y`` returns it, ``n`` restarts the whole selection.

        Returns:
            tuple: ``(firstEnzyme, secondEnzyme)``, the two chosen enzyme
            objects taken from ``self.rb``.

        Raises:
            ExitError: When the user enters ``q`` at any prompt.
        """
        self.rb = Restriction.RestrictionBatch([], ['B'])
        codingStrandAna = Restriction.Analysis(self.rb, self.codingvector)
        codingStrandAna.print_as('number')
        codingStrandAna.print_that(codingStrandAna.with_N_sites(1))
        print()

        def ask_for_single_cutter(ordinal):
            """Prompt until the user names an enzyme with exactly one site."""
            while True:
                print("Enzyme names are case sensitive.")
                name = input(
                    f"Enter the name of the {ordinal} restriction enzyme you want to use (q to quit): "
                )
                if name in ('q', 'Q'):
                    raise ExitError
                for enzyme in self.rb:
                    if (str(enzyme) == name
                            and len(codingStrandAna.full()[enzyme]) == 1):
                        return enzyme
                print(
                    'That is not a valid restriction enzyme for this vector. Did you misspell the name?'
                )

        firstEnzyme = ask_for_single_cutter('first')
        print()
        secondEnzyme = ask_for_single_cutter('second')

        # The site map does not change between confirmation attempts, so it
        # is looked up once instead of on every pass through the loop.
        site_map = self.rb.search(self.codingvector)
        while True:
            print(
                f'RestrictionEnzymes are: \n\t{str(firstEnzyme)} at locus: {site_map[firstEnzyme][0]}'
            )
            print(f'\t{str(secondEnzyme)} at locus: {site_map[secondEnzyme][0]}')
            answer = input(
                'Does this look correct? (y or n, q to quit): ').lower()
            # startswith() is safe on an empty answer (plain Enter), where
            # indexing answer[0] would raise IndexError.
            if answer.startswith('y'):
                return firstEnzyme, secondEnzyme
            if answer.startswith('n'):
                return self.restriction_select()
            if answer == 'q':
                raise ExitError
            print('Invalid Input.')
Ejemplo n.º 8
0
def ddRAD_digest(sequence_records, RE_1, RE_2):
    """Performs an in-silico double digest and bins fragments by their ends.

    Every record is cut at all positions reported for the two enzymes. Each
    resulting fragment is then classified by which enzyme produced the cut
    at its left and right end. The ends are identified from the cut
    positions themselves, not by matching recognition-site text, so the
    classification does not depend on where inside the site an enzyme cuts,
    on ambiguous bases in the site, or on the letter case of the sequence.

    Args:
        sequence_records (list of SeqRecord): Records to digest. The
            collection is iterated more than once, so pass a list rather
            than a one-shot iterator.
        RE_1 (str): Name of first restriction enzyme (looked up on ``rst``).
        RE_2 (str): Name of second restriction enzyme (looked up on ``rst``).

    Returns:
        dict: Maps each record ID to a dictionary with four keys, each
        holding a list of fragment lengths (int):

        * ``"<RE_1>_2"``: fragments bounded on both sides by RE_1
        * ``"<RE_2>_2"``: fragments bounded on both sides by RE_2
        * ``"<RE_1>_<RE_2>"``: fragments bounded by a cut on each side that
          is not covered by the two cases above (one enzyme per side)
        * ``"Singles"``: fragments with a cut on at most one side, i.e. the
          pieces at the ends of the sequence

        Zero-length pieces (a cut directly at a sequence end) are skipped.
    """

    # Counters
    fragment_counter = 0
    bound_by_RE_1 = 0
    bound_by_RE_2 = 0
    dual_fragment_counter = 0
    singletons = 0

    # Retrieve RE's
    RE_1 = getattr(rst, RE_1)
    RE_2 = getattr(rst, RE_2)

    # Create restriction batch object
    RE_batch = RE_1 + RE_2

    # Create strings from REs that will later become dictionary keys
    RE_1_both_sides_key = "{0}_2".format(RE_1)
    RE_2_both_sides_key = "{0}_2".format(RE_2)
    dual_key = "{0}_{1}".format(RE_1, RE_2)

    # Iterate over sequence records and perform a restriction analysis.
    # Keep one Analysis object per sequence record ID.
    print("Identifying cut positions of chosen enzymes")
    time.sleep(1)
    analyses = {}
    for record in sequence_records:
        analyses[record.id] = rst.Analysis(RE_batch,
                                           record.seq,
                                           linear=True)

    # For each record ID, extract the cut positions of both enzymes. They
    # are kept as sets so that "which enzyme made this cut?" is a cheap
    # membership test and a position shared by both enzymes appears once.
    print("Retrieving cut positions")
    time.sleep(1)
    cut_positions = {}
    for ID, ana in analyses.items():
        sites = ana.full()
        cut_positions[ID] = (frozenset(sites[RE_1]), frozenset(sites[RE_2]))

    # Iterate over sequence records
    print("Digesting fragments in multi-fasta file and assessing fragments ends")
    time.sleep(1)
    fragment_lengths = {}
    for record in sequence_records:

        sequence_length = len(record.seq)
        RE_1_cuts, RE_2_cuts = cut_positions[record.id]

        # Create lists in dictionary to store fragment lengths.
        bins = {
            RE_1_both_sides_key: [],
            RE_2_both_sides_key: [],
            dual_key: [],
            "Singles": []
        }
        fragment_lengths[record.id] = bins

        # Fragment boundaries as (0-based slice index, cut position) pairs.
        # A reported cut position p is treated, as before, as the 1-based
        # position of the first base after the cut, i.e. slice index p - 1.
        # The two sequence ends carry ``None`` because no enzyme cut there.
        edges = [(0, None)]
        edges.extend(
            (cut - 1, cut) for cut in sorted(RE_1_cuts | RE_2_cuts))
        edges.append((sequence_length, None))

        for (start, left_cut), (end, right_cut) in zip(edges, edges[1:]):
            length = end - start
            if length <= 0:
                # Cut directly at a sequence end: there is no fragment.
                continue
            fragment_counter += 1

            left_is_cut = left_cut is not None
            right_is_cut = right_cut is not None

            # Bounded on both sides by RE_1
            if left_cut in RE_1_cuts and right_cut in RE_1_cuts:
                bound_by_RE_1 += 1
                bins[RE_1_both_sides_key].append(length)

            # Bounded on both sides by RE_2
            elif left_cut in RE_2_cuts and right_cut in RE_2_cuts:
                bound_by_RE_2 += 1
                bins[RE_2_both_sides_key].append(length)

            # Cut on both sides, by a different enzyme on each side
            elif left_is_cut and right_is_cut:
                dual_fragment_counter += 1
                bins[dual_key].append(length)

            # No cut site on one end (e.g. ends of sequences)
            else:
                singletons += 1
                bins["Singles"].append(length)

    # Single-argument print(...) calls behave the same under Python 2 and 3.
    print("The digest generated a total of {0} fragments".format(
        fragment_counter))
    time.sleep(1)
    print("The digest generated {0} fragments bound on both sides by {1}".format(
        bound_by_RE_1, RE_1))
    time.sleep(1)
    print("The digest generated {0} fragments bound on both sides by {1}".format(
        bound_by_RE_2, RE_2))
    time.sleep(1)
    print("The digest generated {0} fragments bound by different cut sites on either side".format(
        dual_fragment_counter))
    time.sleep(1)
    print("The digest generated {0} fragments with a cut side on only one side".format(
        singletons))
    time.sleep(1)

    return fragment_lengths
Ejemplo n.º 9
0
    """
    p = len(s1)
    for x, y in zip(s1, s2):  # Walk through 2 sequences.
        if x == y:
            p -= 1
    return p


# Script body: analyse the input DNA and prepare the data against which the
# back-translated sequences are compared.
args = parser.parse_args()
# Wrap the raw input string as a sequence with the unambiguous DNA alphabet.
dna = Seq.Seq(args.input, IUPAC.unambiguous_dna)
# Translate DNA sequence.
ori_pep = dna.translate()
# Get all backtranslations.
bakpeps = backtrans(ori_pep, args.table_id)
# Make a restriction analysis for the original sequence.
analysis = Restriction.Analysis(Restriction.CommOnly, dna)
# Select the 'map' output style and keep the formatted result as text.
analysis.print_as('map')
ori_map = analysis.format_output()
# Store the enzymes that cut in the original sequence.
enz = list(analysis.with_sites().keys())
# Get a string out of the enzyme list, for printing.
# The [1:-1] slice drops the surrounding brackets of the list's repr.
oname = str(enz)[1:-1]
# Keep the enzymes as a set as well (presumably for set comparisons with the
# enzymes found in each back-translation -- confirm against the loop below).
enz = set(enz)
# Collects results for the back-translated sequences.
bakpeps_out = []
for bakpep in bakpeps:
    tmp_d = {}
    if bakpep not in args.input:
        # Make a restriction analysis for each sequence.
        analysis = Restriction.Analysis(Restriction.CommOnly,
                                        Seq.Seq(bakpep, IUPAC.unambiguous_dna))
        # Store the enzymes that cut in this sequence.