Example #1
0
def _cap_enzymes_between_alleles(allele1, allele2, reference, start, end,
                                 all_enzymes=False):
    '''Return the enzymes that tell the two given alleles apart.

    Each allele is placed between the reference bases that flank the
    start/end region, and both resulting sequences are analysed as linear
    molecules. The returned set holds the enzymes that
    Analysis.with_sites() reports for exactly one of the two sequences.

    When all_enzymes is true the CommOnly batch is used, otherwise a
    RestrictionBatch built from COMMON_ENZYMES.
    '''
    batch = CommOnly if all_enzymes else RestrictionBatch(COMMON_ENZYMES)

    ref_seq = reference.seq
    # Upstream flank: up to 99 reference bases ending right before `start`.
    upstream = ref_seq[max(start - 99, 0): start]
    # Downstream flank: up to 100 reference bases beginning at `end`.
    downstream = ref_seq[end: end + 100]

    # Build both candidate sequences before running any analysis.
    candidates = [upstream + allele + downstream
                  for allele in (allele1, allele2)]

    cutters_allele1, cutters_allele2 = (
        set(Analysis(batch, candidate, linear=True).with_sites().keys())
        for candidate in candidates
    )

    # Keep only the enzymes found in one sequence but not in the other.
    return cutters_allele1 ^ cutters_allele2
Example #2
0
def fragment(args):
    """
    %prog fragment fastafile enzyme

    Cut the fastafile using the specified enzyme, and grab upstream and
    downstream nucleotide sequence along with the cut site. In this case, the
    sequences extracted are:

                |- PstI
    ============|===========
            (-------)

    Sometimes we need to limit the size of the restriction fragments, for
    example the GBS protocol does not allow fragments larger than 800bp.

           |-PstI        |- PstI              |- PstI
    ~~~====|=============|==========~~~~~~~===|============
           (---)     (---)

    In this case, the second fragment is longer than 800bp, therefore the two
    ends are NOT extracted, as in the first fragment.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is kept verbatim.
    #
    # `args` is the list of command-line arguments: two positionals
    # (fastafile, enzyme name) plus the optional --flank / --full switches.
    # Output goes to a FASTA file whose name is derived from the input name;
    # nothing is returned.
    p = OptionParser(fragment.__doc__)
    p.add_option(
        "--flank",
        default=150,
        type="int",
        help="Extract flanking bases of the cut sites",
    )
    p.add_option(
        "--full",
        default=False,
        action="store_true",
        help="The full extraction mode",
    )
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise show the help
    # and exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, enzyme = args
    flank = opts.flank
    assert flank > 0
    # --full selects the extraction helper and the tag used in the file name.
    extract = extract_full if opts.full else extract_ends
    tag = "full" if opts.full else "ends"

    assert enzyme in set(str(x) for x in AllEnzymes)
    # The output name keeps only the text before the FIRST "." of the input
    # name and appends ".<enzyme>.flank<N>.<tag>.fasta".
    fragfastafile = fastafile.split(".")[0] + ".{0}.flank{1}.{2}.fasta".format(
        enzyme, flank, tag
    )
    # Swap the enzyme name for the matching enzyme object.
    enzyme = [x for x in AllEnzymes if str(x) == enzyme][0]

    f = Fasta(fastafile, lazy=True)
    # The context manager guarantees the output file is flushed and closed,
    # even when extraction fails part-way through.
    with open(fragfastafile, "w") as fw:
        for _name, rec in f.iteritems_ordered():
            a = Analysis([enzyme], rec.seq)
            # Entry of the analysis result for this enzyme (presumably its
            # cut positions in the record -- confirm against Bio.Restriction).
            sites = a.full()[enzyme]
            extract(rec, sites, flank, fw)

    logging.debug("Fragments written to `{0}`.".format(fragfastafile))
Example #3
0
def fragment(args):
    """
    %prog fragment fastafile enzyme

    Cut the fastafile using the specified enzyme, and grab upstream and
    downstream nucleotide sequence along with the cut site. In this case, the
    sequences extracted are:

                |- PstI
    ============|===========
            (-------)

    Sometimes we need to limit the size of the restriction fragments, for
    example the GBS protocol does not allow fragments larger than 800bp.

           |-PstI        |- PstI              |- PstI
    ~~~====|=============|==========~~~~~~~===|============
           (---)     (---)

    In this case, the second fragment is longer than 800bp, therefore the two
    ends are NOT extracted, as in the first fragment.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is kept verbatim.
    #
    # `args` is the list of command-line arguments: two positionals
    # (fastafile, enzyme name) plus the optional --flank / --full switches.
    # Output goes to a FASTA file whose name is derived from the input name;
    # nothing is returned.
    p = OptionParser(fragment.__doc__)
    p.add_option("--flank", default=150, type="int",
            help="Extract flanking bases of the cut sites [default: %default]")
    p.add_option("--full", default=False, action="store_true",
            help="The full extraction mode [default: %default]")
    opts, args = p.parse_args(args)

    # Exactly two positional arguments are required; otherwise show the help
    # and exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, enzyme = args
    flank = opts.flank
    assert flank > 0
    # --full selects the extraction helper and the tag used in the file name.
    extract = extract_full if opts.full else extract_ends
    tag = "full" if opts.full else "ends"

    assert enzyme in set(str(x) for x in AllEnzymes)
    # The output name keeps only the text before the FIRST "." of the input
    # name and appends ".<enzyme>.flank<N>.<tag>.fasta".
    fragfastafile = fastafile.split(".")[0] + \
        ".{0}.flank{1}.{2}.fasta".format(enzyme, flank, tag)
    # Swap the enzyme name for the matching enzyme object.
    enzyme = [x for x in AllEnzymes if str(x) == enzyme][0]

    f = Fasta(fastafile, lazy=True)
    # The context manager guarantees the output file is flushed and closed,
    # even when extraction fails part-way through.
    with open(fragfastafile, "w") as fw:
        for _name, rec in f.iteritems_ordered():
            a = Analysis([enzyme], rec.seq)
            # Entry of the analysis result for this enzyme (presumably its
            # cut positions in the record -- confirm against Bio.Restriction).
            sites = a.full()[enzyme]
            extract(rec, sites, flank, fw)

    logging.debug("Fragments written to `{0}`.".format(fragfastafile))