def _cap_enzymes_between_alleles(allele1, allele2, reference, start, end,
                                 all_enzymes=False):
    """Return the enzymes that have sites in only one of the two alleles.

    Each allele is embedded in the same flanking context taken from
    ``reference.seq`` and analysed as a linear sequence; the result is the
    symmetric difference of the two sets of enzymes reported by
    ``with_sites()``.

    allele1, allele2 -- allele sequences, concatenated with slices of
                        ``reference.seq``.
    reference        -- object exposing the reference sequence as ``.seq``.
    start, end       -- slice boundaries into ``reference.seq``; the left
                        flank stops at ``start`` and the right flank begins
                        at ``end``.
    all_enzymes      -- when true use ``CommOnly``; otherwise a
                        ``RestrictionBatch`` built from ``COMMON_ENZYMES``.

    Returns a set.
    """
    batch = CommOnly if all_enzymes else RestrictionBatch(COMMON_ENZYMES)

    ref_seq = reference.seq

    # The left flank starts at (start + 1) - 100, clamped to the beginning of
    # the reference, and runs up to (but not including) ``start``.
    left_begin = max(start + 1 - 100, 0)
    upstream = ref_seq[left_begin:start]
    # The right flank is at most 100 positions long, beginning at ``end``.
    downstream = ref_seq[end:end + 100]

    def enzymes_with_sites(allele):
        # Same flanks for both alleles, so only the allele itself can change
        # which enzymes find a site.
        analysis = Analysis(batch, upstream + allele + downstream, linear=True)
        return set(analysis.with_sites().keys())

    return enzymes_with_sites(allele1) ^ enzymes_with_sites(allele2)
def fragment(args):
    """
    %prog fragment fastafile enzyme

    Cut the fastafile using the specified enzyme, and grab upstream and
    downstream nucleotide sequence along with the cut site. In this case, the
    sequences extracted are:

                |- PstI
    ============|===========
            (-------)

    Sometimes we need to limit the size of the restriction fragments, for
    example the GBS protocol does not allow fragments larger than 800bp.

           |-PstI        |- PstI              |- PstI
    ~~~====|=============|==========~~~~~~~===|============
           (---)     (---)

    In this case, the second fragment is longer than 800bp, therefore the two
    ends are NOT extracted, as in the first fragment.
    """
    # NOTE: the docstring above is handed to OptionParser below (presumably as
    # the usage/help text), so it is kept user-facing; developer notes live in
    # comments instead.
    #
    # args: list of command-line tokens; after option parsing exactly two
    #       positionals are required: the FASTA file and the enzyme name.
    # Output: a FASTA file named
    #       <fastafile up to first "."> .<enzyme>.flank<N>.<full|ends>.fasta
    # Exits via sys.exit() after printing help when the positional count is
    # wrong; raises AssertionError for a non-positive flank or unknown enzyme.
    p = OptionParser(fragment.__doc__)
    p.add_option(
        "--flank",
        default=150,
        type="int",
        help="Extract flanking bases of the cut sites",
    )
    p.add_option(
        "--full",
        default=False,
        action="store_true",
        help="The full extraction mode",
    )
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, enzyme_name = args
    flank = opts.flank
    assert flank > 0, "--flank must be a positive integer"

    # --full switches both the extraction routine and the output file tag.
    extract = extract_full if opts.full else extract_ends
    tag = "full" if opts.full else "ends"

    # Resolve the enzyme name to its object with a single scan of AllEnzymes.
    matches = [x for x in AllEnzymes if str(x) == enzyme_name]
    assert matches, "Unknown enzyme `{0}`".format(enzyme_name)
    enzyme = matches[0]

    fragfastafile = fastafile.split(".")[0] + ".{0}.flank{1}.{2}.fasta".format(
        enzyme_name, flank, tag
    )

    f = Fasta(fastafile, lazy=True)
    # The context manager guarantees the output is flushed and closed, even if
    # extraction fails part-way, and before the "written" message is logged.
    with open(fragfastafile, "w") as fw:
        for _name, rec in f.iteritems_ordered():
            a = Analysis([enzyme], rec.seq)
            sites = a.full()[enzyme]
            extract(rec, sites, flank, fw)

    logging.debug("Fragments written to `{0}`.".format(fragfastafile))
def fragment(args):
    """
    %prog fragment fastafile enzyme

    Cut the fastafile using the specified enzyme, and grab upstream and
    downstream nucleotide sequence along with the cut site. In this case, the
    sequences extracted are:

                |- PstI
    ============|===========
            (-------)

    Sometimes we need to limit the size of the restriction fragments, for
    example the GBS protocol does not allow fragments larger than 800bp.

           |-PstI        |- PstI              |- PstI
    ~~~====|=============|==========~~~~~~~===|============
           (---)     (---)

    In this case, the second fragment is longer than 800bp, therefore the two
    ends are NOT extracted, as in the first fragment.
    """
    # NOTE: the docstring above is handed to OptionParser below (presumably as
    # the usage/help text), so it is kept user-facing; developer notes live in
    # comments instead.
    #
    # args: list of command-line tokens; after option parsing exactly two
    #       positionals are required: the FASTA file and the enzyme name.
    # Output: a FASTA file named
    #       <fastafile up to first "."> .<enzyme>.flank<N>.<full|ends>.fasta
    # Exits via sys.exit() after printing help when the positional count is
    # wrong; raises AssertionError for a non-positive flank or unknown enzyme.
    p = OptionParser(fragment.__doc__)
    p.add_option("--flank", default=150, type="int",
                 help="Extract flanking bases of the cut sites [default: %default]")
    p.add_option("--full", default=False, action="store_true",
                 help="The full extraction mode [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, enzyme_name = args
    flank = opts.flank
    assert flank > 0, "--flank must be a positive integer"

    # --full switches both the extraction routine and the output file tag.
    extract = extract_full if opts.full else extract_ends
    tag = "full" if opts.full else "ends"

    # Resolve the enzyme name to its object with a single scan of AllEnzymes.
    matches = [x for x in AllEnzymes if str(x) == enzyme_name]
    assert matches, "Unknown enzyme `{0}`".format(enzyme_name)
    enzyme = matches[0]

    fragfastafile = fastafile.split(".")[0] + \
        ".{0}.flank{1}.{2}.fasta".format(enzyme_name, flank, tag)

    f = Fasta(fastafile, lazy=True)
    # The context manager guarantees the output is flushed and closed, even if
    # extraction fails part-way, and before the "written" message is logged.
    with open(fragfastafile, "w") as fw:
        for _name, rec in f.iteritems_ordered():
            a = Analysis([enzyme], rec.seq)
            sites = a.full()[enzyme]
            extract(rec, sites, flank, fw)

    logging.debug("Fragments written to `{0}`.".format(fragfastafile))