def load_HXB2(cropped=False, fragment=None, trim_primers=False):
    '''Read the HXB2 reference sequence from its FASTA file.

    Parameters:
        cropped: forwarded to get_HXB2_entire(); only consulted when no
            fragment is requested.
        fragment: None to read the entire reference, otherwise the fragment
            identifier handed to get_HXB2_fragmented().
        trim_primers: forwarded to get_HXB2_fragmented(); only consulted when
            a fragment is requested.

    Returns:
        The single record parsed by SeqIO.read() from the selected file.
    '''
    # Pick the file first, then parse it in one place
    if fragment is not None:
        filename = get_HXB2_fragmented(fragment, trim_primers=trim_primers)
    else:
        filename = get_HXB2_entire(cropped=cropped)

    return SeqIO.read(filename, 'fasta')
# Write HXB2 reference FASTA files: a copy of the entire reference and a
# version cropped to the span of the outer primers F1o-F6o.
# NOTE(review): re and pci are not used in the visible part of this script.
import os
import re
import Bio.SeqIO as SeqIO

from hivwholeseq.reference import load_HXB2
from hivwholeseq.data.primers import primers_coordinates_HXB2_inner as pci
from hivwholeseq.data.primers import primers_coordinates_HXB2_outer as pco
from hivwholeseq.sequencing.filenames import get_HXB2_fragmented, get_HXB2_entire



# Script
if __name__ == '__main__':

    # Make output folder if necessary
    # (os.mkdir creates only the last path component, so the parent folder
    # must already exist)
    dirname = os.path.dirname(get_HXB2_entire())
    if not os.path.isdir(dirname):
        os.mkdir(dirname)

    # Get the annotated sequence
    # NOTE(review): called without arguments; confirm which file this reads,
    # since step 1 writes to get_HXB2_entire().
    seq = load_HXB2()

    # 1. Copy the entire reference verbatim
    SeqIO.write(seq, get_HXB2_entire(), 'fasta')

    # 2. Make a cropped sequence from F1o to F6o (outer primers), to reduce LTR
    # degeneracy problems during premapping
    # presumably pco[fragment] holds (forward, reverse) primer coordinate
    # pairs, so the slice runs from the start of the F1 forward primer to the
    # end of the F6 reverse primer -- confirm against hivwholeseq.data.primers
    start = pco['F1'][0][0]
    end = pco['F6'][1][1]
    seq_cropped = seq[start: end]
    # Give the cropped record its own id/name, derived from the full record's name
    seq_cropped.id = seq_cropped.name = seq.name+'_cropped_F1o_F6o'