def write(self, outfile, twoheaders=False):
    """
    Write this record to ``outfile`` as bytes.

    When ``self.qualities`` is not None, a four-line record is written:
    ``@`` + name, the sequence, a ``+`` line and the qualities.
    Otherwise a two-line record is written: ``>`` + name and the sequence.

    outfile -- file-like object to write to. Under PY3 the bytes are
        written to ``outfile.buffer`` when that attribute exists (as on
        text-mode streams); if it does not exist (for example a file
        opened in binary mode or an ``io.BytesIO``), the bytes are
        written to ``outfile`` directly. Under Python 2 ``outfile.write``
        is always used.
    twoheaders -- if true, the name is repeated after the ``+`` in the
        four-line format. Ignored for the two-line format.
    """
    name = str_to_bytes(self.name)
    if self.qualities is not None:
        plus_line = b'\n+' + name if twoheaders else b'\n+'
        s = (b'@' + name + b'\n' + self.sequence + plus_line +
            b'\n' + self.qualities + b'\n')
    else:
        s = b'>' + name + b'\n' + self.sequence + b'\n'

    if PY3:
        # Text streams expose their binary layer as .buffer, but that
        # attribute is not guaranteed to exist; binary streams accept
        # bytes directly, so fall back to the stream itself.
        target = getattr(outfile, 'buffer', outfile)
    else:
        target = outfile
    target.write(s)
def __init__(self, sequence, where, max_error_rate, min_overlap=3,
        match_read_wildcards=False, match_adapter_wildcards=False,
        name=None, indels=True):
    """
    Set up the adapter's attributes, trimming dispatch and statistics.

    sequence -- adapter sequence; stored upper-cased in self.sequence.
        Under PY3 a str is first converted with str_to_bytes.
    where -- one of FRONT, PREFIX, BACK or ANYWHERE. Selects which
        _trimmed_* method self.trimmed refers to; any other value
        raises KeyError.
    max_error_rate -- stored unchanged as self.max_error_rate.
    min_overlap -- stored unchanged as self.min_overlap.
    match_read_wildcards -- if true, align.ALLOW_WILDCARD_SEQ2 is set
        in self.wildcard_flags.
    match_adapter_wildcards -- if true and the stored sequence contains
        b'N', align.ALLOW_WILDCARD_SEQ1 is set in self.wildcard_flags.
    name -- name of the adapter. If None, the class-level counter
        automatic_name is converted to a string and used as the name,
        and the counter is incremented.
    indels -- stored unchanged as self.indels. Must be true when where
        is FRONT (checked with an assert).
    """
    generated = name is None
    if generated:
        owner = self.__class__
        name = str(owner.automatic_name)
        owner.automatic_name += 1
    self.name = name
    self.name_is_generated = generated

    if PY3 and isinstance(sequence, str):
        sequence = str_to_bytes(sequence)
    self.sequence = sequence.upper()

    self.where = where
    self.max_error_rate = max_error_rate
    self.min_overlap = min_overlap
    self.indels = indels
    assert where != FRONT or self.indels

    # The adapter-wildcard setting only takes effect if the sequence
    # actually contains an N.
    self.match_adapter_wildcards = match_adapter_wildcards and b'N' in self.sequence
    flags = 0
    if match_read_wildcards:
        flags |= align.ALLOW_WILDCARD_SEQ2
    if self.match_adapter_wildcards:
        flags |= align.ALLOW_WILDCARD_SEQ1
    self.wildcard_flags = flags

    # self.trimmed is bound to the handler that matches the adapter type
    handlers = {
        FRONT: self._trimmed_front,
        PREFIX: self._trimmed_front,
        BACK: self._trimmed_back,
        ANYWHERE: self._trimmed_anywhere
    }
    self.trimmed = handlers[where]

    # None means: guess
    self._front_flag = None if where == ANYWHERE else (where != BACK)

    # statistics about length of removed sequences
    self.lengths_front = defaultdict(int)
    self.lengths_back = defaultdict(int)
    self.errors_front = defaultdict(lambda: defaultdict(int))
    self.errors_back = defaultdict(lambda: defaultdict(int))
def __init__(self, sequence, where, max_error_rate, min_overlap=3,
        match_read_wildcards=False, match_adapter_wildcards=False,
        name=None, indels=True):
    """
    Set up the adapter's attributes, trimming dispatch and statistics.

    sequence -- adapter sequence; stored in self.sequence upper-cased
        and with every U replaced by T. Under PY3 a str is first
        converted with str_to_bytes.
    where -- one of FRONT, PREFIX, BACK or ANYWHERE. Selects which
        _trimmed_* method self.trimmed refers to; any other value
        raises KeyError.
    max_error_rate -- stored unchanged as self.max_error_rate.
    min_overlap -- stored unchanged as self.min_overlap.
    match_read_wildcards -- if true, align.ALLOW_WILDCARD_SEQ2 is set
        in self.wildcard_flags.
    match_adapter_wildcards -- if true and the stored sequence contains
        b'N', align.ALLOW_WILDCARD_SEQ1 is set in self.wildcard_flags.
    name -- name of the adapter. If None, the class-level counter
        automatic_name is converted to a string and used as the name,
        and the counter is incremented.
    indels -- stored unchanged as self.indels. Must be true when where
        is FRONT (checked with an assert).
    """
    generated = name is None
    if generated:
        owner = self.__class__
        name = str(owner.automatic_name)
        owner.automatic_name += 1
    self.name = name
    self.name_is_generated = generated

    if PY3 and isinstance(sequence, str):
        sequence = str_to_bytes(sequence)
    # Upper-case first so that lower-case u is replaced as well
    self.sequence = sequence.upper().replace(b'U', b'T')

    self.where = where
    self.max_error_rate = max_error_rate
    self.min_overlap = min_overlap
    self.indels = indels
    assert where != FRONT or self.indels

    # The adapter-wildcard setting only takes effect if the sequence
    # actually contains an N.
    self.match_adapter_wildcards = match_adapter_wildcards and b'N' in self.sequence
    flags = 0
    if match_read_wildcards:
        flags |= align.ALLOW_WILDCARD_SEQ2
    if self.match_adapter_wildcards:
        flags |= align.ALLOW_WILDCARD_SEQ1
    self.wildcard_flags = flags

    # self.trimmed is bound to the handler that matches the adapter type
    handlers = {
        FRONT: self._trimmed_front,
        PREFIX: self._trimmed_front,
        BACK: self._trimmed_back,
        ANYWHERE: self._trimmed_anywhere
    }
    self.trimmed = handlers[where]

    # None means: guess
    self._front_flag = None if where == ANYWHERE else (where != BACK)

    # statistics about length of removed sequences
    self.lengths_front = defaultdict(int)
    self.lengths_back = defaultdict(int)
    self.errors_front = defaultdict(lambda: defaultdict(int))
    self.errors_back = defaultdict(lambda: defaultdict(int))