def __getitem__(self, args):
    """
    Returns AlignmentPositions addressed by 1-based alignment position.

    Arguments:
    - args - either an integer position (1 .. length of the alignment)
      or a slice of such positions.

    For an integer the single entry of self.alignment at that position
    is returned. For a slice a dict {position: entry} is returned; both
    slice bounds are inclusive, the step is ignored, an omitted start
    means position 1 and an omitted stop means the last position.
    A slice whose start lies beyond its stop yields an empty dict.

    Raises AlignmentError when a requested position lies outside the
    alignment or when args is neither an integer nor a slice.
    """
    #TODO: replace by list slice in alignment
    length = len(self.alignment)
    if isinstance(args, int):
        if 1 <= args <= length:
            return self.alignment[args - 1]
        raise AlignmentError(
            "There is no such position in the alignment: %i" % args)
    if isinstance(args, slice):
        # Open-ended slices used to fail on None arithmetic.
        first = args.start
        if first is None:
            first = 1
        last = args.stop
        if last is None:
            last = length
        returned_positions = {}
        if first > last:
            return returned_positions
        if first < 1:
            # Without this check position 0 (or a negative one) would
            # silently wrap around to the end of the underlying list.
            raise AlignmentError(
                "There is no such position in the alignment: %i" % first)
        if last > length:
            # Report the first requested position that does not exist.
            raise AlignmentError(
                "The alignment is shorter than %i" % max(first, length + 1))
        for position in range(first, last + 1):
            returned_positions[position] = self.alignment[position - 1]
        return returned_positions
    raise AlignmentError('Bad argument type.')
def get_alignment_from_file(self, filename, shrink=DEFAULT_SHRINK):
    """
    Reads a file and returns the result of self.get_alignment() on it.

    Arguments:
    - filename - path of the file to read
    - shrink - passed on unchanged to self.get_alignment()

    Raises AlignmentError when the file cannot be opened or read.
    """
    try:
        # The context manager closes the handle even if read() fails.
        with open(filename) as handle:
            data = handle.read()
    except IOError:
        raise AlignmentError('File does not exist: %s ' % filename)
    # shrink used to be dropped here, so callers always got the default.
    return self.get_alignment(data, shrink)
def get_differences(self, mode='is_different'):
    """
    Returns the entries of self.alignment selected by the given mode,
    as a list in alignment order.

    Arguments:
    - mode (by default 'is_different') - name of a no-argument method
      that is called on every position; positions for which it returns
      a true value are kept.

    Modes:
    - 'is_different' - all differences are taken into account
      ('mismatch', 'gap in template', 'gap in target')
    - 'has_gap' - only gaps in template and gaps in target
    - 'has_template_gap'
    - 'has_target_gap'
    - 'is_mismatch'
    - 'is_unidentified' - when one of letters or both is ANY_RESIDUE

    Raises AlignmentError when mode is not listed in MODES.
    """
    if mode not in MODES:
        raise AlignmentError("Bad mode type. Should be one of %s." % str(MODES))
    return [apos for apos in self.alignment if getattr(apos, mode)()]
def set_aligned_sequences(self, char_tuples):
    """
    Resets the sequences in the RNAAlignment object.

    Arguments:
    - char_tuples - iterable of pairs, one per alignment column; the
      first items form the target sequence, the second items the template.

    The two columns are wrapped in Sequence objects and handed to
    self.align.set_aligned_sequences().

    Raises AlignmentError when the two Sequence objects differ in length.
    """
    # Build real lists: a bare map() result cannot be indexed on Python 3.
    transposed = [list(column) for column in zip(*char_tuples)]
    target = Sequence(transposed[0])
    template = Sequence(transposed[1])
    if len(target) != len(template):
        # The message used to contain a stray '\%' instead of a newline.
        raise AlignmentError(
            "Error correcting alignment; lengths differ:\n%s\n%s"
            % (str(target), str(template)))
    self.align.set_aligned_sequences(target, template)
def read_alignment(data, shrink=DEFAULT_SHRINK):
    """
    Loads an alignment through a RNAAlignmentParser and returns it.

    Arguments:
    - data - either a path that exists on disk or a string starting
      with '>' (FASTA text); an existing path takes precedence
    - shrink - passed on unchanged to the parser

    A message naming the source and the loaded alignment is written
    to the log before returning.

    Raises AlignmentError when data is neither an existing path nor
    a string starting with '>'.
    """
    parser = RNAAlignmentParser()
    is_existing_path = os.access(data, os.F_OK)
    if not is_existing_path and not data.startswith('>'):
        raise AlignmentError(
            'Alignment not in FASTA format or file does not exist: %s' % data)
    if is_existing_path:
        load = parser.get_alignment_from_file
    else:
        load = parser.get_alignment
    alignment = load(data, shrink)
    log.write_message('Alignment loaded from %s:%s' % (data, str(alignment)))
    return alignment
def set_aligned_sequences(self, seq1, seq2):
    """
    Rebuilds self.alignment as a list of AlignmentPosition instances.

    Arguments:
    - seq1 - target sequence
    - seq2 - template sequence
    Both must provide len() and a seq_alphabet_list whose items carry
    a short_abbrev attribute.

    Columns in which both letters are '-' or '_' are left out entirely.
    A '-' in one sequence produces a position whose letter and position
    number for that sequence are None. Target, template and alignment
    positions are all numbered from 1.

    Raises AlignmentError when the two letter lists differ in length.
    """
    self.alignment = []
    self.aligned_sequences = (seq1, seq2)
    # The column count comes from the sequence object itself, not from
    # its letter list.
    n_columns = len(seq1)
    target_letters = seq1.seq_alphabet_list
    template_letters = seq2.seq_alphabet_list
    if len(target_letters) != len(template_letters):
        raise AlignmentError(
            'Sequence lengths in alignment do not match (%i, %i)'
            % (len(target_letters), len(template_letters)))

    gap_like = ['-', '_']
    next_alignment = 1
    next_template = 1
    next_target = 1
    for column in range(n_columns):
        target_letter = target_letters[column]
        template_letter = template_letters[column]
        target_abbrev = target_letter.short_abbrev
        template_abbrev = template_letter.short_abbrev

        # Gap/underscore against gap/underscore: no position, no counting.
        if target_abbrev in gap_like and template_abbrev in gap_like:
            continue

        target_position = next_target
        template_position = next_template
        if template_abbrev == '-':
            # Only the target has a residue in this column.
            template_position = None
            template_letter = None
            next_target += 1
        elif target_abbrev == '-':
            # Only the template has a residue in this column.
            target_position = None
            target_letter = None
            next_template += 1
        else:
            next_template += 1
            next_target += 1

        self.alignment.append(
            AlignmentPosition(target_position=target_position,
                              target_letter=target_letter,
                              template_position=template_position,
                              template_letter=template_letter,
                              alignment_position=next_alignment))
        next_alignment += 1
def get_alignment(self, fasta_string, shrink=DEFAULT_SHRINK):
    """
    Returns a RNAAlignment object built from FASTA text.

    Arguments:
    - fasta_string - text holding at least two FASTA records; the first
      two are used and any further records are ignored
    - shrink - passed on unchanged to RNAAlignment

    All whitespace is removed from each sequence body and every '>' is
    removed from the definition line. Blank records (for example from
    an empty leading line) are skipped.

    Raises AlignmentError when fewer than two records are found, or
    when a position of the resulting alignment fails check_underscore().
    """
    records = []
    for hit in fasta_string.split('\n>'):
        if not hit.strip():
            continue
        parts = hit.split('\n', 1)
        defline = parts[0].replace('>', '')
        # \s already covers \r and \t; the raw string avoids the invalid
        # '\s' escape of a plain literal.
        residues = re.sub(r"\s+", '', ''.join(parts[1:]))
        records.append((defline, Sequence(residues)))
    if len(records) < 2:
        # Used to surface as a bare IndexError.
        raise AlignmentError(
            'Alignment must contain two sequences in FASTA format, found %i'
            % len(records))
    name1, seq1 = records[0]
    name2, seq2 = records[1]
    alignment = RNAAlignment(name1, seq1, name2, seq2, shrink)
    #TODO: remove this check after kicking underscores out.
    for apos in alignment:
        if not apos.check_underscore():
            raise AlignmentError(
                'Underscore symbols must be aligned to other underscores or gaps.'
            )
    return alignment