Esempio n. 1
0
 def __getitem__(self, args):
     """
     Returns alignment positions addressed by their 1-based number.

     Arguments:
     - args - either an int (a single 1-based position) or a slice whose
       start and stop are 1-based and both inclusive. A missing start
       defaults to 1, a missing stop to the last position. The slice
       step is ignored.

     Returns the element of self.alignment for an int, or a dict mapping
     every requested position number to its element for a slice.

     Raises AlignmentError when a requested position lies outside the
     alignment or when args is neither an int nor a slice.
     """
     #TODO: replace by list slice in alignment
     length = len(self.alignment)
     if isinstance(args, int):
         if 1 <= args <= length:
             return self.alignment[args - 1]
         raise AlignmentError(
             "There is no such position in the alignment: %i" % args)
     if isinstance(args, slice):
         # Open-ended slices (obj[:5], obj[3:]) carry None bounds, which
         # cannot be compared or used as a counter.
         first = 1 if args.start is None else args.start
         last = length if args.stop is None else args.stop
         returned_positions = {}
         for position in range(first, last + 1):
             if position < 1:
                 # Positions are 1-based; without this check 0 and negative
                 # numbers would wrap around to the end of the list, unlike
                 # the int branch which rejects them.
                 raise AlignmentError(
                     "There is no such position in the alignment: %i" %
                     position)
             if position > length:
                 raise AlignmentError("The alignment is shorter than %i" %
                                      position)
             returned_positions[position] = self.alignment[position - 1]
         return returned_positions
     raise AlignmentError('Bad argument type.')
Esempio n. 2
0
 def get_alignment_from_file(self, filename, shrink=DEFAULT_SHRINK):
     """
     Reads the given file and parses its content with get_alignment.

     Arguments:
     - filename - path of the file to read
     - shrink - forwarded unchanged to get_alignment

     Returns the result of self.get_alignment (a RNAAlignment object).

     Raises AlignmentError when the file cannot be opened or read.
     """
     try:
         # The context manager closes the handle even if read() fails.
         with open(filename) as alignment_file:
             data = alignment_file.read()
     except IOError:
         raise AlignmentError('File does not exist: %s ' % filename)
     # Pass shrink along; previously the caller's value was silently dropped.
     return self.get_alignment(data, shrink)
Esempio n. 3
0
    def get_differences(self, mode='is_different'):
        """
        Collects the alignment positions for which the check named
        by mode returns a true value.

        Arguments:
        - mode (by default 'is_different') - name of the AlignmentPosition
          method used as a filter; must be one of MODES.
        Modes:
        - 'is_different' - all differences are taken into account
            ('mismatch','gap in template','gap in target')
        - 'has_gap' - only gaps in template and gaps in target
        - 'has_template_gap'
        - 'has_target_gap'
        - 'is_mismatch'
        - 'is_unidentified' - when one of letters or both is ANY_RESIDUE

        Returns a list of AlignmentPosition instances in alignment order.
        Raises AlignmentError when mode is not in MODES.
        """
        if mode not in MODES:
            raise AlignmentError("Bad mode type. Should be one of %s." %
                                 str(MODES))
        # Look the predicate up on each position and keep those it accepts.
        return [position for position in self.alignment
                if getattr(position, mode)()]
Esempio n. 4
0
 def set_aligned_sequences(self, char_tuples):
     """
     Resets the sequences in the RNAAlignment object.

     Arguments:
     - char_tuples - iterable of tuples; the first item of every tuple
       goes into the target sequence, the second into the template.

     Builds two Sequence objects and hands them to
     self.align.set_aligned_sequences.

     Raises AlignmentError when the two Sequence objects differ in length.
     """
     # Build a real list: in Python 3 map() returns an iterator that
     # cannot be indexed.
     transposed = [list(chars) for chars in zip(*char_tuples)]
     target = Sequence(transposed[0])
     template = Sequence(transposed[1])
     if len(target) != len(template):
         # Each sequence goes on its own line; the former "\%s" printed
         # a stray backslash.
         raise AlignmentError(
             "Error correcting alignment; lenghts differ:\n%s\n%s" %
             (str(target), str(template)))
     self.align.set_aligned_sequences(target, template)
Esempio n. 5
0
def read_alignment(data, shrink=DEFAULT_SHRINK):
    """
    Loads an alignment from a file path or from FASTA text.

    Arguments:
    - data - path of an existing file, or a string starting with '>'
    - shrink - forwarded to the parser

    Returns the alignment produced by RNAAlignmentParser and writes
    a log message about it.
    Raises AlignmentError when data is neither an existing path
    nor a string starting with '>'.
    """
    parser = RNAAlignmentParser()
    is_file = os.access(data, os.F_OK)
    if not is_file and not data.startswith('>'):
        raise AlignmentError(
            'Alignment not in FASTA format or file does not exist: %s' % data)
    # An existing path wins over the FASTA-text interpretation.
    load = parser.get_alignment_from_file if is_file else parser.get_alignment
    alignment = load(data, shrink)
    log.write_message('Alignment loaded from %s:%s' % (data, str(alignment)))
    return alignment
Esempio n. 6
0
    def set_aligned_sequences(self, seq1, seq2):
        """
        Rebuilds self.alignment as a list of AlignmentPosition instances.

        Arguments:
        - seq1 - aligned target sequence
        - seq2 - aligned template sequence
        Both must provide len() and a seq_alphabet_list whose entries
        have a short_abbrev attribute.

        Columns where both entries are '-' or '_' are skipped and do not
        consume an alignment position number. A '-' on one side yields
        a position whose letter and number for that side are None.

        Raises AlignmentError when the two seq_alphabet_list lengths differ.
        """
        self.alignment = []
        self.aligned_sequences = (seq1, seq2)
        column_count = len(seq1)
        target_entries = seq1.seq_alphabet_list
        template_entries = seq2.seq_alphabet_list
        if len(target_entries) != len(template_entries):
            raise AlignmentError(
                'Sequence lengths in alignment do not match (%i, %i)' %
                (len(target_entries), len(template_entries)))

        skippable = ['-', '_']
        # 1-based running counters for the next number to hand out.
        next_alignment = next_target = next_template = 1

        for column in range(column_count):
            target_abbrev = target_entries[column].short_abbrev
            template_abbrev = template_entries[column].short_abbrev

            if target_abbrev in skippable and template_abbrev in skippable:
                continue

            # Start from a fully matched column, then blank out the gap side.
            target_letter = target_entries[column]
            template_letter = template_entries[column]
            target_position = next_target
            template_position = next_template

            if template_abbrev == '-':
                template_letter = template_position = None
                next_target += 1
            elif target_abbrev == '-':
                target_letter = target_position = None
                next_template += 1
            else:
                next_template += 1
                next_target += 1

            self.alignment.append(
                AlignmentPosition(target_position=target_position,
                                  target_letter=target_letter,
                                  template_position=template_position,
                                  template_letter=template_letter,
                                  alignment_position=next_alignment))
            next_alignment += 1
Esempio n. 7
0
    def get_alignment(self, fasta_string, shrink=DEFAULT_SHRINK):
        """
        Parses FASTA text and returns a RNAAlignment object.

        Arguments:
        - fasta_string - text with at least two FASTA records; only the
          first two are used
        - shrink - passed on to the RNAAlignment constructor

        Raises AlignmentError when fewer than two records are found or
        when a position of the resulting alignment fails check_underscore.
        """
        records = []
        for hit in fasta_string.split('\n>'):
            # The first line is the description, the rest is the sequence.
            defline, _separator, body = hit.partition('\n')
            name = defline.replace('>', '')
            # Raw string: "\s" is an invalid escape in a normal literal.
            # \s already covers \r and \t.
            residues = re.sub(r"\s+", '', body)
            records.append((name, Sequence(residues)))

        if len(records) < 2:
            # Report malformed input as an alignment problem instead of
            # leaking an IndexError from the unpacking below.
            raise AlignmentError(
                'Alignment must contain two sequences in FASTA format.')

        name1, seq1 = records[0]
        name2, seq2 = records[1]
        alignment = RNAAlignment(name1, seq1, name2, seq2, shrink)
        #TODO: remove this check after kicking underscores out.
        for apos in alignment:
            if not apos.check_underscore():
                raise AlignmentError(
                    'Underscore symbols must be aligned to other underscores or gaps.'
                )
        return alignment