Example #1
0
 def _parse_table(self, config_file):
     """Parse the sample table out of `config_file`.

     Lines are scanned until one matches `self._begin_pattern`; after
     that, every non-blank line that does not start with '#' is handed
     to `AlignmentConfig` as one table row, until a line matches
     `self._end_pattern`.

     Results are stored on the instance:
       * `self.alignments` -- OrderedDict mapping taxon name to an
         OrderedDict mapping locus name to the row's `AlignmentConfig`.
       * `self._ordering` -- list of (taxon name, locus name) tuples in
         the order the rows appeared.

     `config_file` is passed to `fileio.process_file_arg`, which returns
     the stream to read and a flag telling whether this method is
     responsible for closing it.

     Raises `errors.SampleTableError` if the end marker precedes the
     begin marker, the table has no rows, a (taxon, locus) pair is
     repeated, or the begin/end marker is missing. An
     `errors.SampleTableRowError` raised while building a row is logged
     and re-raised.
     """
     self.alignments = OrderedDict()
     # Reset together with `alignments` so that parsing again never leaves
     # ordering entries that refer to rows of a previous parse.
     self._ordering = []
     cfg_stream, close = fileio.process_file_arg(config_file)
     try:
         table_started = False
         table_finished = False
         row_num = 0
         for raw_line in cfg_stream:
             line = raw_line.strip()
             # The end marker is checked first so that an end marker seen
             # before any begin marker is reported as an error.
             if self._end_pattern.match(line):
                 if not table_started:
                     raise errors.SampleTableError(
                             'hit end of sample table before beginning')
                 if not self.alignments:
                     raise errors.SampleTableError(
                             'no rows found in sample table')
                 table_finished = True
                 break
             if self._begin_pattern.match(line):
                 table_started = True
                 continue
             if not table_started:
                 # Everything before the begin marker is ignored.
                 continue
             if (not line) or line.startswith('#'):
                 # Blank lines and comments are not counted as rows.
                 continue
             row_num += 1
             try:
                 al = AlignmentConfig(line)
             except errors.SampleTableRowError:
                 _LOG.error('sample table row {0} is invalid'.format(
                         row_num))
                 # Bare raise keeps the traceback of the original failure.
                 raise
             loci = self.alignments.setdefault(al.taxon_name, OrderedDict())
             if al.locus_name in loci:
                 raise errors.SampleTableError('locus {0} found twice '
                         'for taxon {1} at row {2} of sample '
                         'table'.format(al.locus_name, al.taxon_name,
                                 row_num))
             loci[al.locus_name] = al
             self._ordering.append((al.taxon_name, al.locus_name))
         if not table_started:
             raise errors.SampleTableError('no sample table found')
         if not table_finished:
             raise errors.SampleTableError('no end of table found')
     finally:
         if close:
             cfg_stream.close()
Example #2
0
class SampleTable(object):
    """In-memory form of a 'sample_tbl' block read from a config file.

    The table is delimited by lines matching `_begin_pattern` and
    `_end_pattern` (case-insensitive). Each row between them is parsed
    into an `AlignmentConfig`, which supplies the `taxon_name` and
    `locus_name` used as keys here.

    Attributes:
      alignments -- OrderedDict: taxon name -> OrderedDict: locus name ->
                    AlignmentConfig for that row.
      _ordering  -- list of (taxon name, locus name) tuples in row order.
    """

    _begin_pattern = re.compile(r'^begin\s*sample_tbl$', re.IGNORECASE)
    _end_pattern = re.compile(r'^end\s*sample_tbl$', re.IGNORECASE)

    def __init__(self, config_file):
        """Build the table by parsing `config_file` immediately."""
        self.alignments = None
        self._ordering = []
        self._parse_table(config_file)

    def _parse_table(self, config_file):
        """Parse the sample table out of `config_file`.

        Lines are scanned until one matches `_begin_pattern`; after that,
        every non-blank line that does not start with '#' is handed to
        `AlignmentConfig` as one table row, until a line matches
        `_end_pattern`. `self.alignments` and `self._ordering` are
        rebuilt from scratch.

        `config_file` is passed to `fileio.process_file_arg`, which
        returns the stream to read and a flag telling whether this method
        is responsible for closing it.

        Raises `errors.SampleTableError` if the end marker precedes the
        begin marker, the table has no rows, a (taxon, locus) pair is
        repeated, or the begin/end marker is missing. An
        `errors.SampleTableRowError` raised while building a row is
        logged and re-raised.
        """
        self.alignments = OrderedDict()
        # Reset together with `alignments` so that parsing again never
        # leaves ordering entries that refer to rows of a previous parse.
        self._ordering = []
        cfg_stream, close = fileio.process_file_arg(config_file)
        try:
            table_started = False
            table_finished = False
            row_num = 0
            for raw_line in cfg_stream:
                line = raw_line.strip()
                # The end marker is checked first so that an end marker
                # seen before any begin marker is reported as an error.
                if self._end_pattern.match(line):
                    if not table_started:
                        raise errors.SampleTableError(
                                'hit end of sample table before beginning')
                    if not self.alignments:
                        raise errors.SampleTableError(
                                'no rows found in sample table')
                    table_finished = True
                    break
                if self._begin_pattern.match(line):
                    table_started = True
                    continue
                if not table_started:
                    # Everything before the begin marker is ignored.
                    continue
                if (not line) or line.startswith('#'):
                    # Blank lines and comments are not counted as rows.
                    continue
                row_num += 1
                try:
                    al = AlignmentConfig(line)
                except errors.SampleTableRowError:
                    _LOG.error('sample table row {0} is invalid'.format(
                            row_num))
                    # Bare raise keeps the traceback of the original
                    # failure.
                    raise
                loci = self.alignments.setdefault(al.taxon_name,
                        OrderedDict())
                if al.locus_name in loci:
                    raise errors.SampleTableError('locus {0} found twice '
                            'for taxon {1} at row {2} of sample '
                            'table'.format(al.locus_name, al.taxon_name,
                                    row_num))
                loci[al.locus_name] = al
                self._ordering.append((al.taxon_name, al.locus_name))
            if not table_started:
                raise errors.SampleTableError('no sample table found')
            if not table_finished:
                raise errors.SampleTableError('no end of table found')
        finally:
            if close:
                cfg_stream.close()

    def _get_taxa(self):
        """Return the taxon names as a list, in `alignments` key order."""
        # list(...) gives the same list result on Python 2 and Python 3.
        return list(self.alignments)

    taxa = property(_get_taxa)

    def _get_loci(self):
        """Return each distinct locus name once, in first-seen order."""
        seen = set()
        loci = []
        for locus_map in self.alignments.values():
            for locus in locus_map:
                if locus not in seen:
                    seen.add(locus)
                    loci.append(locus)
        return loci

    loci = property(_get_loci)

    def _get_number_of_taxa(self):
        """Return the number of taxa in the table."""
        return len(self.taxa)

    npairs = property(_get_number_of_taxa)

    def get_sample_table_string(self):
        """Return the table as text, one row per line in `_ordering`
        order, wrapped in BEGIN/END SAMPLE_TBL marker lines."""
        rows = '\n'.join(
                str(self.alignments[t][l]) for t, l in self._ordering)
        return '\n'.join(('BEGIN SAMPLE_TBL', rows, 'END SAMPLE_TBL'))

    def __str__(self):
        return self.get_sample_table_string()

    def equals(self, other):
        """Return True if `other` is a SampleTable with the same content.

        Two tables are equal when they hold the same taxa in the same
        order, each taxon holds the same loci in the same order, every
        pair of corresponding row objects reports `equals`, and the rows
        were recorded in the same order.
        """
        if not isinstance(other, SampleTable):
            return False
        if len(self.alignments) != len(other.alignments):
            return False
        for i1, i2 in zip(self.alignments.items(), other.alignments.items()):
            if i1[0] != i2[0]:
                return False
            if len(i1[1]) != len(i2[1]):
                return False
            for t1, t2 in zip(i1[1].items(), i2[1].items()):
                if t1[0] != t2[0]:
                    return False
                if not t1[1].equals(t2[1]):
                    return False
        # Compare against the other table's ordering; comparing
        # self._ordering with itself could never detect a difference.
        if self._ordering != other._ordering:
            return False
        return True