コード例 #1
0
ファイル: bucketer.py プロジェクト: tabilab-dip/BOUN-PARS
    def compute_splits(self, data, plot=True):
        """Compute ``self.k`` bucket boundaries (splits) for the lengths in *data*.

        The lengths are counted, an initial set of splits is taken from an
        even partition of the sorted lengths, the splits are adjusted so
        that each one is an observed length and they are increasing, and
        finally ``self.recenter()`` is applied repeatedly until the splits
        stop changing.

        Args:
            data: Iterable of hashable length values (anything accepted by
                ``collections.Counter``).
            plot: Not used by this method; kept so existing callers that
                pass it keep working.

        Returns:
            The value of ``self._splits`` after the final ``reindex()``.

        Raises:
            ValueError: If *data* has fewer distinct lengths than ``self.k``.
        """
        len2cnt = Counter(data)
        # There must be at least one distinct length per bucket.
        if len(len2cnt) < self.k:
            raise ValueError('Trying to sort %d lengths into %d buckets' %
                             (len(len2cnt), self.k))

        # Initialize
        self._len2cnt = len2cnt
        self._lengths = sorted(self.len2cnt.keys())

        # Initialize the splits evenly: expand the counter back into one
        # entry per occurrence, cut the sorted result into k nearly equal
        # chunks, and use each chunk's largest length as its split.
        lengths = sorted(len2cnt.elements())
        self._splits = [
            np.max(split) for split in np.array_split(lengths, self.k)
        ]

        # Make sure all the splits are ordered correctly and present in the
        # len2cnt. First pass, last split towards the first: lower a split
        # while it does not exceed its left neighbour or is not an observed
        # length, but never below the smallest observed length.
        for idx in range(len(self) - 1, 0, -1):
            while self[idx] > self.lengths[0] and (
                    self[idx] <= self[idx - 1]
                    or self[idx] not in self.len2cnt):
                self[idx] -= 1

        # Second pass over the interior splits, first towards last: raise a
        # split under the same conditions, but never above the largest
        # observed length. The final split is left untouched.
        for idx in range(1, len(self) - 1):
            while self[idx] < self.lengths[-1] and (
                    self[idx] <= self[idx - 1]
                    or self[idx] not in self.len2cnt):
                self[idx] += 1

        # Reindex
        self.reindex()

        if self.verbose:
            print(color_pattern('Initial # of tokens in buckets:',
                                str(self.size()), 'bright_red'),
                  file=sys.stderr)

        # Iterate to a fixed point: stop once a recenter() pass leaves the
        # splits equal to the snapshot taken just before it.
        old_splits = None
        while self != old_splits:
            old_splits = list(self)
            self.recenter()

        if self.verbose:
            print(color_pattern('Final # of tokens in buckets:',
                                str(self.size()), 'bright_white'),
                  file=sys.stderr)

        self.reindex()
        return self._splits
コード例 #2
0
ファイル: base_tagger.py プロジェクト: Tumetsu/Parser-v2
 def print_accuracy(self, accumulators, time, prefix='Train'):
   """Write a one-line, colorized accuracy summary to stderr.

   The values come from ``self.process_accumulators(accumulators, time=time)``,
   whose result must provide the keys ``Loss``, ``TS``, ``SS`` and
   ``Seq_rate``. *prefix* is rendered at the start of the line.
   Returns None.
   """

   stats = self.process_accumulators(accumulators, time=time)
   # Each field is a format template; the numbers are filled in below.
   fields = [
     color_pattern('Loss:', '{Loss:7.3f}', 'bright_red'),
     color_pattern('TS:', '{TS:5.2f}%', 'bright_cyan'),
     color_pattern('SS:', '{SS:5.2f}%', 'bright_green'),
     color_pattern('Speed:', '{Seq_rate:6.1f} seqs/sec', 'bright_magenta'),
   ]
   template = ctext('{0}  ', 'bold') + ' | '.join(fields)
   print(template.format(prefix, **stats), file=sys.stderr)