def close(self):
    """Report the computed similarities once parsing is finished.

    For every ``(num, couples)`` pair returned by ``self._compute_sims()``
    one ``R0801`` message is emitted through ``self.add_message``.  Its text
    holds the sorted ``==<name>:<index>`` location of each couple, followed
    by ``num`` right-stripped lines read from the last couple visited.

    The number of duplicated lines and the share of the total they
    represent are then stored in ``self.stats`` under
    ``'nb_duplicated_lines'`` and ``'percent_duplicated_lines'``.
    """
    total_lines = sum([len(lineset) for lineset in self.linesets])
    duplicated_lines = 0
    stats = self.stats
    for num, couples in self._compute_sims():
        locations = []
        for lineset, idx in couples:
            locations.append("==%s:%s" % (lineset.name, idx))
        # only the location headers are sorted; the excerpt keeps file order
        locations.sort()
        # ``lineset`` and ``idx`` still hold the last couple visited above,
        # which is what the pragma below silences.
        # pylint: disable-msg=W0631
        excerpt = [line.rstrip()
                   for line in lineset._real_lines[idx:idx + num]]
        text = '\n'.join(locations + excerpt)
        self.add_message('R0801', args=(len(couples), text))
        # one occurrence is the "original", the others are duplicates
        duplicated_lines += num * (len(couples) - 1)
    stats['nb_duplicated_lines'] = duplicated_lines
    if total_lines:
        percent = duplicated_lines * 100. / total_lines
    else:
        # a falsy total is stored unchanged, avoiding a division by zero
        percent = total_lines
    stats['percent_duplicated_lines'] = percent
def close(self):
    """Emit one ``R0801`` message per similarity and update the statistics.

    Called on closing (i.e. at the end of parsing).  Each ``(num, couples)``
    pair returned by ``self._compute_sims()`` produces a message whose text
    is the sorted list of ``==<name>:<index>`` locations followed by ``num``
    right-stripped lines read from the last couple iterated over.

    ``self.stats`` receives ``'nb_duplicated_lines'`` and
    ``'percent_duplicated_lines'``.
    """
    line_count = sum([len(lineset) for lineset in self.linesets])
    dup_count = 0
    stats = self.stats
    for num, couples in self._compute_sims():
        report = []
        for lineset, idx in couples:
            report.append("==%s:%s" % (lineset.name, idx))
        # sort the headers before the excerpt is appended after them
        report.sort()
        # the loop variables are reused on purpose after the loop
        # pylint: disable-msg=W0631
        report.extend([raw.rstrip()
                       for raw in lineset._real_lines[idx:idx + num]])
        occurrences = len(couples)
        self.add_message('R0801', args=(occurrences, '\n'.join(report)))
        dup_count += num * (occurrences - 1)
    stats['nb_duplicated_lines'] = dup_count
    # ``and`` short-circuits on a zero total, so no division by zero occurs
    stats['percent_duplicated_lines'] = (
        line_count and dup_count * 100. / line_count)
def _display_sims(self, sims): """display computed similarities on stdout""" nb_lignes_dupliquees = 0 for num, couples in sims: print print num, "similar lines in", len(couples), "files" couples = list(couples) couples.sort() for lineset, idx in couples: print "==%s:%s" % (lineset.name, idx) # pylint: disable-msg=W0631 for line in lineset._real_lines[idx:idx+num]: print " ", line, nb_lignes_dupliquees += num * (len(couples)-1) nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) print "TOTAL lines=%s duplicates=%s percent=%s" \ % (nb_total_lignes, nb_lignes_dupliquees, nb_lignes_dupliquees*1. / nb_total_lignes)
def _display_sims(self, sims): """display computed similarities on stdout""" nb_lignes_dupliquees = 0 for num, couples in sims: print print num, "similar lines in", len(couples), "files" couples = list(couples) couples.sort() for lineset, idx in couples: print "==%s:%s" % (lineset.name, idx) # pylint: disable-msg=W0631 for line in lineset._real_lines[idx:idx + num]: print " ", line, nb_lignes_dupliquees += num * (len(couples) - 1) nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) print "TOTAL lines=%s duplicates=%s percent=%s" \ % (nb_total_lignes, nb_lignes_dupliquees, nb_lignes_dupliquees*1. / nb_total_lignes)
def test_sum(self):
    """Check that the compat ``sum`` adds integers and rejects strings."""
    # imported locally: inside this test the name ``sum`` refers to the
    # compat implementation rather than the builtin
    from logilab.common.compat import sum
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(sum(range(5)), 10)
    self.assertRaises(TypeError, sum, 'abc')