def calcStringStats(self, values, sink, feedback):
    """Write one feature of string statistics per category to ``sink``.

    :param values: mapping of category key to the values handed to
        ``QgsStringStatisticalSummary.calculate``; each key must be
        iterable because it is expanded into the leading attributes
    :param sink: receives one feature per category through ``addFeature``
    :param feedback: polled for cancellation and updated with progress
    """
    summary = QgsStringStatisticalSummary()
    # Progress is reported from 50 upwards, split evenly across categories.
    # NOTE(review): presumably the caller reports the first 50% - confirm.
    step = 50.0 / len(values) if values else 0

    for index, (category, strings) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(index * step) + 50)

        summary.calculate(strings)

        # Category values come first, followed by the statistics columns.
        row = list(category)
        row.extend([
            summary.count(),
            summary.countDistinct(),
            summary.countMissing(),
            # number of values that are not missing
            summary.count() - summary.countMissing(),
            summary.min(),
            summary.max(),
            summary.minLength(),
            summary.maxLength(),
            summary.meanLength(),
        ])

        feature = QgsFeature()
        feature.setAttributes(row)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
def testStats(self):
    """Check all statistics for list-based and one-at-a-time input."""
    # Every expectation is verified twice: once on a summary that was
    # given the whole list, and once on a summary fed value by value.
    values = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

    batch = QgsStringStatisticalSummary()
    self.assertEqual(batch.statistics(), QgsStringStatisticalSummary.All)
    batch.calculate(values)

    incremental = QgsStringStatisticalSummary()
    for value in values:
        incremental.addString(value)
    incremental.finalize()

    summaries = (batch, incremental)

    for getter, expected in (('count', 9), ('countDistinct', 6)):
        for summary in summaries:
            self.assertEqual(getattr(summary, getter)(), expected)

    self.assertEqual(set(batch.distinctValues()),
                     {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
    self.assertEqual(incremental.distinctValues(), batch.distinctValues())

    remaining = (
        ('countMissing', 2),
        ('min', 'aaaa'),
        ('max', 'eeee'),
        ('minLength', 0),
        ('maxLength', 8),
        ('meanLength', 3.33333333333333333333333),
    )
    for getter, expected in remaining:
        for summary in summaries:
            self.assertEqual(getattr(summary, getter)(), expected)

    # extra check for minLength without empty strings
    batch.calculate(['1111111', '111', '11111'])
    self.assertEqual(batch.minLength(), 3)
def testIndividualStats(self):
    """Check that each statistic can be calculated on its own.

    Statistics are requested one at a time to make sure their
    calculations are not dependent on each other. Each case is checked
    on a summary given the whole list (``s``) and on a summary fed one
    value at a time (``s3``), and the constructor taking a statistic is
    checked via ``s2``.
    """
    tests = [
        {'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
        {'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
        {'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
        {'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
        {'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
        {'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
        {'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
        # The string lengths sum to 30 over 9 values.
        {'stat': QgsStringStatisticalSummary.MeanLength,
         'expected': 3.3333333333333335},
    ]

    # The input is the same for every case, so build it once.
    strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

    s = QgsStringStatisticalSummary()
    s3 = QgsStringStatisticalSummary()
    for t in tests:
        # test constructor
        s2 = QgsStringStatisticalSummary(t['stat'])
        self.assertEqual(s2.statistics(), t['stat'])

        s.setStatistics(t['stat'])
        s3.setStatistics(t['stat'])
        self.assertEqual(s.statistics(), t['stat'])

        s.calculate(strings)

        # s3 is reused across cases: reset it, then add values one by one.
        s3.reset()
        for string in strings:
            s3.addString(string)
        s3.finalize()

        self.assertEqual(s.statistic(t['stat']), t['expected'])
        self.assertEqual(s3.statistic(t['stat']), t['expected'])

        # display name
        self.assertGreater(
            len(QgsStringStatisticalSummary.displayName(t['stat'])), 0)
def testStats(self):
    """Check the statistics calculated from a list of strings."""
    summary = QgsStringStatisticalSummary()
    # A default-constructed summary is expected to report all statistics.
    self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

    values = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    summary.calculate(values)

    for getter, expected in (('count', 9), ('countDistinct', 6)):
        self.assertEqual(getattr(summary, getter)(), expected)

    distinct = set(summary.distinctValues())
    self.assertEqual(distinct, {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})

    remaining = (
        ('countMissing', 2),
        ('min', 'aaaa'),
        ('max', 'eeee'),
        ('minLength', 0),
        ('maxLength', 8),
    )
    for getter, expected in remaining:
        self.assertEqual(getattr(summary, getter)(), expected)

    # extra check for minLength without empty strings
    summary.calculate(['1111111', '111', '11111'])
    self.assertEqual(summary.minLength(), 3)
def testIndividualStats(self):
    """Verify each statistic when it is the only one requested.

    Requesting statistics one at a time makes sure their calculations
    are not dependent on each other.
    """
    cases = (
        (QgsStringStatisticalSummary.Count, 9),
        (QgsStringStatisticalSummary.CountDistinct, 6),
        (QgsStringStatisticalSummary.CountMissing, 2),
        (QgsStringStatisticalSummary.Min, "aaaa"),
        (QgsStringStatisticalSummary.Max, "eeee"),
        (QgsStringStatisticalSummary.MinimumLength, 0),
        (QgsStringStatisticalSummary.MaximumLength, 8),
        (QgsStringStatisticalSummary.MeanLength, 3.3333333333333335),
    )
    values = ["cc", "aaaa", "bbbbbbbb", "aaaa", "eeee", "", "eeee", "", "dddd"]

    batch = QgsStringStatisticalSummary()
    incremental = QgsStringStatisticalSummary()

    for stat, expected in cases:
        # the constructor should accept the statistic directly
        constructed = QgsStringStatisticalSummary(stat)
        self.assertEqual(constructed.statistics(), stat)

        batch.setStatistics(stat)
        incremental.setStatistics(stat)
        self.assertEqual(batch.statistics(), stat)

        # one summary gets the whole list, the other one value at a time
        batch.calculate(values)
        incremental.reset()
        for value in values:
            incremental.addString(value)
        incremental.finalize()

        for summary in (batch, incremental):
            self.assertEqual(summary.statistic(stat), expected)

        # display name
        label = QgsStringStatisticalSummary.displayName(stat)
        self.assertTrue(len(label) > 0)
def testIndividualStats(self):
    """Calculate statistics one at a time and check each result.

    Running each statistic in isolation makes sure the calculations are
    not dependent on each other.
    """
    stats = (
        QgsStringStatisticalSummary.Count,
        QgsStringStatisticalSummary.CountDistinct,
        QgsStringStatisticalSummary.CountMissing,
        QgsStringStatisticalSummary.Min,
        QgsStringStatisticalSummary.Max,
        QgsStringStatisticalSummary.MinimumLength,
        QgsStringStatisticalSummary.MaximumLength,
        QgsStringStatisticalSummary.MeanLength,
    )
    # expected results, in the same order as ``stats``
    expectations = (9, 6, 2, 'aaaa', 'eeee', 0, 8, 3.3333333333333335)

    data = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

    from_list = QgsStringStatisticalSummary()
    one_by_one = QgsStringStatisticalSummary()

    for stat, wanted in zip(stats, expectations):
        # test constructor
        from_ctor = QgsStringStatisticalSummary(stat)
        self.assertEqual(from_ctor.statistics(), stat)

        from_list.setStatistics(stat)
        one_by_one.setStatistics(stat)
        self.assertEqual(from_list.statistics(), stat)

        from_list.calculate(data)

        one_by_one.reset()
        for item in data:
            one_by_one.addString(item)
        one_by_one.finalize()

        self.assertEqual(from_list.statistic(stat), wanted)
        self.assertEqual(one_by_one.statistic(stat), wanted)

        # display name
        display_name = QgsStringStatisticalSummary.displayName(stat)
        self.assertTrue(len(display_name) > 0)
def testStats(self):
    """Compare statistics from bulk and incremental calculation."""
    # we test twice, once with values added as a list and once using values
    # added one-at-a-time
    from_list = QgsStringStatisticalSummary()
    self.assertEqual(from_list.statistics(), QgsStringStatisticalSummary.All)

    data = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    from_list.calculate(data)

    one_by_one = QgsStringStatisticalSummary()
    for item in data:
        one_by_one.addString(item)
    one_by_one.finalize()

    def assert_both(method, expected):
        # check the list-based summary first, then the incremental one
        self.assertEqual(getattr(from_list, method)(), expected)
        self.assertEqual(getattr(one_by_one, method)(), expected)

    assert_both('count', 9)
    assert_both('countDistinct', 6)

    expected_distinct = {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''}
    self.assertEqual(set(from_list.distinctValues()), expected_distinct)
    self.assertEqual(one_by_one.distinctValues(), from_list.distinctValues())

    assert_both('countMissing', 2)
    assert_both('min', 'aaaa')
    assert_both('max', 'eeee')
    assert_both('minLength', 0)
    assert_both('maxLength', 8)
    assert_both('meanLength', 3.33333333333333333333333)

    # extra check for minLength without empty strings
    from_list.calculate(['1111111', '111', '11111'])
    self.assertEqual(from_list.minLength(), 3)