Esempio n. 1
0
    def calcStringStats(self, features, feedback, field, count):
        """Summarise the values of one field across ``features``.

        Every feature's value for ``field`` is added to a
        ``QgsStringStatisticalSummary``. Progress is pushed to ``feedback``
        after each feature and iteration stops as soon as ``feedback``
        reports cancellation.

        :param features: iterable of features, indexable by field name
        :param feedback: object providing ``isCanceled()`` and ``setProgress()``
        :param field: field whose ``name()`` selects the attribute to read
        :param count: feature count; scales the progress value and is the
            number shown on the report's "Count" line
        :returns: ``(data, results)`` - the list of translated report lines
            and a dict of statistics keyed by the output constants
        """
        # percentage contributed by one feature (1 when count is falsy)
        step = 100.0 / count if count else 1

        summary = QgsStringStatisticalSummary()
        for index, feature in enumerate(features):
            if feedback.isCanceled():
                break
            summary.addValue(feature[field.name()])
            feedback.setProgress(int(index * step))
        summary.finalize()

        # read every statistic once and reuse it for both return values
        n_values = summary.count()
        n_distinct = summary.countDistinct()
        n_missing = summary.countMissing()
        lowest = summary.min()
        highest = summary.max()
        shortest = summary.minLength()
        longest = summary.maxLength()
        mean_len = summary.meanLength()

        results = {
            self.COUNT: n_values,
            self.UNIQUE: n_distinct,
            self.EMPTY: n_missing,
            self.FILLED: n_values - n_missing,
            self.MIN: lowest,
            self.MAX: highest,
            self.MIN_LENGTH: shortest,
            self.MAX_LENGTH: longest,
            self.MEAN_LENGTH: mean_len,
        }

        data = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(n_distinct),
            self.tr('NULL (missing) values: {}').format(n_missing),
            self.tr('Minimum value: {}').format(lowest),
            self.tr('Maximum value: {}').format(highest),
            self.tr('Minimum length: {}').format(shortest),
            self.tr('Maximum length: {}').format(longest),
            self.tr('Mean length: {}').format(mean_len),
        ]

        return data, results
    def testStats(self):
        """Check all statistics for a bulk-filled and a stepwise-filled summary."""
        # the same input is processed twice: once handed over as a whole list
        # and once added one string at a time
        bulk = QgsStringStatisticalSummary()
        self.assertEqual(bulk.statistics(), QgsStringStatisticalSummary.All)
        values = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        bulk.calculate(values)

        stepwise = QgsStringStatisticalSummary()
        for value in values:
            stepwise.addString(value)
        stepwise.finalize()

        summaries = (bulk, stepwise)

        for getter, expected in (('count', 9), ('countDistinct', 6)):
            for summary in summaries:
                self.assertEqual(getattr(summary, getter)(), expected)

        # order of distinct values is not asserted, only their membership
        self.assertEqual(set(bulk.distinctValues()),
                         {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        self.assertEqual(stepwise.distinctValues(), bulk.distinctValues())

        remaining = (
            ('countMissing', 2),
            ('min', 'aaaa'),
            ('max', 'eeee'),
            ('minLength', 0),
            ('maxLength', 8),
            ('meanLength', 3.33333333333333333333333),
        )
        for getter, expected in remaining:
            for summary in summaries:
                self.assertEqual(getattr(summary, getter)(), expected)

        # extra check for minLength without empty strings
        bulk.calculate(['1111111', '111', '11111'])
        self.assertEqual(bulk.minLength(), 3)
    def testIndividualStats(self):
        """Request each statistic on its own and verify its value.

        Calculating one statistic at a time makes sure that no statistic
        depends on another one being enabled.
        """
        cases = [
            {'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
            {'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
            {'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
            {'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
            {'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
            {'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
            {'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
        ]
        values = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        # both summaries are reused across cases: one is filled in bulk,
        # the other one string at a time
        bulk = QgsStringStatisticalSummary()
        stepwise = QgsStringStatisticalSummary()
        for case in cases:
            flag, expected = case['stat'], case['expected']

            # a summary built with a flag must report exactly that flag
            constructed = QgsStringStatisticalSummary(flag)
            self.assertEqual(constructed.statistics(), flag)

            bulk.setStatistics(flag)
            stepwise.setStatistics(flag)
            self.assertEqual(bulk.statistics(), flag)

            bulk.calculate(values)
            stepwise.reset()
            for value in values:
                stepwise.addString(value)
            stepwise.finalize()

            self.assertEqual(bulk.statistic(flag), expected)
            self.assertEqual(stepwise.statistic(flag), expected)

            # every statistic must have a non-empty display name
            label = QgsStringStatisticalSummary.displayName(flag)
            self.assertTrue(len(label) > 0)
Esempio n. 4
0
    def calcStringStats(self, features, progress, field):
        """Compute string statistics for ``field`` and publish them as outputs.

        Every feature's value for ``field`` is added to a
        ``QgsStringStatisticalSummary``; the resulting figures are stored via
        ``setOutputValue`` and also returned as translated report lines.

        :param features: sized iterable of features, indexable by field name
        :param progress: object providing ``setPercentage()``
        :param field: field whose ``name()`` selects the attribute to read
        :returns: list of translated report lines
        """
        count = len(features)
        # An empty feature set gives count == 0; avoid the ZeroDivisionError
        # of an unconditional division (the loop below is a no-op then).
        total = 100.0 / count if count else 1
        stat = QgsStringStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addValue(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.MIN_LENGTH, stat.minLength())
        self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
        self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

        data = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(stat.countDistinct()),
            self.tr('NULL (missing) values: {}').format(stat.countMissing()),
            self.tr('Minimum value: {}').format(stat.min()),
            self.tr('Maximum value: {}').format(stat.max()),
            self.tr('Minimum length: {}').format(stat.minLength()),
            self.tr('Maximum length: {}').format(stat.maxLength()),
            self.tr('Mean length: {}').format(stat.meanLength()),
        ]

        return data
Esempio n. 5
0
    def processAlgorithm(self, progress):
        """Compute string statistics for the chosen field of the input layer.

        The layer and field name are read from the algorithm parameters.
        Every feature's field value is added to a
        ``QgsStringStatisticalSummary``; the figures are written to the HTML
        output file through ``createHTML`` and stored with ``setOutputValue``.

        :param progress: object providing ``setPercentage()``, updated once
            per processed feature
        """
        layer = dataobjects.getObjectFromUri(self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # only the analysed attribute is requested, geometry is not needed
        request = (
            QgsFeatureRequest()
            .setFlags(QgsFeatureRequest.NoGeometry)
            .setSubsetOfAttributes([fieldName], layer.fields())
        )
        stat = QgsStringStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        # An empty layer gives count == 0; avoid the ZeroDivisionError of an
        # unconditional division (the loop below is a no-op then).
        total = 100.0 / count if count else 1
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            progress.setPercentage(int(current * total))

        stat.finalize()

        data = []
        data.append(self.tr("Analyzed layer: {}").format(layer.name()))
        data.append(self.tr("Analyzed field: {}").format(fieldName))
        data.append(self.tr("Minimum length: {}").format(stat.minLength()))
        data.append(self.tr("Maximum length: {}").format(stat.maxLength()))
        data.append(self.tr("Mean length: {}").format(stat.meanLength()))
        data.append(self.tr("Filled values: {}").format(stat.count() - stat.countMissing()))
        data.append(self.tr("NULL (missing) values: {}").format(stat.countMissing()))
        data.append(self.tr("Count: {}").format(stat.count()))
        data.append(self.tr("Unique: {}").format(stat.countDistinct()))
        data.append(self.tr("Minimum string value: {}").format(stat.min()))
        data.append(self.tr("Maximum string value: {}").format(stat.max()))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
Esempio n. 6
0
    def processAlgorithm(self, parameters, context, feedback):
        """Compute string statistics for the chosen field of the input layer.

        The layer and field name are read from the algorithm parameters.
        Every feature's field value is added to a
        ``QgsStringStatisticalSummary``; the figures are written to the HTML
        output file through ``createHTML`` and stored with ``setOutputValue``.
        If ``feedback`` reports cancellation the loop stops early and the
        statistics cover only the features processed so far.

        :param parameters: algorithm parameters (not read directly here)
        :param context: processing context used to resolve the layer and
            fetch its features
        :param feedback: object providing ``isCanceled()`` and
            ``setProgress()``
        """
        layer = QgsProcessingUtils.mapLayerFromString(self.getParameterValue(self.INPUT_LAYER), context)
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # only the analysed attribute is requested, geometry is not needed
        request = (
            QgsFeatureRequest()
            .setFlags(QgsFeatureRequest.NoGeometry)
            .setSubsetOfAttributes([fieldName], layer.fields())
        )
        stat = QgsStringStatisticalSummary()
        features = QgsProcessingUtils.getFeatures(layer, context, request)
        count = QgsProcessingUtils.featureCount(layer, context)
        # A zero (or otherwise non-positive) count must not be divided by:
        # the unconditional division raised ZeroDivisionError on empty layers.
        total = 100.0 / count if count > 0 else 0
        for current, ft in enumerate(features):
            # honour cancellation requests instead of running to the end
            if feedback.isCanceled():
                break
            stat.addValue(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Minimum length: {}').format(stat.minLength()))
        data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
        data.append(self.tr('Mean length: {}').format(stat.meanLength()))
        data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique: {}').format(stat.countDistinct()))
        data.append(self.tr('Minimum string value: {}').format(stat.min()))
        data.append(self.tr('Maximum string value: {}').format(stat.max()))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
    def testIndividualStats(self):
        """Request each statistic on its own and verify its value.

        Calculating one statistic at a time makes sure that no statistic
        depends on another one being enabled.
        """
        summary_cls = QgsStringStatisticalSummary
        cases = [
            {"stat": summary_cls.Count, "expected": 9},
            {"stat": summary_cls.CountDistinct, "expected": 6},
            {"stat": summary_cls.CountMissing, "expected": 2},
            {"stat": summary_cls.Min, "expected": "aaaa"},
            {"stat": summary_cls.Max, "expected": "eeee"},
            {"stat": summary_cls.MinimumLength, "expected": 0},
            {"stat": summary_cls.MaximumLength, "expected": 8},
            {"stat": summary_cls.MeanLength, "expected": 3.3333333333333335},
        ]
        values = ["cc", "aaaa", "bbbbbbbb", "aaaa", "eeee", "", "eeee", "", "dddd"]

        # both summaries are reused across cases: one is filled in bulk,
        # the other one string at a time
        bulk = QgsStringStatisticalSummary()
        stepwise = QgsStringStatisticalSummary()
        for case in cases:
            flag = case["stat"]
            expected = case["expected"]

            # a summary built with a flag must report exactly that flag
            constructed = QgsStringStatisticalSummary(flag)
            self.assertEqual(constructed.statistics(), flag)

            bulk.setStatistics(flag)
            stepwise.setStatistics(flag)
            self.assertEqual(bulk.statistics(), flag)

            bulk.calculate(values)
            stepwise.reset()
            for value in values:
                stepwise.addString(value)
            stepwise.finalize()

            self.assertEqual(bulk.statistic(flag), expected)
            self.assertEqual(stepwise.statistic(flag), expected)

            # every statistic must have a non-empty display name
            label = QgsStringStatisticalSummary.displayName(flag)
            self.assertTrue(len(label) > 0)
Esempio n. 8
0
    def testStats(self):
        """Check all statistics for a bulk-filled and a stepwise-filled summary."""
        # the same input is processed twice: once handed over as a whole list
        # and once added one string at a time
        from_list = QgsStringStatisticalSummary()
        self.assertEqual(from_list.statistics(), QgsStringStatisticalSummary.All)
        words = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        from_list.calculate(words)

        one_by_one = QgsStringStatisticalSummary()
        for word in words:
            one_by_one.addString(word)
        one_by_one.finalize()

        pair = (from_list, one_by_one)

        for summary in pair:
            self.assertEqual(summary.count(), 9)
        for summary in pair:
            self.assertEqual(summary.countDistinct(), 6)

        # order of distinct values is not asserted, only their membership
        expected_distinct = {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''}
        self.assertEqual(set(from_list.distinctValues()), expected_distinct)
        self.assertEqual(one_by_one.distinctValues(), from_list.distinctValues())

        for summary in pair:
            self.assertEqual(summary.countMissing(), 2)
        for summary in pair:
            self.assertEqual(summary.min(), 'aaaa')
        for summary in pair:
            self.assertEqual(summary.max(), 'eeee')
        for summary in pair:
            self.assertEqual(summary.minLength(), 0)
        for summary in pair:
            self.assertEqual(summary.maxLength(), 8)
        for summary in pair:
            self.assertEqual(summary.meanLength(), 3.33333333333333333333333)

        # extra check for minLength without empty strings
        from_list.calculate(['1111111', '111', '11111'])
        self.assertEqual(from_list.minLength(), 3)
Esempio n. 9
0
    def testIndividualStats(self):
        """Request each statistic on its own and verify its value.

        Calculating one statistic at a time makes sure that no statistic
        depends on another one being enabled.
        """
        cases = [
            {'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
            {'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
            {'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
            {'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
            {'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
            {'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
            {'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
            {'stat': QgsStringStatisticalSummary.MeanLength, 'expected': 3.3333333333333335},
        ]
        samples = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        # both summaries are reused across cases: one is filled in bulk,
        # the other one string at a time
        from_list = QgsStringStatisticalSummary()
        one_by_one = QgsStringStatisticalSummary()
        for case in cases:
            wanted, expected = case['stat'], case['expected']

            # a summary built with a flag must report exactly that flag
            fresh = QgsStringStatisticalSummary(wanted)
            self.assertEqual(fresh.statistics(), wanted)

            from_list.setStatistics(wanted)
            one_by_one.setStatistics(wanted)
            self.assertEqual(from_list.statistics(), wanted)

            from_list.calculate(samples)
            one_by_one.reset()
            for sample in samples:
                one_by_one.addString(sample)
            one_by_one.finalize()

            self.assertEqual(from_list.statistic(wanted), expected)
            self.assertEqual(one_by_one.statistic(wanted), expected)

            # every statistic must have a non-empty display name
            name = QgsStringStatisticalSummary.displayName(wanted)
            self.assertTrue(len(name) > 0)