def testStats(self):
        """Check the summary fed in bulk via calculate() and incrementally via addString().

        The same list of strings is supplied both ways and every statistic is
        asserted on both summaries.
        """
        values = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        bulk = QgsStringStatisticalSummary()
        self.assertEqual(bulk.statistics(), QgsStringStatisticalSummary.All)
        bulk.calculate(values)

        incremental = QgsStringStatisticalSummary()
        for value in values:
            incremental.addString(value)
        incremental.finalize()

        def check_both(getter_name, expected):
            # bulk summary first, then the incremental one, for each statistic
            for summary in (bulk, incremental):
                self.assertEqual(getattr(summary, getter_name)(), expected)

        check_both('count', 9)
        check_both('countDistinct', 6)
        self.assertEqual(set(bulk.distinctValues()), {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        self.assertEqual(incremental.distinctValues(), bulk.distinctValues())
        check_both('countMissing', 2)
        check_both('min', 'aaaa')
        check_both('max', 'eeee')
        check_both('minLength', 0)
        check_both('maxLength', 8)
        check_both('meanLength', 3.33333333333333333333333)

        # recalculate on input without empty strings: the minimum length is
        # then that of the shortest non-empty string
        bulk.calculate(['1111111', '111', '11111'])
        self.assertEqual(bulk.minLength(), 3)
Exemple #2
0
    def calcStringStats(self, features, progress, field):
        """Summarise the string values of ``field`` across ``features``.

        Each feature's value for ``field.name()`` is added to a
        ``QgsStringStatisticalSummary``. The resulting figures are stored via
        ``self.setOutputValue`` and returned as translated report lines.

        :param features: sized iterable of features; ``len()`` is taken of it
            and each item is indexed by the field name.
        :param progress: object exposing ``setPercentage(int)``.
        :param field: object exposing ``name()``.
        :return: list of formatted strings, one per reported statistic.
        """
        count = len(features)
        # With no features the loop below never runs, so the step size is
        # irrelevant; the guard only avoids the ZeroDivisionError that
        # dividing by a zero count would raise.
        total = 100.0 / count if count else 1
        stat = QgsStringStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addValue(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        # "filled" is everything that was counted but not reported as missing
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.MIN_LENGTH, stat.minLength())
        self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
        self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

        # The report's "Count" line uses len(features), not stat.count().
        data = []
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(stat.countDistinct()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Minimum length: {}').format(stat.minLength()))
        data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
        data.append(self.tr('Mean length: {}').format(stat.meanLength()))

        return data
Exemple #3
0
    def calcStringStats(self, features, feedback, field, count):
        """Collect string statistics for ``field`` over ``features``.

        Iteration stops early when ``feedback.isCanceled()`` is true; the
        summary is finalized either way.

        :param features: iterable of features indexable by field name.
        :param feedback: object exposing ``isCanceled()`` and ``setProgress(int)``.
        :param field: object exposing ``name()``.
        :param count: number of features, used for progress scaling and for
            the "Count" report line.
        :return: tuple ``(report_lines, results)`` where ``results`` maps the
            output keys (``self.COUNT`` etc.) to the computed statistics.
        """
        step = 100.0 / count if count else 1
        summary = QgsStringStatisticalSummary()
        for index, feature in enumerate(features):
            if feedback.isCanceled():
                break
            summary.addValue(feature[field.name()])
            feedback.setProgress(int(index * step))
        summary.finalize()

        results = {
            self.COUNT: summary.count(),
            self.UNIQUE: summary.countDistinct(),
            self.EMPTY: summary.countMissing(),
            self.FILLED: summary.count() - summary.countMissing(),
            self.MIN: summary.min(),
            self.MAX: summary.max(),
            self.MIN_LENGTH: summary.minLength(),
            self.MAX_LENGTH: summary.maxLength(),
            self.MEAN_LENGTH: summary.meanLength(),
        }

        # The "Count" line reports the caller-supplied count, not summary.count().
        report = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(summary.countDistinct()),
            self.tr('NULL (missing) values: {}').format(summary.countMissing()),
            self.tr('Minimum value: {}').format(summary.min()),
            self.tr('Maximum value: {}').format(summary.max()),
            self.tr('Minimum length: {}').format(summary.minLength()),
            self.tr('Maximum length: {}').format(summary.maxLength()),
            self.tr('Mean length: {}').format(summary.meanLength()),
        ]

        return report, results
Exemple #4
0
    def calcStringStats(self, features, feedback, field, count):
        """Build string statistics for ``field`` and return them with a text report.

        Values are read from each feature by ``field.name()`` and added to a
        ``QgsStringStatisticalSummary`` until the features are exhausted or
        ``feedback.isCanceled()`` reports true.

        :param features: iterable of features indexable by field name.
        :param feedback: object exposing ``isCanceled()`` and ``setProgress(int)``.
        :param field: object exposing ``name()``.
        :param count: feature count used to scale progress and shown on the
            "Count" report line.
        :return: ``(lines, results)``: translated report lines and a dict
            keyed by the output constants of ``self``.
        """
        per_feature = 100.0 / count if count else 1
        stats = QgsStringStatisticalSummary()
        for position, feat in enumerate(features):
            if feedback.isCanceled():
                break
            stats.addValue(feat[field.name()])
            feedback.setProgress(int(position * per_feature))
        stats.finalize()

        results = {
            self.COUNT: stats.count(),
            self.UNIQUE: stats.countDistinct(),
            self.EMPTY: stats.countMissing(),
            self.FILLED: stats.count() - stats.countMissing(),
            self.MIN: stats.min(),
            self.MAX: stats.max(),
            self.MIN_LENGTH: stats.minLength(),
            self.MAX_LENGTH: stats.maxLength(),
            self.MEAN_LENGTH: stats.meanLength(),
        }

        lines = [self.tr('Count: {}').format(count)]
        lines += [
            self.tr('Unique values: {}').format(stats.countDistinct()),
            self.tr('NULL (missing) values: {}').format(stats.countMissing()),
            self.tr('Minimum value: {}').format(stats.min()),
            self.tr('Maximum value: {}').format(stats.max()),
            self.tr('Minimum length: {}').format(stats.minLength()),
            self.tr('Maximum length: {}').format(stats.maxLength()),
            self.tr('Mean length: {}').format(stats.meanLength()),
        ]

        return lines, results
Exemple #5
0
    def calcStringStats(self, features, feedback, field):
        """Summarise the string values of ``field`` across ``features``.

        Each feature's value for ``field.name()`` is added to a
        ``QgsStringStatisticalSummary``. The resulting figures are stored via
        ``self.setOutputValue`` and returned as translated report lines.

        :param features: sized iterable of features; ``len()`` is taken of it
            and each item is indexed by the field name.
        :param feedback: object exposing ``setProgress(int)``.
        :param field: object exposing ``name()``.
        :return: list of formatted strings, one per reported statistic.
        """
        count = len(features)
        # An empty feature set previously raised ZeroDivisionError here. The
        # loop does not run in that case, so the fallback step is never used.
        total = 100.0 / count if count else 1
        stat = QgsStringStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addValue(ft[field.name()])
            feedback.setProgress(int(current * total))
        stat.finalize()

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        # "filled" is everything that was counted but not reported as missing
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.MIN_LENGTH, stat.minLength())
        self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
        self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

        # The report's "Count" line uses len(features), not stat.count().
        data = []
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(stat.countDistinct()))
        data.append(
            self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Minimum length: {}').format(stat.minLength()))
        data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
        data.append(self.tr('Mean length: {}').format(stat.meanLength()))

        return data
    def testStats(self):
        """Compare a summary built by calculate() with one built by addString() calls.

        Both receive the same strings and must report the same statistics.
        """
        samples = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        from_list = QgsStringStatisticalSummary()
        self.assertEqual(from_list.statistics(), QgsStringStatisticalSummary.All)
        from_list.calculate(samples)

        one_by_one = QgsStringStatisticalSummary()
        for sample in samples:
            one_by_one.addString(sample)
        one_by_one.finalize()

        def expect(statistic, value):
            # list-based summary is checked before the one-at-a-time summary
            for summary in (from_list, one_by_one):
                self.assertEqual(getattr(summary, statistic)(), value)

        expect('count', 9)
        expect('countDistinct', 6)
        self.assertEqual(set(from_list.distinctValues()), {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        self.assertEqual(one_by_one.distinctValues(), from_list.distinctValues())
        expect('countMissing', 2)
        expect('min', 'aaaa')
        expect('max', 'eeee')
        expect('minLength', 0)
        expect('maxLength', 8)
        expect('meanLength', 3.33333333333333333333333)

        # second run without any empty string, so minLength is no longer 0
        from_list.calculate(['1111111', '111', '11111'])
        self.assertEqual(from_list.minLength(), 3)
Exemple #7
0
    def calcStringStats(self, values, sink, feedback):
        """Write one feature of string statistics per category to ``sink``.

        :param values: mapping of category -> strings for that category; the
            category is expanded with ``list()`` into leading attributes.
        :param sink: object exposing ``addFeature(feature, flags)``.
        :param feedback: object exposing ``isCanceled()`` and ``setProgress(int)``.
        """
        summary = QgsStringStatisticalSummary()

        # Progress is scaled over 50 and offset by 50 below.
        step = 50.0 / len(values) if values else 0
        for index, (category, strings) in enumerate(values.items()):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(index * step) + 50)

            summary.calculate(strings)
            statistics = [
                summary.count(),
                summary.countDistinct(),
                summary.countMissing(),
                summary.count() - summary.countMissing(),
                summary.min(),
                summary.max(),
                summary.minLength(),
                summary.maxLength(),
                summary.meanLength(),
            ]

            feature = QgsFeature()
            feature.setAttributes(list(category) + statistics)
            sink.addFeature(feature, QgsFeatureSink.FastInsert)
    def calcStringStats(self, values, sink, feedback):
        """Emit a statistics feature for every category in ``values``.

        :param values: mapping whose keys are categories (expanded with
            ``list()`` into the first attributes) and whose values are the
            strings to summarise.
        :param sink: object exposing ``addFeature(feature, flags)``.
        :param feedback: object exposing ``isCanceled()`` and ``setProgress(int)``.
        """
        stats = QgsStringStatisticalSummary()

        # Each category advances progress by an equal share of 50, starting at 50.
        share = 50.0 / len(values) if values else 0
        for done, (cat, strings) in enumerate(values.items()):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(done * share) + 50)

            stats.calculate(strings)
            attributes = list(cat) + [
                stats.count(),
                stats.countDistinct(),
                stats.countMissing(),
                stats.count() - stats.countMissing(),
                stats.min(),
                stats.max(),
                stats.minLength(),
                stats.maxLength(),
                stats.meanLength(),
            ]

            out_feature = QgsFeature()
            out_feature.setAttributes(attributes)
            sink.addFeature(out_feature, QgsFeatureSink.FastInsert)
Exemple #9
0
    def processAlgorithm(self, feedback):
        """Compute string statistics for one field of the input layer.

        The layer and field name come from the ``INPUT_LAYER`` and
        ``FIELD_NAME`` parameters. Each feature's value for the field is added
        to a ``QgsStringStatisticalSummary``. The statistics are passed as
        report lines to ``self.createHTML`` together with the
        ``OUTPUT_HTML_FILE`` output value, and are also stored individually
        via ``self.setOutputValue``.

        :param feedback: object exposing ``setProgress(int)``.
        """
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # Only the analysed attribute is requested; geometry is skipped.
        request = QgsFeatureRequest().setFlags(
            QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                layer.fields())
        stat = QgsStringStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        # Without features the division used to raise ZeroDivisionError. The
        # loop does not run in that case, so the fallback value is unused.
        total = 100.0 / count if count else 0
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Minimum length: {}').format(stat.minLength()))
        data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
        data.append(self.tr('Mean length: {}').format(stat.meanLength()))
        data.append(
            self.tr('Filled values: {}').format(stat.count() -
                                                stat.countMissing()))
        data.append(
            self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique: {}').format(stat.countDistinct()))
        data.append(self.tr('Minimum string value: {}').format(stat.min()))
        data.append(self.tr('Maximum string value: {}').format(stat.max()))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
Exemple #10
0
    def processAlgorithm(self, progress):
        """Compute string statistics for one field of the input layer.

        The layer and field name come from the ``INPUT_LAYER`` and
        ``FIELD_NAME`` parameters. Each feature's value for the field is added
        to a ``QgsStringStatisticalSummary``. The statistics are passed as
        report lines to ``self.createHTML`` together with the
        ``OUTPUT_HTML_FILE`` output value, and are also stored individually
        via ``self.setOutputValue``.

        :param progress: object exposing ``setPercentage(int)``.
        """
        layer = dataobjects.getObjectFromUri(self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # Only the analysed attribute is requested; geometry is skipped.
        request = (
            QgsFeatureRequest()
            .setFlags(QgsFeatureRequest.NoGeometry)
            .setSubsetOfAttributes([fieldName], layer.fields())
        )
        stat = QgsStringStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        # Without features the division used to raise ZeroDivisionError. The
        # loop does not run in that case, so the fallback value is unused.
        total = 100.0 / count if count else 0
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            progress.setPercentage(int(current * total))

        stat.finalize()

        data = []
        data.append(self.tr("Analyzed layer: {}").format(layer.name()))
        data.append(self.tr("Analyzed field: {}").format(fieldName))
        data.append(self.tr("Minimum length: {}").format(stat.minLength()))
        data.append(self.tr("Maximum length: {}").format(stat.maxLength()))
        data.append(self.tr("Mean length: {}").format(stat.meanLength()))
        data.append(self.tr("Filled values: {}").format(stat.count() - stat.countMissing()))
        data.append(self.tr("NULL (missing) values: {}").format(stat.countMissing()))
        data.append(self.tr("Count: {}").format(stat.count()))
        data.append(self.tr("Unique: {}").format(stat.countDistinct()))
        data.append(self.tr("Minimum string value: {}").format(stat.min()))
        data.append(self.tr("Maximum string value: {}").format(stat.max()))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
 def testVariantStats(self):
     """Check calculateFromVariants() on a list mixing str and int entries."""
     mixed_values = ['cc', 5, 'bbbb', 'aaaa', 'eeee', 6, 9, '9', '']
     summary = QgsStringStatisticalSummary()
     self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)
     summary.calculateFromVariants(mixed_values)
     # the expected count equals the number of str entries in the input
     self.assertEqual(summary.count(), 6)
     self.assertEqual(set(summary.distinctValues()), {'cc', 'aaaa', 'bbbb', 'eeee', '', '9'})
     self.assertEqual(summary.countMissing(), 1)
     self.assertEqual(summary.min(), '9')
     self.assertEqual(summary.max(), 'eeee')
 def testVariantStats(self):
     """Feed a mixed str/int list to calculateFromVariants() and verify the statistics."""
     stats = QgsStringStatisticalSummary()
     self.assertEqual(stats.statistics(), QgsStringStatisticalSummary.All)

     variants = ['cc', 5, 'bbbb', 'aaaa', 'eeee', 6, 9, '9', '']
     stats.calculateFromVariants(variants)

     # six of the nine inputs are str, one of which is the empty string
     self.assertEqual(stats.count(), 6)
     expected_distinct = {'cc', 'aaaa', 'bbbb', 'eeee', '', '9'}
     self.assertEqual(set(stats.distinctValues()), expected_distinct)
     self.assertEqual(stats.countMissing(), 1)
     self.assertEqual(stats.min(), '9')
     self.assertEqual(stats.max(), 'eeee')
 def testVariantStats(self):
     """Verify the statistics calculateFromVariants() reports for mixed str/int input."""
     inputs = ["cc", 5, "bbbb", "aaaa", "eeee", 6, 9, "9", ""]
     variant_summary = QgsStringStatisticalSummary()
     self.assertEqual(variant_summary.statistics(), QgsStringStatisticalSummary.All)
     variant_summary.calculateFromVariants(inputs)

     # expectations below only involve the str entries of ``inputs``
     self.assertEqual(variant_summary.count(), 6)
     distinct = set(variant_summary.distinctValues())
     self.assertEqual(distinct, {"cc", "aaaa", "bbbb", "eeee", "", "9"})
     self.assertEqual(variant_summary.countMissing(), 1)
     self.assertEqual(variant_summary.min(), "9")
     self.assertEqual(variant_summary.max(), "eeee")
    def processAlgorithm(self, parameters, context, feedback):
        """Compute string statistics for one field of the input layer.

        The layer is resolved with ``QgsProcessingUtils.mapLayerFromString``
        from the ``INPUT_LAYER`` parameter value; the field name comes from
        ``FIELD_NAME``. Each feature's value for the field is added to a
        ``QgsStringStatisticalSummary``. The statistics are passed as report
        lines to ``self.createHTML`` together with the ``OUTPUT_HTML_FILE``
        output value, and are also stored individually via
        ``self.setOutputValue``.

        :param parameters: not read by this method; parameter values are
            obtained through ``self.getParameterValue``.
        :param context: passed to the ``QgsProcessingUtils`` helpers.
        :param feedback: object exposing ``setProgress(int)``.
        """
        layer = QgsProcessingUtils.mapLayerFromString(self.getParameterValue(self.INPUT_LAYER), context)
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # Only the analysed attribute is requested; geometry is skipped.
        request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                                                   layer.fields())
        stat = QgsStringStatisticalSummary()
        features = QgsProcessingUtils.getFeatures(layer, context, request)
        count = QgsProcessingUtils.featureCount(layer, context)
        # A zero count used to raise ZeroDivisionError here. A non-positive
        # count also disables progress scaling, so it cannot go negative.
        total = 100.0 / count if count > 0 else 0
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Minimum length: {}').format(stat.minLength()))
        data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
        data.append(self.tr('Mean length: {}').format(stat.meanLength()))
        data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique: {}').format(stat.countDistinct()))
        data.append(self.tr('Minimum string value: {}').format(stat.min()))
        data.append(self.tr('Maximum string value: {}').format(stat.max()))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
    def testStats(self):
        """Check the statistics calculate() reports for a list with duplicates and empty strings."""
        samples = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        summary = QgsStringStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)
        summary.calculate(samples)

        self.assertEqual(summary.count(), 9)
        self.assertEqual(summary.countDistinct(), 6)
        expected_distinct = {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''}
        self.assertEqual(set(summary.distinctValues()), expected_distinct)
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), 'aaaa')
        self.assertEqual(summary.max(), 'eeee')
        self.assertEqual(summary.minLength(), 0)
        self.assertEqual(summary.maxLength(), 8)

        # recalculate without empty strings: the minimum length is then the
        # length of the shortest entry
        non_empty = ['1111111', '111', '11111']
        summary.calculate(non_empty)
        self.assertEqual(summary.minLength(), 3)
Exemple #16
0
    def testStats(self):
        """Verify calculate() results on strings that include repeats and empty entries."""
        stats = QgsStringStatisticalSummary()
        self.assertEqual(stats.statistics(), QgsStringStatisticalSummary.All)

        stats.calculate(['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd'])
        self.assertEqual(stats.count(), 9)
        self.assertEqual(stats.countDistinct(), 6)
        distinct = set(stats.distinctValues())
        self.assertEqual(distinct, {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        self.assertEqual(stats.countMissing(), 2)
        self.assertEqual(stats.min(), 'aaaa')
        self.assertEqual(stats.max(), 'eeee')
        self.assertEqual(stats.minLength(), 0)
        self.assertEqual(stats.maxLength(), 8)

        # a second calculate() on input with no empty string: minLength
        # becomes the shortest string's length rather than 0
        stats.calculate(['1111111', '111', '11111'])
        self.assertEqual(stats.minLength(), 3)