def testIndividualStats(self):
        """Check every statistic on its own.

        Each statistic flag is requested in isolation, so a statistic whose
        calculation depends on another one being enabled would be caught.
        """
        cases = [
            {'stat': QgsStringStatisticalSummary.Count, 'expected': 9},
            {'stat': QgsStringStatisticalSummary.CountDistinct, 'expected': 6},
            {'stat': QgsStringStatisticalSummary.CountMissing, 'expected': 2},
            {'stat': QgsStringStatisticalSummary.Min, 'expected': 'aaaa'},
            {'stat': QgsStringStatisticalSummary.Max, 'expected': 'eeee'},
            {'stat': QgsStringStatisticalSummary.MinimumLength, 'expected': 0},
            {'stat': QgsStringStatisticalSummary.MaximumLength, 'expected': 8},
        ]
        sample = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']

        shared = QgsStringStatisticalSummary()
        for case in cases:
            flag = case['stat']
            wanted = case['expected']

            # a summary constructed with a single flag must report that flag
            fresh = QgsStringStatisticalSummary(flag)
            self.assertEqual(fresh.statistics(), flag)

            # the same flag applied to the reused summary
            shared.setStatistics(flag)
            self.assertEqual(shared.statistics(), flag)
            shared.calculate(sample)
            self.assertEqual(shared.statistic(flag), wanted)

            # every statistic is expected to have a non-empty display name
            label = QgsStringStatisticalSummary.displayName(flag)
            self.assertTrue(len(label) > 0)
 def testVariantStats(self):
     """Feed a mixed list of variants and check the resulting statistics.

     The assertions expect only the six string entries to be counted; the
     integers 5, 6 and 9 do not appear in any expected result.
     """
     summary = QgsStringStatisticalSummary()
     self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

     variants = ['cc', 5, 'bbbb', 'aaaa', 'eeee', 6, 9, '9', '']
     summary.calculateFromVariants(variants)

     expected_distinct = {'cc', 'aaaa', 'bbbb', 'eeee', '', '9'}
     self.assertEqual(summary.count(), 6)
     self.assertEqual(set(summary.distinctValues()), expected_distinct)
     self.assertEqual(summary.countMissing(), 1)
     self.assertEqual(summary.min(), '9')
     self.assertEqual(summary.max(), 'eeee')
 def testVariantStats(self):
     """Check statistics calculated from a list mixing strings and integers.

     Six values are expected to be counted, one of them missing (the empty
     string); the expected distinct values contain strings only.
     """
     stats = QgsStringStatisticalSummary()
     # a default-constructed summary is expected to have all statistics enabled
     self.assertEqual(stats.statistics(), QgsStringStatisticalSummary.All)

     mixed_input = ["cc", 5, "bbbb", "aaaa", "eeee", 6, 9, "9", ""]
     stats.calculateFromVariants(mixed_input)

     self.assertEqual(stats.count(), 6)
     found = set(stats.distinctValues())
     self.assertEqual(found, {"cc", "aaaa", "bbbb", "eeee", "", "9"})
     self.assertEqual(stats.countMissing(), 1)
     self.assertEqual(stats.min(), "9")
     self.assertEqual(stats.max(), "eeee")
    def testStats(self):
        """Check all statistics for a known list of strings.

        The strings are fed in twice: once in bulk through calculate() and
        once one-at-a-time through addString() followed by finalize(). Both
        summaries must report the same results.
        """
        s = QgsStringStatisticalSummary()
        self.assertEqual(s.statistics(), QgsStringStatisticalSummary.All)
        strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        s.calculate(strings)
        s2 = QgsStringStatisticalSummary()
        for string in strings:
            s2.addString(string)
        s2.finalize()

        self.assertEqual(s.count(), 9)
        self.assertEqual(s2.count(), 9)
        self.assertEqual(s.countDistinct(), 6)
        self.assertEqual(s2.countDistinct(), 6)
        self.assertEqual(set(s.distinctValues()), {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        # compare as sets: the distinct values carry no meaningful order, so
        # two summaries holding the same values must not fail on ordering
        self.assertEqual(set(s2.distinctValues()), set(s.distinctValues()))
        self.assertEqual(s.countMissing(), 2)
        self.assertEqual(s2.countMissing(), 2)
        self.assertEqual(s.min(), 'aaaa')
        self.assertEqual(s2.min(), 'aaaa')
        self.assertEqual(s.max(), 'eeee')
        self.assertEqual(s2.max(), 'eeee')
        self.assertEqual(s.minLength(), 0)
        self.assertEqual(s2.minLength(), 0)
        self.assertEqual(s.maxLength(), 8)
        self.assertEqual(s2.maxLength(), 8)
        # 30 characters spread over 9 strings; compare with a tolerance
        # rather than relying on bit-exact floating point equality
        expected_mean = 30.0 / 9
        self.assertAlmostEqual(s.meanLength(), expected_mean, places=10)
        self.assertAlmostEqual(s2.meanLength(), expected_mean, places=10)

        # extra check for minLength without empty strings
        s.calculate(['1111111', '111', '11111'])
        self.assertEqual(s.minLength(), 3)
Example #5
0
    def calcStringStats(self, features, feedback, field, count):
        """Summarise the string values of one field over a set of features.

        :param features: iterable of features, each indexable by field name
        :param feedback: polled through isCanceled() and updated through
            setProgress() for every feature
        :param field: field whose name() selects the attribute to read
        :param count: number of features used to scale the progress value;
            also the number shown in the 'Count' report line
        :return: tuple (data, results): a list of translated report lines
            and a dict of statistics keyed by the output constants
        """
        step = 100.0 / count if count else 1
        summary = QgsStringStatisticalSummary()
        for index, feature in enumerate(features):
            if feedback.isCanceled():
                break
            summary.addValue(feature[field.name()])
            feedback.setProgress(int(index * step))
        summary.finalize()

        results = {}
        results[self.COUNT] = summary.count()
        results[self.UNIQUE] = summary.countDistinct()
        results[self.EMPTY] = summary.countMissing()
        results[self.FILLED] = summary.count() - summary.countMissing()
        results[self.MIN] = summary.min()
        results[self.MAX] = summary.max()
        results[self.MIN_LENGTH] = summary.minLength()
        results[self.MAX_LENGTH] = summary.maxLength()
        results[self.MEAN_LENGTH] = summary.meanLength()

        data = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(summary.countDistinct()),
            self.tr('NULL (missing) values: {}').format(summary.countMissing()),
            self.tr('Minimum value: {}').format(summary.min()),
            self.tr('Maximum value: {}').format(summary.max()),
            self.tr('Minimum length: {}').format(summary.minLength()),
            self.tr('Maximum length: {}').format(summary.maxLength()),
            self.tr('Mean length: {}').format(summary.meanLength()),
        ]

        return data, results
Example #6
0
    def calcStringStats(self, values, sink, feedback):
        """Write one feature of string statistics per category to the sink.

        :param values: mapping whose keys are iterable category values and
            whose values are the lists of strings to summarise
        :param sink: receives one feature per category through addFeature()
        :param feedback: polled through isCanceled() and updated through
            setProgress()
        """
        summary = QgsStringStatisticalSummary()

        # progress is reported from 50 upwards, in steps of 50 / len(values)
        step = 50.0 / len(values) if values else 0
        for position, (category, strings) in enumerate(values.items()):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(position * step) + 50)

            summary.calculate(strings)
            statistics = [
                summary.count(),
                summary.countDistinct(),
                summary.countMissing(),
                summary.count() - summary.countMissing(),
                summary.min(),
                summary.max(),
                summary.minLength(),
                summary.maxLength(),
                summary.meanLength(),
            ]

            # the category values come first, followed by the statistics
            feature = QgsFeature()
            feature.setAttributes(list(category) + statistics)
            sink.addFeature(feature, QgsFeatureSink.FastInsert)
Example #7
0
    def calcStringStats(self, features, progress, field):
        """Calculate string statistics for one field over the given features.

        The statistics are published through setOutputValue() and are also
        returned as a list of translated report lines.

        :param features: sized iterable of features, each indexable by field
            name
        :param progress: progress reporter, updated through setPercentage()
        :param field: field whose name() selects the attribute to summarise
        :return: list of translated report lines
        """
        count = len(features)
        # An empty feature set must not abort with ZeroDivisionError; there
        # is simply no progress to report in that case.
        total = 100.0 / count if count else 0
        stat = QgsStringStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addValue(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.MIN_LENGTH, stat.minLength())
        self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
        self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

        data = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(stat.countDistinct()),
            self.tr('NULL (missing) values: {}').format(stat.countMissing()),
            self.tr('Minimum value: {}').format(stat.min()),
            self.tr('Maximum value: {}').format(stat.max()),
            self.tr('Minimum length: {}').format(stat.minLength()),
            self.tr('Maximum length: {}').format(stat.maxLength()),
            self.tr('Mean length: {}').format(stat.meanLength()),
        ]

        return data
    def testStats(self):
        """Check the statistics calculated in bulk for a known list of strings."""
        summary = QgsStringStatisticalSummary()
        self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

        sample = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        summary.calculate(sample)

        self.assertEqual(summary.count(), 9)
        self.assertEqual(summary.countDistinct(), 6)
        expected_distinct = {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''}
        self.assertEqual(set(summary.distinctValues()), expected_distinct)
        self.assertEqual(summary.countMissing(), 2)
        self.assertEqual(summary.min(), 'aaaa')
        self.assertEqual(summary.max(), 'eeee')
        self.assertEqual(summary.minLength(), 0)
        self.assertEqual(summary.maxLength(), 8)

        # second run without any empty string: the shortest entry has 3 characters
        summary.calculate(['1111111', '111', '11111'])
        self.assertEqual(summary.minLength(), 3)
Example #9
0
    def calcStringStats(self, features, feedback, field):
        """Calculate string statistics for one field over the given features.

        The statistics are published through setOutputValue() and are also
        returned as a list of translated report lines.

        :param features: sized iterable of features, each indexable by field
            name
        :param feedback: progress reporter, updated through setProgress()
        :param field: field whose name() selects the attribute to summarise
        :return: list of translated report lines
        """
        count = len(features)
        # An empty feature set must not abort with ZeroDivisionError; there
        # is simply no progress to report in that case.
        total = 100.0 / count if count else 0
        stat = QgsStringStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addValue(ft[field.name()])
            feedback.setProgress(int(current * total))
        stat.finalize()

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.MIN_LENGTH, stat.minLength())
        self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
        self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

        data = [
            self.tr('Count: {}').format(count),
            self.tr('Unique values: {}').format(stat.countDistinct()),
            self.tr('NULL (missing) values: {}').format(stat.countMissing()),
            self.tr('Minimum value: {}').format(stat.min()),
            self.tr('Maximum value: {}').format(stat.max()),
            self.tr('Minimum length: {}').format(stat.minLength()),
            self.tr('Maximum length: {}').format(stat.maxLength()),
            self.tr('Mean length: {}').format(stat.meanLength()),
        ]

        return data
    def testIndividualStats(self):
        """Check every statistic on its own.

        Each statistic flag is requested in isolation, so a statistic whose
        calculation depends on another one being enabled would be caught.
        The values are fed in both in bulk (calculate) and one-at-a-time
        (addString followed by finalize).
        """
        tests = [
            {"stat": QgsStringStatisticalSummary.Count, "expected": 9},
            {"stat": QgsStringStatisticalSummary.CountDistinct, "expected": 6},
            {"stat": QgsStringStatisticalSummary.CountMissing, "expected": 2},
            {"stat": QgsStringStatisticalSummary.Min, "expected": "aaaa"},
            {"stat": QgsStringStatisticalSummary.Max, "expected": "eeee"},
            {"stat": QgsStringStatisticalSummary.MinimumLength, "expected": 0},
            {"stat": QgsStringStatisticalSummary.MaximumLength, "expected": 8},
            {"stat": QgsStringStatisticalSummary.MeanLength, "expected": 3.3333333333333335},
        ]
        # identical for every statistic, so build it once
        strings = ["cc", "aaaa", "bbbbbbbb", "aaaa", "eeee", "", "eeee", "", "dddd"]

        s = QgsStringStatisticalSummary()
        s3 = QgsStringStatisticalSummary()
        for t in tests:
            stat = t["stat"]
            expected = t["expected"]
            # floating point results are compared with a tolerance instead of
            # relying on bit-exact equality
            if isinstance(expected, float):
                check = self.assertAlmostEqual
            else:
                check = self.assertEqual

            # subTest lets the loop continue after a failure, so one broken
            # statistic does not hide the results of the remaining ones
            with self.subTest(stat=stat):
                # test constructor
                s2 = QgsStringStatisticalSummary(stat)
                self.assertEqual(s2.statistics(), stat)

                s.setStatistics(stat)
                s3.setStatistics(stat)
                self.assertEqual(s.statistics(), stat)

                s.calculate(strings)
                s3.reset()
                for string in strings:
                    s3.addString(string)
                s3.finalize()

                check(s.statistic(stat), expected)
                check(s3.statistic(stat), expected)

                # display name
                self.assertGreater(len(QgsStringStatisticalSummary.displayName(stat)), 0)
Example #11
0
    def calcStringStats(self, features, feedback, field, count):
        """Collect string statistics for one field and build a text report.

        :param features: iterable of features, each indexable by field name
        :param feedback: checked with isCanceled() before each feature and
            updated with setProgress() after it
        :param field: field whose name() selects the attribute to read
        :param count: number of features used to scale the progress value;
            also the number shown in the 'Count' report line
        :return: tuple (data, results): the translated report lines and a
            dict of statistics keyed by the output constants
        """
        percent_per_feature = 100.0 / count if count else 1
        string_stats = QgsStringStatisticalSummary()
        for position, feature in enumerate(features):
            if feedback.isCanceled():
                break
            string_stats.addValue(feature[field.name()])
            feedback.setProgress(int(position * percent_per_feature))
        string_stats.finalize()

        results = {
            self.COUNT: string_stats.count(),
            self.UNIQUE: string_stats.countDistinct(),
            self.EMPTY: string_stats.countMissing(),
            self.FILLED: string_stats.count() - string_stats.countMissing(),
            self.MIN: string_stats.min(),
            self.MAX: string_stats.max(),
            self.MIN_LENGTH: string_stats.minLength(),
            self.MAX_LENGTH: string_stats.maxLength(),
            self.MEAN_LENGTH: string_stats.meanLength(),
        }

        # (translated template, value) pairs, in report order
        report = (
            (self.tr('Count: {}'), count),
            (self.tr('Unique values: {}'), string_stats.countDistinct()),
            (self.tr('NULL (missing) values: {}'), string_stats.countMissing()),
            (self.tr('Minimum value: {}'), string_stats.min()),
            (self.tr('Maximum value: {}'), string_stats.max()),
            (self.tr('Minimum length: {}'), string_stats.minLength()),
            (self.tr('Maximum length: {}'), string_stats.maxLength()),
            (self.tr('Mean length: {}'), string_stats.meanLength()),
        )
        data = [template.format(value) for template, value in report]

        return data, results
Example #12
0
    def processAlgorithm(self, parameters, context, feedback):
        """
        Joins summary statistics of attributes taken from the JOIN source onto
        the features of the INPUT source.

        For every input feature, the join features whose geometry satisfies at
        least one of the selected predicates are collected. For each join
        field the requested summaries are calculated over the collected values
        and appended to the attributes of the input feature, which is then
        written to the OUTPUT sink.

        Raises QgsProcessingException when the input source, the join source
        or the output sink cannot be obtained.

        Returns a dict mapping self.OUTPUT to the id of the created sink.
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        if source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.INPUT))

        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        if join_source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.JOIN))

        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS,
                                             context)
        discard_nomatch = self.parameterAsBool(parameters,
                                               self.DISCARD_NONMATCHING,
                                               context)
        # names (first element of each self.statistics entry) of the selected
        # summaries, taken in ascending enum index order
        summaries = [
            self.statistics[i][0] for i in sorted(
                self.parameterAsEnums(parameters, self.SUMMARIES, context))
        ]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [
                join_source.fields().at(i).name()
                for i in range(len(join_source.fields()))
            ]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        # NOTE(review): the parameter name `type` shadows the builtin inside
        # this helper
        def addField(original, stat, type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(type)
            if type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        # Each entry below is (summary name, output field type, accessor name).
        # A field type of None means the output field keeps the type of the
        # joined field (see the addFieldKeepType call further down). The
        # accessor name is looked up on the statistics object with getattr().
        numeric_fields = (('count', QVariant.Int,
                           'count'), ('unique', QVariant.Int, 'variety'),
                          ('min', QVariant.Double,
                           'min'), ('max', QVariant.Double,
                                    'max'), ('range', QVariant.Double,
                                             'range'), ('sum', QVariant.Double,
                                                        'sum'),
                          ('mean', QVariant.Double,
                           'mean'), ('median', QVariant.Double, 'median'),
                          ('stddev', QVariant.Double,
                           'stDev'), ('minority', QVariant.Double, 'minority'),
                          ('majority', QVariant.Double,
                           'majority'), ('q1', QVariant.Double,
                                         'firstQuartile'),
                          ('q3', QVariant.Double,
                           'thirdQuartile'), ('iqr', QVariant.Double,
                                              'interQuartileRange'))

        # 'filled', 'min' and 'max' carry no accessor name here; they are
        # handled by dedicated branches when the statistics are read back
        datetime_fields = (('count', QVariant.Int, 'count'),
                           ('unique', QVariant.Int, 'countDistinct'),
                           ('empty', QVariant.Int, 'countMissing'),
                           ('filled', QVariant.Int), ('min', None), ('max',
                                                                     None))

        # 'filled' carries no accessor name; it is computed as
        # count() - countMissing() when the statistics are read back
        string_fields = (('count', QVariant.Int,
                          'count'), ('unique', QVariant.Int, 'countDistinct'),
                         ('empty', QVariant.Int, 'countMissing'),
                         ('filled', QVariant.Int), ('min', None, 'min'),
                         ('max', None, 'max'), ('min_length', QVariant.Int,
                                                'minLength'),
                         ('max_length', QVariant.Int, 'maxLength'),
                         ('mean_length', QVariant.Double, 'meanLength'))

        # field_types is filled in step with join_field_indexes, so position i
        # in one list describes position i in the other
        field_types = []
        for f in join_fields:
            idx = join_source.fields().lookupField(f)
            # field names that cannot be found in the join source are skipped
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source.fields().at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time,
                                           QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                # NOTE(review): this loop rebinds `f`, the variable of the
                # enclosing loop; the outer value is not read again in this
                # iteration, so the result is unaffected
                for f in field_list:
                    if f[0] in summaries:
                        if f[1] is not None:
                            addField(join_field, f[0], f[1])
                        else:
                            addFieldKeepType(join_field, f[0])

        out_fields = QgsProcessingUtils.combineFields(source_fields,
                                                      fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, out_fields,
                                               source.wkbType(),
                                               source.sourceCrs())
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT))

        # do the join
        # names of the geometry engine methods to test with (first element of
        # each selected self.predicates entry)
        predicates = [
            self.predicates[i][0]
            for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
        ]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        # (source CRS -> join source CRS; presumably so that the filter
        # rectangle below is expressed in the CRS of the join layer)
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(),
                                                join_source.sourceCrs(),
                                                context.project())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            # features without geometry can never match: pass them through
            # unchanged (padded with NULLs) or drop them
            if not f.hasGeometry():
                if not discard_nomatch:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(
                f.geometry().boundingBox())
            # created lazily, only once a candidate join feature is found
            engine = None

            # one list of join attribute values per matching join feature
            values = []

            # candidates are limited to the bounding box and to the join
            # fields, and are requested in the CRS of the input source
            request = QgsFeatureRequest().setFilterRect(
                bbox).setSubsetOfAttributes(
                    join_field_indexes).setDestinationCrs(
                        source.sourceCrs(), context.transformContext())
            for test_feat in join_source.getFeatures(request):
                # NOTE(review): this break only leaves the candidate loop; the
                # current feature is still processed below with the values
                # collected so far
                if feedback.isCanceled():
                    break

                join_attributes = []
                for a in join_field_indexes:
                    join_attributes.append(test_feat.attributes()[a])

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(
                        f.geometry().constGet())
                    engine.prepareGeometry()

                # a join feature is recorded once, as soon as any of the
                # selected predicates holds
                for predicate in predicates:
                    if getattr(engine,
                               predicate)(test_feat.geometry().constGet()):
                        values.append(join_attributes)
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
            else:
                attrs = f.attributes()
                # statistics are appended per join field, in the same order in
                # which the output fields were added above
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        # NULL values are added as empty strings, every other
                        # value is converted with str()
                        for v in attribute_values:
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Example #13
0
    def calcStringStats(self, values, sink, feedback):
        """Emit one feature with string statistics for every category.

        :param values: mapping whose keys are iterable category values and
            whose values are the lists of strings to summarise
        :param sink: receives the generated features through addFeature()
        :param feedback: checked with isCanceled() and updated with
            setProgress() once per category
        """
        stat = QgsStringStatisticalSummary()

        # each category advances the progress by 50 / len(values), starting at 50
        increment = 50.0 / len(values) if values else 0
        for done, (key, strings) in enumerate(values.items()):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(done * increment) + 50)

            stat.calculate(strings)

            attributes = list(key)
            attributes += [
                stat.count(),
                stat.countDistinct(),
                stat.countMissing(),
                stat.count() - stat.countMissing(),
                stat.min(),
                stat.max(),
                stat.minLength(),
                stat.maxLength(),
                stat.meanLength(),
            ]

            out_feature = QgsFeature()
            out_feature.setAttributes(attributes)
            sink.addFeature(out_feature, QgsFeatureSink.FastInsert)
    def testIndividualStats(self):
        """Check every statistic on its own.

        Each statistic flag is requested in isolation, so a statistic whose
        calculation depends on another one being enabled would be caught.
        The values are fed in both in bulk (calculate) and one-at-a-time
        (addString followed by finalize).
        """
        tests = [
            {
                'stat': QgsStringStatisticalSummary.Count,
                'expected': 9
            },
            {
                'stat': QgsStringStatisticalSummary.CountDistinct,
                'expected': 6
            },
            {
                'stat': QgsStringStatisticalSummary.CountMissing,
                'expected': 2
            },
            {
                'stat': QgsStringStatisticalSummary.Min,
                'expected': 'aaaa'
            },
            {
                'stat': QgsStringStatisticalSummary.Max,
                'expected': 'eeee'
            },
            {
                'stat': QgsStringStatisticalSummary.MinimumLength,
                'expected': 0
            },
            {
                'stat': QgsStringStatisticalSummary.MaximumLength,
                'expected': 8
            },
            {
                'stat': QgsStringStatisticalSummary.MeanLength,
                'expected': 3.3333333333333335
            },
        ]
        # identical for every statistic, so build it once
        strings = [
            'cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '',
            'dddd'
        ]

        s = QgsStringStatisticalSummary()
        s3 = QgsStringStatisticalSummary()
        for t in tests:
            stat = t['stat']
            expected = t['expected']
            # floating point results are compared with a tolerance instead of
            # relying on bit-exact equality
            if isinstance(expected, float):
                check = self.assertAlmostEqual
            else:
                check = self.assertEqual

            # subTest lets the loop continue after a failure, so one broken
            # statistic does not hide the results of the remaining ones
            with self.subTest(stat=stat):
                # test constructor
                s2 = QgsStringStatisticalSummary(stat)
                self.assertEqual(s2.statistics(), stat)

                s.setStatistics(stat)
                s3.setStatistics(stat)
                self.assertEqual(s.statistics(), stat)

                s.calculate(strings)
                s3.reset()
                for string in strings:
                    s3.addString(string)
                s3.finalize()

                check(s.statistic(stat), expected)
                check(s3.statistic(stat), expected)

                # display name
                self.assertGreater(
                    len(QgsStringStatisticalSummary.displayName(stat)), 0)
    def testStats(self):
        """Check all statistics for a known list of strings.

        The strings are fed in twice: once in bulk through calculate() and
        once one-at-a-time through addString() followed by finalize(). Both
        summaries must report the same results.
        """
        s = QgsStringStatisticalSummary()
        self.assertEqual(s.statistics(), QgsStringStatisticalSummary.All)
        strings = [
            'cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd'
        ]
        s.calculate(strings)
        s2 = QgsStringStatisticalSummary()
        for string in strings:
            s2.addString(string)
        s2.finalize()

        self.assertEqual(s.count(), 9)
        self.assertEqual(s2.count(), 9)
        self.assertEqual(s.countDistinct(), 6)
        self.assertEqual(s2.countDistinct(), 6)
        self.assertEqual(set(s.distinctValues()),
                         {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
        # compare as sets: the distinct values carry no meaningful order, so
        # two summaries holding the same values must not fail on ordering
        self.assertEqual(set(s2.distinctValues()), set(s.distinctValues()))
        self.assertEqual(s.countMissing(), 2)
        self.assertEqual(s2.countMissing(), 2)
        self.assertEqual(s.min(), 'aaaa')
        self.assertEqual(s2.min(), 'aaaa')
        self.assertEqual(s.max(), 'eeee')
        self.assertEqual(s2.max(), 'eeee')
        self.assertEqual(s.minLength(), 0)
        self.assertEqual(s2.minLength(), 0)
        self.assertEqual(s.maxLength(), 8)
        self.assertEqual(s2.maxLength(), 8)
        # 30 characters spread over 9 strings; compare with a tolerance
        # rather than relying on bit-exact floating point equality
        expected_mean = 30.0 / 9
        self.assertAlmostEqual(s.meanLength(), expected_mean, places=10)
        self.assertAlmostEqual(s2.meanLength(), expected_mean, places=10)

        # extra check for minLength without empty strings
        s.calculate(['1111111', '111', '11111'])
        self.assertEqual(s.minLength(), 3)
Example #16
0
    def processAlgorithm(self, feedback):
        """Calculate string statistics for one field of the input layer.

        The statistics are written to the HTML output file through
        createHTML() and published individually through setOutputValue().

        :param feedback: progress reporter, updated through setProgress()
        """
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # only the analysed attribute is requested, without geometry
        request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                                                   layer.fields())
        stat = QgsStringStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        # A layer without features must not abort with ZeroDivisionError;
        # there is simply no progress to report in that case.
        total = 100.0 / count if count else 0
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        data = [
            self.tr('Analyzed layer: {}').format(layer.name()),
            self.tr('Analyzed field: {}').format(fieldName),
            self.tr('Minimum length: {}').format(stat.minLength()),
            self.tr('Maximum length: {}').format(stat.maxLength()),
            self.tr('Mean length: {}').format(stat.meanLength()),
            self.tr('Filled values: {}').format(stat.count() - stat.countMissing()),
            self.tr('NULL (missing) values: {}').format(stat.countMissing()),
            self.tr('Count: {}').format(stat.count()),
            self.tr('Unique: {}').format(stat.countDistinct()),
            self.tr('Minimum string value: {}').format(stat.min()),
            self.tr('Maximum string value: {}').format(stat.max()),
        ]

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())
Example #17
0
    def processAlgorithm(self, feedback):
        """Calculate string statistics for one field of the input layer.

        The statistics are written to the HTML output file through
        createHTML() and published individually through setOutputValue().

        :param feedback: progress reporter, updated through setProgress()
        """
        layer = dataobjects.getLayerFromString(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # only the analysed attribute is requested, without geometry
        request = QgsFeatureRequest().setFlags(
            QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                layer.fields())
        stat = QgsStringStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        # A layer without features must not abort with ZeroDivisionError;
        # there is simply no progress to report in that case.
        total = 100.0 / count if count else 0
        for current, ft in enumerate(features):
            stat.addValue(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        data = [
            self.tr('Analyzed layer: {}').format(layer.name()),
            self.tr('Analyzed field: {}').format(fieldName),
            self.tr('Minimum length: {}').format(stat.minLength()),
            self.tr('Maximum length: {}').format(stat.maxLength()),
            self.tr('Mean length: {}').format(stat.meanLength()),
            self.tr('Filled values: {}').format(stat.count() -
                                                stat.countMissing()),
            self.tr('NULL (missing) values: {}').format(stat.countMissing()),
            self.tr('Count: {}').format(stat.count()),
            self.tr('Unique: {}').format(stat.countDistinct()),
            self.tr('Minimum string value: {}').format(stat.min()),
            self.tr('Maximum string value: {}').format(stat.max()),
        ]

        self.createHTML(outputFile, data)

        self.setOutputValue(self.MIN_LEN, stat.minLength())
        self.setOutputValue(self.MAX_LEN, stat.maxLength())
        self.setOutputValue(self.MEAN_LEN, stat.meanLength())
        self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.countDistinct())
        self.setOutputValue(self.MIN_VALUE, stat.min())
        self.setOutputValue(self.MAX_VALUE, stat.max())