def testIndividualStats(self):
    """Check each statistic when it is the only one requested.

    Statistics are calculated one at a time to make sure that the
    statistic calculations are not dependent on each other.
    """
    sample = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    cases = (
        (QgsStringStatisticalSummary.Count, 9),
        (QgsStringStatisticalSummary.CountDistinct, 6),
        (QgsStringStatisticalSummary.CountMissing, 2),
        (QgsStringStatisticalSummary.Min, 'aaaa'),
        (QgsStringStatisticalSummary.Max, 'eeee'),
        (QgsStringStatisticalSummary.MinimumLength, 0),
        (QgsStringStatisticalSummary.MaximumLength, 8),
    )

    summary = QgsStringStatisticalSummary()
    for requested, expected in cases:
        # test constructor
        constructed = QgsStringStatisticalSummary(requested)
        self.assertEqual(constructed.statistics(), requested)

        # test switching an existing summary over to the single statistic
        summary.setStatistics(requested)
        self.assertEqual(summary.statistics(), requested)
        summary.calculate(sample)
        self.assertEqual(summary.statistic(requested), expected)

        # display name
        self.assertTrue(len(QgsStringStatisticalSummary.displayName(requested)) > 0)
def testVariantStats(self):
    """Check statistics calculated from a list of mixed-type variants."""
    summary = QgsStringStatisticalSummary()
    self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

    variants = ['cc', 5, 'bbbb', 'aaaa', 'eeee', 6, 9, '9', '']
    summary.calculateFromVariants(variants)

    # the expectations below leave the three integer variants out
    self.assertEqual(summary.count(), 6)
    expected_distinct = {'cc', 'aaaa', 'bbbb', 'eeee', '', '9'}
    self.assertEqual(set(summary.distinctValues()), expected_distinct)
    self.assertEqual(summary.countMissing(), 1)
    self.assertEqual(summary.min(), '9')
    self.assertEqual(summary.max(), 'eeee')
def testVariantStats(self):
    """Check statistics calculated from a list of mixed-type variants."""
    summary = QgsStringStatisticalSummary()
    self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

    variants = ["cc", 5, "bbbb", "aaaa", "eeee", 6, 9, "9", ""]
    summary.calculateFromVariants(variants)

    # the expectations below leave the three integer variants out
    self.assertEqual(summary.count(), 6)
    expected_distinct = {"cc", "aaaa", "bbbb", "eeee", "", "9"}
    self.assertEqual(set(summary.distinctValues()), expected_distinct)
    self.assertEqual(summary.countMissing(), 1)
    self.assertEqual(summary.min(), "9")
    self.assertEqual(summary.max(), "eeee")
def testStats(self):
    """Check all statistics for values added in bulk and one at a time.

    The same strings are fed to one summary as a list and to a second
    summary string by string; both must report identical results.
    """
    bulk = QgsStringStatisticalSummary()
    self.assertEqual(bulk.statistics(), QgsStringStatisticalSummary.All)
    strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    bulk.calculate(strings)

    incremental = QgsStringStatisticalSummary()
    for value in strings:
        incremental.addString(value)
    incremental.finalize()

    def check_both(getter, expected):
        """Assert that both summaries return ``expected`` from ``getter``."""
        self.assertEqual(getattr(bulk, getter)(), expected)
        self.assertEqual(getattr(incremental, getter)(), expected)

    check_both('count', 9)
    check_both('countDistinct', 6)
    self.assertEqual(set(bulk.distinctValues()),
                     {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
    self.assertEqual(incremental.distinctValues(), bulk.distinctValues())
    check_both('countMissing', 2)
    check_both('min', 'aaaa')
    check_both('max', 'eeee')
    check_both('minLength', 0)
    check_both('maxLength', 8)
    check_both('meanLength', 3.33333333333333333333333)

    # extra check for minLength without empty strings
    bulk.calculate(['1111111', '111', '11111'])
    self.assertEqual(bulk.minLength(), 3)
def calcStringStats(self, features, feedback, field, count):
    """Summarise the string values of ``field`` over ``features``.

    Returns a ``(data, results)`` tuple: ``data`` is a list of translated
    report lines and ``results`` maps this algorithm's output keys to the
    calculated statistics. The loop stops early when ``feedback`` reports
    cancellation, in which case only the values added so far are summarised.
    """
    # progress per feature; a zero count falls back to a step of 1
    step = 100.0 / count if count else 1

    summary = QgsStringStatisticalSummary()
    for index, feature in enumerate(features):
        if feedback.isCanceled():
            break
        summary.addValue(feature[field.name()])
        feedback.setProgress(int(index * step))
    summary.finalize()

    counted = summary.count()
    unique = summary.countDistinct()
    missing = summary.countMissing()
    lowest = summary.min()
    highest = summary.max()
    shortest = summary.minLength()
    longest = summary.maxLength()
    mean_length = summary.meanLength()

    results = {
        self.COUNT: counted,
        self.UNIQUE: unique,
        self.EMPTY: missing,
        self.FILLED: counted - missing,
        self.MIN: lowest,
        self.MAX: highest,
        self.MIN_LENGTH: shortest,
        self.MAX_LENGTH: longest,
        self.MEAN_LENGTH: mean_length,
    }

    # NOTE: the report prints the ``count`` argument, not summary.count()
    data = [
        self.tr('Count: {}').format(count),
        self.tr('Unique values: {}').format(unique),
        self.tr('NULL (missing) values: {}').format(missing),
        self.tr('Minimum value: {}').format(lowest),
        self.tr('Maximum value: {}').format(highest),
        self.tr('Minimum length: {}').format(shortest),
        self.tr('Maximum length: {}').format(longest),
        self.tr('Mean length: {}').format(mean_length),
    ]
    return data, results
def calcStringStats(self, values, sink, feedback):
    """Write one feature of string statistics per category to ``sink``.

    ``values`` maps a category key to the values collected for it. Each
    output feature holds the category components followed by count,
    distinct count, missing count, filled count, min, max, minimum
    length, maximum length and mean length. Stops early on cancellation.
    """
    summary = QgsStringStatisticalSummary()
    step = 50.0 / len(values) if values else 0

    for index, (category, strings) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        # progress is reported in the 50-100 range
        feedback.setProgress(int(index * step) + 50)

        summary.calculate(strings)
        statistics = [
            summary.count(),
            summary.countDistinct(),
            summary.countMissing(),
            summary.count() - summary.countMissing(),
            summary.min(),
            summary.max(),
            summary.minLength(),
            summary.maxLength(),
            summary.meanLength(),
        ]

        feature = QgsFeature()
        feature.setAttributes(list(category) + statistics)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
def calcStringStats(self, features, progress, field):
    """Calculate string statistics for ``field`` and publish them as outputs.

    Every value of ``field`` found in ``features`` is added to a
    QgsStringStatisticalSummary while ``progress`` is updated. The
    statistics are stored through ``self.setOutputValue`` and also
    returned as a list of translated report lines.

    :param features: sized iterable of features to analyse
    :param progress: object whose ``setPercentage`` receives the progress
    :param field: field whose ``name()`` selects the attribute to read
    :returns: list of report lines
    """
    count = len(features)
    # An empty feature collection must not abort the algorithm with a
    # ZeroDivisionError; the step value is irrelevant in that case because
    # the loop body never runs.
    total = 100.0 / count if count else 1

    field_name = field.name()  # loop invariant
    stat = QgsStringStatisticalSummary()
    for current, ft in enumerate(features):
        stat.addValue(ft[field_name])
        progress.setPercentage(int(current * total))
    stat.finalize()

    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.countDistinct())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
    self.setOutputValue(self.MIN, stat.min())
    self.setOutputValue(self.MAX, stat.max())
    self.setOutputValue(self.MIN_LENGTH, stat.minLength())
    self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
    self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

    data = []
    data.append(self.tr('Count: {}').format(count))
    data.append(self.tr('Unique values: {}').format(stat.countDistinct()))
    data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Minimum value: {}').format(stat.min()))
    data.append(self.tr('Maximum value: {}').format(stat.max()))
    data.append(self.tr('Minimum length: {}').format(stat.minLength()))
    data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
    data.append(self.tr('Mean length: {}').format(stat.meanLength()))
    return data
def testStats(self):
    """Check all statistics calculated from a list of strings."""
    summary = QgsStringStatisticalSummary()
    self.assertEqual(summary.statistics(), QgsStringStatisticalSummary.All)

    sample = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    summary.calculate(sample)

    self.assertEqual(summary.count(), 9)
    self.assertEqual(summary.countDistinct(), 6)
    expected_distinct = {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''}
    self.assertEqual(set(summary.distinctValues()), expected_distinct)
    self.assertEqual(summary.countMissing(), 2)
    self.assertEqual(summary.min(), 'aaaa')
    self.assertEqual(summary.max(), 'eeee')
    self.assertEqual(summary.minLength(), 0)
    self.assertEqual(summary.maxLength(), 8)

    # extra check for minLength without empty strings
    summary.calculate(['1111111', '111', '11111'])
    self.assertEqual(summary.minLength(), 3)
def calcStringStats(self, features, feedback, field):
    """Calculate string statistics for ``field`` and publish them as outputs.

    Every value of ``field`` found in ``features`` is added to a
    QgsStringStatisticalSummary while ``feedback`` is updated. The
    statistics are stored through ``self.setOutputValue`` and also
    returned as a list of translated report lines.

    :param features: sized iterable of features to analyse
    :param feedback: object whose ``setProgress`` receives the progress
    :param field: field whose ``name()`` selects the attribute to read
    :returns: list of report lines
    """
    count = len(features)
    # An empty feature collection must not abort the algorithm with a
    # ZeroDivisionError; the step value is irrelevant in that case because
    # the loop body never runs.
    total = 100.0 / count if count else 1

    field_name = field.name()  # loop invariant
    stat = QgsStringStatisticalSummary()
    for current, ft in enumerate(features):
        stat.addValue(ft[field_name])
        feedback.setProgress(int(current * total))
    stat.finalize()

    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.countDistinct())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
    self.setOutputValue(self.MIN, stat.min())
    self.setOutputValue(self.MAX, stat.max())
    self.setOutputValue(self.MIN_LENGTH, stat.minLength())
    self.setOutputValue(self.MAX_LENGTH, stat.maxLength())
    self.setOutputValue(self.MEAN_LENGTH, stat.meanLength())

    data = []
    data.append(self.tr('Count: {}').format(count))
    data.append(self.tr('Unique values: {}').format(stat.countDistinct()))
    data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Minimum value: {}').format(stat.min()))
    data.append(self.tr('Maximum value: {}').format(stat.max()))
    data.append(self.tr('Minimum length: {}').format(stat.minLength()))
    data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
    data.append(self.tr('Mean length: {}').format(stat.meanLength()))
    return data
def testIndividualStats(self):
    """Check each statistic when it is the only one requested.

    Statistics are calculated one at a time to make sure that the
    statistic calculations are not dependent on each other. Each one is
    checked both for bulk calculation and for strings added one by one.
    """
    cases = (
        (QgsStringStatisticalSummary.Count, 9),
        (QgsStringStatisticalSummary.CountDistinct, 6),
        (QgsStringStatisticalSummary.CountMissing, 2),
        (QgsStringStatisticalSummary.Min, "aaaa"),
        (QgsStringStatisticalSummary.Max, "eeee"),
        (QgsStringStatisticalSummary.MinimumLength, 0),
        (QgsStringStatisticalSummary.MaximumLength, 8),
        (QgsStringStatisticalSummary.MeanLength, 3.3333333333333335),
    )

    bulk = QgsStringStatisticalSummary()
    incremental = QgsStringStatisticalSummary()
    for requested, expected in cases:
        # test constructor
        constructed = QgsStringStatisticalSummary(requested)
        self.assertEqual(constructed.statistics(), requested)

        bulk.setStatistics(requested)
        incremental.setStatistics(requested)
        self.assertEqual(bulk.statistics(), requested)

        strings = ["cc", "aaaa", "bbbbbbbb", "aaaa", "eeee", "", "eeee", "", "dddd"]
        bulk.calculate(strings)

        # the incremental summary is reused, so clear it before refilling
        incremental.reset()
        for value in strings:
            incremental.addString(value)
        incremental.finalize()

        self.assertEqual(bulk.statistic(requested), expected)
        self.assertEqual(incremental.statistic(requested), expected)

        # display name
        self.assertTrue(len(QgsStringStatisticalSummary.displayName(requested)) > 0)
def calcStringStats(self, features, feedback, field, count):
    """Summarise the string values of ``field`` over ``features``.

    Returns a ``(data, results)`` tuple: ``data`` is a list of translated
    report lines and ``results`` maps this algorithm's output keys to the
    calculated statistics. The loop stops early when ``feedback`` reports
    cancellation, in which case only the values added so far are summarised.
    """
    # progress per feature; a zero count falls back to a step of 1
    step = 100.0 / count if count else 1

    summary = QgsStringStatisticalSummary()
    for index, feature in enumerate(features):
        if feedback.isCanceled():
            break
        summary.addValue(feature[field.name()])
        feedback.setProgress(int(index * step))
    summary.finalize()

    counted = summary.count()
    unique = summary.countDistinct()
    missing = summary.countMissing()
    lowest = summary.min()
    highest = summary.max()
    shortest = summary.minLength()
    longest = summary.maxLength()
    mean_length = summary.meanLength()

    results = {
        self.COUNT: counted,
        self.UNIQUE: unique,
        self.EMPTY: missing,
        self.FILLED: counted - missing,
        self.MIN: lowest,
        self.MAX: highest,
        self.MIN_LENGTH: shortest,
        self.MAX_LENGTH: longest,
        self.MEAN_LENGTH: mean_length,
    }

    # NOTE: the report prints the ``count`` argument, not summary.count()
    data = [
        self.tr('Count: {}').format(count),
        self.tr('Unique values: {}').format(unique),
        self.tr('NULL (missing) values: {}').format(missing),
        self.tr('Minimum value: {}').format(lowest),
        self.tr('Maximum value: {}').format(highest),
        self.tr('Minimum length: {}').format(shortest),
        self.tr('Maximum length: {}').format(longest),
        self.tr('Mean length: {}').format(mean_length),
    ]
    return data, results
def processAlgorithm(self, parameters, context, feedback):
    """Join summary statistics of spatially matching features to the input.

    For every input feature, the join-source features that satisfy at
    least one selected predicate are collected, and the selected summaries
    of their join fields are appended to the feature as new attributes.

    Returns a dict mapping ``self.OUTPUT`` to the destination id of the
    sink. Raises QgsProcessingException when either source or the sink
    cannot be resolved.
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))

    join_source = self.parameterAsSource(parameters, self.JOIN, context)
    if join_source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.JOIN))

    join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
    discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)

    selected = sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))
    summaries = [self.statistics[i][0] for i in selected]
    if not summaries:
        # none selected, so use all
        summaries = [entry[0] for entry in self.statistics]

    source_fields = source.fields()
    fields_to_join = QgsFields()
    join_field_indexes = []
    if not join_fields:
        # no fields selected, use all
        join_fields = [
            join_source.fields().at(i).name()
            for i in range(len(join_source.fields()))
        ]

    def addFieldKeepType(original, stat):
        """
        Adds a field to the output, keeping the same data type as the original
        """
        renamed = QgsField(original)
        renamed.setName(renamed.name() + '_' + stat)
        fields_to_join.append(renamed)

    def addField(original, stat, variant_type):
        """
        Adds a field to the output, with a specified type
        """
        retyped = QgsField(original)
        retyped.setName(retyped.name() + '_' + stat)
        retyped.setType(variant_type)
        if variant_type == QVariant.Double:
            retyped.setLength(20)
            retyped.setPrecision(6)
        fields_to_join.append(retyped)

    # Each entry is (summary id, output type or None to keep the original
    # type, getter name on the statistics object where one is used).
    numeric_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'variety'),
        ('min', QVariant.Double, 'min'),
        ('max', QVariant.Double, 'max'),
        ('range', QVariant.Double, 'range'),
        ('sum', QVariant.Double, 'sum'),
        ('mean', QVariant.Double, 'mean'),
        ('median', QVariant.Double, 'median'),
        ('stddev', QVariant.Double, 'stDev'),
        ('minority', QVariant.Double, 'minority'),
        ('majority', QVariant.Double, 'majority'),
        ('q1', QVariant.Double, 'firstQuartile'),
        ('q3', QVariant.Double, 'thirdQuartile'),
        ('iqr', QVariant.Double, 'interQuartileRange'),
    )
    datetime_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None),
        ('max', None),
    )
    string_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None, 'min'),
        ('max', None, 'max'),
        ('min_length', QVariant.Int, 'minLength'),
        ('max_length', QVariant.Int, 'maxLength'),
        ('mean_length', QVariant.Double, 'meanLength'),
    )

    # field_types stays parallel to join_field_indexes: both only receive
    # an entry for join fields that were found in the join source
    field_types = []
    for field_name in join_fields:
        idx = join_source.fields().lookupField(field_name)
        if idx < 0:
            continue
        join_field_indexes.append(idx)

        join_field = join_source.fields().at(idx)
        if join_field.isNumeric():
            kind, candidates = 'numeric', numeric_fields
        elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
            kind, candidates = 'datetime', datetime_fields
        else:
            kind, candidates = 'string', string_fields
        field_types.append(kind)

        for candidate in candidates:
            if candidate[0] not in summaries:
                continue
            if candidate[1] is not None:
                addField(join_field, candidate[0], candidate[1])
            else:
                addFieldKeepType(join_field, candidate[0])

    out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           out_fields, source.wkbType(), source.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    def add_unmatched(feature):
        """
        Writes a feature without joined values to the sink
        """
        # ensure consistent count of attributes - otherwise non matching
        # features will have incorrect attribute length
        # and provider may reject them
        padded = feature.attributes()
        if len(padded) < len(out_fields):
            padded += [NULL] * (len(out_fields) - len(padded))
        feature.setAttributes(padded)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)

    # do the join
    predicates = [
        self.predicates[i][0]
        for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
    ]

    features = source.getFeatures()
    total = 100.0 / source.featureCount() if source.featureCount() else 0

    # bounding box transform
    bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs(), context.project())

    for current, feature in enumerate(features):
        if feedback.isCanceled():
            break

        if not feature.hasGeometry():
            if not discard_nomatch:
                add_unmatched(feature)
            continue

        bbox = bbox_transform.transformBoundingBox(feature.geometry().boundingBox())
        request = QgsFeatureRequest().setFilterRect(bbox)
        request = request.setSubsetOfAttributes(join_field_indexes)
        request = request.setDestinationCrs(source.sourceCrs(), context.transformContext())

        # the geometry engine is only created once a candidate shows up
        engine = None
        matched_rows = []
        for candidate_feature in join_source.getFeatures(request):
            if feedback.isCanceled():
                break

            candidate_attrs = candidate_feature.attributes()
            join_attributes = [candidate_attrs[a] for a in join_field_indexes]

            if engine is None:
                engine = QgsGeometry.createGeometryEngine(feature.geometry().constGet())
                engine.prepareGeometry()

            # a single satisfied predicate is enough for a match
            for predicate in predicates:
                if getattr(engine, predicate)(candidate_feature.geometry().constGet()):
                    matched_rows.append(join_attributes)
                    break

        feedback.setProgress(int(current * total))

        if not matched_rows:
            if not discard_nomatch:
                add_unmatched(feature)
            continue

        attrs = feature.attributes()
        for column, field_type in enumerate(field_types):
            attribute_values = [row[column] for row in matched_rows]

            if field_type == 'numeric':
                stat = QgsStatisticalSummary()
                for value in attribute_values:
                    stat.addVariant(value)
                stat.finalize()
                for spec in numeric_fields:
                    if spec[0] in summaries:
                        attrs.append(getattr(stat, spec[2])())
            elif field_type == 'datetime':
                stat = QgsDateTimeStatisticalSummary()
                stat.calculate(attribute_values)
                for spec in datetime_fields:
                    if spec[0] not in summaries:
                        continue
                    if spec[0] == 'filled':
                        attrs.append(stat.count() - stat.countMissing())
                    elif spec[0] == 'min':
                        attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                    elif spec[0] == 'max':
                        attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                    else:
                        attrs.append(getattr(stat, spec[2])())
            else:
                stat = QgsStringStatisticalSummary()
                for value in attribute_values:
                    # NULL values are added as empty strings
                    stat.addString('' if value == NULL else str(value))
                stat.finalize()
                for spec in string_fields:
                    if spec[0] not in summaries:
                        continue
                    if spec[0] == 'filled':
                        attrs.append(stat.count() - stat.countMissing())
                    else:
                        attrs.append(getattr(stat, spec[2])())

        feature.setAttributes(attrs)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def calcStringStats(self, values, sink, feedback):
    """Write one feature of string statistics per category to ``sink``.

    ``values`` maps a category key to the values collected for it. Each
    output feature holds the category components followed by count,
    distinct count, missing count, filled count, min, max, minimum
    length, maximum length and mean length. Stops early on cancellation.
    """
    summary = QgsStringStatisticalSummary()
    step = 50.0 / len(values) if values else 0

    for index, (category, strings) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        # progress is reported in the 50-100 range
        feedback.setProgress(int(index * step) + 50)

        summary.calculate(strings)
        statistics = [
            summary.count(),
            summary.countDistinct(),
            summary.countMissing(),
            summary.count() - summary.countMissing(),
            summary.min(),
            summary.max(),
            summary.minLength(),
            summary.maxLength(),
            summary.meanLength(),
        ]

        feature = QgsFeature()
        feature.setAttributes(list(category) + statistics)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
def testIndividualStats(self):
    """Check each statistic when it is the only one requested.

    Statistics are calculated one at a time to make sure that the
    statistic calculations are not dependent on each other. Each one is
    checked both for bulk calculation and for strings added one by one.
    """
    cases = (
        (QgsStringStatisticalSummary.Count, 9),
        (QgsStringStatisticalSummary.CountDistinct, 6),
        (QgsStringStatisticalSummary.CountMissing, 2),
        (QgsStringStatisticalSummary.Min, 'aaaa'),
        (QgsStringStatisticalSummary.Max, 'eeee'),
        (QgsStringStatisticalSummary.MinimumLength, 0),
        (QgsStringStatisticalSummary.MaximumLength, 8),
        (QgsStringStatisticalSummary.MeanLength, 3.3333333333333335),
    )

    bulk = QgsStringStatisticalSummary()
    incremental = QgsStringStatisticalSummary()
    for requested, expected in cases:
        # test constructor
        constructed = QgsStringStatisticalSummary(requested)
        self.assertEqual(constructed.statistics(), requested)

        bulk.setStatistics(requested)
        incremental.setStatistics(requested)
        self.assertEqual(bulk.statistics(), requested)

        strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
        bulk.calculate(strings)

        # the incremental summary is reused, so clear it before refilling
        incremental.reset()
        for value in strings:
            incremental.addString(value)
        incremental.finalize()

        self.assertEqual(bulk.statistic(requested), expected)
        self.assertEqual(incremental.statistic(requested), expected)

        # display name
        self.assertTrue(len(QgsStringStatisticalSummary.displayName(requested)) > 0)
def testStats(self):
    """Check all statistics for values added in bulk and one at a time.

    The same strings are fed to one summary as a list and to a second
    summary string by string; both must report identical results.
    """
    bulk = QgsStringStatisticalSummary()
    self.assertEqual(bulk.statistics(), QgsStringStatisticalSummary.All)
    strings = ['cc', 'aaaa', 'bbbbbbbb', 'aaaa', 'eeee', '', 'eeee', '', 'dddd']
    bulk.calculate(strings)

    incremental = QgsStringStatisticalSummary()
    for value in strings:
        incremental.addString(value)
    incremental.finalize()

    def check_both(getter, expected):
        """Assert that both summaries return ``expected`` from ``getter``."""
        self.assertEqual(getattr(bulk, getter)(), expected)
        self.assertEqual(getattr(incremental, getter)(), expected)

    check_both('count', 9)
    check_both('countDistinct', 6)
    self.assertEqual(set(bulk.distinctValues()),
                     {'cc', 'aaaa', 'bbbbbbbb', 'eeee', 'dddd', ''})
    self.assertEqual(incremental.distinctValues(), bulk.distinctValues())
    check_both('countMissing', 2)
    check_both('min', 'aaaa')
    check_both('max', 'eeee')
    check_both('minLength', 0)
    check_both('maxLength', 8)
    check_both('meanLength', 3.33333333333333333333333)

    # extra check for minLength without empty strings
    bulk.calculate(['1111111', '111', '11111'])
    self.assertEqual(bulk.minLength(), 3)
def processAlgorithm(self, feedback):
    """Calculate string statistics for one field of the input layer.

    The field's values are read without geometry, summarised with a
    QgsStringStatisticalSummary, written as a report through
    ``self.createHTML`` and published through ``self.setOutputValue``.

    :param feedback: object whose ``setProgress`` receives the progress
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)
    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    # only the analysed attribute is requested, and no geometry
    request = QgsFeatureRequest().setFlags(
        QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName], layer.fields())

    stat = QgsStringStatisticalSummary()
    features = vector.features(layer, request)
    count = len(features)
    # An empty layer must not abort the algorithm with a ZeroDivisionError;
    # the step value is irrelevant then because the loop body never runs.
    total = 100.0 / count if count else 1
    for current, ft in enumerate(features):
        stat.addValue(ft[fieldName])
        feedback.setProgress(int(current * total))
    stat.finalize()

    data = []
    data.append(self.tr('Analyzed layer: {}').format(layer.name()))
    data.append(self.tr('Analyzed field: {}').format(fieldName))
    data.append(self.tr('Minimum length: {}').format(stat.minLength()))
    data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
    data.append(self.tr('Mean length: {}').format(stat.meanLength()))
    data.append(self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
    data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Count: {}').format(stat.count()))
    data.append(self.tr('Unique: {}').format(stat.countDistinct()))
    data.append(self.tr('Minimum string value: {}').format(stat.min()))
    data.append(self.tr('Maximum string value: {}').format(stat.max()))

    self.createHTML(outputFile, data)

    self.setOutputValue(self.MIN_LEN, stat.minLength())
    self.setOutputValue(self.MAX_LEN, stat.maxLength())
    self.setOutputValue(self.MEAN_LEN, stat.meanLength())
    self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.countDistinct())
    self.setOutputValue(self.MIN_VALUE, stat.min())
    self.setOutputValue(self.MAX_VALUE, stat.max())
def processAlgorithm(self, feedback):
    """Calculate string statistics for one field of the input layer.

    The field's values are read without geometry, summarised with a
    QgsStringStatisticalSummary, written as a report through
    ``self.createHTML`` and published through ``self.setOutputValue``.

    :param feedback: object whose ``setProgress`` receives the progress
    """
    layer = dataobjects.getLayerFromString(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)
    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    # only the analysed attribute is requested, and no geometry
    request = QgsFeatureRequest().setFlags(
        QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName], layer.fields())

    stat = QgsStringStatisticalSummary()
    features = vector.features(layer, request)
    count = len(features)
    # An empty layer must not abort the algorithm with a ZeroDivisionError;
    # the step value is irrelevant then because the loop body never runs.
    total = 100.0 / count if count else 1
    for current, ft in enumerate(features):
        stat.addValue(ft[fieldName])
        feedback.setProgress(int(current * total))
    stat.finalize()

    data = []
    data.append(self.tr('Analyzed layer: {}').format(layer.name()))
    data.append(self.tr('Analyzed field: {}').format(fieldName))
    data.append(self.tr('Minimum length: {}').format(stat.minLength()))
    data.append(self.tr('Maximum length: {}').format(stat.maxLength()))
    data.append(self.tr('Mean length: {}').format(stat.meanLength()))
    data.append(
        self.tr('Filled values: {}').format(stat.count() - stat.countMissing()))
    data.append(
        self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Count: {}').format(stat.count()))
    data.append(self.tr('Unique: {}').format(stat.countDistinct()))
    data.append(self.tr('Minimum string value: {}').format(stat.min()))
    data.append(self.tr('Maximum string value: {}').format(stat.max()))

    self.createHTML(outputFile, data)

    self.setOutputValue(self.MIN_LEN, stat.minLength())
    self.setOutputValue(self.MAX_LEN, stat.maxLength())
    self.setOutputValue(self.MEAN_LEN, stat.meanLength())
    self.setOutputValue(self.FILLED, stat.count() - stat.countMissing())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.countDistinct())
    self.setOutputValue(self.MIN_VALUE, stat.min())
    self.setOutputValue(self.MAX_VALUE, stat.max())