def processAlgorithm(self, parameters, context, feedback):
    """Write per-category statistics of a numeric field to the output table.

    The values of the values field are grouped by the string form of the
    categories field. For every category one record is written holding
    min, max, mean, sample standard deviation, sum and count.

    :param parameters: algorithm parameters; not read directly here, the
        values are fetched through ``self.getParameterValue``.
    :param context: processing context used to resolve the input layer and
        to iterate and count its features.
    :param feedback: feedback object that receives progress updates.
    """
    layer = QgsProcessingUtils.mapLayerFromString(
        self.getParameterValue(self.INPUT_LAYER), context)
    valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
    categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

    output = self.getOutputFromName(self.OUTPUT)
    valuesField = layer.fields().lookupField(valuesFieldName)
    categoriesField = layer.fields().lookupField(categoriesFieldName)

    features = QgsProcessingUtils.getFeatures(layer, context)
    # An empty layer has a feature count of 0; guard the division so the
    # algorithm produces an empty table instead of a ZeroDivisionError.
    featureCount = QgsProcessingUtils.featureCount(layer, context)
    total = 100.0 / featureCount if featureCount else 0

    values = {}
    for current, feat in enumerate(features):
        feedback.setProgress(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[valuesField])
            cat = str(attrs[categoriesField])
        except (TypeError, ValueError):
            # Value cannot be converted to a number: leave the feature out
            # of the statistics rather than aborting the whole run.
            continue
        values.setdefault(cat, []).append(value)

    fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
    writer = output.getTableWriter(fields)
    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min |
                                 QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean |
                                 QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum |
                                 QgsStatisticalSummary.Count)

    # The same summary object is recalculated for every category.
    for cat, v in values.items():
        stat.calculate(v)
        record = [cat, stat.min(), stat.max(), stat.mean(),
                  stat.sampleStDev(), stat.sum(), stat.count()]
        writer.addRecord(record)
def calcNumericStats(self, values, sink, feedback):
    """Add one feature per category holding numeric summary statistics.

    :param values: mapping whose keys are iterables of category values and
        whose values are the numbers collected for that category.
    :param sink: feature sink that receives one feature per category.
    :param feedback: feedback object; polled for cancellation and updated
        with progress, offset by 50.
    """
    summary = QgsStatisticalSummary()
    step = 50.0 / len(values) if values else 0

    for position, (category, numbers) in enumerate(values.items()):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(position * step) + 50)
        summary.calculate(numbers)

        # Category values come first, followed by the statistics.
        attributes = list(category)
        attributes.extend([
            summary.count(),
            summary.variety(),
            summary.min(),
            summary.max(),
            summary.range(),
            summary.sum(),
            summary.mean(),
            summary.median(),
            summary.stDev(),
            summary.minority(),
            summary.majority(),
            summary.firstQuartile(),
            summary.thirdQuartile(),
            summary.interQuartileRange(),
        ])

        feature = QgsFeature()
        feature.setAttributes(attributes)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)
def calcNumericStats(self, values, sink, feedback):
    """Summarise the numbers of each category and write them to ``sink``.

    For every ``(category, numbers)`` pair in ``values`` a feature is added
    whose attributes are the items of the category key followed by count,
    variety, min, max, range, sum, mean, median, standard deviation,
    minority, majority, first quartile, third quartile and interquartile
    range. Progress is reported with an offset of 50, and the loop stops
    as soon as ``feedback`` reports cancellation.
    """
    # Accessor names on QgsStatisticalSummary, in output column order.
    stat_getters = (
        'count', 'variety', 'min', 'max', 'range', 'sum', 'mean', 'median',
        'stDev', 'minority', 'majority', 'firstQuartile', 'thirdQuartile',
        'interQuartileRange',
    )

    summary = QgsStatisticalSummary()
    if values:
        per_category = 50.0 / len(values)
    else:
        per_category = 0

    done = 0
    for key, series in values.items():
        if feedback.isCanceled():
            break
        feedback.setProgress(int(done * per_category) + 50)

        summary.calculate(series)
        row = QgsFeature()
        row.setAttributes(
            list(key) + [getattr(summary, name)() for name in stat_getters])
        sink.addFeature(row, QgsFeatureSink.FastInsert)
        done += 1
def processAlgorithm(self, progress):
    """Write per-category statistics of a numeric field to the output table.

    The values of the values field are grouped by the unicode form of the
    categories field. For every category one record is written holding
    min, max, mean, sample standard deviation, sum and count.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
    categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

    output = self.getOutputFromName(self.OUTPUT)
    valuesField = layer.fieldNameIndex(valuesFieldName)
    categoriesField = layer.fieldNameIndex(categoriesFieldName)

    features = vector.features(layer)
    total = 100.0 / len(features) if len(features) > 0 else 1

    values = {}
    for current, feat in enumerate(features):
        progress.setPercentage(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[valuesField])
            cat = unicode(attrs[categoriesField])
        except (TypeError, ValueError):
            # Only conversion failures are skipped; a bare ``except`` would
            # also hide unrelated errors and interpreter exits.
            continue
        values.setdefault(cat, []).append(value)

    fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
    writer = output.getTableWriter(fields)
    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min |
                                 QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean |
                                 QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum |
                                 QgsStatisticalSummary.Count)

    # The same summary object is recalculated for every category.
    for (cat, v) in values.items():
        stat.calculate(v)
        record = [cat, stat.min(), stat.max(), stat.mean(),
                  stat.sampleStDev(), stat.sum(), stat.count()]
        writer.addRecord(record)
def processAlgorithm(self, context, feedback):
    """Write per-category statistics of a numeric field to the output table.

    Features of the input layer are grouped by the string form of the
    categories field; the values field is converted to ``float`` and
    collected per group. One record per category is written with min, max,
    mean, sample standard deviation, sum and count.

    :param context: processing context used to resolve the input layer and
        to iterate and count its features.
    :param feedback: feedback object that receives progress updates.
    """
    layer = QgsProcessingUtils.mapLayerFromString(
        self.getParameterValue(self.INPUT_LAYER), context)
    valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
    categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

    output = self.getOutputFromName(self.OUTPUT)
    valuesField = layer.fields().lookupField(valuesFieldName)
    categoriesField = layer.fields().lookupField(categoriesFieldName)

    features = QgsProcessingUtils.getFeatures(layer, context)
    # Avoid ZeroDivisionError when the layer contains no features.
    featureCount = QgsProcessingUtils.featureCount(layer, context)
    total = 100.0 / featureCount if featureCount else 0

    values = {}
    for current, feat in enumerate(features):
        feedback.setProgress(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[valuesField])
            cat = str(attrs[categoriesField])
        except (TypeError, ValueError):
            # Non-numeric or missing value: skip this feature only.
            continue
        values.setdefault(cat, []).append(value)

    fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
    writer = output.getTableWriter(fields)
    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min |
                                 QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean |
                                 QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum |
                                 QgsStatisticalSummary.Count)

    for cat, v in values.items():
        stat.calculate(v)
        record = [cat, stat.min(), stat.max(), stat.mean(),
                  stat.sampleStDev(), stat.sum(), stat.count()]
        writer.addRecord(record)
def processAlgorithm(self, progress):
    """Write per-category statistics of a numeric field to the output table.

    Features of the input layer are grouped by the unicode form of the
    categories field; the values field is converted to ``float`` and
    collected per group. One record per category is written with min, max,
    mean, sample standard deviation, sum and count.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
    categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

    output = self.getOutputFromName(self.OUTPUT)
    valuesField = layer.fieldNameIndex(valuesFieldName)
    categoriesField = layer.fieldNameIndex(categoriesFieldName)

    features = vector.features(layer)
    # An empty layer must not abort the algorithm with ZeroDivisionError.
    featureCount = len(features)
    total = 100.0 / featureCount if featureCount > 0 else 1

    values = {}
    for current, feat in enumerate(features):
        progress.setPercentage(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[valuesField])
            cat = unicode(attrs[categoriesField])
        except (TypeError, ValueError):
            # Non-numeric or missing value: skip this feature only.
            continue
        values.setdefault(cat, []).append(value)

    fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
    writer = output.getTableWriter(fields)
    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min |
                                 QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean |
                                 QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum |
                                 QgsStatisticalSummary.Count)

    for (cat, v) in values.items():
        stat.calculate(v)
        record = [cat, stat.min(), stat.max(), stat.mean(),
                  stat.sampleStDev(), stat.sum(), stat.count()]
        writer.addRecord(record)
def processAlgorithm(self, parameters, context, feedback):
    """Write per-category statistics of a numeric field to a feature sink.

    Features are read without geometry and grouped by the raw value of the
    categories field. For every category a geometry-less feature is added
    to the sink with the category value followed by min, max, mean, sample
    standard deviation, sum and count of the values field.

    If ``feedback`` reports cancellation, reading stops and statistics are
    written for the features collected up to that point.

    :param parameters: algorithm parameters.
    :param context: processing context.
    :param feedback: feedback object polled for cancellation and updated
        with progress.
    :return: dict mapping ``self.OUTPUT`` to the destination id of the sink.
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    value_field_name = self.parameterAsString(
        parameters, self.VALUES_FIELD_NAME, context)
    category_field_name = self.parameterAsString(
        parameters, self.CATEGORIES_FIELD_NAME, context)

    value_field_index = source.fields().lookupField(value_field_name)
    category_field_index = source.fields().lookupField(category_field_name)

    features = source.getFeatures(
        QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry))
    # Query the count once instead of twice.
    feature_count = source.featureCount()
    total = 100.0 / feature_count if feature_count else 0

    values = {}
    for current, feat in enumerate(features):
        if feedback.isCanceled():
            break

        feedback.setProgress(int(current * total))
        attrs = feat.attributes()
        try:
            value = float(attrs[value_field_index])
            cat = attrs[category_field_index]
            values.setdefault(cat, []).append(value)
        except (TypeError, ValueError):
            # Value not convertible to float (or category not usable as a
            # dict key): skip this feature instead of hiding every error.
            continue

    fields = QgsFields()
    fields.append(source.fields().at(category_field_index))
    fields.append(QgsField('min', QVariant.Double))
    fields.append(QgsField('max', QVariant.Double))
    fields.append(QgsField('mean', QVariant.Double))
    fields.append(QgsField('stddev', QVariant.Double))
    fields.append(QgsField('sum', QVariant.Double))
    fields.append(QgsField('count', QVariant.Int))

    (sink, dest_id) = self.parameterAsSink(
        parameters, self.OUTPUT, context, fields,
        QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem())

    stat = QgsStatisticalSummary(QgsStatisticalSummary.Min |
                                 QgsStatisticalSummary.Max |
                                 QgsStatisticalSummary.Mean |
                                 QgsStatisticalSummary.StDevSample |
                                 QgsStatisticalSummary.Sum |
                                 QgsStatisticalSummary.Count)

    for cat, v in values.items():
        stat.calculate(v)
        f = QgsFeature()
        f.setAttributes([cat, stat.min(), stat.max(), stat.mean(),
                         stat.sampleStDev(), stat.sum(), stat.count()])
        sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def processAlgorithm(self, parameters, context, feedback):
    """Append summary statistics of spatially matching join features.

    For every feature of the INPUT source, the features of the JOIN source
    whose geometry satisfies at least one of the selected predicates are
    collected. For each joined field the selected summaries are computed
    over the matching features and appended to the input feature's
    attributes, which is then written to the output sink.

    Input features without geometry or without any match are either
    dropped (when DISCARD_NONMATCHING is set) or written with their
    attributes padded with NULL up to the output field count.

    :param parameters: algorithm parameters.
    :param context: processing context (project and transform context are
        read from it).
    :param feedback: feedback object polled for cancellation and updated
        with progress.
    :return: dict mapping ``self.OUTPUT`` to the destination id of the sink.
    :raises QgsProcessingException: if the input source, the join source
        or the output sink is None.
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))

    join_source = self.parameterAsSource(parameters, self.JOIN, context)
    if join_source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.JOIN))

    join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
    discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
    # Names of the selected summaries, in ascending enum index order
    summaries = [
        self.statistics[i][0] for i in sorted(
            self.parameterAsEnums(parameters, self.SUMMARIES, context))
    ]

    if not summaries:
        # none selected, so use all
        summaries = [s[0] for s in self.statistics]

    source_fields = source.fields()
    fields_to_join = QgsFields()
    join_field_indexes = []
    if not join_fields:
        # no fields selected, use all
        join_fields = [
            join_source.fields().at(i).name()
            for i in range(len(join_source.fields()))
        ]

    def addFieldKeepType(original, stat):
        """
        Adds a field to the output, keeping the same data type as the original
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        fields_to_join.append(field)

    def addField(original, stat, type):
        """
        Adds a field to the output, with a specified type
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        field.setType(type)
        if type == QVariant.Double:
            field.setLength(20)
            field.setPrecision(6)
        fields_to_join.append(field)

    # Each entry: (summary name, output field type or None to keep the
    # original type, name of the accessor method on the summary object).
    # Entries without a third item are computed by dedicated branches below.
    numeric_fields = (('count', QVariant.Int, 'count'), ('unique', QVariant.Int, 'variety'), ('min', QVariant.Double, 'min'), ('max', QVariant.Double, 'max'), ('range', QVariant.Double, 'range'), ('sum', QVariant.Double, 'sum'), ('mean', QVariant.Double, 'mean'), ('median', QVariant.Double, 'median'), ('stddev', QVariant.Double, 'stDev'), ('minority', QVariant.Double, 'minority'), ('majority', QVariant.Double, 'majority'), ('q1', QVariant.Double, 'firstQuartile'), ('q3', QVariant.Double, 'thirdQuartile'), ('iqr', QVariant.Double, 'interQuartileRange'))

    datetime_fields = (('count', QVariant.Int, 'count'), ('unique', QVariant.Int, 'countDistinct'), ('empty', QVariant.Int, 'countMissing'), ('filled', QVariant.Int), ('min', None), ('max', None))

    string_fields = (('count', QVariant.Int, 'count'), ('unique', QVariant.Int, 'countDistinct'), ('empty', QVariant.Int, 'countMissing'), ('filled', QVariant.Int), ('min', None, 'min'), ('max', None, 'max'), ('min_length', QVariant.Int, 'minLength'), ('max_length', QVariant.Int, 'maxLength'), ('mean_length', QVariant.Double, 'meanLength'))

    # field_types stays parallel to join_field_indexes: one entry is added
    # to each for every join field that is found in the join source.
    field_types = []
    for f in join_fields:
        idx = join_source.fields().lookupField(f)
        if idx >= 0:
            join_field_indexes.append(idx)

            join_field = join_source.fields().at(idx)
            if join_field.isNumeric():
                field_types.append('numeric')
                field_list = numeric_fields
            elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                field_types.append('datetime')
                field_list = datetime_fields
            else:
                field_types.append('string')
                field_list = string_fields

            # NOTE: this loop rebinds ``f``; the outer loop is unaffected
            # because its iterator supplies the next field name.
            for f in field_list:
                if f[0] in summaries:
                    if f[1] is not None:
                        addField(join_field, f[0], f[1])
                    else:
                        addFieldKeepType(join_field, f[0])

    out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context, out_fields, source.wkbType(), source.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    # do the join
    predicates = [
        self.predicates[i][0]
        for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
    ]

    features = source.getFeatures()
    total = 100.0 / source.featureCount() if source.featureCount() else 0

    # bounding box transform
    bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs(), context.project())

    for current, f in enumerate(features):
        if feedback.isCanceled():
            break

        if not f.hasGeometry():
            if not discard_nomatch:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
            continue

        bbox = bbox_transform.transformBoundingBox(
            f.geometry().boundingBox())
        # The geometry engine is created lazily, only once a candidate
        # join feature is actually returned for this input feature.
        engine = None

        # One list of joined attribute values per matching join feature
        values = []

        request = QgsFeatureRequest().setFilterRect(
            bbox).setSubsetOfAttributes(
                join_field_indexes).setDestinationCrs(
                    source.sourceCrs(), context.transformContext())
        for test_feat in join_source.getFeatures(request):
            if feedback.isCanceled():
                break

            join_attributes = []
            for a in join_field_indexes:
                join_attributes.append(test_feat.attributes()[a])

            if engine is None:
                engine = QgsGeometry.createGeometryEngine(
                    f.geometry().constGet())
                engine.prepareGeometry()

            # A join feature counts once, as soon as any predicate matches
            for predicate in predicates:
                if getattr(engine, predicate)(test_feat.geometry().constGet()):
                    values.append(join_attributes)
                    break

        feedback.setProgress(int(current * total))

        if len(values) == 0:
            if discard_nomatch:
                continue
            else:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
        else:
            attrs = f.attributes()
            for i in range(len(join_field_indexes)):
                # Values of the i-th joined field across all matches
                attribute_values = [v[i] for v in values]
                field_type = field_types[i]
                if field_type == 'numeric':
                    stat = QgsStatisticalSummary()
                    for v in attribute_values:
                        stat.addVariant(v)
                    stat.finalize()
                    for s in numeric_fields:
                        if s[0] in summaries:
                            attrs.append(getattr(stat, s[2])())
                elif field_type == 'datetime':
                    stat = QgsDateTimeStatisticalSummary()
                    stat.calculate(attribute_values)
                    for s in datetime_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                attrs.append(stat.count() - stat.countMissing())
                            elif s[0] == 'min':
                                attrs.append(
                                    stat.statistic(
                                        QgsDateTimeStatisticalSummary.Min))
                            elif s[0] == 'max':
                                attrs.append(
                                    stat.statistic(
                                        QgsDateTimeStatisticalSummary.Max))
                            else:
                                attrs.append(getattr(stat, s[2])())
                else:
                    stat = QgsStringStatisticalSummary()
                    for v in attribute_values:
                        # NULL values are fed to the summary as empty strings
                        if v == NULL:
                            stat.addString('')
                        else:
                            stat.addString(str(v))
                    stat.finalize()
                    for s in string_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                attrs.append(stat.count() - stat.countMissing())
                            else:
                                attrs.append(getattr(stat, s[2])())

            f.setAttributes(attrs)
            sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def processAlgorithm(self, parameters, context, feedback):
    """Append summary statistics of spatially matching join features.

    For every feature of the INPUT source, the features of the JOIN source
    whose geometry satisfies at least one of the selected predicates are
    collected. For each joined field the selected summaries are computed
    over the matching features and appended to the input feature's
    attributes, which is then written to the output sink.

    Input features without geometry or without any match are either
    dropped (when DISCARD_NONMATCHING is set) or written with their
    attributes padded with NULL up to the output field count.

    :param parameters: algorithm parameters.
    :param context: processing context (project and transform context are
        read from it).
    :param feedback: feedback object polled for cancellation and updated
        with progress.
    :return: dict mapping ``self.OUTPUT`` to the destination id of the sink.
    :raises QgsProcessingException: if the input source, the join source
        or the output sink could not be created.
    """
    # Fail with a descriptive processing error instead of an
    # AttributeError on None when a source cannot be loaded.
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))

    join_source = self.parameterAsSource(parameters, self.JOIN, context)
    if join_source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.JOIN))

    join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
    discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
    summaries = [self.statistics[i][0] for i in
                 sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

    if not summaries:
        # none selected, so use all
        summaries = [s[0] for s in self.statistics]

    source_fields = source.fields()
    fields_to_join = QgsFields()
    join_field_indexes = []
    if not join_fields:
        # no fields selected, use all
        join_fields = [join_source.fields().at(i).name()
                       for i in range(len(join_source.fields()))]

    def addFieldKeepType(original, stat):
        """
        Adds a field to the output, keeping the same data type as the original
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        fields_to_join.append(field)

    def addField(original, stat, type):
        """
        Adds a field to the output, with a specified type
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        field.setType(type)
        if type == QVariant.Double:
            field.setLength(20)
            field.setPrecision(6)
        fields_to_join.append(field)

    # Each entry: (summary name, output field type or None to keep the
    # original type, name of the accessor method on the summary object).
    # Entries without a third item are computed by dedicated branches below.
    numeric_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'variety'),
        ('min', QVariant.Double, 'min'),
        ('max', QVariant.Double, 'max'),
        ('range', QVariant.Double, 'range'),
        ('sum', QVariant.Double, 'sum'),
        ('mean', QVariant.Double, 'mean'),
        ('median', QVariant.Double, 'median'),
        ('stddev', QVariant.Double, 'stDev'),
        ('minority', QVariant.Double, 'minority'),
        ('majority', QVariant.Double, 'majority'),
        ('q1', QVariant.Double, 'firstQuartile'),
        ('q3', QVariant.Double, 'thirdQuartile'),
        ('iqr', QVariant.Double, 'interQuartileRange')
    )

    datetime_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None),
        ('max', None)
    )

    string_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None, 'min'),
        ('max', None, 'max'),
        ('min_length', QVariant.Int, 'minLength'),
        ('max_length', QVariant.Int, 'maxLength'),
        ('mean_length', QVariant.Double, 'meanLength')
    )

    # field_types stays parallel to join_field_indexes: one entry is added
    # to each for every join field that is found in the join source.
    field_types = []
    for field_name in join_fields:
        idx = join_source.fields().lookupField(field_name)
        if idx < 0:
            continue
        join_field_indexes.append(idx)

        join_field = join_source.fields().at(idx)
        if join_field.isNumeric():
            field_types.append('numeric')
            field_list = numeric_fields
        elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
            field_types.append('datetime')
            field_list = datetime_fields
        else:
            field_types.append('string')
            field_list = string_fields

        for definition in field_list:
            if definition[0] in summaries:
                if definition[1] is not None:
                    addField(join_field, definition[0], definition[1])
                else:
                    addFieldKeepType(join_field, definition[0])

    out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           out_fields, source.wkbType(), source.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    def addUnmatched(feature):
        """
        Writes a feature that received no join values
        """
        # ensure consistent count of attributes - otherwise non matching
        # features will have incorrect attribute length
        # and provider may reject them
        attrs = feature.attributes()
        if len(attrs) < len(out_fields):
            attrs += [NULL] * (len(out_fields) - len(attrs))
        feature.setAttributes(attrs)
        sink.addFeature(feature, QgsFeatureSink.FastInsert)

    # do the join
    predicates = [self.predicates[i][0] for i in
                  self.parameterAsEnums(parameters, self.PREDICATE, context)]

    features = source.getFeatures()
    feature_count = source.featureCount()
    total = 100.0 / feature_count if feature_count else 0

    # bounding box transform
    bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs(), context.project())

    for current, f in enumerate(features):
        if feedback.isCanceled():
            break

        if not f.hasGeometry():
            if not discard_nomatch:
                addUnmatched(f)
            continue

        bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
        # The geometry engine is created lazily, only once a candidate
        # join feature is actually returned for this input feature.
        engine = None

        # One list of joined attribute values per matching join feature
        values = []

        request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(
            join_field_indexes).setDestinationCrs(source.sourceCrs(), context.transformContext())
        for test_feat in join_source.getFeatures(request):
            if feedback.isCanceled():
                break

            test_attrs = test_feat.attributes()
            join_attributes = [test_attrs[a] for a in join_field_indexes]

            if engine is None:
                engine = QgsGeometry.createGeometryEngine(f.geometry().constGet())
                engine.prepareGeometry()

            # A join feature counts once, as soon as any predicate matches
            for predicate in predicates:
                if getattr(engine, predicate)(test_feat.geometry().constGet()):
                    values.append(join_attributes)
                    break

        feedback.setProgress(int(current * total))

        if not values:
            if not discard_nomatch:
                addUnmatched(f)
            continue

        attrs = f.attributes()
        for i, field_type in enumerate(field_types):
            # Values of the i-th joined field across all matches
            attribute_values = [v[i] for v in values]
            if field_type == 'numeric':
                stat = QgsStatisticalSummary()
                for v in attribute_values:
                    stat.addVariant(v)
                stat.finalize()
                for s in numeric_fields:
                    if s[0] in summaries:
                        attrs.append(getattr(stat, s[2])())
            elif field_type == 'datetime':
                stat = QgsDateTimeStatisticalSummary()
                stat.calculate(attribute_values)
                for s in datetime_fields:
                    if s[0] in summaries:
                        if s[0] == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        elif s[0] == 'min':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                        elif s[0] == 'max':
                            attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                        else:
                            attrs.append(getattr(stat, s[2])())
            else:
                stat = QgsStringStatisticalSummary()
                for v in attribute_values:
                    # NULL values are fed to the summary as empty strings
                    if v == NULL:
                        stat.addString('')
                    else:
                        stat.addString(str(v))
                stat.finalize()
                for s in string_fields:
                    if s[0] in summaries:
                        if s[0] == 'filled':
                            attrs.append(stat.count() - stat.countMissing())
                        else:
                            attrs.append(getattr(stat, s[2])())

        f.setAttributes(attrs)
        sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def processAlgorithm(self, progress):
    """Compute basic statistics of a numeric field and publish them.

    Every feature's value for the selected field is read. Values that are
    truthy or equal to 0 are converted to ``float`` and summarised; all
    others are counted as NULL (missing). The results are written to the
    HTML output file and set as algorithm output values.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)

    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    # Stays 0 when the mean is 0, where the ratio is undefined.
    cvValue = 0
    nullValues = 0
    values = []

    features = vector.features(layer)
    featureCount = len(features)
    # Guard against an empty layer, which would otherwise raise
    # ZeroDivisionError before any output is produced.
    total = 100.0 / featureCount if featureCount > 0 else 1
    for current, ft in enumerate(features):
        value = ft[fieldName]
        # ``value == 0`` keeps legitimate zeros, which are falsy.
        if value or value == 0:
            values.append(float(value))
        else:
            nullValues += 1

        progress.setPercentage(int(current * total))

    stat = QgsStatisticalSummary()
    stat.calculate(values)

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    if meanValue != 0.00:
        cvValue = stdDevValue / meanValue
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()

    data = [
        self.tr('Analyzed layer: {}').format(layer.name()),
        self.tr('Analyzed field: {}').format(fieldName),
        self.tr('Count: {}').format(count),
        self.tr('Unique values: {}').format(uniqueValue),
        self.tr('Minimum value: {}').format(minValue),
        self.tr('Maximum value: {}').format(maxValue),
        self.tr('Range: {}').format(rValue),
        self.tr('Sum: {}').format(sumValue),
        self.tr('Mean value: {}').format(meanValue),
        self.tr('Median value: {}').format(medianValue),
        self.tr('Standard deviation: {}').format(stdDevValue),
        self.tr('Coefficient of Variation: {}').format(cvValue),
        self.tr('Minority (rarest occurring value): {}').format(minority),
        self.tr('Majority (most frequently occurring value): {}').format(majority),
        self.tr('First quartile: {}').format(firstQuartile),
        self.tr('Third quartile: {}').format(thirdQuartile),
        self.tr('NULL (missing) values: {}').format(nullValues),
        self.tr('Interquartile Range (IQR): {}').format(iqr),
    ]

    self.createHTML(outputFile, data)

    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.NULLVALUES, nullValues)
    self.setOutputValue(self.IQR, iqr)
def processAlgorithm(self, progress):
    """Compute basic statistics of a numeric field and publish them.

    Field values that are truthy or equal to 0 are converted to ``float``
    and fed to a ``QgsStatisticalSummary``; every other value increments
    the NULL (missing) counter. The statistics are rendered through
    ``self.createHTML`` and exposed through ``self.setOutputValue``.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)

    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    values = []
    nullValues = 0

    features = vector.features(layer)
    featureCount = len(features)
    # An empty layer must not abort the algorithm with ZeroDivisionError.
    total = 100.0 / featureCount if featureCount > 0 else 1
    for current, ft in enumerate(features):
        value = ft[fieldName]
        # ``value == 0`` keeps legitimate zeros, which are falsy.
        if value or value == 0:
            values.append(float(value))
        else:
            nullValues += 1

        progress.setPercentage(int(current * total))

    stat = QgsStatisticalSummary()
    stat.calculate(values)

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    # The coefficient of variation is reported as 0 when the mean is 0.
    cvValue = stdDevValue / meanValue if meanValue != 0.00 else 0
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()

    data = []
    data.append(self.tr('Analyzed layer: {}').format(layer.name()))
    data.append(self.tr('Analyzed field: {}').format(fieldName))
    data.append(self.tr('Count: {}').format(count))
    data.append(self.tr('Unique values: {}').format(uniqueValue))
    data.append(self.tr('Minimum value: {}').format(minValue))
    data.append(self.tr('Maximum value: {}').format(maxValue))
    data.append(self.tr('Range: {}').format(rValue))
    data.append(self.tr('Sum: {}').format(sumValue))
    data.append(self.tr('Mean value: {}').format(meanValue))
    data.append(self.tr('Median value: {}').format(medianValue))
    data.append(self.tr('Standard deviation: {}').format(stdDevValue))
    data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
    data.append(
        self.tr('Minority (rarest occurring value): {}').format(minority))
    data.append(
        self.tr('Majority (most frequently occurring value): {}').format(
            majority))
    data.append(self.tr('First quartile: {}').format(firstQuartile))
    data.append(self.tr('Third quartile: {}').format(thirdQuartile))
    data.append(self.tr('NULL (missing) values: {}').format(nullValues))
    data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

    self.createHTML(outputFile, data)

    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.NULLVALUES, nullValues)
    self.setOutputValue(self.IQR, iqr)
def processAlgorithm(self, progress):
    """Compute basic statistics of a numeric field and publish them.

    The attribute at the selected field's index is read from every
    feature. Values that are truthy or equal to 0 are converted to
    ``float`` and summarised; other values are ignored. The statistics are
    written to the HTML output file and set as algorithm output values.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)

    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    index = layer.fieldNameIndex(fieldName)

    # Stays 0 when the mean is 0, where the ratio is undefined.
    cvValue = 0
    values = []

    features = vector.features(layer)
    featureCount = len(features)
    # Guard against an empty layer, which would otherwise raise
    # ZeroDivisionError before any output is produced.
    total = 100.0 / featureCount if featureCount > 0 else 1
    for current, ft in enumerate(features, 1):
        value = ft.attributes()[index]
        # A plain truthiness test would silently drop legitimate 0 values
        # and skew every statistic; keep them explicitly.
        if value or value == 0:
            values.append(float(value))
        progress.setPercentage(int(current * total))

    stat = QgsStatisticalSummary()
    stat.calculate(values)

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    if meanValue != 0.00:
        cvValue = stdDevValue / meanValue
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()

    data = []
    data.append('Count: ' + unicode(count))
    data.append('Unique values: ' + unicode(uniqueValue))
    data.append('Minimum value: ' + unicode(minValue))
    data.append('Maximum value: ' + unicode(maxValue))
    data.append('Range: ' + unicode(rValue))
    data.append('Sum: ' + unicode(sumValue))
    data.append('Mean value: ' + unicode(meanValue))
    data.append('Median value: ' + unicode(medianValue))
    data.append('Standard deviation: ' + unicode(stdDevValue))
    data.append('Coefficient of Variation: ' + unicode(cvValue))
    data.append('Minority (rarest occurring value): ' + unicode(minority))
    data.append('Majority (most frequently occurring value): ' + unicode(majority))
    data.append('First quartile: ' + unicode(firstQuartile))
    data.append('Third quartile: ' + unicode(thirdQuartile))
    data.append('Interquartile Range (IQR): ' + unicode(iqr))

    self.createHTML(outputFile, data)

    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.IQR, iqr)
def processAlgorithm(self, progress):
    """Compute basic statistics of a numeric field and publish them.

    The attribute at the selected field's index is read from every
    feature. Values that are truthy or equal to 0 are converted to
    ``float`` and summarised; other values are ignored. The report, headed
    by the analysed layer and field names, is written to the HTML output
    file and the statistics are set as algorithm output values.

    :param progress: progress object updated through ``setPercentage``.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)

    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    index = layer.fieldNameIndex(fieldName)

    values = []

    features = vector.features(layer)
    featureCount = len(features)
    # An empty layer must not abort the algorithm with ZeroDivisionError.
    total = 100.0 / featureCount if featureCount > 0 else 1
    for current, ft in enumerate(features, 1):
        value = ft.attributes()[index]
        # A plain truthiness test would silently drop legitimate 0 values
        # and skew every statistic; keep them explicitly.
        if value or value == 0:
            values.append(float(value))
        progress.setPercentage(int(current * total))

    stat = QgsStatisticalSummary()
    stat.calculate(values)

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    # The coefficient of variation is reported as 0 when the mean is 0.
    cvValue = stdDevValue / meanValue if meanValue != 0.00 else 0
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()

    data = [
        'Analyzed layer: ' + layer.name(),
        'Analyzed field: ' + fieldName,
        'Count: ' + unicode(count),
        'Unique values: ' + unicode(uniqueValue),
        'Minimum value: ' + unicode(minValue),
        'Maximum value: ' + unicode(maxValue),
        'Range: ' + unicode(rValue),
        'Sum: ' + unicode(sumValue),
        'Mean value: ' + unicode(meanValue),
        'Median value: ' + unicode(medianValue),
        'Standard deviation: ' + unicode(stdDevValue),
        'Coefficient of Variation: ' + unicode(cvValue),
        'Minority (rarest occurring value): ' + unicode(minority),
        'Majority (most frequently occurring value): ' + unicode(majority),
        'First quartile: ' + unicode(firstQuartile),
        'Third quartile: ' + unicode(thirdQuartile),
        'Interquartile Range (IQR): ' + unicode(iqr),
    ]

    self.createHTML(outputFile, data)

    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.IQR, iqr)