def processAlgorithm(self, progress):
        layer = dataobjects.getObjectFromUri(self.getParameterValue(self.INPUT_LAYER))
        valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
        categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

        output = self.getOutputFromName(self.OUTPUT)
        valuesField = layer.fieldNameIndex(valuesFieldName)
        categoriesField = layer.fieldNameIndex(categoriesFieldName)

        features = vector.features(layer)
        total = 100.0 / len(features) if len(features) > 0 else 1
        values = {}
        for current, feat in enumerate(features):
            progress.setPercentage(int(current * total))
            attrs = feat.attributes()
            try:
                value = float(attrs[valuesField])
                cat = unicode(attrs[categoriesField])
                if cat not in values:
                    values[cat] = []
                values[cat].append(value)
            except:
                pass

        fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
        writer = output.getTableWriter(fields)
        stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
                                     QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
                                     QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)

        for (cat, v) in values.items():
            stat.calculate(v)
            record = [cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]
            writer.addRecord(record)
Beispiel #2
0
    def processAlgorithm(self, parameters, context, feedback):
        layer = QgsProcessingUtils.mapLayerFromString(self.getParameterValue(self.INPUT_LAYER), context)
        valuesFieldName = self.getParameterValue(self.VALUES_FIELD_NAME)
        categoriesFieldName = self.getParameterValue(self.CATEGORIES_FIELD_NAME)

        output = self.getOutputFromName(self.OUTPUT)
        valuesField = layer.fields().lookupField(valuesFieldName)
        categoriesField = layer.fields().lookupField(categoriesFieldName)

        features = QgsProcessingUtils.getFeatures(layer, context)
        total = 100.0 / QgsProcessingUtils.featureCount(layer, context)
        values = {}
        for current, feat in enumerate(features):
            feedback.setProgress(int(current * total))
            attrs = feat.attributes()
            try:
                value = float(attrs[valuesField])
                cat = str(attrs[categoriesField])
                if cat not in values:
                    values[cat] = []
                values[cat].append(value)
            except:
                pass

        fields = ['category', 'min', 'max', 'mean', 'stddev', 'sum', 'count']
        writer = output.getTableWriter(fields)
        stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
                                     QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
                                     QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)

        for (cat, v) in list(values.items()):
            stat.calculate(v)
            record = [cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()]
            writer.addRecord(record)
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
        category_field_name = self.parameterAsString(parameters, self.CATEGORIES_FIELD_NAME, context)

        value_field_index = source.fields().lookupField(value_field_name)
        category_field_index = source.fields().lookupField(category_field_name)

        features = source.getFeatures(QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry))
        total = 100.0 / source.featureCount() if source.featureCount() else 0
        values = {}
        for current, feat in enumerate(features):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(current * total))
            attrs = feat.attributes()
            try:
                value = float(attrs[value_field_index])
                cat = attrs[category_field_index]
                if cat not in values:
                    values[cat] = []
                values[cat].append(value)
            except:
                pass

        fields = QgsFields()
        fields.append(source.fields().at(category_field_index))
        fields.append(QgsField('min', QVariant.Double))
        fields.append(QgsField('max', QVariant.Double))
        fields.append(QgsField('mean', QVariant.Double))
        fields.append(QgsField('stddev', QVariant.Double))
        fields.append(QgsField('sum', QVariant.Double))
        fields.append(QgsField('count', QVariant.Int))

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               fields, QgsWkbTypes.NoGeometry, QgsCoordinateReferenceSystem())

        stat = QgsStatisticalSummary(QgsStatisticalSummary.Min | QgsStatisticalSummary.Max |
                                     QgsStatisticalSummary.Mean | QgsStatisticalSummary.StDevSample |
                                     QgsStatisticalSummary.Sum | QgsStatisticalSummary.Count)

        for (cat, v) in list(values.items()):
            stat.calculate(v)
            f = QgsFeature()
            f.setAttributes([cat, stat.min(), stat.max(), stat.mean(), stat.sampleStDev(), stat.sum(), stat.count()])
            sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Beispiel #4
0
    def processAlgorithm(self, progress):
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName], layer.fields())
        stat = QgsStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        total = 100.0 / float(count)
        for current, ft in enumerate(features):
            stat.addVariant(ft[fieldName])
            progress.setPercentage(int(current * total))

        stat.finalize()

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        cvValue = stdDevValue / meanValue if meanValue != 0 else 0
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()
        nullValues = stat.countMissing()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(uniqueValue))
        data.append(self.tr('Minimum value: {}').format(minValue))
        data.append(self.tr('Maximum value: {}').format(maxValue))
        data.append(self.tr('Range: {}').format(rValue))
        data.append(self.tr('Sum: {}').format(sumValue))
        data.append(self.tr('Mean value: {}').format(meanValue))
        data.append(self.tr('Median value: {}').format(medianValue))
        data.append(self.tr('Standard deviation: {}').format(stdDevValue))
        data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
        data.append(self.tr('Minority (rarest occurring value): {}').format(minority))
        data.append(self.tr('Majority (most frequently occurring value): {}').format(majority))
        data.append(self.tr('First quartile: {}').format(firstQuartile))
        data.append(self.tr('Third quartile: {}').format(thirdQuartile))
        data.append(self.tr('NULL (missing) values: {}').format(nullValues))
        data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.NULLVALUES, nullValues)
        self.setOutputValue(self.IQR, iqr)
Beispiel #5
0
    def calcNumericStats(self, features, feedback, field, count):
        total = 100.0 / count if count else 0
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            if feedback.isCanceled():
                break
            stat.addVariant(ft[field.name()])
            feedback.setProgress(int(current * total))
        stat.finalize()

        cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0

        results = {self.COUNT: stat.count(),
                   self.UNIQUE: stat.variety(),
                   self.EMPTY: stat.countMissing(),
                   self.FILLED: count - stat.countMissing(),
                   self.MIN: stat.min(),
                   self.MAX: stat.max(),
                   self.RANGE: stat.range(),
                   self.SUM: stat.sum(),
                   self.MEAN: stat.mean(),
                   self.MEDIAN: stat.median(),
                   self.STD_DEV: stat.stDev(),
                   self.CV: cv,
                   self.MINORITY: stat.minority(),
                   self.MAJORITY: stat.majority(),
                   self.FIRSTQUARTILE: stat.firstQuartile(),
                   self.THIRDQUARTILE: stat.thirdQuartile(),
                   self.IQR: stat.interQuartileRange()}

        data = []
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique values: {}').format(stat.variety()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Range: {}').format(stat.range()))
        data.append(self.tr('Sum: {}').format(stat.sum()))
        data.append(self.tr('Mean value: {}').format(stat.mean()))
        data.append(self.tr('Median value: {}').format(stat.median()))
        data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
        data.append(self.tr('Coefficient of Variation: {}').format(cv))
        data.append(self.tr('Minority (rarest occurring value): {}').format(stat.minority()))
        data.append(self.tr('Majority (most frequently occurring value): {}').format(stat.majority()))
        data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
        data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
        data.append(self.tr('Interquartile Range (IQR): {}').format(stat.interQuartileRange()))
        return data, results
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
        discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
        summaries = [self.statistics[i][0] for i in
                     sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [join_source.fields().at(i).name() for i in range(len(join_source.fields()))]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(type)
            if type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        numeric_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'variety'),
            ('min', QVariant.Double, 'min'),
            ('max', QVariant.Double, 'max'),
            ('range', QVariant.Double, 'range'),
            ('sum', QVariant.Double, 'sum'),
            ('mean', QVariant.Double, 'mean'),
            ('median', QVariant.Double, 'median'),
            ('stddev', QVariant.Double, 'stDev'),
            ('minority', QVariant.Double, 'minority'),
            ('majority', QVariant.Double, 'majority'),
            ('q1', QVariant.Double, 'firstQuartile'),
            ('q3', QVariant.Double, 'thirdQuartile'),
            ('iqr', QVariant.Double, 'interQuartileRange')
        )

        datetime_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None),
            ('max', None)
        )

        string_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None, 'min'),
            ('max', None, 'max'),
            ('min_length', QVariant.Int, 'minLength'),
            ('max_length', QVariant.Int, 'maxLength'),
            ('mean_length', QVariant.Double, 'meanLength')
        )

        field_types = []
        for f in join_fields:
            idx = join_source.fields().lookupField(f)
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source.fields().at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                for f in field_list:
                    if f[0] in summaries:
                        if f[1] is not None:
                            addField(join_field, f[0], f[1])
                        else:
                            addFieldKeepType(join_field, f[0])

        out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               out_fields, source.wkbType(), source.sourceCrs())

        # do the join
        predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs(), context.project())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            if not f.hasGeometry():
                if not discard_nomatch:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
            engine = None

            values = []

            request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(join_field_indexes).setDestinationCrs(source.sourceCrs(), context.transformContext())
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                join_attributes = []
                for a in join_field_indexes:
                    join_attributes.append(test_feat.attributes()[a])

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(f.geometry().constGet())
                    engine.prepareGeometry()

                for predicate in predicates:
                    if getattr(engine, predicate)(test_feat.geometry().constGet()):
                        values.append(join_attributes)
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
            else:
                attrs = f.attributes()
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        for v in attribute_values:
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Beispiel #7
0
    def calcNumericStats(self, values, sink, feedback):
        stat = QgsStatisticalSummary()

        total = 50.0 / len(values) if values else 0
        current = 0
        for cat, v in values.items():
            if feedback.isCanceled():
                break

            feedback.setProgress(int(current * total) + 50)

            stat.calculate(v)
            f = QgsFeature()
            f.setAttributes(list(cat) + [stat.count(),
                                         stat.variety(),
                                         stat.min(),
                                         stat.max(),
                                         stat.range(),
                                         stat.sum(),
                                         stat.mean(),
                                         stat.median(),
                                         stat.stDev(),
                                         stat.minority(),
                                         stat.majority(),
                                         stat.firstQuartile(),
                                         stat.thirdQuartile(),
                                         stat.interQuartileRange()])

            sink.addFeature(f, QgsFeatureSink.FastInsert)
            current += 1
Beispiel #8
0
    def processAlgorithm(self, progress):
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        index = layer.fieldNameIndex(fieldName)

        cvValue = 0
        minValue = 0
        maxValue = 0
        sumValue = 0
        meanValue = 0
        medianValue = 0
        stdDevValue = 0
        minority = 0
        majority = 0
        firstQuartile = 0
        thirdQuartile = 0
        iqr = 0

        isFirst = True
        values = []

        features = vector.features(layer)
        count = len(features)
        total = 100.0 / float(count)
        current = 0
        for ft in features:
            if ft.attributes()[index]:
                values.append(float(ft.attributes()[index]))

            current += 1
            progress.setPercentage(int(current * total))

        stat = QgsStatisticalSummary()
        stat.calculate(values)

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        if meanValue != 0.00:
            cvValue = stdDevValue / meanValue
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()

        data = []
        data.append('Count: ' + unicode(count))
        data.append('Unique values: ' + unicode(uniqueValue))
        data.append('Minimum value: ' + unicode(minValue))
        data.append('Maximum value: ' + unicode(maxValue))
        data.append('Range: ' + unicode(rValue))
        data.append('Sum: ' + unicode(sumValue))
        data.append('Mean value: ' + unicode(meanValue))
        data.append('Median value: ' + unicode(medianValue))
        data.append('Standard deviation: ' + unicode(stdDevValue))
        data.append('Coefficient of Variation: ' + unicode(cvValue))
        data.append('Minority (rarest occurring value): ' + unicode(minority))
        data.append('Majority (most frequently occurring value): ' + unicode(majority))
        data.append('First quartile: ' + unicode(firstQuartile))
        data.append('Third quartile: ' + unicode(thirdQuartile))
        data.append('Interquartile Range (IQR): ' + unicode(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.IQR, iqr)
Beispiel #9
0
    def calcNumericStats(self, features, progress, field):
        count = len(features)
        total = 100.0 / float(count)
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addVariant(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.variety())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, count - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.RANGE, stat.range())
        self.setOutputValue(self.SUM, stat.sum())
        self.setOutputValue(self.MEAN, stat.mean())
        self.setOutputValue(self.MEDIAN, stat.median())
        self.setOutputValue(self.STD_DEV, stat.stDev())
        self.setOutputValue(self.CV, cv)
        self.setOutputValue(self.MINORITY, stat.minority())
        self.setOutputValue(self.MAJORITY, stat.majority())
        self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
        self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
        self.setOutputValue(self.IQR, stat.interQuartileRange())

        data = []
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique values: {}').format(stat.variety()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Range: {}').format(stat.range()))
        data.append(self.tr('Sum: {}').format(stat.sum()))
        data.append(self.tr('Mean value: {}').format(stat.mean()))
        data.append(self.tr('Median value: {}').format(stat.median()))
        data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
        data.append(self.tr('Coefficient of Variation: {}').format(cv))
        data.append(self.tr('Minority (rarest occurring value): {}').format(stat.minority()))
        data.append(self.tr('Majority (most frequently occurring value): {}').format(stat.majority()))
        data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
        data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
        data.append(self.tr('Interquartile Range (IQR): {}').format(stat.interQuartileRange()))
        return data
    def processAlgorithm(self, feedback):
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        request = QgsFeatureRequest().setFlags(
            QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                layer.fields())
        stat = QgsStatisticalSummary()
        features = vector.features(layer, request)
        count = len(features)
        total = 100.0 / float(count)
        for current, ft in enumerate(features):
            stat.addVariant(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        cvValue = stdDevValue / meanValue if meanValue != 0 else 0
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()
        nullValues = stat.countMissing()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(uniqueValue))
        data.append(self.tr('Minimum value: {}').format(minValue))
        data.append(self.tr('Maximum value: {}').format(maxValue))
        data.append(self.tr('Range: {}').format(rValue))
        data.append(self.tr('Sum: {}').format(sumValue))
        data.append(self.tr('Mean value: {}').format(meanValue))
        data.append(self.tr('Median value: {}').format(medianValue))
        data.append(self.tr('Standard deviation: {}').format(stdDevValue))
        data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
        data.append(
            self.tr('Minority (rarest occurring value): {}').format(minority))
        data.append(
            self.tr('Majority (most frequently occurring value): {}').format(
                majority))
        data.append(self.tr('First quartile: {}').format(firstQuartile))
        data.append(self.tr('Third quartile: {}').format(thirdQuartile))
        data.append(self.tr('NULL (missing) values: {}').format(nullValues))
        data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.NULLVALUES, nullValues)
        self.setOutputValue(self.IQR, iqr)
Beispiel #11
0
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS,
                                             context)
        discard_nomatch = self.parameterAsBool(parameters,
                                               self.DISCARD_NONMATCHING,
                                               context)
        summaries = [
            self.statistics[i][0] for i in sorted(
                self.parameterAsEnums(parameters, self.SUMMARIES, context))
        ]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [
                join_source.fields().at(i).name()
                for i in range(len(join_source.fields()))
            ]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(type)
            if type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        numeric_fields = (('count', QVariant.Int,
                           'count'), ('unique', QVariant.Int, 'variety'),
                          ('min', QVariant.Double,
                           'min'), ('max', QVariant.Double,
                                    'max'), ('range', QVariant.Double,
                                             'range'), ('sum', QVariant.Double,
                                                        'sum'),
                          ('mean', QVariant.Double,
                           'mean'), ('median', QVariant.Double, 'median'),
                          ('stddev', QVariant.Double,
                           'stDev'), ('minority', QVariant.Double, 'minority'),
                          ('majority', QVariant.Double,
                           'majority'), ('q1', QVariant.Double,
                                         'firstQuartile'),
                          ('q3', QVariant.Double,
                           'thirdQuartile'), ('iqr', QVariant.Double,
                                              'interQuartileRange'))

        datetime_fields = (('count', QVariant.Int, 'count'),
                           ('unique', QVariant.Int, 'countDistinct'),
                           ('empty', QVariant.Int, 'countMissing'),
                           ('filled', QVariant.Int), ('min', None), ('max',
                                                                     None))

        string_fields = (('count', QVariant.Int,
                          'count'), ('unique', QVariant.Int, 'countDistinct'),
                         ('empty', QVariant.Int, 'countMissing'),
                         ('filled', QVariant.Int), ('min', None, 'min'),
                         ('max', None, 'max'), ('min_length', QVariant.Int,
                                                'minLength'),
                         ('max_length', QVariant.Int, 'maxLength'),
                         ('mean_length', QVariant.Double, 'meanLength'))

        field_types = []
        for f in join_fields:
            idx = join_source.fields().lookupField(f)
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source.fields().at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time,
                                           QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                for f in field_list:
                    if f[0] in summaries:
                        if f[1] is not None:
                            addField(join_field, f[0], f[1])
                        else:
                            addFieldKeepType(join_field, f[0])

        out_fields = QgsProcessingUtils.combineFields(source_fields,
                                                      fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, out_fields,
                                               source.wkbType(),
                                               source.sourceCrs())

        # do the join
        predicates = [
            self.predicates[i][0]
            for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
        ]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(),
                                                join_source.sourceCrs(),
                                                context.project())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            if not f.hasGeometry():
                if not discard_nomatch:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(
                f.geometry().boundingBox())
            engine = None

            values = []

            request = QgsFeatureRequest().setFilterRect(
                bbox).setSubsetOfAttributes(
                    join_field_indexes).setDestinationCrs(
                        source.sourceCrs(), context.transformContext())
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                join_attributes = []
                for a in join_field_indexes:
                    join_attributes.append(test_feat.attributes()[a])

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(
                        f.geometry().constGet())
                    engine.prepareGeometry()

                for predicate in predicates:
                    if getattr(engine,
                               predicate)(test_feat.geometry().constGet()):
                        values.append(join_attributes)
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
            else:
                attrs = f.attributes()
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        for v in attribute_values:
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Beispiel #12
0
    def calcNumericStats(self, features, feedback, field, count):
        total = 100.0 / float(count)
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addVariant(ft[field.name()])
            feedback.setProgress(int(current * total))
        stat.finalize()

        cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.variety())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, count - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.RANGE, stat.range())
        self.setOutputValue(self.SUM, stat.sum())
        self.setOutputValue(self.MEAN, stat.mean())
        self.setOutputValue(self.MEDIAN, stat.median())
        self.setOutputValue(self.STD_DEV, stat.stDev())
        self.setOutputValue(self.CV, cv)
        self.setOutputValue(self.MINORITY, stat.minority())
        self.setOutputValue(self.MAJORITY, stat.majority())
        self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
        self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
        self.setOutputValue(self.IQR, stat.interQuartileRange())

        data = []
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique values: {}').format(stat.variety()))
        data.append(
            self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Range: {}').format(stat.range()))
        data.append(self.tr('Sum: {}').format(stat.sum()))
        data.append(self.tr('Mean value: {}').format(stat.mean()))
        data.append(self.tr('Median value: {}').format(stat.median()))
        data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
        data.append(self.tr('Coefficient of Variation: {}').format(cv))
        data.append(
            self.tr('Minority (rarest occurring value): {}').format(
                stat.minority()))
        data.append(
            self.tr('Majority (most frequently occurring value): {}').format(
                stat.majority()))
        data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
        data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
        data.append(
            self.tr('Interquartile Range (IQR): {}').format(
                stat.interQuartileRange()))
        return data
    def calcNumericStats(self, values, sink, feedback):
        stat = QgsStatisticalSummary()

        total = 50.0 / len(values) if values else 0
        current = 0
        for cat, v in values.items():
            if feedback.isCanceled():
                break

            feedback.setProgress(int(current * total) + 50)

            stat.calculate(v)
            f = QgsFeature()
            f.setAttributes(
                list(cat) + [
                    stat.count(),
                    stat.variety(),
                    stat.min(),
                    stat.max(),
                    stat.range(),
                    stat.sum(),
                    stat.mean(),
                    stat.median(),
                    stat.stDev(),
                    stat.minority(),
                    stat.majority(),
                    stat.firstQuartile(),
                    stat.thirdQuartile(),
                    stat.interQuartileRange()
                ])

            sink.addFeature(f, QgsFeatureSink.FastInsert)
            current += 1
Beispiel #14
0
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        value_field_name = self.parameterAsString(parameters,
                                                  self.VALUES_FIELD_NAME,
                                                  context)
        category_field_name = self.parameterAsString(
            parameters, self.CATEGORIES_FIELD_NAME, context)

        value_field_index = source.fields().lookupField(value_field_name)
        category_field_index = source.fields().lookupField(category_field_name)

        features = source.getFeatures(QgsFeatureRequest().setFlags(
            QgsFeatureRequest.NoGeometry))
        total = 100.0 / source.featureCount() if source.featureCount() else 0
        values = {}
        for current, feat in enumerate(features):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(current * total))
            attrs = feat.attributes()
            try:
                value = float(attrs[value_field_index])
                cat = attrs[category_field_index]
                if cat not in values:
                    values[cat] = []
                values[cat].append(value)
            except:
                pass

        fields = QgsFields()
        fields.append(source.fields().at(category_field_index))
        fields.append(QgsField('min', QVariant.Double))
        fields.append(QgsField('max', QVariant.Double))
        fields.append(QgsField('mean', QVariant.Double))
        fields.append(QgsField('stddev', QVariant.Double))
        fields.append(QgsField('sum', QVariant.Double))
        fields.append(QgsField('count', QVariant.Int))

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, fields,
                                               QgsWkbTypes.NoGeometry,
                                               QgsCoordinateReferenceSystem())

        stat = QgsStatisticalSummary(QgsStatisticalSummary.Min
                                     | QgsStatisticalSummary.Max
                                     | QgsStatisticalSummary.Mean
                                     | QgsStatisticalSummary.StDevSample
                                     | QgsStatisticalSummary.Sum
                                     | QgsStatisticalSummary.Count)

        for (cat, v) in list(values.items()):
            stat.calculate(v)
            f = QgsFeature()
            f.setAttributes([
                cat,
                stat.min(),
                stat.max(),
                stat.mean(),
                stat.sampleStDev(),
                stat.sum(),
                stat.count()
            ])
            sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Beispiel #15
0
    def processAlgorithm(self, progress):
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        cvValue = 0
        minValue = 0
        maxValue = 0
        sumValue = 0
        meanValue = 0
        medianValue = 0
        stdDevValue = 0
        minority = 0
        majority = 0
        firstQuartile = 0
        thirdQuartile = 0
        nullValues = 0
        iqr = 0

        values = []

        features = vector.features(layer)
        count = len(features)
        total = 100.0 / float(count)
        for current, ft in enumerate(features):
            value = ft[fieldName]
            if value or value == 0:
                values.append(float(value))
            else:
                nullValues += 1

            progress.setPercentage(int(current * total))

        stat = QgsStatisticalSummary()
        stat.calculate(values)

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        if meanValue != 0.00:
            cvValue = stdDevValue / meanValue
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(uniqueValue))
        data.append(self.tr('Minimum value: {}').format(minValue))
        data.append(self.tr('Maximum value: {}').format(maxValue))
        data.append(self.tr('Range: {}').format(rValue))
        data.append(self.tr('Sum: {}').format(sumValue))
        data.append(self.tr('Mean value: {}').format(meanValue))
        data.append(self.tr('Median value: {}').format(medianValue))
        data.append(self.tr('Standard deviation: {}').format(stdDevValue))
        data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
        data.append(
            self.tr('Minority (rarest occurring value): {}').format(minority))
        data.append(
            self.tr('Majority (most frequently occurring value): {}').format(
                majority))
        data.append(self.tr('First quartile: {}').format(firstQuartile))
        data.append(self.tr('Third quartile: {}').format(thirdQuartile))
        data.append(self.tr('NULL (missing) values: {}').format(nullValues))
        data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.NULLVALUES, nullValues)
        self.setOutputValue(self.IQR, iqr)
Beispiel #16
0
    def processAlgorithm(self, progress):
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        index = layer.fieldNameIndex(fieldName)

        cvValue = 0
        minValue = 0
        maxValue = 0
        sumValue = 0
        meanValue = 0
        medianValue = 0
        stdDevValue = 0
        minority = 0
        majority = 0
        firstQuartile = 0
        thirdQuartile = 0
        iqr = 0

        isFirst = True
        values = []

        features = vector.features(layer)
        count = len(features)
        total = 100.0 / float(count)
        current = 0
        for ft in features:
            if ft.attributes()[index]:
                values.append(float(ft.attributes()[index]))

            current += 1
            progress.setPercentage(int(current * total))

        stat = QgsStatisticalSummary()
        stat.calculate(values)

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        if meanValue != 0.00:
            cvValue = stdDevValue / meanValue
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()

        data = []
        data.append('Count: ' + unicode(count))
        data.append('Unique values: ' + unicode(uniqueValue))
        data.append('Minimum value: ' + unicode(minValue))
        data.append('Maximum value: ' + unicode(maxValue))
        data.append('Range: ' + unicode(rValue))
        data.append('Sum: ' + unicode(sumValue))
        data.append('Mean value: ' + unicode(meanValue))
        data.append('Median value: ' + unicode(medianValue))
        data.append('Standard deviation: ' + unicode(stdDevValue))
        data.append('Coefficient of Variation: ' + unicode(cvValue))
        data.append('Minority (rarest occurring value): ' + unicode(minority))
        data.append('Majority (most frequently occurring value): ' +
                    unicode(majority))
        data.append('First quartile: ' + unicode(firstQuartile))
        data.append('Third quartile: ' + unicode(thirdQuartile))
        data.append('Interquartile Range (IQR): ' + unicode(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.IQR, iqr)