Ejemplo n.º 1
0
    def calcNumericStats(self, features, feedback, field, count):
        """
        Summarises the values of one field over a set of features.

        Every feature's value for ``field`` is pushed into a
        QgsStatisticalSummary. Iteration stops early when ``feedback``
        reports cancellation; the statistics are then built from the values
        collected up to that point.

        :param features: iterable of features, each indexable by field name
        :param feedback: object providing isCanceled() and setProgress()
        :param field: field object; its name() selects the attribute to read
        :param count: feature count used to scale the progress value and to
            derive the FILLED result (count minus missing values)
        :return: tuple ``(data, results)``; ``data`` is a list of translated
            text lines, ``results`` maps this class' result keys to values
        """
        step = 100.0 / count if count else 0
        summary = QgsStatisticalSummary()
        for position, feature in enumerate(features):
            if feedback.isCanceled():
                break
            summary.addVariant(feature[field.name()])
            feedback.setProgress(int(position * step))
        summary.finalize()

        # Read each statistic once; both return values are built from these.
        n_values = summary.count()
        n_unique = summary.variety()
        n_missing = summary.countMissing()
        minimum = summary.min()
        maximum = summary.max()
        value_range = summary.range()
        total_sum = summary.sum()
        mean = summary.mean()
        median = summary.median()
        std_dev = summary.stDev()
        minority = summary.minority()
        majority = summary.majority()
        q1 = summary.firstQuartile()
        q3 = summary.thirdQuartile()
        iqr = summary.interQuartileRange()

        # Coefficient of variation is undefined for a zero mean; report 0.
        if mean != 0:
            cv = std_dev / mean
        else:
            cv = 0

        results = {
            self.COUNT: n_values,
            self.UNIQUE: n_unique,
            self.EMPTY: n_missing,
            self.FILLED: count - n_missing,
            self.MIN: minimum,
            self.MAX: maximum,
            self.RANGE: value_range,
            self.SUM: total_sum,
            self.MEAN: mean,
            self.MEDIAN: median,
            self.STD_DEV: std_dev,
            self.CV: cv,
            self.MINORITY: minority,
            self.MAJORITY: majority,
            self.FIRSTQUARTILE: q1,
            self.THIRDQUARTILE: q3,
            self.IQR: iqr,
        }

        data = [
            self.tr('Count: {}').format(n_values),
            self.tr('Unique values: {}').format(n_unique),
            self.tr('NULL (missing) values: {}').format(n_missing),
            self.tr('Minimum value: {}').format(minimum),
            self.tr('Maximum value: {}').format(maximum),
            self.tr('Range: {}').format(value_range),
            self.tr('Sum: {}').format(total_sum),
            self.tr('Mean value: {}').format(mean),
            self.tr('Median value: {}').format(median),
            self.tr('Standard deviation: {}').format(std_dev),
            self.tr('Coefficient of Variation: {}').format(cv),
            self.tr('Minority (rarest occurring value): {}').format(minority),
            self.tr('Majority (most frequently occurring value): {}').format(
                majority),
            self.tr('First quartile: {}').format(q1),
            self.tr('Third quartile: {}').format(q3),
            self.tr('Interquartile Range (IQR): {}').format(iqr),
        ]
        return data, results
Ejemplo n.º 2
0
    def calcNumericStats(self, features, progress, field):
        """
        Computes summary statistics for one field and publishes them as
        algorithm outputs.

        Each feature's value for ``field`` is added to a
        QgsStatisticalSummary; the resulting statistics are stored through
        setOutputValue() and also returned as text.

        :param features: sized iterable of features (``len()`` is taken),
            each indexable by field name
        :param progress: object providing setPercentage()
        :param field: field object; its name() selects the attribute to read
        :return: list of translated text lines, one per statistic
        """
        count = len(features)
        # Guard against an empty input: dividing by zero here would abort
        # the algorithm. With no features the loop below never runs, so the
        # step value is irrelevant.
        total = 100.0 / count if count else 0
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addVariant(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        # Coefficient of variation is undefined for a zero mean; report 0.
        cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.variety())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, count - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.RANGE, stat.range())
        self.setOutputValue(self.SUM, stat.sum())
        self.setOutputValue(self.MEAN, stat.mean())
        self.setOutputValue(self.MEDIAN, stat.median())
        self.setOutputValue(self.STD_DEV, stat.stDev())
        self.setOutputValue(self.CV, cv)
        self.setOutputValue(self.MINORITY, stat.minority())
        self.setOutputValue(self.MAJORITY, stat.majority())
        self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
        self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
        self.setOutputValue(self.IQR, stat.interQuartileRange())

        data = []
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique values: {}').format(stat.variety()))
        data.append(
            self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Range: {}').format(stat.range()))
        data.append(self.tr('Sum: {}').format(stat.sum()))
        data.append(self.tr('Mean value: {}').format(stat.mean()))
        data.append(self.tr('Median value: {}').format(stat.median()))
        data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
        data.append(self.tr('Coefficient of Variation: {}').format(cv))
        data.append(
            self.tr('Minority (rarest occurring value): {}').format(
                stat.minority()))
        data.append(
            self.tr('Majority (most frequently occurring value): {}').format(
                stat.majority()))
        data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
        data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
        data.append(
            self.tr('Interquartile Range (IQR): {}').format(
                stat.interQuartileRange()))
        return data
Ejemplo n.º 3
0
    def calcNumericStats(self, features, feedback, field, count):
        """
        Builds numeric statistics for ``field`` across ``features``.

        Values are accumulated in a QgsStatisticalSummary. If ``feedback``
        reports cancellation the loop stops and the statistics reflect only
        the values added so far.

        :param features: iterable of features, each indexable by field name
        :param feedback: object providing isCanceled() and setProgress()
        :param field: field object; its name() selects the attribute to read
        :param count: feature count used for progress scaling and for the
            FILLED result (count minus missing values)
        :return: tuple ``(data, results)``; ``data`` holds translated text
            lines and ``results`` maps this class' result keys to values
        """
        per_feature = 100.0 / count if count else 0
        acc = QgsStatisticalSummary()
        for n, feat in enumerate(features):
            if feedback.isCanceled():
                break
            acc.addVariant(feat[field.name()])
            feedback.setProgress(int(n * per_feature))
        acc.finalize()

        # Coefficient of variation is undefined for a zero mean; report 0.
        cv = acc.stDev() / acc.mean() if acc.mean() != 0 else 0

        # One row per statistic: (result key, translated label, value).
        # A label of None means the value is returned in ``results`` only.
        rows = [
            (self.COUNT, self.tr('Count: {}'), acc.count()),
            (self.UNIQUE, self.tr('Unique values: {}'), acc.variety()),
            (self.EMPTY, self.tr('NULL (missing) values: {}'),
             acc.countMissing()),
            (self.FILLED, None, count - acc.countMissing()),
            (self.MIN, self.tr('Minimum value: {}'), acc.min()),
            (self.MAX, self.tr('Maximum value: {}'), acc.max()),
            (self.RANGE, self.tr('Range: {}'), acc.range()),
            (self.SUM, self.tr('Sum: {}'), acc.sum()),
            (self.MEAN, self.tr('Mean value: {}'), acc.mean()),
            (self.MEDIAN, self.tr('Median value: {}'), acc.median()),
            (self.STD_DEV, self.tr('Standard deviation: {}'), acc.stDev()),
            (self.CV, self.tr('Coefficient of Variation: {}'), cv),
            (self.MINORITY, self.tr('Minority (rarest occurring value): {}'),
             acc.minority()),
            (self.MAJORITY,
             self.tr('Majority (most frequently occurring value): {}'),
             acc.majority()),
            (self.FIRSTQUARTILE, self.tr('First quartile: {}'),
             acc.firstQuartile()),
            (self.THIRDQUARTILE, self.tr('Third quartile: {}'),
             acc.thirdQuartile()),
            (self.IQR, self.tr('Interquartile Range (IQR): {}'),
             acc.interQuartileRange()),
        ]

        results = {key: value for key, _, value in rows}
        data = [label.format(value)
                for _, label, value in rows if label is not None]
        return data, results
Ejemplo n.º 4
0
    def calcNumericStats(self, features, progress, field):
        """
        Computes summary statistics for one field, stores them as algorithm
        outputs and returns them as text.

        :param features: sized iterable of features (``len()`` is taken),
            each indexable by field name
        :param progress: object providing setPercentage()
        :param field: field object; its name() selects the attribute to read
        :return: list of translated text lines, one per statistic
        """
        count = len(features)
        # An empty feature set previously raised ZeroDivisionError here.
        # The loop does not execute in that case, so a step of 0 is safe.
        total = 100.0 / count if count else 0
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            stat.addVariant(ft[field.name()])
            progress.setPercentage(int(current * total))
        stat.finalize()

        # Coefficient of variation is undefined for a zero mean; report 0.
        cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0

        self.setOutputValue(self.COUNT, stat.count())
        self.setOutputValue(self.UNIQUE, stat.variety())
        self.setOutputValue(self.EMPTY, stat.countMissing())
        self.setOutputValue(self.FILLED, count - stat.countMissing())
        self.setOutputValue(self.MIN, stat.min())
        self.setOutputValue(self.MAX, stat.max())
        self.setOutputValue(self.RANGE, stat.range())
        self.setOutputValue(self.SUM, stat.sum())
        self.setOutputValue(self.MEAN, stat.mean())
        self.setOutputValue(self.MEDIAN, stat.median())
        self.setOutputValue(self.STD_DEV, stat.stDev())
        self.setOutputValue(self.CV, cv)
        self.setOutputValue(self.MINORITY, stat.minority())
        self.setOutputValue(self.MAJORITY, stat.majority())
        self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
        self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
        self.setOutputValue(self.IQR, stat.interQuartileRange())

        data = []
        data.append(self.tr('Count: {}').format(stat.count()))
        data.append(self.tr('Unique values: {}').format(stat.variety()))
        data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
        data.append(self.tr('Minimum value: {}').format(stat.min()))
        data.append(self.tr('Maximum value: {}').format(stat.max()))
        data.append(self.tr('Range: {}').format(stat.range()))
        data.append(self.tr('Sum: {}').format(stat.sum()))
        data.append(self.tr('Mean value: {}').format(stat.mean()))
        data.append(self.tr('Median value: {}').format(stat.median()))
        data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
        data.append(self.tr('Coefficient of Variation: {}').format(cv))
        data.append(self.tr('Minority (rarest occurring value): {}').format(stat.minority()))
        data.append(self.tr('Majority (most frequently occurring value): {}').format(stat.majority()))
        data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
        data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
        data.append(self.tr('Interquartile Range (IQR): {}').format(stat.interQuartileRange()))
        return data
Ejemplo n.º 5
0
    def processAlgorithm(self, parameters, context, feedback):
        """
        Writes every INPUT feature to OUTPUT, extended with summary
        statistics of the JOIN features it spatially matches.

        For each input feature, candidate join features are fetched by
        bounding box and tested against the selected predicates. The
        attribute values of matching join features are summarised per join
        field (numeric, date/time or string statistics) and appended to the
        input feature's attributes.

        :param parameters: algorithm parameter values
        :param context: processing context used to resolve parameters and
            coordinate transforms
        :param feedback: object providing isCanceled() and setProgress()
        :return: dict mapping self.OUTPUT to the destination id of the sink
        :raises QgsProcessingException: if the input source, the join source
            or the output sink cannot be resolved
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        if source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.INPUT))

        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        if join_source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.JOIN))

        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS,
                                             context)
        discard_nomatch = self.parameterAsBool(parameters,
                                               self.DISCARD_NONMATCHING,
                                               context)
        # Names of the selected summaries, in the order of self.statistics
        summaries = [
            self.statistics[i][0] for i in sorted(
                self.parameterAsEnums(parameters, self.SUMMARIES, context))
        ]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [
                join_source.fields().at(i).name()
                for i in range(len(join_source.fields()))
            ]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(type)
            if type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        # Each entry below is (summary name, output field type, name of the
        # statistics method to call). A type of None means the output field
        # keeps the type of the joined field.
        numeric_fields = (('count', QVariant.Int,
                           'count'), ('unique', QVariant.Int, 'variety'),
                          ('min', QVariant.Double,
                           'min'), ('max', QVariant.Double,
                                    'max'), ('range', QVariant.Double,
                                             'range'), ('sum', QVariant.Double,
                                                        'sum'),
                          ('mean', QVariant.Double,
                           'mean'), ('median', QVariant.Double, 'median'),
                          ('stddev', QVariant.Double,
                           'stDev'), ('minority', QVariant.Double, 'minority'),
                          ('majority', QVariant.Double,
                           'majority'), ('q1', QVariant.Double,
                                         'firstQuartile'),
                          ('q3', QVariant.Double,
                           'thirdQuartile'), ('iqr', QVariant.Double,
                                              'interQuartileRange'))

        # 'filled', 'min' and 'max' carry no method name here; they are
        # computed explicitly in the date/time branch further down.
        datetime_fields = (('count', QVariant.Int, 'count'),
                           ('unique', QVariant.Int, 'countDistinct'),
                           ('empty', QVariant.Int, 'countMissing'),
                           ('filled', QVariant.Int), ('min', None), ('max',
                                                                     None))

        # 'filled' carries no method name; it is computed explicitly in the
        # string branch further down.
        string_fields = (('count', QVariant.Int,
                          'count'), ('unique', QVariant.Int, 'countDistinct'),
                         ('empty', QVariant.Int, 'countMissing'),
                         ('filled', QVariant.Int), ('min', None, 'min'),
                         ('max', None, 'max'), ('min_length', QVariant.Int,
                                                'minLength'),
                         ('max_length', QVariant.Int, 'maxLength'),
                         ('mean_length', QVariant.Double, 'meanLength'))

        # field_types[i] is the category ('numeric', 'datetime' or 'string')
        # of the join field at join_field_indexes[i]
        field_types = []
        for f in join_fields:
            idx = join_source.fields().lookupField(f)
            # names that are not found in the join source are skipped
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source.fields().at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time,
                                           QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                # NOTE: this loop rebinds ``f``, the outer loop variable. The
                # outer ``f`` is not read again in this iteration, so the
                # shadowing has no effect on the result.
                for f in field_list:
                    if f[0] in summaries:
                        if f[1] is not None:
                            addField(join_field, f[0], f[1])
                        else:
                            addFieldKeepType(join_field, f[0])

        out_fields = QgsProcessingUtils.combineFields(source_fields,
                                                      fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, out_fields,
                                               source.wkbType(),
                                               source.sourceCrs())
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT))

        # do the join
        predicates = [
            self.predicates[i][0]
            for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
        ]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(),
                                                join_source.sourceCrs(),
                                                context.project())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            # Features without geometry cannot match anything; they are
            # either passed through with padded attributes or dropped.
            if not f.hasGeometry():
                if not discard_nomatch:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(
                f.geometry().boundingBox())
            # created lazily, only once the request yields a candidate
            engine = None

            # one list of join attribute values per matching join feature
            values = []

            # NOTE(review): bbox is expressed in the join source CRS, while
            # the request also sets the input source CRS as destination CRS.
            # Confirm which CRS setFilterRect() expects when a destination
            # CRS is set; the two only coincide when both CRSs are equal.
            request = QgsFeatureRequest().setFilterRect(
                bbox).setSubsetOfAttributes(
                    join_field_indexes).setDestinationCrs(
                        source.sourceCrs(), context.transformContext())
            for test_feat in join_source.getFeatures(request):
                # Leaves only this inner loop; the current input feature is
                # still written below using the values gathered so far, and
                # the outer loop stops at its next cancellation check.
                if feedback.isCanceled():
                    break

                join_attributes = []
                for a in join_field_indexes:
                    join_attributes.append(test_feat.attributes()[a])

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(
                        f.geometry().constGet())
                    engine.prepareGeometry()

                # a join feature is counted once, as soon as any of the
                # selected predicates holds
                for predicate in predicates:
                    if getattr(engine,
                               predicate)(test_feat.geometry().constGet()):
                        values.append(join_attributes)
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
            else:
                attrs = f.attributes()
                # Append the statistics in the same order the output fields
                # were created above: per join field, then per summary.
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(
                                        stat.statistic(
                                            QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        for v in attribute_values:
                            # NULL values are added as empty strings
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() -
                                                 stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Ejemplo n.º 6
0
    def processAlgorithm(self, feedback):
        """
        Computes basic statistics for one field of the input layer.

        The layer and field name are read from the INPUT_LAYER and
        FIELD_NAME parameters. The statistics are written as text through
        createHTML() to the OUTPUT_HTML_FILE output and stored individually
        through setOutputValue().

        :param feedback: object providing setProgress()
        """
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # Only the analysed attribute is requested; geometry is skipped.
        request = QgsFeatureRequest().setFlags(
            QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                                layer.fields())
        stat = QgsStatisticalSummary()
        features = vector.features(layer, request)
        featureCount = len(features)
        # Guard against an empty layer: dividing by zero here would abort
        # the algorithm. With no features the loop below never runs, so the
        # step value is irrelevant.
        total = 100.0 / featureCount if featureCount else 0
        for current, ft in enumerate(features):
            stat.addVariant(ft[fieldName])
            feedback.setProgress(int(current * total))

        stat.finalize()

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        # Coefficient of variation is undefined for a zero mean; report 0.
        cvValue = stdDevValue / meanValue if meanValue != 0 else 0
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()
        nullValues = stat.countMissing()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(uniqueValue))
        data.append(self.tr('Minimum value: {}').format(minValue))
        data.append(self.tr('Maximum value: {}').format(maxValue))
        data.append(self.tr('Range: {}').format(rValue))
        data.append(self.tr('Sum: {}').format(sumValue))
        data.append(self.tr('Mean value: {}').format(meanValue))
        data.append(self.tr('Median value: {}').format(medianValue))
        data.append(self.tr('Standard deviation: {}').format(stdDevValue))
        data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
        data.append(
            self.tr('Minority (rarest occurring value): {}').format(minority))
        data.append(
            self.tr('Majority (most frequently occurring value): {}').format(
                majority))
        data.append(self.tr('First quartile: {}').format(firstQuartile))
        data.append(self.tr('Third quartile: {}').format(thirdQuartile))
        data.append(self.tr('NULL (missing) values: {}').format(nullValues))
        data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.NULLVALUES, nullValues)
        self.setOutputValue(self.IQR, iqr)
Ejemplo n.º 7
0
    def processAlgorithm(self, progress):
        """
        Computes basic statistics for one field of the input layer.

        The layer and field name are read from the INPUT_LAYER and
        FIELD_NAME parameters. The statistics are written as text through
        createHTML() to the OUTPUT_HTML_FILE output and stored individually
        through setOutputValue().

        :param progress: object providing setPercentage()
        """
        layer = dataobjects.getObjectFromUri(
            self.getParameterValue(self.INPUT_LAYER))
        fieldName = self.getParameterValue(self.FIELD_NAME)

        outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

        # Only the analysed attribute is requested; geometry is skipped.
        request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName], layer.fields())
        stat = QgsStatisticalSummary()
        features = vector.features(layer, request)
        featureCount = len(features)
        # An empty layer previously raised ZeroDivisionError here. The loop
        # does not execute in that case, so a step of 0 is safe.
        total = 100.0 / featureCount if featureCount else 0
        for current, ft in enumerate(features):
            stat.addVariant(ft[fieldName])
            progress.setPercentage(int(current * total))

        stat.finalize()

        count = stat.count()
        uniqueValue = stat.variety()
        minValue = stat.min()
        maxValue = stat.max()
        rValue = stat.range()
        sumValue = stat.sum()
        meanValue = stat.mean()
        medianValue = stat.median()
        stdDevValue = stat.stDev()
        # Coefficient of variation is undefined for a zero mean; report 0.
        cvValue = stdDevValue / meanValue if meanValue != 0 else 0
        minority = stat.minority()
        majority = stat.majority()
        firstQuartile = stat.firstQuartile()
        thirdQuartile = stat.thirdQuartile()
        iqr = stat.interQuartileRange()
        nullValues = stat.countMissing()

        data = []
        data.append(self.tr('Analyzed layer: {}').format(layer.name()))
        data.append(self.tr('Analyzed field: {}').format(fieldName))
        data.append(self.tr('Count: {}').format(count))
        data.append(self.tr('Unique values: {}').format(uniqueValue))
        data.append(self.tr('Minimum value: {}').format(minValue))
        data.append(self.tr('Maximum value: {}').format(maxValue))
        data.append(self.tr('Range: {}').format(rValue))
        data.append(self.tr('Sum: {}').format(sumValue))
        data.append(self.tr('Mean value: {}').format(meanValue))
        data.append(self.tr('Median value: {}').format(medianValue))
        data.append(self.tr('Standard deviation: {}').format(stdDevValue))
        data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
        data.append(self.tr('Minority (rarest occurring value): {}').format(minority))
        data.append(self.tr('Majority (most frequently occurring value): {}').format(majority))
        data.append(self.tr('First quartile: {}').format(firstQuartile))
        data.append(self.tr('Third quartile: {}').format(thirdQuartile))
        data.append(self.tr('NULL (missing) values: {}').format(nullValues))
        data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

        self.createHTML(outputFile, data)

        self.setOutputValue(self.COUNT, count)
        self.setOutputValue(self.UNIQUE, uniqueValue)
        self.setOutputValue(self.MIN, minValue)
        self.setOutputValue(self.MAX, maxValue)
        self.setOutputValue(self.RANGE, rValue)
        self.setOutputValue(self.SUM, sumValue)
        self.setOutputValue(self.MEAN, meanValue)
        self.setOutputValue(self.MEDIAN, medianValue)
        self.setOutputValue(self.STD_DEV, stdDevValue)
        self.setOutputValue(self.MINORITY, minority)
        self.setOutputValue(self.MAJORITY, majority)
        self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
        self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
        self.setOutputValue(self.NULLVALUES, nullValues)
        self.setOutputValue(self.IQR, iqr)
Ejemplo n.º 8
0
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        join_source = self.parameterAsSource(parameters, self.JOIN, context)
        join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
        discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
        summaries = [self.statistics[i][0] for i in
                     sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]

        if not summaries:
            # none selected, so use all
            summaries = [s[0] for s in self.statistics]

        source_fields = source.fields()
        fields_to_join = QgsFields()
        join_field_indexes = []
        if not join_fields:
            # no fields selected, use all
            join_fields = [join_source.fields().at(i).name() for i in range(len(join_source.fields()))]

        def addFieldKeepType(original, stat):
            """
            Adds a field to the output, keeping the same data type as the original
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            fields_to_join.append(field)

        def addField(original, stat, type):
            """
            Adds a field to the output, with a specified type
            """
            field = QgsField(original)
            field.setName(field.name() + '_' + stat)
            field.setType(type)
            if type == QVariant.Double:
                field.setLength(20)
                field.setPrecision(6)
            fields_to_join.append(field)

        numeric_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'variety'),
            ('min', QVariant.Double, 'min'),
            ('max', QVariant.Double, 'max'),
            ('range', QVariant.Double, 'range'),
            ('sum', QVariant.Double, 'sum'),
            ('mean', QVariant.Double, 'mean'),
            ('median', QVariant.Double, 'median'),
            ('stddev', QVariant.Double, 'stDev'),
            ('minority', QVariant.Double, 'minority'),
            ('majority', QVariant.Double, 'majority'),
            ('q1', QVariant.Double, 'firstQuartile'),
            ('q3', QVariant.Double, 'thirdQuartile'),
            ('iqr', QVariant.Double, 'interQuartileRange')
        )

        datetime_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None),
            ('max', None)
        )

        string_fields = (
            ('count', QVariant.Int, 'count'),
            ('unique', QVariant.Int, 'countDistinct'),
            ('empty', QVariant.Int, 'countMissing'),
            ('filled', QVariant.Int),
            ('min', None, 'min'),
            ('max', None, 'max'),
            ('min_length', QVariant.Int, 'minLength'),
            ('max_length', QVariant.Int, 'maxLength'),
            ('mean_length', QVariant.Double, 'meanLength')
        )

        field_types = []
        for f in join_fields:
            idx = join_source.fields().lookupField(f)
            if idx >= 0:
                join_field_indexes.append(idx)

                join_field = join_source.fields().at(idx)
                if join_field.isNumeric():
                    field_types.append('numeric')
                    field_list = numeric_fields
                elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                    field_types.append('datetime')
                    field_list = datetime_fields
                else:
                    field_types.append('string')
                    field_list = string_fields

                for f in field_list:
                    if f[0] in summaries:
                        if f[1] is not None:
                            addField(join_field, f[0], f[1])
                        else:
                            addFieldKeepType(join_field, f[0])

        out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                               out_fields, source.wkbType(), source.sourceCrs())

        # do the join
        predicates = [self.predicates[i][0] for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]

        features = source.getFeatures()
        total = 100.0 / source.featureCount() if source.featureCount() else 0

        # bounding box transform
        bbox_transform = QgsCoordinateTransform(source.sourceCrs(), join_source.sourceCrs(), context.project())

        for current, f in enumerate(features):
            if feedback.isCanceled():
                break

            if not f.hasGeometry():
                if not discard_nomatch:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
                continue

            bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
            engine = None

            values = []

            request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(join_field_indexes).setDestinationCrs(source.sourceCrs(), context.transformContext())
            for test_feat in join_source.getFeatures(request):
                if feedback.isCanceled():
                    break

                join_attributes = []
                for a in join_field_indexes:
                    join_attributes.append(test_feat.attributes()[a])

                if engine is None:
                    engine = QgsGeometry.createGeometryEngine(f.geometry().constGet())
                    engine.prepareGeometry()

                for predicate in predicates:
                    if getattr(engine, predicate)(test_feat.geometry().constGet()):
                        values.append(join_attributes)
                        break

            feedback.setProgress(int(current * total))

            if len(values) == 0:
                if discard_nomatch:
                    continue
                else:
                    # ensure consistent count of attributes - otherwise non matching
                    # features will have incorrect attribute length
                    # and provider may reject them
                    attrs = f.attributes()
                    if len(attrs) < len(out_fields):
                        attrs += [NULL] * (len(out_fields) - len(attrs))
                    f.setAttributes(attrs)
                    sink.addFeature(f, QgsFeatureSink.FastInsert)
            else:
                attrs = f.attributes()
                for i in range(len(join_field_indexes)):
                    attribute_values = [v[i] for v in values]
                    field_type = field_types[i]
                    if field_type == 'numeric':
                        stat = QgsStatisticalSummary()
                        for v in attribute_values:
                            stat.addVariant(v)
                        stat.finalize()
                        for s in numeric_fields:
                            if s[0] in summaries:
                                attrs.append(getattr(stat, s[2])())
                    elif field_type == 'datetime':
                        stat = QgsDateTimeStatisticalSummary()
                        stat.calculate(attribute_values)
                        for s in datetime_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                elif s[0] == 'min':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                                elif s[0] == 'max':
                                    attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                                else:
                                    attrs.append(getattr(stat, s[2])())
                    else:
                        stat = QgsStringStatisticalSummary()
                        for v in attribute_values:
                            if v == NULL:
                                stat.addString('')
                            else:
                                stat.addString(str(v))
                        stat.finalize()
                        for s in string_fields:
                            if s[0] in summaries:
                                if s[0] == 'filled':
                                    attrs.append(stat.count() - stat.countMissing())
                                else:
                                    attrs.append(getattr(stat, s[2])())

                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)

        return {self.OUTPUT: dest_id}
Ejemplo n.º 9
0
    def processAlgorithm(self, parameters, context, feedback):
        """Build one stacked-profile line feature per survey line.

        The point source is read once: the points of each line are dumped to
        a temporary csv file in ``self.tmpDir`` while the statistics of the
        data channel are accumulated (progress 0-60 %).  Each csv is then read
        back with pandas, sorted by ``FID``, turned into a profile by
        ``self._do_profile`` and written to the sink as a LineStringM feature
        whose M value is the data value (progress 60-100 %).

        :param parameters: algorithm parameters, read through the
            ``parameterAs*`` helpers.
        :param context: processing context handed to the ``parameterAs*``
            helpers.
        :param feedback: used for progress reporting and cancellation.
        :returns: ``{self.OUTPUT: dest_id}``, or ``{}`` when
            ``is_dependencies_satisfied`` is false.
        :raises QgsProcessingException: when a source is missing or is not a
            point layer, when a requested field does not exist or is not
            numeric, when the sink cannot be created, or when the values used
            for scaling have a zero range.
        :raises ValueError: when the temporary csv of a line cannot be found.
        """
        if not is_dependencies_satisfied:
            return {}

        # Init
        # The number of features in the input layer could be trimmed to user
        # selection.
        the_layer = self.parameterAsSource(parameters, self.THE_LAYER, context)
        # Test for None first: wkbType() cannot be called on a missing source.
        if (the_layer is None
                or QgsWkbTypes.geometryType(the_layer.wkbType())
                != QgsWkbTypes.PointGeometry):
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.THE_LAYER))
        #
        bCHscal = self.parameterAsBool(parameters, self.BSCALE, context)
        scally = None
        if bCHscal:
            # Use another channel for scaling. All data from that channel will
            # be used.
            scally = self.parameterAsSource(parameters, self.SCALLY, context)
            # NOTE(review): the geometry test below looks at the_layer, not at
            # scally - kept as is, confirm which layer was meant.
            if scally is None or the_layer.wkbType() != QgsWkbTypes.Point:
                raise QgsProcessingException(
                    self.invalidSourceError(parameters, self.SCALLY))
        fidu_fld = self.parameterAsString(parameters, self.FID_FLD, context)
        data_fld = self.parameterAsString(parameters, self.DATA_FLD, context)
        line_fld = self.parameterAsString(parameters, self.LINE_FLD, context)
        invP = self.parameterAsBool(parameters, self.INVERTP, context)
        dumval = self.parameterAsDouble(parameters, self.DUMVAL, context)
        scale = self.parameterAsDouble(parameters, self.SCALE, context)
        offset = self.parameterAsDouble(parameters, self.OFFSET, context)
        join_to_line = self.parameterAsBool(parameters, self.JOINL, context)

        layer_fields = the_layer.fields()
        data_ix = layer_fields.lookupField(data_fld)
        line_ix = layer_fields.lookupField(line_fld)
        fidu_ix = layer_fields.lookupField(fidu_fld)
        # lookupField() returns -1 for an unknown name: reject it here instead
        # of handing an invalid index to fields().at().
        if min(data_ix, line_ix, fidu_ix) < 0:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.THE_LAYER))

        data = layer_fields.at(data_ix)
        fidu = layer_fields.at(fidu_ix)
        if not data.isNumeric() or not fidu.isNumeric():
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.THE_LAYER))
        data_name = data.name()
        fidu_name = fidu.name()
        line_name = layer_fields.at(line_ix).name()

        # Set output vector layer: line of (X, Y, M) vertices, M is the data
        # value at that point
        output_wkb = QgsWkbTypes.addM(QgsWkbTypes.LineString)

        # Fields of stacked profiles vector
        fields = QgsFields()
        fields.append(QgsField('Line', QVariant.String, '', 16))
        fields.append(QgsField('Type', QVariant.String, '', 2))
        fields.append(QgsField('NbPts', QVariant.Int, '', 10, 0))
        fields.append(QgsField('Azimuth', QVariant.Double, '', 10, 6))
        fields.append(QgsField('DistEP', QVariant.Double, '', 10, 2))
        fields.append(QgsField('Length', QVariant.Double, '', 10, 2))
        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, fields, output_wkb,
                                               the_layer.sourceCrs())
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT))

        # Get the features and fields of interest
        features = the_layer.getFeatures(
            QgsFeatureRequest().setSubsetOfAttributes(
                [fidu_ix, line_ix, data_ix]),
            QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)

        def dump_line(name, rows):
            """Write the rows of one line to its csv file and return the
            straight distance between its first and last point."""
            csv_path = os.path.join(self.tmpDir, '%s.csv' % str(name))
            with codecs.open(csv_path, 'w', 'utf-8') as fo:
                fo.write('X,Y,FID,Data\n')
                for row in rows:
                    fo.write(','.join(map(str, row)))
                    fo.write('\n')
            return sqrt((rows[0][0] - rows[-1][0])**2 +
                        (rows[0][1] - rows[-1][1])**2)

        # CSV
        # Find min/max of data values for all lines and save each line in a
        # csv file.
        # Then process each line separately: can have any number of lines...
        lines = []
        xyzf = []
        lineN = ''
        TL = 0.  # longest end-to-end distance over all lines
        feature_count = the_layer.featureCount()
        total = 60.0 / feature_count if feature_count > 0 else 0
        # The source is known to hold point geometries, so "multi" here means
        # one of the MultiPoint flavours; evaluated once, not per feature.
        is_multi = QgsWkbTypes.isMultiType(the_layer.wkbType())
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(features):
            if feedback.isCanceled():
                break
            feedback.setProgress(int(current * total))
            if not ft.hasGeometry():
                continue
            #
            if ft[line_name] != lineN:
                # A new line starts: flush the points of the previous one
                if xyzf:
                    lines.append([lineN, len(xyzf)])
                    TL = max(TL, dump_line(lineN, xyzf))
                    xyzf = []
                lineN = ft[line_name]
            #
            try:
                rdata = float(ft[data_name])
                fiduu = int(ft[fidu_name])
            except (TypeError, ValueError):
                # NULL data or fiducial cannot be converted: skip the point,
                # as is done for dummy values.
                continue
            if abs(rdata - dumval) < 1e-6:
                # Dummy value: skip
                continue
            #
            stat.addVariant(ft[data_name])
            # Suppose all parts of a multipoint have the same attributes:
            #  in this case it seems useless to get more than the first
            #  point, but...
            geom = ft.geometry().constGet().clone()
            points = geom if is_multi else [geom]
            try:
                for point in points:
                    xyzf.append([point.x(), point.y(), fiduu, rdata])
            except (TypeError, AttributeError):
                # Best effort: a geometry that cannot be walked point by point
                # contributes no vertex.
                pass
        # last line
        if xyzf:
            lines.append([lineN, len(xyzf)])
            TL = max(TL, dump_line(lineN, xyzf))
        #
        stat.finalize()
        if bCHscal:
            # Scaling field: its stats replace those of the data channel
            scch_fld = self.parameterAsString(parameters, self.SCALCH, context)
            scch_ix = scally.fields().lookupField(scch_fld)
            if scch_ix < 0:
                raise QgsProcessingException(
                    self.invalidSourceError(parameters, self.SCALLY))
            scch_name = scally.fields().at(scch_ix).name()
            scch_f = scally.getFeatures(
                QgsFeatureRequest().setSubsetOfAttributes([scch_ix]),
                QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)
            stat = QgsStatisticalSummary()
            for scch_ft in scch_f:
                stat.addVariant(scch_ft[scch_name])
            stat.finalize()
        #
        value_range = stat.max() - stat.min()
        if value_range == 0:
            # Constant values would otherwise end in a ZeroDivisionError
            raise QgsProcessingException(
                'The values used for scaling are all identical: '
                'cannot compute a scale factor.')
        self.dmean = stat.mean()
        self.mult = TL / value_range
        #
        iv = -1 if invP else 1

        # Profile
        total = 40.0 / (len(lines) + 1)
        # For each line:
        for current, z in enumerate(lines):
            profile_line = z[0]
            if feedback.isCanceled():
                break
            feedback.setProgress(int(current * total) + 60)

            # Read line back from csv
            the_csv = os.path.join(self.tmpDir, '%s.csv' % str(profile_line))
            if not os.path.exists(the_csv):
                raise ValueError(
                    'It seems parameters are swaped: LINE <-> DATA!')

            ar = pd.read_csv(the_csv)
            ar = ar.sort_values('FID')

            # Create the profile
            px, py = self._do_profile(ar, iv, scale, offset)

            # Construct vector layer
            # self.type, self.azimut, self.length and self.clength are
            # presumably refreshed by _do_profile for the current line
            # - TODO confirm.
            f = QgsFeature()
            f.setAttributes([
                str(profile_line),
                str(self.type),
                len(px),
                float(self.azimut),
                float(self.length),
                float(self.clength),
            ])
            line_pts = [
                QgsPoint(x, y, m=m) for x, y, m in zip(px, py, ar.Data)
            ]
            if join_to_line:
                # Join profile to its line. Positional access is required:
                # after sort_values() the index labels no longer follow the
                # row order, so ar.X[0] would not be the first sorted row.
                ar0 = [QgsPoint(ar.X.iloc[0], ar.Y.iloc[0], m=0.)]
                ar1 = [QgsPoint(ar.X.iloc[-1], ar.Y.iloc[-1], m=0.)]
                line_pts = ar0 + line_pts + ar1
            #
            f.setGeometry(QgsGeometry(QgsLineString(line_pts)))
            sink.addFeature(f, QgsFeatureSink.FastInsert)
            # Delete temp csv file; a leftover file is not worth failing for
            try:
                os.remove(the_csv)
            except OSError:
                pass

        return {self.OUTPUT: dest_id}