def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION):
    """Makes a prediction based on a number of field values.

    The input fields must be keyed by Id. There are two possible
    strategies to predict when the value for the splitting field
    is missing:
        0 - LAST_PREDICTION: the last issued prediction is returned.
        1 - PROPORTIONAL: as we cannot choose between the two branches
            in the tree that stem from this split, we consider both.
            The algorithm goes on until the final leaves are reached
            and all their predictions are used to decide the final
            prediction.

    :param input_data: dict of input field values keyed by field Id
    :param path: list of rule strings accumulated while descending
        the tree (mutated in place; a new list is used when None)
    :param missing_strategy: LAST_PREDICTION or PROPORTIONAL
    :return: a Prediction object
    """
    if path is None:
        path = []
    if missing_strategy == PROPORTIONAL:
        (final_distribution, d_min, d_max, last_node,
         population, parent_node) = self.predict_proportional(
             input_data, path=path)
        if self.regression:
            # Materialize the items once: dict views are not
            # subscriptable in Python 3, and this also avoids
            # building the view twice.
            items = list(final_distribution.items())
            # singular case:
            # when the prediction is the one given in a 1-instance node
            if len(items) == 1:
                prediction, instances = items[0]
                if instances == 1:
                    return Prediction(
                        last_node.output,
                        path,
                        last_node.confidence,
                        distribution=(last_node.distribution if not
                                      self.weighted else
                                      last_node.weighted_distribution),
                        count=instances,
                        median=last_node.median,
                        distribution_unit=last_node.distribution_unit,
                        children=last_node.children,
                        d_min=last_node.min,
                        d_max=last_node.max)
            # when there's more instances, sort elements by their mean
            distribution = [
                list(element) for element in
                sorted(items, key=lambda x: x[0])]
            distribution_unit = ('bins' if len(distribution) > BINS_LIMIT
                                 else 'counts')
            distribution = merge_bins(distribution, BINS_LIMIT)
            total_instances = sum(
                instances for _, instances in distribution)
            if len(distribution) == 1:
                # where there's only one bin, there will be no error, but
                # we use a correction derived from the parent's error
                prediction = distribution[0][0]
                if total_instances < 2:
                    total_instances = 1
                try:
                    # some strange models can have nodes with no confidence
                    confidence = round(
                        parent_node.confidence /
                        math.sqrt(total_instances), PRECISION)
                except AttributeError:
                    confidence = None
            else:
                prediction = mean(distribution)
                confidence = round(
                    regression_error(
                        unbiased_sample_variance(distribution, prediction),
                        total_instances), PRECISION)
            return Prediction(
                prediction, path, confidence,
                distribution=distribution,
                count=total_instances,
                median=dist_median(distribution, total_instances),
                distribution_unit=distribution_unit,
                children=last_node.children,
                d_min=d_min,
                d_max=d_max)
        # categorical: pick the majority class; ties broken by the
        # lexically smaller category (sort by -count, then category)
        distribution = [
            list(element) for element in
            sorted(final_distribution.items(),
                   key=lambda x: (-x[1], x[0]))]
        return Prediction(
            distribution[0][0],
            path,
            ws_confidence(distribution[0][0], final_distribution,
                          ws_n=population),
            distribution=distribution,
            count=population,
            median=None,
            distribution_unit='categorical',
            children=last_node.children)
    # LAST_PREDICTION: descend while a child's predicate matches,
    # recording the rule taken at each step.
    if self.children:
        for child in self.children:
            if child.predicate.apply(input_data, self.fields):
                path.append(child.predicate.to_rule(self.fields))
                return child.predict(input_data, path=path)
    # leaf (or no matching child): answer with this node's own stats
    if self.weighted:
        output_distribution = self.weighted_distribution
        output_unit = self.weighted_distribution_unit
    else:
        output_distribution = self.distribution
        output_unit = self.distribution_unit
    return Prediction(
        self.output, path, self.confidence,
        distribution=output_distribution,
        count=get_instances(output_distribution),
        median=None if not self.regression else self.median,
        distribution_unit=output_unit,
        children=self.children,
        d_min=None if not self.regression else self.min,
        d_max=None if not self.regression else self.max)
def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION):
    """Makes a prediction based on a number of field values.

    The input fields must be keyed by Id. There are two possible
    strategies to predict when the value for the splitting field
    is missing:
        0 - LAST_PREDICTION: the last issued prediction is returned.
        1 - PROPORTIONAL: as we cannot choose between the two branches
            in the tree that stem from this split, we consider both.
            The algorithm goes on until the final leaves are reached
            and all their predictions are used to decide the final
            prediction.

    :param input_data: dict of input field values keyed by field Id
    :param path: list of rule strings accumulated while descending
        the tree (mutated in place; a new list is used when None)
    :param missing_strategy: LAST_PREDICTION or PROPORTIONAL
    :return: a Prediction object
    """
    if path is None:
        path = []
    if missing_strategy == PROPORTIONAL:
        (final_distribution,
         last_node) = self.predict_proportional(input_data, path=path)
        if self.regression:
            # Materialize the items once: dict views are not
            # subscriptable in Python 3, and this also avoids
            # building the view twice.
            items = list(final_distribution.items())
            # singular case:
            # when the prediction is the one given in a 1-instance node
            if len(items) == 1:
                prediction, instances = items[0]
                if instances == 1:
                    return Prediction(
                        last_node.output,
                        path,
                        last_node.confidence,
                        distribution=last_node.distribution,
                        count=instances,
                        median=last_node.median,
                        distribution_unit=last_node.distribution_unit,
                        children=last_node.children)
            # when there's more instances, sort elements by their mean
            distribution = [list(element) for element in
                            sorted(items, key=lambda x: x[0])]
            distribution_unit = ('bins' if len(distribution) > BINS_LIMIT
                                 else 'counts')
            distribution = merge_bins(distribution, BINS_LIMIT)
            total_instances = sum(instances
                                  for _, instances in distribution)
            prediction = mean(distribution)
            confidence = regression_error(
                unbiased_sample_variance(distribution, prediction),
                total_instances)
            return Prediction(
                prediction, path, confidence,
                distribution=distribution,
                count=total_instances,
                median=dist_median(distribution, total_instances),
                distribution_unit=distribution_unit,
                children=last_node.children)
        # categorical: pick the majority class; ties broken by the
        # lexically smaller category (sort by -count, then category)
        distribution = [list(element) for element in
                        sorted(final_distribution.items(),
                               key=lambda x: (-x[1], x[0]))]
        return Prediction(
            distribution[0][0],
            path,
            ws_confidence(distribution[0][0], final_distribution),
            distribution=distribution,
            count=get_instances(distribution),
            median=None,
            distribution_unit='categorical',
            children=last_node.children)
    # LAST_PREDICTION: descend while a child's predicate matches,
    # recording the rule taken at each step.
    if self.children:
        for child in self.children:
            if child.predicate.apply(input_data, self.fields):
                path.append(child.predicate.to_rule(self.fields))
                return child.predict(input_data, path=path)
    # leaf (or no matching child): answer with this node's own stats
    return Prediction(
        self.output, path, self.confidence,
        distribution=self.distribution,
        count=get_instances(self.distribution),
        median=None if not self.regression else self.median,
        distribution_unit=self.distribution_unit,
        children=self.children)
def regression_proportional_predict(tree, weighted, fields, input_data):
    """Proportional prediction for regressions

    Aggregates the distributions of every reachable leaf (via
    ``proportional_predict``) into a single regression Prediction.

    :param tree: node structure indexed by the OFFSETS layout
        (nodes appear to be positional sequences — attributes are read
        as ``node[offset["..."]]``)
    :param weighted: truthy for weighted trees; selects the offset
        layout and the weighted distribution field
    :param fields: fields structure used to evaluate predicates
    :param input_data: dict of input field values
    :return: a Prediction object
    """
    # offset table maps attribute names to positions in the node tuples;
    # keyed by the string form of the weighted flag
    offset = OFFSETS[str(weighted)]
    (final_distribution, d_min, d_max, last_node, population,
     parent_node, path) = proportional_predict( \
        tree, offset, fields, input_data, path=None)
    # singular case:
    # when the prediction is the one given in a 1-instance node
    if len(list(final_distribution.items())) == 1:
        prediction, instances = list(final_distribution.items())[0]
        if instances == 1:
            return Prediction( \
                last_node[offset["output"]],
                path,
                last_node[offset["confidence"]],
                distribution=last_node[offset["distribution"]] \
                    if not weighted else \
                    last_node[offset["wdistribution"]],
                count=instances,
                median=last_node[offset["median"]],
                distribution_unit=last_node[offset["distribution_unit"]],
                # "children#" holds the child count; 0 means a leaf
                children=[] if last_node[offset["children#"]] == 0 else \
                    last_node[offset["children"]],
                d_min=last_node[offset["min"]],
                d_max=last_node[offset["max"]])
    # when there's more instances, sort elements by their mean
    distribution = [
        list(element) for element in
        sorted(list(final_distribution.items()), key=lambda x: x[0])]
    distribution_unit = ('bins' if len(distribution) > BINS_LIMIT
                         else 'counts')
    distribution = merge_bins(distribution, BINS_LIMIT)
    total_instances = sum([instances for _, instances in distribution])
    if len(distribution) == 1:
        # where there's only one bin, there will be no error, but
        # we use a correction derived from the parent's error
        prediction = distribution[0][0]
        if total_instances < 2:
            total_instances = 1
        try:
            # some strange models can have nodes with no confidence
            confidence = round(
                parent_node[offset["confidence"]] /
                math.sqrt(total_instances),
                PRECISION)
        except AttributeError:
            confidence = None
    else:
        prediction = mean(distribution)
        # weighted trees use the unweighted population to
        # compute the associated error
        confidence = round(
            regression_error(
                unbiased_sample_variance(distribution, prediction),
                population), PRECISION)
    return Prediction( \
        prediction,
        path,
        confidence,
        distribution=distribution,
        count=total_instances,
        median=dist_median(distribution, total_instances),
        distribution_unit=distribution_unit,
        children=[] if last_node[offset["children#"]] == 0 else \
            last_node[offset["children"]],
        d_min=d_min,
        d_max=d_max)