예제 #1
0
def empty_feature_detection(data_instances):
    """Raise ``ValueError`` when ``data_instances`` is reported as featureless.

    The decision is delegated to ``data_overview.is_empty_feature``. When it
    returns a truthy value, the table's name and namespace (read through
    ``get_name()`` / ``get_namespace()``) are embedded in the error message.

    Args:
        data_instances: Table-like object exposing ``get_name()`` and
            ``get_namespace()``.

    Raises:
        ValueError: If the table is reported as having no features.
    """
    if not data_overview.is_empty_feature(data_instances):
        return

    message_template = (
        "Number of features of DTable is 0., table_name: {}, namespace: {}")
    name = data_instances.get_name()
    space = data_instances.get_namespace()
    raise ValueError(message_template.format(name, space))
예제 #2
0
파일: union.py 프로젝트: zpskt/FATE
    def fit(self, data):
        """Combine every table in ``data`` into one table and report counts.

        Tables are visited in the iteration order of ``data``. A table whose
        ``count()`` is 0 is logged, counted as empty and skipped; if nothing
        has been seen yet it also seeds the result table and schema. The first
        non-empty table decides whether the union works on DataInstance
        values, and every later non-empty table must agree with it.

        Per-table row counts and the final total are recorded with
        ``add_summary`` and published through ``callback_metric`` and
        ``tracker.set_metric_meta``.

        Args:
            data: dict mapping a table name to a table object that provides
                ``count()``, ``schema``, ``join``, ``union``, ``flatMap`` and
                ``mapValues``.

        Returns:
            The combined table, or ``None`` when ``data`` is an empty dict.

        Raises:
            ValueError: If ``data`` is not a dict, or if DataInstance and
                non-DataInstance tables are mixed.
        """
        LOGGER.debug(f"fit receives data is {data}")
        if not isinstance(data, dict):
            raise ValueError(
                "Union module must receive more than one table as input.")
        empty_count = 0
        combined_table = None
        combined_schema = None
        metrics = []
        # True once a non-empty table has fixed self.is_data_instance.
        # ``combined_table is None`` cannot be used for this: an empty leading
        # table seeds combined_table, which would leave the flag stale and
        # make the mixed-type check below fail spuriously.
        instance_kind_known = False

        for (key, local_table) in data.items():
            LOGGER.debug("table to combine name: {}".format(key))
            num_data = local_table.count()
            LOGGER.debug("table count: {}".format(num_data))
            metrics.append(Metric(key, num_data))
            self.add_summary(key, num_data)

            if num_data == 0:
                LOGGER.warning("Table {} is empty.".format(key))
                if combined_table is None:
                    # Keep the empty table so a result and schema still exist
                    # even if every input turns out to be empty.
                    combined_table = local_table
                    combined_schema = local_table.schema
                empty_count += 1
                continue

            local_is_data_instance = self.check_is_data_instance(local_table)
            if not instance_kind_known:
                self.is_data_instance = local_is_data_instance
                instance_kind_known = True
            LOGGER.debug(f"self.is_data_instance is {self.is_data_instance}, "
                         f"local_is_data_instance is {local_is_data_instance}")
            if self.is_data_instance != local_is_data_instance:
                raise ValueError(
                    "Cannot combine DataInstance and non-DataInstance object. Union aborted."
                )

            if self.is_data_instance:
                self.is_empty_feature = data_overview.is_empty_feature(
                    local_table)
                if self.is_empty_feature:
                    LOGGER.warning("Table {} has empty feature.".format(key))
                else:
                    self.check_schema_content(local_table.schema)

            if combined_table is None:
                # first table to combine
                combined_table = local_table
                combined_schema = local_table.schema
            else:
                self.check_id(local_table, combined_table)
                self.check_label_name(local_table, combined_table)
                self.check_header(local_table, combined_table)
                if self.keep_duplicate:
                    # Keys present in both tables are collected, then the
                    # incoming table is passed through self._renew_id
                    # (presumably to rewrite those keys -- TODO confirm).
                    repeated_ids = combined_table.join(local_table,
                                                       lambda v1, v2: 1)
                    self.repeated_ids = [v[0] for v in repeated_ids.collect()]
                    self.key = key
                    local_table = local_table.flatMap(self._renew_id)

                combined_table = combined_table.union(local_table,
                                                      self._keep_first)

                # Re-attach the schema of the first table to the union result.
                combined_table.schema = combined_schema

            # only check feature length if not empty
            if self.is_data_instance and not self.is_empty_feature:
                self.feature_count = len(combined_schema.get("header"))
                LOGGER.debug("feature count: {}".format(self.feature_count))
                # NOTE(review): the mapped table is discarded; this presumably
                # relies on mapValues running eagerly so that
                # check_feature_length can raise -- TODO confirm.
                combined_table.mapValues(self.check_feature_length)

        if combined_table is None:
            num_data = 0
            LOGGER.warning(
                "All tables provided are empty or have empty features.")
        else:
            num_data = combined_table.count()
        metrics.append(Metric("Total", num_data))
        self.add_summary("Total", num_data)

        self.callback_metric(metric_name=self.metric_name,
                             metric_namespace=self.metric_namespace,
                             metric_data=metrics)
        self.tracker.set_metric_meta(metric_namespace=self.metric_namespace,
                                     metric_name=self.metric_name,
                                     metric_meta=MetricMeta(
                                         name=self.metric_name,
                                         metric_type=self.metric_type))

        LOGGER.info(
            "Union operation finished. Total {} empty tables encountered.".
            format(empty_count))

        return combined_table
예제 #3
0
def empty_feature_detection(data_instances):
    """Raise ``ValueError`` when ``data_instances`` is reported as featureless.

    The decision is delegated to ``data_overview.is_empty_feature``; the
    table object itself is interpolated into the error message.

    Args:
        data_instances: Table object to inspect.

    Raises:
        ValueError: If the table is reported as having no features.
    """
    if not data_overview.is_empty_feature(data_instances):
        return
    raise ValueError(f"Number of features of Table is 0: {data_instances}")
예제 #4
0
파일: union.py 프로젝트: xthzhjwzyc/FedRec
    def fit(self, data):
        """Union all non-empty tables in ``data`` and publish row-count metrics.

        Tables are visited in the iteration order of ``data``. Tables whose
        ``count()`` is 0 are logged, counted and skipped. The first non-empty
        table becomes the running result; each later one is validated against
        the stored schema (id, label name, header) and merged with ``union``.

        Args:
            data: dict mapping a table name to a table object that provides
                ``count()``, ``schema``, ``union`` and ``mapValues``.

        Returns:
            The merged table, or ``None`` when no non-empty table was found.

        Raises:
            ValueError: If ``data`` is not a dict.
        """
        if not isinstance(data, dict):
            raise ValueError(
                "Union module must receive more than one table as input.")

        metrics = []
        skipped = 0
        merged = None
        merged_schema = None

        for name, table in data.items():
            LOGGER.debug("table to combine name: {}".format(name))
            row_count = table.count()
            LOGGER.debug("table count: {}".format(row_count))
            schema = table.schema
            metrics.append(Metric(name, row_count))

            if row_count == 0:
                # Empty inputs only contribute to the metrics and the counter.
                LOGGER.warning("Table {} is empty.".format(name))
                skipped += 1
                continue

            is_first = merged is None
            if is_first:
                # Called for the first non-empty table only; presumably this
                # is what sets self.is_data_instance -- TODO confirm.
                self.check_is_data_instance(table)

            if self.is_data_instance:
                self.is_empty_feature = data_overview.is_empty_feature(table)
                if self.is_empty_feature:
                    LOGGER.warning("Table {} has empty feature.".format(name))

            if not is_first:
                # Later tables must match the stored schema before merging.
                self.check_schema_id(schema, merged_schema)
                self.check_schema_label_name(schema, merged_schema)
                self.check_schema_header(schema, merged_schema)
                merged = merged.union(table, self._keep_first)
            else:
                # The first non-empty table seeds the result.
                merged = table
                if self.is_data_instance:
                    merged_schema = table.schema
                    merged.schema = merged_schema

            # The feature-length check applies to non-empty DataInstances only.
            if not self.is_data_instance or self.is_empty_feature:
                continue
            self.feature_count = len(merged_schema.get("header"))
            LOGGER.debug("feature count: {}".format(self.feature_count))
            merged.mapValues(self.check_feature_length)

        if merged is not None:
            total = merged.count()
        else:
            LOGGER.warning(
                "All tables provided are empty or have empty features.")
            total = 0
        metrics.append(Metric("Total", total))

        self.callback_metric(
            metric_name=self.metric_name,
            metric_namespace=self.metric_namespace,
            metric_data=metrics,
        )
        meta = MetricMeta(name=self.metric_name, metric_type=self.metric_type)
        self.tracker.set_metric_meta(
            metric_namespace=self.metric_namespace,
            metric_name=self.metric_name,
            metric_meta=meta,
        )

        summary = "Union operation finished. Total {} empty tables encountered."
        LOGGER.info(summary.format(skipped))
        return merged