Exemple #1
0
    def intersect(self, data_instance, intersect_flowid=''):
        """Keep only the entries of ``data_instance`` selected by intersection.

        ``data_instance`` is handed back untouched when it is ``None``, when
        ``self.workflow_param.need_intersect`` is falsy, or when ``self.role``
        is ``consts.ARBITER``.  Otherwise a role-specific operator
        (``RawIntersectionHost`` or ``RawIntersectionGuest``) is run on the
        data, its result is joined with ``data_instance`` keeping the data
        side of each pair, and the original ``'header'`` schema entry is
        copied onto the joined result.

        Side effect: ``self.intersect_params`` is (re)assigned from
        ``self.config_path`` whenever intersection is requested, including
        for the arbiter role.

        :param data_instance: input table-like object, or ``None``.
        :param intersect_flowid: flow id forwarded to the operator's
            ``set_flowid``.
        :return: the joined data, or ``data_instance`` itself in the
            pass-through cases above.
        :raises ValueError: if ``self.role`` is not host, guest or arbiter.
        """
        if data_instance is None:
            return None

        if not self.workflow_param.need_intersect:
            LOGGER.info("need_intersect: false!")
            return data_instance

        # Remember the header up front so it can be restored on the joined
        # result at the end.
        saved_header = data_instance.schema.get('header')
        LOGGER.info("need_intersect: true!")
        param_obj = IntersectParam()
        self.intersect_params = ParamExtract.parse_param_from_config(
            param_obj, self.config_path)

        LOGGER.info("Start intersection!")
        role = self.role
        if role == consts.HOST:
            operator_cls = RawIntersectionHost
        elif role == consts.GUEST:
            operator_cls = RawIntersectionGuest
        elif role == consts.ARBITER:
            # The arbiter takes no part in the intersection itself.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        operator = operator_cls(self.intersect_params)
        operator.set_flowid(intersect_flowid)
        matched_ids = operator.run(data_instance)
        LOGGER.info("finish intersection!")

        # The join keeps the value coming from ``data_instance`` for every
        # key present on both sides.
        joined = matched_ids.join(data_instance, lambda i, d: d)
        LOGGER.info("get intersect data_instance!")
        joined.schema['header'] = saved_header
        return joined
Exemple #2
0
    def intersect(self, data_instance, intersect_flowid=''):
        """Run the role-specific raw intersection on ``data_instance``.

        ``data_instance`` is returned unchanged when it is ``None``, when
        ``self.workflow_param.need_intersect`` is falsy, or when ``self.role``
        is ``consts.ARBITER``.  Otherwise the result of the operator's
        ``run(data_instance)`` is returned as-is.

        Side effect: ``self.intersect_params`` is (re)assigned via
        ``self._load_param`` whenever intersection is requested, including
        for the arbiter role.

        :param data_instance: input table-like object, or ``None``.
        :param intersect_flowid: flow id forwarded to the operator's
            ``set_flowid``.
        :return: whatever the operator's ``run`` returns, or
            ``data_instance`` itself in the pass-through cases above.
        :raises ValueError: if ``self.role`` is not host, guest or arbiter.
        """
        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_intersect:
            LOGGER.info("need_intersect: false!")
            return data_instance

        # NOTE: the schema header is intentionally not read here; this
        # variant returns the operator result directly and never used it.
        LOGGER.info("need_intersect: true!")
        intersect_param = IntersectParam()
        self.intersect_params = self._load_param(intersect_param)

        LOGGER.info("Start intersection!")
        if self.role == consts.HOST:
            intersect_operator = RawIntersectionHost(self.intersect_params)
        elif self.role == consts.GUEST:
            intersect_operator = RawIntersectionGuest(self.intersect_params)
        elif self.role == consts.ARBITER:
            # The arbiter takes no part in the intersection itself.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        intersect_operator.set_flowid(intersect_flowid)
        intersect_ids = intersect_operator.run(data_instance)
        LOGGER.info("finish intersection!")

        return intersect_ids
 def _initialize_intersect(self, config):
     """Populate ``self.intersect_param`` from ``config``.

     A fresh ``IntersectParam`` is passed together with ``config`` to
     ``ParamExtract.parse_param_from_config`` and the returned value is
     stored on the instance.
     """
     param_obj = IntersectParam()
     parse = ParamExtract.parse_param_from_config
     self.intersect_param = parse(param_obj, config)
Exemple #4
0
 def _initialize_intersect(self):
     """Populate ``self.intersect_param`` using ``self._load_param``.

     A fresh ``IntersectParam`` is handed to ``self._load_param`` and the
     returned value is stored on the instance.
     """
     param_obj = IntersectParam()
     loaded = self._load_param(param_obj)
     self.intersect_param = loaded