def intersect(self, data_instance, intersect_flowid=''):
    """Filter ``data_instance`` down to the ids returned by the intersection operator.

    The input is handed back untouched when it is ``None``, when
    ``self.workflow_param.need_intersect`` is falsy, or when ``self.role``
    equals ``consts.ARBITER``.  For the HOST and GUEST roles the matching
    raw-intersection operator is run on ``data_instance`` and its result is
    joined with ``data_instance``, keeping the ``data_instance`` side of
    every joined pair.

    Side effect: when intersection is requested, ``self.intersect_params``
    is overwritten with parameters parsed from ``self.config_path`` (this
    happens before the role check, so it also applies to the ARBITER role).

    Args:
        data_instance: Table-like object exposing a ``schema`` mapping and
            accepted by the operator's ``run``; may be ``None``.
        intersect_flowid: Value forwarded to the operator's ``set_flowid``.

    Returns:
        The joined data with the original ``'header'`` schema entry put
        back, or ``data_instance`` itself in the pass-through cases above.

    Raises:
        ValueError: If ``self.role`` is none of HOST, GUEST or ARBITER.
    """
    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_intersect:
        LOGGER.info("need_intersect: false!")
        return data_instance

    # Grab the header up front so it can be re-attached to the joined result.
    saved_header = data_instance.schema.get('header')
    LOGGER.info("need_intersect: true!")

    default_param = IntersectParam()
    self.intersect_params = ParamExtract.parse_param_from_config(
        default_param, self.config_path)

    LOGGER.info("Start intersection!")
    if self.role == consts.HOST:
        operator_cls = RawIntersectionHost
    elif self.role == consts.GUEST:
        operator_cls = RawIntersectionGuest
    elif self.role == consts.ARBITER:
        # The arbiter does not take part in the intersection.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    operator = operator_cls(self.intersect_params)
    operator.set_flowid(intersect_flowid)
    matched_ids = operator.run(data_instance)
    LOGGER.info("finish intersection!")

    # Keep only the value coming from data_instance for every joined pair.
    joined = matched_ids.join(data_instance, lambda _id_value, row: row)
    LOGGER.info("get intersect data_instance!")

    joined.schema['header'] = saved_header
    return joined
def intersect(self, data_instance, intersect_flowid=''):
    """Run the role-specific raw intersection on ``data_instance``.

    The input is returned unchanged when it is ``None``, when
    ``self.workflow_param.need_intersect`` is falsy, or when ``self.role``
    equals ``consts.ARBITER``.  For the HOST and GUEST roles the matching
    raw-intersection operator is built from ``self.intersect_params`` and
    whatever its ``run`` returns is passed straight back to the caller.

    Side effect: when intersection is requested, ``self.intersect_params``
    is overwritten with the result of ``self._load_param`` (this happens
    before the role check, so it also applies to the ARBITER role).

    Args:
        data_instance: Object accepted by the operator's ``run``; may be
            ``None``.
        intersect_flowid: Value forwarded to the operator's ``set_flowid``.

    Returns:
        The operator's ``run`` result, or ``data_instance`` itself in the
        pass-through cases described above.

    Raises:
        ValueError: If ``self.role`` is none of HOST, GUEST or ARBITER.
    """
    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_intersect:
        LOGGER.info("need_intersect: false!")
        return data_instance

    LOGGER.info("need_intersect: true!")
    intersect_param = IntersectParam()
    self.intersect_params = self._load_param(intersect_param)

    LOGGER.info("Start intersection!")
    if self.role == consts.HOST:
        intersect_operator = RawIntersectionHost(self.intersect_params)
    elif self.role == consts.GUEST:
        intersect_operator = RawIntersectionGuest(self.intersect_params)
    elif self.role == consts.ARBITER:
        # The arbiter does not take part in the intersection.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    intersect_operator.set_flowid(intersect_flowid)
    intersect_ids = intersect_operator.run(data_instance)
    LOGGER.info("finish intersection!")
    return intersect_ids
def _initialize_intersect(self, config):
    """Populate ``self.intersect_param`` from ``config``.

    A fresh ``IntersectParam`` is handed to
    ``ParamExtract.parse_param_from_config`` together with ``config``, and
    the value that call returns is stored on the instance.

    Args:
        config: Configuration source passed through unchanged to
            ``ParamExtract.parse_param_from_config``.
    """
    default_param = IntersectParam()
    parsed_param = ParamExtract.parse_param_from_config(default_param, config)
    self.intersect_param = parsed_param
def _initialize_intersect(self):
    """Populate ``self.intersect_param`` via ``self._load_param``.

    A fresh ``IntersectParam`` is passed to ``self._load_param`` and the
    value that call returns is stored on the instance.
    """
    default_param = IntersectParam()
    loaded_param = self._load_param(default_param)
    self.intersect_param = loaded_param