def run(self, original_model, train_data, validate_data, feature_mask):
    """Fit a copy of ``original_model`` on (possibly feature-sliced) data.

    A deep copy of ``original_model`` is tagged with this step's flow id
    and then fitted. Unless the original model's role is the arbiter, each
    value of ``train_data`` is first passed through
    ``Step.slice_data_instance`` with ``feature_mask``, the resulting table
    receives the schema produced by ``Step.get_new_schema``, and the copy's
    ``header`` is taken from that schema. For the arbiter role the training
    data is used unchanged.

    Args:
        original_model: Model to copy; it is not fitted itself.
        train_data: Table-like object exposing ``mapValues``.
        validate_data: Accepted for interface compatibility; not read here.
        feature_mask: Forwarded to ``Step.slice_data_instance`` and
            ``Step.get_new_schema``.

    Returns:
        The fitted copy of ``original_model``.
    """
    fitted = copy.deepcopy(original_model)
    fitted.set_flowid(self.get_flowid())

    if original_model.role == consts.ARBITER:
        # The arbiter branch performs no slicing and no schema update.
        fit_input = train_data
    else:
        fit_input = train_data.mapValues(
            lambda instance: Step.slice_data_instance(instance, feature_mask)
        )
        masked_schema = Step.get_new_schema(train_data, feature_mask)
        LOGGER.debug("new schema is: {}".format(masked_schema))
        # Attach the new schema to the mapped table and mirror its header
        # onto the model copy before fitting.
        set_schema(fit_input, masked_schema)
        fitted.header = masked_schema.get("header")

    fitted.fit(fit_input)
    return fitted
def _set_output_table_schema(data_inst, schema):
    """Attach ``schema`` to ``data_inst`` when there is something to tag.

    Nothing happens if ``schema`` is ``None`` or if ``data_inst.count()``
    is not greater than zero; otherwise the schema is applied through
    ``data_io.set_schema``.

    Args:
        data_inst: Table-like object exposing ``count()``.
        schema: Schema to attach, or ``None`` to skip.
    """
    if schema is None:
        # Return before touching the table: count() is only evaluated
        # when a schema was actually supplied.
        return
    if data_inst.count() > 0:
        data_io.set_schema(data_inst, schema)