def to_mojo(self, mojo: MojoWriter, iframe: MojoFrame, group_uuid=None, group_name=None):
    """Write one ``MjT_Log`` step per input feature, then cast the result.

    Each column selected by ``self.input_feature_names`` gets its own
    ``MjT_Log`` step writing into a new ``np.float64`` column of the same
    name. The collected output columns are finally passed through
    ``AsType(dtype_global())``.

    Args:
        mojo: writer that the transformer steps are appended to with ``+=``.
        iframe: frame indexed with ``self.input_feature_names``.
        group_uuid: accepted for interface compatibility; always replaced
            by a fresh ``uuid4`` string.
        group_name: accepted for interface compatibility; always replaced
            by this class's name.

    Returns:
        The frame returned by ``AsType(...).write_to_mojo``.
    """
    import uuid

    from h2oaicore.mojo import MojoColumn, MojoFrame
    from h2oaicore.mojo_transformers import MjT_Log
    from h2oaicore.mojo_transformers_utils import AsType
    # Imported locally like every other h2oaicore name used here, so the
    # method does not rely on a module-level import of dtype_global.
    from h2oaicore.systemutils import dtype_global

    # Every step written by this call shares one freshly generated group id.
    group_uuid = str(uuid.uuid4())
    group_name = self.__class__.__name__

    xnew = iframe[self.input_feature_names]
    oframe = MojoFrame()
    for col in xnew:
        ocol = MojoColumn(name=col.name, dtype=np.float64)
        ocol_frame = MojoFrame(columns=[ocol])
        mojo += MjT_Log(
            iframe=MojoFrame(columns=[col]),
            oframe=ocol_frame,
            group_uuid=group_uuid,
            group_name=group_name,
        )
        oframe += ocol

    oframe = AsType(dtype_global()).write_to_mojo(
        mojo, oframe, group_uuid=group_uuid, group_name=group_name
    )
    return oframe
def to_mojo(self, mojo: MojoWriter, iframe: MojoFrame):
    """Write one ``MjT_Log`` step per input feature into ``mojo``.

    Args:
        mojo: writer that the transformer steps are appended to with ``+=``.
        iframe: frame indexed with ``self.input_feature_names``.

    Returns:
        A new ``MojoFrame`` collecting the ``np.float64`` output columns,
        one per selected input column and carrying the same name.
    """
    from h2oaicore.mojo import MojoColumn, MojoFrame
    from h2oaicore.mojo_transformers import MjT_Log

    selected = iframe[self.input_feature_names]
    collected = MojoFrame()
    for source_col in selected:
        target_col = MojoColumn(name=source_col.name, dtype=np.float64)
        # Output frame is built before the input frame, as in the
        # original statement order.
        target_frame = MojoFrame(columns=[target_col])
        source_frame = MojoFrame(columns=[source_col])
        mojo += MjT_Log(iframe=source_frame, oframe=target_frame)
        collected += target_col
    return collected
def to_mojo(self, mojo: MojoWriter, iframe: MojoFrame):
    """Write one ``MjT_Log`` step per input feature, then cast the result.

    Each column selected by ``self.input_feature_names`` gets its own
    ``MjT_Log`` step writing into a new ``np.float64`` column of the same
    name. The collected output columns are finally passed through
    ``AsType(dtype_global())``.

    Args:
        mojo: writer that the transformer steps are appended to with ``+=``.
        iframe: frame indexed with ``self.input_feature_names``.

    Returns:
        The frame returned by ``AsType(...).write_to_mojo``.
    """
    from h2oaicore.mojo import MojoColumn, MojoFrame
    from h2oaicore.mojo_transformers import MjT_Log
    # AsType was used below without being imported; bring it into scope
    # locally like the other h2oaicore names this method needs.
    from h2oaicore.mojo_transformers_utils import AsType
    from h2oaicore.systemutils import dtype_global

    xnew = iframe[self.input_feature_names]
    oframe = MojoFrame()
    for col in xnew:
        ocol = MojoColumn(name=col.name, dtype=np.float64)
        ocol_frame = MojoFrame(columns=[ocol])
        mojo += MjT_Log(iframe=MojoFrame(columns=[col]), oframe=ocol_frame)
        oframe += ocol

    oframe = AsType(dtype_global()).write_to_mojo(mojo, oframe)
    return oframe
def to_mojo(self, mojo: MojoWriter, iframe: MojoFrame, group_uuid=None, group_name=None):
    """Write the calibration pipeline for this model into ``mojo``.

    The parent's ``write_to_mojo`` is invoked first. Every column of the
    frame it returns is then run through the steps selected by
    ``self.calib_method`` ("sigmoid", "isotonic" or "spline"). When more
    than one calibrated column results, each one is finally divided by the
    "sum" aggregate of all of them.

    Args:
        mojo: writer that transformer steps are appended to with ``+=``.
        iframe: input frame forwarded to the parent's ``write_to_mojo``.
        group_uuid: ignored; replaced by a fresh ``uuid4`` string.
        group_name: ignored; replaced by this class's name.

    Returns:
        Frame of calibrated columns (normalized when there are several).

    Raises:
        RuntimeError: while processing a column, if ``self.calib_method``
            is none of the three handled values.
    """
    from h2oaicore.mojo import MojoColumn
    from h2oaicore.mojo_transformers import (MjT_ConstBinaryOp, MjT_Sigmoid, MjT_AsType, MjT_Agg,
                                             MjT_BinaryOp, MjT_IntervalMap, MjT_Clip, MjT_Log)
    import uuid
    # The caller-supplied group arguments are overwritten unconditionally:
    # all steps written below share this freshly generated group id.
    group_uuid = str(uuid.uuid4())
    group_name = self.__class__.__name__
    _iframe = super().write_to_mojo(mojo=mojo, iframe=iframe, group_uuid=group_uuid,
                                    group_name=group_name)
    res = MojoFrame()

    def _get_new_pair(left, right):
        # Build a fresh frame holding `left` followed by `right`, used as
        # the input of binary / aggregate steps.
        pair = MojoFrame()
        pair.cbind(left)
        pair.cbind(right)
        return pair

    for c in range(len(_iframe)):
        icol = _iframe.get_column(c)

        def _get_new_col(name, type_=None):
            # Single-column frame named `name`. The dtype defaults to the
            # type of `icol`, which is looked up when the helper is
            # *called*, i.e. whatever `icol` is bound to at that moment.
            ocol_ = MojoColumn(name=name, dtype=icol.type if type_ is None else type_)
            oframe_ = MojoFrame(columns=[ocol_])
            return oframe_

        if self.calib_method == "sigmoid":
            oframe1 = _get_new_col(icol.name + "_slope")
            oframe2 = _get_new_col(icol.name + "_intercept")
            oframe3 = _get_new_col(icol.name + "_negative")
            oframe4 = _get_new_col(icol.name + "_calibrated", type_="float64")
            oframe5 = _get_new_col(icol.name + "_astype")
            # Chain: x * slope -> + intercept -> * -1 -> sigmoid -> float32.
            mojo += MjT_ConstBinaryOp(iframe=_iframe[c], oframe=oframe1, op="multiply",
                                      const=self.slope[c], pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            mojo += MjT_ConstBinaryOp(iframe=oframe1, oframe=oframe2, op="add",
                                      const=self.intercept[c], pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            mojo += MjT_ConstBinaryOp(iframe=oframe2, oframe=oframe3, op="multiply",
                                      const=-1., pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            mojo += MjT_Sigmoid(iframe=oframe3, oframe=oframe4,
                                group_uuid=group_uuid, group_name=group_name)
            mojo += MjT_AsType(iframe=oframe4, oframe=oframe5, type="float32",
                               group_uuid=group_uuid, group_name=group_name)
            res.cbind(oframe5)
        elif self.calib_method == "isotonic":
            X = list(self._necessary_X_[c])
            y = list(self._necessary_y_[c])
            if len(y) == 1:
                # Only one stored y value: zero the input, then add y[0],
                # so the output is that constant.
                oframe1 = _get_new_col(icol.name + "_zeroing")
                new_y = _get_new_col(icol.name + "_addingConst")
                mojo += MjT_ConstBinaryOp(iframe=_iframe[c], oframe=oframe1, op="multiply",
                                          const=0, pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
                mojo += MjT_ConstBinaryOp(iframe=oframe1, oframe=new_y, op="add",
                                          const=y[0], pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
            else:
                # Lookup rows of [upper X, lower X, upper y, lower y]: the
                # first / last stored points are repeated at the ends and a
                # trailing None row is appended.
                # NOTE(review): presumably the None row covers missing
                # input - confirm against MjT_IntervalMap.
                max_X = X + [self._necessary_X_[c][-1], None]
                min_X = [self._necessary_X_[c][0]] + X + [None]
                max_y = y + [self._necessary_y_[c][-1], None]
                min_y = [self._necessary_y_[c][0]] + y + [None]
                ocol1 = MojoColumn(name=icol.name + "_maxX", dtype=icol.type)
                ocol2 = MojoColumn(name=icol.name + "_minX", dtype=icol.type)
                ocol3 = MojoColumn(name=icol.name + "_maxY", dtype=icol.type)
                ocol4 = MojoColumn(name=icol.name + "_minY", dtype=icol.type)
                XY = MojoFrame(columns=[ocol1, ocol2, ocol3, ocol4])
                # clipping
                inp_clipped = _get_new_col(icol.name + "_clipped")
                mojo += MjT_Clip(iframe=_iframe[c], oframe=inp_clipped,
                                 min=self.X_min_[c], max=self.X_max_[c],
                                 group_uuid=group_uuid, group_name=group_name)
                # search for coordinates
                mojo += MjT_IntervalMap(
                    iframe=inp_clipped, oframe=XY, breakpoints=X,
                    values=[[x1, x0, y1, y0]
                            for x1, x0, y1, y0 in zip(max_X, min_X, max_y, min_y)],
                    group_uuid=group_uuid, group_name=group_name)
                # interpolation
                # Steps below combine to:
                #   newY = minY + (maxY - minY) / (maxX - minX) * (clipped - minX)
                # (XY[0..3] are taken to be maxX, minX, maxY, minY in the
                # order the columns were passed to XY above).
                curr_diff = _get_new_col(icol.name + "_currDiff")
                pair = _get_new_pair(inp_clipped, XY[1])
                mojo += MjT_BinaryOp(iframe=pair, oframe=curr_diff, op="subtract",
                                     group_uuid=group_uuid, group_name=group_name)
                y_diff = _get_new_col(icol.name + "_yDiff")
                pair = _get_new_pair(XY[2], XY[3])
                mojo += MjT_BinaryOp(iframe=pair, oframe=y_diff, op="subtract",
                                     group_uuid=group_uuid, group_name=group_name)
                X_diff = _get_new_col(icol.name + "_XDiff")
                pair = _get_new_pair(XY[0], XY[1])
                mojo += MjT_BinaryOp(iframe=pair, oframe=X_diff, op="subtract",
                                     group_uuid=group_uuid, group_name=group_name)
                xy_ratio = _get_new_col(icol.name + "_xyRatio")
                pair = _get_new_pair(y_diff, X_diff)
                mojo += MjT_BinaryOp(iframe=pair, oframe=xy_ratio, op="divide", eps=1e-10,
                                     group_uuid=group_uuid, group_name=group_name)
                scaled_cur_diff = _get_new_col(icol.name + "_scaledCurDiff")
                pair = _get_new_pair(xy_ratio, curr_diff)
                mojo += MjT_BinaryOp(iframe=pair, oframe=scaled_cur_diff, op="multiply",
                                     group_uuid=group_uuid, group_name=group_name)
                new_y = _get_new_col(icol.name + "_newY")
                pair = _get_new_pair(XY[3], scaled_cur_diff)
                mojo += MjT_BinaryOp(iframe=pair, oframe=new_y, op="add",
                                     group_uuid=group_uuid, group_name=group_name)
            # `new_y` is set by both branches above.
            res.cbind(new_y)
        elif self.calib_method == "spline":
            if self.calib_logodds_scale:
                # Clip to [eps, 1 - eps], then log(clipped / (1 - clipped)).
                # NOTE(review): assumes pos="left" yields `const - x`
                # (here 1 - clipped) - confirm against MjT_ConstBinaryOp.
                oframe1 = _get_new_col(icol.name + "_clipped")
                mojo += MjT_Clip(iframe=_iframe[c], oframe=oframe1,
                                 min=self.calib_logodds_eps,
                                 max=1 - self.calib_logodds_eps,
                                 group_uuid=group_uuid, group_name=group_name)
                oframe2 = _get_new_col(icol.name + "_inverse")
                mojo += MjT_ConstBinaryOp(iframe=oframe1, oframe=oframe2, op="subtract",
                                          const=1., pos="left",
                                          group_uuid=group_uuid, group_name=group_name)
                oframe3 = _get_new_col(icol.name + "_ratio")
                pair = _get_new_pair(oframe1, oframe2)
                mojo += MjT_BinaryOp(iframe=pair, oframe=oframe3, op="divide", eps=1e-10,
                                     group_uuid=group_uuid, group_name=group_name)
                oframe4 = _get_new_col(icol.name + "_log")
                mojo += MjT_Log(iframe=oframe3, oframe=oframe4,
                                group_uuid=group_uuid, group_name=group_name)
                inp = oframe4
            else:
                inp = _iframe[c]
            knots = self.calib_knot_vec_tr[c]
            num_knots = len(knots)
            # zero col
            zeros = _get_new_col(icol.name + "_zeros")
            mojo += MjT_ConstBinaryOp(iframe=inp, oframe=zeros, op="multiply",
                                      const=0., pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            # ones col
            ones = _get_new_col(icol.name + f"_ones")
            mojo += MjT_ConstBinaryOp(iframe=zeros, oframe=ones, op="add",
                                      const=1., pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            # last knot calc
            denom = knots[-1] - knots[-2]

            def _to_mojo_helper(mojo, inp, val, zeros, suffix=""):
                # Writes the steps for max(inp - val, zeros) cubed and
                # returns the frame holding the cube.
                # NOTE(review): `mojo +=` rebinds only this local name, so
                # this relies on the writer's += mutating in place - confirm.
                oframe5 = _get_new_col(icol.name + f"_{suffix}diff")
                mojo += MjT_ConstBinaryOp(iframe=inp, oframe=oframe5, op="subtract",
                                          const=val, pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
                oframe6 = _get_new_col(icol.name + f"_{suffix}max")
                pair = _get_new_pair(oframe5, zeros)
                mojo += MjT_Agg(iframe=pair, oframe=oframe6, op="max",
                                group_uuid=group_uuid, group_name=group_name)
                oframe7 = _get_new_col(icol.name + f"_{suffix}pwr")
                oframe8 = _get_new_col(icol.name + f"_{suffix}pwr2")
                oframe9 = _get_new_col(icol.name + f"_{suffix}pwr3")
                # Multiplying by 1. yields a second frame with the same
                # values, so the value can be paired with itself below.
                mojo += MjT_ConstBinaryOp(iframe=oframe6, oframe=oframe7, op="multiply",
                                          const=1., pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
                pair = _get_new_pair(oframe6, oframe7)
                mojo += MjT_BinaryOp(iframe=pair, oframe=oframe8, op="multiply",
                                     group_uuid=group_uuid, group_name=group_name)
                pair = _get_new_pair(oframe8, oframe7)
                mojo += MjT_BinaryOp(iframe=pair, oframe=oframe9, op="multiply",
                                     group_uuid=group_uuid, group_name=group_name)
                return oframe9

            # Term shared by every basis column:
            #   (cube(knots[-2]) - cube(knots[-1])) / (knots[-1] - knots[-2])
            last_knot2 = _to_mojo_helper(mojo=mojo, inp=inp, val=knots[-2], zeros=zeros,
                                         suffix="last2")
            last_knot1 = _to_mojo_helper(mojo=mojo, inp=inp, val=knots[-1], zeros=zeros,
                                         suffix="last1")
            oframe5 = _get_new_col(icol.name + f"_last21diff")
            pair = _get_new_pair(last_knot2, last_knot1)
            mojo += MjT_BinaryOp(iframe=pair, oframe=oframe5, op="subtract",
                                 group_uuid=group_uuid, group_name=group_name)
            last_knot = _get_new_col(icol.name + f"_lastKnot")
            mojo += MjT_ConstBinaryOp(iframe=oframe5, oframe=last_knot, op="divide",
                                      const=denom, pos="right",
                                      group_uuid=group_uuid, group_name=group_name)
            # all knots calc
            # One basis column per i in 1..num_knots-2:
            #   (cube(knots[i-1]) - cube(knots[-1])) / (knots[-1] - knots[i-1])
            #   minus the shared `last_knot` term.
            results = []
            for i in range(1, num_knots - 1):
                denom = knots[-1] - knots[i - 1]
                knot1 = _to_mojo_helper(mojo=mojo, inp=inp, val=knots[i - 1], zeros=zeros,
                                        suffix=f"knot{i}m1")
                knot2 = _to_mojo_helper(mojo=mojo, inp=inp, val=knots[-1], zeros=zeros,
                                        suffix=f"knotm1f{i}")
                oframe_ = _get_new_col(icol.name + f"_knots_{i}_diff")
                pair = _get_new_pair(knot1, knot2)
                mojo += MjT_BinaryOp(iframe=pair, oframe=oframe_, op="subtract",
                                     group_uuid=group_uuid, group_name=group_name)
                div_res = _get_new_col(icol.name + f"_dv_{i}")
                mojo += MjT_ConstBinaryOp(iframe=oframe_, oframe=div_res, op="divide",
                                          const=denom, pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
                diff_res = _get_new_col(icol.name + f"_diff_{i}")
                pair = _get_new_pair(div_res, last_knot)
                mojo += MjT_BinaryOp(iframe=pair, oframe=diff_res, op="subtract",
                                     group_uuid=group_uuid, group_name=group_name)
                results.append(diff_res)
            # Final basis: constant column, the input itself, then the
            # per-knot columns; it must line up with the coefficients.
            results = [ones, inp] + results
            assert len(results) == len(
                self.calib_basis_coef_vec[c].ravel()), "Something went wrong :("
            # linear model
            results2 = MojoFrame()
            for i, (frame_, const_) in enumerate(
                    zip(results, self.calib_basis_coef_vec[c].ravel())):
                res_fr = _get_new_col(icol.name + f"_logits_{i}")
                mojo += MjT_ConstBinaryOp(iframe=frame_, oframe=res_fr, op="multiply",
                                          const=const_, pos="right",
                                          group_uuid=group_uuid, group_name=group_name)
                results2.cbind(res_fr)
            ocol_logits_sum = _get_new_col(icol.name + f"_logits_sum")
            mojo += MjT_Agg(iframe=results2, oframe=ocol_logits_sum, op="sum",
                            group_uuid=group_uuid, group_name=group_name)
            # sigmoid
            ocol_spline_sigmoid = _get_new_col(icol.name + f"_spline_sigmoid",
                                               type_="float64")
            mojo += MjT_Sigmoid(iframe=ocol_logits_sum, oframe=ocol_spline_sigmoid,
                                group_uuid=group_uuid, group_name=group_name)
            ocol_spline_sigmoid_astype = _get_new_col(
                icol.name + f"_spline_sigmoid_astype")
            mojo += MjT_AsType(iframe=ocol_spline_sigmoid, oframe=ocol_spline_sigmoid_astype,
                               type="float32",
                               group_uuid=group_uuid, group_name=group_name)
            res.cbind(ocol_spline_sigmoid_astype)
        else:
            raise RuntimeError('Unknown calibration method in to_mojo()')
    # normalization
    # With several output columns, divide each one by the "sum" aggregate
    # of all of them.
    if len(res) > 1:
        res2 = MojoFrame()
        # `_get_new_col` still reads `icol` from the enclosing scope: here
        # it is the last column of the loop above, and inside the loop
        # below it is the column just fetched from `res`.
        oframe_sum = _get_new_col(self.__class__.__name__ + "_sum")
        mojo += MjT_Agg(iframe=res, oframe=oframe_sum, op="sum",
                        group_uuid=group_uuid, group_name=group_name)
        for c in range(len(res)):
            icol = res.get_column(c)
            oframe1 = _get_new_col(icol.name + "_normalized")
            pair = _get_new_pair(res[c], oframe_sum)
            mojo += MjT_BinaryOp(iframe=pair, oframe=oframe1, op="divide",
                                 group_uuid=group_uuid, group_name=group_name)
            res2.cbind(oframe1)
        res = res2
    return res