Exemple #1
0
    def write_to_mojo(self, mojo: MojoWriter, iframe: MojoFrame):
        """Emit the MOJO transformations that build one holiday flag per region.

        The time column (``self.time_column``) is selected from *iframe*,
        brought to string form if needed, parsed as a datetime and split into
        ``year`` / ``doy`` (day of year).  For every region key the stored
        holiday table ``self.memos[prov]`` is merged on ``['year', 'doy']`` to
        obtain an ``is_DE_holiday_<prov>`` column, whose missing values are
        filled with ``olag.pytype(0)``.

        Args:
            mojo: Writer the transformations are appended to (via ``+=``).
            iframe: Input frame; must contain ``self.time_column``.

        Returns:
            A frame with one ``is_DE_holiday_<prov>`` column per region key,
            passed through ``AsType("int")``.
        """
        iframe = iframe[self.time_column]
        if iframe.get_column(0).type != MojoType.STR:
            # Non-string time columns are cast to int first and then to str,
            # so the datetime parsing below always starts from a string column.
            iframe = AsType("int").write_to_mojo(mojo, iframe)
            iframe = AsType("str").write_to_mojo(mojo, iframe)

        # Loop-invariant: the format only depends on the time column.
        datetime_format = self.datetime_formats[self.time_column]

        # We have to add each holiday to the MOJO.
        # The keys below index self.memos: 'country' plus what look like the
        # 16 German state codes (the feature prefix is "is_DE_holiday_").
        oframe = MojoFrame()
        for prov in ('country', 'BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV',
                     'NI', 'NW', 'RP', 'SL', 'SN', 'ST', 'SH', 'TH'):
            # Map the strings 'None' and '' to missing values.
            tmpframe = iframe.duplicate()
            mojo += MjT_Replace(iframe=iframe, oframe=tmpframe, map=[('None', None), ('', None)])
            if datetime_format is not None:
                tcol = tmpframe.get_column(0)
                mojo.set_datetime_format_str(tcol, datetime_format)
            # NOTE(review): iframe is rebound to the replaced frame, so the
            # next region's MjT_Replace reads this iteration's output rather
            # than the original column. Kept as in the original; confirm intent.
            iframe = tmpframe

            # Parse to datetime and extract the two merge keys.
            tframe = AsType("datetime64").write_to_mojo(mojo, iframe)
            year_col = MojoColumn(name="year", dtype="int")
            doy_col = MojoColumn(name="doy", dtype="int")
            mojo += MjT_Datepart(iframe=tframe, oframe=MojoFrame(columns=[year_col]), fn="year")
            mojo += MjT_Datepart(iframe=tframe, oframe=MojoFrame(columns=[doy_col]), fn="dayofyear")
            dates_frame = MojoFrame(columns=[year_col, doy_col])

            feat = f'is_DE_holiday_{prov}'
            holi_df = self.memos[prov]
            # The indicator column is added to the shared memo table only for
            # the duration of the merge; try/finally guarantees it is removed
            # again even when writing the merge transformer fails.
            holi_df[feat] = 1
            try:
                mout = MergeTransformer.from_frame(
                    holi_df, ['year', 'doy']).write_to_mojo(mojo, dates_frame)
            finally:
                holi_df.drop(feat, axis=1, inplace=True)

            # Keep only the flag column and fill rows that found no match.
            mlag = mout[feat]
            mlag.names = [feat]
            olag = mlag.get_column(0).duplicate()
            mojo += MjT_FillNa(iframe=mlag, oframe=MojoFrame(columns=[olag]),
                               repl=olag.pytype(0))
            oframe += olag

        oframe = AsType("int").write_to_mojo(mojo, oframe)
        return oframe
Exemple #2
0
    def to_mojo(self,
                mojo: MojoWriter,
                iframe: MojoFrame,
                group_uuid=None,
                group_name=None):
        """Append the MOJO ops that reproduce this calibrator's scoring.

        The parent class's ``write_to_mojo`` is called first; its result is the
        frame that gets calibrated.  For every column of that frame the ops for
        ``self.calib_method`` are emitted:

        * ``"sigmoid"``  - ``x * slope[c] + intercept[c]``, negated, fed to
          ``MjT_Sigmoid`` and cast to float32.
        * ``"isotonic"`` - a constant when only one y value is stored,
          otherwise clipping, an interval lookup and a linear interpolation
          between the interval's end points.
        * ``"spline"``   - optional log-odds transform, truncated cubic terms
          built from ``self.calib_knot_vec_tr[c]``, a linear combination with
          ``self.calib_basis_coef_vec[c]``, ``MjT_Sigmoid`` and a float32 cast.

        When more than one output column was produced, every column is divided
        by the ``MjT_Agg`` "sum" of all of them.

        Args:
            mojo: Writer the transformations are appended to (via ``+=``).
            iframe: Input frame, forwarded to the parent's ``write_to_mojo``.
            group_uuid: Accepted for interface compatibility; the value passed
                in is overwritten with a fresh ``uuid4`` string.
            group_name: Accepted for interface compatibility; the value passed
                in is overwritten with the class name.

        Returns:
            MojoFrame holding one calibrated column per input column
            (the normalized columns when there is more than one).

        Raises:
            RuntimeError: if ``self.calib_method`` is not one of the three
                methods above (raised while processing a column).
            AssertionError: in the spline branch, if the number of basis terms
                differs from the number of coefficients.
        """

        from h2oaicore.mojo import MojoColumn
        from h2oaicore.mojo_transformers import (MjT_ConstBinaryOp,
                                                 MjT_Sigmoid, MjT_AsType,
                                                 MjT_Agg, MjT_BinaryOp,
                                                 MjT_IntervalMap, MjT_Clip,
                                                 MjT_Log)
        import uuid
        # NOTE: the caller-supplied group_uuid / group_name are discarded here;
        # every call tags its ops with a fresh uuid and the class name.
        group_uuid = str(uuid.uuid4())
        group_name = self.__class__.__name__

        # Let the parent emit its part of the pipeline; _iframe is what we calibrate.
        _iframe = super().write_to_mojo(mojo=mojo,
                                        iframe=iframe,
                                        group_uuid=group_uuid,
                                        group_name=group_name)
        # Accumulates the calibrated output column(s).
        res = MojoFrame()

        # Build a fresh frame made of `left` followed by `right` (via cbind);
        # used as the input of the binary / aggregate ops below.
        def _get_new_pair(left, right):
            pair = MojoFrame()
            pair.cbind(left)
            pair.cbind(right)
            return pair

        for c in range(len(_iframe)):
            icol = _iframe.get_column(c)

            # Build a one-column frame called `name`. Unless `type_` is given
            # the dtype is `icol.type`; `icol` is resolved when the helper is
            # CALLED, so it follows any later rebinding of `icol` (this is
            # relied upon by the normalization step after the loop).
            def _get_new_col(name, type_=None):
                ocol_ = MojoColumn(name=name,
                                   dtype=icol.type if type_ is None else type_)
                oframe_ = MojoFrame(columns=[ocol_])
                return oframe_

            if self.calib_method == "sigmoid":
                # Chain: x * slope[c] -> + intercept[c] -> * -1 -> MjT_Sigmoid
                # -> cast to float32.

                oframe1 = _get_new_col(icol.name + "_slope")
                oframe2 = _get_new_col(icol.name + "_intercept")
                oframe3 = _get_new_col(icol.name + "_negative")
                # The sigmoid output column is declared float64 ...
                oframe4 = _get_new_col(icol.name + "_calibrated",
                                       type_="float64")
                # ... and the final column (declared with icol's dtype) is
                # filled by an AsType to "float32".
                oframe5 = _get_new_col(icol.name + "_astype")

                mojo += MjT_ConstBinaryOp(iframe=_iframe[c],
                                          oframe=oframe1,
                                          op="multiply",
                                          const=self.slope[c],
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                mojo += MjT_ConstBinaryOp(iframe=oframe1,
                                          oframe=oframe2,
                                          op="add",
                                          const=self.intercept[c],
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                mojo += MjT_ConstBinaryOp(iframe=oframe2,
                                          oframe=oframe3,
                                          op="multiply",
                                          const=-1.,
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                mojo += MjT_Sigmoid(iframe=oframe3,
                                    oframe=oframe4,
                                    group_uuid=group_uuid,
                                    group_name=group_name)
                mojo += MjT_AsType(iframe=oframe4,
                                   oframe=oframe5,
                                   type="float32",
                                   group_uuid=group_uuid,
                                   group_name=group_name)

                res.cbind(oframe5)
            elif self.calib_method == "isotonic":
                # Stored x / y points for column c.
                X = list(self._necessary_X_[c])
                y = list(self._necessary_y_[c])
                if len(y) == 1:
                    # Only one y value: emit it as a constant, computed as
                    # x * 0 + y[0].
                    oframe1 = _get_new_col(icol.name + "_zeroing")
                    new_y = _get_new_col(icol.name + "_addingConst")

                    mojo += MjT_ConstBinaryOp(iframe=_iframe[c],
                                              oframe=oframe1,
                                              op="multiply",
                                              const=0,
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)

                    mojo += MjT_ConstBinaryOp(iframe=oframe1,
                                              oframe=new_y,
                                              op="add",
                                              const=y[0],
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)

                else:
                    # Per-interval end points: entry k pairs upper bound X[k]
                    # with lower bound X[k-1]; the first / last point is
                    # repeated at the edges. The trailing None entry is
                    # presumably the value row used for missing input - confirm
                    # against MjT_IntervalMap.
                    max_X = X + [self._necessary_X_[c][-1], None]
                    min_X = [self._necessary_X_[c][0]] + X + [None]

                    max_y = y + [self._necessary_y_[c][-1], None]
                    min_y = [self._necessary_y_[c][0]] + y + [None]

                    # XY column order: [maxX, minX, maxY, minY] - the indices
                    # XY[0]..XY[3] used below refer to this order.
                    ocol1 = MojoColumn(name=icol.name + "_maxX",
                                       dtype=icol.type)
                    ocol2 = MojoColumn(name=icol.name + "_minX",
                                       dtype=icol.type)
                    ocol3 = MojoColumn(name=icol.name + "_maxY",
                                       dtype=icol.type)
                    ocol4 = MojoColumn(name=icol.name + "_minY",
                                       dtype=icol.type)
                    XY = MojoFrame(columns=[ocol1, ocol2, ocol3, ocol4])

                    # Clip the input to [self.X_min_[c], self.X_max_[c]].
                    inp_clipped = _get_new_col(icol.name + "_clipped")
                    mojo += MjT_Clip(iframe=_iframe[c],
                                     oframe=inp_clipped,
                                     min=self.X_min_[c],
                                     max=self.X_max_[c],
                                     group_uuid=group_uuid,
                                     group_name=group_name)

                    # Look up the (x1, x0, y1, y0) tuple of the interval the
                    # clipped input falls into, using X as breakpoints.
                    mojo += MjT_IntervalMap(
                        iframe=inp_clipped,
                        oframe=XY,
                        breakpoints=X,
                        values=[[
                            x1, x0, y1, y0
                        ] for x1, x0, y1, y0 in zip(max_X, min_X, max_y, min_y)
                                ],
                        group_uuid=group_uuid,
                        group_name=group_name)

                    # Linear interpolation, assuming MjT_BinaryOp applies the
                    # op as <first column> op <second column>:
                    #   new_y = y0 + (y1 - y0) / (x1 - x0) * (x_clipped - x0)
                    # curr_diff = x_clipped - x0
                    curr_diff = _get_new_col(icol.name + "_currDiff")
                    pair = _get_new_pair(inp_clipped, XY[1])
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=curr_diff,
                                         op="subtract",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    # y_diff = y1 - y0
                    y_diff = _get_new_col(icol.name + "_yDiff")
                    pair = _get_new_pair(XY[2], XY[3])
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=y_diff,
                                         op="subtract",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    # X_diff = x1 - x0
                    X_diff = _get_new_col(icol.name + "_XDiff")
                    pair = _get_new_pair(XY[0], XY[1])
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=X_diff,
                                         op="subtract",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    # xy_ratio = y_diff / X_diff; eps is passed to the divide
                    # op (presumably to guard a zero denominator - confirm).
                    xy_ratio = _get_new_col(icol.name + "_xyRatio")
                    pair = _get_new_pair(y_diff, X_diff)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=xy_ratio,
                                         op="divide",
                                         eps=1e-10,
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    # scaled_cur_diff = xy_ratio * curr_diff
                    scaled_cur_diff = _get_new_col(icol.name +
                                                   "_scaledCurDiff")
                    pair = _get_new_pair(xy_ratio, curr_diff)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=scaled_cur_diff,
                                         op="multiply",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    # new_y = y0 + scaled_cur_diff
                    new_y = _get_new_col(icol.name + "_newY")
                    pair = _get_new_pair(XY[3], scaled_cur_diff)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=new_y,
                                         op="add",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                # new_y is set by whichever isotonic sub-branch ran above.
                res.cbind(new_y)

            elif self.calib_method == "spline":
                if self.calib_logodds_scale:
                    # Log-odds transform of the input: clip to
                    # [eps, 1 - eps], then log(p / (1 - p)).
                    oframe1 = _get_new_col(icol.name + "_clipped")
                    mojo += MjT_Clip(iframe=_iframe[c],
                                     oframe=oframe1,
                                     min=self.calib_logodds_eps,
                                     max=1 - self.calib_logodds_eps,
                                     group_uuid=group_uuid,
                                     group_name=group_name)

                    # pos="left" presumably puts the constant on the left,
                    # i.e. 1 - p - confirm against MjT_ConstBinaryOp.
                    oframe2 = _get_new_col(icol.name + "_inverse")
                    mojo += MjT_ConstBinaryOp(iframe=oframe1,
                                              oframe=oframe2,
                                              op="subtract",
                                              const=1.,
                                              pos="left",
                                              group_uuid=group_uuid,
                                              group_name=group_name)

                    # ratio = clipped / inverse
                    oframe3 = _get_new_col(icol.name + "_ratio")
                    pair = _get_new_pair(oframe1, oframe2)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=oframe3,
                                         op="divide",
                                         eps=1e-10,
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    oframe4 = _get_new_col(icol.name + "_log")
                    mojo += MjT_Log(iframe=oframe3,
                                    oframe=oframe4,
                                    group_uuid=group_uuid,
                                    group_name=group_name)

                    inp = oframe4
                else:
                    # No log-odds scaling: use the raw column.
                    inp = _iframe[c]

                knots = self.calib_knot_vec_tr[c]
                num_knots = len(knots)

                # Column of zeros, derived as inp * 0.
                zeros = _get_new_col(icol.name + "_zeros")
                mojo += MjT_ConstBinaryOp(iframe=inp,
                                          oframe=zeros,
                                          op="multiply",
                                          const=0.,
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                # Column of ones (zeros + 1); used as the first term of the
                # linear model below.
                ones = _get_new_col(icol.name + f"_ones")
                mojo += MjT_ConstBinaryOp(iframe=zeros,
                                          oframe=ones,
                                          op="add",
                                          const=1.,
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                # Term shared by all basis columns, built from the last two knots.
                denom = knots[-1] - knots[-2]

                # Emit ops computing max(inp - val, 0) ** 3 and return the
                # resulting one-column frame. `suffix` keeps the intermediate
                # column names unique per call.
                def _to_mojo_helper(mojo, inp, val, zeros, suffix=""):
                    # inp - val
                    oframe5 = _get_new_col(icol.name + f"_{suffix}diff")
                    mojo += MjT_ConstBinaryOp(iframe=inp,
                                              oframe=oframe5,
                                              op="subtract",
                                              const=val,
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)

                    # "max" aggregate over (diff, zeros), i.e. max(diff, 0).
                    oframe6 = _get_new_col(icol.name + f"_{suffix}max")
                    pair = _get_new_pair(oframe5, zeros)
                    mojo += MjT_Agg(iframe=pair,
                                    oframe=oframe6,
                                    op="max",
                                    group_uuid=group_uuid,
                                    group_name=group_name)

                    # Cube via repeated multiplication: oframe7 is a copy
                    # (value * 1), oframe8 = value * copy, oframe9 = oframe8 * copy.
                    oframe7 = _get_new_col(icol.name + f"_{suffix}pwr")
                    oframe8 = _get_new_col(icol.name + f"_{suffix}pwr2")
                    oframe9 = _get_new_col(icol.name + f"_{suffix}pwr3")
                    mojo += MjT_ConstBinaryOp(iframe=oframe6,
                                              oframe=oframe7,
                                              op="multiply",
                                              const=1.,
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)
                    pair = _get_new_pair(oframe6, oframe7)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=oframe8,
                                         op="multiply",
                                         group_uuid=group_uuid,
                                         group_name=group_name)
                    pair = _get_new_pair(oframe8, oframe7)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=oframe9,
                                         op="multiply",
                                         group_uuid=group_uuid,
                                         group_name=group_name)
                    return oframe9

                # Cubed positive parts at the second-to-last and last knot.
                last_knot2 = _to_mojo_helper(mojo=mojo,
                                             inp=inp,
                                             val=knots[-2],
                                             zeros=zeros,
                                             suffix="last2")
                last_knot1 = _to_mojo_helper(mojo=mojo,
                                             inp=inp,
                                             val=knots[-1],
                                             zeros=zeros,
                                             suffix="last1")

                # last_knot = (last_knot2 - last_knot1) / (knots[-1] - knots[-2])
                oframe5 = _get_new_col(icol.name + f"_last21diff")
                pair = _get_new_pair(last_knot2, last_knot1)
                mojo += MjT_BinaryOp(iframe=pair,
                                     oframe=oframe5,
                                     op="subtract",
                                     group_uuid=group_uuid,
                                     group_name=group_name)

                last_knot = _get_new_col(icol.name + f"_lastKnot")
                mojo += MjT_ConstBinaryOp(iframe=oframe5,
                                          oframe=last_knot,
                                          op="divide",
                                          const=denom,
                                          pos="right",
                                          group_uuid=group_uuid,
                                          group_name=group_name)

                # One basis column per i in 1 .. num_knots - 2:
                #   (cube(knots[i-1]) - cube(knots[-1])) / (knots[-1] - knots[i-1])
                #   - last_knot
                # where cube(k) is the _to_mojo_helper result for knot k.
                # NOTE(review): this has the shape of a natural cubic spline
                # basis - confirm against the fitting code.
                results = []

                for i in range(1, num_knots - 1):
                    denom = knots[-1] - knots[i - 1]

                    knot1 = _to_mojo_helper(mojo=mojo,
                                            inp=inp,
                                            val=knots[i - 1],
                                            zeros=zeros,
                                            suffix=f"knot{i}m1")
                    # The last-knot term is re-emitted for every i under its
                    # own suffix.
                    knot2 = _to_mojo_helper(mojo=mojo,
                                            inp=inp,
                                            val=knots[-1],
                                            zeros=zeros,
                                            suffix=f"knotm1f{i}")

                    oframe_ = _get_new_col(icol.name + f"_knots_{i}_diff")
                    pair = _get_new_pair(knot1, knot2)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=oframe_,
                                         op="subtract",
                                         group_uuid=group_uuid,
                                         group_name=group_name)

                    div_res = _get_new_col(icol.name + f"_dv_{i}")
                    mojo += MjT_ConstBinaryOp(iframe=oframe_,
                                              oframe=div_res,
                                              op="divide",
                                              const=denom,
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)

                    diff_res = _get_new_col(icol.name + f"_diff_{i}")
                    pair = _get_new_pair(div_res, last_knot)
                    mojo += MjT_BinaryOp(iframe=pair,
                                         oframe=diff_res,
                                         op="subtract",
                                         group_uuid=group_uuid,
                                         group_name=group_name)
                    results.append(diff_res)

                # Full list of model terms: constant, the input itself, then
                # the basis columns built above.
                results = [ones, inp] + results

                # There must be exactly one coefficient per term.
                assert len(results) == len(self.calib_basis_coef_vec[c].ravel(
                )), "Something went wrong :("
                # Linear model: multiply every term by its coefficient ...
                results2 = MojoFrame()
                for i, (frame_, const_) in enumerate(
                        zip(results, self.calib_basis_coef_vec[c].ravel())):
                    res_fr = _get_new_col(icol.name + f"_logits_{i}")
                    mojo += MjT_ConstBinaryOp(iframe=frame_,
                                              oframe=res_fr,
                                              op="multiply",
                                              const=const_,
                                              pos="right",
                                              group_uuid=group_uuid,
                                              group_name=group_name)
                    results2.cbind(res_fr)

                # ... and add the weighted terms up.
                ocol_logits_sum = _get_new_col(icol.name + f"_logits_sum")
                mojo += MjT_Agg(iframe=results2,
                                oframe=ocol_logits_sum,
                                op="sum",
                                group_uuid=group_uuid,
                                group_name=group_name)

                # Sigmoid of the summed terms (output declared float64),
                # followed by a cast to float32.
                ocol_spline_sigmoid = _get_new_col(icol.name +
                                                   f"_spline_sigmoid",
                                                   type_="float64")
                mojo += MjT_Sigmoid(iframe=ocol_logits_sum,
                                    oframe=ocol_spline_sigmoid,
                                    group_uuid=group_uuid,
                                    group_name=group_name)
                ocol_spline_sigmoid_astype = _get_new_col(
                    icol.name + f"_spline_sigmoid_astype")
                mojo += MjT_AsType(iframe=ocol_spline_sigmoid,
                                   oframe=ocol_spline_sigmoid_astype,
                                   type="float32",
                                   group_uuid=group_uuid,
                                   group_name=group_name)
                res.cbind(ocol_spline_sigmoid_astype)

            else:
                raise RuntimeError('Unknown calibration method in to_mojo()')
        # Normalization: with more than one output column, divide each column
        # by the "sum" aggregate over all of them.
        # _get_new_col is the closure left over from the loop above; it reads
        # `icol` at call time, so oframe_sum takes the dtype of the last loop
        # column and each "_normalized" column takes the dtype of res's
        # column c (icol is rebound inside the loop below).
        if len(res) > 1:
            res2 = MojoFrame()
            oframe_sum = _get_new_col(self.__class__.__name__ + "_sum")
            mojo += MjT_Agg(iframe=res,
                            oframe=oframe_sum,
                            op="sum",
                            group_uuid=group_uuid,
                            group_name=group_name)

            for c in range(len(res)):
                icol = res.get_column(c)
                oframe1 = _get_new_col(icol.name + "_normalized")

                # column c / sum (no eps is passed to this divide).
                pair = _get_new_pair(res[c], oframe_sum)
                mojo += MjT_BinaryOp(iframe=pair,
                                     oframe=oframe1,
                                     op="divide",
                                     group_uuid=group_uuid,
                                     group_name=group_name)
                res2.cbind(oframe1)

            res = res2

        return res