Esempio n. 1
0
    def fromJsonFile(mfile, blacklist, col_stats):
        """
        @des Construct a MalDictionary object from a JSON file
        @arg mfile    : str|bytes             //path of the trace file (plain or
                                              //gzipped), or its utf-8 bytes
        @arg blacklist: list<str>             //list of blacklisted mal ins
        @arg col_stats: dict<str,ColumnStats> //column statistics
        @ret MalDictionary                    //instructions grouped by fname

        The input holds one JSON object per line. A "start" record stores its
        clk under its pc; the matching "done" record is turned into a
        MalInstruction whose `time` is the clk difference and whose `start`
        is the stored clk. Blank lines are skipped and a JSON `null` line
        stops the parsing.
        """
        if isinstance(mfile, bytes):
            # In-memory trace: decode once and expose it as a text stream.
            stream = io.StringIO(mfile.decode('utf-8'))
        elif Utils.is_gzipped(mfile):
            stream = gzip.open(mfile, mode='rt', encoding='utf-8')
        else:
            stream = open(mfile, mode='rt', encoding='utf-8')

        with stream as f:
            maldict = defaultdict(_make_list)
            startd = {}  # pc -> clk of the latest "start" record for that pc
            query_tags = set()

            # Iterate the stream lazily instead of materialising every line.
            for line in f:
                if not line.strip():
                    # json.loads would fail on an empty (e.g. trailing) line
                    continue
                jobj = json.loads(line)
                if jobj is None:
                    break
                fname, _, _ = Utils.extract_fname(jobj["short"])
                if Utils.is_blacklisted(blacklist, fname):
                    continue

                state = jobj["state"]
                if state == "start":
                    startd[jobj["pc"]] = jobj["clk"]
                elif state == "done":
                    pc = jobj["pc"]
                    assert pc in startd, (
                        "'done' record for pc {} without a 'start'".format(pc))
                    new_mals = MalInstruction.fromJsonObj(jobj, col_stats)
                    start_clk = int(startd[pc])
                    new_mals.time = int(jobj["clk"]) - start_clk
                    new_mals.start = start_clk
                    maldict[fname].append(new_mals)
                    query_tags.add(int(jobj["tag"]))

        return MalDictionary(maldict, list(query_tags), col_stats)
Esempio n. 2
0
    def fromJsonObj(jobj, stats):
        """
        @des Build the instruction object matching one JSON trace record
        @arg jobj : dict                  //decoded record; must contain "size",
                                          //"pc", "clk", "short" and "tag";
                                          //"arg" and "ret" are optional lists
        @arg stats: dict<str,ColumnStats> //column statistics
        @ret MalInstruction               //instance of the class selected by
                                          //the function name found in "short"

        Function names without a dedicated branch fall back to a plain
        MalInstruction.
        """
        size = int(jobj["size"])
        pc = int(jobj["pc"])
        clk = int(jobj["clk"])
        short = jobj["short"]
        fname, _, _ = Utils.extract_fname(short)
        tag = int(jobj["tag"])

        args = jobj.get("arg", [])
        rets = jobj.get("ret", [])

        # ret_size only counts return values whose "eol" flag is 0, while
        # free_size only counts arguments whose eol attribute is 1.
        ret_size = sum(o.get("size", 0) for o in rets if int(o["eol"]) == 0)
        arg_size = sum(o.get("size", 0) for o in args)
        arg_list = [Arg.fromJsonObj(e) for e in args]
        ret_args = [Arg.fromJsonObj(e) for e in rets]
        free_size = sum(arg.size for arg in arg_list if arg.eol == 1)
        arg_vars = [arg.name for arg in arg_list if arg.isVar()]
        ret_vars = [r['name'] for r in rets if Utils.isVar(r['name'])]
        # "ret" is optional everywhere else in this function, so a record
        # without return values yields a count of 0 instead of raising.
        count = int(rets[0].get("count", 0)) if rets else 0

        con_args = [
            pc, clk, short, fname, size, ret_size, tag, arg_size, arg_list,
            ret_args, free_size, arg_vars, ret_vars, count
        ]

        # Select Instructions
        if fname in ['select', 'thetaselect', 'likeselect']:
            return SelectInstruction(*con_args, jobj=jobj,
                                     stats=stats)  # TODO replace jobj
        # Projections
        elif fname in ['projectionpath']:
            return DirectIntruction(*con_args, base_arg_i=0)
        elif fname in ['projection', 'projectdelta']:
            return ProjectInstruction(*con_args)
        # Joins
        elif fname in ['join', 'thetajoin', 'crossproduct']:
            return JoinInstruction(*con_args)
        # Group Instructions
        elif fname in ['group', 'subgroup', 'subgroupdone', 'groupdone']:
            # Last dot-separated component of the first argument's col
            a0 = arg_list[0].col.split('.')[-1]
            return GroupInstruction(*con_args,
                                    base_arg_i=0,
                                    base_col=a0,
                                    col_stats=stats.get(a0, None))
        # Set Instructions: the last parameter determines how to compute the
        #     prediction for this MAL instruction
        elif fname in ['intersect']:
            return SetInstruction(*con_args, i1=0, i2=1, fun=min)
        elif fname in ['mergecand']:
            return SetInstruction(*con_args, i1=0, i2=1, fun=_lambda_add)
        elif fname in ['difference']:
            return SetInstruction(*con_args, i1=0, i2=1, fun=_lambda_lefthand)
        elif fname in ['<', '>', '>=', '<=']:
            if arg_list[1].isVar():
                return SetInstruction(*con_args, i1=0, i2=1, fun=min)
            else:
                return DirectIntruction(*con_args, base_arg_i=0)
        # Direct Intructions
        # NOTE: 'dbl' is matched here; a later, separate 'dbl' branch
        #     (base_arg_i=1) could never be reached and has been dropped.
        elif fname in ['+', '-', '*', '/', 'or', 'dbl', 'and', 'lng', '%']:
            if arg_list[0].isVar():
                return DirectIntruction(*con_args, base_arg_i=0)
            elif arg_list[1].isVar():
                return DirectIntruction(*con_args, base_arg_i=1)
            else:
                return ReduceInstruction(*con_args)
        elif fname in ['==', 'isnil', '!=', 'like']:
            return DirectIntruction(*con_args, base_arg_i=0)
        elif fname in ['sort']:
            return DirectIntruction(*con_args, base_arg_i=0, base_ret_i=1)
        elif fname in ['subsum', 'subavg', 'subcount', 'submin']:
            return DirectIntruction(*con_args, base_arg_i=2)
        elif fname in ['subslice']:
            return DirectIntruction(*con_args, base_arg_i=0)
        elif fname in ['firstn']:
            argl = len(arg_list)
            assert argl == 4 or argl == 6
            # n is still parsed, so a non-integer aval keeps failing here, but
            # it is not passed on to the instruction.
            # NOTE(review): confirm whether n was meant to bound the result.
            n = int(arg_list[3].aval) if argl == 6 else int(arg_list[1].aval)
            return DirectIntruction(*con_args, base_arg_i=0, fun=min)
        elif fname in [
                'hash', 'bulk_rotate_xor_hash', 'identity', 'mirror', 'year',
                'ifthenelse', 'delta', 'substring', 'project', 'int', 'floor'
        ]:
            return DirectIntruction(*con_args, base_arg_i=0)
        elif fname in ['hge']:
            if arg_list[1].cnt > 0:
                return DirectIntruction(*con_args, base_arg_i=1)
            else:
                return ReduceInstruction(*con_args)
        elif fname in ['append']:
            return DirectIntruction(*con_args, base_arg_i=0, fun=_lambda_inc)
        elif fname in ['max', 'min']:
            if len(arg_list) == 1:
                return ReduceInstruction(*con_args)
            else:
                assert len(arg_list) == 2
                return DirectIntruction(*con_args, base_arg_i=0)
        # Aggregate Instructions (result = 1)
        elif fname in ['sum', 'avg', 'single', 'dec_round']:
            return ReduceInstruction(*con_args)
        elif fname in ['new']:
            return NullInstruction(*con_args)
        # Load stuff
        elif fname in ['tid', 'bind', 'bind_idxbat']:
            return LoadInstruction(*con_args)
        else:
            # Unknown function name: fall back to the generic instruction
            return MalInstruction(*con_args)