def fromJsonFile(mfile, blacklist, col_stats):
    """
    @des Construct a MalDictionary object from a JSON file
    @arg mfile    : str|bytes             //json file containing query run
                                          //(path, plain or gzipped), or the
                                          //raw utf-8 encoded trace as bytes
    @arg blacklist: list<str>             //list of blacklisted mal ins
    @arg col_stats: dict<str,ColumnStats> //column statistics

    The trace is read one JSON object per line. "start" events record the
    clock per program counter; each "done" event is turned into an
    instruction whose `time` is the clock difference to its "start" event
    and whose `start` is that start clock. Blank lines are ignored and a
    JSON `null` line stops the parsing.
    """
    if isinstance(mfile, bytes):
        # In-memory trace: decode once and expose it as a text stream.
        stream = io.StringIO(mfile.decode('utf-8'))
    elif Utils.is_gzipped(mfile):
        stream = gzip.open(mfile, mode='rt', encoding='utf-8')
    else:
        stream = open(mfile, mode='rt', encoding='utf-8')

    maldict = defaultdict(_make_list)
    startd = {}  # pc -> clk of the pending "start" event
    query_tags = set()

    with stream as f:
        # Iterate lazily: traces can be large, no need to hold every line.
        for line in f:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline).
                continue
            jobj = json.loads(line)
            if jobj is None:
                break
            fname, _, _ = Utils.extract_fname(jobj["short"])
            if Utils.is_blacklisted(blacklist, fname):
                continue

            state = jobj["state"]
            if state == "start":
                startd[jobj["pc"]] = jobj["clk"]
            elif state == "done":
                pc = jobj["pc"]
                assert pc in startd, \
                    "'done' event for pc {} without a 'start' event".format(pc)
                new_mals = MalInstruction.fromJsonObj(jobj, col_stats)
                start_clk = int(startd[pc])
                new_mals.time = int(jobj["clk"]) - start_clk
                new_mals.start = start_clk
                maldict[fname].append(new_mals)
                query_tags.add(int(jobj["tag"]))

    return MalDictionary(maldict, list(query_tags), col_stats)
def fromJsonObj(jobj, stats):
    """
    @des Construct the MalInstruction (sub)class instance that matches the
         MAL function of a single JSON trace event
    @arg jobj : dict                  //decoded JSON event; reads the keys
                                      //"size", "pc", "clk", "short", "tag"
                                      //and the optional lists "ret", "arg"
    @arg stats: dict<str,ColumnStats> //column statistics, forwarded to the
                                      //select and group instructions

    The instruction class is chosen from the function name extracted from
    jobj["short"]; names that match no known category fall back to a plain
    MalInstruction.
    """
    size = int(jobj["size"])
    pc = int(jobj["pc"])
    clk = int(jobj["clk"])
    short = jobj["short"]
    fname, _, _ = Utils.extract_fname(short)
    tag = int(jobj["tag"])

    ret_objs = jobj.get("ret", [])  # return objects
    arg_objs = jobj.get("arg", [])  # argument objects

    # Only return values with eol == 0 contribute to the returned size.
    ret_size = sum(o.get("size", 0) for o in ret_objs if int(o["eol"]) == 0)
    arg_size = sum(o.get("size", 0) for o in arg_objs)
    arg_list = [Arg.fromJsonObj(e) for e in arg_objs]
    ret_args = [Arg.fromJsonObj(e) for e in ret_objs]
    # Arguments with eol == 1 count towards the freed size.
    free_size = sum(arg.size for arg in arg_list if arg.eol == 1)
    arg_vars = [arg.name for arg in arg_list if arg.isVar()]
    ret_vars = [ret['name'] for ret in ret_objs if Utils.isVar(ret['name'])]
    # "ret" is optional everywhere else, so do not crash here when the
    # event carries no return value: report a count of 0 instead.
    count = int(ret_objs[0].get("count", 0)) if ret_objs else 0

    con_args = [
        pc, clk, short, fname, size, ret_size, tag, arg_size, arg_list,
        ret_args, free_size, arg_vars, ret_vars, count
    ]

    # Select Instructions
    if fname in ('select', 'thetaselect', 'likeselect'):
        # TODO replace jobj
        return SelectInstruction(*con_args, jobj=jobj, stats=stats)

    # Projections
    if fname in ('projectionpath',):
        return DirectIntruction(*con_args, base_arg_i=0)
    if fname in ('projection', 'projectdelta'):
        return ProjectInstruction(*con_args)

    # Joins
    if fname in ('join', 'thetajoin', 'crossproduct'):
        return JoinInstruction(*con_args)

    # Group Instructions
    if fname in ('group', 'subgroup', 'subgroupdone', 'groupdone'):
        # Last dot-separated component of the first argument's column.
        a0 = arg_list[0].col.split('.')[-1]
        return GroupInstruction(*con_args,
                                base_arg_i=0,
                                base_col=a0,
                                col_stats=stats.get(a0, None))

    # Set Instructions: the last parameter determines how to compute the
    # prediction for this MAL instruction
    if fname in ('intersect',):
        return SetInstruction(*con_args, i1=0, i2=1, fun=min)
    if fname in ('mergecand',):
        return SetInstruction(*con_args, i1=0, i2=1, fun=_lambda_add)
    if fname in ('difference',):
        return SetInstruction(*con_args, i1=0, i2=1, fun=_lambda_lefthand)
    if fname in ('<', '>', '>=', '<='):
        if arg_list[1].isVar():
            return SetInstruction(*con_args, i1=0, i2=1, fun=min)
        return DirectIntruction(*con_args, base_arg_i=0)

    # Direct Instructions
    # NOTE: 'dbl' is matched here, so it needs no dedicated branch below.
    if fname in ('+', '-', '*', '/', 'or', 'dbl', 'and', 'lng', '%'):
        if arg_list[0].isVar():
            return DirectIntruction(*con_args, base_arg_i=0)
        if arg_list[1].isVar():
            return DirectIntruction(*con_args, base_arg_i=1)
        return ReduceInstruction(*con_args)
    if fname in ('==', 'isnil', '!=', 'like'):
        return DirectIntruction(*con_args, base_arg_i=0)
    if fname in ('sort',):
        return DirectIntruction(*con_args, base_arg_i=0, base_ret_i=1)
    if fname in ('subsum', 'subavg', 'subcount', 'submin'):
        return DirectIntruction(*con_args, base_arg_i=2)
    if fname in ('subslice',):
        return DirectIntruction(*con_args, base_arg_i=0)
    if fname in ('firstn',):
        argl = len(arg_list)
        assert argl == 4 or argl == 6
        return DirectIntruction(*con_args, base_arg_i=0, fun=min)
    if fname in ('hash', 'bulk_rotate_xor_hash', 'identity', 'mirror',
                 'year', 'ifthenelse', 'delta', 'substring', 'project',
                 'int', 'floor'):
        return DirectIntruction(*con_args, base_arg_i=0)
    if fname in ('hge',):
        if arg_list[1].cnt > 0:
            return DirectIntruction(*con_args, base_arg_i=1)
        return ReduceInstruction(*con_args)
    if fname in ('append',):
        return DirectIntruction(*con_args, base_arg_i=0, fun=_lambda_inc)
    if fname in ('max', 'min'):
        if len(arg_list) == 1:
            return ReduceInstruction(*con_args)
        assert len(arg_list) == 2
        return DirectIntruction(*con_args, base_arg_i=0)

    # Aggregate Instructions (result = 1)
    if fname in ('sum', 'avg', 'single', 'dec_round'):
        return ReduceInstruction(*con_args)
    if fname in ('new',):
        return NullInstruction(*con_args)

    # Load stuff
    if fname in ('tid', 'bind', 'bind_idxbat'):
        return LoadInstruction(*con_args)

    # Unknown instruction: fall back to the generic instruction class.
    return MalInstruction(*con_args)