def _compute_block_id(self, block): """ For every block build a Cartesian tree using stack-based approach. During the build process encode stack pushes as *1* and stack pops as *0*. The generated 2b-bit number is the id of the block. @param block (List[int]): An array of integer numbers. @return code (int): A 2b-bit integer giving the id of the block, where b is the size of the block. """ binary_code = [0] * (2 * len(block)) idx = 0 S = Stack() for i in range(len(block)): while (not S.is_empty()) and (S.top() > block[i]): S.pop() idx += 1 S.push(block[i]) binary_code[idx] = 1 idx += 1 code = "".join(str(bit) for bit in binary_code) return int(code, 2)
def parse(rule):
    """
    Parse a rule string into a head and a list of body expressions.

    The rule is tokenized with ``tokenizer.tokenize`` and processed in a
    single left-to-right pass using two stacks: ``primary_stack`` holds
    pending operator and '(' tokens, ``args_stack`` holds operands. Operators
    are reduced through ``parse_operator`` (defined elsewhere; presumably it
    consumes operands from ``args_stack`` and pushes the result back --
    confirm against its definition).

    Args:
        rule: The rule text handed to ``tokenizer.tokenize``.

    Returns:
        A dict ``{"head": head, "body": body}``. ``head`` is ``None`` unless
        an entailment-sign token was met while the operand stack was
        non-empty. ``body`` is always a list: the last value popped from the
        operand stack, wrapped in a list if it is not one already, in
        reversed order.

    Raises:
        AssertionError: If ``MATH`` is not given four arguments or ``COMP``
            is not given three.

    Syntax problems are reported through ``syntax_err`` (defined elsewhere;
    presumably it raises or exits -- TODO confirm).
    """
    tokens = tokenizer.tokenize(rule)
    primary_stack = Stack()  # pending operators and open parentheses
    args_stack = Stack()     # operands parsed so far
    head = None

    idx = 0
    while idx < len(tokens):
        token = tokens[idx]
        kind = token["type"]

        # Open parenthesis always has the highest precedence.
        if kind == tokenizer.TOKEN_OPEN_PARA:
            primary_stack.push(token)

        elif kind == tokenizer.TOKEN_OPRT:
            # Left has higher precedence than right, so before pushing a new
            # operator we try to finish every operator already on the stack
            # for which enough information is now available.
            while not primary_stack.empty():
                lastOprt = primary_stack.top()
                if isUnaryOperator(lastOprt["value"]) and isUnaryOperator(
                        token["value"]):
                    # Two unary operators may nest, but may not follow each
                    # other directly on the same level.
                    syntax_err("Syntax error near operator " + token["value"])
                    # Only reached if syntax_err returns: leave the loop
                    # instead of re-testing the same unchanged stack forever.
                    break
                elif isOperator(lastOprt["value"]) and isBinaryOperator(
                        token["value"]):
                    if args_stack.empty():
                        # A binary operator arrived while there is no operand
                        # to reduce with. Neither stack would change, so
                        # without this guard the loop would never terminate.
                        syntax_err("Expected operand before operator "
                                   + token["value"])
                        break
                    # Finish the operator on the stack before dealing with
                    # the new one.
                    primary_stack.pop()
                    parse_operator(lastOprt, args_stack)
                else:
                    # A comma or parenthesis on top means the arguments of an
                    # operator are still being parsed; wait for more tokens.
                    break
            primary_stack.push(token)

        elif kind == tokenizer.TOKEN_IDENTIFIER:
            pred = token["value"]
            nxt = tokens[idx + 1] if idx + 1 != len(tokens) else None
            if nxt is not None and nxt["type"] == tokenizer.TOKEN_OPEN_PARA:
                # Identifier followed by '(': the helper parses the argument
                # list and returns the updated token index with the arguments.
                idx, args = parse_predicate_arguments(idx + 2, tokens)
                if pred == "MATH":
                    assert len(args) == 4, \
                        "Expected four parameters for function MATH"
                    args_stack.push(Node(Node.Math, args[0], args[1:]))
                elif pred == "COMP":
                    assert len(args) == 3, \
                        "Expected three parameters for function COMP"
                    args_stack.push(Node(Node.Comp, args[0], args[1:]))
                else:
                    args_stack.push(Node(Node.Atom, pred, args))
            else:
                # Bare identifier: an atom without arguments.
                args_stack.push(Node(Node.Atom, pred, []))

        elif kind == tokenizer.TOKEN_CLOSE_PARA:
            # Reduce operators until the matching '(' is popped.
            while True:
                if args_stack.empty():
                    syntax_err("Expected operand or '('")
                oprt = primary_stack.pop()
                if oprt["value"] == '(':
                    break
                parse_operator(oprt, args_stack)

        elif kind == tokenizer.TOKEN_ENTAILMENT_SIGN:
            # In principle a rule may have no head.
            if args_stack.empty():
                print("No head in the rule")
            else:
                # Every operator of the head must be processed before the
                # body is parsed, so drain the operator stack first.
                while not primary_stack.empty():
                    oprt = primary_stack.pop()
                    parse_operator(oprt, args_stack, True)
                # Pop the head from the operand stack.
                head = args_stack.pop()
                head.returnSttt = head.substitutetable

        idx += 1

    # All tokens consumed: reduce whatever operators are still pending.
    while not primary_stack.empty():
        oprt = primary_stack.pop()
        if oprt["value"] == '(':
            syntax_err("Missing ')' in rule ")
        parse_operator(oprt, args_stack)

    body = args_stack.pop()
    if not isinstance(body, list):
        body = [body]
    body = list(reversed(body))

    return {"head": head, "body": body}