def _sanity_check_block_pairwise_constraints(ir_blocks): """Assert that adjacent blocks obey all invariants.""" for first_block, second_block in pairwise(ir_blocks): # Always Filter before MarkLocation, never after. if isinstance(first_block, MarkLocation) and isinstance(second_block, Filter): raise AssertionError(u'Found Filter after MarkLocation block: {}'.format(ir_blocks)) # There's no point in marking the same location twice in a row. if isinstance(first_block, MarkLocation) and isinstance(second_block, MarkLocation): raise AssertionError(u'Found consecutive MarkLocation blocks: {}'.format(ir_blocks)) # Traverse blocks with optional=True are immediately followed # by a MarkLocation, CoerceType or Filter block. if isinstance(first_block, Traverse) and first_block.optional: if not isinstance(second_block, (MarkLocation, CoerceType, Filter)): raise AssertionError(u'Expected MarkLocation, CoerceType or Filter after Traverse ' u'with optional=True. Found: {}'.format(ir_blocks)) # Backtrack blocks with optional=True are immediately followed by a MarkLocation block. if isinstance(first_block, Backtrack) and first_block.optional: if not isinstance(second_block, MarkLocation): raise AssertionError(u'Expected MarkLocation after Backtrack with optional=True, ' u'but none was found: {}'.format(ir_blocks)) # Recurse blocks are immediately preceded by a MarkLocation or Backtrack block. if isinstance(second_block, Recurse): if not (isinstance(first_block, MarkLocation) or isinstance(first_block, Backtrack)): raise AssertionError(u'Expected MarkLocation or Backtrack before Recurse, but none ' u'was found: {}'.format(ir_blocks))
def _sanity_check_mark_location_preceding_optional_traverse(ir_blocks):
    """Verify that a MarkLocation sits directly before every optional Traverse.

    The rule is checked on the block sequence obtained after the fold blocks have been
    extracted via extract_folds_from_ir_blocks(); the error message, however, reports
    the original, unmodified ir_blocks.

    Args:
        ir_blocks: the IR blocks to check

    Raises:
        AssertionError, if an optional Traverse is not immediately preceded
        by a MarkLocation block once folds are removed.
    """
    # Only the second element of the result (the blocks remaining after fold extraction) is used.
    _, blocks_without_folds = extract_folds_from_ir_blocks(ir_blocks)

    for predecessor, candidate in pairwise(blocks_without_folds):
        is_optional_traverse = isinstance(candidate, Traverse) and candidate.optional
        if is_optional_traverse and not isinstance(predecessor, MarkLocation):
            raise AssertionError(u'Expected MarkLocation before Traverse with optional=True, '
                                 u'but none was found: {}'.format(ir_blocks))
def _sanity_check_coerce_type_outside_of_fold(ir_blocks): """Ensure that CoerceType not in a @fold are followed by a MarkLocation or Filter block.""" is_in_fold = False for first_block, second_block in pairwise(ir_blocks): if isinstance(first_block, Fold): is_in_fold = True if not is_in_fold and isinstance(first_block, CoerceType): if not isinstance(second_block, (MarkLocation, Filter)): raise AssertionError(u'Expected MarkLocation or Filter after CoerceType, ' u'but none was found: {}'.format(ir_blocks)) if isinstance(second_block, Unfold): is_in_fold = False
def merge_consecutive_filter_clauses(ir_blocks):
    """Merge consecutive Filter(x), Filter(y) blocks into Filter(x && y) block.

    Runs of any length are folded left to right, so Filter(x), Filter(y), Filter(z)
    becomes a single Filter((x && y) && z). Blocks that are not part of such a run are
    carried over unchanged and in their original order.

    Args:
        ir_blocks: list of IR blocks; the list itself is not modified

    Returns:
        a new list of IR blocks in which every run of adjacent Filter blocks has been
        replaced by one Filter block; an empty list if ir_blocks is empty
    """
    if not ir_blocks:
        # Nothing to merge; also avoids indexing into an empty list below.
        return []

    merged_blocks = [ir_blocks[0]]
    for current_block in ir_blocks[1:]:
        # Compare against the last block already emitted, not the previous input block:
        # if that block is itself the result of a merge, its predicate carries every
        # filter merged so far, so no predicate is dropped in runs of three or more.
        last_block = merged_blocks[-1]
        if isinstance(last_block, Filter) and isinstance(current_block, Filter):
            merged_blocks[-1] = Filter(
                BinaryComposition(u'&&', last_block.predicate, current_block.predicate))
        else:
            merged_blocks.append(current_block)

    return merged_blocks
def sanity_check_ir_blocks_from_frontend(ir_blocks):
    """Assert that IR blocks originating from the frontend do not have nonsensical structure.

    If the IR produced by the front-end cannot be successfully and correctly used to
    generate MATCH or Gremlin, this is the method that should catch the problem.

    The checks run in the order listed here, and the first violation found raises:
      - the block list is non-empty;
      - QueryRoot is the first block and appears nowhere else;
      - ConstructResult is the last block and appears nowhere else;
      - no Backtrack / Traverse / Recurse block appears after an OutputSource block;
      - every pair of adjacent blocks obeys the pairwise ordering rules spelled out
        in the comments of the pairwise loop;
      - exactly one MarkLocation block lies between a QueryRoot / Traverse / Recurse block
        and the next Traverse / Recurse / Backtrack / ConstructResult block.

    NOTE(review): the loop that is meant to count traversal blocks between MarkLocation
    blocks tests `object` rather than its loop variable, so it currently enforces nothing.
    Details are in the comment directly above that loop.

    Args:
        ir_blocks: list of BasicBlocks representing the IR to sanity-check

    Raises:
        AssertionError, if the IR has unexpected structure.
    """
    if not ir_blocks:
        raise AssertionError(u'Received no ir_blocks: {}'.format(ir_blocks))

    # QueryRoot is always and only the first block.
    if not isinstance(ir_blocks[0], QueryRoot):
        raise AssertionError(
            u'The first block was not QueryRoot: {}'.format(ir_blocks))
    for block in ir_blocks[1:]:
        if isinstance(block, QueryRoot):
            raise AssertionError(
                u'Found QueryRoot after the first block: {}'.format(ir_blocks))

    # ConstructResult is always and only the last block.
    if not isinstance(ir_blocks[-1], ConstructResult):
        raise AssertionError(
            u'The last block was not ConstructResult: {}'.format(ir_blocks))
    for block in ir_blocks[:-1]:
        if isinstance(block, ConstructResult):
            raise AssertionError(
                u'Found ConstructResult before the last block: '
                u'{}'.format(ir_blocks))

    # There are no Traverse / Backtrack / Recurse blocks after an OutputSource block.
    seen_output_source = False
    for block in ir_blocks:
        if isinstance(block, OutputSource):
            seen_output_source = True
        elif seen_output_source:
            if isinstance(block, (Backtrack, Traverse, Recurse)):
                raise AssertionError(u'Found Backtrack / Traverse / Recurse '
                                     u'after OutputSource block: '
                                     u'{}'.format(ir_blocks))

    # Rules on adjacent blocks. For each pair they are evaluated top to bottom,
    # so the first rule that is broken determines the error message.
    for first_block, second_block in pairwise(ir_blocks):
        # Always Filter before MarkLocation, never after.
        if isinstance(first_block, MarkLocation) and isinstance(
                second_block, Filter):
            raise AssertionError(
                u'Found Filter after MarkLocation block: {}'.format(ir_blocks))

        # There's no point in marking the same location twice in a row.
        if isinstance(first_block, MarkLocation) and isinstance(
                second_block, MarkLocation):
            raise AssertionError(
                u'Found consecutive MarkLocation blocks: {}'.format(ir_blocks))

        # Traverse blocks with optional=True are immediately preceded by a MarkLocation block.
        if isinstance(second_block, Traverse) and second_block.optional:
            if not isinstance(first_block, MarkLocation):
                raise AssertionError(
                    u'Expected MarkLocation before Traverse with optional=True, '
                    u'but none was found: {}'.format(ir_blocks))

        # Traverse blocks with optional=True are immediately followed
        # by a MarkLocation, CoerceType or Filter block.
        if isinstance(first_block, Traverse) and first_block.optional:
            if not isinstance(second_block, (MarkLocation, CoerceType, Filter)):
                raise AssertionError(
                    u'Expected MarkLocation, CoerceType or Filter after Traverse '
                    u'with optional=True. Found: {}'.format(ir_blocks))

        # CoerceType blocks are immediately followed by a MarkLocation or Filter block.
        if isinstance(first_block, CoerceType):
            if not isinstance(second_block, (MarkLocation, Filter)):
                raise AssertionError(
                    u'Expected MarkLocation or Filter after CoerceType, '
                    u'but none was found: {}'.format(ir_blocks))

        # Backtrack blocks with optional=True are immediately followed by a MarkLocation block.
        if isinstance(first_block, Backtrack) and first_block.optional:
            if not isinstance(second_block, MarkLocation):
                raise AssertionError(
                    u'Expected MarkLocation after Backtrack with optional=True, '
                    u'but none was found: {}'.format(ir_blocks))

        # Recurse blocks are immediately preceded by a MarkLocation block.
        if isinstance(second_block, Recurse):
            if not isinstance(first_block, MarkLocation):
                raise AssertionError(
                    u'Expected MarkLocation before Recurse, but none was found: '
                    u'{}'.format(ir_blocks))

    # There's exactly one QueryRoot / Traverse / Recurse / Backtrack block (total)
    # between any two MarkLocation blocks.
    #
    # NOTE(review): both isinstance() calls below are given `object`, not the loop
    # variable `block`. Assuming `object` is not rebound elsewhere in this module, it is
    # the builtin type, so neither branch is ever taken: traversal_blocks stays at 0 and
    # this loop can never raise. Substituting `block` would start enforcing the invariant
    # stated above -- confirm that invariant first. The pairwise rules only require a
    # MarkLocation after Backtrack blocks with optional=True, and the final loop below
    # does not open an interval at a Backtrack, so a sequence such as
    # MarkLocation, Backtrack, Traverse, MarkLocation is accepted by every other check
    # in this function but would count two traversal blocks here.
    traversal_blocks = 0
    for block in ir_blocks:
        # Treat QueryRoot as a Backtrack / Recurse / Traverse block,
        # to handle the first MarkLocation.
        if isinstance(object, (Backtrack, Traverse, Recurse, QueryRoot)):
            traversal_blocks += 1
        elif isinstance(object, MarkLocation):
            if traversal_blocks != 1:
                raise AssertionError(u'Expected 1 traversal block between '
                                     u'MarkLocation blocks, but found: '
                                     u'{} {}'.format(traversal_blocks, ir_blocks))
            traversal_blocks = 0

    # Exactly one MarkLocation block is found between a QueryRoot / Traverse / Recurse block,
    # and the first subsequent Traverse, Recurse, Backtrack or ConstructResult block.
    found_start_block = False
    mark_location_blocks = 0
    for block in ir_blocks:
        # Terminate started intervals before opening new ones.
        # A Traverse or Recurse both closes the current interval here and, further down,
        # opens the next one; a Backtrack or ConstructResult only closes.
        end_interval_types = (Backtrack, ConstructResult, Recurse, Traverse)
        if isinstance(block, end_interval_types) and found_start_block:
            found_start_block = False
            if mark_location_blocks != 1:
                raise AssertionError(
                    u'Expected 1 MarkLocation block between traversals, found: '
                    u'{} {}'.format(mark_location_blocks, ir_blocks))

        # Now consider opening new intervals or processing MarkLocation blocks.
        if isinstance(block, MarkLocation):
            mark_location_blocks += 1
        elif isinstance(block, (QueryRoot, Traverse, Recurse)):
            found_start_block = True
            mark_location_blocks = 0