def create_fuzzing_req_collection(path_regex):
    """ Builds the collection of requests that will be fuzzed.

    Without a @path_regex, every request of the grammar request collection
    is handed to the new collection. With a @path_regex, each grammar request
    whose endpoint matches it is passed to driver.compute_request_goal_seq
    and every request of the returned sequence is added.

    @param path_regex: The regex string used for filtering
    @type  path_regex: Str

    @return: The fuzzing request collection
    @rtype : FuzzingRequestCollection

    """
    collection = fuzzing_requests.FuzzingRequestCollection()

    # No filter: fuzz everything that the grammar defines.
    if not path_regex:
        collection.set_all_requests(GrammarRequestCollection()._requests)
        return collection

    for candidate in GrammarRequestCollection():
        # Only the existence of a match matters, not the matches themselves.
        if re.search(path_regex, candidate.endpoint) is None:
            continue
        goal_seq = driver.compute_request_goal_seq(
            candidate, GrammarRequestCollection())
        for seq_req in goal_seq:
            collection.add_request(seq_req)

    return collection
def run(self):
    """ Thread entrance - performs fuzzing.

    Stores the value returned by driver.generate_sequences in
    self._num_total_sequences. An InvalidDictionaryException is ignored here;
    for any other exception the formatted traceback is stored in
    self._exception instead of being propagated.

    @return: None
    @rtype : None

    """
    try:
        total_sequences = driver.generate_sequences(
            self._fuzzing_requests, self._checkers, self._fuzzing_jobs)
        self._num_total_sequences = total_sequences

        # At the end of everything print out any request that were never
        # rendered (because they never had valid constraints).
        values_pool = GrammarRequestCollection().candidate_values_pool
        logger.print_request_rendering_stats_never_rendered_requests(
            self._fuzzing_requests, values_pool, Monitor())
    except InvalidDictionaryException:
        # Deliberately ignored in this thread.
        pass
    except Exception:
        # Keep the traceback text so it can be inspected after the thread ends.
        self._exception = traceback.format_exc()
def _set_schemas(examples: RequestExamples, body_schema: BodySchema, method: str, endpoint: str):
    """ Attaches examples and/or a body schema to the request of the grammar
    request collection that matches the given endpoint and method.

    If no request matches (unknown endpoint, or known endpoint without this
    method) a message is written to the main log and the console.

    @param examples: The RequestExamples object to set (skipped when falsy)
    @param body_schema: The BodySchema object to set (skipped when falsy)
    @param method: The request's method
    @param endpoint: The request's endpoint

    @return: None

    """
    def _report_missing_request():
        logger.write_to_main(
            "Request from grammar does not exist in the Request Collection!\n"
            f"{method} {endpoint}\n",
            print_to_console=True
        )

    id_collection = GrammarRequestCollection().request_id_collection
    endpoint_id = str_to_hex_def(endpoint)

    # The endpoint itself is unknown to the request collection
    if endpoint_id not in id_collection:
        _report_missing_request()
        return

    # At most one entry per unique method exists for an endpoint, so the
    # first request with the right method is the one to update.
    target = next(
        (req for req in id_collection[endpoint_id] if req.method == method),
        None)

    # The endpoint exists, but not with this method
    if target is None:
        _report_missing_request()
        return

    if examples:
        target.set_examples(examples)
    if body_schema:
        target.set_body_schema(body_schema)
def apply_create_once_resources(fuzzing_requests):
    """ Attempts to create all of the resources in the 'create_once' endpoints.

    For every endpoint listed in Settings().create_once_endpoints, the first
    resource-generator request found for that endpoint is rendered once
    (preprocessing). The dynamic object values read back after rendering are
    then set on every fuzzing request that consumes what the generator
    produces, and requests that produce the same objects are excluded from
    fuzzing.

    @param fuzzing_requests: The collection of requests to be fuzzed
    @type  fuzzing_requests: FuzzingRequestCollection

    @return: A list of destructors to use to cleanup the create_once resources.
             None is returned when no create_once endpoints are configured.
    @rtype : list(Request)

    @raise FailedToCreateResource: The sequence for a resource generator did
                                   not produce a valid rendering.
    @raise InvalidCreateOnce: A configured endpoint is not present in the
                              grammar's request_id_collection.

    """
    def exclude_requests(pre_reqs, post_reqs):
        # Exclude any requests that produce or destroy the create_once endpoint
        for req_i in pre_reqs:
            fuzzing_requests.exclude_preprocessing_request(req_i)
        for req_i in post_reqs:
            fuzzing_requests.exclude_postprocessing_request(req_i)

    create_once_endpoints = Settings().create_once_endpoints

    # Nothing to do (note: this path returns None, not an empty list)
    if not create_once_endpoints:
        return

    logger.create_network_log(logger.LOG_TYPE_PREPROCESSING)
    destructors = set()
    exclude_reqs = set()
    request_count = 0

    logger.write_to_main("Rendering for create-once resources:\n")
    # Iterate through each 'create_once' endpoint
    for endpoint in create_once_endpoints:
        # Verify that the endpoint exists in the request collection
        if endpoint in GrammarRequestCollection().request_id_collection:
            # The create_once resource generator
            resource_gen_req = None
            # Iterate through each of the requests that contain the create_once endpoint
            for req in GrammarRequestCollection(
            ).request_id_collection[endpoint]:
                if req not in fuzzing_requests:
                    logger.write_to_main(
                        "Warning: Create-once endpoint is not a request in the fuzzing list\n",
                        True)
                    break
                # Only the first resource generator found for the endpoint is rendered
                if not resource_gen_req and req.is_resource_generator():
                    resource_gen_req = req
                    # Compute the sequence necessary to create the create_once resource
                    req_list = driver.compute_request_goal_seq(
                        resource_gen_req, fuzzing_requests)
                    logger.write_to_main(
                        f"{formatting.timestamp()}: Endpoint - {resource_gen_req.endpoint_no_dynamic_objects}"
                    )
                    logger.write_to_main(
                        f"{formatting.timestamp()}: Hex Def - {resource_gen_req.method_endpoint_hex_definition}"
                    )
                    create_once_seq = sequences.Sequence(req_list)
                    renderings = create_once_seq.render(
                        GrammarRequestCollection().candidate_values_pool,
                        None,
                        preprocessing=True)

                    # Make sure we were able to successfully create the create_once resource
                    if not renderings.valid:
                        logger.write_to_main(
                            f"{formatting.timestamp()}: Rendering INVALID")
                        # Apply the exclusions gathered so far before giving up
                        exclude_requests(exclude_reqs, destructors)
                        raise FailedToCreateResource(destructors)

                    logger.write_to_main(
                        f"{formatting.timestamp()}: Rendering VALID")
                    logger.format_rendering_stats_definition(
                        resource_gen_req,
                        GrammarRequestCollection().candidate_values_pool)
                    # In smoke test mode, record the outcome on the request's stats
                    if Settings().in_smoke_test_mode():
                        resource_gen_req.stats.request_order = 'Preprocessing'
                        resource_gen_req.stats.valid = 1
                        resource_gen_req.stats.status_code = renderings.final_request_response.status_code
                        resource_gen_req.stats.status_text = renderings.final_request_response.status_text
                        resource_gen_req.stats.sample_request.set_request_stats(
                            renderings.sequence.sent_request_data_list[-1].
                            rendered_data)
                        resource_gen_req.stats.sample_request.set_response_stats(
                            renderings.final_request_response,
                            renderings.final_response_datetime)

                if req.is_destructor():
                    # Add destructors to the destructor list that will be returned
                    destructors.add(req)

            # Only continue processing if a resource generator was actually found for this endpoint
            if not resource_gen_req:
                continue
            request_count += len(req_list)
            # Get the set of all dynamic object names in the endpoint
            var_names = resource_gen_req.consumes.union(
                resource_gen_req.produces)
            # This dictionary will map dynamic object names to the values created during
            # this preprocessing create-once step.
            dynamic_object_values = {}
            for name in var_names:
                dynamic_object_values[name] = dependencies.get_variable(name)

            # Iterate through the entire request collection, searching for requests that include
            # the create_once resource. We want to "lock" the resources in these requests with
            # the dynamic object values that were created during this preprocessing step.
            for req_i in fuzzing_requests:
                # Set the variables in any requests whose consumers were produced
                # by the create_once resource generator
                if resource_gen_req.produces & req_i.consumes:
                    req_i.set_id_values_for_create_once_dynamic_objects(
                        dynamic_object_values, renderings)
                # Exclude any requests that produce the create_once object(s)
                if resource_gen_req.produces & req_i.produces:
                    exclude_reqs.add(req_i)
        else:
            # The configured endpoint is unknown to the request collection
            exclude_requests(exclude_reqs, destructors)
            raise InvalidCreateOnce(destructors)

    exclude_requests(exclude_reqs, destructors)

    # Reset all of the dynamic object values that were just created
    dependencies.reset_tlb()
    # Reset the garbage collector, so it doesn't delete any of the resources that were just created
    dependencies.set_saved_dynamic_objects()

    logger.print_request_rendering_stats(
        GrammarRequestCollection().candidate_values_pool, fuzzing_requests,
        Monitor(), request_count, logger.PREPROCESSING_GENERATION, None)

    # Return the list of destructors that were removed from the request collection.
    # These will be used to cleanup the create_once resources created during preprocessing.
    return list(destructors)
def generate_sequences(fuzzing_requests, checkers, fuzzing_jobs=1):
    """ Implements core restler algorithm.

    Each generation extends the current sequence collection with new request
    templates, renders it, saves the state and prints statistics. The outer
    loop is repeated only in 'random-walk' mode, until a timeout is reached.

    @param fuzzing_requests: The collection of requests that will be fuzzed
    @type  fuzzing_requests: FuzzingRequestCollection
    @param checkers: The list of checkers to apply
    @type  checkers: list[Checker]
    @param fuzzing_jobs: Optional number of fuzzing jobs for parallel fuzzing.
                            Default value passed is one (sequential fuzzing).
    @type  fuzzing_jobs: Int

    @return: The total number of sequences, accumulated as the size of the
             sequence collection after each generation's render. In
             'directed-smoke-test' mode, the value returned by
             generate_sequences_directed_smoketest. None when
             fuzzing_requests.size is falsy.
    @rtype : Int

    """
    # Nothing to fuzz (note: this path returns None)
    if not fuzzing_requests.size:
        return

    logger.create_network_log(logger.LOG_TYPE_TESTING)

    fuzzing_mode = Settings().fuzzing_mode
    max_len = Settings().max_sequence_length
    if fuzzing_mode == 'directed-smoke-test':
        return generate_sequences_directed_smoketest(fuzzing_requests,
                                                     checkers)

    # Pick the render strategy; a lock and a thread pool are only created
    # when more than one fuzzing job is requested.
    if fuzzing_jobs > 1:
        render = render_parallel
        global_lock = multiprocessing.Lock()
        fuzzing_pool = ThreadPool(fuzzing_jobs)
    else:
        global_lock = None
        fuzzing_pool = None
        render = render_sequential

    should_stop = False
    timeout_reached = False
    seq_collection_exhausted = False
    num_total_sequences = 0
    while not should_stop:

        seq_collection = [sequences.Sequence()]
        # Only for bfs: If any checkpoint file is available, load state of
        # latest generation. Note that it only makes sense to use checkpoints
        # for the bfs exploration method, since it is the only systemic and
        # exhaustive method.
        min_len = 0
        if fuzzing_mode == 'bfs':
            req_collection = GrammarRequestCollection()
            monitor = Monitor()
            req_collection, seq_collection, fuzzing_requests, monitor, min_len =\
                saver.load(req_collection, seq_collection, fuzzing_requests, monitor)
            requests.GlobalRequestCollection.Instance(
            )._req_collection = req_collection
            # NOTE(review): private-name mangling only happens inside class
            # bodies, so this sets an attribute literally named "__instance".
            # If FuzzingMonitor keeps its singleton in a mangled
            # "_FuzzingMonitor__instance", this assignment does not replace
            # it -- confirm against the FuzzingMonitor class.
            fuzzing_monitor.FuzzingMonitor.__instance = monitor
        # Repeat external loop only for random walk
        if fuzzing_mode != 'random-walk':
            should_stop = True

        # Initialize fuzzing schedule
        fuzzing_schedule = {}
        logger.write_to_main(f"Setting fuzzing schemes: {fuzzing_mode}")
        for length in range(min_len, max_len):
            fuzzing_schedule[length] = fuzzing_mode
            # print(" - {}: {}".format(length + 1, fuzzing_schedule[length]))

        # print general request-related stats
        logger.print_req_collection_stats(
            fuzzing_requests, GrammarRequestCollection().candidate_values_pool)

        generation = 0
        for length in range(min_len, max_len):
            # we can set this without locking, since noone else writes (main
            # driver is single-threaded) and every potential worker will just
            # read-access this value.
            generation = length + 1
            fuzzing_mode = fuzzing_schedule[length]

            # extend sequences with new request templates
            seq_collection = extend(seq_collection, fuzzing_requests,
                                    global_lock)
            print(f"{formatting.timestamp()}: Generation: {generation} ")

            logger.write_to_main(
                f"{formatting.timestamp()}: Generation: {generation} / "
                f"Sequences Collection Size: {len(seq_collection)} "
                f"(After {fuzzing_schedule[length]} Extend)")

            # render templates
            try:
                seq_collection_exhausted = False
                seq_collection = render(seq_collection, fuzzing_pool, checkers,
                                        generation, global_lock)

            except TimeOutException:
                logger.write_to_main("Timed out...")
                timeout_reached = True
                seq_collection_exhausted = True
                # Increase fuzzing generation after timeout because the code
                # that does it would have never been reached. This is done so
                # the previous generation's test summary is logged correctly.
                Monitor().current_fuzzing_generation += 1

            except ExhaustSeqCollectionException:
                logger.write_to_main("Exhausted collection...")
                seq_collection = []
                seq_collection_exhausted = True

            logger.write_to_main(
                f"{formatting.timestamp()}: Generation: {generation} / "
                f"Sequences Collection Size: {len(seq_collection)} "
                f"(After {fuzzing_schedule[length]} Render)")

            # saving latest state
            saver.save(GrammarRequestCollection(), seq_collection,
                       fuzzing_requests, Monitor(), generation)

            # Print stats for iteration of the current generation
            logger.print_generation_stats(GrammarRequestCollection(),
                                          Monitor(), global_lock)

            num_total_sequences += len(seq_collection)

            logger.print_request_rendering_stats(
                GrammarRequestCollection().candidate_values_pool,
                fuzzing_requests, Monitor(),
                Monitor().num_fully_rendered_requests(
                    fuzzing_requests.all_requests), generation, global_lock)

            # Leave the generation loop on timeout or exhaustion; only a
            # timeout also ends the outer (random-walk) loop.
            if timeout_reached or seq_collection_exhausted:
                if timeout_reached:
                    should_stop = True
                break
        logger.write_to_main("--\n")

    # Shut the worker pool down (only reached when no exception propagated)
    if fuzzing_pool is not None:
        fuzzing_pool.close()
        fuzzing_pool.join()

    return num_total_sequences
def generate_sequences_directed_smoketest(fuzzing_requests, checkers):
    """ Checks whether each request can be successfully rendered.
        For each request:
        - Constructs a sequence that satisfies all dependencies by backtracking.
        - Renders this sequence.

        This allows debugging rendering on a per-request basis
        to resolve configuration or spec issues.

    Requests are processed from the shortest dependency sequence to the
    longest, so that a previously rendered sequence can be re-used as the
    prefix of a longer one.

    @param fuzzing_requests: The collection of requests that will be fuzzed
    @type  fuzzing_requests: FuzzingRequestCollection
    @param checkers: The list of checkers to apply
    @type  checkers: list[Checker]

    @return: The number of entries memoized in the rendered-sequences list.
             One entry is appended per processed request (an empty sequence
             for an INVALID one), so this counts the requests that were
             attempted, not only the valid ones.
    @rtype : Int

    """
    def render_request(request, seq):
        """ Helper function that attempts to find a valid rendering for the request.
        The do-while loop will render each combination of the request until either
        a valid rendering is detected or all combinations have been exhausted.

        Side effects: request.stats.status_code updated
                      request.stats.status_text updated
                      request.stats updated with concrete response and request text
                      (valid request or last combination)
        @return: Tuple containing rendered sequence object, response body, and
                 failure information enum.
        @rtype : Tuple(RenderedSequence, str, FailureInformation)

        """
        response_body = None
        rendering_information = None
        while True:
            renderings = seq.render(candidate_values_pool, global_lock)

            if renderings.failure_info:
                # Even though we will be returning renderings from this function,
                # the renderings object that is returned may be from an unrendered
                # sequence. We want to save the most recent info.
                rendering_information = renderings.failure_info

            # Perform this check/save here in case the last call to seq.render
            # returns an empty 'renderings' object. An empty renderings object
            # will be returned from seq.render if all request combinations are
            # exhausted prior to getting a valid status code.
            if renderings.final_request_response:
                request.stats.status_code = renderings.final_request_response.status_code
                request.stats.status_text = renderings.final_request_response.status_text
                # Get the last rendered request.  The corresponding response should be
                # the last received response.
                request.stats.sample_request.set_request_stats(
                    renderings.sequence.sent_request_data_list[-1].
                    rendered_data)
                request.stats.sample_request.set_response_stats(
                    renderings.final_request_response,
                    renderings.final_response_datetime)
                response_body = renderings.final_request_response.body

            apply_checkers(checkers, renderings, global_lock)
            # If a valid rendering was found or the combinations have been
            # exhausted (empty rendering), exit the loop.
            if renderings.valid or renderings.sequence is None:
                return renderings, response_body, rendering_information

    # The smoke test runs without a lock
    global_lock = None
    candidate_values_pool = GrammarRequestCollection().candidate_values_pool

    # print general request-related stats
    logger.print_req_collection_stats(
        fuzzing_requests, GrammarRequestCollection().candidate_values_pool)

    logger.write_to_main(
        f"\n{formatting.timestamp()}: Starting directed-smoke-test\n")
    # Sort the request list prior to computing the request sequences,
    # so the prefixes are always in the same order for the algorithm
    fuzzing_request_list = list(fuzzing_requests._requests)
    fuzzing_request_list.sort(key=lambda x: x.method_endpoint_hex_definition)
    # sort the requests in fuzzing_requests by depth
    # Each entry is [sequence length, request, dependency sequence]
    sorted_fuzzing_req_list = []
    for request in fuzzing_request_list:
        req_list = compute_request_goal_seq(request, fuzzing_request_list)
        if len(req_list) > 0:
            sorted_fuzzing_req_list.append([len(req_list), request, req_list])
        # Else an error message was printed and we skip this request

    # now sort by length (secondary sort by a hash of the request definition text)
    sorted_fuzzing_req_list.sort(
        key=lambda x: (x[0], x[1].method_endpoint_hex_definition))

    logger.write_to_main(f"{formatting.timestamp()}: Will attempt to render "
                         f"{len(sorted_fuzzing_req_list)} requests found\n")
    # the two following lists are indexed by request number and are of the same size.
    # memoize valid rendered sequences for each request and re-use those when going deeper
    valid_rendered_sequences_list = []
    # memoize the first invalid prefix for each request
    first_invalid_prefix_list = []

    # try to render all requests starting with the shallow ones
    for idx, request_triple in enumerate(sorted_fuzzing_req_list):
        req_list_length = request_triple[0]
        request = request_triple[1]
        req_list = request_triple[2]
        valid = False
        first_invalid_prefix = -1  # -1 denotes no invalid prefix by default
        request.stats.request_order = idx
        Found = False
        if (req_list_length > 1):
            # search for a valid matching prefix we can re-use;
            # unless path_regex is used we should always find a match
            # because we start with shallow sequences
            req_list_prefix = req_list[:-1]
            i = 0
            # On a match, i is left pointing at the matching earlier request
            while (not Found) and (i < idx):
                if sorted_fuzzing_req_list[i][2] == req_list_prefix:
                    # we found a match
                    Found = True
                    logger.write_to_main(
                        f"Found a matching prefix for request {idx} with previous request {i}"
                    )
                    request.stats.matching_prefix[
                        "id"] = sorted_fuzzing_req_list[i][
                            1].method_endpoint_hex_definition
                else:
                    # continue searching
                    i = i + 1

        rendering_information = None
        response_body = None
        if Found:
            if valid_rendered_sequences_list[i].is_empty_sequence():
                # then the current sequence will also be INVALID.
                # propagate the root-cause explaining why the prefix was invalid
                first_invalid_prefix = first_invalid_prefix_list[i]
                logger.write_to_main(
                    f"\tbut that prefix was INVALID (root = {first_invalid_prefix})\n"
                )
                request.stats.matching_prefix["valid"] = 0
                # since valid = False by default, nothing else to do here
            else:
                # re-use the previous VALID prefix
                logger.write_to_main("\tand re-using that VALID prefix\n")
                request.stats.matching_prefix["valid"] = 1
                new_seq = valid_rendered_sequences_list[i]
                # Work on a copy of the request, restarting from its first combination
                req_copy = copy.copy(request)
                req_copy._current_combination_id = 0
                new_seq = new_seq + sequences.Sequence(req_copy)
                new_seq.seq_i = 0
                renderings, response_body, rendering_information = render_request(
                    request, new_seq)
                valid = renderings.valid
        else:
            logger.write_to_main(f"Rendering request {idx} from scratch\n")
            # render the sequence.
            # The sequence is grown one request at a time and rendered after
            # each addition; the validity of the last rendering is what counts.
            new_seq = sequences.Sequence()
            for req in req_list:
                req_copy = copy.copy(req)
                req_copy._current_combination_id = 0

                if new_seq.is_empty_sequence():
                    new_seq = sequences.Sequence(req_copy)
                else:
                    new_seq = new_seq + sequences.Sequence(req_copy)

                new_seq.seq_i = 0
                renderings, response_body, rendering_information = render_request(
                    req, new_seq)

            valid = renderings.valid

        logger.write_to_main(
            f"{formatting.timestamp()}: Request {idx}\n"
            f"{formatting.timestamp()}: Endpoint - {request.endpoint_no_dynamic_objects}\n"
            f"{formatting.timestamp()}: Hex Def - {request.method_endpoint_hex_definition}\n"
            f"{formatting.timestamp()}: Sequence length that satisfies dependencies: {req_list_length}"
        )

        if valid:
            logger.write_to_main(f"{formatting.timestamp()}: Rendering VALID")
            request.stats.valid = 1
            # remember this valid sequence
            valid_rendered_sequences_list.append(new_seq)
            first_invalid_prefix_list.append(first_invalid_prefix)
        else:
            logger.write_to_main(
                f"{formatting.timestamp()}: Rendering INVALID")
            request.stats.valid = 0
            request.stats.error_msg = response_body
            # remember RESTler didn't find any valid sequence with an empty request sequence
            valid_rendered_sequences_list.append(sequences.Sequence())
            # This request is itself the root cause when no prefix was to blame
            if (first_invalid_prefix == -1):
                first_invalid_prefix = idx
            first_invalid_prefix_list.append(first_invalid_prefix)

        # Explain the recorded failure, if any, in the main log
        if rendering_information:
            if rendering_information == FailureInformation.PARSER:
                msg = (
                    "This request received a VALID status code, but the parser failed.\n"
                    "Because of this, the request was set to INVALID.\n")
            elif rendering_information == FailureInformation.RESOURCE_CREATION:
                msg = (
                    "This request received a VALID status code, but the server "
                    "indicated that there was a failure when creating the resource.\n"
                )
            elif rendering_information == FailureInformation.SEQUENCE:
                msg = (
                    "This request was never rendered because the sequence failed to re-render.\n"
                    "Because of this, the request was set to INVALID.\n")
            elif rendering_information == FailureInformation.BUG:
                msg = "A bug code was received after rendering this request."
            else:
                msg = "An unknown error occurred when processing this request."
            logger.write_to_main(f"{formatting.timestamp()}: {msg}")
            request.stats.failure = rendering_information
            rendering_information = None

        logger.format_rendering_stats_definition(
            request, GrammarRequestCollection().candidate_values_pool)

        logger.print_request_rendering_stats(
            GrammarRequestCollection().candidate_values_pool, fuzzing_requests,
            Monitor(), fuzzing_requests.size_all_requests,
            Monitor().current_fuzzing_generation, global_lock)

    Monitor().current_fuzzing_generation += 1

    return len(valid_rendered_sequences_list)
def render_one(seq_collection, ith, checkers, generation, global_lock):
    """ Render ith sequence from sequence collection.

    @param seq_collection: List of sequences in sequence collection.
    @type  seq_collection: List
    @param ith: The position of the target sequence (to be rendered) in the
                    sequence collection.
    @type  ith: Int
    @param checkers: The list of checkers to apply
    @type  checkers: list[Checker]
    @param generation: The fuzzing generation
    @type  generation: Int
    @param global_lock: Lock object used for sync of more than one fuzzing jobs.
    @type  global_lock: thread.Lock object

    @return: The list of sequences with valid renderings.
    @rtype : List

    @raise AssertionError: Settings().fuzzing_mode is not one of the modes
                           handled here.

    Note: Try ith sequence's template with all possible primitive type value
    combinations and return only renderings (combinations of primitive type
    values) that lead to valid error codes. We keep track of the order of the
    current sequence in the collection using "ith" argument for logging
    purposes.

    """
    # Log memory consumption every hour.
    n_minutes = 60

    # Static variable used for keeping track of the last time memory consumption was printed
    render_one.last_memory_consumption_check = getattr(
        render_one, 'last_memory_consumption_check', int(time.time()))

    if int(time.time()) - render_one.last_memory_consumption_check > (
            n_minutes * 60):
        logger.print_memory_consumption(GrammarRequestCollection(), Monitor(),
                                        Settings().fuzzing_mode, generation)
        render_one.last_memory_consumption_check = int(time.time())

    candidate_values_pool = GrammarRequestCollection().candidate_values_pool
    current_seq = seq_collection[ith]
    current_seq.seq_i = ith
    valid_renderings = []

    # Try to find one valid rendering.
    n_invalid_renderings = 0
    while True:
        # Render on a sequence instance will internally iterate over possible
        # renderings of current sequence until a valid or an invalid combination
        # of values for its primitive types is found -- internal iteration may
        # skip some renderings (that are marked to be skipped according to past
        # failures) -- that's why we put everything in a while.
        renderings = current_seq.render(candidate_values_pool, global_lock)

        # Note that this loop will keep running as long as we hit invalid
        # renderings and we will end up reapplying the leakage rule a billion
        # times for very similar 404s. To control this, when in bfs-cheap, we
        # apply the checkers only on the first invalid rendering.
        if Settings().fuzzing_mode not in ['bfs-cheap', 'bfs-minimal']\
                or renderings.valid or n_invalid_renderings < 1:
            apply_checkers(checkers, renderings, global_lock)

        # If renderings.sequence is None it means there is nothing left to render.
        if renderings.valid or renderings.sequence is None:
            break

        # This line will be reached only if we have an invalid rendering.
        n_invalid_renderings += 1

    # for random-walk and cheap fuzzing, one valid rendering is enough.
    if Settings().fuzzing_mode in ['random-walk', 'bfs-cheap', 'bfs-minimal']:
        if renderings.valid:
            valid_renderings.append(renderings.sequence)

    # bfs needs to be exhaustive to provide full grammar coverage
    elif Settings().fuzzing_mode in ['bfs', 'bfs-fast']:

        # This loop will iterate over possible remaining renderings of the
        # current sequence.
        while renderings.sequence is not None:
            if renderings.valid:
                valid_renderings.append(renderings.sequence)
            renderings = current_seq.render(candidate_values_pool, global_lock)
            apply_checkers(checkers, renderings, global_lock)
    else:
        print("Unsupported fuzzing_mode:", Settings().fuzzing_mode)
        # Raise explicitly rather than "assert False": assert statements are
        # removed when Python runs with -O, which would let an unsupported
        # mode silently return an empty list.
        raise AssertionError(
            f"Unsupported fuzzing_mode: {Settings().fuzzing_mode}")

    return valid_renderings
def delete_create_once_resources(destructors, fuzzing_requests):
    """ Iterates through each destructor request and sends it to the server

    A failure while processing one destructor is logged and does not prevent
    the remaining destructors from being sent.

    @param destructors: A list of destructor requests to send
    @type  destructors: list(Request)
    @param fuzzing_requests: The global collection of requests to fuzz
    @type  fuzzing_requests: FuzzingRequestCollection

    @return: None
    @rtype : None

    """
    if not destructors:
        return

    candidate_values_pool = GrammarRequestCollection().candidate_values_pool

    logger.write_to_main("\nRendering for create-once resource destructors:\n")

    for destructor in destructors:
        # Reset per destructor: the except handler below inspects this value,
        # and must never see an unbound name (failure before render) or the
        # rendering that belongs to the previous destructor.
        renderings = None
        try:
            logger.write_to_main(
                f"{formatting.timestamp()}: Endpoint - {destructor.endpoint_no_dynamic_objects}"
            )
            logger.write_to_main(
                f"{formatting.timestamp()}: Hex Def - {destructor.method_endpoint_hex_definition}"
            )
            seq = sequences.Sequence([destructor])
            renderings = seq.render(
                GrammarRequestCollection().candidate_values_pool,
                None,
                postprocessing=True)
            if not renderings.valid:
                logger.write_to_main(
                    f"{formatting.timestamp()}: Rendering INVALID")
            else:
                logger.write_to_main(
                    f"{formatting.timestamp()}: Rendering VALID")
            logger.format_rendering_stats_definition(
                destructor, GrammarRequestCollection().candidate_values_pool)
            if Settings().in_smoke_test_mode():
                destructor.stats.request_order = 'Postprocessing'
                # NOTE(review): valid is set to 1 here even when the rendering
                # was logged as INVALID above -- confirm this is intended.
                destructor.stats.valid = 1
                destructor.stats.status_code = renderings.final_request_response.status_code
                destructor.stats.status_text = renderings.final_request_response.status_text
                destructor.stats.sample_request.set_request_stats(
                    renderings.sequence.sent_request_data_list[-1].
                    rendered_data)
                destructor.stats.sample_request.set_response_stats(
                    renderings.final_request_response,
                    renderings.final_response_datetime)
        except Exception as error:
            # Best effort: report the failure and move on to the next destructor
            msg = f"Failed to delete create_once resource: {error!s}"
            logger.raw_network_logging(msg)
            logger.write_to_main(msg, print_to_console=True)
            if Settings().in_smoke_test_mode():
                destructor.stats.request_order = 'Postprocessing'
                destructor.stats.valid = 0
                # Only record response details when this destructor actually
                # got as far as receiving a response.
                if renderings and renderings.final_request_response:
                    destructor.stats.status_code = renderings.final_request_response.status_code
                    destructor.stats.status_text = renderings.final_request_response.status_text
                    destructor.stats.error_msg = renderings.final_request_response.body
                    destructor.stats.sample_request.set_request_stats(
                        renderings.sequence.sent_request_data_list[-1].
                        rendered_data)
                    destructor.stats.sample_request.set_response_stats(
                        renderings.final_request_response,
                        renderings.final_response_datetime)

    Monitor().current_fuzzing_generation += 1

    logger.print_request_rendering_stats(candidate_values_pool,
                                         fuzzing_requests, Monitor(),
                                         fuzzing_requests.size_all_requests,
                                         logger.POSTPROCESSING_GENERATION,
                                         None)