def visit_any_node(self, node, node_type):
    """Visit ``node`` and replace it with a mutation when one is chosen.

    Args:
        node: The node currently being visited.
        node_type: Label for the node's type; only used in the debug log.

    Returns:
        The result of applying a randomly selected mutation to ``node`` when
        ``should_mutate()`` is true, otherwise ``node`` itself.
    """
    Log.DEBUG(f'Visiting {node_type} node')

    # guard clause: hand the node back untouched when no mutation is chosen
    if not self.should_mutate():
        Log.DEBUG('Returning original node.')
        return node

    Log.DEBUG(f'Mutating node {node}')
    # the counter is bumped before the mutation is selected and applied
    self._num_mutations += 1
    mutation = self.select_random_mutation()
    Log.DEBUG(f'Selected mutation: {mutation}')
    return mutation(node)
def __init__(self, population, source=None, source_tree=None):
    """Create a chromosome from source code or an AST and compute its fitness.

    Args:
        population: The population this chromosome belongs to. Its
            ``simulation`` attribute supplies the test file, the repo and
            the best-fitness records that may be updated here.
        source: The source code as a ``str``, or None if ``source_tree``
            is provided instead.
        source_tree: An AST used to generate the source code when
            ``source`` is None.

    Raises:
        ValueError: If neither ``source`` nor ``source_tree`` is provided.
    """
    # assign a uuid to this chromosome and then increment the static uuid counter
    self.uuid = Chromosome.next_uuid
    Chromosome.next_uuid += 1

    # require that either source code or an AST is provided
    if source is None and source_tree is None:
        raise ValueError('Attempted to initialize a chromosome without source code or a source code AST.')

    self.source = source
    self.source_tree = source_tree
    if self.source is not None:
        Log.DEBUG('Initialized new chromosome from source')
        assert isinstance(self.source, str)
    else:
        # if source code is not provided, generate it from the provided AST
        Log.DEBUG('Initialized new chromosome from AST')
        self.source = self.generate_source()

    # flag to track if a timeout occurs while running unit tests (indicating an error)
    self.timeout_occurred = False
    self.population = population

    # run the unit tests to gather information on the number of failing tests
    # tests should be run before the bandit report is run
    # this is because generating the bandit report is very expensive and we would like to check if the
    # unit tests fail and we can skip the bandit report altogether before attempting to generate it
    self.test_results = self.run_tests(population.simulation.test_file, population.simulation.repo)

    self.bandit_report = None
    if not (self.timeout_occurred or self.test_results.crashed or self.test_results.nran == 0):
        # generate a bandit report for information on security vulnerabilities in the given source code
        # use self.source rather than the ``source`` argument: the argument is None
        # whenever this chromosome was built from an AST
        self.bandit_report = bp.BanditReport(source=self.source, auto_analyze=True, auto_parse=True)

    self.fitness = float('inf')
    # compute the fitness using the bandit report and test results
    self._compute_fitness()

    # check if this chromosome is a new local or global minimum and if so set the correct flags
    simulation = self.population.simulation
    if self.fitness < simulation.best_fitness:
        simulation.best_fitness = self.fitness
        simulation.best_source = self.source
    if self.fitness < simulation.best_fitness_seen:
        simulation.best_fitness_seen = self.fitness
        simulation.best_source_seen = self.source
def visit_any_node(self, node, node_type):
    """Record ``node`` under ``node_type`` in ``self.nodes`` without mutating it.

    Args:
        node: The node currently being visited.
        node_type: Key under which the node is stored in ``self.nodes``.

    Returns:
        None. After recording the node, the visit is handed on to
        ``self.generic_visit(node)``.
    """
    Log.DEBUG(f'Visiting node {node} with mutations turned off.')
    # create the bucket for this node type on first sight, then add the node to it
    self.nodes.setdefault(node_type, []).append(node)
    self.generic_visit(node)
def generate_starting_population(self):
    """Build the starting population from the original source code.

    A new ``Population`` is stored on ``self.population``. It receives one
    chromosome built from the unmodified ``self.source``, followed by
    chromosomes built from mutated copies of ``self.source`` until
    ``self.population.size`` reaches ``self.population_size``.
    """
    Log.INFO('Generating random population.')

    # create a new empty population
    self.population = Population(self)

    # add population_size chromosomes to the population and randomly mutate them,
    # leaving one unchanged and requiring all other be mutated at least once
    original = Chromosome(self.population, source=self.source)
    Log.DEBUG(f'Original source code for file {self.source_file} has a fitness of {original.fitness}.')
    self.population.add_chromosome(original)

    mutator = Mutator(mutation_frequency=5)
    while self.population.size < self.population_size:
        # Mutator.mutate does not return until at least one mutation has been made
        # when called with require_mutation=True
        mutated_source = mutator.mutate(self.source, require_mutation=True)
        # wrap the mutated source in a Chromosome, matching what is passed to
        # add_chromosome for the original source above
        self.population.add_chromosome(Chromosome(self.population, source=mutated_source))

    assert self.population.size == self.population_size
    Log.INFO('Finished generating starting population.')
def mutate(self, source, require_mutation=False):
    """Return a mutated version of ``source``.

    Args:
        source: The source code to mutate; must be a ``str``.
        require_mutation: When true, keep retrying until at least one
            mutation has been counted in ``self._num_mutations``.

    Returns:
        The source regenerated from the mutated AST, or ``source``
        unchanged if it cannot be parsed as Python.
    """
    Log.DEBUG('Mutating source code...')
    assert type(source) is str
    Log.DEBUG(f'Number of characters: {len(source)}')

    # source that does not parse is returned untouched: mutations are highly
    # unlikely to repair it, and broken source ends up with a fitness of inf anyway
    try:
        collection_tree = ast.parse(source)
    except SyntaxError:
        return source

    # gather the nodes up front so they are available for substitutions later
    Log.DEBUG('Collecting nodes.')
    self.collect_nodes(collection_tree)

    mutated = None
    iteration = 0
    Log.DEBUG('Beginning mutation process')
    while True:
        iteration += 1
        Log.DEBUG(f'Mutation iteration: {iteration}')

        # every attempt starts from a fresh parse of the original source
        # TODO: optimize
        working_tree = ast.parse(source)

        # the mutations themselves are made inside the visit_[Node] methods
        Log.DEBUG('Visiting tree with mutations turned on.')
        self.visit(working_tree)
        Log.DEBUG(f'Mutations made before parsing back to source: {self._num_mutations}')

        # try to turn the AST back into source; on failure discard this
        # attempt and its mutation count so the loop tries again
        try:
            mutated = astor.to_source(working_tree)
        except Exception as error:
            if isinstance(error, AttributeError):
                Log.DEBUG('Attribute Error occured while attempting to parse AST back to source.')
            else:
                Log.DEBUG('Unable to parse mutated AST back to source.')
            Log.DEBUG(f'Exception message: {error}')
            self._num_mutations = 0
            mutated = None

        Log.DEBUG(f'Source is None: {mutated is None}, Require mutation: {require_mutation}, Mutations made: {self._num_mutations}')

        # stop once valid source exists and, if required, a mutation was made
        if mutated is not None and not (require_mutation and self._num_mutations < 1):
            break

    Log.DEBUG(f'Number of mutations made: {self._num_mutations}')
    # reset the mutators flags and namespace so it can be used again
    self._reset()
    return mutated
def replace_node_with_random(self, node):
    """Return an AST for ``int.from_bytes(os.urandom(1), byteorder='big')``.

    Args:
        node: The node being replaced; it is not inspected.

    Returns:
        An ``ast.Call`` node representing the expression above.
    """
    Log.DEBUG('Replacing node with call to random number generator.')

    # ast.Constant replaces ast.Num / ast.Str, which are deprecated since
    # Python 3.8 and removed in Python 3.12
    urandom_call = ast.Call(
        func=ast.Attribute(
            value=ast.Name(id='os', ctx=ast.Load()),
            attr='urandom',
            ctx=ast.Load(),
        ),
        args=[ast.Constant(value=1)],
        keywords=[],
    )
    return ast.Call(
        func=ast.Attribute(
            value=ast.Name(id='int', ctx=ast.Load()),
            attr='from_bytes',
            ctx=ast.Load(),
        ),
        args=[urandom_call],
        keywords=[ast.keyword(arg='byteorder', value=ast.Constant(value='big'))],
    )
def should_mutate(self):
    """Decide at random whether to mutate.

    Returns:
        True when a random integer drawn from 1 to 100 (inclusive) is less
        than or equal to ``self.mutation_frequency``, otherwise False.
    """
    roll = random.randint(1, 100)
    chosen = roll <= self.mutation_frequency
    Log.DEBUG('Choosing to mutate.' if chosen else 'Choosing not to mutate.')
    return chosen
def generate_source(self):
    """Generate source code from this object's ``source_tree``.

    Returns:
        The result of ``astor.to_source`` applied to ``self.source_tree``.
    """
    Log.DEBUG('Generating source for chromosome from AST')
    tree = self.source_tree
    # an AST must be present to generate source from
    assert tree is not None
    return astor.to_source(tree)