Example #1
0
  def queryHelper(self, regex, commandToRunFmt):
    """Run an external generator command on one regex and collect its output.

    query() might benefit from this.

    regex: the libLF.Regex to query
    commandToRunFmt: format string with the invocation
      We apply commandToRunFmt.format(inFile, outFile)
      inFile: path of a temp file holding regex.toNDJSON()
      outFile: path of a temp file the command is expected to fill;
        it is parsed with libLF.RegexPatternAndInputs().initFromNDJSON
    @returns: GeneratorQueryResponse[], one per producer found in outFile.
      Empty if the command exits with a non-zero return code.
    """
    libLF.log('queryHelper for {}:\n  regex /{}/\n  command {}' \
      .format(self.name, regex.pattern, commandToRunFmt))

    with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-',
                                     suffix='.json',
                                     delete=DELETE_TMP_FILES) as inFile, \
         tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-',
                                     suffix='.json',
                                     delete=DELETE_TMP_FILES) as outFile:
      libLF.writeToFile(inFile.name, regex.toNDJSON())
      rc, out = libLF.runcmd(commandToRunFmt.format(inFile.name, outFile.name))
      if rc != 0:
        # Still best-effort (no inputs from this generator), but leave a trace
        # so a broken generator is not mistaken for one that found nothing.
        libLF.log('queryHelper for {}: command failed with rc {}: {}' \
          .format(self.name, rc, out))
        return []

      # Re-open by name: the command wrote to the path, not to our handle.
      with open(outFile.name, 'r') as inStream:
        contents = inStream.read()

    rpai = libLF.RegexPatternAndInputs().initFromNDJSON(contents)
    return [
      GeneratorQueryResponse(producer, strings)
      for producer, strings in rpai.stringsByProducer.items()
    ]
Example #2
0
def main(regexFile, outFile, seed, nInputs, timeout):
    """Read a regex from regexFile, query Rex for inputs, write them to outFile.

    regexFile: path to a file parsed with libLF.Regex().initFromNDJSON
    outFile: path that receives the NDJSON form of the
      libLF.RegexPatternAndInputs built from the Rex results
    seed, nInputs, timeout: forwarded unchanged to getRexInputs
    """
    libLF.log('regexFile {} outFile {} seed {} nInputs {} timeout {}'.format(
        regexFile, outFile, seed, nInputs, timeout))

    # Load the regex under test
    with open(regexFile, 'r') as regexStream:
        serialized = regexStream.read()
        regex = libLF.Regex().initFromNDJSON(serialized)
    libLF.log('Generating inputs for regex /{}/'.format(regex.pattern))

    # Ask Rex; the result is handed to initFromRaw as stringsByProducer
    rexStrings = getRexInputs(regex.pattern, seed, nInputs, timeout)
    rpai = libLF.RegexPatternAndInputs().initFromRaw(regex.pattern, rexStrings)

    # Report how much we got
    nUnique = len(rpai.getUniqueInputs())
    nTotal = rpai.getNTotalInputs()
    summaryFmt = 'Rex generated {} unique inputs for regex /{}/ ({} including duplicates)'
    libLF.log(summaryFmt.format(nUnique, regex.pattern, nTotal))

    # Emit
    with open(outFile, 'w') as sink:
        sink.write(rpai.toNDJSON())
Example #3
0
def main(regexFile, outFile, timeout):
    """Read a regex from regexFile, query ReScue for inputs, write them to outFile.

    regexFile: path to a file parsed with libLF.Regex().initFromNDJSON
    outFile: path that receives the NDJSON form of a
      libLF.RegexPatternAndInputs whose only producer is "ReScue"
    timeout: forwarded unchanged to getReScueInputs
    """
    libLF.log('regexFile {} outFile {} timeout {}'.format(
        regexFile, outFile, timeout))

    # Load the regex under test
    with open(regexFile, 'r') as regexStream:
        serialized = regexStream.read()
        regex = libLF.Regex().initFromNDJSON(serialized)
    libLF.log('Generating inputs for regex /{}/'.format(regex.pattern))

    # Ask ReScue
    rescueInputs = getReScueInputs(regex.pattern, timeout)
    nInputs = len(rescueInputs)
    libLF.log('ReScue generated {} inputs for regex /{}/'.format(
        nInputs, regex.pattern))

    # Emit: everything is attributed to the single "ReScue" producer
    with open(outFile, 'w') as sink:
        byProducer = {"ReScue": rescueInputs}
        rpai = libLF.RegexPatternAndInputs().initFromRaw(regex.pattern,
                                                         byProducer)
        sink.write(rpai.toNDJSON())
    def _getInputs(self):
        """inputs: unique str[], collapsing the result from INPUT_GENERATOR

        Writes self.regex as NDJSON to a temp file, runs INPUT_GENERATOR on it,
        parses the generator's out-file as a libLF.RegexPatternAndInputs, caps
        each producer at self.maxInputsPerGenerator (when that is positive) by
        random sampling, and returns the de-duplicated union plus "a".
        """
        cmdFmt = "'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null"

        # Exchange data with the generator through two temp files
        with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-',
                                         suffix='.json',
                                         delete=DELETE_TMP_FILES) as queryFile, \
             tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-',
                                         suffix='.json',
                                         delete=DELETE_TMP_FILES) as outFile:
            libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
            command = cmdFmt.format(
                INPUT_GENERATOR,
                queryFile.name,
                outFile.name,
                self.rngSeed,  # Propagate reproducibility into the generators
                self.maxInputsPerGenerator,  # Reduce the size of intermediate tmp files
                self.timeoutPerGenerator,  # Ensure reasonable time is taken
            )
            rc, out = libLF.runcmd(command)
            out = out.strip()
            rawBytes = outFile.read()
            rpaiFileContents = rawBytes.decode("utf-8")

        # This should never fail
        assert (rc == 0)

        rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)
        nProducers = len(rpai.stringsByProducer)
        nUnique = len(rpai.getUniqueInputs())
        libLF.log('_getInputs: The {} producers yielded {} total inputs' \
          .format(nProducers, nUnique))

        limit = self.maxInputsPerGenerator
        collected = []
        for producer, producerInputs in rpai.stringsByProducer.items():
            # Apply the per-generator input limit (a non-positive limit disables it)
            if 0 < limit < len(producerInputs):
                libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
                  .format(producer, len(producerInputs), limit))
                producerInputs = random.sample(producerInputs, limit)
            collected.extend(producerInputs)

        # Always test at least one string
        collected.append("a")
        return list(set(collected))
Example #5
0
  def run(self):
    """Collect inputs for self.regex from every generator in INPUT_GENERATORS.

    Each generator is queried with (self.regex, self.rngSeed,
    self.inputsPerGenerator, self.generatorTimeout). Every response is capped
    at self.inputsPerGenerator inputs by random sampling and stored under the
    key '<generator name>-<response name>'.

    @returns: libLF.RegexPatternAndInputs built from the collected inputs
    @raises: KeyboardInterrupt is re-raised as-is. Any other exception is
      logged and the process exits with status 1 (SystemExit).
    """
    try:
      libLF.log('Working on regex: /{}/'.format(self.regex.pattern))

      # Drive the various input generators
      stringsByProducer = {}
      nStrings = 0
      for inputGen in INPUT_GENERATORS:
        libLF.log('Getting inputs from {}'.format(inputGen.name))
        # Query the generator
        gqrs = inputGen.query(self.regex, self.rngSeed,
                              self.inputsPerGenerator, self.generatorTimeout)
        # Unpack the responses
        for gqr in gqrs:
          # Enforce inputsPerGenerator
          _inputs = gqr.inputs
          if len(_inputs) > self.inputsPerGenerator:
            _inputs = random.sample(_inputs, self.inputsPerGenerator)

          stringsByProducer['{}-{}'.format(inputGen.name, gqr.name)] = _inputs
          nStrings += len(_inputs)
          libLF.log('Got {} inputs from {}-{}'.format(
            len(_inputs), inputGen.name, gqr.name))

      # TODO Consider introducing mutants here
      rpai = libLF.RegexPatternAndInputs().initFromRaw(self.regex.pattern,
                                                       stringsByProducer)

      # Return
      libLF.log('Completed regex /{}/ -- {} inputs'.format(
        self.regex.pattern, nStrings))
      return rpai
    except KeyboardInterrupt:
      raise
    except BaseException as err:
      # Previously this called `Sys.exit(1)` (undefined name -> NameError that
      # masked the real failure) *before* logging err, leaving the log and the
      # `return err` unreachable. Log the cause first, then exit.
      import sys  # local import: the file's import block is not visible here
      libLF.log('ERROR')
      libLF.log(err)
      sys.exit(1)