Esempio n. 1
0
    def test_runcmd(self):
        """Check that libLF.runcmd returns rc 0 for two trivial commands.

        A scratch file named after the current PID is created under the
        root-level tmp directory with ``touch`` and then deleted with ``rm``.
        """
        scratchName = 'testFile-{}'.format(os.getpid())
        scratchPath = os.path.join(os.sep, 'tmp', scratchName)

        # Order matters: "touch" must run first so "rm" has a file to delete.
        for verb in ('touch', 'rm'):
            status, _ = libLF.runcmd('{} {}'.format(verb, scratchPath))
            self.assertEqual(status, 0)
Esempio n. 2
0
    def isSupportedInLanguage(self,
                              lang,
                              vrdPath=DEFAULT_VULN_REGEX_DETECTOR_ROOT):
        """Ask the external support checker whether self.pattern works in lang.

        lang: language name placed in the query's "language" field
        vrdPath: directory used to locate src/validate/check-regex-support.pl;
          it is also passed to the script as VULN_REGEX_DETECTOR_ROOT
        @returns: True if the script's JSON output has a truthy
          'validPattern', else False. On True, lang is appended to
          self.supportedLangs unless it is already there.
        Raises AssertionError if runcmd reports a non-zero rc.
        """
        scriptPath = os.path.join(vrdPath, 'src', 'validate',
                                  'check-regex-support.pl')

        # Serialize once; the same text is both logged and written to disk.
        queryJSON = json.dumps({"language": lang, "pattern": self.pattern})
        libLF.log('Query: {}'.format(queryJSON))

        cmdFmt = "VULN_REGEX_DETECTOR_ROOT={} '{}' '{}'"
        # The script takes its query as a file path, so stage the query in a
        # tempfile that is removed when the with-block exits.
        with tempfile.NamedTemporaryFile(prefix='SyntaxAnalysis-queryLangs-',
                                         suffix='.json',
                                         delete=True) as queryFile:
            libLF.writeToFile(queryFile.name, queryJSON)
            cmd = cmdFmt.format(vrdPath, scriptPath, queryFile.name)
            rc, rawOut = libLF.runcmd(cmd)
        out = rawOut.strip()

        libLF.log('Got rc {} out\n{}'.format(rc, out))
        # TODO Not sure if this can go wrong.
        assert (rc == 0)

        response = json.loads(out)
        if not response['validPattern']:
            return False

        # Remember the language so the member reflects every positive answer.
        if lang not in self.supportedLangs:
            self.supportedLangs.append(lang)
        return True
Esempio n. 3
0
  def queryHelper(self, regex, commandToRunFmt):
    """Run an external command on one regex and collect what it produced.

    regex: the libLF.Regex to query; written to the input file as NDJSON
    commandToRunFmt: format string with the invocation
      It is expanded as commandToRunFmt.format(inFile, outFile)
      inFile: temporary file holding the regex
      outFile: temporary file read back afterwards and parsed as a
        libLF.RegexPatternAndInputs, NDJSON formatted
    @returns: GeneratorQueryResponse[], one per producer in the parsed
      output; empty when the command's rc is non-zero
    """
    libLF.log('queryHelper for {}:\n  regex /{}/\n  command {}' \
      .format(self.name, regex.pattern, commandToRunFmt))

    with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-',
                                     suffix='.json',
                                     delete=DELETE_TMP_FILES) as queryTmp:
      with tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-',
                                       suffix='.json',
                                       delete=DELETE_TMP_FILES) as resultTmp:
        libLF.writeToFile(queryTmp.name, regex.toNDJSON())
        cmd = commandToRunFmt.format(queryTmp.name, resultTmp.name)
        rc, _ = libLF.runcmd(cmd)
        if rc != 0:
          # The command failed: report no responses.
          return []

        # Re-open by name to read what the command wrote to the out file.
        with open(resultTmp.name, 'r') as resultStream:
          ndjson = resultStream.read()
        rpai = libLF.RegexPatternAndInputs().initFromNDJSON(ndjson)
        return [
          GeneratorQueryResponse(producer, rpai.stringsByProducer[producer])
          for producer in rpai.stringsByProducer
        ]
Esempio n. 4
0
    def _testEvilInputInLang(self, evilInput, lang):
        """Run evilInput against self.regex in lang at two pump counts.

        One query is issued with self.EXP_PUMPS and then one with
        self.powerPumps. Each query is written to a temporary JSON file and
        handed to self.testInLanguageScript; the script's stderr is appended
        to /tmp/err.
        @returns: SLRegexValidation[], one entry per pump count, in that order
        """
        # 'nPumps' starts as a placeholder and is overwritten for each query.
        query = {
            'language': lang.lower(),
            'pattern': self.regex.pattern,
            'evilInput': json.loads(evilInput.toNDJSON()),
            'nPumps': -1,
            'timeLimit': self.slTimeout,
        }
        cmdFmt = "VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err"

        validations = []
        for pumpCount in (self.EXP_PUMPS, self.powerPumps):
            query['nPumps'] = pumpCount
            queryJSON = json.dumps(query)
            libLF.log('query: {}'.format(queryJSON))

            with tempfile.NamedTemporaryFile(
                    prefix='SLRegexAnalysis-validateOpinion-',
                    suffix='.json',
                    delete=True) as queryFile:
                libLF.writeToFile(queryFile.name, queryJSON)
                cmd = cmdFmt.format(self.vrdPath, self.testInLanguageScript,
                                    queryFile.name)
                rc, rawOut = libLF.runcmd(cmd)
            out = rawOut.strip()
            libLF.log('Got rc {} out\n{}'.format(rc, out))

            # The script's stdout is parsed as JSON and wrapped as-is.
            validation = SLRegexValidation(self.regex.pattern, evilInput,
                                           json.loads(out))
            validations.append(validation)

        return validations
def fileMightContainRegexes(rustFile):
    """Cheap grep-based pre-check for whether a file could hold a regex.

    The token tree analysis only finds regexes that involve the string
    "Regex", so a file without that string can be skipped.

    @param rustFile - str - path handed to grep
    @return bool - True if grep's output contains 'Regex', else False
    """
    grepCmd = "grep Regex '{}'".format(rustFile)
    _, grepOut = libLF.runcmd(grepCmd)
    return 'Regex' in grepOut
Esempio n. 6
0
  def queryDetectors(self):
    """Query the detectors about self.regex and record their opinions.

    Writes a JSON query (pattern, time limit, memory limit) to a temporary
    file, runs self.queryDetectorsScript on it with VULN_REGEX_DETECTOR_ROOT
    set to self.vrdPath, and stores the converted output in
    self.detectorOpinions. The script's stderr is appended to /tmp/err.

    @returns: self
    Raises AssertionError if runcmd reports a non-zero rc, or if converting
    the output yields None.
    """

    # Build query
    query = {
      'pattern': self.regex.pattern,
      'timeLimit': 60, # Num seconds each detector gets to make a decision about this regex.
      'memoryLimit': 2048*1024, # KB each detector gets to use to make a decision. TODO Update VRD docs which say 'in MB'? But cf. detect-vuln.pl:59
    }

    # Query from tempfile. delete=True removes the file when the with-block
    # exits, so no explicit cleanup is needed afterwards.
    with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-queryDetectors-', suffix='.json', delete=True) as ntf:
      libLF.writeToFile(ntf.name, json.dumps(query))
      rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
        .format(self.vrdPath, self.queryDetectorsScript, ntf.name))
      out = out.strip()
    libLF.log('Got rc {} out\n{}'.format(rc, out))

    # TODO Not sure if this can go wrong.
    assert rc == 0, 'queryDetectors: detector script returned rc {}'.format(rc)

    self.detectorOpinions = self._qd_convOutput2DetectorOpinions(out)
    # TODO Not sure if this can go wrong.
    assert self.detectorOpinions is not None, \
      'queryDetectors: could not convert detector output into opinions'

    # Summarize, for the log only, whether any detector flagged the exact
    # pattern or a variant of it with at least one parseable evil input.
    maybeVuln_exact = False
    maybeVuln_variant = False
    for do in self.detectorOpinions:
      if do.isVuln and any(ei.couldParse for ei in do.evilInputs):
        if do.pattern == do.patternVariant:
          maybeVuln_exact = True
        else:
          maybeVuln_variant = True
    libLF.log('Maybe vuln: exact {} variant {}'.format(maybeVuln_exact, maybeVuln_variant))

    return self
    def _getInputs(self):
        """Generate candidate input strings for self.regex via INPUT_GENERATOR.

        Writes the regex as NDJSON to a temporary query file, runs
        INPUT_GENERATOR on it, and parses the generator's out-file into a
        libLF.RegexPatternAndInputs. When self.maxInputsPerGenerator is
        positive, any producer that yielded more inputs than that is reduced
        to that many by random sampling. The per-producer lists are then
        collapsed into one list.

        @returns: unique str[]; always contains "a" so there is at least one
          string to test. Duplicates are dropped keeping first-seen order, so
          the order of the result does not depend on string hash
          randomization.
        Raises AssertionError if runcmd reports a non-zero rc.
        """

        # For testing
        #return ["abc"] # TODO
        #return [" ", "\t", "\r", "\n", "\v"] # Whitespace -- Go does not include \n ?
        #return ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!"] # For SL testing

        # Query from tempfile
        with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-', suffix='.json', delete=DELETE_TMP_FILES) as queryFile, \
             tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
            libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
            rc, out = libLF.runcmd("'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null" \
              .format(INPUT_GENERATOR, queryFile.name, outFile.name,
                      self.rngSeed, # Propagate reproducibility into the generators
                      self.maxInputsPerGenerator, # Reduce the size of intermediate tmp files
                      self.timeoutPerGenerator, # Ensure reasonable time is taken
                      ))
            out = out.strip()
            # Read inside the with-block: the out-file may be deleted on exit.
            rpaiFileContents = outFile.read().decode("utf-8")
        # This should never fail
        assert rc == 0, '_getInputs: input generator returned rc {}'.format(rc)

        rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)
        inputs = []
        libLF.log('_getInputs: The {} producers yielded {} total inputs' \
          .format(len(rpai.stringsByProducer), len(rpai.getUniqueInputs())))
        for producer in rpai.stringsByProducer:
            # Apply per-generator input limit
            producerInputs = rpai.stringsByProducer[producer]
            if 0 < self.maxInputsPerGenerator < len(producerInputs):
                libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
                  .format(producer, len(producerInputs), self.maxInputsPerGenerator))
                producerInputs = random.sample(producerInputs,
                                               self.maxInputsPerGenerator)

            # Add these inputs
            inputs += producerInputs

        # Always test at least one string. De-duplicate through dict keys
        # rather than a set: dict keys keep insertion order, whereas set
        # iteration order for strings varies with hash randomization.
        return list(dict.fromkeys(inputs + ["a"]))
def getTokenTree(rustc, rustFile):
    """Ask rustc for the JSON token tree of a Rust source file.

    @param rustc - str - invocable rustc
    @param rustFile - str - file to parse
    @return tokenTree - obj - parsed JSON string from rustc
    @raise ValueError - neither invocation produced NDJSON output
    """
    # Unexpanded goes first. It seems to work more consistently, and in some
    # test files macro expansion removes a regex declaration
    # (e.g. rust-clippy/clippy_dev/src/lib.rs).
    cmdFmts = (
        "'{}' -Z ast-json-noexpand '{}' 2>/dev/null",
        "'{}' -Z ast-json '{}' 2>/dev/null",
    )
    for cmdFmt in cmdFmts:
        _, rawOut = libLF.runcmd(cmdFmt.format(rustc, rustFile))
        candidate = rawOut.strip()

        # The rc is ignored: complaints are fine as long as JSON comes out.
        if not libLF.isNDJSON(candidate):
            libLF.log('Not NDJSON:\n{}'.format(candidate))
            continue
        return json.loads(candidate)

    raise ValueError('Could not get token tree from {}'.format(rustFile))