Exemple #1
0
  def queryHelper(self, regex, commandToRunFmt):
    """Run one generator command on a regex and collect what it produced.

    regex: the regex to query; its .pattern is logged and its .toNDJSON()
      output is written to the query file
    commandToRunFmt: format string with the invocation
      We apply commandToRunFmt.format(inFile, outFile)
      inFile: path of a temp file holding regex.toNDJSON()
      outFile: path of a temp file the command is expected to fill; it is
        parsed with libLF.RegexPatternAndInputs().initFromNDJSON
    @returns: GeneratorQueryResponse[], one per producer found in outFile.
      Empty when the command exits with a non-zero return code.
    """
    libLF.log('queryHelper for {}:\n  regex /{}/\n  command {}' \
      .format(self.name, regex.pattern, commandToRunFmt))

    with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-',
                                     suffix='.json',
                                     delete=DELETE_TMP_FILES) as queryFile, \
         tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-',
                                     suffix='.json',
                                     delete=DELETE_TMP_FILES) as responseFile:
      # Hand the regex to the command through the query file
      libLF.writeToFile(queryFile.name, regex.toNDJSON())
      command = commandToRunFmt.format(queryFile.name, responseFile.name)
      rc, _ = libLF.runcmd(command)

      # A failed command yields no responses
      if rc != 0:
        return []

      with open(responseFile.name, 'r') as stream:
        ndjson = stream.read()
      rpai = libLF.RegexPatternAndInputs().initFromNDJSON(ndjson)

      # One response per producer, in the order the producers are iterated
      return [
        GeneratorQueryResponse(producer, rpai.stringsByProducer[producer])
        for producer in rpai.stringsByProducer
      ]
Exemple #2
0
    def isSupportedInLanguage(self,
                              lang,
                              vrdPath=DEFAULT_VULN_REGEX_DETECTOR_ROOT):
        """Returns True if this regex's pattern can be used in lang.

        A {"language", "pattern"} query is written to a temp file and passed
        to <vrdPath>/src/validate/check-regex-support.pl, whose stdout is
        parsed as JSON.

        lang: language name to ask about
        vrdPath: directory containing the script; also exported to it as
          VULN_REGEX_DETECTOR_ROOT
        @returns: True if the script reports a truthy 'validPattern', else
          False. On True, lang is appended to self.supportedLangs unless it
          is already present.
        Raises AssertionError if the script exits with a non-zero code.
        """
        scriptPath = os.path.join(vrdPath, 'src', 'validate',
                                  'check-regex-support.pl')

        # Serialize the query once; the same text is logged and written out
        queryStr = json.dumps({"language": lang, "pattern": self.pattern})
        libLF.log('Query: {}'.format(queryStr))

        # The temp file only needs to live for the duration of the script
        with tempfile.NamedTemporaryFile(prefix='SyntaxAnalysis-queryLangs-',
                                         suffix='.json',
                                         delete=True) as ntf:
            libLF.writeToFile(ntf.name, queryStr)
            cmd = "VULN_REGEX_DETECTOR_ROOT={} '{}' '{}'".format(
                vrdPath, scriptPath, ntf.name)
            rc, rawOut = libLF.runcmd(cmd)
            out = rawOut.strip()

        libLF.log('Got rc {} out\n{}'.format(rc, out))
        # TODO Not sure if this can go wrong.
        assert rc == 0

        response = json.loads(out)
        if not bool(response['validPattern']):
            return False

        # Remember the language, without duplicates
        if lang not in self.supportedLangs:
            self.supportedLangs.append(lang)
        return True
Exemple #3
0
    def _testEvilInputInLang(self, evilInput, lang):
        """Returns an SLRegexValidation[] with EXP and POW pumps.

        Two queries are issued for self.regex in lang: first with
        self.EXP_PUMPS pumps, then with self.powerPumps pumps. Each query is
        written to a temp file and handed to self.testInLanguageScript; the
        script's stderr is appended to /tmp/err and its stdout is parsed as
        JSON.

        evilInput: the candidate input; must provide .toNDJSON()
        lang: language name; lower-cased before it goes into the query
        @returns: one SLRegexValidation per pump count, in the order above
        """
        queryTemplate = {
            'language': lang.lower(),
            'pattern': self.regex.pattern,
            'evilInput': json.loads(evilInput.toNDJSON()),
            'nPumps': -1,  # Placeholder, replaced for every query below
            'timeLimit': self.slTimeout,
        }
        cmdFmt = "VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err"

        validations = []
        for pumpCount in (self.EXP_PUMPS, self.powerPumps):
            # Copy the template so only nPumps differs between queries
            queryStr = json.dumps(dict(queryTemplate, nPumps=pumpCount))
            libLF.log('query: {}'.format(queryStr))

            with tempfile.NamedTemporaryFile(
                    prefix='SLRegexAnalysis-validateOpinion-',
                    suffix='.json',
                    delete=True) as ntf:
                libLF.writeToFile(ntf.name, queryStr)
                cmd = cmdFmt.format(self.vrdPath, self.testInLanguageScript,
                                    ntf.name)
                rc, rawOut = libLF.runcmd(cmd)
                out = rawOut.strip()
            libLF.log('Got rc {} out\n{}'.format(rc, out))

            validation = SLRegexValidation(self.regex.pattern, evilInput,
                                           json.loads(out))
            validations.append(validation)

        return validations
    def _evaluateRegex(self, pattern, testStrings, languages):
        """Evaluate one regex on a set of strings in each requested language.

        pattern: str
        testStrings: str[]
        languages: str[]
        @returns dict { lang: libLF.RegexEvaluationResult[], ... } for this request.
          A language whose query raises is logged and left out of the dict.
        """
        with tempfile.NamedTemporaryFile(
                prefix='SemanticAnalysis-evaluateRegex-queryFile-',
                suffix='.json',
                delete=DELETE_TMP_FILES) as queryFile:
            # The query does not mention the language, so a single file
            # serves every language
            queryStr = json.dumps({
                "pattern": pattern,
                "inputs": testStrings,
            })
            libLF.writeToFile(queryFile.name, queryStr)

            resultsByLang = {}
            for language in languages:
                # Might time out -- if so, just ignore the language
                try:
                    resultsByLang[language] = self._queryRegexInLang(
                        pattern, queryFile.name, language)
                except Exception as err:
                    libLF.log('_evaluateRegex: exception in {}: {}'.format(
                        language, err))

            return resultsByLang
Exemple #5
0
def getRexInputs(pattern, seed, nInputs, timeout):
    """Return stringsByProducer for use as an RPAI member.

    Rex is run once per (encoding, options) combination drawn from the
    module-level `encodings` and `rexOptionsPowerSet`. The nInputs budget is
    split evenly across those combinations.

    pattern: the regex pattern, written verbatim to Rex's regex file
    seed: forwarded to Rex as /seed:
    nInputs: total number of inputs to request across all combinations
    timeout: seconds allowed per Rex run; a negative value means no timeout
    @returns: dict { 'rex-Encoding<enc>-Options<opt-opt-...>': inputs, ... }
      The inputs for a combination are empty if Rex fails or raises. After a
      timeout, whatever could be read from the response file is kept.
    """
    modeCount = len(encodings) * len(rexOptionsPowerSet)
    perModeBudget = int(nInputs / modeCount)

    stringsByProducer = {}
    for encoding in encodings:
        for rexOptions in rexOptionsPowerSet:
            libLF.log("Encoding: {}".format(encoding))
            libLF.log("Options: {}".format(rexOptions))

            with tempfile.NamedTemporaryFile(prefix='GenInput-QueryRex-RegexFile-',
                                             suffix='.dat',
                                             delete=DELETE_TMP_FILES) as regexFile, \
                 tempfile.NamedTemporaryFile(prefix='GenInput-QueryRex-ResponseFile-',
                                             suffix='.dat',
                                             delete=DELETE_TMP_FILES) as responseFile:

                # Rex reads the pattern from a file
                libLF.writeToFile(regexFile.name, pattern)

                # Fixed arguments first, then one /options: flag per option
                cmd = [
                    WINE_PATH,
                    REX_PATH,
                    "/regexfile:" + regexFile.name,
                    "/k:" + str(perModeBudget),
                    "/encoding:" + encoding,
                    "/seed:" + str(seed),
                    "/file:" + responseFile.name,
                ]
                cmd += ["/options:" + opt for opt in rexOptions]
                libLF.log('cmd: ' + " ".join(cmd))

                # subprocess.run treats None as "wait forever"
                if timeout < 0:
                    tmo = None
                else:
                    tmo = timeout

                inputs = []
                try:
                    rc = subprocess.run(cmd, timeout=tmo).returncode
                    libLF.log("rc: " + str(rc))
                    if rc == 0:
                        inputs = processRexOutFile(responseFile.name)
                except subprocess.TimeoutExpired:
                    libLF.log("Rex timed out")
                    # Best effort: keep whatever made it into the response file
                    try:
                        libLF.log(
                            "Salvaging any strings streamed by Rex before the timeout"
                        )
                        inputs = processRexOutFile(responseFile.name)
                    except Exception:
                        pass
                except Exception as err:
                    libLF.log("Exception: " + str(err))

                libLF.log("{} inputs from: encoding {} options {}".format(
                    len(inputs), encoding, rexOptions))
                producerName = 'rex-Encoding{}-Options{}'.format(
                    encoding, '-'.join(rexOptions))
                stringsByProducer[producerName] = inputs
    return stringsByProducer
Exemple #6
0
  def queryDetectors(self):
    """Query detectors. Returns self

    Writes a query for self.regex.pattern to a temp file, runs
    self.queryDetectorsScript on it, and stores the converted output in
    self.detectorOpinions. The script's stderr is appended to /tmp/err.

    Raises AssertionError if the script exits with a non-zero code or if its
    output converts to None.
    """

    # Build query
    query = {
      'pattern': self.regex.pattern,
      'timeLimit': 60, # Num seconds each detector gets to make a decision about this regex.
      'memoryLimit': 2048*1024, # KB each detector gets to use to make a decision. TODO Update VRD docs which say 'in MB'? But cf. detect-vuln.pl:59
    }

    # Query from tempfile. No manual cleanup is needed afterwards: with
    # delete=True the file is removed as soon as the with-block exits.
    with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-queryDetectors-', suffix='.json', delete=True) as ntf:
      libLF.writeToFile(ntf.name, json.dumps(query))
      rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
        .format(self.vrdPath, self.queryDetectorsScript, ntf.name))
      out = out.strip()
    libLF.log('Got rc {} out\n{}'.format(rc, out))

    # TODO Not sure if this can go wrong.
    assert(rc == 0)

    self.detectorOpinions = self._qd_convOutput2DetectorOpinions(out)
    # TODO Not sure if this can go wrong.
    assert(self.detectorOpinions is not None)

    # Summarize for the log: an opinion counts only if it says "vulnerable"
    # and at least one of its evil inputs could be parsed.
    maybeVuln_exact = False
    maybeVuln_variant = False
    for do in self.detectorOpinions:
      if do.isVuln and any(ei.couldParse for ei in do.evilInputs):
        if do.pattern == do.patternVariant:
          maybeVuln_exact = True
        else:
          maybeVuln_variant = True
    libLF.log('Maybe vuln: exact {} variant {}'.format(maybeVuln_exact, maybeVuln_variant))

    return self
    def _getInputs(self):
        """inputs: unique str[], collapsing the result from INPUT_GENERATOR

        Runs INPUT_GENERATOR on self.regex with self.rngSeed,
        self.maxInputsPerGenerator and self.timeoutPerGenerator, then merges
        the strings of every producer. A producer that yields more than
        self.maxInputsPerGenerator strings (when that limit is positive) is
        randomly sampled down to the limit. The string "a" is always included.

        Raises AssertionError if INPUT_GENERATOR exits with a non-zero code.
        """
        cmdFmt = "'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null"

        # Query from tempfile; the generator writes its answer to outFile
        with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-',
                                         suffix='.json',
                                         delete=DELETE_TMP_FILES) as queryFile, \
             tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-',
                                         suffix='.json',
                                         delete=DELETE_TMP_FILES) as outFile:
            libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
            cmd = cmdFmt.format(
                INPUT_GENERATOR,
                queryFile.name,
                outFile.name,
                self.rngSeed,  # Propagate reproducibility into the generators
                self.maxInputsPerGenerator,  # Reduce the size of intermediate tmp files
                self.timeoutPerGenerator,  # Ensure reasonable time is taken
            )
            rc, out = libLF.runcmd(cmd)
            out = out.strip()
            # Read the answer before the temp file can be deleted
            rpaiFileContents = outFile.read().decode("utf-8")
        # This should never fail
        assert rc == 0

        rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)
        libLF.log('_getInputs: The {} producers yielded {} total inputs' \
          .format(len(rpai.stringsByProducer), len(rpai.getUniqueInputs())))

        inputs = []
        for producer in rpai.stringsByProducer:
            producerInputs = rpai.stringsByProducer[producer]

            # Apply per-generator input limit
            if 0 < self.maxInputsPerGenerator < len(producerInputs):
                libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
                  .format(producer, len(producerInputs), self.maxInputsPerGenerator))
                producerInputs = random.sample(producerInputs,
                                               self.maxInputsPerGenerator)

            inputs.extend(producerInputs)

        # Always test at least one string
        return list(set(inputs + ["a"]))
def getEGRETInputs(pattern, timeout):
    """Return inputs: str[]

    Runs EGRET (python3 EGRET_PATH) on the pattern and parses its output
    file with processEGRETOutFile.

    pattern: the regex pattern, written verbatim to EGRET's input file
    timeout: seconds allowed for the run; a negative value means no timeout
    @returns: the parsed strings. Empty if EGRET exits non-zero or raises.
      After a timeout, whatever could be read from the output file is kept.
    """
    with tempfile.NamedTemporaryFile(prefix='GenInput-QueryEGRET-RegexFile-',
                                     suffix='.dat',
                                     delete=DELETE_TMP_FILES) as regexFile, \
         tempfile.NamedTemporaryFile(prefix='GenInput-QueryEGRET-ResponseFile-',
                                     suffix='.dat',
                                     delete=DELETE_TMP_FILES) as responseFile:
        # EGRET reads the pattern from a file and writes strings to another
        libLF.writeToFile(regexFile.name, pattern)
        cmd = [
            "python3",
            EGRET_PATH,
            "--file",
            regexFile.name,
            "--output_file",
            responseFile.name,
        ]
        libLF.log('cmd: ' + " ".join(cmd))

        # subprocess.run treats None as "wait forever"
        if timeout < 0:
            tmo = None
        else:
            tmo = timeout

        inputs = []
        try:
            rc = subprocess.run(cmd, timeout=tmo).returncode
            libLF.log("rc: " + str(rc))
            if rc == 0:
                inputs = processEGRETOutFile(responseFile.name)
        except subprocess.TimeoutExpired:
            libLF.log("EGRET timed out")
            # Best effort: keep whatever made it into the output file
            try:
                libLF.log(
                    "Salvaging any strings streamed by EGRET before the timeout"
                )
                inputs = processEGRETOutFile(responseFile.name)
            except Exception:
                pass
        except Exception as err:
            libLF.log('Exception: ' + str(err))
        return inputs