def test_runcmd(self):
    testFile = os.path.join(os.sep, 'tmp', 'testFile-{}'.format(os.getpid()))

    createCmd = 'touch {}'.format(testFile)
    rc, out = libLF.runcmd(createCmd)
    self.assertEqual(rc, 0)

    destroyCmd = 'rm {}'.format(testFile)
    rc, out = libLF.runcmd(destroyCmd)
    self.assertEqual(rc, 0)
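
# libLF.runcmd is not shown in this excerpt. Below is a minimal sketch that is
# consistent with how it is used here: run a shell command, return an
# (rc, stdout) tuple. This is an assumption about its behavior, not the actual
# libLF implementation; in particular, whether stderr is merged into the
# captured output is a guess.
import subprocess

def runcmdSketch(cmd):
    """Run cmd in a shell; return (returncode, captured output as text)."""
    proc = subprocess.run(cmd, shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return proc.returncode, proc.stdout.decode('utf-8', errors='replace')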
def isSupportedInLanguage(self, lang, vrdPath=DEFAULT_VULN_REGEX_DETECTOR_ROOT):
    """Returns True if regex can be used in lang.

    Also updates an internal member (self.supportedLangs)."""
    checkRegexSupportScript = os.path.join(vrdPath, 'src', 'validate', 'check-regex-support.pl')

    # Build query
    query = {"language": lang, "pattern": self.pattern}
    libLF.log('Query: {}'.format(json.dumps(query)))

    # Query from tempfile
    with tempfile.NamedTemporaryFile(prefix='SyntaxAnalysis-queryLangs-', suffix='.json', delete=True) as ntf:
        libLF.writeToFile(ntf.name, json.dumps(query))
        rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}'" \
            .format(vrdPath, checkRegexSupportScript, ntf.name))
    out = out.strip()
    libLF.log('Got rc {} out\n{}'.format(rc, out))

    # TODO Not sure if this can go wrong.
    assert rc == 0

    obj = json.loads(out)
    if bool(obj['validPattern']):
        if lang not in self.supportedLangs:
            self.supportedLangs.append(lang)
        return True
    else:
        return False
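
# The expected response, in miniature. The sample below is an assumed shape,
# not captured output from check-regex-support.pl; 'validPattern' is the one
# field the code above relies on.
def _demoCheckRegexSupportResponse():
    import json
    sampleResponse = '{"language": "python", "pattern": "ab*c", "validPattern": 1}'
    assert bool(json.loads(sampleResponse)['validPattern'])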
def queryHelper(self, regex, commandToRunFmt):
    """query() might benefit from this

    regex: the libLF.Regex to query
    commandToRunFmt: format string with the invocation
      We apply commandToRunFmt.format(inFile, outFile)
        inFile: contains a libLF.Regex, NDJSON formatted
        outFile: contains a libLF.RegexPatternAndInputs, NDJSON formatted

    @returns: GeneratorQueryResponse[]
    """
    libLF.log('queryHelper for {}:\n regex /{}/\n command {}' \
        .format(self.name, regex.pattern, commandToRunFmt))

    gqrs = []
    with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-', suffix='.json', delete=DELETE_TMP_FILES) as inFile, \
         tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
        libLF.writeToFile(inFile.name, regex.toNDJSON())
        rc, out = libLF.runcmd(commandToRunFmt.format(inFile.name, outFile.name))

        if rc == 0:
            with open(outFile.name, 'r') as inStream:
                contents = inStream.read()
            rpai = libLF.RegexPatternAndInputs().initFromNDJSON(contents)
            for producer in rpai.stringsByProducer:
                gqr = GeneratorQueryResponse(producer, rpai.stringsByProducer[producer])
                gqrs.append(gqr)
    return gqrs
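
# Hypothetical usage of queryHelper. The two '{}' slots in commandToRunFmt are
# filled with the regex (input) file and the results (output) file, in that
# order. The script name and the GeneratorQueryResponse field names below are
# illustrative assumptions, not part of this repo:
#
#   gqrs = driver.queryHelper(
#       regex, "'./gen-inputs.py' --regex-file '{}' --out-file '{}'")
#   for gqr in gqrs:
#       print(gqr.producer, len(gqr.inputs))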
def _testEvilInputInLang(self, evilInput, lang):
    """Returns SLRegexValidation[] with EXP and POW pumps"""
    # Build query
    query = {
        'language': lang.lower(),
        'pattern': self.regex.pattern,
        'evilInput': json.loads(evilInput.toNDJSON()),
        'nPumps': -1,  # Placeholder; overwritten with a valid value below
        'timeLimit': self.slTimeout,
    }

    slRegexVals = []
    for nPumps in [self.EXP_PUMPS, self.powerPumps]:
        query['nPumps'] = nPumps
        libLF.log('query: {}'.format(json.dumps(query)))
        with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-validateOpinion-', suffix='.json', delete=True) as ntf:
            libLF.writeToFile(ntf.name, json.dumps(query))
            rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
                .format(self.vrdPath, self.testInLanguageScript, ntf.name))
        out = out.strip()
        libLF.log('Got rc {} out\n{}'.format(rc, out))

        slRegexVals.append(
            SLRegexValidation(self.regex.pattern, evilInput, json.loads(out)))
    return slRegexVals
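
# What 'nPumps' controls, as a sketch: super-linear attack strings are
# typically assembled from prefix/pump/suffix pieces, with the pump repeated
# nPumps times. The field names here are hypothetical; the real structure is
# whatever evilInput.toNDJSON() serializes.
def buildAttackStringSketch(prefix, pump, suffix, nPumps):
    """E.g. buildAttackStringSketch('x', 'a', '!', 3) -> 'xaaa!'"""
    return prefix + pump * nPumps + suffix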
def fileMightContainRegexes(rustFile):
    """Fast grep-based check on whether a regex is possible.

    True if grep finds 'Regex' in the file, else False.
    Our token tree analysis only finds regexes that involve the string
    "Regex", so this cheap prefilter cannot miss anything the full
    analysis would find.
    """
    _, out = libLF.runcmd("grep Regex '{}'".format(rustFile))
    return 'Regex' in out
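
# A runnable sanity check for the prefilter; the temp file is created on the
# fly, so nothing here depends on repo contents.
def _demoFileMightContainRegexes():
    """Smoke test (assumes a POSIX grep on PATH, which
    fileMightContainRegexes already requires)."""
    import os
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.rs', delete=False) as f:
        f.write('let re = Regex::new(r"a+").unwrap();\n')
    try:
        assert fileMightContainRegexes(f.name)
    finally:
        os.remove(f.name)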
def queryDetectors(self):
    """Query detectors. Returns self"""
    # Build query
    query = {
        'pattern': self.regex.pattern,
        'timeLimit': 60,  # Seconds each detector gets to make a decision about this regex.
        # KB each detector gets to use to make a decision.
        # TODO Update VRD docs, which say 'in MB'? But cf. detect-vuln.pl:59.
        'memoryLimit': 2048*1024,
    }

    # Query from tempfile
    with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-queryDetectors-', suffix='.json', delete=True) as ntf:
        libLF.writeToFile(ntf.name, json.dumps(query))
        rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
            .format(self.vrdPath, self.queryDetectorsScript, ntf.name))
    out = out.strip()
    libLF.log('Got rc {} out\n{}'.format(rc, out))

    # TODO Not sure if this can go wrong.
    assert rc == 0

    self.detectorOpinions = self._qd_convOutput2DetectorOpinions(out)
    # TODO Not sure if this can go wrong.
    assert self.detectorOpinions is not None

    maybeVuln_exact = False
    maybeVuln_variant = False
    for do in self.detectorOpinions:
        if do.isVuln and len(list(filter(lambda ei: ei.couldParse, do.evilInputs))) > 0:
            if do.pattern == do.patternVariant:
                maybeVuln_exact = True
            else:
                maybeVuln_variant = True
    libLF.log('Maybe vuln: exact {} variant {}'.format(maybeVuln_exact, maybeVuln_variant))

    # The query tempfile was created with delete=True, so no manual cleanup
    # is needed here.
    return self
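
# A minimal stand-in for the fields queryDetectors reads off each detector
# opinion (isVuln, evilInputs[*].couldParse, pattern, patternVariant). This
# shape is inferred from the loop above and is hypothetical; the real
# DetectorOpinion class lives elsewhere in libLF.
import collections

DetectorOpinionSketch = collections.namedtuple(
    'DetectorOpinionSketch', ['isVuln', 'evilInputs', 'pattern', 'patternVariant'])
EvilInputSketch = collections.namedtuple('EvilInputSketch', ['couldParse'])

# Example: a detector that flagged a rewritten variant rather than the exact
# pattern (the variant string is made up for illustration).
exampleOpinion = DetectorOpinionSketch(
    isVuln=True,
    evilInputs=[EvilInputSketch(couldParse=True)],
    pattern='(a+)+$',
    patternVariant='(a+)+b$',
)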
def _getInputs(self):
    """inputs: unique str[], collapsing the result from INPUT_GENERATOR"""
    # For testing
    #return ["abc"] # TODO
    #return [" ", "\t", "\r", "\n", "\v"] # Whitespace -- Go does not include \n ?
    #return ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!"] # For SL testing

    # Query from tempfile
    with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-', suffix='.json', delete=DELETE_TMP_FILES) as queryFile, \
         tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
        libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
        rc, out = libLF.runcmd("'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null" \
            .format(INPUT_GENERATOR, queryFile.name, outFile.name,
                self.rngSeed,  # Propagate reproducibility into the generators
                self.maxInputsPerGenerator,  # Reduce the size of intermediate tmp files
                self.timeoutPerGenerator,  # Ensure reasonable time is taken
            ))
        out = out.strip()
        rpaiFileContents = outFile.read().decode("utf-8")
        #libLF.log('Got rc {} scriptOut {} rpai as JSON {}'.format(rc, out, rpaiFileContents))

    # This should never fail
    assert rc == 0

    rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)
    inputs = []
    libLF.log('_getInputs: The {} producers yielded {} unique inputs' \
        .format(len(rpai.stringsByProducer), len(rpai.getUniqueInputs())))
    for producer in rpai.stringsByProducer:
        # Apply the per-generator input limit
        producerInputs = rpai.stringsByProducer[producer]
        if 0 < self.maxInputsPerGenerator < len(producerInputs):
            libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
                .format(producer, len(producerInputs), self.maxInputsPerGenerator))
            producerInputs = random.sample(producerInputs, self.maxInputsPerGenerator)
        # Add these inputs
        inputs += producerInputs
    return list(set(inputs + ["a"]))  # Always test at least one string
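
# The capping/deduplication step above, isolated so it can be unit tested.
# `stringsByProducer` mirrors rpai.stringsByProducer (a dict of producer name
# to str[]); the logic is the same as in _getInputs.
import random

def capAndDedupeSketch(stringsByProducer, maxInputsPerGenerator):
    inputs = []
    for producer, producerInputs in stringsByProducer.items():
        if 0 < maxInputsPerGenerator < len(producerInputs):
            producerInputs = random.sample(producerInputs, maxInputsPerGenerator)
        inputs += producerInputs
    return list(set(inputs + ["a"]))  # Always test at least one string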
def getTokenTree(rustc, rustFile):
    """Get the parse tree for this Rust file.

    @param rustc - str - invocable rustc
    @param rustFile - str - file to parse
    @return tokenTree - obj - the JSON AST emitted by rustc, parsed into a Python object
    """
    # Try unexpanded first. It seems to work more consistently,
    # and in some of our test files the macro expansion removes a regex declaration
    # (e.g. rust-clippy/clippy_dev/src/lib.rs).
    cmd_noexp = "'{}' -Z ast-json-noexpand '{}' 2>/dev/null".format(rustc, rustFile)
    cmd_exp = "'{}' -Z ast-json '{}' 2>/dev/null".format(rustc, rustFile)
    for cmd in [cmd_noexp, cmd_exp]:
        _, out = libLF.runcmd(cmd)
        out = out.strip()
        # We don't care about complaints as long as rustc produces JSON
        if libLF.isNDJSON(out):
            return json.loads(out)
        else:
            libLF.log('Not NDJSON:\n{}'.format(out))
    raise ValueError('Could not get token tree from {}'.format(rustFile))
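
# Smoke-test sketch for getTokenTree. The -Z flags are nightly-only, and newer
# toolchains appear to have dropped ast-json, so pin a nightly that still
# supports it. The file path below is hypothetical.
def _demoGetTokenTree():
    tree = getTokenTree('rustc', 'src/lib.rs')  # Assumes a nightly rustc on PATH
    print('Top-level AST keys: {}'.format(sorted(tree.keys())))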