def queryHelper(self, regex, commandToRunFmt):
  """query() might benefit from this

  regex: the libLF.regex to query
  commandToRunFmt: format string with the invocation
    We apply commandToRun.format(inFile, outFile)
      inFile: contains a libLF.Regex, NDJSON formatted
      outFile: contains a libLF.RegexPatternAndInput, NDJSON formatted
  @returns: GeneratorQueryResponse[]
  """
  libLF.log('queryHelper for {}:\n regex /{}/\n command {}' \
    .format(self.name, regex.pattern, commandToRunFmt))
  responses = []
  with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-', suffix='.json', delete=DELETE_TMP_FILES) as queryFile, \
       tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-', suffix='.json', delete=DELETE_TMP_FILES) as responseFile:
    # Hand the regex to the generator via a tmp file
    libLF.writeToFile(queryFile.name, regex.toNDJSON())
    rc, _ = libLF.runcmd(commandToRunFmt.format(queryFile.name, responseFile.name))
    if rc == 0:
      # On success the command wrote an NDJSON RegexPatternAndInputs to the out file
      with open(responseFile.name, 'r') as inStream:
        contents = inStream.read()
      rpai = libLF.RegexPatternAndInputs().initFromNDJSON(contents)
      responses = [
        GeneratorQueryResponse(producer, strings)
        for producer, strings in rpai.stringsByProducer.items()
      ]
  # On non-zero rc this is empty: best-effort, no exception raised
  return responses
def isSupportedInLanguage(self, lang, vrdPath=DEFAULT_VULN_REGEX_DETECTOR_ROOT):
  """Returns True if regex can be used in lang

  Also updates internal member."""
  checkScript = os.path.join(vrdPath, 'src', 'validate', 'check-regex-support.pl')

  # Build query
  query = {"language": lang, "pattern": self.pattern}
  libLF.log('Query: {}'.format(json.dumps(query)))

  # Query from tempfile
  with tempfile.NamedTemporaryFile(prefix='SyntaxAnalysis-queryLangs-', suffix='.json', delete=True) as ntf:
    libLF.writeToFile(ntf.name, json.dumps(query))
    rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}'" \
      .format(vrdPath, checkScript, ntf.name))
    out = out.strip()
  libLF.log('Got rc {} out\n{}'.format(rc, out))

  # TODO Not sure if this can go wrong.
  assert (rc == 0)

  # Record and report support in one place
  isSupported = bool(json.loads(out)['validPattern'])
  if isSupported and lang not in self.supportedLangs:
    self.supportedLangs.append(lang)
  return isSupported
def _testEvilInputInLang(self, evilInput, lang):
  """Returns an SLRegexValidation[] with EXP and POW pumps"""
  # Build query; nPumps is filled in per iteration below
  query = {
    'language': lang.lower(),
    'pattern': self.regex.pattern,
    'evilInput': json.loads(evilInput.toNDJSON()),
    'nPumps': -1, # Needs a valid value
    'timeLimit': self.slTimeout,
  }

  validations = []
  for pumpCount in (self.EXP_PUMPS, self.powerPumps):
    query['nPumps'] = pumpCount
    libLF.log('query: {}'.format(json.dumps(query)))
    # Ship the query to the VRD test-in-language script via a tmp file
    with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-validateOpinion-', suffix='.json', delete=True) as ntf:
      libLF.writeToFile(ntf.name, json.dumps(query))
      rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
        .format(self.vrdPath, self.testInLanguageScript, ntf.name))
      out = out.strip()
    libLF.log('Got rc {} out\n{}'.format(rc, out))
    validations.append(
      SLRegexValidation(self.regex.pattern, evilInput, json.loads(out)))
  return validations
def _evaluateRegex(self, pattern, testStrings, languages):
  """Evaluate this regex with these strings in this language

  pattern: str
  testStrings: str[]
  languages: str[]

  @returns dict { lang: libLF.RegexEvaluationResult[], ... } for this request
  """
  lang2rers = {}
  with tempfile.NamedTemporaryFile(
      prefix='SemanticAnalysis-evaluateRegex-queryFile-',
      suffix='.json',
      delete=DELETE_TMP_FILES) as queryFile:
    # We can use the same queryFile for each language
    libLF.writeToFile(queryFile.name,
                      json.dumps({"pattern": pattern, "inputs": testStrings}))
    for lang in languages:
      try:
        # Might time out -- if so, just ignore the language
        lang2rers[lang] = self._queryRegexInLang(pattern, queryFile.name, lang)
      except Exception as exc:
        libLF.log('_evaluateRegex: exception in {}: {}'.format(lang, exc))
  return lang2rers
def getRexInputs(pattern, seed, nInputs, timeout):
  """Return stringsByProducer for use as an RPAI member

  pattern: str: regex pattern handed to Rex
  seed: int: RNG seed, propagated to Rex for reproducibility
  nInputs: int: total input budget, split evenly across all modes
  timeout: seconds allowed per Rex invocation; negative means no limit
  @returns: { producerName: str[] } -- one entry per (encoding, options) mode
  """
  # Each (encoding, options) combination is one "mode"; split the budget evenly.
  # Integer division: no float rounding, exact for any size of nInputs.
  nModes = len(encodings) * len(rexOptionsPowerSet)
  inputsPerMode = nInputs // nModes

  stringsByProducer = {}
  for encoding in encodings:
    for rexOptions in rexOptionsPowerSet:
      libLF.log("Encoding: {}".format(encoding))
      libLF.log("Options: {}".format(rexOptions))
      with tempfile.NamedTemporaryFile(prefix='GenInput-QueryRex-RegexFile-', suffix='.dat', delete=DELETE_TMP_FILES) as rexInFile, \
           tempfile.NamedTemporaryFile(prefix='GenInput-QueryRex-ResponseFile-', suffix='.dat', delete=DELETE_TMP_FILES) as rexOutFile:
        # Build input file
        libLF.writeToFile(rexInFile.name, pattern)

        # Build command to run -- Rex is invoked under Wine
        cmd = [
          WINE_PATH, REX_PATH,
          "/regexfile:" + rexInFile.name,
          "/k:" + str(inputsPerMode),
          "/encoding:" + encoding,
          "/seed:" + str(seed),
          "/file:" + rexOutFile.name,
        ]
        for opt in rexOptions:
          cmd.append("/options:" + opt)
        libLF.log('cmd: ' + " ".join(cmd))

        # Get inputs, guarded by a timeout
        tmo = None if timeout < 0 else timeout
        inputs = []
        try:
          completedProcess = subprocess.run(cmd, timeout=tmo)
          rc = completedProcess.returncode
          libLF.log("rc: " + str(rc))
          if rc == 0:
            inputs = processRexOutFile(rexOutFile.name)
        except subprocess.TimeoutExpired:
          libLF.log("Rex timed out")
          # Best-effort salvage: Rex streams strings to its out file, so a
          # partial run may still have produced usable inputs.
          try:
            libLF.log(
              "Salvaging any strings streamed by Rex before the timeout"
            )
            inputs = processRexOutFile(rexOutFile.name)
          except Exception:
            pass
        except Exception as err:
          libLF.log("Exception: " + str(err))

        libLF.log("{} inputs from: encoding {} options {}".format(
          len(inputs), encoding, rexOptions))
        producerName = 'rex-Encoding{}-Options{}'.format(
          encoding, '-'.join(rexOptions))
        stringsByProducer[producerName] = inputs
  return stringsByProducer
def queryDetectors(self):
  """Query detectors. Returns self

  Runs the VRD detector driver on self.regex.pattern and stores the
  result in self.detectorOpinions.
  """
  # Build query
  query = {
    'pattern': self.regex.pattern,
    'timeLimit': 60, # Num seconds each detector gets to make a decision about this regex.
    'memoryLimit': 2048*1024, # KB each detector gets to use to make a decision. TODO Update VRD docs which say 'in MB'? But cf. detect-vuln.pl:59
  }

  # Query from tempfile; delete=True means the context manager cleans it up
  with tempfile.NamedTemporaryFile(prefix='SLRegexAnalysis-queryDetectors-', suffix='.json', delete=True) as ntf:
    libLF.writeToFile(ntf.name, json.dumps(query))
    rc, out = libLF.runcmd("VULN_REGEX_DETECTOR_ROOT={} '{}' '{}' 2>>/tmp/err" \
      .format(self.vrdPath, self.queryDetectorsScript, ntf.name))
    out = out.strip()
  libLF.log('Got rc {} out\n{}'.format(rc, out))

  # TODO Not sure if this can go wrong.
  assert(rc == 0)
  self.detectorOpinions = self._qd_convOutput2DetectorOpinions(out)
  # TODO Not sure if this can go wrong.
  assert(self.detectorOpinions is not None)

  # Log-only summary: did any detector flag the exact pattern or only a variant?
  maybeVuln_exact = False
  maybeVuln_variant = False
  for do in self.detectorOpinions:
    if do.isVuln and len(list(filter(lambda ei: ei.couldParse, do.evilInputs))) > 0:
      if do.pattern == do.patternVariant:
        maybeVuln_exact = True
      else:
        maybeVuln_variant = True
  libLF.log('Maybe vuln: exact {} variant {}'.format(maybeVuln_exact, maybeVuln_variant))

  # NOTE(review): a broken cleanup block was removed here. It called
  # os.remove(queryFile), but no name `queryFile` exists in this scope;
  # the NameError was silently hidden by a bare `except: pass`. The
  # NamedTemporaryFile above (delete=True) already handles cleanup.
  return self
def _getInputs(self):
  """inputs: unique str[], collapsing the result from INPUT_GENERATOR

  Runs INPUT_GENERATOR on self.regex, caps each producer's contribution
  at self.maxInputsPerGenerator (a value <= 0 means unlimited), and
  returns the de-duplicated union. Always includes at least one string.
  """
  # Query from tempfile
  with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-', suffix='.json', delete=DELETE_TMP_FILES) as queryFile, \
       tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
    libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
    rc, out = libLF.runcmd("'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null" \
      .format(INPUT_GENERATOR, queryFile.name, outFile.name,
              self.rngSeed, # Propagate reproducibility into the generators
              self.maxInputsPerGenerator, # Reduce the size of intermediate tmp files
              self.timeoutPerGenerator, # Ensure reasonable time is taken
      ))
    # Read the NDJSON result before the tmp file is cleaned up
    rpaiFileContents = outFile.read().decode("utf-8")

  # This should never fail
  assert (rc == 0)

  rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)
  inputs = []
  libLF.log('_getInputs: The {} producers yielded {} total inputs' \
    .format(len(rpai.stringsByProducer), len(rpai.getUniqueInputs())))
  for producer in rpai.stringsByProducer:
    producerInputs = rpai.stringsByProducer[producer]
    # Apply per-generator input limit (chained comparison; <= 0 disables the cap)
    if 0 < self.maxInputsPerGenerator < len(producerInputs):
      libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
        .format(producer, len(producerInputs), self.maxInputsPerGenerator))
      producerInputs = random.sample(producerInputs, self.maxInputsPerGenerator)
    # Add these inputs
    inputs += producerInputs
  return list(set(inputs + ["a"])) # Always test at least one string
def getEGRETInputs(pattern, timeout):
  """Return inputs: str[]"""
  with tempfile.NamedTemporaryFile(prefix='GenInput-QueryEGRET-RegexFile-', suffix='.dat', delete=DELETE_TMP_FILES) as regexFile, \
       tempfile.NamedTemporaryFile(prefix='GenInput-QueryEGRET-ResponseFile-', suffix='.dat', delete=DELETE_TMP_FILES) as responseFile:
    # Write the pattern where EGRET can read it
    libLF.writeToFile(regexFile.name, pattern)

    # Assemble the EGRET invocation
    cmd = [
      "python3", EGRET_PATH,
      "--file", regexFile.name,
      "--output_file", responseFile.name,
    ]
    libLF.log('cmd: ' + " ".join(cmd))

    # Run EGRET under a timeout; a negative timeout means "no limit"
    inputs = []
    try:
      proc = subprocess.run(cmd, timeout=None if timeout < 0 else timeout)
      libLF.log("rc: " + str(proc.returncode))
      if proc.returncode == 0:
        inputs = processEGRETOutFile(responseFile.name)
    except subprocess.TimeoutExpired:
      libLF.log("EGRET timed out")
      # Best-effort salvage of whatever EGRET wrote before being killed
      try:
        libLF.log(
          "Salvaging any strings streamed by EGRET before the timeout"
        )
        inputs = processEGRETOutFile(responseFile.name)
      except Exception:
        pass
    except Exception as err:
      libLF.log('Exception: ' + str(err))
  return inputs