def test_initFromNDJSON(self):
    """Verify that initFromNDJSON fills in every RegexUsage field.

    A RegexUsage is initialized from the NDJSON form of pythonReObj and
    each attribute is compared with the matching pythonReObj entry.
    """
    usage = libLF.RegexUsage()
    serialized = libLF.toNDJSON(pythonReObj)
    usage.initFromNDJSON(serialized)

    # Checked in a fixed order so the first mismatch reported is stable.
    checkedFields = (
        'pattern',
        'flags',
        'inputs',
        'project',
        'relPath',
        'basename',
    )
    for fieldName in checkedFields:
        self.assertEqual(pythonReObj[fieldName], getattr(usage, fieldName))
def sfwrToRegexUsageList(sfwr):
    """Convert a SimpleFileWithRegexes into a list of RegexUsage objects.

    Useful when a regex extractor emits an SFWR but the downstream
    analyses expect RegexUsage instances.

    Each entry of sfwr.regexes contributes one RegexUsage built from its
    'pattern' and 'flags' keys; the third and fourth initFromRaw arguments
    are passed as None, and the file name and its basename are taken from
    sfwr.fileName.
    """
    def _buildUsage(entry):
        # One RegexUsage per regex entry of the SFWR.
        usage = libLF.RegexUsage()
        fileBasename = os.path.basename(sfwr.fileName)
        usage.initFromRaw(
            entry['pattern'],
            entry['flags'],
            None,
            None,
            sfwr.fileName,
            fileBasename,
        )
        return usage

    usages = [_buildUsage(entry) for entry in sfwr.regexes]

    summary = 'sfwrToRegexUsageList: Got {} regexes from {}'.format(
        len(usages), sfwr.fileName)
    libLF.log(summary)
    return usages
def runExtractor(sourceFile, extractor, registry):
    """Run a regex extractor on one source file and build RegexUsage objects.

    Args:
        sourceFile: mapping whose 'name' entry is the path handed to the
            extractor.
        extractor: path of the extractor program. A path ending in ".jar"
            is launched through "java -jar"; anything else is executed
            directly.
        registry: not used by this function.

    Returns:
        A list of libLF.RegexUsage on success. If running the extractor or
        converting its output fails, the error is logged and None is
        returned.

    Raises:
        KeyboardInterrupt: always propagated, never swallowed.
    """
    # Local import so the block stays self-contained.
    import shlex

    sourceName = sourceFile['name']
    libLF.log('Extracting regexes from {} using {}'.format(
        sourceName, extractor))

    # Any special invocation recipe?
    cmdParts = ["java", "-jar"] if extractor.endswith(".jar") else []

    try:
        # Extract.
        # Paths are escaped with shlex.quote instead of being wrapped in
        # literal single quotes, so a path containing "'" (or other shell
        # metacharacters) can neither break nor alter the command.
        # NOTE(review): the "2>/dev/null" redirection implies libLF.chkcmd
        # runs the string through a POSIX shell -- confirm.
        cmdParts.append(shlex.quote(extractor))
        cmdParts.append(shlex.quote(str(sourceName)))
        cmd = "{} 2>/dev/null".format(" ".join(cmdParts))
        out = libLF.chkcmd(cmd)

        try:
            sfwr = libLF.SimpleFileWithRegexes()
            sfwr.initFromNDJSON(out)
            if not sfwr.couldParse:
                # Logged only; whatever regexes the SFWR holds are still
                # converted below.
                libLF.log('Could not parse: {}'.format(sourceName))

            # TODO ruList = libLF.sfwrToRegexUsageList(sfwr)
            ruList = []
            for regex in sfwr.regexes:
                ru = libLF.RegexUsage()
                basePath = os.path.basename(sourceName)
                ru.initFromRaw(regex['pattern'], regex['flags'], None, None,
                               sourceName, basePath)
                ruList.append(ru)
            libLF.log('Got {} regexes from {}'.format(
                len(ruList), sourceName))
            return ruList
        except KeyboardInterrupt:
            raise
        except Exception as err:
            libLF.log(
                'Error converting output from SFWR to RU: {}\n {}'.format(
                    out, err))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        libLF.log('Error extracting regexes from {} using {}: {}'.format(
            sourceName, extractor, err))

    # Best-effort: every failure path ends here after being logged.
    return None
def loadRegexUsages(regexUsageFile):
    """Read a file of NDJSON records and return them as libLF.RegexUsage[].

    Lines that are empty after stripping are skipped. A line that cannot
    be turned into a RegexUsage is logged and skipped; KeyboardInterrupt
    is always propagated.
    """
    libLF.log('Loading RegexUsage\'s from {}'.format(regexUsageFile))

    usages = []
    with open(regexUsageFile, 'r') as inStream:
        for rawLine in inStream:
            record = rawLine.strip()
            if not record:
                continue

            try:
                # Build the Regex
                usage = libLF.RegexUsage()
                usage.initFromNDJSON(record)
                usages.append(usage)
            except KeyboardInterrupt:
                raise
            except BaseException as err:
                message = 'Exception parsing line:\n {}\n {}'.format(
                    record, err)
                libLF.log(message)

    libLF.log('Loaded {} RegexUsage\'s'.format(len(usages)))
    return usages
def test_toNDJSON(self):
    """Verify that toNDJSON round-trips pythonReObj.

    A RegexUsage is initialized from the NDJSON form of pythonReObj; its
    own toNDJSON output, parsed as JSON, must equal pythonReObj.
    """
    usage = libLF.RegexUsage()
    serializedInput = libLF.toNDJSON(pythonReObj)
    usage.initFromNDJSON(serializedInput)

    serializedOutput = usage.toNDJSON()
    roundTripped = json.loads(serializedOutput)
    self.assertEqual(roundTripped, pythonReObj)