Example #1
0
    def test_initFromNDJSON(self):
        """Check that initFromNDJSON populates every RegexUsage field.

        pythonReObj is serialized with libLF.toNDJSON, loaded into a fresh
        RegexUsage, and each attribute is compared with the matching key of
        the original object.
        """
        usage = libLF.RegexUsage()
        usage.initFromNDJSON(libLF.toNDJSON(pythonReObj))

        # Same fields, in the same order, as the individual assertions.
        checkedFields = (
            'pattern',
            'flags',
            'inputs',
            'project',
            'relPath',
            'basename',
        )
        for fieldName in checkedFields:
            self.assertEqual(pythonReObj[fieldName], getattr(usage, fieldName))
Example #2
0
def sfwrToRegexUsageList(sfwr):
    """Expand a SimpleFileWithRegexes into a list of RegexUsage objects.

    Handy when a regex extractor produces an SFWR but the analyses want
    RegexUsage objects.

    Each entry of sfwr.regexes supplies the 'pattern' and 'flags'; the third
    and fourth initFromRaw arguments are passed as None, followed by
    sfwr.fileName and its basename.

    Returns:
        A list with one libLF.RegexUsage per entry in sfwr.regexes.
    """

    def _toRegexUsage(regexEntry):
        # Build one RegexUsage from a single regex record of the SFWR.
        usage = libLF.RegexUsage()
        fileBase = os.path.basename(sfwr.fileName)
        usage.initFromRaw(
            regexEntry['pattern'],
            regexEntry['flags'],
            None,
            None,
            sfwr.fileName,
            fileBase,
        )
        return usage

    usages = [_toRegexUsage(regexEntry) for regexEntry in sfwr.regexes]

    summary = 'sfwrToRegexUsageList: Got {} regexes from {}'.format(
        len(usages), sfwr.fileName)
    libLF.log(summary)
    return usages
def runExtractor(sourceFile, extractor, registry):
    """Run a regex extractor on one source file and collect RegexUsage objects.

    The extractor is invoked through a shell command (prefixed with
    ``java -jar`` when its path ends in ``.jar``) with stderr discarded. Its
    output is loaded into a SimpleFileWithRegexes, and every regex it lists
    is wrapped in a RegexUsage.

    Args:
        sourceFile: Mapping whose 'name' entry is the path of the file to scan.
        extractor: Path of the extractor program or .jar file.
        registry: Not used by this function.

    Returns:
        A list of libLF.RegexUsage on success. None if running the extractor
        or converting its output raised; the error is logged, not propagated.

    Raises:
        KeyboardInterrupt: Always re-raised so the user can abort the run.
    """
    import shlex  # Local import: only needed to quote the shell arguments.

    libLF.log('Extracting regexes from {} using {}'.format(
        sourceFile['name'], extractor))

    # Any special invocation recipe?
    cmdParts = []
    if extractor.endswith(".jar"):
        cmdParts.append("java -jar")

    try:
        # Extract.
        # Quote both paths with shlex.quote rather than wrapping them in
        # literal single quotes: a path containing "'" would otherwise break
        # out of the quoting and be interpreted by the shell.
        cmdParts.append(shlex.quote(extractor))
        cmdParts.append(shlex.quote(str(sourceFile['name'])))
        cmdParts.append("2>/dev/null")
        cmd = " ".join(cmdParts)
        out = libLF.chkcmd(cmd)
        try:
            sfwr = libLF.SimpleFileWithRegexes()
            sfwr.initFromNDJSON(out)
            if not sfwr.couldParse:
                # Logged only; conversion below still runs on sfwr.regexes.
                libLF.log('Could not parse: {}'.format(sourceFile['name']))

            # TODO ruList = libLF.sfwrToRegexUsageList(sfwr)
            # NOTE: unlike that helper, this uses sourceFile['name'] rather
            # than sfwr.fileName for the path fields.
            ruList = []
            for regex in sfwr.regexes:
                ru = libLF.RegexUsage()
                basePath = os.path.basename(sourceFile['name'])
                ru.initFromRaw(regex['pattern'], regex['flags'], None, None,
                               sourceFile['name'], basePath)
                ruList.append(ru)
            libLF.log('Got {} regexes from {}'.format(len(ruList),
                                                      sourceFile['name']))
            return ruList
        except KeyboardInterrupt:
            raise
        except Exception as err:
            libLF.log(
                'Error converting output from SFWR to RU: {}\n  {}'.format(
                    out, err))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        # Deliberately broad: a failing extractor must not abort the caller.
        libLF.log('Error extracting regexes from {} using {}: {}'.format(
            sourceFile['name'], extractor, err))

    # Reached only after a logged failure.
    return None
def loadRegexUsages(regexUsageFile):
    """Load RegexUsage objects from a file with one NDJSON record per line.

    Blank lines are skipped. A line that raises while being parsed is logged
    and skipped; only KeyboardInterrupt is allowed to propagate.

    Returns:
        libLF.RegexUsage[] -- one entry per successfully parsed line.
    """
    usages = []
    libLF.log("Loading RegexUsage's from {}".format(regexUsageFile))
    with open(regexUsageFile, 'r') as ndjsonStream:
        for rawLine in ndjsonStream:
            record = rawLine.strip()
            if not record:
                continue

            try:
                # Build the Regex
                usage = libLF.RegexUsage()
                usage.initFromNDJSON(record)
                usages.append(usage)
            except KeyboardInterrupt:
                raise
            except BaseException as parseErr:
                message = 'Exception parsing line:\n  {}\n  {}'.format(
                    record, parseErr)
                libLF.log(message)

        loadedCount = len(usages)
        libLF.log("Loaded {} RegexUsage's".format(loadedCount))
        return usages
Example #5
0
    def test_toNDJSON(self):
        """Check that toNDJSON round-trips pythonReObj unchanged.

        A RegexUsage is initialized from the NDJSON form of pythonReObj; its
        own toNDJSON output, once decoded, must equal pythonReObj.
        """
        usage = libLF.RegexUsage()
        serializedInput = libLF.toNDJSON(pythonReObj)
        usage.initFromNDJSON(serializedInput)

        serializedOutput = usage.toNDJSON()
        roundTripped = json.loads(serializedOutput)

        self.assertEqual(roundTripped, pythonReObj)