def extractRegexesFromJS(jsFile):
    """Extract regexes from this JS file.

    Builds a shell command that invokes the module-level `regexExtractor`
    on `jsFile`, runs it through libLF.chkcmd, and loads the command's
    output as NDJSON.

    Args:
        jsFile: path of the JavaScript file to hand to the extractor.

    Returns a libLF.SimpleFileWithRegexes object.
    """
    import shlex

    # Extract.
    # shlex.quote keeps paths containing quotes, spaces or other shell
    # metacharacters intact; hand-written '...' breaks on an embedded '.
    cmd = "{} {}".format(shlex.quote(regexExtractor), shlex.quote(jsFile))
    out = libLF.chkcmd(cmd)

    # Object-ify
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromNDJSON(out)
    return sfwr
def runExtractor(sourceFile, extractor, registry):
    """Run one regex extractor on a source file and convert its output.

    Args:
        sourceFile: mapping with a 'name' entry holding the path that is
            passed to the extractor.
        extractor: path of the extractor program. A path ending in ".jar"
            is launched with "java -jar"; anything else is executed directly.
        registry: not used by this function.

    Returns:
        A list of libLF.RegexUsage objects, one per regex reported by the
        extractor, or None if running the extractor or converting its
        output failed (the failure is logged, not raised).

    Raises:
        KeyboardInterrupt: re-raised so the user can abort the run.
    """
    import shlex

    libLF.log('Extracting regexes from {} using {}'.format(
        sourceFile['name'], extractor))

    # Any special invocation recipe?
    launcher = ["java -jar"] if extractor.endswith(".jar") else []

    try:
        # Extract.
        # shlex.quote protects paths containing quotes or other shell
        # metacharacters; the extractor's stderr is discarded.
        cmdParts = launcher + [shlex.quote(extractor),
                               shlex.quote(sourceFile['name'])]
        cmd = "{} 2>/dev/null".format(" ".join(cmdParts))
        out = libLF.chkcmd(cmd)
        try:
            sfwr = libLF.SimpleFileWithRegexes()
            sfwr.initFromNDJSON(out)
            if not sfwr.couldParse:
                libLF.log('Could not parse: {}'.format(sourceFile['name']))

            # TODO ruList = libLF.sfwrToRegexUsageList(sfwr)
            # The base name is the same for every regex, so compute it once.
            basePath = os.path.basename(sourceFile['name'])
            ruList = []
            for regex in sfwr.regexes:
                ru = libLF.RegexUsage()
                ru.initFromRaw(regex['pattern'], regex['flags'], None, None,
                               sourceFile['name'], basePath)
                ruList.append(ru)
            libLF.log('Got {} regexes from {}'.format(len(ruList),
                                                      sourceFile['name']))
            return ruList
        except KeyboardInterrupt:
            raise
        except Exception as err:
            libLF.log(
                'Error converting output from SFWR to RU: {}\n  {}'.format(
                    out, err))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        # Best effort: a failing extractor is logged, never fatal.
        libLF.log('Error extracting regexes from {} using {}: {}'.format(
            sourceFile['name'], extractor, err))

    # Reached only after a logged failure.
    return None
def main(tsFile):
    """Extract regexes from a TypeScript file and print them as NDJSON.

    The file is transpiled into a temporary .js file, the regexes are
    extracted from that file, and the result is reported under the original
    TypeScript file name. On any failure a record with couldParse=0 and no
    regexes is printed instead.

    Args:
        tsFile: path of the TypeScript file to analyse.
    """
    checkDependencies([transpiler, regexExtractor])

    # mkstemp returns an already-open descriptor; only the path is needed,
    # so close it right away instead of leaking it.
    tmpFd, jsTmpFile = tempfile.mkstemp(suffix='.js')
    os.close(tmpFd)

    sfwr = libLF.SimpleFileWithRegexes()
    try:
        # Get regexes from JS version
        transpile(tsFile, jsTmpFile)
        sfwr = extractRegexesFromJS(jsTmpFile)

        # Tweak result a bit -- real file name, not temp file
        sfwr.fileName = tsFile
    except BaseException as err:
        libLF.log('Error: {}'.format(err))
        sfwr.initFromRaw(fileName=tsFile, language='typescript', couldParse=0, regexes=[])
    finally:
        # Clean up on success and failure alike. A cleanup problem must not
        # discard an otherwise valid extraction result, so it is only logged.
        try:
            os.remove(jsTmpFile)
        except OSError as err:
            libLF.log('Could not remove temp file {}: {}'.format(jsTmpFile, err))
    print(sfwr.toNDJSON())
def retrieveRegexes(regexOutputFileName):
  """Returns libLF.RegexUsage[]

  Reads regexOutputFileName line by line, parsing each line as NDJSON.
  Every parsed record must provide 'file', 'pattern' and 'flags' entries.
  Lines that cannot be parsed are logged and skipped.

  (Since regexOutputFileName contains regexes from multiple source files,
  multiple files are represented in the returned libLF.RegexUsage[])

  Duplicates by <file, pattern> are removed; when a pattern repeats within
  a file, the flags of its last occurrence are kept.
  """

  libLF.log("Loading regexes from {}".format(regexOutputFileName))

  # Bin by file, removing duplicates
  file2uniqRegexes = {} # x[filename][pattern] = record
  with open(regexOutputFileName, mode='r') as regexStream:
    for line in regexStream:
      # Try to parse as NDJSON.
      # In Java we rely on a "poor man's JSON" implementation which may sometimes
      # produce malformed strings. In other languages, this should always work.
      try:
        obj = libLF.fromNDJSON(line)
      except Exception:
        # Not a bare except: Ctrl-C and interpreter exit must still propagate.
        libLF.log("Could not fromNDJSON line: {}".format(line))
        continue

      file2uniqRegexes.setdefault(obj['file'], {})[obj['pattern']] = {
        'pattern': obj['pattern'],
        'flags': obj['flags']
      }

  # Convert to libLF.RegexUsage[] via libLF.SimpleFileWithRegexes
  ruList = []
  for fileName, uniqRegexes in file2uniqRegexes.items():
    # Initialise in place rather than relying on initFromRaw's return value.
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromRaw(fileName, "XXX", True, list(uniqRegexes.values()))
    ruList += libLF.sfwrToRegexUsageList(sfwr)

  return ruList
def main(rustc, rustFile, dumpTokenTree):
    """Extract regex patterns from a Rust file and print them as NDJSON.

    If fileMightContainRegexes(rustFile) is false the file is reported with
    an empty regex list. Otherwise its token tree is obtained and walked
    with a FrontierVisitor to collect patterns. Every pattern is emitted
    with empty flags.

    Args:
        rustc: the rustc dependency, checked with
            libLF.checkShellDependencies and passed to getTokenTree.
        rustFile: path of the Rust source file to analyse.
        dumpTokenTree: if truthy, also log the token tree as indented JSON.

    Exits the process with status 1 if obtaining or walking the token tree
    fails.
    """
    libLF.checkShellDependencies([rustc])

    # Stays None when the file is skipped, so the dump below cannot hit an
    # unbound name.
    tokenTree = None

    if fileMightContainRegexes(rustFile):
        libLF.log('File might contain regexes, proceeding...')
        try:
            libLF.log('Getting token tree')
            tokenTree = getTokenTree(rustc, rustFile)
        except BaseException as err:
            libLF.log('Error getting token tree: {}'.format(err))
            sys.exit(1)

        try:
            libLF.log('Walking token tree')
            visitor = FrontierVisitor()
            walkTokenTree(tokenTree, visitor)
            patterns = visitor.getRegexPatterns()
            libLF.log('Extracted {} patterns'.format(len(patterns)))
        except BaseException as err:
            libLF.log('Error walking token tree: {}'.format(err))
            sys.exit(1)
    else:
        libLF.log('File does not contain "Regex", no regexes possible')
        patterns = []

    regexes = [{'pattern': p, 'flags': ''} for p in patterns]
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromRaw(fileName=rustFile,
                     language='rust',
                     couldParse=1,
                     regexes=regexes)
    print(sfwr.toNDJSON())

    if dumpTokenTree:
        if tokenTree is None:
            libLF.log('No token tree to dump')
        else:
            # "Pretty" JSON makes it easier for humans to decode
            asJSON = json.dumps(tokenTree, indent=2, separators=(',', ':'))
            libLF.log('\n' + asJSON)