Python SimpleFileWithRegexes Examples

Programming Language: Python

Namespace/Package Name: libLF

Method/Function: SimpleFileWithRegexes

Examples at hotexamples.com: 5

Python SimpleFileWithRegexes - 5 examples found. These are the top rated real world Python examples of libLF.SimpleFileWithRegexes extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: extract-ts-regexes.py Project: SBULeeLab/RegexGeneralizability-ASE19

def extractRegexesFromJS(jsFile):
    """Extract regexes from this JS file.

    Runs the external program named by the module-level `regexExtractor`
    on `jsFile` through libLF.chkcmd and parses the command's output as
    NDJSON.

    Returns a libLF.SimpleFileWithRegexes object.
    """
    import shlex

    # Extract.
    # Quote each argument with shlex.quote rather than wrapping it in literal
    # single quotes: a path that itself contains a quote (or another shell
    # metacharacter) would otherwise break or alter the command.
    # NOTE(review): assumes libLF.chkcmd hands this string to a POSIX shell,
    # as the original hand-written quoting implies -- confirm.
    cmd = "{} {}".format(shlex.quote(str(regexExtractor)),
                         shlex.quote(str(jsFile)))
    out = libLF.chkcmd(cmd)

    # Object-ify
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromNDJSON(out)
    return sfwr

Example #2

Show file

File: regex-extractor.py Project: phj95304/LinguaFranca-FSE19

def runExtractor(sourceFile, extractor, registry):
    """Run one regex extractor on one source file.

    Args:
        sourceFile: mapping whose 'name' entry is the path of the file to scan.
        extractor: path of the extractor program; a path ending in ".jar" is
            launched with "java -jar".
        registry: not used by this function.

    Returns:
        A list of libLF.RegexUsage objects, one per extracted regex, on
        success. On any failure (other than KeyboardInterrupt, which is
        re-raised) the error is logged and None is returned.
    """
    import shlex

    fileName = sourceFile['name']
    libLF.log('Extracting regexes from {} using {}'.format(
        fileName, extractor))

    # Any special invocation recipe?
    cmdParts = []
    if extractor.endswith(".jar"):
        cmdParts.append("java -jar")

    try:
        # Extract.
        # shlex.quote keeps paths containing quotes or other shell
        # metacharacters intact as single arguments; hand-written '...'
        # quoting breaks on a path that contains a single quote.
        # NOTE(review): assumes libLF.chkcmd runs the string through a POSIX
        # shell, as the "2>/dev/null" redirection implies -- confirm.
        cmdParts.append(shlex.quote(str(extractor)))
        cmdParts.append(shlex.quote(str(fileName)))
        cmdParts.append("2>/dev/null")
        cmd = " ".join(cmdParts)
        out = libLF.chkcmd(cmd)
        try:
            sfwr = libLF.SimpleFileWithRegexes()
            sfwr.initFromNDJSON(out)
            if not sfwr.couldParse:
                libLF.log('Could not parse: {}'.format(fileName))

            # TODO ruList = libLF.sfwrToRegexUsageList(sfwr)
            # The base name is the same for every regex; compute it once.
            basePath = os.path.basename(fileName)
            ruList = []
            for regex in sfwr.regexes:
                ru = libLF.RegexUsage()
                ru.initFromRaw(regex['pattern'], regex['flags'], None, None,
                               fileName, basePath)
                ruList.append(ru)
            libLF.log('Got {} regexes from {}'.format(len(ruList), fileName))
            return ruList
        except KeyboardInterrupt:
            raise
        except Exception as err:
            libLF.log(
                'Error converting output from SFWR to RU: {}\n  {}'.format(
                    out, err))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        libLF.log('Error extracting regexes from {} using {}: {}'.format(
            fileName, extractor, err))

    # Reached only after a logged failure.
    return None

Example #3

Show file

File: extract-ts-regexes.py Project: phj95304/LinguaFranca-FSE19

def main(tsFile):
    """Print, as NDJSON, the regexes found in a TypeScript file.

    The file is transpiled into a temporary .js file, the regexes are
    extracted from that file, and the result is relabelled with the original
    TypeScript file name. If any step fails, the error is logged and a
    record with couldParse=0 and no regexes is printed instead.
    """
    checkDependencies([transpiler, regexExtractor])

    # mkstemp returns an already-open OS-level descriptor along with the
    # path. Only the path is needed here, so close the descriptor right away
    # instead of leaking it.
    fd, jsTmpFile = tempfile.mkstemp(suffix='.js')
    os.close(fd)

    sfwr = libLF.SimpleFileWithRegexes()
    try:
        # Get regexes from JS version
        transpile(tsFile, jsTmpFile)
        sfwr = extractRegexesFromJS(jsTmpFile)

        # Tweak result a bit -- real file name, not temp file
        sfwr.fileName = tsFile
    except BaseException as err:
        libLF.log('Error: {}'.format(err))
        sfwr.initFromRaw(fileName=tsFile, language='typescript', couldParse=0, regexes=[])
    finally:
        # Clean up on every path, so a failed transpile or extraction does
        # not leave the temp file behind.
        try:
            os.remove(jsTmpFile)
        except OSError as err:
            libLF.log('Could not remove temp file {}: {}'.format(jsTmpFile, err))
    print(sfwr.toNDJSON())

Example #4

Show file

File: dyn-regex-extractor.py Project: SBULeeLab/RegexGeneralizability-ASE19

def retrieveRegexes(regexOutputFileName):
  """Returns libLF.RegexUsage[]

  (Since regexOutputFileName contains regexes from multiple source files,
  multiple files are represented in the returned libLF.RegexUsage[])

  Duplicates by <file, pattern> are removed; when the same <file, pattern>
  appears more than once, the flags from the last occurrence are kept.

  Lines that cannot be parsed as NDJSON are logged and skipped.
  """

  libLF.log("Loading regexes from {}".format(regexOutputFileName))

  # Bin by file, removing duplicates
  file2uniqRegexes = {} # x[filename][pattern] = record
  with open(regexOutputFileName, mode='r') as regexStream:
    for line in regexStream:
      # Try to parse as NDJSON.
      # In Java we rely on a "poor man's JSON" implementation which may sometimes
      # produce malformed strings. In other languages, this should always work.
      try:
        obj = libLF.fromNDJSON(line)
      except Exception:
        # Deliberately not a bare "except:": a bad line should be skipped,
        # but KeyboardInterrupt/SystemExit must still stop the run.
        libLF.log("Could not fromNDJSON line: {}".format(line))
        continue

      file2uniqRegexes.setdefault(obj['file'], {})[obj['pattern']] = {
        'pattern': obj['pattern'],
        'flags': obj['flags']
      }

  # Convert to libLF.RegexUsage[] via libLF.SimpleFileWithRegexes
  ruList = []
  for fileName, uniqRegexes in file2uniqRegexes.items():
    # NOTE(review): this relies on initFromRaw returning the initialized
    # object -- confirm against libLF.
    sfwr = libLF.SimpleFileWithRegexes().initFromRaw(
      fileName, "XXX", True, list(uniqRegexes.values())
    )
    ruList += libLF.sfwrToRegexUsageList(sfwr)

  return ruList

Example #5

Show file

File: extract-rust-regexes.py Project: phj95304/LinguaFranca-FSE19

def main(rustc, rustFile, dumpTokenTree):
    """Print, as NDJSON, the regex patterns found in a Rust source file.

    Args:
        rustc: the rustc program; checked with libLF.checkShellDependencies
            and passed to getTokenTree.
        rustFile: path of the Rust source file to analyse.
        dumpTokenTree: if truthy, also log the token tree as indented JSON.

    If fileMightContainRegexes(rustFile) is false, no token tree is built
    and an empty regex list is reported. If building or walking the token
    tree fails, the error is logged and the process exits with status 1.
    """
    libLF.checkShellDependencies([rustc])

    # Stays None when the file is skipped, so the optional dump below can
    # tell "no tree was built" apart from a real tree instead of hitting an
    # unbound name.
    tokenTree = None

    if fileMightContainRegexes(rustFile):
        libLF.log('File might contain regexes, proceeding...')
        try:
            libLF.log('Getting token tree')
            tokenTree = getTokenTree(rustc, rustFile)
        except BaseException as err:
            libLF.log('Error getting token tree: {}'.format(err))
            sys.exit(1)

        try:
            libLF.log('Walking token tree')
            visitor = FrontierVisitor()
            walkTokenTree(tokenTree, visitor)
            patterns = visitor.getRegexPatterns()
            libLF.log('Extracted {} patterns'.format(len(patterns)))
        except BaseException as err:
            libLF.log('Error walking token tree: {}'.format(err))
            sys.exit(1)
    else:
        libLF.log('File does not contain "Regex", no regexes possible')
        patterns = []

    regexes = [{'pattern': p, 'flags': ''} for p in patterns]
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromRaw(fileName=rustFile,
                     language='rust',
                     couldParse=1,
                     regexes=regexes)
    print(sfwr.toNDJSON())

    if dumpTokenTree:
        if tokenTree is None:
            libLF.log('No token tree to dump: file was not parsed')
        else:
            # "Pretty" JSON makes it easier for humans to decode
            asJSON = json.dumps(tokenTree, indent=2, separators=(',', ':'))
            libLF.log('\n' + asJSON)