Example No. 1
def parse_js(text):
    temp_params = []
    try:
        jsparse = esprima.tokenize(text)
        for token in jsparse:
            if token.type == 'Identifier':
                temp_params.append(token.value)
    except Exception:
        pass  # probably not valid JS, skip it
    return temp_params
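A quick usage sketch for the helper above (the JavaScript snippet is purely illustrative): parse_js collects the value of every Identifier token that esprima emits.

import esprima  # required by parse_js above

sample = "var total = price * quantity;"  # hypothetical input
print(parse_js(sample))  # -> ['total', 'price', 'quantity']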
Example No. 2
def tokenizer_esprima(code: str) -> List[str]:
    tokens = esprima.tokenize(code)

    def subtok(token: BufferEntry):
        if token.type == 'String':
            content = token.value.strip('\'"')
            return ['"', *content.split(' '), '"']
        else:
            return [token.value]

    return flatten([subtok(t) for t in tokens])
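The flatten helper used above is not part of the example (and the BufferEntry annotation refers to esprima's own token type); a minimal sketch of flatten, assuming it only needs to flatten one level of nested lists:

from itertools import chain
from typing import Iterable, List

def flatten(lists: Iterable[List[str]]) -> List[str]:
    # Flatten one level: [['a'], ['"', 'b', 'c', '"']] -> ['a', '"', 'b', 'c', '"']
    return list(chain.from_iterable(lists))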
Example No. 3
def check_js(paths, strings_dict):
    """Return the count of user-exposed hard-coded strings in javascript files."""
    js_offenses = 0

    for path in paths:

        if (
            os.path.basename(path) == "moment.js"
            or os.path.basename(path) == "chart.js"
            or "__tests__" in path
        ):
            continue

        short_path = path.replace(os.path.join("force-app", "main", "default", ""), "")

        with open(path) as f:
            last_value = ""

            # This strips out the decorators, which aren't supported in esprima.
            js_body = re.sub(r"@\w+(\(.*\))?", "", f.read())

            parsed_js = esprima.tokenize(js_body, {"loc": True})
            parsed_js = [
                element for element in parsed_js if element.type != "Punctuator"
            ]
            for item in parsed_js:
                item.value = item.value.strip("\u00a0'\"/.;() ")
                if (
                    item.type == "String"
                    and item.value not in strings_dict.get("ignorable_js_values")
                    and not item.value.endswith("__r")
                    and not item.value.endswith("__c")
                    and last_value
                    not in strings_dict.get(
                        "ignorable_js_last_values"
                    )  # allows for lwc attributes that expect string values
                    and item.value[
                        0:1
                    ].isupper()  # assumes title case in any user-exposed strings
                    and not item.value.isupper()  # allows for constants with ALLUPPER naming convention
                    and not last_value.isupper()  # allows for constants with ALLUPPER naming convention
                ):
                    print(
                        f"JS: {short_path} -- line {item.loc.start.line} -- {last_value}: {item.value}"
                    )
                    js_offenses += 1
                last_value = item.value
    return js_offenses
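check_js does not show how strings_dict is built; a minimal sketch of the shape it expects, inferred from the two keys it reads (the concrete values and the path below are hypothetical):

strings_dict = {
    # string literals that are never user-exposed
    "ignorable_js_values": ["Id", "Ok", "None"],
    # preceding tokens (e.g. LWC attribute names) whose string values are expected
    "ignorable_js_last_values": ["label", "title", "variant"],
}
offenses = check_js(["force-app/main/default/lwc/example/example.js"], strings_dict)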
Example No. 4
    def scan(self, data, file, options, expire_at):
        beautify = options.get('beautify', True)

        self.event.setdefault('tokens', [])
        self.event.setdefault('keywords', [])
        self.event.setdefault('strings', [])
        self.event.setdefault('identifiers', [])
        self.event.setdefault('regular_expressions', [])
        self.event['beautified'] = False

        js = None

        try:
            if beautify:
                js = jsbeautifier.beautify(data.decode())
                self.event['beautified'] = True
        except:  # noqa
            self.flags.append('beautify_failed')

        if js is None:
            js = data.decode()

        tokens = esprima.tokenize(
            js,
            options={
                'comment': True,
                'tolerant': True,
            }
        )
        for t in tokens:
            if t.type not in self.event['tokens']:
                self.event['tokens'].append(t.type)
            if t.type == 'String':
                stripped_val = t.value.strip('"\'')
                if stripped_val not in self.event['strings']:
                    self.event['strings'].append(stripped_val)
            if t.type == 'Keyword':
                if t.value not in self.event['keywords']:
                    self.event['keywords'].append(t.value)
            if t.type == 'Identifier':
                if t.value not in self.event['identifiers']:
                    self.event['identifiers'].append(t.value)
            if t.type == 'RegularExpression':
                if t.value not in self.event['regular_expressions']:
                    self.event['regular_expressions'].append(t.value)
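For orientation, a standalone sketch that mirrors the collection loop in scan on a one-line script, without the class scaffolding or the jsbeautifier step:

import esprima

event = {'tokens': [], 'strings': []}
for t in esprima.tokenize('var x = "hi";'):
    if t.type not in event['tokens']:
        event['tokens'].append(t.type)
    if t.type == 'String':
        event['strings'].append(t.value.strip('"\''))
print(event['tokens'])   # ['Keyword', 'Identifier', 'Punctuator', 'String']
print(event['strings'])  # ['hi']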
Example No. 5
def methodBody2Tokens(path, dist, errPath):
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        lineList = line.split('@')
        print(lineList)
        methodBody = lineList[-1].replace('\x00', '')

        try:
            bodySequence = []
            methodBodyTokenJson = esprima.tokenize(methodBody)
            for pair in methodBodyTokenJson:
                types = pair.type
                value = pair.value
                bodySequence.append(types)
                bodySequence.append(value)
            bodySequence = ' '.join(bodySequence)
            with open(dist, 'a') as f:
                f.write('@'.join(lineList[:-1] + [bodySequence]) + '\n')
        except Exception:
            with open(errPath, 'a', encoding='utf8') as f:
                f.write('@'.join(lineList) + '\n')
Example No. 6
def parseObject(code):
    tokens = esprima.tokenize(code)
    return isDeclaredObject(([], tokens, False))
Example No. 7
def tokenize_program(file):
    with open(file, encoding="utf8") as f:
        program = f.read()
        tokens = esprima.tokenize(program)
        return tokens
Example No. 8
def get_baan_list() -> list:
    """ Get list of baan from rubnongkaomai.com 

    Returns:
        A list of baan dictionary contains nameURL(relative url to baan's page) and size(S/M/L/XL)
    """
    # Find js component location
    baan_response = requests.get(RUBNONGKAOMAI_BASE_URL + '/baan')
    # parse html using beautifulsoup
    baan_soup = BeautifulSoup(baan_response.text, 'html.parser')
    # find all link as=script tags
    possible_js_tags = baan_soup.find_all('link', {
        'as': 'script',
        'rel': 'preload'
    })
    component_js_location = None
    for tag in possible_js_tags:
        if 'src-pages-baan-js' in tag['href']:
            component_js_location = tag['href']
            break
    # not found
    if component_js_location is None:
        return []
    # get js component file
    baan_js_response = requests.get(RUBNONGKAOMAI_BASE_URL +
                                    component_js_location)
    # parse js file
    tokens = esprima.tokenize(baan_js_response.text)
    # find allBaanJson variable
    collecting = False
    look_for_size_field = False
    all_baan = []
    current_baan = {}
    idx = 0
    # for each token
    while idx < len(tokens):
        # begin collecting data if we found allBaanJson
        if (tokens[idx].value == 'allBaanJson'
                and tokens[idx].type == 'Identifier'
                and tokens[idx + 1].type == 'Punctuator'
                and tokens[idx + 1].value == ':'):
            collecting = True
            idx += 2  # skip head 2 tokens
            continue
        # end collecting data if we found allFile
        if (tokens[idx].value == 'allFile' and tokens[idx].type == 'Identifier'
                and tokens[idx + 1].type == 'Punctuator'
                and tokens[idx + 1].value == ':'):
            collecting = False
            break
        # If we are in range of allBaanJson object definition
        if collecting:
            # If we found nameURL key
            if tokens[idx].type == 'Identifier' and tokens[
                    idx].value == 'nameURL':
                current_baan['nameURL'] = tokens[idx + 2].value.strip('\"')
                # Try to look for size key
                look_for_size_field = True
                # skip ahead by 3
                idx += 3
                continue
            # If we found size key
            if look_for_size_field and tokens[
                    idx].type == 'Identifier' and tokens[idx].value == 'size':
                # record and push to list
                current_baan['size'] = tokens[idx + 2].value.strip('\"')
                all_baan.append(dict(current_baan))
                # stop looking for size key
                look_for_size_field = False
                # skip ahead by 3
                idx += 3
                continue
        idx += 1
    return all_baan
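A short usage sketch of the function above; each returned entry follows the docstring's contract (the actual values depend on the live site):

for baan in get_baan_list():
    # 'nameURL' is the relative URL to the baan's page, 'size' is S/M/L/XL
    print(baan['nameURL'], baan['size'])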
Example No. 9
    def test_depth(self):
        # Make minimal experiment
        comp, rt, exp = _make_minimal_experiment(self)
        # Get class we're currently working with
        compClass = type(comp)
        # Add index to component name
        baseCompName = comp.name
        comp.name = baseCompName + str(0)
        # Add more components
        for i in range(3):
            comp = compClass(exp=exp,
                             parentName='TestRoutine',
                             name=baseCompName + str(i + 1))
            rt.append(comp)

        # Do test for Py
        script = exp.writeScript(target="PsychoPy")
        # Parse script to get each object def as a node
        tree = ast.parse(script)
        for node in tree.body:
            # If current node is an assignment, investigate
            if isinstance(node, ast.Assign):
                # Get name
                name = node.targets[0]
                if isinstance(name, ast.Name):
                    # If name matches component names, look for depth keyword
                    if baseCompName in name.id:
                        for key in node.value.keywords:
                            if key.arg == "depth":
                                if isinstance(key.value, ast.Constant):
                                    # If depth is positive, get value as is
                                    depth = int(key.value.value)
                                elif isinstance(key.value, ast.UnaryOp):
                                    # If depth is negative, get value*-1
                                    depth = int(key.value.operand.value)
                                else:
                                    # If it's anything else, something is wrong
                                    raise TypeError(
                                        f"Expected depth value in script to be a number, instead it is {type(key.value)}"
                                    )
                                # Make sure depth matches what we expect
                                assert baseCompName + str(depth) == name.id, (
                                    f"Depth of {compClass} did not match expected: {name.id} should have a depth "
                                    f"matching the value in its name * -1, instead had depth of -{depth}."
                                )

        # Do test for JS
        script = exp.writeScript(target="PsychoJS")
        # Parse JS script
        tree = esprima.tokenize(
            script
        )  # ideally we'd use esprima.parseScript, but this throws an error with PsychoJS scripts
        inInit = False
        thisCompName = ""
        for i, node in enumerate(tree):
            # Detect start of inits
            if node.type == "Identifier" and baseCompName in node.value:
                inInit = True
                thisCompName = node.value
            # Detect end of inits
            if node.type == "Punctuator" and node.value == "}":
                inInit = False
                thisCompName = ""
            if inInit:
                # If we're in the init, detect start of param
                if node.type == "Identifier" and node.value == "depth":
                    # 2 nodes ahead of param start will be param value...
                    depth = tree[i + 2].value
                    if depth == "-":
                        # ...unless negative, in which case the value is 3 nodes ahead
                        depth = tree[i + 3].value
                    depth = int(float(depth))
                    # Make sure depth matches what we expect
                    assert baseCompName + str(depth) == thisCompName, (
                        f"Depth of {compClass} did not match expected: {thisCompName} should have a depth "
                        f"matching the value in its name * -1, instead had depth of -{depth}."
                    )
Example No. 10
def AnalisisSintactico(documento):
    tokens = esprima.tokenize(documento)
    return tokens
Example No. 11
    def login(self):
        try:
            # # (Invalid now due to the website's update)
            # #  Request the cookie and token from the origin web:
            # originurl = 'https://stu.cugb.edu.cn'
            # req = self.session.get(originurl)
            # set_cookie = requests.utils.dict_from_cookiejar(req.cookies)
            # header_cookie = 'JSESSIONID' + '=' + str(set_cookie['JSESSIONID']) + ';' + 'token' + '=' + str(set_cookie['token'])
            # #Add the cookie and token into headers:
            # self.headers['Cookie'] = header_cookie
            # A new feature for the site that uses base64 encoding for the username and password, and uses token:
            # uname_encrypt = str(base64.b64encode(uname.encode('utf-8')), 'utf-8')
            # upwd_encrypt = str(base64.b64encode(upwd.encode('utf-8')), 'utf-8')
            # token = str(set_cookie['token'])
            # data = {'username': uname_encrypt, 'password': upwd_encrypt, 'verification': '', 'token': token}

            # CAS Unified Auth:
            url = 'https://cas.cugb.edu.cn/login'
            req = self.session.request('GET', url, verify=False).content
            soup = BeautifulSoup(req, 'html.parser')
            execution = soup.findAll("input",
                                     {"name": "execution"})[0]["value"]
            system = soup.findAll("input",
                                  {"id": "userLoginSystem"})[0]["value"]
            uname = self.userconfig['username']
            upwd = self.userconfig['password']
            data = {
                'username': uname,
                'password': upwd,
                'execution': execution,
                '_eventId': 'submit',
                'geolocation': '',
                'loginType': 'username',
                'system': system,  # '27A5A4DF0C874122A0AFE0367F0A3F46'
                'enableCaptcha': 'N'
            }
            req = self.session.post(url=url,
                                    data=data,
                                    headers=self.headers,
                                    verify=False)
            self.cookies = requests.utils.dict_from_cookiejar(req.cookies)
            time.sleep(3)
            # To get the uid from javascript:
            uidurl = 'https://stu.cugb.edu.cn/'
            req = self.session.request('GET',
                                       uidurl,
                                       cookies=self.cookies,
                                       headers=self.headers,
                                       verify=False).content
            soup = BeautifulSoup(req, 'html.parser')
            scriptTags = str(soup.findAll('script')[1])
            rexp = re.compile(r'<[^>]+>', re.S)
            scriptCode = rexp.sub('', scriptTags)
            uid = esprima.tokenize(scriptCode)[48].value.replace('\'', '')
            uiddata = {'uid': uid}
            req = self.session.request(
                'POST',
                "https://stu.cugb.edu.cn:443/caswisedu/login.htm",
                data=uiddata,
                verify=False)
            time.sleep(3)
            content = self.session.post(
                'https://stu.cugb.edu.cn/webApp/xuegong/index.html#/zizhu/apply?projectId=4a4ce9d674438da101745ca20e2b3a5e&type=YQSJCJ',
                verify=False)
            if content.status_code == 200:
                self.message1 = "Login status: Succeeded"
                time.sleep(3)
                self.out_apply()
            else:
                self.message1 = "Login status: Failed"
        except Exception as e:
            self.message1 = 'Error Code 0: ' + str(e)
Example No. 12
        def test(self):
            with open(result_file, 'rb') as f:
                expected_json = f.read()
            expected = toDict(json.loads(expected_json.decode('utf-8')))
            if isinstance(expected, dict):
                expected.pop(
                    'description',
                    None)  # Not all json failure files include description
                expected.pop('tokenize',
                             None)  # tokenize is not part of errors
                options = expected.pop(
                    'options', None)  # Extracts options from tree (if any)
            else:
                options = None

            with open(filename, 'rb') as f:
                actual_code = f.read()
            if '.source.' in filename:
                actual_code = SOURCE_RE.sub(
                    r'\2', actual_code).decode('unicode_escape')
            else:
                actual_code = actual_code.decode('utf-8')

            try:
                if result_type == '.tokens':
                    if options is None:
                        options = {
                            'loc': True,
                            'range': True,
                            'comment': True,
                            'tolerant': True,
                        }
                    actual = toDict(tokenize(actual_code, options=options))
                else:
                    sourceType = 'module' if '.module.' in filename else 'script'
                    if options is None:
                        options = {
                            'jsx': True,
                            'comment': 'comments' in expected,
                            'range': True,
                            'loc': True,
                            'tokens': True,
                            'raw': True,
                            'tolerant': 'errors' in expected,
                            'source': None,
                            'sourceType': expected.get('sourceType',
                                                       sourceType),
                        }

                    if options.get('comment'):

                        def hasAttachedComment(expected):
                            for k, v in expected.items():
                                if k in ('leadingComments', 'trailingComments',
                                         'innerComments'):
                                    return True
                                elif isinstance(v, dict):
                                    if hasAttachedComment(v):
                                        return True
                                elif isinstance(v, list):
                                    for i in v:
                                        if isinstance(i, dict):
                                            if hasAttachedComment(i):
                                                return True
                            return False

                        options['attachComment'] = hasAttachedComment(expected)

                    if expected.get('tokens'):
                        token = expected['tokens'][0]
                        options['range'] = 'range' in token
                        options['loc'] = 'loc' in token

                    if expected.get('comments'):
                        comment = expected['comments'][0]
                        options['range'] = 'range' in comment
                        options['loc'] = 'loc' in comment

                    if options.get('loc'):
                        options['source'] = expected.get('loc',
                                                         {}).get('source')

                    actual = toDict(parse(actual_code, options=options))
            except Error as e:
                actual = e.toDict()

            self.assertEqual(expected, actual)