def parse_js(text):
    """Return the values of all Identifier tokens found in *text*.

    Best-effort: if *text* is not tokenizable JavaScript, the error is
    swallowed and whatever was collected so far (usually an empty list)
    is returned.

    Args:
        text: JavaScript source code as a string.

    Returns:
        List of identifier token values (may be empty).
    """
    identifiers = []
    try:
        for token in esprima.tokenize(text):
            if token.type == 'Identifier':
                identifiers.append(token.value)
    except Exception:  # damn, is this not JS? -- deliberate best-effort
        pass
    return identifiers
def tokenizer_esprima(code: str) -> List[str]:
    """Tokenize *code* with esprima, expanding string literals.

    String tokens are replaced by a quote marker, their space-separated
    words, and a closing quote marker; every other token contributes its
    raw value unchanged.
    """
    def expand(tok: BufferEntry):
        # Non-string tokens pass through as a single-element list.
        if tok.type != 'String':
            return [tok.value]
        inner = tok.value.strip('\'"')
        return ['"'] + inner.split(' ') + ['"']

    return flatten([expand(tok) for tok in esprima.tokenize(code)])
def check_js(paths, strings_dict):
    """Return the count of user-exposed hard-coded strings in javascript files.

    Args:
        paths: iterable of JS file paths to scan.
        strings_dict: dict providing "ignorable_js_values" and
            "ignorable_js_last_values" collections used to whitelist tokens.
    """
    js_offenses = 0
    for path in paths:
        # Skip bundled third-party libraries and test directories.
        if (
            os.path.basename(path) == "moment.js"
            or os.path.basename(path) == "chart.js"
            or "__tests__" in path
        ):
            continue
        # Shorten the path for display by dropping the SFDX source prefix.
        short_path = path.replace(os.path.join("force-app", "main", "default", ""), "")
        with open(path) as f:
            # last_value tracks the previous non-punctuator token value; it is
            # used to recognise LWC attribute contexts and ALLCAPS constants.
            last_value = ""
            # This strips out the decorators, which aren't supported in esprima.
            js_body = re.sub(r"@\w+(\(.*\))?", "", f.read())
            parsed_js = esprima.tokenize(js_body, {"loc": True})
            # Punctuators are noise for this heuristic.
            parsed_js = [
                element for element in parsed_js if element.type != "Punctuator"
            ]
            for item in parsed_js:
                # Normalise quoting/whitespace (incl. non-breaking space) so
                # comparisons against the ignore lists are stable.
                item.value = item.value.strip("\u00a0'\"/.;() ")
                if (
                    item.type == "String"
                    and item.value not in strings_dict.get("ignorable_js_values")
                    # Salesforce field/relationship API names are not user-facing.
                    and not item.value.endswith("__r")
                    and not item.value.endswith("__c")
                    and last_value
                    not in strings_dict.get(
                        "ignorable_js_last_values"
                    )  # allows for lwc attributes that expect string values
                    and item.value[
                        0:1
                    ].isupper()  # assumes title case in any user-exposed strings
                    and not item.value.isupper()  # allows for constants with ALLUPPER naming convention
                    and not last_value.isupper()  # allows for constants with ALLUPPER naming convention
                ):
                    print(
                        f"JS: {short_path} -- line {str(item.loc.start.line)} -- {last_value}: {item.value}"
                    )
                    js_offenses += 1
                last_value = item.value
    return js_offenses
def scan(self, data, file, options, expire_at):
    """Tokenize JavaScript *data* and record distinct token types, strings,
    keywords, identifiers and regular expressions on ``self.event``.

    The source is optionally beautified first (``beautify`` option,
    default True); a beautify failure is flagged, not fatal, and the raw
    decoded source is scanned instead.
    """
    beautify = options.get('beautify', True)

    self.event.setdefault('tokens', [])
    self.event.setdefault('keywords', [])
    self.event.setdefault('strings', [])
    self.event.setdefault('identifiers', [])
    self.event.setdefault('regular_expressions', [])
    self.event['beautified'] = False

    js = None
    try:
        if beautify:
            js = jsbeautifier.beautify(data.decode())
            self.event['beautified'] = True
    except Exception:  # noqa -- best-effort: fall back to the raw source
        self.flags.append('beautify_failed')

    if js is None:
        js = data.decode()

    tokens = esprima.tokenize(
        js,
        options={
            'comment': True,
            'tolerant': True,
        }
    )

    for t in tokens:
        if t.type not in self.event['tokens']:
            self.event['tokens'].append(t.type)

        if t.type == 'String':
            stripped_val = t.value.strip('"\'')
            if stripped_val not in self.event['strings']:
                self.event['strings'].append(stripped_val)

        if t.type == 'Keyword':
            if t.value not in self.event['keywords']:
                self.event['keywords'].append(t.value)

        if t.type == 'Identifier':
            if t.value not in self.event['identifiers']:
                self.event['identifiers'].append(t.value)

        # BUG FIX: the original compared the *builtin* ``type`` to the
        # string ('if type == ...'), which is always False, so regular
        # expressions were never collected. Compare the token's type.
        if t.type == 'RegularExpression':
            if t.value not in self.event['regular_expressions']:
                self.event['regular_expressions'].append(t.value)
def methodBody2Tokens(path, dist, errPath):
    """Tokenize the method body (last '@'-separated field) of each line.

    For every line of *path*, the last '@' field is tokenized with
    esprima; on success the line is appended to *dist* with the body
    replaced by a space-joined "type value type value ..." sequence, and
    on tokenization failure the original line is appended to *errPath*.

    Args:
        path: input file, UTF-8, '@'-delimited records, one per line.
        dist: output file path (appended to).
        errPath: error file path for untokenizable records (appended to).
    """
    with open(path, 'r', encoding='utf-8') as src:
        lines = src.readlines()
    for line in lines:
        lineList = line.split('@')
        print(lineList)
        # NUL bytes would confuse the tokenizer; strip them first.
        methodBody = lineList[-1].replace('\x00', '')
        try:
            bodySequence = []
            for pair in esprima.tokenize(methodBody):
                bodySequence.append(pair.type)
                bodySequence.append(pair.value)
            bodySequence = ' '.join(bodySequence)
        except Exception:  # invalid JS -- route the raw record to the error file
            with open(errPath, 'a', encoding='utf8') as err:
                err.write('@'.join(lineList) + '\n')
        else:
            # Note: opened per-record to preserve the original append-only
            # behaviour; the handle no longer shadows the input file variable.
            with open(dist, 'a') as out:
                out.write('@'.join(lineList[:-1] + [bodySequence]) + '\n')
def parseObject(code):
    """Tokenize *code* with esprima and check whether it declares an object."""
    token_stream = esprima.tokenize(code)
    return isDeclaredObject(([], token_stream, False))
def tokenize_program(file):
    """Read the JavaScript source at *file* and return its esprima tokens."""
    with open(file, encoding="utf8") as handle:
        source = handle.read()
    return esprima.tokenize(source)
def get_baan_list() -> list:
    """
    Get list of baan from rubnongkaomai.com

    Scrapes the /baan page for the preloaded 'src-pages-baan-js' bundle,
    tokenizes it with esprima, and walks the token stream of the
    allBaanJson object literal, pairing each nameURL with the size that
    follows it.

    Returns:
        A list of baan dictionaries, each containing nameURL (relative url
        to the baan's page) and size (S/M/L/XL). Empty if the JS bundle
        cannot be located.
    """
    # Find js component location
    baan_response = requests.get(RUBNONGKAOMAI_BASE_URL + '/baan')
    # parse html using beautifulsoup
    baan_soup = BeautifulSoup(baan_response.text, 'html.parser')
    # find all link as=script tags
    possible_js_tags = baan_soup.find_all('link', {
        'as': 'script',
        'rel': 'preload'
    })
    component_js_location = None
    for tag in possible_js_tags:
        if 'src-pages-baan-js' in tag['href']:
            component_js_location = tag['href']
            break
    # not found (fixed: identity comparison instead of `== None`)
    if component_js_location is None:
        return []
    # get js component file
    baan_js_response = requests.get(RUBNONGKAOMAI_BASE_URL +
                                    component_js_location)
    # parse js file
    tokens = esprima.tokenize(baan_js_response.text)
    # find allBaanJson variable
    collecting = False
    look_for_size_field = False
    all_bann = []
    current_baan = {}
    idx = 0
    # for each token
    while idx < len(tokens):
        # begin collecting data if we found "allBaanJson:"
        if (tokens[idx].value == 'allBaanJson'
                and tokens[idx].type == 'Identifier'
                and tokens[idx + 1].type == 'Punctuator'
                and tokens[idx + 1].value == ':'):
            collecting = True
            idx += 2  # skip ahead 2 tokens
            continue
        # end collecting data if we found "allFile:" (section after baan data)
        if (tokens[idx].value == 'allFile'
                and tokens[idx].type == 'Identifier'
                and tokens[idx + 1].type == 'Punctuator'
                and tokens[idx + 1].value == ':'):
            collecting = False
            break
        # If we are in range of allBaanJson object definition
        if collecting:
            # If we found the nameURL key, its value is 2 tokens ahead
            # (identifier, ':', value).
            if tokens[idx].type == 'Identifier' and tokens[idx].value == 'nameURL':
                current_baan['nameURL'] = tokens[idx + 2].value.strip('\"')
                # Try to look for size key
                look_for_size_field = True
                idx += 3
                continue
            # If we found the size key belonging to the current baan
            if (look_for_size_field and tokens[idx].type == 'Identifier'
                    and tokens[idx].value == 'size'):
                # record and push a copy to the result list
                current_baan['size'] = tokens[idx + 2].value.strip('\"')
                all_bann.append(dict(current_baan))
                # stop looking for size key
                look_for_size_field = False
                idx += 3
                continue
        idx += 1
    return all_bann
def test_depth(self):
    """Check that each component's written `depth` matches the index baked
    into its name, for both the PsychoPy (Python) and PsychoJS scripts.

    BUG FIX: both assert failure messages were plain string literals with
    `{...}` placeholders that never interpolated; they are now f-strings.
    """
    # Make minimal experiment
    comp, rt, exp = _make_minimal_experiment(self)
    # Get class we're currently working with
    compClass = type(comp)
    # Add index to component name
    baseCompName = comp.name
    comp.name = baseCompName + str(0)
    # Add more components
    for i in range(3):
        comp = compClass(exp=exp, parentName='TestRoutine',
                         name=baseCompName + str(i + 1))
        rt.append(comp)
    # Do test for Py
    script = exp.writeScript(target="PsychoPy")
    # Parse script to get each object def as a node
    tree = ast.parse(script)
    for node in tree.body:
        # If current node is an assignment, investigate
        if isinstance(node, ast.Assign):
            # Get name
            name = node.targets[0]
            if isinstance(name, ast.Name):
                # If name matches component names, look for depth keyword
                if baseCompName in name.id:
                    for key in node.value.keywords:
                        if key.arg == "depth":
                            if isinstance(key.value, ast.Constant):
                                # If depth is positive, get value as is
                                depth = int(key.value.value)
                            elif isinstance(key.value, ast.UnaryOp):
                                # If depth is negative, get value*-1
                                depth = int(key.value.operand.value)
                            else:
                                # If it's anything else, something is wrong
                                raise TypeError(
                                    f"Expected depth value in script to be a number, instead it is {type(key.value)}"
                                )
                            # Make sure depth matches what we expect
                            assert baseCompName + str(depth) == name.id, (
                                f"Depth of {compClass} did not match expected: {name.id} should have a depth "
                                f"matching the value in its name * -1, instead had depth of -{depth}."
                            )
    # Do test for JS
    script = exp.writeScript(target="PsychoJS")
    # Parse JS script
    tree = esprima.tokenize(
        script
    )  # ideally we'd use esprima.parseScript, but this throws an error with PsychoJS scripts
    inInit = False
    thisCompName = ""
    for i, node in enumerate(tree):
        # Detect start of inits
        if node.type == "Identifier" and baseCompName in node.value:
            inInit = True
            thisCompName = node.value
        # Detect end of inits
        if node.type == "Punctuator" and node.value == "}":
            inInit = False
            thisCompName = ""
        if inInit:
            # If we're in the init, detect start of param
            if node.type == "Identifier" and node.value == "depth":
                # 2 nodes ahead of param start will be param value...
                depth = tree[i + 2].value
                if depth == "-":
                    # ...unless negative, in which case the value is 3 nodes ahead
                    depth = tree[i + 3].value
                depth = int(float(depth))
                # Make sure depth matches what we expect
                assert baseCompName + str(depth) == thisCompName, (
                    f"Depth of {compClass} did not match expected: {thisCompName} should have a depth "
                    f"matching the value in its name * -1, instead had depth of -{depth}."
                )
def AnalisisSintactico(documento):
    """Return the esprima token stream for the given document."""
    return esprima.tokenize(documento)
def login(self):
    """Authenticate against the CUGB CAS portal, then extract the uid from
    the student-portal page's inline JavaScript and post it to the wisedu
    login endpoint. On success, triggers ``self.out_apply()``; the outcome
    is recorded in ``self.message1`` and any exception is captured there
    as 'Error Code 0: ...'.
    """
    try:
        # # (Invalid now due to the websites' update)
        # # Request the cookie and token from the origin web:
        # originurl = 'https://stu.cugb.edu.cn'
        # req = self.session.get(originurl)
        # set_cookie = requests.utils.dict_from_cookiejar(req.cookies)
        # header_cookie = 'JSESSIONID' + '=' + str(set_cookie['JSESSIONID']) + ';' + 'token' + '=' + str(set_cookie['token'])
        # #Add the cookie and token into headers:
        # self.headers['Cookie'] = header_cookie
        # A new feature for the site that uses base64 encoding for the username and password, and uses token:
        # uname_encrypt = str(base64.b64encode(uname.encode('utf-8')), 'utf-8')
        # upwd_encrypt = str(base64.b64encode(upwd.encode('utf-8')), 'utf-8')
        # token = str(set_cookie['token'])
        # data = {'username': uname_encrypt, 'password': upwd_encrypt, 'verification': '', 'token': token}
        # CAS Unified Auth:
        url = 'https://cas.cugb.edu.cn/login'
        req = self.session.request('GET', url, verify=False).content
        soup = BeautifulSoup(req, 'html.parser')
        # The CAS form carries a one-time 'execution' token and a system id
        # that must be echoed back in the POST.
        execution = soup.findAll("input", {"name": "execution"})[0]["value"]
        system = soup.findAll("input", {"id": "userLoginSystem"})[0]["value"]
        uname = self.userconfig['username']
        upwd = self.userconfig['password']
        data = {
            'username': uname,
            'password': upwd,
            'execution': execution,
            '_eventId': 'submit',
            'geolocation': '',
            'loginType': 'username',
            'system': system,  # '27A5A4DF0C874122A0AFE0367F0A3F46'
            'enableCaptcha': 'N'
        }
        req = self.session.post(url=url, data=data, headers=self.headers,
                                verify=False)
        self.cookies = requests.utils.dict_from_cookiejar(req.cookies)
        time.sleep(3)
        # To get the uid from javascript:
        uidurl = 'https://stu.cugb.edu.cn/'
        req = self.session.request('GET', uidurl, cookies=self.cookies,
                                   headers=self.headers, verify=False).content
        soup = BeautifulSoup(req, 'html.parser')
        # Strip the <script> tags, keeping only the inline JS text.
        scriptTags = str(soup.findAll('script')[1])
        rexp = re.compile(r'<[^>]+>', re.S)
        scriptCode = rexp.sub('', scriptTags)
        # NOTE(review): the uid is assumed to be token #48 of the inline
        # script -- brittle magic index; breaks if the page's JS changes.
        uid = esprima.tokenize(scriptCode)[48].value.replace('\'', '')
        uiddata = {'uid': uid}
        req = self.session.request(
            'POST',
            "https://stu.cugb.edu.cn:443/caswisedu/login.htm",
            data=uiddata,
            verify=False)
        time.sleep(3)
        content = self.session.post(
            'https://stu.cugb.edu.cn/webApp/xuegong/index.html#/zizhu/apply?projectId=4a4ce9d674438da101745ca20e2b3a5e&type=YQSJCJ',
            verify=False)
        if content.status_code == 200:
            self.message1 = "Login status: Succeeded"
            time.sleep(3)
            self.out_apply()
        else:
            self.message1 = "Login status: Failed"
    except Exception as e:
        self.message1 = 'Error Code 0: ' + str(e)
def test(self):
    """Run one esprima fixture: load the expected JSON, tokenize or parse
    the matching source file with options derived from the fixture, and
    compare the result (or the raised Error) against the expectation.
    """
    with open(result_file, 'rb') as f:
        expected_json = f.read()
    expected = toDict(json.loads(expected_json.decode('utf-8')))
    if isinstance(expected, dict):
        expected.pop(
            'description',
            None)  # Not all json failure files include description
        expected.pop('tokenize', None)  # tokenize is not part of errors
        options = expected.pop(
            'options', None)  # Extracts options from tree (if any)
    else:
        options = None
    with open(filename, 'rb') as f:
        actual_code = f.read()
    # '.source.' fixtures embed the code as an escaped JS string literal.
    if '.source.' in filename:
        actual_code = SOURCE_RE.sub(
            r'\2', actual_code).decode('unicode_escape')
    else:
        actual_code = actual_code.decode('utf-8')
    try:
        if result_type == '.tokens':
            # Tokenizer fixture: default to full location/comment output.
            if options is None:
                options = {
                    'loc': True,
                    'range': True,
                    'comment': True,
                    'tolerant': True,
                }
            actual = toDict(tokenize(actual_code, options=options))
        else:
            # Parser fixture: infer options from the expected tree shape.
            sourceType = 'module' if '.module.' in filename else 'script'
            if options is None:
                options = {
                    'jsx': True,
                    'comment': 'comments' in expected,
                    'range': True,
                    'loc': True,
                    'tokens': True,
                    'raw': True,
                    'tolerant': 'errors' in expected,
                    'source': None,
                    'sourceType': expected.get('sourceType', sourceType),
                }
            if options.get('comment'):
                # Only attach comments when the fixture actually contains
                # attached-comment keys somewhere in its tree.
                def hasAttachedComment(expected):
                    for k, v in expected.items():
                        if k in ('leadingComments', 'trailingComments',
                                 'innerComments'):
                            return True
                        elif isinstance(v, dict):
                            if hasAttachedComment(v):
                                return True
                        elif isinstance(v, list):
                            for i in v:
                                if isinstance(i, dict):
                                    if hasAttachedComment(i):
                                        return True
                    return False
                options['attachComment'] = hasAttachedComment(expected)
            # Mirror whether the fixture's first token/comment carries
            # range/loc info so the outputs are shape-compatible.
            if expected.get('tokens'):
                token = expected['tokens'][0]
                options['range'] = 'range' in token
                options['loc'] = 'loc' in token
            if expected.get('comments'):
                comment = expected['comments'][0]
                options['range'] = 'range' in comment
                options['loc'] = 'loc' in comment
            if options.get('loc'):
                options['source'] = expected.get('loc', {}).get('source')
            actual = toDict(parse(actual_code, options=options))
    except Error as e:
        # Failure fixtures compare against the serialized parse error.
        actual = e.toDict()
    self.assertEqual(expected, actual)