def parse_js(js_html):
    try:
        p = PyJsParser()
        js_ast = p.parse(js_html)
    except Exception:
        return False
    return js_ast
def parse_js(self, js_html):
    try:
        p = PyJsParser()
        js_ast = p.parse(js_html)
    except Exception:
        self.interfacer.raise_error("Unable to get a JS AST")
        return False
    return js_ast
def parse(javascript_code):
    """Return the syntax tree of javascript_code.

    The syntax tree has the same structure as the one produced by esprima.js.
    Same as PyJsParser().parse. For your convenience :)
    """
    p = PyJsParser()
    return p.parse(javascript_code)
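# --- Illustrative usage of the parse() helper above (added sketch, not from
# the original source; assumes the pyjsparser import shown elsewhere in this
# file). The returned AST is a plain dict mirroring esprima's JSON layout,
# and numeric literals come back as Python floats.
_tree = parse("var answer = 42;")
_decl = _tree['body'][0]
assert _decl['type'] == 'VariableDeclaration'
assert _decl['declarations'][0]['id']['name'] == 'answer'
assert _decl['declarations'][0]['init']['value'] == 42.0  # floats, not ints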
def get_config(pool):
    if pool['pool_API_type'] == 'node-cryptonote-pool':
        try:
            response = requests.get(pool['config_file'], stream=True)
            if response.status_code == 200:
                config = response.text
                p = PyJsParser()
                config_json = p.parse(config)
                for entry in config_json['body']:
                    if entry['type'] == 'VariableDeclaration':
                        declaration = entry['declarations'][0]
                        if declaration['id']['name'] == "api":
                            pool['pool_API_url'] = declaration['init']['value']
                        elif "poolHost" in declaration['id']['name']:
                            if declaration['init']['value'] not in pool['pool_mining_urls']:
                                pool['pool_mining_urls'].append(declaration['init']['value'])
                return pool
        except Exception:
            print("error getting the provided URL for config file")
        return pool
    elif pool['pool_API_type'] == 'nodejs-pool':
        try:
            response = requests.get(pool['config_file'], stream=True)
            if response.status_code == 200:
                config = response.text.splitlines()
                for line in config:
                    if 'api_url' in line:
                        pool['pool_API_url'] = line[line.index("api_url") + 11:line.index(",") - 1]
        except Exception:
            print("error getting the provided URL for config file")
        try:
            poolports_url = pool['pool_API_url'] + "/pool/ports"
            response = requests.get(poolports_url)
            if response.status_code == 200:
                poolports = json.loads(response.text)
                for pplns in poolports['pplns']:
                    if pplns['host']['hostname'] not in pool['pool_mining_urls']:
                        pool['pool_mining_urls'].append(pplns['host']['hostname'])
                for pplns in poolports['global']:
                    if pplns['host']['hostname'] not in pool['pool_mining_urls']:
                        pool['pool_mining_urls'].append(pplns['host']['hostname'])
        except Exception:
            print("error getting the list of poolHost via API")
        return pool
    else:
        return pool
def parse_js(content):
    from datetime import datetime

    start = datetime.now()
    js_parser = PyJsParser()
    try:
        tree = js_parser.parse(content)
        for expr in tree["body"]:
            parse_expr(expr)
    except JsSyntaxError:
        parser.process_content(content, contexts.JS_CODE)
    end = datetime.now()
    library.js_us += end - start
class GoogleMyMaps:

    def __init__(self):
        self.parser = PyJsParser()

    def getFromMyMap(self, mapID):
        r = requests.get("https://www.google.com/maps/d/edit?hl=ja&mid=" + mapID)
        return r

    def parseData(self, r):
        soup = BeautifulSoup(r.text, "html.parser")
        script = soup.find_all("script")[1].text
        js = self.parser.parse(script)
        pagedata = js["body"][1]["declarations"][0]["init"]["value"]
        # rewrite the JS literal into Python syntax before evaluating it
        data = pagedata.replace("true", "True")
        data = data.replace("false", "False")
        data = data.replace("null", "None")
        data = data.replace("\n", "")
        # exec("data = " + data)
        data = eval(data)  # note: eval of remote content is unsafe
        return data[1]

    def parseLayerData(self, layerData):
        # layerName = layerData[2]
        places = layerData[4]
        # url = places[0][0]
        parsed = []
        for place in places:
            placeName = place[5][0][0]
            info = place[4]
            point = info[4]
            parsed.append({
                "placeName": placeName,
                "point": point,
            })
        return parsed

    def get(self, mapID, layers=[0]):
        r = self.getFromMyMap(mapID)
        if r.status_code != 200:
            print("status_code:", r.status_code)
            raise RuntimeError("unexpected status code")
        data = self.parseData(r)
        # mapID = data[1]
        # mapName = data[2]
        parsed = []
        for layer in layers:
            layerData = data[6][layer]
            parsed += self.parseLayerData(layerData)
        return parsed
def parse_devicelist(data_str):
    """Parse the BT Home Hub 6 data format."""
    p = PyJsParser()
    parsed = p.parse(data_str)
    known_devices = {}
    for ele in parsed['body'][1]['declarations'][0]['init']['elements']:
        kv = {}
        if 'properties' not in ele:
            continue
        for prop in ele['properties']:
            kv[prop['key']['name']] = prop['value']['value']
        known_devices[urllib.parse.unquote(kv['mac'])] = urllib.parse.unquote(kv['hostname'])
    return known_devices
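# --- Illustrative call (added sketch; the payload below is hypothetical).
# A BT Home Hub response is assumed to look roughly like this JS: the second
# statement declares an array of objects with url-encoded 'mac' and
# 'hostname' properties. Requires urllib.parse, as in the function above.
_sample = (
    "var version = 1;"
    "var devices = [{mac: '00%3A11%3A22%3A33%3A44%3A55', hostname: 'my%20laptop'}];"
)
print(parse_devicelist(_sample))
# expected: {'00:11:22:33:44:55': 'my laptop'}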
def fixup_function(text):
    p = PyJsParser()
    try:
        ast = p.parse(text)
    except NotImplementedError as err:
        # PyJsParser can't parse class definitions
        raise ParseError(err)
    except Exception as err:
        # Shouldn't be necessary, but PyJsParser has bugs.
        # TODO(nik): Come back here, remove this, track down the problems and fix them.
        raise ParseError(err)

    function_name = None

    # function hello() { ... }
    if ast['body'][0]['type'] == 'FunctionDeclaration':
        function_name = ast['body'][0]['id']['name']
        params = [param['name'] for param in ast['body'][0]['params']]

    # var recursivSum = function(n) { ... }
    if ast['body'][0]['type'] == 'VariableDeclaration':
        function_name = ast['body'][0]['declarations'][0]['id']['name']
        params = [
            param['name']
            for param in ast['body'][0]['declarations'][0]['init']['params']
        ]

    if not function_name:
        raise ParseError('Could not parse function name')

    return """function {0}({1}) {{
  // Your code here.
}}

module.exports = {0};
""".format(function_name, ', '.join(params))
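# --- Illustrative call (added sketch; assumes ParseError is defined in the
# surrounding module). fixup_function rewrites a solved function into an
# empty stub with the same signature:
print(fixup_function("function greet(name, greeting) { return greeting + name; }"))
# -> function greet(name, greeting) {
#      // Your code here.
#    }
#    module.exports = greet;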
def js_detect(url, r, debug=False):
    '''
    url = ""
    debug = False
    call_count = list()
    sub_func_dict = dict()
    char_freq_dict = dict()
    string_len_dict = dict()
    parsed = ""
    reg_result = ""
    '''
    parser = PyJsParser()
    soup = bs(r, 'html.parser')
    tot_script = ""
    for script in soup.find_all('script'):
        out = ""
        try:
            out = script['src']
            if debug:
                print("getting outer js")
            # getting scripts not in site
            '''
            if out[:4] != "http":
                tot_script = get_outer_js(urljoin(self.url, out))
            else:
                tot_script = get_outer_js(out)
            '''
        except KeyError:
            tot_script += script.get_text()
    reg_result = []
    if tot_script != "":
        '''
        count_char(tot_script)
        a = parser.parse(tot_script)
        read_dic_2(a)
        read_dic(a)
        '''
        reg_result = regex().match(tot_script)
    return reg_result
def parse(javascript_code):
    """Return the syntax tree of javascript_code.

    Same as PyJsParser().parse. For your convenience :)
    """
    p = PyJsParser()
    return p.parse(javascript_code)
def __init__(self):
    self.py_js_parser = PyJsParser()
    self.all_nodes = {}
    self.js_ast = None
    self.js_dom = None
    self.error = None
def __init__(self):
    self.parser = PyJsParser()
def fixup_tests(text):
    p = PyJsParser()
    try:
        ast = p.parse(text)
    except Exception as err:
        raise ParseError(err)

    tests_with_names = []
    tests_without_names = []
    expected_function = None
    for statement in ast['body']:
        if statement['type'] == 'EmptyStatement':
            continue
        if statement['type'] == 'VariableDeclaration':
            # This is a complex test with local state. Bail on trying to
            # parse it.
            raise ParseError
        if statement['type'] == 'FunctionDeclaration':
            # This is a test with embedded helper functions. Bail on trying
            # to parse it at the moment.
            raise ParseError

        expression = statement['expression']
        if expression['type'] != 'CallExpression':
            continue
        if expression['callee']['object']['name'] != 'Test':
            continue

        # The test_method is a string like 'assertEquals', etc.
        test_method = expression['callee']['property']['name']
        if test_method in ['assertEquals', 'assertSimilar', 'assertNotEquals']:
            fixed_test = fixup_Test_assertEquals(
                expression, invert=test_method == 'assertNotEquals')
            if fixed_test[1]:
                tests_with_names.append({
                    'code': fixed_test[0],
                    'name': fixed_test[1]
                })
            else:
                tests_without_names.append(fixed_test[0])
            # Most tests have the name of the function to call as the first
            # parameter, but not all.
            if 'callee' in expression['arguments'][0]:
                expected_function = expression['arguments'][0]['callee']['name']
            else:
                expected_function = expression['arguments'][1]['callee']['name']
            continue

        raise Exception(
            'Unknown Test method called, Test.{}'.format(test_method))

    def format_named_test(test):
        return """test({}, () => {{
    {}
}});""".format(test['name'], test['code'])

    def format_unnamed_tests(tests):
        return """test('the tests', () => {{
    {}
}});""".format('\n    '.join(tests))

    # Shouldn't happen -- if it does then we've failed to convert a test.
    if len(tests_with_names) == 0 and len(tests_without_names) == 0:
        raise Exception('No tests created! Test parsing is broken')

    # TODO(nik): This (and the function definition style) should be based
    # on a command line flag -- see the related commented out code where
    # the function template is emitted.
    return """const {0} = require('./code');

describe('Tests', () => {{
    {1}

    {2}
}});
""".format(expected_function,
           '\n\n'.join([format_named_test(test) for test in tests_with_names]),
           format_unnamed_tests(tests_without_names))
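# --- Illustrative input (added sketch). With the companion
# fixup_Test_assertEquals helper available (it is not shown in this file),
# a Codewars-style fixture like the one below would be rewritten into a
# Jest describe/test block; the call stays commented out for that reason.
_fixture = "Test.assertEquals(greet('Bob'), 'Hello, Bob!');"
# print(fixup_tests(_fixture))  # -> const greet = require('./code'); ...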
from enum import Enum
from typing import Optional, Dict, Tuple, List

import requests
from pyjsparser import PyJsParser
from pyquery import PyQuery as pq
from requests.cookies import RequestsCookieJar

# The User-Agent header used for all requests
HEADER_UA = {"User-Agent": "Mozilla/5.0"}

# The domains used in the API
VANIER_DOMAIN = "https://vaniercollege.omnivox.ca"
LEA_DOMAIN = "https://vaniercollege-estd.omnivox.ca/estd"

# The global JS parser
JS_PARSER = PyJsParser()


class ScheduleDay(Enum):
    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4


class OmnivoxSemester:
    """
    Represents a semester.
    """

    def __init__(self, semester_id: str, semester_name: str, current: bool):
def __init__(self):
    with open('config.json') as file:
        config = json.load(file)
    self.event_labels = config["event_labels"]
    self.geocoder = OpenCageGeocode(API_KEY)
    self.p = PyJsParser()
        continue
    elif any(w in f for w in b_files):
        on_screen('b-right', 'blacklisted file', 'WR')
        continue

    # prepare the .js/.html for getting parsed
    with open(sites_folder + js_f[5], 'r') as f:
        js_file = f.read().decode('utf-8')
    if js_f[5].endswith('.html'):
        js_file = re.findall('(?si)<script>(.*?)</script>', js_file)
        js_file = ''.join(js_file)

    """
    start parsing
    read more: https://github.com/PiotrDabkowski/pyjsparser
    """
    p = PyJsParser()
    parsed = p.parse(js_file)

    # parse the JSON file for each query
    for q in queries:
        find_values(parsed['body'], q)
        find_type(result_k, parsed, q)

    # initialize the items
    result_k = []
    path_k = []
    z = 0

    # print the time in seconds spent on each file
    time_delta(time0)

    if websiteName != temp_sitename or i == len(js_list):
import logging
import re
import urllib.request

from bs4 import BeautifulSoup
from pyjsparser import PyJsParser

logger = logging.getLogger()
logger.setLevel(logging.INFO)

if __name__ == "__main__":
    url = 'https://docs.google.com/forms/d/e/1FAIpQLSfuCdfkfq31Xsz6hsGFLviEna4_em2VVzCoJZIALduQs_NEeg/viewform?usp=sf_link'
    page = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(page, 'html.parser')
    js_answers = soup.find_all('script', text=re.compile(r'FB_PUBLIC_LOAD_DATA'))[0].text
    p = PyJsParser()
    js_answers = p.parse(js_answers)
    list_of_answers = js_answers['body'][0]['declarations'][0]['init'][
        'elements'][1]['elements'][1]['elements']
    # figure out a way of identifying answers after parsing; write some recursive filter maybe (or look one up),
    # then iterate over lists and for each question try to find an answer; if there are none, set my own
    # OR
    # remove the tag and use JSON.loads(); this might be easier, but then I will need to identify
    # which questions are a text area and which are not, which is difficult.
def parseOneXinhuaNetJSFile(path, pub_id, titleList):
    # file = codecs.open("C:\\news\\xinhuanet\\4002\\235\\33540.js", "r", encoding="utf-8")
    with open(path, "r", encoding="utf-8") as file:
        decodedStr = file.readlines()
    if len(decodedStr) == 0:
        print("Read Empty File")
        return
    if len(decodedStr[0]) <= 22 or len(decodedStr[0]) >= (1024 * 512):
        print("File not valid")
        return

    text = decodedStr[0]
    p = PyJsParser()
    result = p.parse(text)
    # print(result)

    # collect the key/value pairs from every object literal declared in the file
    valueDict = dict()
    if result.get("body"):
        for member in result["body"]:
            if not member.get("declarations"):
                continue
            for declaration in member["declarations"]:
                init = declaration.get("init")
                if not init or not init.get("properties"):
                    continue
                for property in init["properties"]:
                    try:
                        key = property["key"]["value"]
                        # json.dumps keeps the value quoted; the quotes are
                        # stripped with [1:-1] below
                        value = json.dumps(property["value"]["value"],
                                           ensure_ascii=False)
                        valueDict[key] = value
                    except Exception:
                        pass

    if len(valueDict) < 3:
        return

    topic = valueDict["topic"][1:-1].replace("\\\"", "\"")
    if valueDict.get("content"):
        content = valueDict["content"][1:-1]
    elif valueDict.get("summary"):
        content = valueDict["summary"][1:-1]
    shareurl = valueDict["shareurl"][1:-1]
    releasedate = "20" + valueDict["releasedate"][1:-1]

    found = topic in titleList

    if not found:
        try:
            print(topic)
            print(releasedate)
            # note: building SQL by string concatenation is injection-prone;
            # kept as in the original, a parameterized query would be safer
            sql = ("insert into news values(NULL,'" + topic + "','" + "" +
                   "','" + content + "','" + shareurl + "','" + releasedate +
                   "','" + str(pub_id) + "','" +
                   path.replace("\\", "\\\\") + "')")
            cur.execute(sql)
        except Exception:
            print("sql execute failed")
        finally:
            # conn.commit()
            pass
    else:
        print("pass")
def parse_file(self, fname, file_content):
    js_parser = PyJsParser()
    return js_parser.parse(file_content)
def getConfig(self):
    try:
        r = requests.get(self.URLGET,
                         auth=HTTPDigestAuth(self.user, self.password),
                         timeout=3)
        htmlInput = html.fromstring(r.text.encode('cp1251'))
        script = htmlInput.xpath('//script')
        try:
            p = PyJsParser()
            scriptToPython = p.parse(script[3].text)
            listbody = scriptToPython.get('body')
            # The assigned object literal holds an array of pool entries;
            # each entry carries url/worker/password in properties 0..2.
            properties = listbody[0].get('expression').get('right').get('properties')
            elements = properties[0].get('value').get('elements')
            self.config = [{
                'url': el.get('properties')[0].get('value').get('value'),
                'worker': el.get('properties')[1].get('value').get('value'),
                'password': el.get('properties')[2].get('value').get('value'),
            } for el in elements[:3]]
            self.config.append(properties[6].get('value').get('value'))
            if self.isS9() or self.isT9():
                self.config.append(properties[7].get('value').get('value'))
            return self.config
        except AttributeError:
            print('minerConfiguration.cgi error get config: AttributeError\n')
            return None
        except IndexError:
            print('minerConfiguration.cgi error get config: IndexError\n')
            return None
    except requests.exceptions.Timeout:
        print('Get minerConfiguration.cgi TimeOut\n')
        return None
    except requests.RequestException:
        print('GET minerConfiguration.cgi Request Error\n')
        return None
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from slimit import ast
from slimit.parser import Parser
from slimit.visitors import nodevisitor
from pyjsparser import PyJsParser

p = PyJsParser()

# https://thingspeak.com//apps/plugins/166139
# https://thingspeak.com//apps/plugins/6732


def load(link):
    delay = 5
    values = []
    sub_values = []
    gauge_source = ""
    done = False
    # 'image 1
    '''
def __init__(self, body, flags, prototype=None):
    self.prototype = prototype
    self.glob = True if 'g' in flags else False
    self.ignore_case = re.IGNORECASE if 'i' in flags else 0
    self.multiline = re.MULTILINE if 'm' in flags else 0
    self.value = body
    if (body, flags) in REGEXP_DB:
        self.pat = REGEXP_DB[body, flags]
    else:
        comp = None
        try:
            # converting JS regexp pattern to Py pattern.
            possible_fixes = [(u'[]', u'[\0]'), (u'[^]', u'[^\0]'),
                              (u'nofix1791', u'nofix1791')]
            reg = self.value
            for fix, rep in possible_fixes:
                comp = PyJsParser()._interpret_regexp(reg, flags)
                # print 'reg -> comp', reg, '->', comp
                try:
                    self.pat = re.compile(comp, self.ignore_case | self.multiline)
                    # print reg, '->', comp
                    break
                except Exception:
                    reg = reg.replace(fix, rep)
                    # print 'Fix', fix, '->', rep, '=', reg
            else:
                raise Exception()
            REGEXP_DB[body, flags] = self.pat
        except Exception:
            # print 'Invalid pattern...', self.value, comp
            raise MakeError(
                'SyntaxError', 'Invalid RegExp pattern: %s -> %s' %
                (repr(self.value), repr(comp)))

    # now set own properties:
    self.own = {
        'source': {
            'value': self.value,
            'enumerable': False,
            'writable': False,
            'configurable': False
        },
        'global': {
            'value': self.glob,
            'enumerable': False,
            'writable': False,
            'configurable': False
        },
        'ignoreCase': {
            'value': bool(self.ignore_case),
            'enumerable': False,
            'writable': False,
            'configurable': False
        },
        'multiline': {
            'value': bool(self.multiline),
            'enumerable': False,
            'writable': False,
            'configurable': False
        },
        'lastIndex': {
            'value': 0.,
            'enumerable': False,
            'writable': True,
            'configurable': False
        }
    }
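# --- Illustrative check (added sketch). The translation step above relies on
# PyJsParser's private _interpret_regexp helper, which rewrites a JS regexp
# body into a pattern that Python's re.compile accepts:
_comp = PyJsParser()._interpret_regexp(u'\\d+', u'')
_pat = re.compile(_comp)
assert _pat.match(u'123') is not None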
# Get the script
parsed_filter['script'] = js_filter.find('fval', attrs={
    "name": "script"
}).value.get_text()
logging.debug(parsed_filter['script'])

# Initialize empty list of JavaScript variables
parsed_filter['js_vars'] = []
# Initialize empty list of alerts
parsed_filter['alerts'] = []

# Parse the JavaScript
p = PyJsParser()
res = p.parse(parsed_filter['script'])
# logging.critical(json.dumps(res, indent=4))
parse_js(res, parsed_filter['js_vars'], -1)
logging.debug(parsed_filter['js_vars'])

# Check variables are declared as local
local_vars = OrderedDict()
local_vars["script:0"] = []
for js_var in parsed_filter['js_vars']:
    this_depth = int(js_var.split(':')[1])
    scope = get_scope(local_vars)
    scope_depth = int(scope.split(':')[1])
    scope_name = scope.split(':')[0]
    logging.debug(js_var)
    if this_depth >= scope_depth:
from pyjsparser import PyJsParser

REGEXP_CONVERTER = PyJsParser()


def indent(lines, ind=4):
    return ind * ' ' + lines.replace('\n', '\n' + ind * ' ').rstrip(' ')


def inject_before_lval(source, lval, code):
    if source.count(lval) > 1:
        print()
        print(lval)
        raise RuntimeError('Too many lvals (%s)' % lval)
    elif not source.count(lval):
        print()
        print(lval)
        assert lval not in source
        raise RuntimeError('No lval found "%s"' % lval)
    end = source.index(lval)
    inj = source.rfind('\n', 0, end)
    ind = inj
    while source[ind + 1] == ' ':
        ind += 1
    ind -= inj
    return source[:inj + 1] + indent(code, ind) + source[inj + 1:]


def get_continue_label(label):
    # str.encode('hex') is Python 2 only; bytes.hex() is the Python 3 equivalent
    return CONTINUE_LABEL % label.encode('utf-8').hex()
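# --- Illustrative use of inject_before_lval (added sketch, made-up source).
# The injected code should end with a newline so it lands on its own line,
# indented to match the marker's line:
_src = "def f():\n    LVAL_MARKER\n"
print(inject_before_lval(_src, 'LVAL_MARKER', 'injected()\n'))
# -> def f():
#        injected()   (4-space indent, matching the marker)
#        LVAL_MARKER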
class Javascript_Parser:
    py_js_parser: PyJsParser
    program: Program

    def __init__(self):
        self.py_js_parser = PyJsParser()
        self.all_nodes = {}
        self.js_ast = None
        self.js_dom = None
        self.error = None

    def ast_from_py_js_parser(self, js_code):
        return self.py_js_parser.parse(js_code)

    def ast_to_dom(self):
        self.program = Program(js_ast=self.js_ast, all_nodes=self.all_nodes)
        self.program.parse_node()
        return self.program

    def process_js_code(self, js_code):
        try:
            self.error = None
            self.js_ast = self.ast_from_py_js_parser(js_code=js_code)
            self.js_dom = self.ast_to_dom()
        except Exception as error:
            self.error = error
        return self

    def all_nodes__stats(self):
        keys = list_set(self.all_nodes)
        result = {}
        all_nodes = self.all_nodes
        for key in keys:
            key_nodes = all_nodes[key]
            result[key] = {'size': len(key_nodes)}
        return result

    def get_functions(self):
        node_id = "FunctionDeclaration"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.id
            print(f"- {name} - {node.params}")
        return nodes

    def get_literals(self):
        node_id = "Literal"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            print(f"raw: {node.raw} | value: {node.value} | regex: {node.regex}")

    def get_variables(self):
        variables = {}
        node_id = "VariableDeclarator"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            variable: VariableDeclarator = node
            if variable.name:
                variables[variable.name] = variable.value
        return variables

    def function_names(self, min_name_size=0):
        node_id = "FunctionDeclaration"
        names = []
        nodes = self.all_nodes.get(node_id)
        if nodes:
            for node in nodes:
                if node.id:
                    name = node.id.get('name')
                    if min_name_size < len(name):
                        names.append(name)
        return unique(names)

    def identifier_names(self, min_name_size=0):
        node_id = "Identifier"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.name
            if min_name_size < len(name):
                names.append(name)
        return unique(names)

    def literal_names(self, min_name_size=0, starts_with=None):
        node_id = "Literal"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.raw
            if min_name_size < len(name):
                if starts_with is None or name.startswith(starts_with):
                    names.append(name)
        return unique(names)

    def var_names(self, min_name_size=0):
        node_id = "VariableDeclarator"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.id.get('name')
            if min_name_size < len(name):
                names.append(name)
        return unique(names)
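# --- Illustrative usage (added sketch). Assuming Program, list_set and
# unique come from the same module as Javascript_Parser, processing a small
# script exposes the collected node tables; process_js_code traps failures
# into self.error instead of raising:
_jp = Javascript_Parser().process_js_code("function add(a, b) { return a + b; }")
if _jp.error is None:
    print(_jp.function_names())    # e.g. ['add']
    print(_jp.all_nodes__stats())  # node-type -> {'size': count}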
class Scrap:

    def __init__(self):
        with open('config.json') as file:
            config = json.load(file)
        self.event_labels = config["event_labels"]
        self.geocoder = OpenCageGeocode(API_KEY)
        self.p = PyJsParser()

    def parse_date(self, date):
        try:
            day, month, year = date[0]['value'].split('.')
            hour, minute = date[2]['value'].split(':')
        except (AttributeError, ValueError):
            return False
        return datetime.datetime(int(year), int(month), int(day),
                                 int(hour), int(minute), 0)

    def create_geojson(self, query=None, latlng=None):
        results = []
        if query is not None:
            geo = self.geocoder.geocode(query)
            results.append(geo[0]['geometry']['lng'])
            results.append(geo[0]['geometry']['lat'])
        else:
            results.append(latlng[1])
            results.append(latlng[0])
        return Point((results[0], results[1]))

    def get_address(self, latlng):
        return self.geocoder.reverse_geocode(latlng[0], latlng[1])[0]['formatted']

    def event_parser(self, event):
        source = urllib.request.urlopen(event)
        soup = bs.BeautifulSoup(source, 'lxml')
        info = soup.find_all('script')[8].getText()
        info_dict = self.p.parse(info)
        properties = info_dict['body'][4]['declarations'][0]['init']['properties']
        name = properties[2]['value']['value']
        url = properties[5]['value']['value']
        longDescription = properties[6]['value']['value']
        sDF = bs.BeautifulSoup(longDescription, 'lxml')
        longDescription = str(longDescription)
        sDF = ''.join(sDF.find_all(text=True))
        shortDescription = ' '.join(re.split(r'(?<=[.:;])\s', sDF)[:2]) + ' [...] '
        tags = properties[37]['value']['value']
        creationDate = datetime.datetime.now()
        eventStart = self.parse_date(properties[16]['value']['elements'])
        if self.parse_date(properties[17]['value']['elements']):
            eventEnd = self.parse_date(properties[17]['value']['elements'])
        else:
            eventEnd = eventStart
        owner = 1
        categories_elements = properties[18]['value']['elements']
        categories = []
        for category in categories_elements:
            # categories.append(category['value'].lower())
            if 'sport' in category['value'].lower():
                categories.append(1)
            elif 'kultura' in category['value'].lower():
                categories.append(2)
            elif 'koncert' in category['value'].lower():
                categories.append(3)
            elif 'targi' in category['value'].lower():
                categories.append(4)
            elif 'hackathon' in category['value'].lower():
                categories.append(6)
            else:
                categories.append(5)
        categories = list(set(categories))
        imageSource = properties[20]['value']['value']
        latlng = (float(properties[31]['value']['elements'][0]['value']),
                  float(properties[31]['value']['elements'][1]['value']))
        # ----------- UNCHECK !!! (limit of 2,500 requests/day)
        geoJSON = str(self.create_geojson(latlng=latlng))
        address = self.get_address(latlng)

        def date_converter(o):
            if isinstance(o, datetime.datetime):
                return o.isoformat()

        used_var_list = [
            name, shortDescription, longDescription, creationDate, eventStart,
            eventEnd, owner, geoJSON, imageSource, address, self.addressCity
        ]
        dic = {}
        dic["event"] = dict(zip(self.event_labels, used_var_list))
        dic["categories"] = categories
        return json.dumps(dic, default=date_converter)

    def scrap_kiwiportal(self, url):
        self.addressCity = url.rsplit('/', 1)[-1].capitalize()
        try:
            source = urllib.request.urlopen(url)
        except Exception:
            print('Website ERROR')
            return []
        soup = bs.BeautifulSoup(source, 'lxml')
        event_list = soup.find_all(
            'a',
            {'href': re.compile(r'https:\/\/www\.kiwiportal\.pl\/wydarzenia/[0-9]+.*')})
        event_list = list(set([event['href'] for event in event_list]))
        json_list = []
        for event in event_list:
            try:
                json_list.append(self.event_parser(event))
            except AttributeError:
                print('AttributeError occurred inside event_parser')
        return json_list


# test --------------------------------------------------------------------
# s = Scrap()
# s.scrap_kiwiportal('https://www.kiwiportal.pl/wydarzenia/m/warszawa')
# s.create_geojson(query='Polska, Poznań, ulica Stróżyńskiego 17c/10')
# print(s.get_address((21.0246, 52.2791)))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import logging

from pyjsparser import PyJsParser

text = '''
$.ajax({
    type: "POST",
    url: "https://msec.flyme.cn/captcha/server/check",  // the URL to request
    data: param,
    success: function (data) {  // success callback
        console.info(data);
    },
    error: function (e) {
        console.info(e);
    }
});
'''

p = PyJsParser()
result = None
try:
    result = p.parse(text)
except Exception as e:
    logging.error(e)

result_json = json.dumps(result)
print(result_json)