Example #1
def parse_js(js_html):
    try:
        p = PyJsParser()
        js_ast = p.parse(js_html)
    except Exception:  # a bare except would also swallow KeyboardInterrupt
        return False
    return js_ast
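For reference, a minimal sketch of what a successful parse returns: pyjsparser produces esprima-style nested dicts (the input string here is illustrative):

from pyjsparser import PyJsParser

ast = PyJsParser().parse("var x = 'hi';")
print(ast['type'])                               # 'Program'
decl = ast['body'][0]
print(decl['type'])                              # 'VariableDeclaration'
print(decl['declarations'][0]['id']['name'])     # 'x'
print(decl['declarations'][0]['init']['value'])  # 'hi'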
Example #2
def parse_js(self, js_html):
    try:
        p = PyJsParser()
        js_ast = p.parse(js_html)
    except Exception:  # report the failure instead of swallowing everything
        self.interfacer.raise_error("Unable to get a JS AST")
        return False
    return js_ast
Example #3
def parse(javascript_code):
    """Returns syntax tree of javascript_code.

    Syntax tree has the same structure as syntax tree produced by esprima.js

       Same as PyJsParser().parse  For your convenience :) """
    p = PyJsParser()
    return p.parse(javascript_code)
Example #5
def get_config(pool):
    if pool['pool_API_type'] == 'node-cryptonote-pool':
        try:
            response = requests.get(pool['config_file'], stream=True)
            if response.status_code == 200:
                config = response.text
                p = PyJsParser()
                config_json = p.parse(config)
                for entry in config_json['body']:
                    if entry['type'] == 'VariableDeclaration':
                        if entry['declarations'][0]['id']['name'] == "api":
                            pool['pool_API_url'] = entry['declarations'][0][
                                'init']['value']
                        elif "poolHost" in entry['declarations'][0]['id'][
                                'name']:
                            if entry['declarations'][0]['init'][
                                    'value'] not in pool['pool_mining_urls']:
                                pool['pool_mining_urls'].append(
                                    entry['declarations'][0]['init']['value'])
            return pool
        except Exception:  # request failed or the config was not parseable
            print("error getting the provided URL for config file")
            return pool
    elif pool['pool_API_type'] == 'nodejs-pool':
        try:
            response = requests.get(pool['config_file'], stream=True)
            if response.status_code == 200:
                config = response.text.splitlines()
                for line in config:
                    if 'api_url' in line:
                        pool['pool_API_url'] = line[line.index("api_url") +
                                                    11:line.index(",") - 1]
        except Exception:
            print("error getting the provided URL for config file")
        try:
            poolports_url = pool['pool_API_url'] + "/pool/ports"
            response = requests.get(poolports_url)
            if response.status_code == 200:
                poolports = json.loads(response.text)
                for pplns in poolports['pplns']:
                    if pplns['host']['hostname'] not in pool[
                            'pool_mining_urls']:
                        pool['pool_mining_urls'].append(
                            pplns['host']['hostname'])
                for pplns in poolports['global']:
                    if pplns['host']['hostname'] not in pool[
                            'pool_mining_urls']:
                        pool['pool_mining_urls'].append(
                            pplns['host']['hostname'])
        except Exception:
            print("error getting the list of poolHost via API")
        return pool
    else:
        return pool
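Several of these examples repeat the same walk over top-level VariableDeclaration nodes; a generic helper along these lines (a sketch, not part of any of the original projects) captures the pattern:

def extract_literal_vars(js_ast):
    """Map top-level `var name = <literal>;` declarations to their values."""
    found = {}
    for node in js_ast.get('body', []):
        if node.get('type') != 'VariableDeclaration':
            continue
        for decl in node['declarations']:
            init = decl.get('init') or {}
            if init.get('type') == 'Literal':
                found[decl['id']['name']] = init.get('value')
    return found

With the node-cryptonote-pool config above, extract_literal_vars(p.parse(config)).get('api') would replace the inner loop over config_json['body'].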
Example #6
def parse_js(content):
    from datetime import datetime
    start = datetime.now()
    js_parser = PyJsParser()
    try:
        tree = js_parser.parse(content)
        for expr in tree["body"]:
            parse_expr(expr)
    except JsSyntaxError:
        parser.process_content(content, contexts.JS_CODE)
    end = datetime.now()
    library.js_us += end - start
Example #7
class GoogleMyMaps:

    def __init__(self):
        self.parser = PyJsParser()

    def getFromMyMap(self, mapID):
        r = requests.get(
            "https://www.google.com/maps/d/edit?hl=ja&mid=" + mapID)
        return r

    def parseData(self, r):
        soup = BeautifulSoup(r.text, "html.parser")
        script = soup.find_all("script")[1].text
        js = self.parser.parse(script)
        pagedata = js["body"][1]["declarations"][0]["init"]["value"]

        data = pagedata.replace("true", "True")
        data = data.replace("false", "False")
        data = data.replace("null", "None")
        data = data.replace("\n", "")
        # exec("data = " + data)
        data = eval(data)
        return data[1]

    def parseLayerData(self, layerData):
        # layerName = layerData[2]

        places = layerData[4]
        # url = places[0][0]

        parsed = []
        for place in places:
            placeName = place[5][0][0]

            info = place[4]
            point = info[4]

            parsed.append({
                "placeName": placeName,
                "point": point,
            })

        return parsed

    def get(self, mapID, layers=(0,)):  # tuple default avoids the mutable-default pitfall
        r = self.getFromMyMap(mapID)
        if r.status_code != 200:
            # a bare `raise` had no active exception here; raise something concrete
            raise RuntimeError("status_code: %d" % r.status_code)

        data = self.parseData(r)
        # mapID = data[1]
        # mapName = data[2]

        parsed = []
        for layer in layers:
            layerData = data[6][layer]
            parsed += self.parseLayerData(layerData)

        return parsed
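Since pagedata is expected to reduce to plain literals after the true/false/null replacements, ast.literal_eval is a safer drop-in for the eval() in parseData above (a sketch of the substitution; it raises instead of executing unexpected code):

import ast

data = pagedata.replace("true", "True") \
               .replace("false", "False") \
               .replace("null", "None") \
               .replace("\n", "")
data = ast.literal_eval(data)  # ValueError/SyntaxError on anything non-literal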
Example #8
def parse_devicelist(data_str):
    """Parse the BT Home Hub 6 data format."""
    # print(data_str);
    p = PyJsParser()
    parsed = p.parse(data_str)
    known_devices = {}
    for ele in parsed['body'][1]['declarations'][0]['init']['elements']:
        kv = {}
        if 'properties' not in ele:
            continue
        for prop in ele['properties']:
            kv[prop['key']['name']] = prop['value']['value']
        known_devices[urllib.parse.unquote(kv['mac'])] = urllib.parse.unquote(kv['hostname'])
    print(known_devices)

    devices = {}

    return devices
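A hedged guess at the input parse_devicelist expects, inferred from the AST path above rather than from any BT Home Hub documentation: the second statement must declare an array of objects with percent-encoded mac and hostname fields.

data_str = """
var dummy = 0;
var known_device_list = [
    { mac: '00%3A11%3A22%3A33%3A44%3A55', hostname: 'laptop' }
];
"""
parse_devicelist(data_str)  # prints {'00:11:22:33:44:55': 'laptop'}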
Example #9
def fixup_function(text):
    p = PyJsParser()
    try:
        ast = p.parse(text)
    except NotImplementedError as err:
        # PyJsParser can't parse class definitions
        raise ParseError(err)
    except Exception as err:
        # Shouldn't be necessary, but PyJsParser has bugs.
        # TODO(nik): Come back here, remove this, track down the problems and fix them.
        raise ParseError(err)

    function_name = None

    # function hello() { ... }
    if ast['body'][0]['type'] == 'FunctionDeclaration':
        function_name = ast['body'][0]['id']['name']
        params = [param['name'] for param in ast['body'][0]['params']]

    # var recursivSum = function(n) { ... }
    if ast['body'][0]['type'] == 'VariableDeclaration':
        function_name = ast['body'][0]['declarations'][0]['id']['name']
        params = [
            param['name']
            for param in ast['body'][0]['declarations'][0]['init']['params']
        ]

    if not function_name:
        raise ParseError('Could not parse function name')

    return """function {0}({1}) {{
  // Your code here.
}}

module.exports = {0};
""".format(function_name, ', '.join(params))
Example #10
def js_detect(url, r, debug=False):
    '''
    url = ""
    debug = False
    call_count = list()
    sub_func_dict = dict()
    char_freq_dict = dict()
    string_len_dict = dict()
    parsed = ""
    reg_result = ""
    '''
    parser = PyJsParser()
    soup = bs(r, 'html.parser')
    tot_script = ""
    for script in soup.find_all('script'):
        out = ""
        try:
            out = script['src']
            if debug:
                print("getting outer js")
            #getting scripts not in site
            '''
            if out[:4] != "http":
                tot_script = get_outer_js(urljoin(self.url, out))
            else:
                tot_script = get_outer_js(out)
            '''
        except KeyError:  # inline script tag without a 'src' attribute
            tot_script += script.get_text()
    
    reg_result = []
    if tot_script != "":
        '''
        count_char(tot_script)
        a = parser.parse(tot_script)
        read_dic_2(a)
        read_dic(a)
        '''
        reg_result = regex().match(tot_script)
    return reg_result        
Example #11
def parse(javascript_code):
    """Returns syntax tree of javascript_code.
       Same as PyJsParser().parse  For your convenience :) """
    p = PyJsParser()
    return p.parse(javascript_code)
Example #12
 def __init__(self):
     self.py_js_parser = PyJsParser()
     self.all_nodes = {}
     self.js_ast = None
     self.js_dom = None
     self.error = None
Example #13
 def __init__(self):
     self.parser = PyJsParser()
Example #14
def fixup_tests(text):
    p = PyJsParser()
    try:
        ast = p.parse(text)
    except Exception as err:
        raise ParseError(err)

    tests_with_names = []
    tests_without_names = []

    expected_function = None

    for statement in ast['body']:
        if statement['type'] == 'EmptyStatement':
            continue
        if statement['type'] == 'VariableDeclaration':
            # This is a complex test with local state. Bail on trying to
            # parse it.
            raise ParseError

        if statement['type'] == 'FunctionDeclaration':
            # This is a test with embedded helper functions. Bail on trying to
            # parse it at the moment.
            raise ParseError

        expression = statement['expression']
        if expression['type'] != 'CallExpression':
            continue

        if expression['callee']['object']['name'] != 'Test':
            continue

        # The test_method is a string like 'assertEquals', etc
        test_method = expression['callee']['property']['name']
        if test_method in ['assertEquals', 'assertSimilar', 'assertNotEquals']:
            fixed_test = fixup_Test_assertEquals(
                expression, invert=test_method == 'assertNotEquals')
            if fixed_test[1]:
                tests_with_names.append({
                    'code': fixed_test[0],
                    'name': fixed_test[1]
                })
            else:
                tests_without_names.append(fixed_test[0])
            # Most tests have the name of the function to call as the first
            # parameter, but not all.
            if 'callee' in expression['arguments'][0]:
                expected_function = expression['arguments'][0]['callee'][
                    'name']
            else:
                expected_function = expression['arguments'][1]['callee'][
                    'name']
            continue

        raise Exception(
            'Unknown Test method called, Test.{}'.format(test_method))

    def format_named_test(test):
        return """test({}, () => {{
        {}
    }});""".format(test['name'], test['code'])

    def format_unnamed_tests(tests):
        return """test('the tests', () => {{
        {}
    }});""".format('\n        '.join(tests))

    # Shouldn't happen -- if it does then we've failed to convert a test.
    if len(tests_with_names) == 0 and len(tests_without_names) == 0:
        raise Exception('No tests created! Test parsing is broken')

    # TODO(nik): This (and the function definition style) should be based
    # on a command line flag -- see the related commented out code where
    # the function template is emitted.
    return """const {0} = require('./code');

describe('Tests', () => {{
    {1}
    
    {2}
}});
""".format(expected_function,
           '\n\n'.join([format_named_test(test) for test in tests_with_names]),
           format_unnamed_tests(tests_without_names))
Example #15
from enum import Enum
from typing import Optional, Dict, Tuple, List

import requests
from pyjsparser import PyJsParser
from pyquery import PyQuery as pq
from requests.cookies import RequestsCookieJar

# The User-Agent header used for all requests
HEADER_UA = {"User-Agent": "Mozilla/5.0"}

# The domains used in the API
VANIER_DOMAIN = "https://vaniercollege.omnivox.ca"
LEA_DOMAIN = "https://vaniercollege-estd.omnivox.ca/estd"

# The global JS parser
JS_PARSER = PyJsParser()


class ScheduleDay(Enum):
    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4


class OmnivoxSemester:
    """
    Represents a semester.
    """
    def __init__(self, semester_id: str, semester_name: str, current: bool):
Example #16
 def __init__(self):
     with open('config.json') as file:
         config = json.load(file)
     self.event_labels = config["event_labels"]
     self.geocoder = OpenCageGeocode(API_KEY)
     self.p = PyJsParser()
Example #17
                continue
            elif any(w in f for w in b_files):
                on_screen('b-right', 'blacklisted file', 'WR')
                continue

            # prepare the .js/.html for getting parsed
            with open(sites_folder + js_f[5], 'r', encoding='utf-8') as f:
                js_file = f.read()
            if js_f[5].endswith('.html'):
                js_file = re.findall('(?si)<script>(.*?)</script>', js_file)
                js_file = ''.join(js_file)
            """
            start parsing
            read more: https://github.com/PiotrDabkowski/pyjsparser
            """
            p = PyJsParser()
            parsed = p.parse(js_file)

            # parse the JSON file for each query
            for q in queries:
                find_values(parsed['body'], q)
                find_type(result_k, parsed, q)
                # initialize the items
                result_k = []
                path_k = []
                z = 0

            # print the time in seconds spent on each file
            time_delta(time0)

        if websiteName != temp_sitename or i == len(js_list):
Example #18
import logging
import urllib.request
import re
from bs4 import BeautifulSoup
from pyjsparser import PyJsParser

logger = logging.getLogger()
logger.setLevel(logging.INFO)

if __name__ == "__main__":

    url = 'https://docs.google.com/forms/d/e/1FAIpQLSfuCdfkfq31Xsz6hsGFLviEna4_em2VVzCoJZIALduQs_NEeg/viewform?usp=sf_link'

    page = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(page, 'html.parser')

    js_answers = soup.find_all('script',
                               text=re.compile(r'FB_PUBLIC_LOAD_DATA'))[0].text

    p = PyJsParser()
    js_answers = p.parse(js_answers)

    list_of_answers = js_answers['body'][0]['declarations'][0]['init'][
        'elements'][1]['elements'][1]['elements']
# figure out a way of identifying answers after the parse: write some
# recursive filter maybe, or look one up (see the sketch below)
# then iterate over the lists and, for each question, try to find an answer;
# if there are none, set my own
# OR remove the tag and use json.loads(); this might be easier, but then I
# will need to identify which questions are a text area and which are not,
# which is difficult.
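A sketch of the recursive filter the comment above asks for: it walks the nested dict/list AST and yields every Literal value (a hypothetical helper, not in the original script):

def iter_literals(node):
    """Recursively yield the 'value' of every Literal node in a pyjsparser AST."""
    if isinstance(node, dict):
        if node.get('type') == 'Literal':
            yield node.get('value')
        for child in node.values():
            yield from iter_literals(child)
    elif isinstance(node, list):
        for item in node:
            yield from iter_literals(item)

For example, list(iter_literals(js_answers)) flattens every literal in the parsed form data.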
Example #19
def parseOneXinhuaNetJSFile(path, pub_id, titleList):
    file = open(path, "r")
    #file = codecs.open("C:\\news\\xinhuanet\\4002\\235\\33540.js", "r", encoding="utf-8")

    decodedStr = file.readlines()

    if len(decodedStr) == 0:
        print "Read Empty File"
        return

    if len(decodedStr[0]) <= 22 or len(decodedStr[0]) >= (1024 * 512):
        print "File not valid"
        return

    text = unicode(decodedStr[0], 'utf-8')

    p = PyJsParser()
    result = p.parse(text)

    #print result

    valueDict = dict()

    if result.get("body"):
        for member in result["body"]:
            if member.get("declarations"):
                for declaration in member["declarations"]:
                    #print declaration["init"];
                    if declaration.get("init"):
                        init = declaration["init"]
                        if init.get("properties"):
                            for property in init["properties"]:
                                #print ("key: %s, value: %s\n" %(str(property["key"]).encode("gbk"), str(property["value"]).encode("gbk")))
                                #test = json.loads(str(property["value"]).encode("gbk"))

                                try:
                                    key = property["key"]["value"]
                                    value = json.dumps(
                                        property["value"]["value"],
                                        ensure_ascii=False)
                                    valueDict[key] = value
                                except Exception:
                                    pass

    if len(valueDict) < 3:
        return

    topic = (valueDict["topic"].encode("utf-8")[1:-1]).replace("\\\"", "\"")
    if valueDict.get("content"):
        content = (valueDict["content"].encode("utf-8")[1:-1])
    elif valueDict.get("summary"):
        content = (valueDict["summary"].encode("utf-8")[1:-1])

    shareurl = (valueDict["shareurl"].encode("utf-8")[1:-1])
    releasedate = "20" + (valueDict["releasedate"].encode("utf-8")[1:-1])

    found = topic in titleList

    sql = ""
    if not found:
        try:
            #print dict;
            print topic
            #print valueDict["content"]
            print releasedate
            #print valueDict["shareurl"]
            sql = "insert into news values(NULL,'" + topic + "','" + "" + "','" + content + "','" + shareurl + "','" + releasedate + "','" + str(
                pub_id) + "','" + path.replace("\\", "\\\\") + "')"
            cur.execute(sql)
        except Exception as e:
            print "sql execute failed"
        else:
            pass
        finally:
            #conn.commit()
            pass
    else:
        print "pass"
Example #20
 def parse_file(self, fname, file_content):
     js_parser = PyJsParser()
     return js_parser.parse(file_content)
Example #21
    def getConfig(self):
        try:
            r = requests.get(self.URLGET,
                             auth=HTTPDigestAuth(self.user, self.password),
                             timeout=3)
            htmlInput = html.fromstring(r.text.encode('cp1251'))
            script = htmlInput.xpath('//script')

            try:
                p = PyJsParser()
                scriptToPython = p.parse(script[3].text)
                listbody = scriptToPython.get('body')
                elements = listbody[0].get('expression').get('right').get(
                    'properties')[0].get('value').get('elements')
                # the first three array elements each describe one pool:
                # url, worker, password
                self.config = [
                    {
                        'url':
                        elements[i].get('properties')[0].get('value').get('value'),
                        'worker':
                        elements[i].get('properties')[1].get('value').get('value'),
                        'password':
                        elements[i].get('properties')[2].get('value').get('value'),
                    }
                    for i in range(3)
                ]
                self.config.append(
                    listbody[0].get('expression').get('right').get(
                        'properties')[6].get('value').get('value'))

                if self.isS9() or self.isT9():
                    self.config.append(
                        listbody[0].get('expression').get('right').get(
                            'properties')[7].get('value').get('value'))

                return self.config

            except AttributeError:
                print(
                    'minerConfiguration.cgi error get config: AttributeError\n'
                )
                return None
            except IndexError:
                print('minerConfiguration.cgi error get config: IndexError\n')
                return None

        except requests.exceptions.Timeout:
            print('Get minerConfiguration.cgi TimeOut\n')
            return None

        except requests.RequestException:
            print('GET minerConfiguration.cgi Request Error\n')
            return None
Example #22
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

from slimit import ast
from slimit.parser import Parser
from slimit.visitors import nodevisitor

from pyjsparser import PyJsParser
p = PyJsParser()

# https://thingspeak.com//apps/plugins/166139
# https://thingspeak.com//apps/plugins/6732


def load(link):
    delay = 5
    values = []
    sub_values = []
    gauge_source = ""

    done = False

    # 'image 1
    '''
Example #23
 def parse_file(self, fname, file_content):
     js_parser = PyJsParser()
     return js_parser.parse(file_content)
Example #24
    def __init__(self, body, flags, prototype=None):
        self.prototype = prototype
        self.glob = True if 'g' in flags else False
        self.ignore_case = re.IGNORECASE if 'i' in flags else 0
        self.multiline = re.MULTILINE if 'm' in flags else 0
        self.value = body

        if (body, flags) in REGEXP_DB:
            self.pat = REGEXP_DB[body, flags]
        else:
            comp = None
            try:
                # converting JS regexp pattern to Py pattern.
                possible_fixes = [(u'[]', u'[\0]'), (u'[^]', u'[^\0]'),
                                  (u'nofix1791', u'nofix1791')]
                reg = self.value
                for fix, rep in possible_fixes:
                    comp = PyJsParser()._interpret_regexp(reg, flags)
                    #print 'reg -> comp', reg, '->', comp
                    try:
                        self.pat = re.compile(
                            comp, self.ignore_case | self.multiline)
                        #print reg, '->', comp
                        break
                    except re.error:  # not Python-compatible yet; apply the next fix
                        reg = reg.replace(fix, rep)
                    # print 'Fix', fix, '->', rep, '=', reg
                else:
                    raise Exception()
                REGEXP_DB[body, flags] = self.pat
            except Exception:
                #print 'Invalid pattern...', self.value, comp
                raise MakeError(
                    'SyntaxError', 'Invalid RegExp pattern: %s -> %s' %
                    (repr(self.value), repr(comp)))
        # now set own properties:
        self.own = {
            'source': {
                'value': self.value,
                'enumerable': False,
                'writable': False,
                'configurable': False
            },
            'global': {
                'value': self.glob,
                'enumerable': False,
                'writable': False,
                'configurable': False
            },
            'ignoreCase': {
                'value': bool(self.ignore_case),
                'enumerable': False,
                'writable': False,
                'configurable': False
            },
            'multiline': {
                'value': bool(self.multiline),
                'enumerable': False,
                'writable': False,
                'configurable': False
            },
            'lastIndex': {
                'value': 0.,
                'enumerable': False,
                'writable': True,
                'configurable': False
            }
        }
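The key call above is PyJsParser()._interpret_regexp, which translates a JavaScript regexp source into a Python-compatible pattern. A minimal sketch of calling it directly (note the leading underscore: this is a private pyjsparser API, so treat it as subject to change):

import re
from pyjsparser import PyJsParser

py_pattern = PyJsParser()._interpret_regexp(u'a+b', u'')
print(re.compile(py_pattern).findall(u'aab aaab'))  # ['aab', 'aaab']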
Example #25
        # Get the script
        parsed_filter['script'] = js_filter.find('fval',
                                                 attrs={
                                                     "name": "script"
                                                 }).value.get_text()
        logging.debug(parsed_filter['script'])

        # Initialize empty list of JavaScript variables
        parsed_filter['js_vars'] = []

        # Initialize empty list of alerts
        parsed_filter['alerts'] = []

        # Parse the JavaScript
        p = PyJsParser()
        res = p.parse(parsed_filter['script'])
        #logging.critical(json.dumps(res,indent=4))
        parse_js(res, parsed_filter['js_vars'], -1)
        logging.debug(parsed_filter['js_vars'])

        # Check variables are declared as local
        local_vars = OrderedDict()
        local_vars["script:0"] = []
        for js_var in parsed_filter['js_vars']:
            this_depth = int(js_var.split(':')[1])
            scope = get_scope(local_vars)
            scope_depth = int(scope.split(':')[1])
            scope_name = scope.split(':')[0]
            logging.debug(js_var)
            if this_depth >= scope_depth:
Example #26
from pyjsparser import PyJsParser

REGEXP_CONVERTER = PyJsParser()


def indent(lines, ind=4):
    return ind * ' ' + lines.replace('\n', '\n' + ind * ' ').rstrip(' ')


def inject_before_lval(source, lval, code):
    if source.count(lval) > 1:
        print
        print lval
        raise RuntimeError('Too many lvals (%s)' % lval)
    elif not source.count(lval):
        print
        print lval
        assert lval not in source
        raise RuntimeError('No lval found "%s"' % lval)
    end = source.index(lval)
    inj = source.rfind('\n', 0, end)
    ind = inj
    while source[ind + 1] == ' ':
        ind += 1
    ind -= inj
    return source[:inj + 1] + indent(code, ind) + source[inj + 1:]


def get_continue_label(label):
    return CONTINUE_LABEL % label.encode('hex')
Example #27
def parse(javascript_code):
    """Returns syntax tree of javascript_code.
       Same as PyJsParser().parse  For your convenience :) """
    p = PyJsParser()
    return p.parse(javascript_code)
Example #28
class Javascript_Parser:
    py_js_parser: PyJsParser
    program: Program

    def __init__(self):
        self.py_js_parser = PyJsParser()
        self.all_nodes = {}
        self.js_ast = None
        self.js_dom = None
        self.error = None

    def ast_from_py_js_parser(self, js_code):
        return self.py_js_parser.parse(js_code)

    def ast_to_dom(self):
        self.program = Program(js_ast=self.js_ast, all_nodes=self.all_nodes)
        self.program.parse_node()
        return self.program

    def process_js_code(self, js_code):
        try:
            self.error = None
            self.js_ast = self.ast_from_py_js_parser(js_code=js_code)
            self.js_dom = self.ast_to_dom()
        except Exception as error:
            self.error = error
        return self

    def all_nodes__stats(self):
        keys = list_set(self.all_nodes)
        result = {}
        all_nodes = self.all_nodes
        for key in keys:
            key_nodes = all_nodes[key]
            result[key] = {'size': len(key_nodes)}
        return result

    def get_functions(self):
        node_id = "FunctionDeclaration"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.id
            print(f"- {name}   -   {node.params}")
        return nodes

    def get_literals(self):
        node_id = "Literal"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            print(
                f"raw: {node.raw} | value: {node.value} | regex: {node.regex}")

    def get_variables(self):
        variables = {}
        node_id = "VariableDeclarator"
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            variable: VariableDeclarator = node
            if variable.name:
                variables[variable.name] = variable.value
        return variables

    def function_names(self, min_name_size=0):
        node_id = "FunctionDeclaration"
        names = []
        nodes = self.all_nodes.get(node_id)
        if nodes:
            for node in nodes:
                if node.id:
                    name = node.id.get('name')
                    if min_name_size < len(name):
                        names.append(name)
        return unique(names)

    def identifier_names(self, min_name_size=0):
        node_id = "Identifier"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.name
            if min_name_size < len(name):
                names.append(name)
        return unique(names)

    def literal_names(self, min_name_size=0, starts_with=None):
        node_id = "Literal"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.raw
            if min_name_size < len(name):
                if starts_with is None or name.startswith(starts_with):
                    names.append(name)
        return unique(names)

    def var_names(self, min_name_size=0):
        node_id = "VariableDeclarator"
        names = []
        nodes = self.all_nodes.get(node_id)
        for node in nodes:
            name = node.id.get('name')
            if min_name_size < len(name):
                names.append(name)
        return unique(names)
Example #29
class Scrap:
    def __init__(self):
        with open('config.json') as file:
            config = json.load(file)
        self.event_labels = config["event_labels"]
        self.geocoder = OpenCageGeocode(API_KEY)
        self.p = PyJsParser()

    def parse_date(self, date):
        try:
            day, month, year = date[0]['value'].split('.')
            hour, minute = date[2]['value'].split(':')
        except (AttributeError, ValueError) as e:
            return False
        return datetime.datetime(int(year), int(month), int(day), int(hour),
                                 int(minute), 0)

    def create_geojson(self, query=None, latlng=None):
        results = []
        if query is not None:
            geo = self.geocoder.geocode(query)
            results.append(geo[0]['geometry']['lng'])
            results.append(geo[0]['geometry']['lat'])
        else:
            results.append(latlng[1])
            results.append(latlng[0])
        return Point((results[0], results[1]))

    def get_address(self, latlng):
        return self.geocoder.reverse_geocode(latlng[0],
                                             latlng[1])[0]['formatted']

    def event_parser(self, event):
        source = urllib.request.urlopen(event)
        soup = bs.BeautifulSoup(source, 'lxml')
        info = soup.find_all('script')[8].getText()
        info_dict = self.p.parse(info)

        name = info_dict['body'][4]['declarations'][0]['init']['properties'][
            2]['value']['value']
        url = info_dict['body'][4]['declarations'][0]['init']['properties'][5][
            'value']['value']
        longDescription = info_dict['body'][4]['declarations'][0]['init'][
            'properties'][6]['value']['value']
        sDF = bs.BeautifulSoup(longDescription, 'lxml')
        longDescription = str(longDescription)
        sDF = ''.join(sDF.find_all(text=True))
        shortDescription = ' '.join(re.split(r'(?<=[.:;])\s',
                                             sDF)[:2]) + ' [...] '
        tags = info_dict['body'][4]['declarations'][0]['init']['properties'][
            37]['value']['value']
        creationDate = datetime.datetime.now()
        eventStart = self.parse_date(
            info_dict['body'][4]['declarations'][0]['init']['properties'][16]
            ['value']['elements'])
        if self.parse_date(info_dict['body'][4]['declarations'][0]['init']
                           ['properties'][17]['value']['elements']):
            eventEnd = self.parse_date(
                info_dict['body'][4]['declarations'][0]['init']['properties']
                [17]['value']['elements'])
        else:
            eventEnd = eventStart
        owner = 1
        categories_elements = info_dict['body'][4]['declarations'][0]['init'][
            'properties'][18]['value']['elements']
        categories = []
        for category in categories_elements:
            # categories.append(category['value'].lower())
            if 'sport' in category['value'].lower():
                categories.append(1)
            elif 'kultura' in category['value'].lower():
                categories.append(2)
            elif 'koncert' in category['value'].lower():
                categories.append(3)
            elif 'targi' in category['value'].lower():
                categories.append(4)
            elif 'hackathon' in category['value'].lower():
                categories.append(6)
            else:
                categories.append(5)
        categories = list(set(categories))
        imageSource = info_dict['body'][4]['declarations'][0]['init'][
            'properties'][20]['value']['value']
        latlng = (float(info_dict['body'][4]['declarations'][0]['init']
                        ['properties'][31]['value']['elements'][0]['value']),
                  float(info_dict['body'][4]['declarations'][0]['init']
                        ['properties'][31]['value']['elements'][1]['value']))
        #----------- UNCHECK !!! - limit of 2500 requests/day
        geoJSON = str(self.create_geojson(latlng=latlng))
        address = self.get_address(latlng)

        def date_converter(o):
            if isinstance(o, datetime.datetime):
                return o.isoformat()

        used_var_list = [
            name, shortDescription, longDescription, creationDate, eventStart,
            eventEnd, owner, geoJSON, imageSource, address, self.addressCity
        ]
        dic = {}
        dic["event"] = dict(zip(self.event_labels, used_var_list))
        dic["categories"] = categories
        return json.dumps(dic, default=date_converter)

    def scrap_kiwiportal(self, url):
        self.addressCity = url.rsplit('/', 1)[-1].capitalize()
        try:
            source = urllib.request.urlopen(url)
        except Exception:  # without the page there is nothing to scrape
            print('Website ERROR')
            return []
        soup = bs.BeautifulSoup(source, 'lxml')
        event_list = soup.find_all(
            'a', {
                'href':
                re.compile(
                    r'https:\/\/www\.kiwiportal\.pl\/wydarzenia/[0-9]+.*')
            })
        event_list = list(set([event['href'] for event in event_list]))
        json_list = []
        for event in event_list:
            try:
                json_list.append(self.event_parser(event))
            except AttributeError:
                print('AttributeError occurred inside event_parser')
        return json_list


# test--------------------------------------------------------------------------------------------------------
# s = Scrap()
# s.scrap_kiwiportal('https://www.kiwiportal.pl/wydarzenia/m/warszawa')
# s.create_geojson(query='Polska, Poznań, ulica Stróżyńskiego 17c/10')
# print(s.get_address((21.0246, 52.2791)))
Example #30
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import logging

from pyjsparser import PyJsParser

text = '''
      $.ajax({
                type: "POST",
                url: "https://msec.flyme.cn/captcha/server/check", //访问的链接
                data: param,
                success: function (data) {  // success callback
                    console.info(data);
                },
                error: function (e) {
                    console.info(e);
                }
            });
'''

p = PyJsParser()
result = None  # keep json.dumps below from failing if the parse raises
try:
    result = p.parse(text)
except Exception as e:
    logging.error(e)

result_json = json.dumps(result)

print(result_json)