Beispiel #1
0
def _get_json_list(url):
    """Fetch ``url`` and return the entries of its embedded ``imgdata`` JSON.

    The page text is expected to contain a ``var imgdata =<json>;v``
    fragment. The JSON part is parsed and every element of its ``'data'``
    entry except the last one is returned (presumably a list of image
    records -- confirm against the page format).

    Raises:
        IndexError: if the ``var imgdata =`` fragment is not found.
        ValueError: if the extracted fragment is not valid JSON.
    """
    text = get_unicode_content(url)
    # Strip the backslash from escaped slashes and escaped single quotes
    # before parsing (presumably so json.loads accepts the payload).
    for escaped, plain in (('\\/', '/'), ("\\'", "'")):
        text = text.replace(escaped, plain)
    # Only the first occurrence of the assignment is used.
    payload = re.findall(r'(?<=var imgdata =).*?(?=;v)', text)[0]
    return json.loads(payload)['data'][:-1]
Beispiel #2
0
def search_result(url):
    """Return the result count reported by the search page at ``url``.

    The page is fetched with ``get_unicode_content`` and fed to a
    ``CustomHTMLParser``. The string that ``longest_string`` picks from the
    parser's ``script_list`` is then scanned for a ``"listNum":<digits>,``
    fragment.

    Returns:
        The first ``listNum`` value as an ``int``, or ``0`` when no such
        fragment containing at least one digit is present.
    """
    parser = CustomHTMLParser()
    parser.feed(get_unicode_content(url))
    block = longest_string(parser.script_list)
    parser.close()
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    # "\d+" rather than "\d*": an empty value such as '"listNum":,' now
    # counts as "no result" instead of reaching int('') and raising
    # ValueError.
    match = re.search(r'(?<="listNum":)\d+(?=,)', block)
    return int(match.group()) if match else 0
Beispiel #3
0
def _get_params(url, parser):
    """Feed the content of ``url`` to ``parser`` and return its form params.

    ``parser`` must provide a ``feed`` method and expose a ``form_params``
    attribute, which is returned as-is after feeding (presumably a dict of
    form field names to values -- confirm against the parser class).
    """
    parser.feed(get_unicode_content(url))
    return parser.form_params