def _get_json_list(url):
    """Fetch ``url`` and return the entries of its embedded ``imgdata`` JSON.

    Backslash-escaped slashes and single quotes in the fetched text are
    un-escaped first.  The text between ``var imgdata =`` and the next
    ``;v`` is then parsed as JSON, and its ``'data'`` entry is returned
    with the final item sliced off.

    Raises IndexError when the fetched text has no ``var imgdata =`` block.
    """
    page = get_unicode_content(url).replace('\\/', '/').replace('\\\'', '\'')
    matches = re.findall(r'(?<=var imgdata =).*?(?=;v)', page)
    # Only the first embedded block is used.
    payload = json.loads(matches[0])
    return payload['data'][:-1]
def search_result(url):
    """Return the result count reported by the search page at ``url``.

    The fetched page is fed to a ``CustomHTMLParser``; the string chosen by
    ``longest_string`` from the parser's ``script_list`` is then searched
    for the first ``"listNum":<digits>,`` field.

    Returns the count as an int, or 0 when no such field (with at least
    one digit) is present.
    """
    parser = CustomHTMLParser()
    parser.feed(get_unicode_content(url))
    block = longest_string(parser.script_list)
    parser.close()
    # Raw string: "\d" inside a plain literal is an invalid escape sequence
    # (SyntaxWarning on recent Python versions).
    # "\d+" rather than "\d*": the latter also matches the empty string for
    # input such as `"listNum":,`, which made int('') raise ValueError
    # instead of falling through to the documented 0.
    match = re.search(r'(?<="listNum":)\d+(?=,)', block)
    if match is None:
        return 0
    return int(match.group(0))
def _get_params(url, parser):
    """Feed the content fetched from ``url`` to ``parser`` and return its params.

    ``parser`` must provide ``feed()`` and expose a ``form_params``
    attribute (presumably a dict of form parameters filled in while
    feeding -- confirm against the parser class).  The parser is not
    closed here.
    """
    parser.feed(get_unicode_content(url))
    return parser.form_params