def _getJsonList(url):
    """Return the image entries embedded in the page at *url*.

    The page content (as returned by ``getCodingContent``) is searched for
    the text between ``var imgdata =`` and the following ``;v``.  The first
    such span is parsed as JSON, and its ``'data'`` entry is returned with
    the last element dropped.

    Raises:
        IndexError: if the page contains no ``var imgdata =`` span.
    """
    content = getCodingContent(getStream(url))
    matches = re.findall(r'(?<=var imgdata =).*?(?=;v)', content)
    payload = json.loads(matches[0])
    # The slice deliberately leaves out the final element of 'data'.
    return payload['data'][:-1]
def getImageUrlList(url):
    """Return the result of ``getImageUrlFromScript`` for the page at *url*.

    The page content is fed to a fresh ``MyParser``.  The value that
    ``longestString`` picks from the parser's ``scriptList`` is passed
    through ``cutTo(..., ';')`` and then handed to
    ``getImageUrlFromScript``.
    """
    content = getCodingContent(getStream(url))
    page_parser = MyParser()
    page_parser.feed(content)
    script = longestString(page_parser.scriptList)
    return getImageUrlFromScript(cutTo(script, ';'))
def searchResult(url):
    """Return the ``listNum`` count found in the page at *url*.

    The page content is fed to a fresh ``MyParser``.  The value that
    ``longestString`` picks from the parser's ``scriptList`` is searched for
    one or more digits that directly follow ``"listNum":`` and are directly
    followed by a comma.

    Returns:
        The first such number as an ``int``, or ``0`` when there is none.
    """
    parser = MyParser()
    parser.feed(getCodingContent(getStream(url)))
    block = longestString(parser.scriptList)
    parser.close()
    # Raw string: a bare "\d" in a normal literal is an invalid escape
    # sequence.  "\d+" instead of "\d*": the old pattern could match an empty
    # string (e.g. '"listNum":,'), and int('') then raised ValueError instead
    # of falling through to 0.
    match = re.search(r'(?<="listNum":)\d+(?=,)', block)
    if match is None:
        return 0
    return int(match.group())
def _getParams(url, parser):
    """Feed the page at *url* to *parser* and return its ``formParams``.

    The original docstring describes the result as a dict of the URL's
    parameters; the actual shape is defined by *parser* -- TODO confirm.
    """
    parser.feed(getCodingContent(getStream(url)))
    return parser.formParams
def getParams(url, parser):
    """Feed the page at *url* to *parser* and return its ``formParams``.

    The content passed to ``parser.feed`` is whatever ``getCodingContent``
    returns for the stream opened by ``getStream(url)``.
    """
    page = getCodingContent(getStream(url))
    parser.feed(page)
    return parser.formParams
def run(self):
    """Fetch ``self.url`` and store it under ``self.directory``.

    The URL is appended to ``self.failure`` when ``getStream`` returns a
    falsy value or ``writeBinFile`` reports a falsy result.
    ``self.finished`` is set to ``True`` afterwards.
    """
    stream = getStream(self.url, timeout=self.timeout)
    file_name = getFilenameFromURL(self.url)

    saved = False
    if stream:
        # Only attempt the write when there is something to write.
        saved = writeBinFile(stream, file_name, self.directory)
    if not saved:
        self.failure.append(self.url)

    # NOTE(review): the collapsed source does not show whether this sat
    # inside the failure branch; assumed to run unconditionally -- confirm.
    self.finished = True