def main():
    """Search for images and download the results into ``Config.directory``.

    Steps, in order:
      1. Call ``prepare()`` and ``makedir(Config.directory)``.
      2. Build the search URL with ``baseURL()`` and walk the result pages
         until at least ``min(searchResult(URL), Config.count)`` image URLs
         have been collected (or a page comes back empty).
      3. Drop every URL whose file name, as computed by
         ``getFilenameFromURL``, already exists in ``Config.directory``.
      4. Put the remaining URLs on a queue, start ``Config.thread_count``
         ``downloadFromQueue`` workers, wait for the queue to drain and
         print the length of the shared ``failure`` list.

    Exits the process with status 1 when the search yields no results.

    Output uses single-argument ``print(...)`` calls and
    ``sys.stdout.write`` so the text is the same whether the module runs
    with print statements (Python 2) or the print function.
    """
    # Preparation
    prepare()
    makedir(Config.directory)
    print('Generate search url')
    URL = baseURL()

    # Download
    # Use the smaller of the number of search results and the requested count.
    count = min(searchResult(URL), Config.count)
    # Nothing matched the search: stop here.
    if not count:
        print("No search result at current condition.")
        sys.exit(1)

    # Collect image URLs page by page until enough have been gathered.
    imglist = []
    page_no = 0  # number of pages requested so far
    sys.stdout.write('Fetching page')
    while len(imglist) < count:
        sys.stdout.write(' %d' % page_no)
        # Flush so the progress is visible while the page is being fetched.
        sys.stdout.flush()
        page_no += 1
        tmplist = getImageUrlList(URL)
        if not tmplist:
            # An empty page adds nothing, so the loop condition could never
            # become false; stop instead of requesting pages forever.
            break
        imglist.extend(tmplist)
        URL = nextPage(URL, len(tmplist))
    sys.stdout.write('\n')  # end the progress line

    # The last page may overshoot the requested count; everything that was
    # collected is downloaded.
    count = len(imglist)
    print("There're %d files to download" % count)

    # Remove URLs whose file is already present. The directory is listed
    # once into a set instead of once per URL.
    existing = set(os.listdir(Config.directory))
    imglist = [url for url in imglist
               if getFilenameFromURL(url) not in existing]
    print("There's %d files already downloaded." % (count - len(imglist)))

    # Download the remaining URLs with a pool of worker threads.
    print('Fetching list of %d files' % len(imglist))
    queue = Queue()
    for url in imglist:
        queue.put(url)
    # Shared with every worker; presumably they append the URLs that could
    # not be fetched -- confirm against downloadFromQueue.
    failure = []
    for _ in range(Config.thread_count):
        start_new_thread(
            downloadFromQueue,
            (queue, failure, Config.directory, Config.timeout))
    # Blocks until every queued item has been marked done; this relies on
    # downloadFromQueue calling task_done() for each item it takes.
    queue.join()
    print("%d failed to fetch." % len(failure))
# Download
# NOTE(review): this run of statements has no visible header; `count`,
# `searchURL`, `imglist`, `while_n` and `directory` are expected to be bound
# before it executes.
# Use the smaller of the number of search results and the requested count.
count = min(searchResult(searchURL), count)
# Nothing matched the search: stop here.
if not count:
    print("No search result at current condition.")
    sys.exit(1)

# Collect the requested number of URLs into a list, one page at a time.
sys.stdout.write('Fetching page')
while len(imglist) < count:
    # while_n counts how many times the loop has run.
    sys.stdout.write(' %s' % (while_n,))
    # Flush so the progress is visible while the page is being fetched.
    sys.stdout.flush()
    while_n += 1
    tmplist = getImageUrlList(searchURL)
    if not tmplist:
        # An empty page adds nothing, so the loop condition could never
        # become false; stop instead of requesting pages forever.
        break
    imglist = imglist + tmplist
    searchURL = nextPage(searchURL, len(tmplist))
sys.stdout.write('\n')  # end the progress line

count = len(imglist)
print("There're %d files to download" % count)

# Remove URLs whose file is already present. The directory is listed once
# into a set instead of once per URL.
existing = set(os.listdir(directory))
imglist = [url for url in imglist
           if getFilenameFromURL(url) not in existing]
print("There's %d files already downloaded." % (count - len(imglist)))

# Download the list. A timeout of 20 is used; 10 seemed a bit too small.
print('Fetching list of %d files' % len(imglist))
failure = threadDownloadFromList(imglist, directory=directory, timeout=20)
print("%d failed to fetch." % len(failure))

# Cleanup
# 1. Add the file extension
sys.stdout.write('Adding extension ...')
sys.stdout.flush()
for fname in os.listdir(directory):
    # os.path.join avoids a doubled separator when `directory` already ends
    # with one.
    addExtension(os.path.join(directory, fname), '.jpg')