def main():
    """Search for images, collect their URLs and download them with threads.

    Steps, in order:

    1. Call ``prepare()`` and ``make_dir(Config.DIRECTORY)``.
    2. Build the search URL with ``base_url(get_keyword())`` and take the
       smaller of ``search_result(url)`` and ``Config.COUNT`` as the number
       of URLs wanted.
    3. Walk the result pages with ``get_image_url_list`` / ``next_page``
       until at least that many URLs are collected, or a page is empty.
    4. Put every URL on a queue, start ``Config.THREAD_COUNT`` threads
       running ``download_from_queue`` and wait on ``queue.join()``.
    5. Print the length of the ``failure`` list shared with the workers.

    Exits the process with status 1 when the wanted count is zero.
    """
    # Single-argument print(...) and sys.stdout.write are used throughout so
    # the function parses the same way under Python 2 and Python 3.
    prepare()
    make_dir(Config.DIRECTORY)

    print('Generate search url')
    url = base_url(get_keyword())

    # Never ask for more images than the search reports.
    count = min(search_result(url), Config.COUNT)
    if not count:
        print("No search result at current condition.")
        sys.exit(1)

    # Collect image URLs page by page; the page counter is only progress
    # output, written on one line.
    imglist = []
    page_no = 0
    sys.stdout.write('Fetching page')
    while len(imglist) < count:
        sys.stdout.write(' %d' % page_no)
        page_no += 1
        page_urls = get_image_url_list(url)
        if not page_urls:
            # An empty page cannot grow the list, so continuing would spin
            # forever; stop with whatever has been collected.
            break
        imglist.extend(page_urls)
        url = next_page(url, len(page_urls))
    sys.stdout.write('\n')  # finish the progress line

    # Pages are added whole, so the list may hold more (or, after an empty
    # page, fewer) entries than requested; report the real number.
    count = len(imglist)
    print("There're %d files to download" % count)

    # NOTE: filtering out files that already exist in Config.DIRECTORY is
    # not done here, so every collected URL is queued.
    print('Fetching list of %d files' % len(imglist))
    queue = Queue()
    for image_url in imglist:
        queue.put(image_url)

    # Workers receive this list; presumably they append the URLs they could
    # not fetch -- verify against download_from_queue.
    failure = []
    for _ in range(Config.THREAD_COUNT):
        start_new_thread(download_from_queue,
                         (queue, failure, Config.DIRECTORY))

    # Assuming Queue is the standard-library queue class, join() blocks
    # until every queued item has been marked done by a worker.
    queue.join()
    print("%d failed to fetch." % len(failure))
def main():
    """Search for images, skip ones already on disk and download the rest.

    Steps, in order:

    1. Call ``prepare()`` and ``makedir(Config.directory)``.
    2. Build the search URL with ``baseURL()`` and take the smaller of
       ``searchResult(URL)`` and ``Config.count`` as the number of URLs
       wanted.
    3. Walk the result pages with ``getImageUrlList`` / ``nextPage`` until
       at least that many URLs are collected, or a page is empty.
    4. Drop every URL whose ``getFilenameFromURL`` result is already listed
       in ``Config.directory``.
    5. Put the remaining URLs on a queue, start ``Config.thread_count``
       threads running ``downloadFromQueue`` and wait on ``queue.join()``.
    6. Print the length of the ``failure`` list shared with the workers.

    Exits the process with status 1 when the wanted count is zero.
    """
    # Single-argument print(...) and sys.stdout.write are used throughout so
    # the function parses the same way under Python 2 and Python 3.
    prepare()
    makedir(Config.directory)

    print('Generate search url')
    URL = baseURL()

    # Never ask for more images than the search reports.
    count = min(searchResult(URL), Config.count)
    if not count:
        print("No search result at current condition.")
        sys.exit(1)

    # Collect image URLs page by page; the page counter is only progress
    # output, written on one line.
    imglist = []
    page_no = 0
    sys.stdout.write('Fetching page')
    while len(imglist) < count:
        sys.stdout.write(' %d' % page_no)
        page_no += 1
        page_urls = getImageUrlList(URL)
        if not page_urls:
            # An empty page cannot grow the list, so continuing would spin
            # forever; stop with whatever has been collected.
            break
        imglist.extend(page_urls)
        URL = nextPage(URL, len(page_urls))
    sys.stdout.write('\n')  # finish the progress line

    # Pages are added whole, so the list may hold more (or, after an empty
    # page, fewer) entries than requested; report the real number.
    count = len(imglist)
    print("There're %d files to download" % count)

    # Remove URLs whose target file name is already present. The directory
    # is listed once and kept as a set, so each URL costs a single hash
    # lookup instead of a fresh os.listdir() call and a list scan.
    existing = set(os.listdir(Config.directory))
    imglist = [image_url for image_url in imglist
               if getFilenameFromURL(image_url) not in existing]
    print("There's %d files already downloaded." % (count - len(imglist)))

    print('Fetching list of %d files' % len(imglist))
    queue = Queue()
    for image_url in imglist:
        queue.put(image_url)

    # Workers receive this list; presumably they append the URLs they could
    # not fetch -- verify against downloadFromQueue.
    failure = []
    for _ in range(Config.thread_count):
        start_new_thread(downloadFromQueue,
                         (queue, failure, Config.directory, Config.timeout))

    # Assuming Queue is the standard-library queue class, join() blocks
    # until every queued item has been marked done by a worker.
    queue.join()
    print("%d failed to fetch." % len(failure))