# NOTE(review): the statements down to the `shutil.rmtree` call appear to be
# the tail of a definition whose header lies outside this view (possibly the
# `cleanup` helper whose call is commented out further down). `coll` is not
# bound anywhere in the visible code, so the real nesting level of these
# statements cannot be determined from here -- confirm before relying on
# this layout.
coll.remove()
folder_path = 'E:/workspace/adultlib/src/main/resources/static/covers/'
if os.path.exists(folder_path):
    # Remove the whole covers directory tree.
    shutil.rmtree(folder_path)


def get_text(key, info):
    """Return the second space-separated token of a labelled field.

    Looks up ``span.header`` elements containing ``key`` through ``info``,
    takes the text of their parent element and splits it on single spaces.

    Args:
        key: Label text inserted verbatim into the ``:contains(...)``
            selector.
        info: Callable accepting a selector string and returning an object
            exposing ``parent().text()`` (presumably a PyQuery document --
            TODO confirm).

    Returns:
        The second token of the parent's text, or ``None`` when the text is
        empty or contains no second token.
    """
    text = info('span.header:contains(' + key + ')').parent().text()
    if not text:
        return None
    parts = text.split(' ')
    # A label without a value yields a single token; report it as missing
    # instead of raising IndexError.
    return parts[1] if len(parts) > 1 else None


collection = get_jav_collection()
# cleanup()
spider = Spider(basic_url='#')
page_count = spider.get_page_count()
print('The page count %d' % page_count)
# NOTE(review): range(1, page_count) never visits page number `page_count`;
# if get_page_count() returns the index of the last page this skips it --
# confirm against Spider.get_page_count().
for i in range(1, page_count):
    items = spider.getdoc(path="page/" + str(i))('.item').items()
    print("Visiting in page %d" % i)
    for item in items:
        number = item.find('.photo-info date:first').text()
        result = collection.find_one({'number': number})
        # Only items whose number is not yet in the collection are processed.
        if result is None:
            small_cover_url = item.find('img').attr("src")
            start_download_img(url=small_cover_url, file_name=number + '_small')
            details_url = item.find('.movie-box').attr("href")
            container = spider.getdoc(url=details_url)('.container')
            big_cover_url = container.find('.bigImage img').attr('src')