Example #1
0
    coll.remove()
    folder_path = 'E:/workspace/adultlib/src/main/resources/static/covers/'
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)


def get_text(key, info):
    """Return the value shown next to the header labelled *key*, or None.

    Args:
        key: Text to look for inside a ``span.header`` element; it is
            interpolated verbatim into a ``:contains(...)`` selector.
        info: Callable that is queried with that selector (presumably a
            PyQuery document/element -- confirm against callers). Its result
            must support ``.parent().text()``.

    Returns:
        The second space-separated token of the parent element's text, or
        ``None`` when the text is empty or contains no second token.
    """
    text = info('span.header:contains(' + key + ')').parent().text()
    if not text:
        return None
    tokens = text.split(' ')
    # A header with nothing after it yields a single token; treat that as
    # "value missing" rather than letting the [1] lookup raise IndexError.
    return tokens[1] if len(tokens) > 1 else None


# Collection handle; it is queried by 'number' below so that items which
# already have a record are skipped.
collection = get_jav_collection()
# cleanup()

# NOTE(review): basic_url is the placeholder '#' here -- presumably the real
# site URL was removed; confirm before running.
spider = Spider(basic_url='#')
page_count = spider.get_page_count()
print('The page count %d' % page_count)
# NOTE(review): range(1, page_count) stops at page_count - 1. If
# get_page_count() returns the number of the last page, that page is never
# visited -- verify against Spider.get_page_count.
for i in range(1, page_count):
    # Fetch listing page i and iterate over every element matching '.item'.
    items = spider.getdoc(path="page/" + str(i))('.item').items()
    print("Visiting in page %d" % i)
    for item in items:
        # The text of the first 'date' element under '.photo-info' serves as
        # the lookup key for this item.
        number = item.find('.photo-info date:first').text()
        result = collection.find_one({'number': number})
        # Only process items that have no existing record for this number.
        if result is None:
            # Download the thumbnail shown on the listing page.
            small_cover_url = item.find('img').attr("src")
            start_download_img(url=small_cover_url, file_name=number + '_small')

            # Follow the item's link to its details page and narrow the
            # document to the '.container' element.
            details_url = item.find('.movie-box').attr("href")
            container = spider.getdoc(url=details_url)('.container')

            # URL of the full-size cover image on the details page.
            big_cover_url = container.find('.bigImage img').attr('src')