def crawl_site(db, site, page, pages, force):
    """Crawl listing pages of ``site`` and insert videos not yet stored.

    Pages ``page`` through ``page + pages - 1`` are fetched one at a time
    with ``get_page``. A video is looked up by its link in ``video.url``
    and inserted when no row is found. Unless ``force`` is true, crawling
    stops after the first page on which an already-stored video was seen.

    Args:
        db: DB-API connection; ``cursor()`` and ``commit()`` are used.
        site: Site identifier, passed to ``get_page`` and stored in the
            ``site`` column.
        page: First page number to fetch.
        pages: Maximum number of pages to fetch.
        force: When true, keep crawling even after known videos are seen.
    """
    logger.info('crawling site %s', site)
    cur = db.cursor()
    write = 0
    skip = False
    try:
        for p in range(page, page + pages):
            logger.info('crawling site %s page %d', site, p)
            page_url, videos = get_page(site, p)
            logger.info('page url %s %d videos', page_url, len(videos))
            for v in videos:
                cur.execute("SELECT id FROM video WHERE url=%s", [v.link])
                if cur.fetchone():
                    logger.debug('video %s already exists', v.id)
                    # Remember the hit but finish the page, so any new
                    # videos listed after this one are still stored.
                    skip = True
                    continue
                # Use the module logger (not the root ``logging`` module)
                # so these records follow the same configuration as the
                # rest of this function's output.
                logger.info('add video %s %s %s', site, v.id, v.title)
                # Note the column mapping: ``url`` holds the video link,
                # ``link`` holds the cover image, ``label`` holds the id.
                cur.execute(
                    """INSERT INTO video (url, link, title, label, datetime, site)
                    VALUES (%s, %s, %s, %s, %s, %s)""",
                    [v.link, v.cover, v.title, v.id, v.date or None, site])
                write += 1
            if not force and skip:
                logger.info('skip from page %d', p + 1)
                break
            # Random pause of up to 10 seconds between page fetches.
            time.sleep(random.random() * 10)
    finally:
        # Release the cursor even when fetching or a query raises.
        cur.close()
    if write:
        logger.info('add %d videos', write)
    # Commit once at the end so all inserts of this crawl land together.
    db.commit()
def av_site(site, page=1):
    """Render one listing page of ``site`` as an HTML document.

    The page is fetched with ``av.get_page(site, page)``, which yields the
    source URL and the list of videos. Each video is shown with its cover,
    magnet/search links, date and rating stars, followed by the entries of
    ``video.find``; entries ending in one of ``VTYPES`` also get download
    and VLC links. A "Next Page" link points at ``page + 1``.

    Args:
        site: Site identifier, passed to ``av.get_page`` and used in the
            "Next Page" link.
        page: Page number to show; defaults to 1.

    Returns:
        Whatever ``template`` returns for the rendered markup.
    """
    markup = '''<html>
<head>
<style>
a:visited {
    color: purple;
}
div {
    font-size: x-large;
}
.video {
    padding: 10px;
}
.star {
    color: gold;
}
</style>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
<script src="http://code.jquery.com/jquery-2.1.4.min.js"></script>
<meta name="referrer" content="no-referrer" />
</head>
<body>
<div class="container">
<div><a target="_blank" href="{{source}}">Source</a></div>
% for video in videos:
<div class="video">
    <div><a target="_blank" href="{{video.link}}"><img src="{{video.cover}}"></a></div>
    <div>
        <a href="/api/magnet/{{video.search}}" class="btn btn-sm btn-default glyphicon glyphicon-magnet"></a>
        <a target="_blank" href="/api/search/{{video.search}}">{{video.id}}</a>
        <a target="_blank" href="/api/search/{{video.name}}">{{video.name}}</a>
        <a target="_blank" href="/api/search/{{video.title}}">{{video.title}}</a>
        <span>{{video.date}}</span>
        <span class="star">{{u"\u2605" * int(video.rating)}}</span>
        <span>{{video.rating if video.rating > 0 else ''}}</span>
    </div>
    <div>
    % for f in video.find:
        <div>
            <span>{{f}}</span>
            % if f.endswith(VTYPES):
            <a href="http://ypcat.csie.org{{f}}" class="btn btn-sm btn-default glyphicon glyphicon-download-alt"></a>
            <a href="vlc://ypcat.csie.org{{f}}" class="btn btn-sm btn-warning glyphicon glyphicon-play"></a>
            % end
        </div>
    % end
    </div>
</div>
% end
<div><a href="/av/{{site}}/{{page + 1}}">Next Page</a></div>
</div>
</body>
</html>'''

    source_url, entries = av.get_page(site, page)

    # Names the template body refers to.
    context = {
        'videos': entries,
        'site': site,
        'page': page,
        'source': source_url,
        'VTYPES': VTYPES,
    }
    return template(markup, **context)