Example #1
0
def crawl_site(db, site, page, pages, force):
    """Crawl consecutive listing pages of ``site`` and store unseen videos.

    Pages ``page`` through ``page + pages - 1`` are fetched with ``get_page``.
    Every video whose ``link`` is not yet present in ``video.url`` is
    inserted. Unless ``force`` is true, crawling stops after the first page
    on which an already-stored video was seen. The transaction is committed
    only when at least one row was inserted.

    Args:
        db: DB-API connection; the queries use the ``%s`` parameter style.
        site: Site identifier passed to ``get_page`` and stored with each row.
        page: Number of the first page to crawl.
        pages: How many consecutive pages to crawl.
        force: When true, keep crawling even after known videos were found.
    """
    logger.info('crawling site %s', site)
    cur = db.cursor()
    write = 0
    skip = False
    try:
        for p in range(page, page + pages):
            logger.info('crawling site %s page %d', site, p)
            page_url, videos = get_page(site, p)
            logger.info('page url %s %d videos', page_url, len(videos))
            for v in videos:
                cur.execute("SELECT id FROM video WHERE url=%s", [v.link])
                if not cur.fetchone():
                    logger.info('add video %s %s %s', site, v.id, v.title)
                    # NOTE(review): the `link` column receives v.cover and the
                    # `url` column receives v.link -- confirm this mapping.
                    cur.execute("""INSERT INTO video (url, link, title, label, datetime, site)
                                   VALUES (%s, %s, %s, %s, %s, %s)""",
                                [v.link, v.cover, v.title, v.id, v.date or None, site])
                    write += 1
                else:
                    logger.debug('video %s already exists', v.id)
                    skip = True
            # A known video means we have caught up with earlier crawls, so
            # later pages are not fetched unless the caller forces it.
            if not force and skip:
                logger.info('skip from page %d', p + 1)
                break
            # Random pause of up to 10 seconds between page fetches.
            time.sleep(random.random() * 10)
        if write:
            logger.info('add %d videos', write)
            db.commit()
    finally:
        # Release the cursor even when fetching or a query raises.
        cur.close()
Example #2
0
def av_site(site, page=1):
    """Render an HTML listing of the videos on one page of ``site``.

    The page is fetched with ``av.get_page(site, page)``, which yields the
    source URL and the videos. For each video the markup shows the cover
    linked to the video, magnet/search links, the date, the rating as stars,
    and every entry of ``video.find`` (with download and VLC links when the
    entry ends with one of ``VTYPES``). A "Next Page" link points to
    ``/av/<site>/<page + 1>``.

    Args:
        site: Site identifier passed to ``av.get_page``.
        page: Page number; defaults to 1.

    Returns:
        Whatever ``template`` returns for the rendered markup.
    """
    # The template computes `page + 1`, which needs a number. This is a no-op
    # for ints; presumably a URL route may hand the value over as a string --
    # verify against the route definition.
    page = int(page)
    source, videos = av.get_page(site, page)
    return template('''<html>
        <head>
            <style>
                a:visited { color: purple; }
                div { font-size: x-large; }
                .video { padding: 10px; }
                .star { color: gold; }
            </style>
            <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" integrity="sha384-1q8mTJOASx8j1Au+a5WDVnPi2lkFfwwEAa8hDDdjZlpLegxhjVME1fgjWPGmkzs7" crossorigin="anonymous">
            <script src="https://code.jquery.com/jquery-2.1.4.min.js"></script>
            <meta name="referrer" content="no-referrer" />
        </head>
        <body>
            <div class="container">
                <div><a target="_blank" href="{{source}}">Source</a></div>
                % for video in videos:
                    <div class="video">
                        <div><a target="_blank" href="{{video.link}}"><img src="{{video.cover}}"></a></div>
                        <div>
                            <a href="/api/magnet/{{video.search}}" class="btn btn-sm btn-default glyphicon glyphicon-magnet"></a>
                            <a target="_blank" href="/api/search/{{video.search}}">{{video.id}}</a>
                            <a target="_blank" href="/api/search/{{video.name}}">{{video.name}}</a>
                            <a target="_blank" href="/api/search/{{video.title}}">{{video.title}}</a>
                            <span>{{video.date}}</span>
                            <span class="star">{{u"\u2605" * int(video.rating)}}</span>
                            <span>{{video.rating if video.rating > 0 else ''}}</span>
                        </div>
                        <div>
                        % for f in video.find:
                            <div>
                                <span>{{f}}</span>
                                % if f.endswith(VTYPES):
                                <a href="http://ypcat.csie.org{{f}}" class="btn btn-sm btn-default glyphicon glyphicon-download-alt"></a>
                                <a href="vlc://ypcat.csie.org{{f}}" class="btn btn-sm btn-warning glyphicon glyphicon-play"></a>
                                % end
                            </div>
                        % end
                        </div>
                    </div>
                % end
                <div><a href="/av/{{site}}/{{page + 1}}">Next Page</a></div>
            </div>
        </body>
    </html>''', videos=videos, site=site, page=page, source=source, VTYPES=VTYPES)