예제 #1
0
def TimerCheck(IOLoop, proxytype='http'):
    """Re-queue every stored proxy of one type for checking.

    All ``Proxy`` rows whose ``type`` equals ``proxytype`` are read, then
    deleted and committed, and their ``(ip, port)`` pairs are passed to
    ``proxyloop.addipsl`` with ``func`` as the callback.

    Args:
        IOLoop: Not used inside this function; kept so existing callers
            keep working.
        proxytype: Value matched against ``Proxy.type``.
    """
    def func(ip, port, proxytype, anonymous, connect_time):
        """Insert a ``Proxy`` row unless the same ip/port/type is stored.

        NOTE(review): presumably called by ``proxyloop`` for each proxy that
        passes its check -- confirm against ``addipsl``.
        """
        existing = session.query(Proxy).filter(
            Proxy.ip == ip, Proxy.port == port,
            Proxy.type == proxytype).first()
        if existing is not None:
            return
        try:
            pos = IPFIND(ip).split('\t')
            last = pos[-1]
            # Index computed as len - 2 (not -2) so a single-field result
            # wraps round to that same field instead of raising IndexError.
            before_last = pos[len(pos) - 2]
            position = last if before_last == '' else before_last + '.' + last
        except Exception:
            # Any lookup failure falls back to the placeholder. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            position = 'None.None'
        p = Proxy(ip, port, proxytype, anonymous, position, connect_time)
        session.add(p)
        session.commit()

    r = session.query(Proxy).filter(Proxy.type == proxytype).all()
    # Snapshot the plain values before deleting, so nothing below depends on
    # ORM instances staying readable once their rows are gone.
    checkproxylist = [(t.ip, t.port) for t in r]
    session.query(Proxy).filter(Proxy.type == proxytype).delete()
    session.commit()
    # Every pair is queued twice.
    # NOTE(review): presumably to give each proxy a second attempt -- confirm.
    checkproxylist += checkproxylist
    proxyloop.addipsl(checkproxylist, callback=func)
예제 #2
0
파일: spider.py 프로젝트: murphyzh/music163
def parser_song(song_id, art):
    """Fetch one song and its hot comments and store them via ``session``.

    The song page is loaded with ``get_tree`` and the comment data with
    ``post``. A ``Song`` row is created when none exists for ``song_id``, and
    a ``User`` / ``Comment`` row is created for every hot comment that is not
    stored yet.

    Args:
        song_id: Identifier substituted into ``SONG_URL`` and
            ``COMMENTS_URL``.
        art: Artist record; only ``art.id`` is read, to link a new song.

    Returns:
        The ``Song`` instance, or ``None`` when the comments request does not
        answer with HTTP 200, or when the song is not stored yet and the page
        contains the ``n-for404`` element.
    """
    tree = get_tree(SONG_URL.format(song_id))
    song = session.query(Song).filter_by(id=song_id).all()
    r = post(COMMENTS_URL.format(song_id))
    if r.status_code != 200:
        print('API error: Song: {}'.format(song_id))
        return None
    data = r.json()
    if not song:
        for404 = tree.xpath('//div[@class="n-for404"]')
        if for404:
            return None
        try:
            song_name = tree.xpath('//em[@class="f-ff2"]/text()')[0].strip()
        except IndexError:
            try:
                song_name = tree.xpath(
                    '//meta[@name="keywords"]/@content')[0].strip()
            except IndexError:
                # Neither selector matched: treated as a fetch limit, so wait
                # and start over for the same song.
                # NOTE(review): the retry is unbounded recursion.
                print('Fetch limit!')
                time.sleep(10)
                return parser_song(song_id, art)
        song = Song(id=song_id,
                    name=str(song_name),
                    art_id=art.id,
                    comment_count=int(data['total']))
        session.add(song)
        session.commit()
    else:
        song = song[0]
    for comment_ in data['hotComments']:
        comment_id = comment_['commentId']
        content = comment_['content']
        like_count = comment_['likedCount']
        user_ = comment_['user']
        if not user_:
            continue
        user = session.query(User).filter_by(id=user_['userId']).all()
        if not user:
            user = User(id=user_['userId'],
                        name=user_['nickname'],
                        picture=user_['avatarUrl'])
            try:
                session.add(user)
                session.commit()
            except Exception:
                # A failed flush/commit leaves the session in a failed
                # transaction; roll back so later queries in this loop (and
                # in callers) can still use it.
                session.rollback()
                continue
        else:
            user = user[0]
        comment = session.query(Comment).filter_by(id=comment_id).all()
        if not comment:
            # NOTE(review): keyword is ``use_id`` (not ``user_id``); left
            # as-is because the Comment model is not visible here -- confirm.
            comment = Comment(id=comment_id,
                              use_id=user.id,
                              song_id=song.id,
                              content=content,
                              like_count=like_count)
            session.add(comment)
            session.commit()
    return song
예제 #3
0
 def func(ip, port, proxytype, anonymous, connect_time):
     """Insert a ``Proxy`` row unless the same ip/port/type is stored.

     The position string is built from the tab-separated ``IPFIND(ip)``
     result: the last two fields joined with ".", or only the last field when
     the one before it is empty. ``"None.None"`` is used when the lookup
     fails.
     """
     existing = session.query(Proxy).filter(
         Proxy.ip == ip, Proxy.port == port, Proxy.type == proxytype
     ).first()
     if existing is not None:
         return
     try:
         pos = IPFIND(ip).split("\t")
         last = pos[-1]
         # Index computed as len - 2 (not -2) so a single-field result wraps
         # round to that same field instead of raising IndexError.
         before_last = pos[len(pos) - 2]
         position = last if before_last == "" else before_last + "." + last
     except Exception:
         # Catching Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         position = "None.None"
     p = Proxy(ip, port, proxytype, anonymous, position, connect_time)
     session.add(p)
     session.commit()
예제 #4
0
파일: spider.py 프로젝트: murphyzh/music163
def parser_artist(artist_id):
    """Store an artist and the songs linked from its top-50 list.

    The artist page is loaded with ``get_tree``. An ``Art`` row is created
    when none exists for ``artist_id``. Every song link found under the
    ``artist-top50`` element is then passed to ``parser_song`` unless a
    ``Song`` row with that id is already stored.

    Args:
        artist_id: Identifier substituted into ``ARTIST_URL``.
    """
    tree = get_tree(ARTIST_URL.format(artist_id))
    art = session.query(Art).filter_by(id=artist_id).all()
    print(art)
    if not art:
        art_name = str(tree.xpath('//h2[@id="artist-name"]/text()')[0])
        picture = str(
            tree.xpath('//div[contains(@class, "n-artist")]//img/@src')[0])
        art = Art(id=artist_id, name=art_name, picture=picture)
        session.add(art)
        session.commit()
    else:
        art = art[0]
    song_items = tree.xpath('//div[@id="artist-top50"]//ul/li/a/@href')
    for item in song_items:
        # The song id is the text after the first "=" in the link.
        song_id = item.split('=')[1]
        song = session.query(Song).filter_by(id=song_id).all()
        if song:
            # NOTE(review): the pause happens only for songs that are already
            # stored -- confirm this is the intended place to throttle.
            time.sleep(1)
            continue
        song = parser_song(song_id, art)
        if song is None:
            # parser_song returns None on an API error or a missing page;
            # None cannot be added to the session, so skip this song.
            continue
        session.add(song)
        session.commit()
예제 #5
0
def TimerCheck(IOLoop, proxytype="http"):
    """Re-queue every stored proxy of one type for checking.

    All ``Proxy`` rows whose ``type`` equals ``proxytype`` are read, then
    deleted and committed, and their ``(ip, port)`` pairs are passed to
    ``proxyloop.addipsl`` with ``func`` as the callback.

    Args:
        IOLoop: Not used inside this function; kept so existing callers
            keep working.
        proxytype: Value matched against ``Proxy.type``.
    """
    def func(ip, port, proxytype, anonymous, connect_time):
        """Insert a ``Proxy`` row unless the same ip/port/type is stored.

        NOTE(review): presumably called by ``proxyloop`` for each proxy that
        passes its check -- confirm against ``addipsl``.
        """
        existing = session.query(Proxy).filter(
            Proxy.ip == ip, Proxy.port == port, Proxy.type == proxytype
        ).first()
        if existing is not None:
            return
        try:
            pos = IPFIND(ip).split("\t")
            last = pos[-1]
            # Index computed as len - 2 (not -2) so a single-field result
            # wraps round to that same field instead of raising IndexError.
            before_last = pos[len(pos) - 2]
            position = last if before_last == "" else before_last + "." + last
        except Exception:
            # Any lookup failure falls back to the placeholder. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            position = "None.None"
        p = Proxy(ip, port, proxytype, anonymous, position, connect_time)
        session.add(p)
        session.commit()

    r = session.query(Proxy).filter(Proxy.type == proxytype).all()
    # Snapshot the plain values before deleting, so nothing below depends on
    # ORM instances staying readable once their rows are gone.
    checkproxylist = [(t.ip, t.port) for t in r]
    session.query(Proxy).filter(Proxy.type == proxytype).delete()
    session.commit()
    # Every pair is queued twice.
    # NOTE(review): presumably to give each proxy a second attempt -- confirm.
    checkproxylist += checkproxylist
    proxyloop.addipsl(checkproxylist, callback=func)
예제 #6
0
 def func(ip, port, proxytype, anonymous, connect_time):
     """Insert a ``Proxy`` row unless the same ip/port/type is stored.

     The position string is built from the tab-separated ``IPFIND(ip)``
     result: the last two fields joined with '.', or only the last field when
     the one before it is empty. ``'None.None'`` is used when the lookup
     fails.
     """
     existing = session.query(Proxy).filter(
         Proxy.ip == ip, Proxy.port == port,
         Proxy.type == proxytype).first()
     if existing is not None:
         return
     try:
         pos = IPFIND(ip).split('\t')
         last = pos[-1]
         # Index computed as len - 2 (not -2) so a single-field result wraps
         # round to that same field instead of raising IndexError.
         before_last = pos[len(pos) - 2]
         position = last if before_last == '' else before_last + '.' + last
     except Exception:
         # Catching Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         position = 'None.None'
     p = Proxy(ip, port, proxytype, anonymous, position, connect_time)
     session.add(p)
     session.commit()