Example #1
0
def login():
    """Log in to the douban app API and cache the session credentials.

    Posts the configured spider name/version and user credentials to the
    login endpoint. On success the module-level globals ``g_user_id``,
    ``g_token`` and ``g_expire`` are set from the response.

    Returns:
        True if the server answered with ``r == 0`` and the credentials
        were stored; False on connection error, timeout, an unparseable
        response body, or a non-zero ``r`` status.
    """
    global g_user_id, g_token, g_expire

    payload = {'app_name': DOUBAN_SPIDER_NAME,
               'version': DOUBAN_SPIDER_VERSION,
               'email': DOUBAN_USER_NAME,
               'password': DOUBAN_USER_PASSWORD}
    # NOTE(review): the credentials travel over plain HTTP here; switch to
    # HTTPS if the endpoint supports it.
    try:
        # requests never times out unless told to, so without an explicit
        # timeout the Timeout handler below could never fire and a stalled
        # server would hang the crawler.
        response = requests.post("http://www.douban.com/j/app/login",
                                 data=payload, timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        return False

    try:
        result = json.loads(response.text)
    except ValueError:
        # Body was not JSON (e.g. an HTML error page).
        spider_log.log_info('crawler.douban.login: failed. invalid response')
        return False

    if not isinstance(result, dict) or result.get('r') != 0:
        # Format the message with %; the original comma built a tuple.
        info = 'crawler.douban.login: failed. r=%s' % (result,)
        spider_log.log_info(info)
        return False

    g_user_id = result['user_id']
    g_token = result['token']
    g_expire = result['expire']
    spider_log.log_info('crawler.douban.login: success')
    return True
Example #2
0
def _update_channel_once(channel, max_num=20):
    """Fetch one playlist for ``channel`` and store any songs not yet known.

    Requires a prior successful ``login()``: the request is built from the
    module-level ``g_user_id``, ``g_token`` and ``g_expire``.

    Args:
        channel: channel object exposing ``uuid`` and ``music_list``; the
            numeric channel id is taken from the second '-'-separated
            field of ``uuid``.
        max_num: stop after this many songs have been added to the channel.

    Returns:
        List of music objects newly appended to ``channel.music_list``
        (possibly empty). An empty list is also returned when the playlist
        request fails or its body is not valid JSON.

    Raises:
        AssertionError: if ``channel.uuid`` does not carry the douban
            channel prefix, or the API answers with a non-zero ``r``.
    """
    # TODO
    # maybe need a better method to assert and get cid
    assert channel.uuid.startswith(DOUBAN_CHANNEL_UUID_FORMAT.split('-')[0]), \
        'not a douban channel uuid: %s' % (channel.uuid,)
    cid = int(channel.uuid.split('-')[1])

    # Fields shared by both request flavours.
    # presumably 'n' asks for a fresh playlist and 'p' for one that follows
    # song `sid` -- TODO confirm against the API.
    payload = {'app_name': DOUBAN_SPIDER_NAME,
               'version': DOUBAN_SPIDER_VERSION,
               'user_id': g_user_id,
               'expire': g_expire,
               'token': g_token,
               'channel': cid,
               'type': 'n'}
    if channel.music_list:
        # Seed the request with a random song already in the channel; the
        # song id is the third '-'-separated field of the music uuid.
        seed_uuid = get_music(key=random.choice(channel.music_list))[0].uuid
        payload['type'] = 'p'
        payload['sid'] = seed_uuid.split('-')[2]
        # NOTE: the seed song is deliberately not reported back to the API
        # as listened (type 'e') or as "don't play again" (type 'b').

    try:
        response = requests.get("http://www.douban.com/j/app/radio/people",
                                params=payload, timeout=5)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        traceback.print_exc()
        return []

    try:
        result = json.loads(response.text)
    except ValueError:
        # Non-JSON body: treat like any other failed fetch.
        traceback.print_exc()
        return []
    assert result['r'] == 0, 'douban radio API returned r=%s' % (result['r'],)

    update_music = []
    for song in result['song']:
        try:
            uuid = DOUBAN_MUSIC_UUID_FORMAT % (int(song['aid']), int(song['sid']))
        except (KeyError, TypeError, ValueError):
            # ads
            continue

        # Query the store once and reuse the result for both branches.
        existing = get_music(uuid=uuid)
        if existing:
            music = existing[0]
        else:
            try:
                cover_fd = requests.get(song['picture'], stream=True, timeout=5).raw
                audio_fd = requests.get(song['url'], stream=True, timeout=5).raw
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout):
                traceback.print_exc()
                continue
            try:
                music = add_music(song['title'], song['artist'], song['albumtitle'],
                                  song['company'], song['public_time'], song['kbps'],
                                  cover_fd, audio_fd, uuid)
            except Exception:
                # Best effort: one bad song must not abort the whole batch.
                traceback.print_exc()
                continue

            spider_log.log_info("add music:"+uuid)

        if music and music.key not in channel.music_list:
            # channel.music_list is extended in place and then persisted.
            channel_music_list = channel.music_list
            channel_music_list.append(music.key)
            update_channel(channel, music_list=channel_music_list)
            update_music.append(music)
            if len(update_music) >= max_num:
                break
    return update_music