def parser_artist(artist_id):
    """Fetch one artist page and store the artist together with its songs.

    Progress is tracked with a ``Process`` record keyed by ``artist_id``
    (one record per artist). If that record already reports success the
    function returns immediately without fetching anything.

    Args:
        artist_id: Value used as the id of the ``Process`` and ``Artist``
            records and substituted into ``ARTIST_URL``.

    Returns:
        None.

    Raises:
        IndexError: If the name or picture of a not-yet-stored artist is
            missing from the page, or a song link contains no ``=``
            (assumes the XPath queries return lists of strings -- TODO
            confirm against ``get_tree``).
    """
    create_app()

    # One Process record per artist; looked up once and reused for the
    # final make_succeed() call.
    process = Process.get_or_create(id=artist_id)
    if process.is_success:
        return

    print('Starting fetch artist: {}'.format(artist_id))
    start = time.time()

    # Per the original note: the page text is fetched with requests and
    # turned into an lxml object.
    tree = get_tree(ARTIST_URL.format(artist_id))

    stored = Artist.objects.filter(id=artist_id)
    if not stored:
        # The artist has never been fetched before: create it from the page.
        artist_name = tree.xpath('//h2[@id="artist-name"]/text()')[0]
        picture = tree.xpath(
            '//div[contains(@class, "n-artist")]//img/@src')[0]
        artist = Artist(id=artist_id, name=artist_name, picture=picture)
        artist.save()
    else:
        # The artist was fetched before, but its songs were not finished.
        artist = stored[0]

    song_items = tree.xpath('//div[@id="artist-top50"]//ul/li/a/@href')
    songs = []
    for item in song_items:
        # The song id is whatever follows the first '=' in the link.
        song_id = item.split('=')[1]
        # Fetch and parse the individual song.
        song = parser_song(song_id, artist)
        if song is not None:
            songs.append(song)

    artist.songs = songs
    artist.save()

    # Mark the hot comments of this artist's hot songs as fully fetched.
    process.make_succeed()
    print('Finished fetch artist: {} Cost: {}'.format(
        artist_id, time.time() - start))
def parser_artist(artist_id):
    """Fetch one artist page and store the artist together with its songs.

    A ``Process`` record keyed by ``artist_id`` guards the work: when it
    already reports ``is_success`` the function returns at once. Otherwise
    the artist page is loaded, the ``Artist`` record is created if it does
    not exist yet, every link found in the ``artist-top50`` block is handed
    to ``parser_song``, and the non-``None`` results are saved as
    ``artist.songs``.

    Args:
        artist_id: Value used as the id of the ``Process`` and ``Artist``
            records and substituted into ``ARTIST_URL``.

    Returns:
        None.

    Raises:
        IndexError: If the name or picture of a not-yet-stored artist is
            missing from the page, or a song link contains no ``=``
            (assumes the XPath queries return lists of strings -- TODO
            confirm against ``get_tree``).
    """
    create_app()

    # Looked up once; the same object is reused for make_succeed() below.
    process = Process.get_or_create(id=artist_id)
    if process.is_success:
        return

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Starting fetch artist: {}'.format(artist_id))
    start = time.time()

    tree = get_tree(ARTIST_URL.format(artist_id))

    stored = Artist.objects.filter(id=artist_id)
    if not stored:
        # No stored artist with this id: build one from the page contents.
        artist_name = tree.xpath('//h2[@id="artist-name"]/text()')[0]
        picture = tree.xpath(
            '//div[contains(@class, "n-artist")]//img/@src')[0]
        artist = Artist(id=artist_id, name=artist_name, picture=picture)
        artist.save()
    else:
        artist = stored[0]

    song_items = tree.xpath('//div[@id="artist-top50"]//ul/li/a/@href')
    songs = []
    for item in song_items:
        # The song id is whatever follows the first '=' in the link.
        song_id = item.split('=')[1]
        song = parser_song(song_id, artist)
        # Songs for which parser_song returned None are left out.
        if song is not None:
            songs.append(song)

    artist.songs = songs
    artist.save()
    process.make_succeed()
    print('Finished fetch artist: {} Cost: {}'.format(
        artist_id, time.time() - start))
def parser_artist(artist_id):
    """Load an artist page, persist the artist and attach its parsed songs.

    The function is a no-op when the ``Process`` record for ``artist_id``
    already has ``is_success`` set. Otherwise it reads the artist page,
    creates the ``Artist`` record when none is stored, passes each song link
    from the ``artist-top50`` block to ``parser_song``, saves the
    non-``None`` results on the artist, and finally calls
    ``process.make_succeed()``.

    Args:
        artist_id: Value used as the id of the ``Process`` and ``Artist``
            records and substituted into ``ARTIST_URL``.

    Returns:
        None.

    Raises:
        IndexError: If the name or picture of a not-yet-stored artist is
            missing from the page, or a song link contains no ``=``
            (assumes the XPath queries return lists of strings -- TODO
            confirm against ``get_tree``).
    """
    create_app()

    # A single lookup is enough; nothing below replaces this record before
    # make_succeed() is called on it.
    process = Process.get_or_create(id=artist_id)
    if process.is_success:
        return

    # Parenthesized single-argument print works on both Python 2 and 3.
    print('Starting fetch artist: {}'.format(artist_id))
    start = time.time()

    tree = get_tree(ARTIST_URL.format(artist_id))

    existing = Artist.objects.filter(id=artist_id)
    if existing:
        artist = existing[0]
    else:
        # Nothing stored for this id yet: read name and picture off the page.
        artist_name = tree.xpath('//h2[@id="artist-name"]/text()')[0]
        picture = tree.xpath(
            '//div[contains(@class, "n-artist")]//img/@src')[0]
        artist = Artist(id=artist_id, name=artist_name, picture=picture)
        artist.save()

    song_items = tree.xpath('//div[@id="artist-top50"]//ul/li/a/@href')
    songs = []
    for item in song_items:
        # Everything after the first '=' in the link is used as the song id.
        song_id = item.split('=')[1]
        song = parser_song(song_id, artist)
        # A None result is skipped rather than stored.
        if song is not None:
            songs.append(song)

    artist.songs = songs
    artist.save()
    process.make_succeed()
    print('Finished fetch artist: {} Cost: {}'.format(
        artist_id, time.time() - start))
def parser_artist(artist_id):
    """Fetch one artist page and store the artist with all of its songs.

    The ``Process`` record for ``artist_id`` is marked as succeeded only
    when every step works. The function returns early, leaving the record
    unmarked, when:

    * the record already reports ``is_success``;
    * ``get_tree`` returns ``None`` for the artist URL;
    * the page contains no song links in the ``artist-top50`` block;
    * ``parser_song`` returns ``None`` for any song (the songs parsed so far
      are then not saved on the artist).

    Args:
        artist_id: Value used as the id of the ``Process`` and ``Artist``
            records and substituted into ``ARTIST_URL``.

    Returns:
        None.

    Raises:
        IndexError: If the name or picture of a not-yet-stored artist is
            missing from the page, or a song link contains no ``=``
            (assumes the XPath queries return lists of strings -- TODO
            confirm against ``get_tree``).
    """
    create_app()

    # Looked up once; the same object is reused for make_succeed() below.
    process = Process.get_or_create(id=artist_id)
    if process.is_success:
        print("find process artist finished ,return")
        return

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Starting fetch artist: {}'.format(artist_id))
    start = time.time()

    # Load the artist page.
    tree = get_tree(ARTIST_URL.format(artist_id))
    if tree is None:
        print("fetch artist url get none,return !")
        return

    stored = Artist.objects.filter(id=artist_id)
    if not stored:
        print("create artist " + str(artist_id))
        artist_name = tree.xpath('//h2[@id="artist-name"]/text()')[0]
        picture = tree.xpath(
            '//div[contains(@class, "n-artist")]//img/@src')[0]
        artist = Artist(id=artist_id, name=artist_name, picture=picture)
        artist.save()
    else:
        artist = stored[0]
        print("artist exist " + str(artist_id))

    print("fetching all song comments")
    # NOTE: the original author recorded that the XPath
    # '//ul[@class="f-hide"]/li/a/@href' yields the same links.
    song_items = tree.xpath('//div[@id="artist-top50"]//ul/li/a/@href')
    print(song_items)
    if not song_items:
        print("Artist get no songs ,return fetch artist {}".format(artist_id))
        return

    songs = []
    for item in song_items:
        # The song id is whatever follows the first '=' in the link.
        song_id = item.split('=')[1]
        song = parser_song(song_id, artist)
        if song is None:
            # One failed song aborts the whole artist so that the Process
            # record stays unmarked.
            print("parse song failed,return ")
            return
        songs.append(song)

    artist.songs = songs
    artist.save()
    process.make_succeed()
    print('Finished fetch artist: {} Cost: {}'.format(
        artist_id, time.time() - start))