def ana_song(weblink):
    '''Scrape a song page and return its metadata as a dictionary.

    The song mid is the last path segment of ``weblink`` up to its first
    dot. Artist, title and cover link are read from the fetched HTML.

    Returns a dict with the keys 'artist', 'song_name', 'songmid', 'cover'.
    Raises AttributeError when one of the expected elements is missing,
    because ``find`` then returns None.
    '''
    log = mylogger(logfile, get_funcname())

    # Last path segment without everything after its first dot.
    songmid = weblink.split('/')[-1].split('.')[0]
    log.debug(songmid)

    soup = BeautifulSoup(op_simple(weblink)[0], "html.parser")

    artist_name = soup.find('div', {'class': 'data__singer'}).attrs['title']
    log.debug(artist_name)

    title_tag = soup.find('h1', {'class': 'data__name_txt'})
    song_name = modstr(title_tag.text.strip())
    log.debug(song_name)

    # The src attribute is prefixed with a scheme; presumably the page
    # uses protocol-relative URLs ('//host/...') -- TODO confirm.
    cover = 'http:' + soup.find('img', {'class': 'data__photo'}).attrs['src']
    log.debug('Cover link: ' + cover)

    sDict = {
        'artist': artist_name,
        'song_name': song_name,
        'songmid': songmid,
        'cover': cover,
    }
    log.debug(sDict)
    return sDict
def get_loc_one(song_id):
    '''Fetch the widget XML for one song and return its details.

    Returns an empty dict when the <location> element yields no text,
    otherwise a dict with the keys 'location', 'song', 'cover', 'artist',
    'singer' and 'album' ('artist' and 'singer' hold the same value).
    '''
    log = mylogger(logfile, get_funcname())
    response = op_simple(f'http://www.xiami.com/widget/xml-single/sid/{song_id}')
    log.debug(response[1])
    soup = BeautifulSoup(response[0], "html.parser")

    def inner_text(tag, head, tail):
        # Cut fixed-width wrappers off the serialized element. The offsets
        # equal len('<tag><![CDATA[') and len(']]></tag>'), so presumably
        # every value is CDATA-wrapped -- TODO confirm against a response.
        # A missing element serializes as 'None', which slices to ''.
        return str(soup.find(tag))[head:-tail]

    raw_location = inner_text("location", 19, 14)
    if raw_location == '':
        log.debug('Track not published')
        return {}

    log.debug('Raw Location: ' + raw_location)
    location = decry(raw_location)
    song = modstr(inner_text("song_name", 20, 15))
    singer = modstr(inner_text("artist_name", 22, 17))
    album = modstr(inner_text("album_name", 21, 16))
    cover = 'http:' + inner_text('album_cover', 22, 17)

    SongDic = {
        'location': location,
        'song': song,
        'cover': cover,
        'artist': singer,
        'singer': singer,
        'album': album,
    }
    log.debug(SongDic)
    return SongDic
def get_songlocation(songid):
    '''Return the text of the <location> element from the song's widget XML.

    The value is returned exactly as found in the document; no decoding
    is applied here. Raises AttributeError when the document has no
    <location> element.
    '''
    url = f'https://emumo.xiami.com/widget/xml-single/sid/{songid}'
    body = op_simple(url, headers)[0]
    soup = BeautifulSoup(body, "html.parser")
    location_tag = soup.find('location')
    return location_tag.text
def ana_song(weblink):
    '''Scrape a song page and return its metadata as a dictionary.

    Returns a dict with the keys 'artist', 'song_name', 'songmid', 'cover'.
    The album name is extracted and logged but is not part of the result.
    '''
    log = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(weblink, header)[0], "html.parser")

    title_tag = soup.find('em', {'class': 'f-ff2'})
    songname = modstr(title_tag.text.strip())
    log.info(songname)

    # First matching paragraph carries the artist link, second the album.
    descriptions = soup.findAll('p', {'class': 'des s-fc4'})
    artistname = modstr(descriptions[0].span.a.text)
    albumname = modstr(descriptions[1].a.text)
    log.info(artistname)
    log.info(albumname)

    cover_box = soup.find('div', {'class': 'u-cover u-cover-6 f-fl'})
    # NOTE(review): an <img> normally carries 'src', not 'href' -- confirm
    # against the live page; a missing attribute raises KeyError here.
    cover = cover_box.img.attrs['href']
    log.info(cover)

    # Everything after the last '=' in the link.
    songmid = weblink.split('=')[-1]

    sDict = {
        'artist': artistname,
        'song_name': songname,
        'songmid': songmid,
        'cover': cover,
    }
    log.debug(sDict)
    return sDict
def ana_mono(page):
    '''Fetch ``page`` and return its author and title.

    Returns {'author': ..., 'title': ...}, both stripped of surrounding
    whitespace. Raises AttributeError when either element is missing.
    '''
    log = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    author_tag = soup.find('span', {'class': 'title'})
    author = author_tag.text.strip()
    title_tag = soup.find('h1', {'class': 'title'})
    title = title_tag.text.strip()

    info = dict(author=author, title=title)
    log.debug(info)
    return info
def ana_dy(page):
    '''Analyze a Douyin web page and return its author and title.

    Returns {'author': ..., 'title': ...}, both stripped of surrounding
    whitespace. Raises AttributeError when either element is missing.
    '''
    log = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    name_tag = soup.find('p', {'class': 'name nowrap'})
    author = name_tag.text.strip()
    desc_tag = soup.find('h1', {'class': 'desc'})
    title = desc_tag.text.strip()

    info = dict(author=author, title=title)
    log.info(info)
    return info
def ana_mono(page):
    '''Analyze a Mono web page and return its author and title.

    The author is read from the first <span class="title">, the title from
    the first <h1 class="title">; both are whitespace-stripped.
    Raises AttributeError when either element is missing.
    '''
    log = mylogger(logfile, get_funcname())
    markup = op_simple(page, ran_header())[0]
    soup = BeautifulSoup(markup, "html.parser")

    fields = {}
    fields['author'] = soup.find('span', {'class': 'title'}).text.strip()
    fields['title'] = soup.find('h1', {'class': 'title'}).text.strip()

    log.debug(fields)
    return fields
def ana_album(weblink):
    '''Scrape an album page into a dictionary of album and track data.

    The result has the string keys 'album', 'artist', 'year', 'cover',
    'fullname' and 'TrackNum' (number of track rows found), plus one
    integer key per track number whose value is the list
    [songmid, songname, singer].
    '''
    ml = mylogger(logfile, get_funcname())
    page_html = op_simple(weblink, header=ran_header(ref=ref))[0]
    soup = BeautifulSoup(page_html, "html.parser")

    album_name = modstr(soup.find('h1', {'class': 'data__name_txt'}).text)
    ml.debug(album_name)

    artist_name = modstr(
        soup.find('a', {'class': 'js_singer data__singer_txt'}).text)
    ml.debug(artist_name)

    # Text node starting with the label; characters 5..8 are taken as the
    # year (presumably label + one separator, then a 4-digit year).
    year = soup.find(text=re.compile('^发行时间'))[5:9]
    ml.debug(year)

    cover = 'http:' + soup.find('img', {'id': 'albumImg'}).attrs['src']
    ml.debug('Cover link: ' + cover)

    aDict = {
        'album': album_name,
        'artist': artist_name,
        'year': year,
        'cover': cover,
        'fullname': artist_name + ' - ' + year + ' - ' + album_name,
    }

    number_cells = soup.findAll('div', {'class': 'songlist__number'})
    seen_names = []  # names already used, for the duplicate check
    track_total = 0
    for track_total, cell in enumerate(number_cells, start=1):
        tracknumber = cell.text
        ml.debug('Find track ' + str(tracknumber))

        # Two siblings on from the number cell sits the node holding the
        # song link (presumably one whitespace node lies in between).
        row = cell.next_sibling.next_sibling
        link = row.find('span', {'class': 'songlist__songname_txt'}).a
        # Last path segment of the href minus its final five characters
        # (presumably the '.html' suffix).
        songmid = link.attrs['href'].split('/')[-1][:-5]

        songname = link.text
        if songname in seen_names:
            # Keep repeated titles distinct by appending the track number.
            songname = songname + '_' + tracknumber
        seen_names.append(songname)
        ml.debug(songname)

        singer_links = row.parent.findAll('a', {'class': "singer_name"})
        if len(singer_links) > 1:
            singer = ','.join([a.text for a in singer_links])
        else:
            singer = singer_links[0].text
        ml.debug(singer)

        aDict[int(tracknumber)] = [songmid, songname, singer]

    aDict['TrackNum'] = track_total
    return aDict  # Album dictionary
def ana_wx(page):
    '''Fetch ``page`` and return its author and title.

    The author is the text of the link inside the nickname <span>, the
    title the text of the <h2 class="rich_media_title">; both stripped.
    Raises AttributeError when an expected element is missing.
    '''
    log = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    nickname = soup.find(
        'span', {'class': 'rich_media_meta rich_media_meta_nickname'})
    author = nickname.a.text.strip()

    heading = soup.find('h2', {'class': 'rich_media_title'})
    title = heading.text.strip()

    info = dict(author=author, title=title)
    log.debug(info)
    return info
def ana_cd(albumlink):
    '''Get album JSON data and return it as a dictionary.

    The album id is whatever follows the last '=' in ``albumlink``. The
    API response is run through ``ana_json`` and the value returned by
    ``op_sel(albumlink)`` is stored under the key 'year'.
    '''
    log = mylogger(logfile, get_funcname())

    # op_sel is called first, before any API request is made.
    year = op_sel(albumlink)

    albumid = albumlink.rpartition('=')[2]
    log.dbg(albumid)

    api_url = f'http://{host}/api/album/{albumid}/'
    response_body = op_simple(api_url, ran_header(agentref, host, org))[0]

    # The body is passed through BeautifulSoup only to obtain its
    # prettify() text, which ana_json then consumes.
    jdata = BeautifulSoup(response_body, "html.parser").prettify()
    log.dbg(jdata)

    adict = ana_json(jdata)
    adict['year'] = year
    log.dbg(adict)
    return adict
def ana_cd(albumlink):
    '''Get album JSON data and return it as a dictionary.

    The album id is whatever follows the last '=' in ``albumlink``. The
    API response is run through ``ana_json`` and the value returned by
    ``op_sel(albumlink)`` is stored under the key 'year'.
    '''
    # op_sel is called first, before the API request is made.
    year = op_sel(albumlink)

    albumid = albumlink.rpartition('=')[2]
    api_url = f'http://music.163.com/api/album/{albumid}/'
    response_body = op_simple(api_url, ran_header(ref=agentref))[0]

    # The body is passed through BeautifulSoup only to obtain its
    # prettify() text, which ana_json then consumes.
    jdata = BeautifulSoup(response_body, "html.parser").prettify()

    adict = ana_json(jdata)
    adict['year'] = year
    return adict
def ana_wx(page):
    '''Analyze Weixin web

    Fetches ``page`` and extracts the author (text of the link inside the
    nickname <span>) and the title (text of the
    <h2 class="rich_media_title">), both stripped of whitespace.

    Returns {'author': ..., 'title': ...}, or None when extraction fails,
    for example because an expected element is missing from the page.
    '''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(page, ran_header())[0]
    bsObj = BeautifulSoup(html, "html.parser")
    try:
        author = bsObj.find(
            'span', {'class': 'rich_media_meta rich_media_meta_nickname'})
        author = author.a.text.strip()
        title = bsObj.find('h2', {'class': 'rich_media_title'})
        title = title.text.strip()
        p = {'author': author, 'title': title}
        ml.dbg(p)
    except Exception:
        # Best effort: any ordinary failure yields None. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate so the program can still be interrupted or exit.
        return None
    return p
def get_loc_cd(song_id):
    '''Fetch the widget XML for one song and return its location details.

    Returns an empty dict when the <location> element yields no text,
    otherwise a dict with the keys 'location', 'song' and 'singer'.
    '''
    log = mylogger(logfile, get_funcname())
    url = 'http://www.xiami.com/widget/xml-single/sid/%s' % song_id
    response = op_simple(url)
    log.debug(response[1])
    soup = BeautifulSoup(response[0], "html.parser")

    # The slice offsets equal len('<location><![CDATA[') and
    # len(']]></location>'); presumably the value is CDATA-wrapped --
    # TODO confirm. A missing element serializes as 'None', which
    # slices to ''.
    raw_location = str(soup.find("location"))[19:-14]
    if raw_location == '':
        log.debug('Track not published')
        return {}

    log.debug('Raw Location: ' + raw_location)
    SongDic = {
        'location': decry(raw_location),
        'song': modstr(str(soup.find("song_name"))[20:-15]),
        'singer': modstr(str(soup.find("artist_name"))[22:-17]),
    }
    log.debug(SongDic)
    return SongDic
def test_op_simple(self):
    '''op_simple on a fixed widget URL must give 200 as its second item.'''
    print('Test op_simple')
    target = 'http://www.xiami.com/widget/xml-single/sid/1769402049'
    result = openlink.op_simple(target)
    # The second element is compared with 200 (presumably the HTTP status).
    self.assertEqual(result[1], 200)