예제 #1
0
파일: qm_ana.py 프로젝트: aka-achea/xd
def ana_song(weblink):
    '''Scrape a song page and return its metadata as a dictionary.

    weblink -- URL of the song page; the song mid is taken from its last
               path segment, cut at the first dot.

    Returns a dict with the keys 'artist', 'song_name', 'songmid' and
    'cover'.

    Raises AttributeError when one of the expected elements is missing,
    because BeautifulSoup's find() then returns None.
    '''
    ml = mylogger(logfile, get_funcname())

    # Last path segment of the link, without anything after its first dot.
    songmid = weblink.rpartition('/')[2].partition('.')[0]
    ml.debug(songmid)

    soup = BeautifulSoup(op_simple(weblink)[0], "html.parser")

    # The artist is read from the title attribute of the singer <div>.
    singer_div = soup.find('div', {'class': 'data__singer'})
    artist_name = singer_div.attrs['title']
    ml.debug(artist_name)

    name_heading = soup.find('h1', {'class': 'data__name_txt'})
    song_name = modstr(name_heading.text.strip())
    ml.debug(song_name)

    # The src value is prefixed with a scheme to form the cover link.
    photo = soup.find('img', {'class': 'data__photo'})
    cover = 'http:' + photo.attrs['src']
    ml.debug('Cover link: ' + cover)

    sDict = dict(
        artist=artist_name,
        song_name=song_name,
        songmid=songmid,
        cover=cover,
    )
    ml.debug(sDict)
    return sDict
예제 #2
0
def get_loc_one(song_id):
    '''Fetch the single-song XML for song_id and return its details.

    Returns an empty dict when the location field is empty (logged as
    'Track not published'); otherwise a dict with the keys 'location',
    'song', 'cover', 'artist', 'singer' and 'album'. 'artist' and
    'singer' hold the same value.
    '''
    log = mylogger(logfile, get_funcname())
    page = op_simple(f'http://www.xiami.com/widget/xml-single/sid/{song_id}')
    log.debug(page[1])
    soup = BeautifulSoup(page[0], "html.parser")

    def inner_text(tag_name):
        # Slice the rendered tag down to its payload. The offsets equal the
        # lengths of the '<tag><![CDATA[' prefix and ']]></tag>' suffix --
        # presumably the markup the parser renders; confirm against a
        # live response.
        head = len(f'<{tag_name}><![CDATA[')
        tail = len(f']]></{tag_name}>')
        return str(soup.find(tag_name))[head:-tail]

    raw_location = inner_text("location")
    if raw_location == '':
        log.debug('Track not published')
        return {}

    log.debug('Raw Location: ' + raw_location)
    location = decry(raw_location)
    song = modstr(inner_text("song_name"))
    singer = modstr(inner_text("artist_name"))
    album = modstr(inner_text("album_name"))
    cover = 'http:' + inner_text('album_cover')

    SongDic = {
        'location': location,
        'song': song,
        'cover': cover,
        'artist': singer,
        'singer': singer,
        'album': album,
    }
    log.debug(SongDic)
    return SongDic
예제 #3
0
def get_songlocation(songid):
    '''Return the undecrypted location text from the single-song XML.

    The text of the <location> element is returned exactly as parsed.
    Raises AttributeError when the response has no <location> element.
    '''
    xml_url = f'https://emumo.xiami.com/widget/xml-single/sid/{songid}'
    page_source = op_simple(xml_url, headers)[0]
    location_tag = BeautifulSoup(page_source, "html.parser").find('location')
    return location_tag.text
예제 #4
0
파일: ed_ana.py 프로젝트: aka-achea/xd
def ana_song(weblink):
    '''Scrape a song page and return its metadata as a dictionary.

    weblink -- URL of the song page; the song mid is whatever follows the
               last '=' in it.

    Returns a dict with the keys 'artist', 'song_name', 'songmid' and
    'cover'. The album name is parsed and logged but is not part of the
    returned dict.

    Raises AttributeError or IndexError when an expected element is
    missing, and KeyError when the cover image has neither an href nor a
    src attribute.
    '''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(weblink, header)[0]
    bsObj = BeautifulSoup(html, "html.parser")

    song_name = bsObj.find('em', {'class': 'f-ff2'})
    songname = modstr(song_name.text.strip())
    ml.info(songname)

    # The first description paragraph holds the artist, the second the album.
    aa = bsObj.find_all('p', {'class': 'des s-fc4'})
    artistname = modstr(aa[0].span.a.text)
    albumname = modstr(aa[1].a.text)
    ml.info(artistname)
    ml.info(albumname)

    cover_box = bsObj.find('div', {'class': 'u-cover u-cover-6 f-fl'})
    img_attrs = cover_box.img.attrs
    # An <img> element carries its URL in src, not href. href is still
    # honoured first so any page that does provide it behaves as before;
    # otherwise fall back to src instead of failing with KeyError.
    cover = img_attrs['href'] if 'href' in img_attrs else img_attrs['src']
    ml.info(cover)

    songmid = weblink.split('=')[-1]

    sDict = {
        'artist': artistname,
        'song_name': songname,
        'songmid': songmid,
        'cover': cover
    }
    ml.debug(sDict)
    return sDict
예제 #5
0
파일: bm_note.py 프로젝트: aka-achea/BM
def ana_mono(page):
    '''Fetch page and return its author and title.

    The author is the text of the first <span class="title">, the title
    the text of the first <h1 class="title">, both stripped of
    surrounding whitespace.

    Returns a dict with the keys 'author' and 'title'. Raises
    AttributeError when either element is missing.
    '''
    ml = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    author_tag = soup.find('span', {'class': 'title'})
    author = author_tag.text.strip()
    title_tag = soup.find('h1', {'class': 'title'})
    title = title_tag.text.strip()

    result = dict(author=author, title=title)
    ml.debug(result)
    return result
예제 #6
0
def ana_dy(page):
    '''Analyze a Douyin web page and return its author and title.

    The author is the text of the first <p class="name nowrap">, the
    title the text of the first <h1 class="desc">, both stripped of
    surrounding whitespace.

    Returns a dict with the keys 'author' and 'title'. Raises
    AttributeError when either element is missing.
    '''
    ml = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    name_tag = soup.find('p', {'class': 'name nowrap'})
    author = name_tag.text.strip()
    desc_tag = soup.find('h1', {'class': 'desc'})
    title = desc_tag.text.strip()

    result = dict(author=author, title=title)
    ml.info(result)
    return result
예제 #7
0
def ana_mono(page):
    '''Analyze a Mono web page and return its author and title.

    The author is the text of the first <span class="title">, the title
    the text of the first <h1 class="title">, both stripped of
    surrounding whitespace.

    Returns a dict with the keys 'author' and 'title'. Raises
    AttributeError when either element is missing.
    '''
    ml = mylogger(logfile, get_funcname())
    markup = op_simple(page, ran_header())[0]
    soup = BeautifulSoup(markup, "html.parser")

    author_tag = soup.find('span', {'class': 'title'})
    author = author_tag.text.strip()
    title_tag = soup.find('h1', {'class': 'title'})
    title = title_tag.text.strip()

    result = dict(author=author, title=title)
    ml.debug(result)
    return result
예제 #8
0
파일: qm_ana.py 프로젝트: aka-achea/xd
def ana_album(weblink):
    '''Scrape an album page and return the album dictionary.

    The result has the string keys 'album', 'artist', 'year', 'cover',
    'fullname' and 'TrackNum', plus one integer key per track number
    whose value is the list [songmid, songname, singer].

    A song name that already appeared earlier on the album gets
    '_<tracknumber>' appended. Multiple singers of one track are joined
    with ','.

    Raises AttributeError, IndexError or ValueError when the page does
    not have the expected structure.
    '''
    ml = mylogger(logfile, get_funcname())
    page = op_simple(weblink, header=ran_header(ref=ref))[0]
    soup = BeautifulSoup(page, "html.parser")

    name_heading = soup.find('h1', {'class': 'data__name_txt'})
    album_name = modstr(name_heading.text)
    ml.debug(album_name)

    singer_link = soup.find('a', {'class': 'js_singer data__singer_txt'})
    artist_name = modstr(singer_link.text)
    ml.debug(artist_name)

    # Characters 5..8 of the text node starting with '发行时间' are kept
    # as the year.
    release_text = soup.find(text=re.compile('^发行时间'))
    year = release_text[5:9]
    ml.debug(year)

    cover_img = soup.find('img', {'id': 'albumImg'})
    cover = 'http:' + cover_img.attrs['src']
    ml.debug('Cover link: ' + cover)

    aDict = dict(
        album=album_name,
        artist=artist_name,
        year=year,
        cover=cover,
        fullname=artist_name + ' - ' + year + ' - ' + album_name,
    )

    number_cells = soup.findAll('div', {'class': 'songlist__number'})
    seen_names = []  # names already used, for the duplicate check
    for cell in number_cells:
        tracknumber = cell.text
        ml.debug(f'Find track {tracknumber}')

        # The song entry is two siblings after the number cell.
        entry = cell.next_sibling.next_sibling
        name_link = entry.find('span', {'class': 'songlist__songname_txt'}).a
        # Last path segment of the href minus its final five characters
        # (presumably a '.html' suffix -- confirm against the page).
        songmid = name_link.attrs['href'].rpartition('/')[2][:-5]

        songname = name_link.text
        if songname in seen_names:
            songname = f'{songname}_{tracknumber}'
        seen_names.append(songname)
        ml.debug(songname)

        singer_links = entry.parent.findAll('a', {'class': "singer_name"})
        if len(singer_links) > 1:
            singer = ','.join(link.text for link in singer_links)
        else:
            singer = singer_links[0].text
        ml.debug(singer)

        aDict[int(tracknumber)] = [songmid, songname, singer]

    aDict['TrackNum'] = len(number_cells)
    return aDict
예제 #9
0
파일: bm_note.py 프로젝트: aka-achea/BM
def ana_wx(page):
    '''Fetch page and return its author and title.

    The author is the text of the link inside the first
    <span class="rich_media_meta rich_media_meta_nickname">, the title
    the text of the first <h2 class="rich_media_title">, both stripped
    of surrounding whitespace.

    Returns a dict with the keys 'author' and 'title'. Raises
    AttributeError when one of the elements is missing.
    '''
    ml = mylogger(logfile, get_funcname())
    soup = BeautifulSoup(op_simple(page, ran_header())[0], "html.parser")

    nickname_span = soup.find(
        'span', {'class': 'rich_media_meta rich_media_meta_nickname'})
    author = nickname_span.a.text.strip()

    title_heading = soup.find('h2', {'class': 'rich_media_title'})
    title = title_heading.text.strip()

    result = dict(author=author, title=title)
    ml.debug(result)
    return result
예제 #10
0
def ana_cd(albumlink):
    '''Get album JSON data and return it as a dictionary.

    The album id is whatever follows the last '=' in albumlink. The API
    response is passed through BeautifulSoup's prettify() before being
    handed to ana_json(). The value returned by op_sel(albumlink) is
    stored under the 'year' key of the result.
    '''
    ml = mylogger(logfile, get_funcname())
    year = op_sel(albumlink)

    albumid = albumlink.rpartition('=')[2]
    ml.dbg(albumid)

    api_url = f'http://{host}/api/album/{albumid}/'
    response = op_simple(api_url, ran_header(agentref, host, org))
    jdata = BeautifulSoup(response[0], "html.parser").prettify()
    ml.dbg(jdata)

    adict = ana_json(jdata)
    adict['year'] = year
    ml.dbg(adict)
    return adict
예제 #11
0
파일: ed_ana.py 프로젝트: aka-achea/xd
def ana_cd(albumlink):
    '''Get album JSON data and return it as a dictionary.

    The album id is whatever follows the last '=' in albumlink. The API
    response is passed through BeautifulSoup's prettify() before being
    handed to ana_json(). The value returned by op_sel(albumlink) is
    stored under the 'year' key of the result.
    '''
    year = op_sel(albumlink)
    albumid = albumlink.rpartition('=')[2]
    api_url = f'http://music.163.com/api/album/{albumid}/'
    raw = op_simple(api_url, ran_header(ref=agentref))[0]
    adict = ana_json(BeautifulSoup(raw, "html.parser").prettify())
    adict['year'] = year
    return adict
예제 #12
0
def ana_wx(page):
    '''Analyze a Weixin web page and return its author and title.

    The author is the text of the link inside the first
    <span class="rich_media_meta rich_media_meta_nickname">, the title
    the text of the first <h2 class="rich_media_title">, both stripped
    of surrounding whitespace.

    Returns a dict with the keys 'author' and 'title', or None when one
    of the elements cannot be found on the page.
    '''
    ml = mylogger(logfile,get_funcname())
    html = op_simple(page,ran_header())[0]
    bsObj = BeautifulSoup(html,"html.parser")
    try:
        author = bsObj.find('span',{'class':'rich_media_meta rich_media_meta_nickname'})
        author = author.a.text.strip()
        title = bsObj.find('h2',{'class':'rich_media_title'})
        title = title.text.strip()
    except AttributeError:
        # find() and .a yield None for a missing element, so the attribute
        # access above fails with AttributeError. Catch only that, so that
        # KeyboardInterrupt, SystemExit and unrelated bugs are no longer
        # silently swallowed as they were by a bare except.
        ml.dbg('Author or title element not found')
        return None
    p = {'author':author,'title':title}
    ml.dbg(p)
    return p
예제 #13
0
def get_loc_cd(song_id):
    '''Fetch the single-song XML for song_id and return its details.

    Returns an empty dict when the location field is empty (logged as
    'Track not published'); otherwise a dict with the keys 'location',
    'song' and 'singer'.
    '''
    log = mylogger(logfile, get_funcname())
    page = op_simple('http://www.xiami.com/widget/xml-single/sid/%s' % song_id)
    log.debug(page[1])
    soup = BeautifulSoup(page[0], "html.parser")

    def inner_text(tag_name):
        # Slice the rendered tag down to its payload. The offsets equal the
        # lengths of the '<tag><![CDATA[' prefix and ']]></tag>' suffix --
        # presumably the markup the parser renders; confirm against a
        # live response.
        head = len(f'<{tag_name}><![CDATA[')
        tail = len(f']]></{tag_name}>')
        return str(soup.find(tag_name))[head:-tail]

    raw_location = inner_text("location")
    if raw_location == '':
        log.debug('Track not published')
        return {}

    log.debug('Raw Location: ' + raw_location)
    location = decry(raw_location)
    song = modstr(inner_text("song_name"))
    singer = modstr(inner_text("artist_name"))

    SongDic = dict(location=location, song=song, singer=singer)
    log.debug(SongDic)
    return SongDic
예제 #14
0
 def test_op_simple(self):
     """Check that op_simple reports status 200 for a known widget URL."""
     print('Test op_simple')
     target = 'http://www.xiami.com/widget/xml-single/sid/1769402049'
     response = openlink.op_simple(target)
     # The second element of the result is compared with the expected status.
     status = response[1]
     self.assertEqual(status, 200)