Example #1
def ana_song(weblink):
    ml = mylogger(logfile, get_funcname())
    html = op_simple(weblink, ran_header(ref=agentref))[0]
    # html = op_requests(url,verify=False).content
    bsObj = BeautifulSoup(html, "html.parser")
    # ml.debug(bsObj)
    # title = bsObj.find('title')
    # print(title)
    song_name = bsObj.find('em', {'class': 'f-ff2'})
    songname = modstr(song_name.text.strip())
    ml.info(songname)
    aa = bsObj.findAll('p', {'class': 'des s-fc4'})
    artistname = modstr(aa[0].span.a.text)
    albumname = modstr(aa[1].a.text)
    ml.info(artistname)
    ml.info(albumname)

    cover = bsObj.find('div', {'class': 'u-cover u-cover-6 f-fl'})
    cover = cover.img.attrs['src']  # cover image URL is carried in the img src attribute
    ml.info(cover)

    songmid = weblink.split('=')[-1]

    sDict = {
        'artist': artistname,
        'song_name': songname,
        'songmid': songmid,
        'cover': cover
    }
    ml.debug(sDict)
    return sDict
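A minimal usage sketch of ana_song() as defined above. The song link is a made-up placeholder in the NetEase music.163.com style these examples use; the printed keys mirror the sDict built in the function.

song_url = 'https://music.163.com/song?id=12345678'  # hypothetical placeholder link
info = ana_song(song_url)
print(info['artist'], '-', info['song_name'])  # keys from the sDict built above
print(info['songmid'], info['cover'])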
Example #2
def get_vkeyguid(songmid, q=1):
    ml = mylogger(logfile, get_funcname())
    guid = int(random.random() * 2147483647) * int(
        time.time() * 1000) % 10000000000
    ml.debug(f'GUID:{guid}')
    url = 'http://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg'
    qly = quality[q][0]
    t = quality[q][1]
    para = {
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq',
        'needNewCode': '0',
        'cid': '205361747',  #important 
        'uin': '0',
        'songmid': str(songmid),
        'filename': qly + str(songmid) + t,
        'guid': str(guid)
    }
    req = op_requests(url, header=ran_header(ref=ref), para=para, verify=False)
    # print(req.content)
    j = req.json()
    vkey = j['data']['items'][0]['vkey']
    ml.debug(f'vkey:{vkey}')
    return vkey, guid
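A minimal usage sketch of get_vkeyguid() as shown above; the songmid string is a made-up placeholder, and q=1 selects the M500/.mp3 entry of the quality table.

songmid = '000FAKEMID00'  # hypothetical placeholder songmid
vkey, guid = get_vkeyguid(songmid, q=1)
print(f'vkey={vkey} guid={guid}')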
Example #3
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    data = {"params": params, "encSecKey": encSecKey}
    response = requests.post(url,
                             headers=ran_header(ref=agentref, host=host),
                             data=data)
    # print(response.text)
    return response.json()['data']
Example #4
def ana_mono(page): 
    ml = mylogger(logfile,get_funcname())   
    html = op_simple(page,ran_header())[0]
    bsObj = BeautifulSoup(html,"html.parser") #;print(bsObj)
    author = bsObj.find('span',{'class':'title'}).text.strip()
    title = bsObj.find('h1',{'class':'title'}).text.strip()
    p = {'author':author,'title':title}
    ml.debug(p)
    return p
Example #5
def ana_mono(page): 
    '''Analyze Mono web'''
    ml = mylogger(logfile,get_funcname())   
    html = op_simple(page,ran_header())[0]
    bsObj = BeautifulSoup(html,"html.parser") #;print(bsObj)
    author = bsObj.find('span',{'class':'title'}).text.strip()
    title = bsObj.find('h1',{'class':'title'}).text.strip()
    p = {'author':author,'title':title}
    ml.debug(p)
    return p
Example #6
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    ml = mylogger(logfile,get_funcname())
    data = {
        "params": params,
        "encSecKey": encSecKey
    }
    response = requests.post(url,headers=ran_header(agentref,host,org),data=data)
    ml.dbg(response.json())
    return response.json()['data']
Example #7
def ana_dy(page): 
    '''Analyze Douyin web'''
    ml = mylogger(logfile,get_funcname())   
    html = op_simple(page,ran_header())[0]
    bsObj = BeautifulSoup(html,"html.parser") #;print(bsObj)
    author = bsObj.find('p',{'class':'name nowrap'}).text.strip()
    title = bsObj.find('h1',{'class':'desc'}).text.strip()
    p = {'author':author,'title':title}
    ml.info(p)
    return p
Example #8
def ana_album(weblink):
    ml = mylogger(logfile, get_funcname())
    html = op_simple(weblink, header=ran_header(ref=ref))[0]
    bsObj = BeautifulSoup(html, "html.parser")  #;print(bsObj)
    album_name = bsObj.find('h1', {'class': 'data__name_txt'})
    album_name = modstr(album_name.text)
    ml.debug(album_name)

    artist_name = bsObj.find('a', {'class': 'js_singer data__singer_txt'})
    artist_name = modstr(artist_name.text)
    ml.debug(artist_name)

    year = bsObj.find(text=re.compile('^发行时间'))[5:9]
    ml.debug(year)

    cover = bsObj.find('img', {'id': 'albumImg'})
    cover = 'http:' + cover.attrs['src']
    ml.debug('Cover link: ' + cover)

    fullname = artist_name + ' - ' + year + ' - ' + album_name
    aDict = {
        'album': album_name,
        'artist': artist_name,
        'year': year,
        'cover': cover,
        'fullname': fullname
    }

    song = bsObj.findAll('div', {'class': 'songlist__number'})
    n = 0
    songtmp = []  # name duplicate check
    for i in song:
        n += 1
        tracknumber = i.text
        ml.debug('Find track ' + str(tracknumber))
        tmp = i.next_sibling.next_sibling
        si = tmp.find('span', {'class': 'songlist__songname_txt'}).a
        songmid = si.attrs['href'].split('/')[-1][:-5]
        songname = si.text
        if songname in songtmp:
            songname = songname + '_' + tracknumber
        songtmp.append(songname)
        ml.debug(songname)
        singers = tmp.parent.findAll('a', {'class': "singer_name"})
        if len(singers) > 1:
            s = list(map(lambda x: x.text, singers))
            singer = ','.join(s)
        else:
            singer = singers[0].text
        ml.debug(singer)
        si = [songmid, songname, singer]
        aDict[int(tracknumber)] = si
    aDict['TrackNum'] = n
    # ml.info(aDict)
    return aDict  # Album dictionary
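A short sketch of walking the album dictionary returned by ana_album(); the album link is a made-up placeholder, while the keys mirror the aDict built above.

adict = ana_album('https://y.qq.com/some_album_page')  # hypothetical placeholder link
print(adict['fullname'])  # "artist - year - album"
for track in range(1, adict['TrackNum'] + 1):
    songmid, songname, singer = adict[track]  # per-track entry: [songmid, songname, singer]
    print(track, songname, singer)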
Example #9
def find_book_ver(queryapi, book, author=''):
    '''Find different versions of a book;
    return a dict mapping each version number to its link
    '''
    ml = mylogger(logfile, get_funcname())
    para = [('menu', 'search'), ('index', '.TW'), ('term', book)]
    if author:
        para += [('index', '.AW'), ('term', author)]
    ml.debug(para)
    try:
        vdict = {}  # version dictionary
        html = op_requests(url=queryapi, para=para, header=ran_header())
        ml.debug(html.url)
        bsObj = BeautifulSoup(html.content, "html.parser")
        if bsObj.find_all(string=re.compile("对不起")):  # no matching book found
            ml.error("对不起, 不能找到任何命中书目")
            return None
        else:
            if vbook := bsObj.find_all("a", {"class": "mediumBoldAnchor"}):
                # mediumBoldAnchor >= 1: different versions found, only scan the 1st page
                ml.debug('Find book version below')
                for v in vbook:
                    ml.debug(v)
                    n = v
                    for i in range(7):
                        n = n.parent
                    # ml.debug(n)
                    n = n.previous_sibling.text.strip()
                    # ml.debug(n)  # sample n :  "1."
                    bookname = str(v).split('<')[1].split('>')[-1].strip()
                    # ml.info(bookname)
                    if bookname == book:
                        ml.debug(n + bookname)
                        ml.debug(v["href"])
                        vdict[n] = v["href"]
                    else:
                        ml.warning(n + bookname + ' --> no match')
                        if input("Go ahead (Y/y)? Press Enter to ignore >>>"
                                 ) in ['y', 'Y']:
                            ml.info('Add to search candidate')
                            vdict[n] = v["href"]
                        else:
                            ml.warning('ignored')
                if vdict == {}:  # books were found, but none matched the name
                    if input("都不符合,翻页?(Y)") in ['y', 'Y']:
                        nextpage = bsObj.find_all(text="下页")[0].parent
                        np = nextpage.attrs['href']
                        print('oooops do nothing')
                    else:
                        return None  # all none
            else:  # mediumBoldAnchor = 0 , search directly
Example #10
def find_other_lib(weblink):
    '''Get link of other library
    return other library link
    '''
    ml = mylogger(logfile, get_funcname())
    ml.debug(weblink)
    global link
    try:  #find other library tag
        bsObj = BeautifulSoup(
            op_requests(weblink, ran_header()).content, "html.parser")
        if other := bsObj.find("input", {"value": "其它馆址"}):
            ml.debug(other)
            ol = (str(other).split(" "))
            ml.debug(ol)
            # other_lib = modificate(ol[2][30:-2])
            other_lib = ol[2][30:-2].replace('&amp;', u'&').strip()
            ml.debug(f"Other lib is -->  {other_lib}")
            link.add(other_lib)
            #go to other_lib
            bsObj = BeautifulSoup(
                op_requests(other_lib, ran_header()).content, "html.parser")
            more_other_lib(bsObj)
        else:
Example #11
def ana_wx(page):
    ml = mylogger(logfile,get_funcname())   
    html = op_simple(page,ran_header())[0]
    # print(html)
    bsObj = BeautifulSoup(html,"html.parser") #;print(bsObj)
    # bsObj = BeautifulSoup(html,"html5lib") #;print(bsObj)
    author = bsObj.find('span',{'class':'rich_media_meta rich_media_meta_nickname'})
    author = author.a.text.strip()
    title = bsObj.find('h2',{'class':'rich_media_title'})
    title = title.text.strip()
    p = {'author':author,'title':title}
    # p['link'] = page
    ml.debug(p)
    return p
Example #12
def ana_cd(albumlink):
    '''Get album JSON data'''
    ml = mylogger(logfile,get_funcname()) 
    year = op_sel(albumlink)
    albumid = albumlink.split('=')[-1]
    ml.dbg(albumid)
    url = f'http://{host}/api/album/{albumid}/'
    html = op_simple(url,ran_header(agentref,host,org))[0]
    # print(html)
    jdata = BeautifulSoup(html,"html.parser").prettify()
    ml.dbg(jdata)
    adict = ana_json(jdata)
    adict['year'] = year
    ml.dbg(adict)
    return adict
Example #13
def ana_cd(albumlink):
    '''Get album JSON data'''
    # ml = mylogger(logfile,get_funcname())
    # html = op_simple(albumlink,ran_header(ref=agentref))[0]
    year = op_sel(albumlink)
    # print(year)
    albumid = albumlink.split('=')[-1]
    # print(albumid)
    url = f'http://music.163.com/api/album/{albumid}/'
    html = op_simple(url, ran_header(ref=agentref))[0]
    jdata = BeautifulSoup(html, "html.parser").prettify()
    # jdata = bsObj.prettify()
    adict = ana_json(jdata)
    adict['year'] = year
    # print(jdata)
    return adict
Example #14
def ana_wx(page):
    '''Analyze Weixin web'''
    ml = mylogger(logfile,get_funcname())   
    html = op_simple(page,ran_header())[0]
    # print(html)
    bsObj = BeautifulSoup(html,"html.parser") #;print(bsObj)
    # bsObj = BeautifulSoup(html,"html5lib") #;print(bsObj)
    try:
        author = bsObj.find('span',{'class':'rich_media_meta rich_media_meta_nickname'})
        author = author.a.text.strip()
        title = bsObj.find('h2',{'class':'rich_media_title'})
        title = title.text.strip()
        p = {'author':author,'title':title}
        # p['link'] = page
        ml.dbg(p)
    except AttributeError:  # expected tag missing from the page
        return None
    return p
Example #15
def find_library(cursor, liblink, book):
    '''Find library details\n
    馆址 馆藏地	索书号 状态	应还日期 馆藏类型 馆藏条码'''
    ml = mylogger(logfile, get_funcname())
    bsObj = BeautifulSoup(
        op_requests(liblink, ran_header()).content, "html.parser")
    for i in bsObj.find_all("tr", {"height": "15"}):
        ml.debug('=' * 10)
        library = i.td
        lib = library.text
        ml.debug("馆址:" + lib)
        if wantedlib(lib):
            room = library.next_sibling
            ml.debug("馆藏地:" + room.text)
            catalog = room.next_sibling
            cat = catalog.text
            ml.debug("索引号:" + cat)
            status = catalog.next_sibling
            if status.text == "归还":
                if bsObj.find(title="应还日期"):
                    #print("find 应还日期")
                    #index = room.next_sibling
                    #print(index.text)
                    btype = status.next_sibling.next_sibling
                else:
                    btype = status.next_sibling
                ml.debug("馆藏类型:" + btype.text)
                if btype.text == "普通外借资料":
                    SN = btype.next_sibling.text
                    ml.debug("馆藏条码:" + SN)
                    try:
                        cursor.execute(
                            "insert into inventory values (?,?,?,?)",
                            (SN, book, lib, cat))
                    except sqlite3.IntegrityError as e:
                        ml.debug(e)
                        ml.error(f"Duplicate: {SN} {book} {lib}")
            else:
                ml.debug(lib + status.text)
        else:
            ml.debug('Not a recommended library')
Example #16
def get_detail():
    dxyapi = 'https://lab.isaaclin.cn/nCoV/api/area?latest=1&province=%E4%B8%8A%E6%B5%B7%E5%B8%82'
    header = ran_header()
    try:
        html = requests.get(dxyapi, headers=header)  # use the randomized header built above
        # bsobj = BeautifulSoup(html,'html.parser').text
        j = json.loads(html.text)
    except json.decoder.JSONDecodeError:
        print(html.text)
        print('Fail to update data')
        return False
    data = j['results'][0]
    confirmedCount = data['confirmedCount']
    curedCount = data['curedCount']
    deadCount = data['deadCount']
    detail = data['cities']
    detail = {d['cityName']: d['confirmedCount'] for d in detail}
    pprint(detail)
    with open(shsumary, 'w', encoding='utf-8') as f:
        json.dump(detail, f, ensure_ascii=False, indent=2)
    return True
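To make the per-city mapping explicit, here is the same dict comprehension run on a tiny mock payload; the city names and counts are invented for illustration.

cities = [{'cityName': '浦东新区', 'confirmedCount': 3},
          {'cityName': '徐汇区', 'confirmedCount': 1}]
by_city = {d['cityName']: d['confirmedCount'] for d in cities}
print(by_city)  # {'浦东新区': 3, '徐汇区': 1}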
Example #17
import random
import time

from config import logfile
from openlink import ran_header
from mylog import get_funcname, mylogger
from mystr import fnamechecker as modstr
import myget
from mytool import mywait


quality = {
    1: ['M500', '.mp3', '66'],   # work, 99
    2: ['M800', '.mp3', '53'],   # work, 99
    3: ['F000', '.flac', '99'],
    4: ['C400', '.m4a', '66'],   # work 999
    5: ['A000', '.ape', '64']
}

ref = 'https://y.qq.com'
header = ran_header(ref=ref)


def get_vkeyguid(songmid,q=1):
    '''Get vkey and guid from songid'''
    ml = mylogger(logfile,get_funcname()) 
    guid = int(random.random()*2147483647)*int(time.time()*1000) % 10000000000
    ml.dbg(f'GUID:{guid}')
    url = 'https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg'
    qly = quality[q][0]
    t = quality[q][1]
    para = {
            'loginUin':'0',
            'hostUin':'0',
            'format':'json',
            'inCharset':'utf8',
Example #18
import pyautogui as auto

# customized module
from config import logfile, dldir
from openlink import op_simple, ran_header
from mtag import addtag
from mylog import get_funcname, mylogger
from mp3archive import find_album, create_folder
from mytool import mywait, get_text_clipboard, clickbutton, capture
from myfs import clean_f
from myimg import squaresize
from mystr import fnamechecker as modstr
import myget

ref = 'https://www.xiami.com/'
headers = ran_header(ref=ref)


def decry(code):
    '''decrypt download url'''
    url = code[1:]
    urllen = len(url)
    rows = int(code[0])
    cols_base = urllen // rows  #;print(cols_base) # basic column count
    rows_ex = urllen % rows  #;print(rows_ex)   # count of rows that have 1 more column
    matrix = []
    for r in range(rows):
        length = cols_base + 1 if r < rows_ex else cols_base
        matrix.append(url[:length])
        url = url[length:]
    #for i in matrix : print(i)
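The example is cut off at this point. As an assumption about how such a row split is usually consumed, the standalone sketch below reads a matrix back column by column; it is illustrative only, not the original continuation of decry().

def read_columns(matrix, cols_base):
    '''Illustrative sketch (assumed, not from the original code):
    read a row-split matrix back column by column.'''
    decoded = ''
    for c in range(cols_base + 1):  # the widest rows hold cols_base + 1 chars
        for row in matrix:
            if c < len(row):  # shorter rows contribute nothing to this column
                decoded += row[c]
    return decoded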
Example #19
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    data = {"params": params, "encSecKey": encSecKey}
    response = requests.post(url, headers=ran_header(), data=data).json()
    return response['data']