def ana_song(weblink):
    '''Analyze a song page and return its metadata dict'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(weblink, ran_header(ref=agentref))[0]
    # html = op_requests(url, verify=False).content
    bsObj = BeautifulSoup(html, "html.parser")
    # ml.debug(bsObj)
    # title = bsObj.find('title')
    # print(title)
    song_name = bsObj.find('em', {'class': 'f-ff2'})
    songname = modstr(song_name.text.strip())
    ml.info(songname)
    aa = bsObj.findAll('p', {'class': 'des s-fc4'})
    artistname = modstr(aa[0].span.a.text)
    albumname = modstr(aa[1].a.text)
    ml.info(artistname)
    ml.info(albumname)
    cover = bsObj.find('div', {'class': 'u-cover u-cover-6 f-fl'})
    cover = cover.img.attrs['href']
    ml.info(cover)
    songmid = weblink.split('=')[-1]
    sDict = {
        'artist': artistname,
        'song_name': songname,
        'songmid': songmid,
        'cover': cover
    }
    ml.debug(sDict)
    return sDict
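
# A minimal usage sketch of ana_song. The link below is illustrative only;
# the function assumes the songmid sits after the last '=' in the URL.
def _demo_ana_song():
    s = ana_song('https://music.163.com/song?id=12345')  # hypothetical link
    # expected shape: {'artist': ..., 'song_name': ..., 'songmid': '12345', 'cover': ...}
    print(s['artist'], '-', s['song_name'])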
def get_vkeyguid(songmid, q=1):
    '''Get vkey and guid from songmid'''
    ml = mylogger(logfile, get_funcname())
    guid = int(random.random() * 2147483647) * int(
        time.time() * 1000) % 10000000000
    ml.debug(f'GUID:{guid}')
    url = 'http://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg'
    qly = quality[q][0]
    t = quality[q][1]
    para = {
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq',
        'needNewCode': '0',
        'cid': '205361747',  # important
        'uin': '0',
        'songmid': str(songmid),
        'filename': qly + str(songmid) + t,
        'guid': str(guid)
    }
    req = op_requests(url,
                      header=ran_header(ref=ref),
                      para=para,
                      verify=False)
    # print(req.content)
    j = req.json()
    vkey = j['data']['items'][0]['vkey']
    ml.debug(f'vkey:{vkey}')
    return vkey, guid
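
# Stand-alone sketch of the guid derivation used above: a random 31-bit
# integer is mixed with the millisecond timestamp and truncated to at most
# ten digits. This only restates the formula from get_vkeyguid.
def _demo_guid():
    import random, time
    guid = int(random.random() * 2147483647) * int(time.time() * 1000) % 10000000000
    assert 0 <= guid < 10000000000
    return guid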
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    data = {"params": params, "encSecKey": encSecKey}
    response = requests.post(url,
                             headers=ran_header(ref=agentref, host=host),
                             data=data)
    # print(response.text)
    return response.json()['data']
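
# A minimal usage sketch of get_json. 'params' and 'encSecKey' must come
# from the site's client-side encryption step, which this module does not
# show; the endpoint and the placeholder values below are assumptions.
def _demo_get_json():
    url = 'http://music.163.com/weapi/song/enhance/player/url/'  # assumed endpoint
    data = get_json(url, params='<encrypted-params>', encSecKey='<rsa-hex>')
    return data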
def ana_mono(page):
    '''Analyze Mono web'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(page, ran_header())[0]
    bsObj = BeautifulSoup(html, "html.parser")  # ;print(bsObj)
    author = bsObj.find('span', {'class': 'title'}).text.strip()
    title = bsObj.find('h1', {'class': 'title'}).text.strip()
    p = {'author': author, 'title': title}
    ml.debug(p)
    return p
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    ml = mylogger(logfile, get_funcname())
    data = {
        "params": params,
        "encSecKey": encSecKey
    }
    response = requests.post(url,
                             headers=ran_header(agentref, host, org),
                             data=data)
    ml.dbg(response.json())
    return response.json()['data']
def ana_dy(page):
    '''Analyze Douyin web'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(page, ran_header())[0]
    bsObj = BeautifulSoup(html, "html.parser")  # ;print(bsObj)
    author = bsObj.find('p', {'class': 'name nowrap'}).text.strip()
    title = bsObj.find('h1', {'class': 'desc'}).text.strip()
    p = {'author': author, 'title': title}
    ml.info(p)
    return p
def ana_album(weblink):
    '''Analyze an album page and return its album dictionary'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(weblink, header=ran_header(ref=ref))[0]
    bsObj = BeautifulSoup(html, "html.parser")  # ;print(bsObj)
    album_name = bsObj.find('h1', {'class': 'data__name_txt'})
    album_name = modstr(album_name.text)
    ml.debug(album_name)
    artist_name = bsObj.find('a', {'class': 'js_singer data__singer_txt'})
    artist_name = modstr(artist_name.text)
    ml.debug(artist_name)
    year = bsObj.find(text=re.compile('^发行时间'))[5:9]  # '发行时间' = release date
    ml.debug(year)
    cover = bsObj.find('img', {'id': 'albumImg'})
    cover = 'http:' + cover.attrs['src']
    ml.debug('Cover link: ' + cover)
    fullname = artist_name + ' - ' + year + ' - ' + album_name
    aDict = {
        'album': album_name,
        'artist': artist_name,
        'year': year,
        'cover': cover,
        'fullname': fullname
    }
    song = bsObj.findAll('div', {'class': 'songlist__number'})
    n = 0
    songtmp = []  # name duplicate check
    for i in song:
        n += 1
        tracknumber = i.text
        ml.debug('Find track ' + str(tracknumber))
        tmp = i.next_sibling.next_sibling
        si = tmp.find('span', {'class': 'songlist__songname_txt'}).a
        songmid = si.attrs['href'].split('/')[-1][:-5]
        songname = si.text
        if songname in songtmp:
            songname = songname + '_' + tracknumber
        songtmp.append(songname)
        ml.debug(songname)
        singers = tmp.parent.findAll('a', {'class': "singer_name"})
        if len(singers) > 1:
            s = list(map(lambda x: x.text, singers))
            singer = ','.join(s)
        else:
            singer = singers[0].text
        ml.debug(singer)
        si = [songmid, songname, singer]
        aDict[int(tracknumber)] = si
    aDict['TrackNum'] = n
    # ml.info(aDict)
    return aDict  # Album dictionary
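
# Sketch of consuming the album dictionary returned above: metadata lives
# under string keys, tracks under integer keys 1..TrackNum. The link is
# illustrative only.
def _demo_ana_album():
    adict = ana_album('https://y.qq.com/n/yqq/album/<albummid>.html')  # hypothetical link
    print(adict['fullname'])
    for track in range(1, adict['TrackNum'] + 1):
        songmid, songname, singer = adict[track]
        print(track, songmid, songname, singer)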
def find_book_ver(queryapi, book, author=''):
    '''Get book of different version, return version,verlink '''
    ml = mylogger(logfile, get_funcname())
    para = [('menu', 'search'), ('index', '.TW'), ('term', book)]
    if author:
        para += [('index', '.AW'), ('term', author)]
    ml.debug(para)
    try:
        vdict = {}  # version dictionary
        html = op_requests(url=queryapi, para=para, header=ran_header())
        ml.debug(html.url)
        bsObj = BeautifulSoup(html.content, "html.parser")
        if bsObj.find_all(string=re.compile("对不起")):  # "对不起" page text: no book found
            ml.error("对不起, 不能找到任何命中书目")  # sorry, no matching titles
            return None
        else:
            if vbook := bsObj.find_all("a", {"class": "mediumBoldAnchor"}):
                # mediumBoldAnchor >= 1: different versions found, only scan 1st page
                ml.debug('Find book version below')
                for v in vbook:
                    ml.debug(v)
                    n = v
                    for i in range(7):
                        n = n.parent
                    # ml.debug(n)
                    n = n.previous_sibling.text.strip()
                    # ml.debug(n)  # sample n: "1."
                    bookname = str(v).split('<')[1].split('>')[-1].strip()
                    # ml.info(bookname)
                    if bookname == book:
                        ml.debug(n + bookname)
                        ml.debug(v["href"])
                        vdict[n] = v["href"]
                    else:
                        ml.warning(n + bookname + '--> not match')
                        if input("Go ahead (Y/y)? Press Enter to ignore >>>"
                                 ) in ['y', 'Y']:
                            ml.info('Add to search candidate')
                            vdict[n] = v["href"]
                        else:
                            ml.warning('ignored')
                if vdict == {}:  # there are books, but no name match
                    if input("都不符合,翻页?(Y)") in ['y', 'Y']:  # none match; turn the page?
                        nextpage = bsObj.find_all(text="下页")[0].parent  # "下页" = next page
                        np = nextpage.attrs['href']
                        print('oooops do nothing')
                    else:
                        return None  # all none
            else:
                # mediumBoldAnchor = 0: search directly
def find_other_lib(weblink):
    '''Get link of other library
    return other library link
    '''
    ml = mylogger(logfile, get_funcname())
    ml.debug(weblink)
    global link
    try:
        # find other library tag
        bsObj = BeautifulSoup(
            op_requests(weblink, ran_header()).content, "html.parser")
        if other := bsObj.find("input", {"value": "其它馆址"}):  # "其它馆址" = other branches
            ml.debug(other)
            ol = str(other).split(" ")
            ml.debug(ol)
            # other_lib = modificate(ol[2][30:-2])
            other_lib = ol[2][30:-2].replace('&amp;', u'&').strip()
            ml.debug(f"Other lib is --> {other_lib}")
            link.add(other_lib)
            # go to other_lib
            bsObj = BeautifulSoup(
                op_requests(other_lib, ran_header()).content, "html.parser")
            more_other_lib(bsObj)
        else:
def ana_wx(page):
    '''Analyze Weixin web'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(page, ran_header())[0]
    # print(html)
    bsObj = BeautifulSoup(html, "html.parser")  # ;print(bsObj)
    # bsObj = BeautifulSoup(html, "html5lib")  # ;print(bsObj)
    author = bsObj.find('span', {'class': 'rich_media_meta rich_media_meta_nickname'})
    author = author.a.text.strip()
    title = bsObj.find('h2', {'class': 'rich_media_title'})
    title = title.text.strip()
    p = {'author': author, 'title': title}
    # p['link'] = page
    ml.debug(p)
    return p
def ana_cd(albumlink):
    '''Get album JSON data'''
    ml = mylogger(logfile, get_funcname())
    year = op_sel(albumlink)
    albumid = albumlink.split('=')[-1]
    ml.dbg(albumid)
    url = f'http://{host}/api/album/{albumid}/'
    html = op_simple(url, ran_header(agentref, host, org))[0]
    # print(html)
    jdata = BeautifulSoup(html, "html.parser").prettify()
    ml.dbg(jdata)
    adict = ana_json(jdata)
    adict['year'] = year
    ml.dbg(adict)
    return adict
def ana_cd(albumlink):
    '''Get album JSON data'''
    # ml = mylogger(logfile, get_funcname())
    # html = op_simple(albumlink, ran_header(ref=agentref))[0]
    year = op_sel(albumlink)
    # print(year)
    albumid = albumlink.split('=')[-1]
    # print(albumid)
    url = f'http://music.163.com/api/album/{albumid}/'
    html = op_simple(url, ran_header(ref=agentref))[0]
    jdata = BeautifulSoup(html, "html.parser").prettify()
    # jdata = bsObj.prettify()
    adict = ana_json(jdata)
    adict['year'] = year
    # print(jdata)
    return adict
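
# Worked example of the albumid extraction above: the id is whatever
# follows the last '=' in the album link (the link below is illustrative).
def _demo_albumid():
    albumlink = 'https://music.163.com/album?id=34515'  # hypothetical link
    albumid = albumlink.split('=')[-1]
    assert albumid == '34515'
    # -> requests http://music.163.com/api/album/34515/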
def ana_wx(page):
    '''Analyze Weixin web'''
    ml = mylogger(logfile, get_funcname())
    html = op_simple(page, ran_header())[0]
    # print(html)
    bsObj = BeautifulSoup(html, "html.parser")  # ;print(bsObj)
    # bsObj = BeautifulSoup(html, "html5lib")  # ;print(bsObj)
    try:
        author = bsObj.find('span', {'class': 'rich_media_meta rich_media_meta_nickname'})
        author = author.a.text.strip()
        title = bsObj.find('h2', {'class': 'rich_media_title'})
        title = title.text.strip()
        p = {'author': author, 'title': title}
        # p['link'] = page
        ml.dbg(p)
    except AttributeError:  # find() returned None: page layout not recognized
        return None
    return p
def find_library(cursor, liblink, book):
    '''Find library details:
    馆址 馆藏地 索书号 状态 应还日期 馆藏类型 馆藏条码
    (branch, location, call number, status, due date, item type, barcode)'''
    ml = mylogger(logfile, get_funcname())
    bsObj = BeautifulSoup(
        op_requests(liblink, ran_header()).content, "html.parser")
    for i in bsObj.find_all("tr", {"height": "15"}):
        ml.debug('=' * 10)
        library = i.td
        lib = library.text
        ml.debug("馆址:" + lib)
        if wantedlib(lib):
            room = library.next_sibling
            ml.debug("馆藏地:" + room.text)
            catalog = room.next_sibling
            cat = catalog.text
            ml.debug("索引号:" + cat)
            status = catalog.next_sibling
            if status.text == "归还":  # "归还" = returned
                if bsObj.find(title="应还日期"):  # "应还日期" = due date column present
                    # print("find 应还日期")
                    # index = room.next_sibling
                    # print(index.text)
                    btype = status.next_sibling.next_sibling
                else:
                    btype = status.next_sibling
                ml.debug("馆藏类型:" + btype.text)
                if btype.text == "普通外借资料":  # "普通外借资料" = ordinary circulating item
                    SN = btype.next_sibling.text
                    ml.debug("馆藏条码:" + SN)
                    try:
                        cursor.execute(
                            "insert into inventory values (?,?,?,?)",
                            (SN, book, lib, cat))
                    except sqlite3.IntegrityError as e:
                        ml.debug(e)
                        ml.error(f"Duplicate: {SN} {book} {lib}")
            else:
                ml.debug(lib + status.text)
        else:
            ml.debug('Not recommended library')
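
# The insert above assumes a four-column inventory table whose first
# column is unique (hence the IntegrityError on duplicates). A minimal
# sketch of a matching schema; the column names and database file are
# assumptions, only the table name and column count come from the code.
def _demo_inventory_db():
    import sqlite3
    conn = sqlite3.connect('library.db')  # hypothetical database file
    cur = conn.cursor()
    cur.execute('''CREATE TABLE IF NOT EXISTS inventory
                   (sn TEXT PRIMARY KEY, book TEXT, lib TEXT, cat TEXT)''')
    # find_library(cur, liblink, book)  # liblink/book supplied by the caller
    conn.commit()
    conn.close()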
def get_detail():
    dxyapi = 'https://lab.isaaclin.cn/nCoV/api/area?latest=1&province=%E4%B8%8A%E6%B5%B7%E5%B8%82'
    header = ran_header()
    try:
        html = requests.get(dxyapi, headers=header)
        # bsobj = BeautifulSoup(html, 'html.parser').text
        j = json.loads(html.text)
    except json.decoder.JSONDecodeError:
        print(html.text)
        print('Fail to update data')
        return False
    data = j['results'][0]
    confirmedCount = data['confirmedCount']
    curedCount = data['curedCount']
    deadCount = data['deadCount']
    detail = data['cities']
    detail = {d['cityName']: d['confirmedCount'] for d in detail}
    pprint(detail)
    with open(shsumary, 'w', encoding='utf-8') as f:
        json.dump(detail, f, ensure_ascii=False, indent=2)
    return True
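
# Worked example of the per-city comprehension above, with made-up data:
# it maps each city name to its confirmed count.
def _demo_city_detail():
    cities = [{'cityName': '浦东新区', 'confirmedCount': 3},
              {'cityName': '徐汇区', 'confirmedCount': 1}]
    detail = {d['cityName']: d['confirmedCount'] for d in cities}
    assert detail == {'浦东新区': 3, '徐汇区': 1}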
from mylog import get_funcname, mylogger
from mystr import fnamechecker as modstr
import myget
from mytool import mywait

quality = {
    1: ['M500', '.mp3', '66'],   # work, 99
    2: ['M800', '.mp3', '53'],   # work, 99
    3: ['F000', '.flac', '99'],
    4: ['C400', '.m4a', '66'],   # work 999
    5: ['A000', '.ape', '64']
}
ref = 'https://y.qq.com'
header = ran_header(ref=ref)


def get_vkeyguid(songmid, q=1):
    '''Get vkey and guid from songid'''
    ml = mylogger(logfile, get_funcname())
    guid = int(random.random() * 2147483647) * int(time.time() * 1000) % 10000000000
    ml.dbg(f'GUID:{guid}')
    url = 'https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg'
    qly = quality[q][0]
    t = quality[q][1]
    para = {
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq',
        'needNewCode': '0',
        'cid': '205361747',  # important
        'uin': '0',
        'songmid': str(songmid),
        'filename': qly + str(songmid) + t,
        'guid': str(guid)
    }
    req = op_requests(url, header=ran_header(ref=ref), para=para, verify=False)
    j = req.json()
    vkey = j['data']['items'][0]['vkey']
    ml.dbg(f'vkey:{vkey}')
    return vkey, guid
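
# Worked example of the filename scheme used in get_vkeyguid: quality
# prefix + songmid + extension. The songmid below is illustrative.
def _demo_filename():
    songmid = '000abcde12345'  # hypothetical songmid
    q = 1
    filename = quality[q][0] + songmid + quality[q][1]
    assert filename == 'M500000abcde12345.mp3'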
import pyautogui as auto
from urllib.parse import unquote
# customized module
from config import logfile, dldir
from openlink import op_simple, ran_header
from mtag import addtag
from mylog import get_funcname, mylogger
from mp3archive import find_album, create_folder
from mytool import mywait, get_text_clipboard, clickbutton, capture
from myfs import clean_f
from myimg import squaresize
from mystr import fnamechecker as modstr
import myget

ref = 'https://www.xiami.com/'
headers = ran_header(ref=ref)


def decry(code):
    '''decrypt download url'''
    url = code[1:]
    urllen = len(url)
    rows = int(code[0])  # first character is the row count
    cols_base = urllen // rows  # ;print(cols_base)  # basic column count
    rows_ex = urllen % rows  # ;print(rows_ex)  # count of rows that have 1 more column
    matrix = []
    for r in range(rows):
        length = cols_base + 1 if r < rows_ex else cols_base
        matrix.append(url[:length])
        url = url[length:]
    # for i in matrix: print(i)
    # read the matrix column by column to restore the URL, then unquote
    # and map '^' back to '0' (the usual Xiami location scheme)
    out = ''
    for i in range(urllen):
        out += matrix[i % rows][i // rows]
    return unquote(out).replace('^', '0')
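
# Tiny worked example of decry with a made-up code string: row count 3,
# rows 'adg' / 'beh' / 'cf', read column-wise -> 'abcdefgh'.
def _demo_decry():
    assert decry('3adgbehcf') == 'abcdefgh'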
def get_json(url, params, encSecKey):
    '''Get response of song download url'''
    data = {"params": params, "encSecKey": encSecKey}
    response = requests.post(url, headers=ran_header(), data=data).json()
    return response['data']