def album_data():
    """Crawl the chart list page and build one record per listed album.

    For each chart row the album id is parsed from the album link, the album
    detail page is scraped for its metadata, and the rating JSON endpoint is
    queried for the average score.

    Returns:
        list[list]: ``[album_id, album_title, album_genre, rating, releasedt,
        album_comp, entertainment]`` for every row whose album id and detail
        metadata could be read. ``rating`` is a string with two decimals.
    """
    list_url = "http://vlg.berryservice.net:8099/melon/list"
    detail_url = "http://vlg.berryservice.net:8099/melon/detail?albumId={}"
    rating_url = (
        "http://vlg.berryservice.net:8099/melon/albumratejson?albumId={}"
    )
    # The album id is the single-quoted argument inside the link's href.
    quoted = re.compile(r"'(.*)'")

    records = []
    done = 0
    for row in mf.request(list_url).select('tbody tr[data-song-no]'):
        album_title = row.select_one('div.ellipsis.rank03 a').text

        # When several links match, the last one carrying an id wins.
        album_id = None
        for link in row.select('td:nth-of-type(4) a'):
            found = quoted.findall(link.attrs['href'])
            if found:
                album_id = found[0]
        if album_id is None:
            # Never reuse the id left over from the previous row.
            print("Album -----> skipped (no album id):", album_title)
            continue

        # Album detail page: release date, genre, publisher, agency.
        entry = mf.request(detail_url.format(album_id)).select_one('div.entry')
        meta_lists = (
            entry.select('div.meta dl.list') if entry is not None else []
        )
        if not meta_lists:
            # Without metadata there is nothing trustworthy to store.
            print("Album -----> skipped (no detail metadata):", album_id)
            continue
        meta = meta_lists[-1]
        releasedt = meta.select_one('dd:nth-of-type(1)').text
        album_genre = meta.select_one('dd:nth-of-type(2)').text
        album_comp = meta.select_one('dd:nth-of-type(3)').text
        entertainment = meta.select_one('dd:nth-of-type(4)').text

        # Album rating. json.loads() no longer accepts an ``encoding``
        # argument (removed in Python 3.9); the response text is already str.
        rating_json = requests.get(rating_url.format(album_id)).text
        rating = json.loads(rating_json)['infoGrade']['TOTAVRGSCORE']

        # Gather every column of the record.
        records.append([
            album_id,
            album_title,
            album_genre,
            "{:.02f}".format(float(rating)),
            releasedt,
            album_comp,
            entertainment,
        ])
        done += 1
        print("Album ----->", done, "record --> done!")

    print("Album_data has been downloaded!!!!")
    return records
def songsinger():
    """Pair each chart song with matching artists from the Singer table.

    The chart list page supplies ``(song_no, singer text)`` per row; the
    ``Singer`` table supplies ``(artist_id, name)``. A pair is produced
    whenever an artist name occurs as a substring of the row's singer text.

    Returns:
        list[list]: ``[song_no, artist_id]`` pairs.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    rows = mf.request(url).select("#frm table tbody tr ")
    songs = [
        [row.attrs["data-song-no"], row.select_one('div.rank02 span').text]
        for row in rows
    ]

    # Fetch id and name of every artist from the Singer table. The cursor
    # and connection are released even if the query fails.
    conn = mf.get_conn('melondb')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''select artist_id, name from Singer''')
            artists = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    # Compare the chart's singer text with each stored artist name and keep
    # the song/artist pair whenever the name is contained in the text.
    data = []
    for song in songs:
        for artist in artists:
            if artist[1] in song[1]:
                data.append([song[0], artist[0]])
                print("MS_Song===>>", song, "\nSinger====>>>", artist)
    return data
def songsinger():
    """Pair each stored artist with the chart songs they appear on.

    The chart list page supplies ``(song_no, singer text)`` per row; the
    ``Singer`` table supplies ``(artist_id, name)``. A pair is produced
    whenever an artist name occurs as a substring of the row's singer text.

    Returns:
        list[list]: ``[artist_id, song_no]`` pairs.
    """
    url = "http://vlg.berryservice.net:8099/melon/list"
    rows = mf.request(url).select("#frm table tbody tr ")
    songs = [
        [row.attrs["data-song-no"], row.select_one('div.rank02 span').text]
        for row in rows
    ]

    # The cursor and connection are released even if the query fails.
    conn = mf.get_conn('melondb')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute('''select artist_id, name from Singer''')
            artists = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

    data = []
    for song in songs:
        for artist in artists:
            if artist[1] in song[1]:
                data.append([artist[0], song[0]])
                print("MS_Song===>>", song, "\nSinger====>>>", artist)
    return data
def song_data():
    """Crawl the chart list and each song's detail page.

    The list page supplies song id, title and album id; the song detail page
    supplies the genre.

    Returns:
        list[list]: ``[song_no, title, genre, album_id]`` for every song
        whose detail page exposes a metadata block.
    """
    list_url = "http://vlg.berryservice.net:8099/melon/list"
    detail_url = "http://vlg.berryservice.net:8099/melon/songdetail?songId="
    meta_selector = "#downloadfrm div.wrap_info div.entry div.meta"
    # The album id is the single-quoted argument inside the link's href.
    quoted = re.compile(r"'(.*)'")

    # Song id, title and album id from the index page.
    songs = []
    for row in mf.request(list_url).select("#frm table tbody tr "):
        song_id = row.attrs["data-song-no"]
        title = row.select_one('div.rank01 span a').text
        href = row.select_one('div.ellipsis.rank03 a').attrs['href']
        songs.append((song_id, title, quoted.findall(href)[0]))

    # Genre from each song's detail page. The genre is taken straight from
    # the block just parsed instead of being looked up by song index in a
    # shared list, so a page with zero or several metadata blocks can no
    # longer shift genres onto the wrong song.
    lst = []
    for num, (song_id, title, album_id) in enumerate(songs):
        for meta in mf.request(detail_url + song_id).select(meta_selector):
            genre = meta.select_one('dl.list dd:nth-of-type(3)').text
            lst.append([song_id, title, genre, album_id])
            print('Song ----->', (num + 1), 'record --> done!')

    print("Finished Crawling Songs!!!!!")
    return lst
def album_data():
    """Crawl the chart list page and build one record per listed album.

    For each chart row the album id is parsed from the album link, the album
    detail page is scraped for its metadata, and the rating JSON endpoint is
    queried through ``mf.requestJson`` for the average score.

    Returns:
        list[list]: ``[album_id, album_title, album_genre, rating, releasedt,
        album_comp, entertainment]`` for every row whose album id and detail
        metadata could be read. ``rating`` is a string with two decimals.
    """
    list_url = "http://vlg.berryservice.net:8099/melon/list"
    detail_url = "http://vlg.berryservice.net:8099/melon/detail?albumId={}"
    rating_url = (
        "http://vlg.berryservice.net:8099/melon/albumratejson?albumId={}"
    )
    # The album id is the single-quoted argument inside the link's href.
    quoted = re.compile(r"'(.*)'")

    records = []
    done = 0
    for row in mf.request(list_url).select('tbody tr[data-song-no]'):
        album_title = row.select_one('div.ellipsis.rank03 a').text

        # When several links match, the last one carrying an id wins.
        album_id = None
        for link in row.select('td:nth-of-type(4) a'):
            found = quoted.findall(link.attrs['href'])
            if found:
                album_id = found[0]
        if album_id is None:
            # Never reuse the id left over from the previous row.
            print("Album -----> skipped (no album id):", album_title)
            continue

        # Album detail page: release date, genre, publisher, agency.
        entry = mf.request(detail_url.format(album_id)).select_one('div.entry')
        meta_lists = (
            entry.select('div.meta dl.list') if entry is not None else []
        )
        if not meta_lists:
            # Without metadata there is nothing trustworthy to store.
            print("Album -----> skipped (no detail metadata):", album_id)
            continue
        meta = meta_lists[-1]
        releasedt = meta.select_one('dd:nth-of-type(1)').text
        album_genre = meta.select_one('dd:nth-of-type(2)').text
        album_comp = meta.select_one('dd:nth-of-type(3)').text
        entertainment = meta.select_one('dd:nth-of-type(4)').text

        # Album rating.
        rating_info = mf.requestJson(rating_url.format(album_id))
        rating = rating_info['infoGrade']['TOTAVRGSCORE']

        # Gather every column of the record.
        records.append([
            album_id,
            album_title,
            album_genre,
            "{:.02f}".format(float(rating)),
            releasedt,
            album_comp,
            entertainment,
        ])
        done += 1
        print("Album ----->", done, "record --> done!")

    print("Album_data has been downloaded!!!!")
    return records
def song_data():
    """Crawl the chart list and each song's detail page.

    The list page supplies song id, title and album id; the song detail page
    supplies the genre. The collected rows are printed before returning.

    Returns:
        list[list]: ``[song_no, title, genre, album_id]`` for every song
        whose detail page exposes a metadata block.
    """
    list_url = "http://vlg.berryservice.net:8099/melon/list"
    detail_url = "http://vlg.berryservice.net:8099/melon/songdetail?songId="
    meta_selector = "#downloadfrm div.wrap_info div.entry div.meta"
    # The album id is the single-quoted argument inside the link's href.
    quoted = re.compile(r"'(.*)'")

    # Song id, title and album id from the index page.
    songs = []
    for row in mf.request(list_url).select("#frm table tbody tr "):
        song_id = row.attrs["data-song-no"]
        title = row.select_one('div.rank01 span a').text
        href = row.select_one('div.ellipsis.rank03 a').attrs['href']
        songs.append((song_id, title, quoted.findall(href)[0]))

    # Genre from each song's detail page. The genre is taken straight from
    # the block just parsed instead of being looked up by song index in a
    # shared list, so a page with zero or several metadata blocks can no
    # longer shift genres onto the wrong song.
    lst = []
    for num, (song_id, title, album_id) in enumerate(songs):
        for meta in mf.request(detail_url + song_id).select(meta_selector):
            genre = meta.select_one('dl.list dd:nth-of-type(3)').text
            lst.append([song_id, title, genre, album_id])
            print('Song ----->', (num + 1), 'record --> done!')

    print("Finished Crawling Songs!!!!!")
    print(lst)
    return lst
def singer():
    """Collect the artists linked from the chart list page.

    Every artist anchor found in the sixth cell of a chart row contributes
    one entry; the artist id is the single-quoted argument of the anchor's
    href.

    Returns:
        list[list]: ``[singer_id, singer_name]`` entries in page order.
    """
    id_pattern = re.compile("\'(.*)\'")
    page = mf.request("http://vlg.berryservice.net:8099/melon/list")
    anchor_selector = 'td:nth-of-type(6) div.ellipsis.rank02 span a'
    return [
        [id_pattern.findall(anchor.attrs['href'])[0], anchor.text]
        for chart_row in page.select('tbody tr[data-song-no]')
        for anchor in chart_row.select(anchor_selector)
    ]
def song_rank(limit=100):
    """Collect rank and like count for the songs on the chart list page.

    Args:
        limit: Maximum number of chart rows to return. Defaults to 100.

    Returns:
        list[list]: ``[song_no, rank, rankdt, likecnt]`` per chart row, where
        ``rankdt`` is today's date as ``YYYYMMDD``. ``likecnt`` is ``None``
        when the like endpoint reports nothing for that song.
    """
    now = datetime.datetime.now()
    list_url = "http://vlg.berryservice.net:8099/melon/list"
    like_url = "http://vlg.berryservice.net:8099/melon/likejson"

    # Song id and rank from the list page.
    songs = [
        (row.attrs["data-song-no"], row.select_one('div span.rank').text)
        for row in mf.request(list_url).select("#frm table tbody tr ")
    ]

    # Like counts keyed by song id, so the result does not depend on the
    # JSON entries arriving in chart order. The first entry per id wins.
    likes = {}
    for item in mf.requestJson(like_url)['contsLike']:
        likes.setdefault(str(item['CONTSID']), item['SUMMCNT'])

    # Update date.
    date = now.strftime('%Y%m%d')

    # Assemble the rows; slicing copes with charts shorter than ``limit``.
    lst = []
    for position, (song_id, rank) in enumerate(songs[:limit], start=1):
        lst.append([song_id, rank, date, likes.get(str(song_id))])
        print("Rank_lst ----->", position, "record --> done!")

    print("Ranking has been downloaded!!!!")
    return lst
"""Driver script: crawl albums, songs, ranks and singers and store them."""
import json
import random
import time

import requests
from bs4 import BeautifulSoup

import album
import make_songsinger as ms
import melon_function as mf
import singer as s
import song_rank as sr

url = "http://vlg.berryservice.net:8099/melon/list"
trs = mf.request(url).select('tbody tr[data-song-no]')

# Albums.
# NOTE(review): the chart rows are passed in here; confirm that
# album.album_data accepts them as an argument.
album_lst = album.album_data(trs)
album_insert = "insert ignore into Album (album_id, album_title, album_genre, rating, releasedt, album_comp, entertainment) values (%s, %s, %s, %s, %s, %s, %s) "
mf.save(album_lst, album_insert)

# Songs.
# NOTE(review): confirm song_data is really exposed by melon_function.
songs = mf.song_data()
mssong_insert = "insert ignore into MS_Song (song_no, title, genre, album_id) values (%s, %s, %s, %s) "
mf.save(songs, mssong_insert)

# Ranks. ``rank`` is a reserved word in MySQL 8.0.2+, so the column name is
# backtick-quoted; the quoted form is also valid on older servers.
rank_lst = sr.song_rank()
rank_insert = "insert into Song_Rank (song_no, `rank`, rankdt, likecnt) values (%s, %s, %s, %s) "
mf.save(rank_lst, rank_insert)

# Singers.
singer_id_lst = s.singer()
singer_insert = "insert ignore into Singer(artist_id, name) values(%s, %s)"
mf.save(singer_id_lst, singer_insert)