def crawl_list(timeout=30):
    """Crawl the closed-auction listing pages and store every song found.

    Listing pages are requested one after another, starting at page 1, until
    a page yields no ``ul.user_buy li`` entries.  For every entry the song
    ID, title, singer and auction date are passed to
    ``db.insert_withoutDuplication`` for the ``musicow_list`` table, and
    ``crawl_auction`` is then called with that song ID.

    Args:
        timeout: Seconds to wait for each listing request.  Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Raises:
        requests.exceptions.RequestException: If a listing page cannot be
            fetched (including a timeout).  Parsing errors on a listing
            entry (missing link, fewer than three ``dl`` blocks) also
            propagate to the caller.

    Side effects:
        Overwrites ``CUSTOM_HEADER['referer']`` with the URL of each page
        requested, and prints progress to stdout.
    """
    db = Sql("khroma")
    page = 1
    while True:
        print("페이지 : ", page)
        url_list = (
            "https://www.musicow.com/auctions?tab=closed&keyword=&page="
            + str(page)
        )
        CUSTOM_HEADER['referer'] = url_list
        r1 = requests.get(url_list, headers=CUSTOM_HEADER, timeout=timeout)
        bs1 = BeautifulSoup(r1.text, 'html.parser')
        song_list = bs1.select('ul.user_buy li')

        # An empty page marks the end of the listing.
        if not song_list:
            break

        for song in song_list:
            url2_add = song.a["href"]
            # The ID is the third "/"-separated piece of the link
            # (presumably "/auction/<id>" -- confirm against the live markup).
            songID = url2_add.split("/")[2].strip()
            print(songID)

            # The first three <dl> blocks hold title, singer and auction date.
            txt = song.select('div.txt dl')
            title = txt[0].dd.text
            singer = txt[1].dd.text
            auctionDate = txt[2].dd.text

            db.insert_withoutDuplication(
                'musicow_list',
                check_list=['songID'],
                songID=songID,
                title=title,
                singer=singer,
                auctionDate=auctionDate,
            )
            crawl_auction(songID)

        page += 1
def crawl_auction(songID, timeout=30):
    """Scrape one auction detail page and store its figures.

    Fetches ``https://www.musicow.com/auction/<songID>``, extracts the profit
    string from the ``<script>`` inside ``#tab1``, the auction amount and
    start price from ``dl.price strong``, and the share count from the first
    ``div.lst_bul p`` element, then passes them to
    ``db.insert_withoutDuplication`` for the ``musicow_auction`` table.

    Any exception (network, timeout, missing element, non-numeric text) is
    caught and printed, so one bad page does not stop the caller's crawl.

    Args:
        songID: Auction/song identifier; converted to ``str`` before use.
        timeout: Seconds to wait for the HTTP request.  Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Side effects:
        Overwrites ``CUSTOM_HEADER['referer']`` with the page URL and prints
        intermediate values to stdout.
    """
    try:
        print(songID)
        songID = str(songID)
        db = Sql("khroma")

        url = "https://www.musicow.com/auction/%s" % (songID)
        CUSTOM_HEADER['referer'] = url
        r = requests.get(url, headers=CUSTOM_HEADER, timeout=timeout)
        bs = BeautifulSoup(r.text, 'html.parser')

        # The inline script is split into statements; the third one carries
        # the profit data.
        text_info = bs.select_one('#tab1').script.text.split(";")
        # NOTE(review): the range "A-z" also covers the ASCII punctuation
        # between "Z" and "a" ([ \ ] ^ _ `).  Kept unchanged because the
        # stored value may depend on those characters being stripped.
        profit_raw = re.sub(r"[A-z=\s]", "", text_info[2])
        # Greedy match: drop everything up to and including the last quote.
        profit_info = re.sub(r".+'", "", profit_raw)
        print(profit_info)

        # Only the amount and start price are collected here; extraction of
        # the low/average prices was disabled in the original code.
        auction = bs.select('dl.price strong')
        auctionAmount = int(re.sub(r"\D", "", auction[1].text))
        auctionStartPrice = int(re.sub(r"\D", "", auction[2].text))
        print(auctionStartPrice)

        # Strip whitespace, the "1/" marker and thousands separators so the
        # remaining text parses as an integer share count.
        info_list = bs.select('div.lst_bul p')
        share_raw = re.sub(r"\s", "", info_list[0].text)
        shares = int(share_raw.replace("1/", "").replace(",", ""))
        print(shares)

        db.insert_withoutDuplication(
            'musicow_auction',
            check_list=['songID'],
            songID=songID,
            profit_info=profit_info,
            shares=shares,
            auctionAmount=auctionAmount,
            auctionStartPrice=auctionStartPrice,
        )
    except Exception as ex:
        # Best-effort scrape: report the failure and let the caller continue.
        print(ex)
def update_info(timeout=30):
    """Scrape the info tab of every known deal and store its auction figures.

    Reads the ``dealID`` column from ``musicow_deal`` via ``db.select``,
    de-duplicates the IDs, and for each one fetches
    ``https://www.musicow.com/song/<dealID>?tab=info``.  Title, singer and the
    first four ``div.row-col-2 dd`` values (amount, start, low and average
    price) are passed to ``db.insert_withoutDuplication`` for the
    ``musicow_info`` table.

    Failures are handled per deal: any exception is printed and the loop
    moves on to the next ID.

    Args:
        timeout: Seconds to wait for each HTTP request.  Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Side effects:
        Overwrites ``CUSTOM_HEADER['referer']`` with each page URL and prints
        every deal ID to stdout.
    """
    db = Sql("khroma")
    dealID_data = db.select("musicow_deal", "dealID")
    # The same deal ID can appear in several rows; visit each only once.
    dealID_list = {d["dealID"] for d in dealID_data}

    for dealID in dealID_list:
        try:
            print(dealID)
            url = "https://www.musicow.com/song/%s?tab=info" % (dealID)
            CUSTOM_HEADER['referer'] = url
            r = requests.get(url, headers=CUSTOM_HEADER, timeout=timeout)
            bs = BeautifulSoup(r.text, 'html.parser')

            title = bs.select_one('strong.song_title').text.strip()
            singer = bs.select_one('span.artist').text.strip()

            # Keep only the digits of each of the first four <dd> values.
            # Indexing (rather than slicing) keeps the IndexError when the
            # page has fewer than four entries.
            auction = bs.select('div.row-col-2 dd')
            (auctionAmount, auctionStartPrice,
             auctionLowPrice, auctionAvgPrice) = [
                int(re.sub(r"\D", "", auction[i].text)) for i in range(4)
            ]

            db.insert_withoutDuplication(
                'musicow_info',
                check_list=['dealID'],
                dealID=dealID,
                title=title,
                singer=singer,
                auctionAmount1=auctionAmount,
                auctionStartPrice1=auctionStartPrice,
                auctionLowPrice1=auctionLowPrice,
                auctionAvgPrice1=auctionAvgPrice,
            )
        except Exception as ex:
            # Best-effort scrape: report the failure and continue.
            print(ex)