def _get_releases_paginated(self, html):
    """Parse one page of release listings into ``Release`` objects.

    Every ``<li class="release">`` element found in *html* yields one
    ``Release``:

    * ``Name`` is the text of the ``<p class="release-title">`` element,
    * ``LabelName`` is the stripped text of ``<p class="release-label">``
      with runs of spaces collapsed to a single space,
    * ``InfoPageLink`` is the ``href`` of the anchor inside the title
      paragraph, prefixed with ``http://pro.beatport.com``.

    :param html: markup of a release listing page.
    :returns: list of ``Release`` objects in document order; empty when the
        page contains no release entries.
    :raises AttributeError, TypeError, KeyError: if an entry lacks the title
        paragraph, its anchor/``href``, or the label paragraph.
    """
    soup = BeautifulSoup(html, "lxml")
    releases = []
    for entry in soup.find_all('li', class_='release'):
        title = entry.find('p', class_='release-title')
        href = title.find('a')['href']

        # NOTE: the "release-artists" paragraph is intentionally not parsed.
        # Its text was previously normalised here but never stored on the
        # Release, so it only cost time and made entries without an artists
        # paragraph fail for data nobody read.
        label_text = entry.find('p', class_='release-label').text.strip()

        release = Release()
        release.Name = title.text
        release.LabelName = re.sub(' +', ' ', label_text)
        # The href is concatenated as-is onto the host, so it is expected to
        # be a site-relative path.
        release.InfoPageLink = 'http://pro.beatport.com' + href
        releases.append(release)
    return releases
def test__relase_page(self):
    """Check the values ``ReleasePage`` extracts from a saved release page.

    Reads the fixture ``tests/files/releasePage.html`` and asserts the
    catalog id, the number of tracks and the artwork link returned by the
    page's private parsing helpers. The parsed tracks are printed afterwards.
    """
    # The context manager closes the fixture even if reading it fails.
    with open('tests/files/releasePage.html', 'r') as f:
        html = f.read()

    rel = Release()
    rel.Name = 'ON THE EDGE VOL 1'
    rel.InfoPageLink = 'http://www.beatport.com/release/on-the-edge-vol-1/1164510'
    rel_page = BeatportWeb.ReleasePage(rel, True)

    # The helpers are fed the fixture markup directly.
    catid = rel_page._get_catalog_id(html)
    self.assertEqual('SYNDROME13015', catid)

    tracks = rel_page._get_tracks(html)
    self.assertEqual(4, len(tracks))

    artwork = rel_page._get_artwork_link(html)
    self.assertEqual('http://geo-media.beatport.com/image_size/500x500/8195874.jpg', artwork)

    for t in tracks:
        print(t)
def test__get_key(self):
    """Check the key ``ReleasePage._get_key`` extracts from a saved track page.

    Reads the fixture ``tests/files/track.html`` and asserts that the parsed
    key is ``'hmoll'``.
    """
    # The context manager closes the fixture even if reading it fails.
    with open('tests/files/track.html', 'r') as f:
        html = f.read()

    rel = Release()
    rel.Name = 'ON THE EDGE VOL 1'
    rel.InfoPageLink = 'http://www.beatport.com/release/on-the-edge-vol-1/1164510'
    rel_page = BeatportWeb.ReleasePage(rel, True)

    key = rel_page._get_key(html)
    self.assertEqual('hmoll', key)
def __CreateReleaseList__(self, results):
    """Append one ``Release`` per entry of *results* to ``self.__ReleaseList``.

    For each entry the following values are copied when present and truthy:

    * ``entry["name"]``            -> ``Release.Name``
    * ``entry["catalogNumber"]``   -> ``Release.Catid``
    * ``entry["label"]["name"]``   -> ``Release.LabelName``
    * ``entry["id"]``              -> ``Release.InfoPageLink``

    A ``Release`` is appended for every entry, even when none of the fields
    could be filled. Keys that are missing, a ``label`` that is missing or
    null, and a *results* value of ``None`` are tolerated instead of raising.

    :param results: iterable of release entries, or ``None``. Entries are
        assumed to be plain dicts (e.g. decoded JSON) -- TODO confirm against
        the caller.
    """
    for entry in results or ():
        release = Release()

        name = entry.get("name")
        if name:
            release.Name = name

        catalog_number = entry.get("catalogNumber")
        if catalog_number:
            release.Catid = catalog_number

        # "label" is a nested mapping; treat an absent or null label like an
        # empty one so the lookup below cannot fail.
        label_name = (entry.get("label") or {}).get("name")
        if label_name:
            release.LabelName = label_name

        release_id = entry.get("id")
        if release_id:
            release.InfoPageLink = release_id

        self.__ReleaseList.append(release)
def __CreateReleaseList__(self, stream):
    """Build ``Release`` objects from a token stream and collect them.

    *stream* is iterated as a sequence of mapping-like tokens that carry a
    ``"type"`` entry, optionally a ``"name"`` entry (tag name) and a
    ``"data"`` entry (an iterable of attribute entries for tags, the text
    for character tokens). NOTE(review): this looks like the output of an
    HTML tree walker -- confirm against the caller.

    Recognised markup, in processing order:

    * ``div`` with class ``item``    -- starts a new ``Release``
    * ``h4`` with class ``artist``   -- artist text
    * ``span`` with class ``label``  -- becomes ``LabelName``
    * ``p`` with class ``title``     -- title text; ``Name`` is set to
      ``artist + " " + title`` when the paragraph closes
    * ``a`` inside the title         -- its ``href`` becomes ``InfoPageLink``
    * ``span`` with class ``catnum`` -- becomes ``Catid``; its end tag also
      finishes the release, which is appended to ``self.__ReleaseList``

    The order of the checks below matters: end tags are handled before start
    tags, and text is collected last, within the same token.
    """
    fields = ("artist", "label", "title", "catnum")

    def is_tag(token, tag):
        # Tokens without a "name" entry (e.g. text) never match.
        return ("name" in token) and (token["name"] == tag)

    def is_end_of(token, tag):
        return is_tag(token, tag) and (token["type"] == "EndTag")

    def carries_class(token, css_class):
        # An attribute entry matches when it contains both "class" and the
        # wanted class name. Every entry is inspected, none is skipped.
        matched = False
        for attr in token["data"]:
            if ("class" in attr) and (css_class in attr):
                matched = True
        return matched

    def text_piece(token):
        # Whitespace tokens contribute one blank, text tokens their data,
        # everything else nothing.
        kind = token["type"]
        if kind == "SpaceCharacters":
            return " "
        if kind == "Characters":
            return token["data"]
        return ""

    inside_item = False
    capturing = set()                  # fields whose text is being collected
    text = dict.fromkeys(fields, "")   # text collected so far, per field

    for item in stream:
        # A div carrying the "item" class opens a new release.
        if is_tag(item, "div"):
            for attr in item["data"]:
                if ("class" in attr) and ("item" in attr):
                    inside_item = True
                    release = Release()

        # --- closing tags: stop collecting and store the result ---------
        if "artist" in capturing and is_end_of(item, "h4"):
            capturing.discard("artist")

        if "label" in capturing and is_end_of(item, "span"):
            capturing.discard("label")
            release.LabelName = text["label"].strip()

        if "title" in capturing and is_end_of(item, "p"):
            capturing.discard("title")
            release.Name = text["artist"].strip() + " " + text["title"].strip()

        if "catnum" in capturing and is_end_of(item, "span"):
            capturing.discard("catnum")
            release.Catid = text["catnum"].strip()
            # The catalogue number is the last field of an item: store the
            # release and start over with empty buffers.
            self.__ReleaseList.append(release)
            text = dict.fromkeys(fields, "")
            inside_item = False

        # --- opening tags: only looked at while inside an item ----------
        if inside_item and is_tag(item, "span") and carries_class(item, "label"):
            capturing.add("label")

        if inside_item and is_tag(item, "h4") and carries_class(item, "artist"):
            capturing.add("artist")

        if inside_item and is_tag(item, "p") and carries_class(item, "title"):
            capturing.add("title")

        # The link to the info page sits on an anchor inside the title.
        if inside_item and "title" in capturing and is_tag(item, "a"):
            for attr in item["data"]:
                if attr[0] == "href":
                    release.InfoPageLink = attr[1]

        if inside_item and is_tag(item, "span") and carries_class(item, "catnum"):
            capturing.add("catnum")

        # --- text collection for every field currently open -------------
        for field in fields:
            if field in capturing:
                text[field] += text_piece(item)