Exemple #1
0
    def _get_releases_paginated(self, html):
        """Build ``Release`` objects from a paginated release listing.

        Every ``<li class="release">`` element found in *html* produces one
        ``Release``. Its ``Name`` is the raw text of the ``release-title``
        paragraph, ``LabelName`` is the stripped text of the ``release-label``
        paragraph with runs of spaces collapsed, and ``InfoPageLink`` is the
        title link's ``href`` prefixed with ``http://pro.beatport.com``.

        :param html: markup of the listing page; parsed with the "lxml" parser.
        :returns: list of ``Release`` objects, one per matching ``<li>``.
        """
        document = BeautifulSoup(html, "lxml")
        releases = []

        for entry in document.find_all('li', class_='release'):
            release = Release()

            # The title paragraph carries both the link and the display name.
            title_tag = entry.find('p', class_='release-title')
            href = title_tag.find('a')['href']
            title = title_tag.text

            # NOTE(review): the artist text is normalised here but is never
            # stored on the release -- confirm whether it should be.
            artists_tag = entry.find('p', class_='release-artists')
            artist_text = re.sub(' +', ' ', artists_tag.text.strip())
            artist_text = " ".join(artist_text.split())

            label_tag = entry.find('p', class_="release-label")
            label_text = re.sub(' +', ' ', label_tag.text.strip())

            release.Name = title
            release.LabelName = label_text
            release.InfoPageLink = 'http://pro.beatport.com' + href

            releases.append(release)

        return releases
Exemple #2
0
    def test__relase_page(self):
        """Check catalog id, track count and artwork link for a saved release page.

        The HTML is read from ``tests/files/releasePage.html`` and handed
        directly to the ``ReleasePage`` parsing helpers.
        """
        # The context manager closes the fixture even if reading it fails.
        with open('tests/files/releasePage.html', 'r') as f:
            html = f.read()

        rel = Release()
        rel.Name = 'ON THE EDGE VOL 1'
        rel.InfoPageLink = 'http://www.beatport.com/release/on-the-edge-vol-1/1164510'

        # NOTE(review): the meaning of the second argument (True) is not
        # visible from this test -- confirm against ReleasePage.
        rel_page = BeatportWeb.ReleasePage(rel, True)

        catid = rel_page._get_catalog_id(html)
        self.assertEqual('SYNDROME13015', catid)

        tracks = rel_page._get_tracks(html)
        self.assertEqual(4, len(tracks))

        artwork = rel_page._get_artwork_link(html)
        self.assertEqual('http://geo-media.beatport.com/image_size/500x500/8195874.jpg', artwork)

        # Dump the parsed tracks to ease debugging when the test is run verbosely.
        for t in tracks:
            print(t)
Exemple #3
0
    def test__get_key(self):
        """Check that the key parsed from the saved track page is ``'hmoll'``.

        The HTML is read from ``tests/files/track.html`` and handed directly
        to ``ReleasePage._get_key``.
        """
        # The context manager closes the fixture even if reading it fails.
        with open('tests/files/track.html', 'r') as f:
            html = f.read()

        rel = Release()
        rel.Name = 'ON THE EDGE VOL 1'
        rel.InfoPageLink = 'http://www.beatport.com/release/on-the-edge-vol-1/1164510'

        # NOTE(review): the meaning of the second argument (True) is not
        # visible from this test -- confirm against ReleasePage.
        rel_page = BeatportWeb.ReleasePage(rel, True)

        key = rel_page._get_key(html)

        self.assertEqual('hmoll', key)
Exemple #4
0
    def __CreateReleaseList__(self, results):
        """Append one ``Release`` per entry of *results* to the release list.

        Each entry is subscripted for ``name``, ``catalogNumber``,
        ``label`` -> ``name`` and ``id``. A value is copied onto the release
        only when it is truthy. The keys are subscripted directly, so a
        missing key is not tolerated.

        :param results: iterable of subscriptable entries (presumably decoded
            JSON objects from a search response -- confirm against the caller).
        """
        for entry in results:
            release = Release()

            title = entry["name"]
            if title:
                release.Name = title

            catalog_number = entry["catalogNumber"]
            if catalog_number:
                release.Catid = catalog_number

            label_name = entry["label"]["name"]
            if label_name:
                release.LabelName = label_name

            # The id is stored unchanged as the info page link.
            release_id = entry["id"]
            if release_id:
                release.InfoPageLink = release_id

            self.__ReleaseList.append(release)
Exemple #5
0
    def __CreateReleaseList__(self, stream):
        """Scan a token stream and append the releases found to the release list.

        The method is a flag-driven state machine. Each token is a mapping
        with a ``"type"`` entry and, for tag tokens, ``"name"`` and ``"data"``
        entries (presumably an html5lib tree-walker stream -- confirm against
        the caller). Attribute entries in ``"data"`` are treated as
        (name, value) pairs.

        Recognised structure:

        * ``div`` with a class/"item" attribute pair: starts a new release.
        * ``h4`` with class/"artist": artist text, collected until the ``h4``
          end tag.
        * ``p`` with class/"title": title text, collected until the ``p`` end
          tag; ``Name`` is then set to "<artist> <title>".
        * ``a`` seen while inside the title: its ``href`` becomes
          ``InfoPageLink``.
        * ``span`` with class/"label": label text, stored as ``LabelName`` at
          the ``span`` end tag.
        * ``span`` with class/"catnum": catalog number, stored as ``Catid`` at
          the ``span`` end tag. This also finishes the release: it is
          appended to the list and all buffers are reset.

        The order of the checks inside the loop matters: the "field ends"
        checks run before the "field found" checks, and text collection runs
        last, so the token that opens a field is examined by the collectors
        in the same iteration while the token that closes it is not.

        :param stream: iterable of token mappings as described above.
        """
        # State flags: which part of an item the scanner is currently inside.
        found_item = False
        found_artist = False
        found_title = False
        found_label = False
        found_catnum = False

        # Text buffers for the release currently being assembled.
        cur_artist = ""
        cur_title = ""
        cur_label = ""
        cur_catnum = ""

        for item in stream:
            # check for item found
            if ("name" in item) and (item["name"] == "div"):
                for attTupel in item["data"]:
                    if ("class" in attTupel) and ("item" in attTupel):
                        found_item = True

                        # create a new release object
                        cur_release = Release()
                        

            # check if artist field ends
            if found_artist == True:
                if ("name" in item) and (item["name"] == "h4") and (item["type"] == "EndTag"):
                    found_artist = False

            # check if label field ends; if so, store the collected label
            if found_label == True:
                if ("name" in item) and (item["name"] == "span") and (item["type"] == "EndTag"):
                    found_label = False
                    cur_release.LabelName = cur_label.strip()

            # check if title field ends
            if found_title == True:
                if ("name" in item) and (item["name"] == "p") and (item["type"] == "EndTag"):
                    found_title = False

                    # add name to Release instance (artist and title joined by a space)
                    cur_release.Name = cur_artist.strip() + " " + cur_title.strip()

            # check if catnum field ends
            if found_catnum == True:
                if ("name" in item) and (item["name"] == "span") and (item["type"] == "EndTag"):
                    found_catnum = False
                    
                    # add catnum to release
                    cur_release.Catid = cur_catnum.strip()

                    # because here is the item end, reset all data, and
                    # append the current release to the __ReleaseList
                    self.__ReleaseList.append(cur_release)
                    cur_artist =  ""
                    cur_title =  ""
                    cur_label = ""
                    cur_catnum = ""
                    found_item = False

            # check if label found
            if found_item == True:
                if ("name" in item) and (item["name"] == "span"):
                    for attTupel in item["data"]:
                        if ("class" in attTupel) and ("label" in attTupel):
                            found_label = True

            # check if artist found
            if found_item == True:
                if ("name" in item) and (item["name"] == "h4"):
                    for attTupel in item["data"]:
                        if ("class" in attTupel) and ("artist" in attTupel):
                            found_artist = True

            # check if title found
            if found_item == True:
                if ("name" in item) and (item["name"] == "p"):
                    for attTupel in item["data"]:
                        if ("class" in attTupel) and ("title" in attTupel):
                            found_title = True

            # find the infoPageLink: the href of an <a> seen while inside the title
            if found_item == True and found_title:
                if ("name" in item) and (item["name"] == "a"):
                    for attTupel in item["data"]:
                        if attTupel[0] == "href":
                            cur_release.InfoPageLink = attTupel[1]

            # check if catnum found
            if found_item == True:
                if ("name" in item) and (item["name"] == "span"):
                    for attTupel in item["data"]:
                        if ("class" in attTupel) and ("catnum" in attTupel):
                            found_catnum = True

            # fetch artists; whitespace tokens are collapsed to a single space
            if found_artist == True:
                if item["type"] == "SpaceCharacters":
                    cur_artist += " "
                if item["type"] == "Characters":
                    cur_artist += item["data"]

            # fetch label
            if found_label == True:
                if item["type"] == "SpaceCharacters":
                    cur_label += " "
                if item["type"] == "Characters":
                    cur_label += item["data"]

            # fetch title
            if found_title == True:
                if item["type"] == "SpaceCharacters":
                    cur_title += " "
                if item["type"] == "Characters":
                    cur_title += item["data"]

            # fetch catnum
            if found_catnum == True:
                if item["type"] == "SpaceCharacters":
                    cur_catnum += " "
                if item["type"] == "Characters":
                    cur_catnum += item["data"]