Example #1
0
    def search(name, page=1, albums=True, tracks=True, artists=True,
               labels=False):
        """Scrape bandcamp search results for *name*, yielding result dicts.

        Fetches one results page, yields each parsed item, then recurses
        into the next page until a page comes back empty or starts
        repeating items from the current page.

        Args:
            name: query string sent as the ``q`` parameter.
            page: 1-based results page to start from.
            albums: parse and yield album results.
            tracks: parse and yield track results.
            artists: parse and yield artist results.
            labels: parse and yield label results.

        Yields:
            dicts produced by the matching ``BandCamper._parse_*`` helper.
        """
        params = {"page": page, "q": name}
        response = requests.get('http://bandcamp.com/search', params=params)
        soup = BeautifulSoup(response.content, 'html.parser')

        seen = []
        for item in soup.find_all("li", class_="searchresult"):
            item_type = item.find('div', class_='itemtype').text.strip().lower()
            # Dispatch on the advertised result type; skip anything the
            # caller disabled or that we do not know how to parse.
            if item_type == "album" and albums:
                data = BandCamper._parse_album(item)
            elif item_type == "track" and tracks:
                data = BandCamper._parse_track(item)
            elif item_type == "artist" and artists:
                data = BandCamper._parse_artist(item)
            elif item_type == "label" and labels:
                data = BandCamper._parse_label(item)
            else:
                continue
            yield data
            seen.append(data)
        if not seen:
            return  # empty page -> no more results
        # NOTE(review): the duplicate check below guards against the site
        # serving repeated results past the last page — confirm against
        # live behavior.
        for item in BandCamper.search(name, page=page + 1, albums=albums,
                                      tracks=tracks, artists=artists,
                                      labels=labels):
            if item in seen:
                return  # duplicate data, fail safe out of loops
            yield item
Example #2
0
    def get_stream_data(url):
        """Extract stream metadata from a bandcamp track page.

        Parses the ``application/ld+json`` blob embedded in the page.

        Args:
            url: bandcamp track page url.

        Returns:
            dict with keys ``categories``, ``album_name``, ``artist``,
            ``image``, ``title``, ``url`` and ``tags``, plus ``stream``
            (mp3 url) and ``length`` (milliseconds) when the page's
            ``additionalProperty`` list provides them.
        """
        txt_string = requests.get(url).text

        # The page embeds its metadata as an ld+json <script> element;
        # cut the blob out of the raw markup.
        json_blob = txt_string. \
            split('<script type="application/ld+json">')[-1]. \
            split("</script>")[0]

        data = json.loads(json_blob)

        artist_data = data['byArtist']
        album_data = data['inAlbum']
        result = {
            "categories": data["@type"],
            'album_name': album_data['name'],
            'artist': artist_data['name'],
            'image': data['image'],
            "title": data['name'],
            "url": url,
            "tags": data['keywords'].split(", ") + data["tags"]
        }

        # Optional extras live in additionalProperty as name/value pairs.
        for p in data['additionalProperty']:
            if p['name'] == 'file_mp3-128':
                result["stream"] = p["value"]
            if p['name'] == 'duration_secs':
                result["length"] = p["value"] * 1000

        return result
Example #3
0
def extract_ldjson_blob(url, clean=False):
    """Fetch *url* and parse the embedded ``application/ld+json`` blob.

    Args:
        url: page url to fetch.
        clean: when true, return a rebuilt structure with every ``@``
            removed from dict keys, recursing through nested dicts and
            lists.

    Returns:
        the decoded JSON data, optionally with scrubbed keys.
    """
    txt_string = requests.get(url).text

    json_blob = txt_string. \
        split('<script type="application/ld+json">')[-1]. \
        split("</script>")[0]

    data = json.loads(json_blob)

    def _scrub_dict(mapping):
        # Rebuild the dict with "@" stripped out of every key,
        # recursing into nested containers.
        scrubbed = {}
        for key, value in mapping.items():
            if isinstance(value, dict):
                value = _scrub_dict(value)
            elif isinstance(value, list):
                value = _scrub_list(value)
            scrubbed[key.replace("@", "")] = value
        return scrubbed

    def _scrub_list(values):
        return [_scrub_dict(v) if isinstance(v, dict)
                else _scrub_list(v) if isinstance(v, list)
                else v
                for v in values]

    return _scrub_dict(data) if clean else data
Example #4
0
    def get_track_lyrics(track_url):
        """Return the lyrics text from a bandcamp track page.

        Falls back to the literal string "lyrics unavailable" when the
        page has no usable lyricsText element.
        """
        markup = requests.get(track_url).text
        page = BeautifulSoup(markup, 'html.parser')
        lyrics_div = page.find("div", {"class": "lyricsText"})
        # Truthiness (not an `is None` check) intentionally also treats
        # an empty tag as missing, matching bs4 Tag semantics.
        if not lyrics_div:
            return "lyrics unavailable"
        return lyrics_div.text
Example #5
0
def extract_blob(url, params=None):
    """Fetch *url* and decode the first ``data-blob`` attribute found.

    Tries the single-quoted attribute form first, then the double-quoted
    form (where embedded quotes are HTML-escaped as ``&quot;``).

    Returns:
        the decoded JSON payload, or None when the page contains no
        data-blob attribute.
    """
    page = requests.get(url, params=params).text

    single_quoted = "data-blob='"
    if single_quoted in page:
        payload = page.split(single_quoted, 1)[1].split("'", 1)[0]
        return json.loads(payload)

    double_quoted = "data-blob=\""
    if double_quoted in page:
        payload = page.split(double_quoted, 1)[1].split("\"", 1)[0]
        return json.loads(payload.replace("&quot;", '"'))
Example #6
0
 def get_albums(url):
     """Scrape a page for album links.

     Scans every anchor for a title paragraph; each hit yields a
     BandcampAlbum built from the title text, the cover-art image src
     and the anchor's href appended to *url*.

     Returns:
         list of BandcampAlbum objects (empty when nothing matches).
     """
     soup = BeautifulSoup(requests.get(url).text, "html.parser")
     results = []
     for anchor in soup.find_all("a"):
         title_tag = anchor.find("p", {"class": "title"})
         if not title_tag:
             continue
         art = anchor.find("div", {"class": "art"}).find("img")["src"]
         results.append(BandcampAlbum({"album_name": title_tag.text.strip(),
                                       "image": art,
                                       "url": url + anchor["href"]}))
     return results