def get_embed_string_block(self, request):
    """Extract the ``EmbedData`` JavaScript variable from a page and parse it.

    The text following the first ``var EmbedData = `` marker is cut at the
    first ``};`` (the terminator is put back) and the resulting assignment is
    handed to ``jsobj.read_js_object``.

    :param request: object exposing the page source as ``request.text``.
    :returns: whatever ``jsobj.read_js_object`` returns for the assignment.
    :raises IndexError: if the marker does not occur in ``request.text``.
    """
    pieces = request.text.split("var EmbedData = ")
    after_marker = pieces[1]
    # Everything up to the first "};" is the object literal; restore the "};".
    js_literal = after_marker.partition("};")[0] + "};"
    return jsobj.read_js_object("var EmbedData = %s" % js_literal)
def bandcamp_get_embed_block(response):
    """Parse the ``EmbedData`` JavaScript variable out of a page response.

    Takes the text after the first ``var EmbedData = `` marker, truncates it
    at the first ``};`` (keeping the terminator) and parses the rebuilt
    assignment with ``jsobj.read_js_object``.

    :param response: object exposing the page source as ``response.text``.
    :returns: the result of ``jsobj.read_js_object`` for the assignment.
    :raises IndexError: if the marker is not present in ``response.text``.
    """
    marker = "var EmbedData = "
    tail = response.text.split(marker)[1]
    # Cut at the first "};" and put the terminator back.
    js_literal = tail.partition("};")[0] + "};"
    return jsobj.read_js_object("var EmbedData = {}".format(js_literal))
def extract_album_meta_data(self, request):
    """Collect title, artist, track list and release year for an album page.

    The title comes from the ``EmbedData`` variable (via
    ``self.get_embed_string_block``); artist, tracks and date come from the
    ``TralbumData`` variable, which is cut out of ``request.text`` and parsed
    with ``jsobj.read_js_object``.

    :param request: object exposing the page source as ``request.text``.
    :returns: dict with the keys ``title``, ``artist``, ``tracks`` and
        ``date``; ``date`` is the third whitespace-separated token of
        ``album_release_date``.
    :raises IndexError: if ``var TralbumData = `` is missing from the page.
    """
    embed_vars = self.get_embed_string_block(request)

    after_marker = request.text.split("var TralbumData = ")[1]
    # Keep the object literal up to its first "};" and restore the terminator.
    js_literal = after_marker.partition("};")[0] + "};"
    tralbum_vars = jsobj.read_js_object("var TralbumData = %s" % js_literal)

    return {
        'title': embed_vars['EmbedData']['album_title'],
        'artist': tralbum_vars['TralbumData']['artist'],
        'tracks': tralbum_vars['TralbumData']['trackinfo'],
        'date': tralbum_vars['TralbumData']['album_release_date'].split()[2],
    }
def parse_file(url): print "Starting the parsing for: " + url response = urllib2.urlopen(url) r = response.read() # embedBlock = r.text.split("var EmbedData = ") embedBlock = r.split("var EmbedData = ") embedStringBlock = embedBlock[1] # embedStringBlock = unicodedata.normalize(u'NFKD', embedStringBlock).encode('ascii', 'ignore') embedStringBlock = embedStringBlock.split("};")[0] + "};" embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock)) embedData = embedStringBlock albumTitle = embedData['EmbedData']['album_title'] # block = r.text.split("var TralbumData = ") block = r.split("var TralbumData = ") #print block[0] stringBlock = block[1] # stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore') stringBlock = stringBlock.split("};")[0] + "};" stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) data = stringBlock # print data artistName = data['TralbumData']['artist'] firstLetter = artistName[0] if not firstLetter.isalpha: firstLetter = "0" else: firstLetter = firstLetter.capitalize() if not os.path.exists("files"): os.makedirs("files") if not os.path.exists("files/" + firstLetter): if (firstLetter.isalpha): os.makedirs("files/" + firstLetter) if not os.path.exists("files/" + firstLetter + "/" + artistName): os.makedirs("files/" + firstLetter + "/" + artistName) tracks = data['TralbumData']['trackinfo'] albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "") albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath if not os.path.exists("files/zips"): os.makedirs("files/zips") if not os.path.exists(albumPath): os.makedirs(albumPath) for each in tracks: if not os.path.exists(albumPath): os.makedirs(albumPath) songTitle = each['title'].replace(" ", "").replace(".", "") try: songURL = each['file']['mp3-128'] except: continue trackNum = each['track_num'] print "Now Downloading: " + each['title'], each['file']['mp3-128'] req = urllib2.Request(songURL, 
headers={'User-Agent': "Magic Browser"}) u = urllib2.urlopen(req) title = slugify(unicode(each['title'])) f = open(albumPath+'/' + title +'.mp3', 'wb') meta = u.info() file_size = int(meta.getheaders("Content-Length")[0]) file_size_dl = 0.0 block_sz = 8192 while True: buffer = u.read(block_sz) if not buffer: break file_size_dl += len(buffer) f.write(buffer) p = float(file_size_dl) / file_size status = r"[{1:2.2%}]".format(file_size_dl, p) # status = status + chr(8) * (len(status) + 1) sys.stdout.write("Download progress: %s%% \r" % (status)) sys.stdout.flush() f.close() print "Encoding . . . " audio = MP3(albumPath + '/' + title + '.mp3') audio["TIT2"] = TIT2(encoding=3, text=["title"]) audio.save() audio = EasyID3(albumPath + '/' + title + '.mp3') audio["title"] = each['title'].decode('utf-8') audio["artist"] = unicode(artistName) audio["album"] = unicode(albumTitle) audio["tracknumber"] = trackNum audio.save() print "Done downloading " + songTitle
def parse_file(url): print "Starting the parsing for: " + url r = requests.get(url) soup = BeautifulSoup(r.text) if "album" in url: songType = "album" else: songType = "track" albumTitle = soup.head.title.text embedBlock = r.text.split("var EmbedData = ") embedStringBlock = embedBlock[1] embedStringBlock = embedStringBlock.split("};")[0] + "};" embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock)) #print embedStringBlock #embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock) #embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock) #embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock) #embedStringBlock = embedStringBlock.replace(r'http\":', 'http:') #print embedStringBlock #currData = json.loads(embedStringBlock) #print currData #print embedStringBlock embedData = embedStringBlock artistName = embedData['EmbedData']['artist'] if "name" in embedData: fileType = "track" trackName = embedData['EmbedData']['name'] else: fileType = "album" albumTitle = embedData['EmbedData']['album_title'] block = r.text.split("var TralbumData = ") #print block[0] stringBlock = block[1] stringBlock = stringBlock.split("};")[0] + "};" stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock)) #print stringBlock #sys.exit() #stringArray = stringBlock.split("\n") #del stringArray[1:4] #print stringArray #stringBlock = "".join(stringArray).strip().replace(" ", "") data = stringBlock artistName = data['TralbumData']['artist'] firstLetter = artistName[0] if not firstLetter.isalpha: firstLetter = "0" else: firstLetter = firstLetter.capitalize() if not os.path.exists("files"): os.makedirs("files") letterDirectory = "files/" + firstLetter if not os.path.exists("files/" + firstLetter): if(firstLetter.isalpha): os.makedirs("files/" + firstLetter) if not os.path.exists("files/" + firstLetter + "/" + artistName): os.makedirs("files/" + firstLetter + "/" + artistName) tracks = data['TralbumData']['trackinfo'] albumPath 
= albumTitle.replace(" ", "").replace("/","").replace(".", "") albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath if not os.path.exists("files/zips"): os.makedirs("files/zips") if not os.path.exists(albumPath): os.makedirs(albumPath) for each in tracks: songTitle = each['title'].replace(" ", "").replace(".", "") songURL = each['file']['mp3-128'] track_num = each['track_num'] print "Now Downloading: " + each['title'], each['file']['mp3-128'] urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3") print "Encoding . . . " audio = MP3(albumPath + "/" + songTitle + ".mp3") audio["TIT2"]=TIT2(encoding=3, text=["title"]) audio.save() audio = EasyID3(albumPath + "/" + songTitle + ".mp3") audio["title"] = each['title'] audio["artist"] = artistName audio["album"] = albumTitle #audio["tracknumber"] = track_num audio.save() #audiofile.tag.save() print "Done downloading " + songTitle
def bandcamp_get_album_block(response):
    """Parse the ``TralbumData`` JavaScript variable out of a page response.

    Takes the text after the first ``var TralbumData = `` marker, truncates
    it at the first ``};`` (keeping the terminator) and parses the rebuilt
    assignment with ``jsobj.read_js_object``.

    :param response: object exposing the page source as ``response.text``.
    :returns: the result of ``jsobj.read_js_object`` for the assignment.
    :raises IndexError: if the marker is not present in ``response.text``.
    """
    marker = 'var TralbumData = '
    tail = response.text.split(marker)[1]
    # Only the part before the first "};" belongs to the object literal.
    js_literal = tail.split("};", 1)[0] + "};"
    return jsobj.read_js_object("var TralbumData = {}".format(js_literal))