Exemple #1
0
    def get_embed_string_block(self, request):
        """Parse the ``EmbedData`` JavaScript object embedded in a page.

        ``request.text`` is searched for the ``var EmbedData = `` assignment.
        The text that follows it, up to the first ``};``, is handed to
        ``jsobj.read_js_object`` and that call's result is returned.

        Raises IndexError when ``request.text`` does not contain the
        ``var EmbedData = `` marker.
        """
        marker = "var EmbedData = "

        # Text following the first occurrence of the assignment.
        after_marker = request.text.split(marker)[1]

        # Cut at the first "};" and put that terminator back.
        js_literal = after_marker.partition("};")[0] + "};"

        return jsobj.read_js_object("var EmbedData = %s" % js_literal)
Exemple #2
0
def bandcamp_get_embed_block(response):
    """Return the parsed ``EmbedData`` JavaScript object from ``response``.

    ``response.text`` is split on ``var EmbedData = ``; the text after the
    marker is truncated at the first ``};`` (which is re-appended) and passed
    to ``jsobj.read_js_object``.

    Raises IndexError when the marker is absent from ``response.text``.
    """
    pieces = response.text.split("var EmbedData = ")
    tail = pieces[1]

    # partition()[0] yields the same prefix as split()[0].
    js_literal = tail.partition("};")[0] + "};"

    return jsobj.read_js_object("var EmbedData = {}".format(js_literal))
Exemple #3
0
    def get_embed_string_block(self, request):
        """Extract and parse the page's ``EmbedData`` JavaScript assignment.

        The portion of ``request.text`` after ``var EmbedData = `` and before
        the first ``};`` is rebuilt into a full assignment statement and given
        to ``jsobj.read_js_object``; the parsed result is returned.

        Raises IndexError if ``var EmbedData = `` is not found in the text.
        """
        segments = request.text.split("var EmbedData = ")
        remainder = segments[1]

        # Keep only the object literal, closing it with the "};" we cut on.
        literal, _, _ = remainder.partition("};")
        literal += "};"

        parsed = jsobj.read_js_object("var EmbedData = %s" % literal)
        return parsed
Exemple #4
0
    def extract_album_meta_data(self, request):
        """Collect album metadata from a page's embedded JavaScript objects.

        The album title comes from ``EmbedData`` (via
        ``self.get_embed_string_block``); artist, track list and release date
        come from the ``TralbumData`` object parsed out of ``request.text``.

        Returns a dict with the keys ``title``, ``artist``, ``tracks`` and
        ``date``.  ``date`` is the third whitespace-separated token of
        ``album_release_date``.

        Raises IndexError when ``var TralbumData = `` is missing from the
        page text.
        """
        embed = self.get_embed_string_block(request)

        # Isolate the TralbumData literal: text after the marker, up to "};".
        after_marker = request.text.split("var TralbumData = ")[1]
        js_literal = after_marker.partition("};")[0] + "};"
        parsed = jsobj.read_js_object("var TralbumData = %s" % js_literal)

        title = embed['EmbedData']['album_title']
        tralbum = parsed['TralbumData']
        artist = tralbum['artist']
        tracks = tralbum['trackinfo']
        date = tralbum['album_release_date'].split()[2]

        album = {}
        album['title'] = title
        album['artist'] = artist
        album['tracks'] = tracks
        album['date'] = date
        return album
Exemple #5
0
    def extract_album_meta_data(self, request):
        """Build an album metadata dict from the page behind ``request``.

        ``self.get_embed_string_block`` supplies the parsed ``EmbedData``
        object (used for the title).  The ``TralbumData`` object is cut out of
        ``request.text`` and parsed with ``jsobj.read_js_object`` (used for
        artist, tracks and release date).

        Returns a dict with ``title``, ``artist``, ``tracks`` and ``date``;
        ``date`` is token index 2 of the whitespace-split
        ``album_release_date`` string.

        Raises IndexError if the page has no ``var TralbumData = `` marker.
        """
        embed_data = self.get_embed_string_block(request)

        marker = "var TralbumData = "
        tail = request.text.split(marker)[1]
        # Same prefix as split("};")[0], with the terminator restored.
        literal = tail.partition("};")[0] + "};"
        tralbum_data = jsobj.read_js_object("var TralbumData = %s" % literal)

        album = {}
        album['title'] = embed_data['EmbedData']['album_title']

        info = tralbum_data['TralbumData']
        album['artist'] = info['artist']
        album['tracks'] = info['trackinfo']
        album['date'] = info['album_release_date'].split()[2]

        return album
Exemple #6
0
def parse_file(url):
    print "Starting the parsing for: " + url

    response = urllib2.urlopen(url)
    r = response.read()

    # embedBlock = r.text.split("var EmbedData = ")
    embedBlock = r.split("var EmbedData = ")

    embedStringBlock = embedBlock[1]
    # embedStringBlock = unicodedata.normalize(u'NFKD', embedStringBlock).encode('ascii', 'ignore')
    embedStringBlock = embedStringBlock.split("};")[0] + "};"
    embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))



    embedData = embedStringBlock


    albumTitle = embedData['EmbedData']['album_title']

    # block = r.text.split("var TralbumData = ")
    block = r.split("var TralbumData = ")
    #print block[0]

    stringBlock = block[1]
    # stringBlock = unicodedata.normalize('NFKD', stringBlock).encode('ascii', 'ignore')
    stringBlock = stringBlock.split("};")[0] + "};"
    stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))


    data = stringBlock
    # print data
    artistName = data['TralbumData']['artist']

    firstLetter = artistName[0]

    if not firstLetter.isalpha:
        firstLetter = "0"
    else:
        firstLetter = firstLetter.capitalize()

    if not os.path.exists("files"):
        os.makedirs("files")


    if not os.path.exists("files/" + firstLetter):
        if (firstLetter.isalpha):
            os.makedirs("files/" + firstLetter)

    if not os.path.exists("files/" + firstLetter + "/" + artistName):
        os.makedirs("files/" + firstLetter + "/" + artistName)

    tracks = data['TralbumData']['trackinfo']

    albumPath = albumTitle.replace(" ", "").replace("/", "").replace(".", "")

    albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
    if not os.path.exists("files/zips"):
        os.makedirs("files/zips")

    if not os.path.exists(albumPath):
        os.makedirs(albumPath)

    for each in tracks:
        if not os.path.exists(albumPath):
            os.makedirs(albumPath)
        songTitle = each['title'].replace(" ", "").replace(".", "")
        try:
            songURL = each['file']['mp3-128']
        except:
            continue
        
        trackNum = each['track_num']

        print "Now Downloading: " + each['title'], each['file']['mp3-128']

        req = urllib2.Request(songURL, headers={'User-Agent': "Magic Browser"})
        u = urllib2.urlopen(req)
        title = slugify(unicode(each['title']))
        f = open(albumPath+'/' + title +'.mp3', 'wb')

        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        file_size_dl = 0.0
        block_sz = 8192
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break

            file_size_dl += len(buffer)
            f.write(buffer)
            p = float(file_size_dl) / file_size
            status = r"[{1:2.2%}]".format(file_size_dl, p)
            # status = status + chr(8) * (len(status) + 1)
            sys.stdout.write("Download progress: %s%%   \r" % (status))
            sys.stdout.flush()


        f.close()
        print "Encoding . . . "
        audio = MP3(albumPath + '/' + title + '.mp3')
        audio["TIT2"] = TIT2(encoding=3, text=["title"])
        audio.save()
        audio = EasyID3(albumPath + '/' + title + '.mp3')
        audio["title"] = each['title'].decode('utf-8')
        audio["artist"] = unicode(artistName)
        audio["album"] = unicode(albumTitle)
        audio["tracknumber"] = trackNum
        audio.save()

        print "Done downloading " + songTitle
def parse_file(url):

	print "Starting the parsing for: " + url
	r = requests.get(url)
	soup = BeautifulSoup(r.text)

	if "album" in url:
		songType = "album"
	else:
		songType = "track"

	albumTitle = soup.head.title.text
	
	embedBlock = r.text.split("var EmbedData = ")

	embedStringBlock = embedBlock[1]

	embedStringBlock = embedStringBlock.split("};")[0] + "};"
	embedStringBlock = jsobj.read_js_object("var EmbedData = %s" % str(embedStringBlock))

	#print embedStringBlock

	#embedStringBlock = re.sub(r'{\s*(\w)', r'{"\1', embedStringBlock)
	#embedStringBlock = re.sub(r',\s*(\w)', r',"\1', embedStringBlock)
	#embedStringBlock = re.sub(r'(\w):', r'\1":', embedStringBlock)

	#embedStringBlock = embedStringBlock.replace(r'http\":', 'http:')


	#print embedStringBlock
	#currData = json.loads(embedStringBlock)
	#print currData


	#print embedStringBlock


	embedData = embedStringBlock

	artistName = embedData['EmbedData']['artist']

	if "name" in embedData:
		fileType = "track"
		trackName = embedData['EmbedData']['name']
	else:
		fileType = "album"

	albumTitle = embedData['EmbedData']['album_title']

	block = r.text.split("var TralbumData = ")
	#print block[0]

	stringBlock = block[1]

	stringBlock = stringBlock.split("};")[0] + "};"
	stringBlock = jsobj.read_js_object("var TralbumData = %s" % str(stringBlock))
	#print stringBlock

	#sys.exit()

	#stringArray = stringBlock.split("\n")
	#del stringArray[1:4]
	#print stringArray

	#stringBlock = "".join(stringArray).strip().replace("    ", "")



	data = stringBlock

	artistName = data['TralbumData']['artist']


	firstLetter = artistName[0]

	if not firstLetter.isalpha:
		firstLetter = "0"
	else:
		firstLetter = firstLetter.capitalize()


	if not os.path.exists("files"):
		os.makedirs("files")


	letterDirectory = "files/" + firstLetter

	if not os.path.exists("files/" + firstLetter):
		if(firstLetter.isalpha):
			os.makedirs("files/" + firstLetter)


	if not os.path.exists("files/" + firstLetter + "/" + artistName):
		os.makedirs("files/" + firstLetter + "/" + artistName)


	tracks = data['TralbumData']['trackinfo']	

	albumPath = albumTitle.replace(" ", "").replace("/","").replace(".", "")

	albumPath = "files/" + firstLetter + "/" + artistName + "/" + albumPath
	if not os.path.exists("files/zips"):
			os.makedirs("files/zips")

	if not os.path.exists(albumPath):
		os.makedirs(albumPath)

	for each in tracks:
		songTitle = each['title'].replace(" ", "").replace(".", "")
		songURL = each['file']['mp3-128']
		track_num = each['track_num']

		print "Now Downloading: " +  each['title'], each['file']['mp3-128']
		urllib.urlretrieve(songURL, albumPath + "/" + songTitle + ".mp3")



		print "Encoding . . . "
		audio = MP3(albumPath + "/" + songTitle + ".mp3")
		audio["TIT2"]=TIT2(encoding=3, text=["title"])
		audio.save()
		audio = EasyID3(albumPath + "/" + songTitle + ".mp3")
		audio["title"] = each['title']
		audio["artist"] = artistName
		audio["album"] = albumTitle
		#audio["tracknumber"] = track_num
		audio.save()

		#audiofile.tag.save()
		print "Done downloading " + songTitle
Exemple #8
0
def bandcamp_get_album_block(response):
    """Return the parsed ``TralbumData`` JavaScript object from ``response``.

    ``response.text`` is split on ``var TralbumData = ``; the text after the
    marker is cut at the first ``};`` (which is re-appended) and passed to
    ``jsobj.read_js_object`` as a complete assignment statement.

    Raises IndexError when the marker is absent from ``response.text``.
    """
    marker = 'var TralbumData = '
    after_marker = response.text.split(marker)[1]

    # split(sep, 1)[0] is the same prefix partition(sep)[0] would give.
    js_literal = after_marker.split("};", 1)[0] + "};"

    return jsobj.read_js_object("var TralbumData = {}".format(js_literal))