Example #1
def dump_csv(d, fn, headers=None):
    debug.log("writing csv to file: " + fn)
    with open(fn, "w+") as file:
        writer = csv.writer(file, delimiter=',')
        if headers: writer.writerow(headers)
        for k, v in tqdm(d.items()):
            writer.writerow([k, v])
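A minimal usage sketch for dump_csv, assuming the excerpt's module-level imports (csv, tqdm) and its debug logger are in scope; the dictionary, file name, and headers are made-up illustration values.

# Hypothetical call of the helper above.
word_counts = {'apple': 3, 'banana': 5}
dump_csv(word_counts, 'word_counts.csv', headers=['word', 'count'])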
Example #2
def find_youtube_by_api(query, youtube_api_key):
  base_url = "https://www.googleapis.com/youtube/v3/search?"
  req_url = base_url + "part=snippet&q=" + urllib.parse.quote(query) + "&key=" + youtube_api_key + "&maxResults=10"
  debug.log(req_url)
  json_string = requests.get(req_url).text
  # debug.log(json_string)
  data_list = json.loads(json_string)['items']
  # debug.log(data_list)
  watch_urls = []
  videoIds = []
  for content in data_list:
    if content['id']['kind'] == 'youtube#video':
      videoId = content['id']['videoId']
      # debug.log(content)
      videoIds.append(videoId)
      watch_urls.append('https://www.youtube.com/watch?v=' + videoId)


  # soup = BeautifulSoup(response, "html.parser")
  # # debug.log(soup)
  # watch_urls = []
  # for link in soup.find_all('h3', {'class':'yt-lockup-title'}):
  #   watch_urls.append(base_url + link.find('a').attrs['href'])
  # # if len(watch_urls) < 1:
  # #   debug.log(soup)

  return watch_urls, videoIds
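A hedged usage sketch for find_youtube_by_api; 'YOUR_API_KEY' is a placeholder for a real YouTube Data API v3 key, the query string is an arbitrary example, and the excerpt's urllib/requests/json imports and debug logger are assumed to be in scope.

# Hypothetical call of the search helper above.
watch_urls, video_ids = find_youtube_by_api('IU eight audio', 'YOUR_API_KEY')
for url in watch_urls:
  debug.log(url)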
def getSongInfoOfMelon(music_record):
  soupArtist = music_record.find('div', {'class':'ellipsis rank02'})
  soupTitle = music_record.find('div', {'class':'ellipsis rank01'})
  soupSongInfo = music_record.find('a', {'class':'btn btn_icon_detail'})
  soupAlbumInfo = music_record.find('div', {'class':'ellipsis rank03'})

  # debug.log('=========')
  # debug.log('soupArtist')
  artist = ''
  # debug.log(soupArtist)
  artistCount = 0
  for art in soupArtist.find('span', {'class':'checkEllipsis'}).find_all('a'):
    if artistCount > 0:
      artist += ','
    artist += art.contents[0]
    artistCount += 1
  # debug.log(artist)
  # debug.log('soupTitle')
  # debug.log(soupTitle)
  if soupTitle.find('a') == None:
    title = soupTitle.find('span', {'class':'fc_lgray'}).contents[0]
  else:
    title = soupTitle.find('a').contents[0]
  # debug.log(title)
  # debug.log('soupSongInfo')
  # debug.log(soupSongInfo)
  songID = soupSongInfo['href'].replace('javascript:melon.link.goSongDetail(\'', '').replace('\');', '')
  # debug.log(songID)
  # debug.log('soupAlbumInfo')
  # debug.log(soupAlbumInfo)
  albumID = soupAlbumInfo.find('a')['href'].replace('javascript:melon.link.goAlbumDetail(\'', '').replace('\');', '')
  # debug.log(albumID)
  debug.log('parsed the music detail (artist: {}, title: {}, songID: {}, albumID: {})'.format(artist, title, songID, albumID))
  '''
  links = music_record.find_all('a')
  if len(links) < 4:
    return None
  artist = links[3].contents[0]
  title = links[2].contents[0]
  songID = links[1]['href'].replace('javascript:melon.link.goSongDetail(\'', '').replace('\');', '')
  albumID = links[5]['href'].replace('javascript:melon.link.goAlbumDetail(\'', '').replace('\');', '')
  if len(albumID) > 10:
    for link in links:
      debug.log(link)
  debug.log(albumID)
  '''
  image = music_record.find('img')
  coverImageURL = image['src'].split('.jpg')[0] + '.jpg'
  coverImgFile = downloadImageFromMelon(coverImageURL, songID)
  lyric = getLyricFromMelon(songID)

  return artist, title, songID, coverImgFile, lyric, albumID
Example #4
def download_audio_from_youtube(url, output_dir, strQuery, music_reporter):
  debug.log('\'{}\' is downloading from \'{}\'...'.format(strQuery, url))
  if music_reporter != None:
    music_reporter.updateMusic(strQuery, url)
  yt = YouTube(url)
  filename = convertQueryToFilename(strQuery)
  audio_list = yt.streams.filter(only_audio=True).all()
  if audio_list == []:
    audio_list = yt.streams.filter().all()
  for stream in audio_list:
    # print(stream)
    if stream.mime_type.find('mp4') >= 0:
      stream.download(output_dir, filename)
      break
  return filename
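A usage sketch chaining the search and download helpers, assuming find_youtube_by_api from Example #2, a pytube-style YouTube class, and convertQueryToFilename are available as in the excerpts; the API key, query, and output directory are illustrative placeholders.

# Hypothetical call chain: search first, then download the best match as an mp4 audio stream.
urls, _ = find_youtube_by_api('IU eight audio', 'YOUR_API_KEY')
if urls:
  saved_name = download_audio_from_youtube(urls[0], './downloads', 'IU eight audio', None)
  debug.log('saved as {}'.format(saved_name))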
Example #5
def find_youtube(query):
  base_url = 'https://www.youtube.co.kr'
  req_url = base_url + '/results?search_query=' + urllib.parse.quote(query)
  debug.log(req_url)
  response = http.getHTMLDocument(req_url)
  # debug.log(response)

  soup = BeautifulSoup(response, "html.parser")
  # debug.log(soup)
  watch_urls = []
  for link in soup.find_all('h3', {'class':'yt-lockup-title'}):
    watch_urls.append(base_url + link.find('a').attrs['href'])
  # if len(watch_urls) < 1:
  #   debug.log(soup)

  return watch_urls
def getSearchList(artist, title):
  query = 'q={}+-+{}'.format(urllib.parse.quote(artist), urllib.parse.quote(title))
  url = 'http://www.melon.com/search/total/index.htm?{}'.format(query)
  debug.log("send the request to melon: [{}]".format(url))
  content = http.getHTMLDocument(url)
  soup = BeautifulSoup(content, "html.parser")

  if soup.find('div', {'class':'section_no_data'}) == None:
    song_table = soup.find_all('div', {'class':'tb_list d_song_list songTypeOne'})[-1]
    song_list = song_table.find_all('tr')
    music_list = []
    for song_record in song_list:
      soupTitle = song_record.find('a', {'class':'fc_gray'})
      if soupTitle != None:
        artist, title, songID, albumInfo = getSongInfoFromMelonOfSearch(song_record)
        music_list.append({'artist':artist, 'title':title, 'songID':songID, 'albumInfo':albumInfo})
        # print('{}. artist: {} | title: {} | album: {}'.format(count, artist, title, albumInfo))

    count = 0
    for item in music_list:
      count += 1
      print('[{}] artist: {} | title: {} | album: {}'.format(count, item['artist'], item['title'], item['albumInfo']))
    selected_num = -1
    while selected_num < 0 or selected_num > count:
      try:
        selected_num = int(input(
          "Please choose the number(1<=NUM<={}) of music to download (input '0' if you want to exit): ".format(
            count)))
      except ValueError:
        selected_num = -1
        continue

      if selected_num == 0:
        return
      if selected_num < 0 or selected_num > count:
        print('Input number is out of range (0<=NUM<={}). Try to input again.'.format(count))
        continue

      print("\n[{}]({}-{}<{}>) is selected.".format(selected_num,
                                                    music_list[selected_num - 1]['artist'],
                                                    music_list[selected_num - 1]['title'],
                                                    music_list[selected_num - 1]['albumInfo']))

      return music_list[selected_num - 1]['songID']
  else:
    print('There is no data ({}-{} couldn\'t be found in Melon.)'.format(artist, title))
    return None
def getAlbumInfoFromMelon(melon_albumID):
  if melon_albumID == None:
    return None
  base_url = 'http://www.melon.com/album/detail.htm?albumId='
  url = base_url + melon_albumID

  content = http.getHTMLDocument(url)

  soup = BeautifulSoup(content, "html.parser")
  soupAlbumName = soup.find('div', {'class':'song_name'})
  albumName = soupAlbumName.contents[-1].replace('\r\n\t\t\t\t\t\t\t\t\t', '').replace('\t', '')
  debug.log('Getting album information of \'{}\'(id:{})'.format(albumName, melon_albumID))

  info = soup.find('dl', {'class':'list'})
  if info == None:
    return None
  dd_list = info.find_all('dd')

  pub_date = dd_list[0].contents[0]
  genre = dd_list[1].contents[0]
  publisher = dd_list[2].contents[0]
  copyright = dd_list[3].contents[0]
  return {'album_name':albumName, 'pub_date':pub_date, 'genre':genre, 'publisher':publisher, 'copyright':copyright}
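A usage sketch for getAlbumInfoFromMelon; the album id is a made-up placeholder, and the call relies on the http helper, BeautifulSoup, and debug logger used in the excerpt.

# Hypothetical call; '10012345' is a placeholder Melon album id.
album = getAlbumInfoFromMelon('10012345')
if album:
  debug.log('{} ({}, {})'.format(album['album_name'], album['pub_date'], album['genre']))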
Example #8
def getHTMLDocument(url, autoRetry=True):
    listAgent = [
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
    ]
    retryDelay = [0.1, 0.5, 1, 2, 5, 10, 30, 60, 180, 300]
    conLoop = True
    agentCount = 0
    delayCount = 0
    while (conLoop):
        opener = urllib.request.build_opener()
        opener.addheaders = [
            ('Host', 'www.melon.com'),
            ('Connection', 'Keep-Alive'),
            # ('Upgrade-Insecure-Requests', '1'),
            ('User-Agent', listAgent[agentCount]),
            ('X-Requested-With', 'XMLHttpRequest'),
            ('Accept', '*/*'),
            # ('Accept-Encoding', 'gzip, deflate'),
            ('Accept-Language', 'en-US,en;q=0.9,ko-KR;q=0.8,ko;q=0.7'),
            ("Content-Type",
             "application/x-www-form-urlencoded;charset=utf-8"),
            ('Cookie',
             'SCOUTER=z39vkmg7pd9j91; PCID=15250529259645155634188; WMONID=GxTfcjDmib7; POC=WP10'
             )
        ]
        try:
            html = opener.open(url)
        except ConnectionResetError as e:
            if autoRetry:
                debug.log('Connection denied from \'{}\''.format(url))
                debug.log(
                    'Try again using another header after {}sec...'.format(
                        retryDelay[delayCount]))
                agentCount = (agentCount + 1) % len(listAgent)
                time.sleep(retryDelay[delayCount])
                delayCount = (delayCount + 1) % len(retryDelay)
            else:
                debug.log('Document couldn\'t be fetched from {}'.format(url))
                return None
        else:
            conLoop = False
    return html.read()
Example #9
def print(*msgs, end='\n', indent='default', flush=True):
    log(*msgs, sep=' ', end=end, indent=indent, out=sys.stdout, debug=False)
    if flush: sys.stdout.flush()
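A usage sketch for the print wrapper above, which forwards its arguments to the module's log() helper and then flushes stdout; it is assumed to be called inside the same debug module, where it shadows the builtin print.

# Hypothetical calls; behaves like builtin print but routes output through log().
print('scan started')
print('progress:', 42, end='\r')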
Example #10
    def handle(self, institution_name = YOUR_INSTITUTION, **options):
        verbosity = int(options.get('verbosity', 0))
        if verbosity > 1:
            debug.DEBUG = True
        # Create an error log
        debug.errorlog_start('scan_itunes')
        # Some basic error checking
        if institution_name is None:
            debug.errorlog("Please specify the institution to scan.", display=True)
            return False

        try:
            mode = int(options.get("mode",1))
        except ValueError:
            debug.errorlog("""Please specify a valid mode for this scan.
               1) Scan an institution's collection
               2) Scan the Top Collections chart
               3) Scan the Top Downloads chart
               4) Scan the list of institutions
               """, display=True)
            return False
        if mode < 1 or mode > 4:
            debug.errorlog("""Please specify a valid mode for this scan.
               1) Scan an institution's collection
               2) Scan the Top Collections chart
               3) Scan the Top Downloads chart
               4) Scan the list of institutions
               """, display=True)
            return False

        scantime = datetime.datetime.now(pytz.utc)
        print "Scan iTunes started at " + str(scantime) + "\n"

        scanlog = ItuScanLog(mode=mode, time=scantime, comments="")
        scanlog.save()

        if mode == 1:
            try:
                institution = ItuInstitution.objects.filter(name__iexact=institution_name)[0]
            except:
                debug.errorlog(institution_name + u" is not a recognised institution.", display=True)
                scanlog.delete()
                return False

            scanlog.institution = institution
            scanlog.save()

            comment = u"Scan (and update) of " + institution_name + u"\'s collection from %s" % institution.url
            debug.log(u"Log started for: %s" % unicode(comment), display=True)
            print(comment)

            print("Getting information about collections...")
            collections = itunes.get_institution_collections(institution, hurry=True)
            print("Processing collection information and scanning individual items...")
            collections_spotted = []
            items_spotted = []
            for collection_itunes in collections:
                if collection_itunes:
#                    for k in collection_itunes.keys():
#                        print(k + ': ' + collection_itunes[k])

                    #Check if this collection's genre exists - if not, create it.
                    genre = ItuGenre(name=collection_itunes['genre'], itu_id=int(collection_itunes['genre_id']), url=collection_itunes['genre_url'])
                    genre_exists = False
                    for saved_genre in ItuGenre.objects.all():
                        if int(genre.itu_id) == int(saved_genre.itu_id) and genre.name==saved_genre.name and genre.url==saved_genre.url:
                            genre_exists = True
                            genre = saved_genre
                    if not genre_exists:
                        debug.log(u'Created new genre ' + unicode(genre.name), display=True)
                        genre.save()

                    collection_record_absolute = ItuCollection(institution=institution)
                    if collection_itunes['last modified']:
                        last_modified = parse(collection_itunes['last modified']).date()
                    else:
                        last_modified = None
                    collection_record_historical = ItuCollectionHistorical(name=collection_itunes['series'],
                                                 itu_id=int(collection_itunes['series_id']),
                                                 img170=collection_itunes['series_img_170'],
                                                 url=collection_itunes['series_url'],
                                                 language=collection_itunes['language'],
                                                 last_modified=last_modified,
                                                 contains_movies=collection_itunes['contains_movies'],
                                                 missing=None,
                                                 version=1,
                                                 institution=institution,
                                                 scanlog=scanlog,
                                                 genre=genre,
                                                 previous=None,
                                                 itucollection=collection_record_absolute)

                    rating_checksum = 0
                    for rating in collection_itunes['ratings']:
                        rating_checksum += pow(10,rating['stars']) + (rating['count']/1000000000)

                    #Put together a list of saved collection_record_historicals that look like they're the same as our collection_record_historical, really.
                    similar_collection_records_historical = []
                    collection_record_historical_exists = False
                    for collection_record_historical_saved in ItuCollectionHistorical.objects.filter((Q(name=collection_record_historical.name) & Q(contains_movies=collection_record_historical.contains_movies)) | Q(itu_id=collection_record_historical.itu_id) | Q(url=collection_record_historical.url)): #name AND Video/Audio
                        if collection_record_historical.url != collection_record_historical_saved.url: #Don't add similar collection_record_historical if the URLs are different, but both are accessible.
                            try:
                                urllib2.urlopen(collection_record_historical.url)
                                urllib2.urlopen(collection_record_historical_saved.url)
                            except urllib2.URLError:
                                similar_collection_records_historical.append(collection_record_historical_saved)
                        else:
                            similar_collection_records_historical.append(collection_record_historical_saved)
                            if collection_record_historical.name==collection_record_historical_saved.name and collection_record_historical.contains_movies==collection_record_historical_saved.contains_movies and int(collection_record_historical.itu_id)==int(collection_record_historical_saved.itu_id) and collection_record_historical.url==collection_record_historical_saved.url and collection_record_historical.img170==collection_record_historical_saved.img170 and collection_record_historical.language==collection_record_historical_saved.language and rating_checksum==collection_record_historical_saved.rating_checksum():
                                collection_record_historical_exists=True
                                collection_record_historical = collection_record_historical_saved
                            else:
                                similar_collection_records_historical.append(collection_record_historical_saved)
                    if not collection_record_historical_exists:
                        if similar_collection_records_historical:
                            similar_collection_records_historical.sort(key=lambda this_collection_record_historical: this_collection_record_historical.version)
                            latest_similar_collection_record_historical = similar_collection_records_historical[-1]
                            collection_record_historical.previous = latest_similar_collection_record_historical
                            collection_record_historical.version = latest_similar_collection_record_historical.version + 1
                            collection_record_historical.itucollection = latest_similar_collection_record_historical.itucollection
                        else:
                            collection_record_absolute.save()
                            collection_record_historical.itucollection = collection_record_absolute
                        debug.log(u'Created new historical collection record for ' + unicode(collection_record_historical.name) + u', version ' + unicode(collection_record_historical.version), display=True)
                        collection_record_historical.save()

                        for r in collection_itunes['ratings']:
                            try:
                                rating = ItuRating(stars=r['stars'],
                                               count=r['count'],
                                               itucollectionhistorical=collection_record_historical)
                                rating.save()
                            except:
                                debug.log(u'WARNING: Failed to save rating.', display=True)

                    for comment in collection_itunes['comments']:
                        if comment and len(ItuComment.objects.filter(detail=comment['detail'])) == 0:
                            try:
                                new_comment = ItuComment(itucollectionhistorical=collection_record_historical,
                                                         stars=comment['rating'],
                                                         date=comment['date'],
                                                         detail=comment['detail'],
                                                         source=comment['source'],
                                                         ituinstitution=institution)
                                new_comment.save()
                                debug.log(u'Saved new comment by ' + unicode(new_comment.source) + u': \"' + unicode(new_comment.detail) + u'\".', display=True)
                            except:
                                debug.log(u'WARNING: Failed to save comment.', display=True)

                    collections_spotted.append(collection_record_historical)

                    #Acquire the list of items for this collection.
                    try:
                        items = itunes.get_collection_items(collection_record_historical.url, hurry=True)
                    except:
                        debug.errorlog('Could not get items for collection ' + collection_record_historical.name + '.', display=True)
                        items = []
                    for item in items:
                        if item != {}: #Dictionary will be blank if we have failed to retrieve data on an item. If so, don't do anything with the item.
                            item_record_absolute = ItuItem(institution=institution)
                            try:
                                #Deal with things with no duration (like PDFs...)
                                if 'duration' in item.keys():
                                    item['duration'] = int(item['duration'])
                                else:
                                    item['duration'] = None
                                if 'songName' not in item.keys():
                                    item['songName'] = item['playlistName'] + ' ' + str(item['rank']) + ' {UNKNOWN NAME}'
                                item_record_historical = ItuItemHistorical(name=item['songName'],
                                                            itu_id=item['itemId'],
                                                            url=item['url'],
                                                            artist_name=item['artistName'],
                                                            description=item['description'],
                                                            duration=item['duration'],
                                                            explicit=bool(item['explicit']),
                                                            feed_url=item['feedURL'],
                                                            file_extension=item['fileExtension'],
                                                            kind=item['kind'],
                                                            long_description=item['longDescription'],
                                                            playlist_id=int(item['playlistId']),
                                                            playlist_name=item['playlistName'],
                                                            popularity=float(item['popularity']),
                                                            preview_length=int(item['previewLength']),
                                                            preview_url=item['previewURL'],
                                                            rank=int(item['rank']),
                                                            release_date=pytz.utc.localize(parse(item['releaseDate'],ignoretz=True)),
                                                            missing=None,
                                                            version=1,
                                                            previous=None,
                                                            ituitem=item_record_absolute,
                                                            institution=institution,
                                                            genre=genre,
                                                            scanlog=scanlog,
                                                            series=collection_record_historical)
                            except KeyError: #See if we've got data from a last-ditch attempt at downloading it instead.
                                try:
                                    duration = 0
                                    feedurl = ""
                                    for offerkey in item['store-offers'].keys(): #offerkey is something like 'standard-audio'. This code works on the assumption that, whatever the key, we want all the items in its list.
                                        try:
                                            duration = item['store-offers'][offerkey]['duration']
                                        except KeyError:
                                            duration = None
                                        feedurl = item['store-offers'][offerkey]['asset-url']
                                    item_record_historical = ItuItemHistorical(name=item['title'],
                                                              itu_id=item['item-id'],
                                                              url=item['url'],
                                                              artist_name=item['artist-name'],
                                                              description=item['description'],
                                                              duration=duration,
                                                              explicit=False,
                                                              feed_url=feedurl,
                                                              file_extension=feedurl.split('.')[-1],
                                                              kind='unknown',
                                                              long_description=item['long-description'],
                                                              playlist_id=collection_record_historical.id,
                                                              playlist_name=collection_record_historical.name,
                                                              popularity=0.0,
                                                              preview_length=0,
                                                              preview_url='unknown',
                                                              rank=int(item['track-number']),
                                                              release_date=item['release-date'],
                                                              missing=None,
                                                              version=1,
                                                              previous=None,
                                                              ituitem=item_record_absolute,
                                                              institution=institution,
                                                              genre=genre,
                                                              scanlog=scanlog,
                                                              series=collection_record_historical)
                                except KeyError:
                                    debug.errorlog(u'Missing key when trying to create an ItuItemHistorical. item=' + unicode(item), display=True)
                                except:
                                    debug.errorlog(u'Failed to process ItuItemHistorical.', display=True)

                            try: #We can't afford this bit to die in the middle of the night.
#                                Put together a list of saved item_record_historicals that look like they're the same as our item_record_historical, really.
                                similar_item_record_historicals = []
                                item_record_historical_exists = False
                                for saved_item_record_historical in ItuItemHistorical.objects.filter(Q(series__itucollection=collection_record_historical.itucollection) & (Q(name=item_record_historical.name) | Q(itu_id=item_record_historical.itu_id) | Q(url=item_record_historical.url)) & Q(file_extension=item_record_historical.file_extension)): #name AND Video/Audio
                                    if item_record_historical.url != saved_item_record_historical.url: #Don't add similar item_record_historical if the URLs are different, but both are accessible.
                                        try:
                                            urllib2.urlopen(item_record_historical.url)
                                            urllib2.urlopen(saved_item_record_historical.url)
                                        except urllib2.URLError:
                                            similar_item_record_historicals.append(saved_item_record_historical)
                                    else:
                                        if item_record_historical.name==saved_item_record_historical.name and item_record_historical.itu_id==saved_item_record_historical.itu_id and item_record_historical.url==saved_item_record_historical.url and item_record_historical.artist_name==saved_item_record_historical.artist_name and item_record_historical.description==saved_item_record_historical.description and item_record_historical.duration==saved_item_record_historical.duration and item_record_historical.explicit==saved_item_record_historical.explicit and item_record_historical.feed_url==saved_item_record_historical.feed_url and item_record_historical.file_extension==saved_item_record_historical.file_extension and item_record_historical.kind==saved_item_record_historical.kind and item_record_historical.long_description==saved_item_record_historical.long_description and item_record_historical.playlist_id==saved_item_record_historical.playlist_id and item_record_historical.playlist_name==saved_item_record_historical.playlist_name and item_record_historical.popularity==saved_item_record_historical.popularity and item_record_historical.preview_length==saved_item_record_historical.preview_length and item_record_historical.preview_url==saved_item_record_historical.preview_url and item_record_historical.rank==saved_item_record_historical.rank and item_record_historical.release_date==saved_item_record_historical.release_date:
                                            item_record_historical_exists = True
                                            item_record_historical = saved_item_record_historical
                                        else:
                                            similar_item_record_historicals.append(saved_item_record_historical)
                                if not item_record_historical_exists:
                                    if similar_item_record_historicals:
                                        similar_item_record_historicals.sort(key=lambda this_item_record_historical: this_item_record_historical.version)
                                        latest_similar_item_record_historical = similar_item_record_historicals[-1]
                                        item_record_historical.previous = latest_similar_item_record_historical
                                        item_record_historical.version = latest_similar_item_record_historical.version + 1
                                        item_record_historical.ituitem = latest_similar_item_record_historical.ituitem
                                    else:
                                        item_record_absolute.save()
                                        item_record_historical.ituitem = item_record_absolute
                                    debug.log(u'Created new historical item record for ' + unicode(item_record_historical.name) + u', version ' + unicode(item_record_historical.version), display=True)
                                    item_record_historical.save()
                                items_spotted.append(item_record_historical)
                            except:
                                debug.errorlog(u'Failed to process potential historical item record.', display=True)
                        else:
                            debug.log(u'WARNING: Blank item - perhaps we couldn\'t download the appropriate page?', display=True)
                else:
                    debug.log(u'WARNING: Blank category - perhaps we couldn\'t download the appropriate page?', display=True)
            print(u"Checking whether anything has gone missing or reappeared...")
            if collections:
                counter = 0
                for historical_collection_record in ItuCollectionHistorical.objects.filter(Q(institution=institution) & Q(itucollection__latest=F('id'))):
                    if historical_collection_record not in collections_spotted and historical_collection_record.missing == None:
                        debug.log(unicode(historical_collection_record.name) + u" appears to have gone missing! We last saw it at " + unicode(historical_collection_record.scanlog.time), display=True)
                        historical_collection_record.missing = scanlog
                        historical_collection_record.save()
                    elif historical_collection_record in collections_spotted and historical_collection_record.missing:
                        debug.log(unicode(historical_collection_record.name) + u" has reappeared! It went missing at " + unicode(historical_collection_record.missing.time), display=True)
                        historical_collection_record.missing = None
                        historical_collection_record.save()
                    counter += 1
                    if float(counter)/100.0 == int(float(counter)/100.0):
                        print (u'Still checking... (at object ' + unicode(counter) + u')')
                for historical_item_record in ItuItemHistorical.objects.filter(Q(institution=institution) & Q(ituitem__latest=F('id'))):
                    if historical_item_record not in items_spotted and historical_item_record.missing == None:
                        debug.log(unicode(historical_item_record.name) + u" appears to have gone missing! We last saw it at " + unicode(historical_item_record.scanlog.time), display=True)
                        historical_item_record.missing = scanlog
                        historical_item_record.save()
                    elif historical_item_record in items_spotted and historical_item_record.missing:
                        debug.log(unicode(historical_item_record.name) + u" has reappeared! It went missing at " + unicode(historical_item_record.missing.time), display=True)
                        historical_item_record.missing = None
                        historical_item_record.save()
                    counter += 1
                    if float(counter)/100.0 == int(float(counter)/100.0):
                        print (u'Still checking... (at object ' + unicode(counter) + u')')
            else:
                debug.log(u"WARNING: No collections found. Perhaps you scanned an institution that only publishes courses?", display=True)
        elif mode == 2:
            comment = u"Scan of the Top Collections Chart..."
            debug.log(u"Log started for: %s" % unicode(comment), display=True)
            updated_institutions = False
            collections = itunes.get_topcollections()
            for collection in collections:
                if collection:
                    try:
                        historical_collections=ItuCollectionHistorical.objects.filter(url=collection['series_url'])
                        if not historical_collections:
                            debug.log(u'WARNING: Couldn\'t find an historical record of collection at ' + unicode(collection['series_url']) + u'. Attempting an historical scan of ' + unicode(collection['institution']) + u' first...', display=True)
                            if not updated_institutions:
                                management.call_command('scan_itunes', mode=4)
                                updated_institutions = True
                            try:
                                management.call_command('scan_itunes', collection['institution'], mode=1)
                            except:
                                try: #Deal with institutions which aren't listed by Apple.
                                    institution = ItuInstitution(name = collection['institution'],
                                                                 itu_id = int(collection['institution_id']),
                                                                 url = collection['institution_url'])
                                    institution.save()
                                    management.call_command('scan_itunes', collection['institution'], mode=1)
                                except:
                                    debug.errorlog('Failed to scan institution ' + collection['institution'] + '. Perhaps this institution isn\'t listed by Apple?', display=True)
                            historical_collections=ItuCollectionHistorical.objects.filter(url=collection['series_url'])
                        if historical_collections.exists():
                            historical_collection=historical_collections[0].latest()
                            debug.log(u'Creating new chart row: ' + unicode(historical_collection.name) + u' Position: ' + unicode(collection['chart_position']), display=True)
                            chartrow=ItuCollectionChartScan(position=int(collection['chart_position']),
                                                            itucollection=historical_collection.itucollection,
                                                            itucollectionhistorical=historical_collection,
                                                            scanlog=scanlog,
                                                            date=scanlog.time)
                            chartrow.save()
                        else:
                            debug.errorlog(u'Couldn\'t find an historical record of collection at ' + unicode(collection['series_url']) + u' despite updating the database.', display=True)
                    except KeyError:
                        debug.errorlog('WARNING: Couldn\'t access collection (KeyError):' + str(collection), display=True)

        elif mode == 3:
            comment = u"Scan of the Top Downloads Chart..."
            debug.log(u"Log started for: %s" % unicode(comment), display=True)
            updated_institutions = False
            items = itunes.get_topdownloads()
            for item in items:
                if item:
                    try:
                        historical_items=ItuItemHistorical.objects.filter(name=item['item'])
                        if not historical_items:
                            debug.log(u'WARNING: Couldn\'t find an historical record of item at ' + unicode(item['item_url']) + u'. Attempting an historical scan of ' + unicode(item['institution']) + u' first...', display=True)
                            if not updated_institutions:
                                management.call_command('scan_itunes', mode=4)
                                updated_institutions = True
                            try:
                                management.call_command('scan_itunes', item['institution'], mode=1)
                            except:
                                try: #Deal with institutions which aren't listed by Apple.
                                    institution = ItuInstitution(name = item['institution'],
                                                                 itu_id = int(item['institution_id']),
                                                                 url = item['institution_url'])
                                    institution.save()
                                    management.call_command('scan_itunes', item['institution'], mode=1)
                                except:
                                    debug.errorlog('Failed to scan institution ' + item['institution'] + '. This is a bug.', display=True)
                            historical_items=ItuItemHistorical.objects.filter(name=item['item'])
                        if historical_items.exists():
                            historical_item=historical_items[0].latest()
                            debug.log(u'Created new download chart row: ' + unicode(historical_item.name) + u' Position: ' + unicode(item['chart_position']), display=True)
                            chartrow=ItuItemChartScan(position=int(item['chart_position']),
                                                      ituitem=historical_item.ituitem,
                                                      ituitemhistorical=historical_item,
                                                      scanlog=scanlog,
                                                      date=scanlog.time)
                            chartrow.save()
                        else:
                            debug.errorlog(u'Couldn\'t find an historical record of item at ' + unicode(item['item_url']) + u' despite updating the database.', display=True)
                    except KeyError:
                        debug.errorlog('WARNING: Couldn\'t access item (KeyError):' + str(item), display=True)
        elif mode == 4:
            comment = "Scan of list of institutions..."
            debug.log(u"Log started for: %s" % unicode(comment))
            print(comment)
            institutions = itunes.get_institutions()
            for institution_itunes in institutions:
                if institution_itunes:
                    institution = ItuInstitution(name = institution_itunes['text'],
                                                 itu_id = int(institution_itunes['itu_id']),
                                                 url = institution_itunes['url'])
                    need_update = False
                    need_create = True
                    for saved_institution in ItuInstitution.objects.filter(Q(itu_id=institution.itu_id) | Q(name=institution.name) | Q(url = institution.url)):
                        if saved_institution.itu_id == institution.itu_id and saved_institution.name == institution.name and saved_institution.url == institution.url:
                            need_update = False
                            need_create = False
                        else:
                            need_update = True
                            need_create = False
                            saved_institution.itu_id = institution.itu_id
                            saved_institution.name = institution.name
                            saved_institution.url = institution.url
                            institution = saved_institution
                    if need_update:
                        debug.log(u'Updated institution ' + unicode(institution.name), display=True)
                        institution.save()
                    elif need_create:
                        debug.log(u'Created new institution ' + unicode(institution.name), display=True)
                        institution.save()
        else:
            debug.errorlog(u"We shouldn't ever get this scan...", display=True)

        print "\nScan iTunes finished at " + str(datetime.datetime.now(pytz.utc))

        # Write the error cache to disk
        debug.errorlog_save()
        debug.errorlog_stop()
        scanlog.complete = True
        scanlog.save()
        return None
def getMelonChart(maxRank=50, period_type='weekly', str_target_date=None):
  period_url = {'weekly': 'week', 'monthly': 'month', 'yearly': '', 'decennial': ''}

  if maxRank < 1:
    maxRank = 1
  elif maxRank > 50:
    maxRank = 50
  if str_target_date == None or str_target_date == '':
    if period_type == 'weekly':
      str_target_date = (date.today() - timedelta(days=date.today().isoweekday())).strftime('%Y%m%d')
    else:
      str_target_date = date.today().strftime('%Y%m%d')

  debug.log('target date={}'.format(str_target_date))
  target_date = date(int(str_target_date[0:4]),
                     int(str_target_date[4:6]),
                     int(str_target_date[6:8]))

  if target_date < date(1990, 1, 7):
    target_date = date(1990, 1, 7)
  elif target_date > date.today():
    target_date = date.today()

  if period_type == 'weekly':
    strTimeFormat = '%Y%m%d'
    if isWeekStartedFromSunday(target_date):
      startDay = target_date - timedelta(days=target_date.isoweekday()%7)
    else:
      startDay = target_date - timedelta(days=target_date.weekday())
    endDay = startDay + timedelta(days=6)
    if not isWeekStartedFromSunday(startDay) and isWeekStartedFromSunday(endDay):
      endDay = endDay - timedelta(days=1)
    if isWeekStartedFromSunday(startDay) and not isWeekStartedFromSunday(endDay):
      target_date = target_date - timedelta(days=1)
      startDay = target_date - timedelta(days=target_date.isoweekday()%7)
      endDay = startDay + timedelta(days=6)
    if target_date.year < 2017:
      if target_date < date(2009, 11, 1):
        if target_date < date(2004, 11, 22):
          classCd = 'KPOP'
        else:
          classCd = 'CL0000'
      else:
        classCd = 'DP0000'
    else:
      classCd = 'GN0000'
    url_param = 'chartType=WE&classCd={}&startDay={}&endDay={}'.format(
      classCd, startDay.strftime(strTimeFormat), endDay.strftime(strTimeFormat)
    )
    period_str = '{}-{}'.format(startDay.strftime('%Y.%m.%d'), endDay.strftime('%Y.%m.%d'))
  elif period_type == 'monthly':
    strYearFormat = '%Y'
    strMonthFormat = '%m'
    today = date.today()
    if target_date.year == today.year and target_date.month == today.month:
      target_date = (target_date.replace(day=1) - timedelta(days=1)).replace(day=1)
    rankYear = target_date.strftime(strYearFormat)
    rankMonth = target_date.strftime(strMonthFormat)
    if target_date.year < 2017:
      if target_date < date(2004, 11, 1):
        classCd = 'KPOP'
      else:
        classCd = 'DP0000'
    else:
      classCd = 'GN0000'
    url_param = 'chartType=MO&year={}&mon={}&classCd={}'.format(rankYear, rankMonth, classCd)
    period_str = '{}'.format(target_date.strftime('%Y.%m'))
  elif period_type == 'yearly':
    strYearFormat = '%Y'
    today = date.today()
    if target_date.year >= today.year:
      target_date = target_date.replace(year=today.year-1, month=1, day=1)
    rankYear = target_date.strftime(strYearFormat)
    classCd = 'KPOP'
    url_param = 'chartType=YE&year={}&classCd={}'.format(rankYear, classCd)
    period_str = '{}'.format(target_date.strftime('%Y'))
  else:
    # decennial
    strYearFormat = '%Y'
    today = date.today()
    today = today.replace(year=(today.year // 10) * 10, month=1, day=1)

    target_date = target_date.replace(year=(target_date.year // 10) * 10, month=1, day=1)
    if target_date >= today:
      target_date = target_date.replace(year=today.year-10, month=1, day=1)
    rankYear = target_date.strftime(strYearFormat)
    classCd = 'KPOP'
    url_param = 'chartType=AG&age={}&classCd={}'.format(rankYear, classCd)
    period_str = '{}s'.format(target_date.strftime('%Y'))
  url = "http://www.melon.com/chart/search/list.htm?{}&moved=Y".format(url_param)
  debug.log("Request chart to melon by query < {} >".format(url))
  content = http.getHTMLDocument(url)
  # debug.log(content)

  soup = BeautifulSoup(content, "html.parser")
  # debug.log(soup)

  chart_name = 'melon_{}_'.format(period_type) + period_str
  debug.log(chart_name)

  table = soup.find('tbody', {'id':'chartListObj'})
  # debug.log(table)
  debug.log('')
  count = 1
  chart_list = []
  for music in table.find_all('tr', {'class':'lst50'}):
    if count > maxRank:
      break
    # image = music.find('img')
    links = music.find_all('a')
    if len(links) > 3:
      artist, title, songID, coverImgFile, lyric, albumID = getSongInfoOfMelon(music)
      debug.log('{:02}. {} - {} (id:{}, {})'.format(count, artist, title, songID, coverImgFile))
      chart_list.append({'rank':count, 'artist':artist, 'title':title,
                         'songID':songID, 'albumID':albumID, 'lyric':lyric})
      # debug.log(lyric)
      count += 1
  return chart_name, chart_list

if __name__ == '__main__':
  # lyric, artist, title, albumID, imgUrl = getSongInfobySongIDOfMelon('30989550')
  # print(lyric)
  # print(artist)
  # print(title)
  # print(albumID)
  # print(imgUrl)
  # chart_name, chart_list = getMelonChart()
  chart_name, chart_list = getMelonChart(period_type='monthly', str_target_date='19901001')
  # chart_name, chart_list = getMelonChart(period_type='weekly', str_target_date='20041120')
  for song in chart_list:
    debug.log(song)
Example #13
def getSongFromYouTube(artist, title, songID, lyric, albumID, baseMusicDir, baseImageDir, youtube_api_key,
                       isOverwriteMode=False, music_reporter=None):
  audio_name = '{}-{}'.format(artist, title)
  query = '{} audio'.format(audio_name)
  # check whether mp3 file already exists.
  if not os.path.exists(baseMusicDir):
    os.mkdir(baseMusicDir)
  filename = convertQueryToFilename(audio_name)
  mp3_parent = os.path.join(baseMusicDir, convertQueryToFilename(artist))
  if not os.path.exists(mp3_parent):
    os.mkdir(mp3_parent)
  mp3_dir = os.path.join(convertQueryToFilename(artist), albumID)
  if not os.path.exists(os.path.join(baseMusicDir, mp3_dir)):
    os.mkdir(os.path.join(baseMusicDir, mp3_dir))
  mp3_filename = filename + '.mp3'
  mp3_path = os.path.join(mp3_dir, mp3_filename)

  isSkip = False
  if os.path.exists(os.path.join(baseMusicDir, mp3_path)):
    if isOverwriteMode:
      debug.log('{} already exists. It will be overwritten.'.format(mp3_path))
      os.remove(os.path.join(baseMusicDir, mp3_path))
    else:
      debug.log('{} already exists. Downloading will be skipped.'.format(mp3_path))
      isSkip = True
  if not isSkip:
    debug.log('Looking for youtube by the query \'{}\'...'.format(query))
    list, _ = find_youtube_by_api(query, youtube_api_key)
    retry = 0
    while len(list) <= 1 and retry < 5:
      debug.log('Youtube list couldn\'t be retrieved. Retrying...')
      list, _ = find_youtube_by_api(query, youtube_api_key)
      retry += 1
    debug.log('trying to download \'' + query + '\'...')

    file_name = download_audio_from_youtube(list[0], baseMusicDir, audio_name, music_reporter)
    debug.log('\'' + file_name + '\' was downloaded.')
    debug.log('\'' + file_name + '\' is converting...')
    convertMP3(baseMusicDir, file_name, mp3_path)
    debug.log('\'' + mp3_path + '\' was converted.')
    img_path = os.path.join(baseImageDir, songID + '.jpg')
    setID3(baseMusicDir, mp3_path, artist, title, lyric, albumID, img_path)
    os.remove(img_path)
    debug.log('Song Information was recorded on \'' + mp3_path + '\'')

  return mp3_path
Example #14
def repair_music():
    f = open("youtube_api_key.txt", "r")
    youtube_api_key = f.readline().split()[0]
    f.close()

    filename = FLAGS.path.split(os.sep)[-1]
    target_dir = FLAGS.path.replace(os.sep + filename, '')

    # check whether the target file exists and the file condition is satisfied.
    if not os.path.exists(FLAGS.path):
        debug.log('There is no target file.')
        return
    if not (os.path.isfile(FLAGS.path) and FLAGS.path.split('.')[-1] == 'mp3'):
        debug.log('It is not an MP3 file.')
        return

    # get ID3 tag and query
    id3_tag = getID3Tag(FLAGS.path)
    if id3_tag == None:
        debug.log(
            'There is no ID3 Tag in the target mp3 file. Please check the file information'
        )
        return
    audio_name = getAudioNameFromID3(id3_tag)

    # search for youtube
    query = '{} audio'.format(audio_name)
    debug.log('Looking for youtube by the query \'{}\''.format(query))
    list = ye.find_youtube_detailed_by_api(query, youtube_api_key)

    count = 0
    for link in list:
        count += 1
        print("[{}] title:'{}', length:{}, link:< {} >".format(
            count, link['title'], link['length'], link['url']))

    selected_num = -1

    while selected_num < 0 or selected_num > count:
        try:
            selected_num = int(
                input(
                    "Please choose the link number(1<=NUM<={}) of music to repair (input '0' if you want to exit): "
                    .format(count)))
        except ValueError:
            selected_num = -1
            continue

        if selected_num == 0:
            return
        if selected_num < 0 or selected_num > count:
            print(
                'Input number is out of range (0<=NUM<={}). Try to input again.'
                .format(count))

    print("\n[{}]({}<{}>) is selected.".format(selected_num,
                                               list[selected_num - 1]['title'],
                                               list[selected_num - 1]['url']))
    old_filename = "{}_old.mp3".format(filename.split('.mp3')[0])
    old_file_path = os.path.join(target_dir, old_filename)
    if os.path.exists(old_file_path):
        os.remove(old_file_path)
        debug.log('Previous old mp3 file was removed.')
    os.rename(FLAGS.path, old_file_path)
    debug.log(
        "The previous file was renamed to '{}'".format(old_filename))
    mr = music_reporter.MusicReporter('logs', 'report.log')
    conv_filename = ye.convertQueryToFilename(audio_name)
    output_filename = ye.download_audio_from_youtube(list[selected_num -
                                                          1]['url'],
                                                     output_dir=target_dir,
                                                     strQuery=conv_filename,
                                                     music_reporter=mr)
    del mr
    debug.log('\'' + output_filename + '\' was downloaded.')
    debug.log('\'' + output_filename + '\' is converting...')
    ye.convertMP3(target_dir, output_filename, conv_filename + '.mp3')
    debug.log('\'' + FLAGS.path + '\' was converted.')
    setID3Tag(FLAGS.path, id3_tag)
    debug.log('Song Information was recorded on \'' + FLAGS.path + '\'')