Example #1
def tracksearch(self, encoded_artist, encoded_title):
    """Get metadata from last.fm using the track.search method"""
    url = self.base_url.format(
        artist=encoded_artist, title=encoded_title,
        api_key=settings.LASTFM_API_KEY, method='track.search')
    try:
        resp = http_get(url).json()
    except JSONDecodeError:
        # Retry once before giving up on a malformed response
        try:
            resp = http_get(url).json()
        except JSONDecodeError:
            log.error('Error occurred twice trying to parse response from {0}'.format(url))
            return None
    if isinstance(resp, dict):
        # last.fm returns a bare string instead of a dict under 'trackmatches'
        # when there are no matches, hence the basestring check (Python 2)
        if (resp.get('results', {}).get('trackmatches')
                and not isinstance(resp['results']['trackmatches'], basestring)):
            result = resp['results']['trackmatches']['track']
            if isinstance(result, list) and result:
                # 'track' may be a list of matches; take the first one
                result = result[0]
        else:
            # Track not found by last.fm
            result = None
    else:
        log.error('Invalid Last.fm response: {0}'.format(url))
        result = None
    return result
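
For orientation, this is roughly the response shape tracksearch navigates; the nesting follows last.fm's track.search JSON format, with the field values invented for illustration:

    resp = {
        'results': {
            'trackmatches': {
                'track': [
                    {'name': 'Some Title', 'artist': 'Some Artist', 'mbid': '...'},
                ]
            }
        }
    }

The isinstance(result, list) check before indexing suggests that last.fm can also return 'track' as a single dict rather than a one-element list when only one track matches.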
Example #2
def _get(self, url):
    """HTTP GET and decode JSON"""
    try:
        resp = http_get(url).json()
    except JSONDecodeError:
        # Retry once before giving up on a malformed response
        try:
            resp = http_get(url).json()
        except JSONDecodeError:
            log.error('Error occurred twice trying to parse response from {0}'.format(url))
            return None
    return resp
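
All of these snippets lean on a project-level http_get helper that is not shown. A minimal stand-in, assuming it is a thin wrapper around requests under Python 3 (where a malformed body makes resp.json() raise json.JSONDecodeError):

    import logging
    import requests

    log = logging.getLogger(__name__)

    def http_get(url, **kwargs):
        # Hypothetical wrapper: a plain GET with a timeout so scrapers cannot hang
        return requests.get(url, timeout=10, **kwargs)

With a wrapper like this in place, _get(url) returns the decoded JSON, or None after two failed decode attempts.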
Example #3
def scrape(self):
    page = 0
    date_string = self.date.strftime('%d.%m.%Y')
    while True:
        tracks_found = False
        url = self.base_url.format(date=date_string, time='00:00', start_from=page * self.page_size)
        resp = http_get(url)
        soup = BeautifulSoup(resp.text)
        for entry in soup.findAll('article'):
            if entry.find('div', {'class': 'date'}).text != date_string:
                # next day reached, but the list is not necessarily ordered - see 30.07.2016 for example
                continue
            tracks_found = True
            title = entry.find('h4').text.replace('Titel:', '')
            artist = entry.find('h5').text.replace('Artist:', '')
            time = entry.find('div', {'class': 'time'}).text.replace('UHR', '').strip()
            date_time = datetime.strptime('{} {}'.format(date_string, time), '%d.%m.%Y %H:%M')

            # skip dummy entries left by lazy moderators or during technical studio issues
            if artist.lower() == 'sunshine live' and title.lower() == 'electronic music radio':
                continue
            self.tracks.append((artist, title, date_time))

        if not tracks_found:
            self.log.info('SSLIVE: No more tracks for {} on page {}'.format(date_string, page))
            break
        page += 1
    if not self.tracks:
        self.log.error('SSLIVE: No tracks found for {}'.format(date_string))
    else:
        self.log.info('SSLIVE: Collected {} tracks for {}'.format(len(self.tracks), date_string))
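
Assuming this scrape method lives on a scraper class for the sunshine live playlist (the class name and constructor below are hypothetical), usage might look like:

    from datetime import date

    scraper = SunshineLiveScraper(date(2016, 7, 30))  # hypothetical class and signature
    scraper.scrape()
    for artist, title, played_at in scraper.tracks:
        print(played_at, artist, '-', title)

The while loop keeps paging until an entire page yields no entries for the requested date, so the scraper tolerates result pages that mix in entries from the following day.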
Example #4
def scrape(self):
    # Collect the schedule links for each day from the overview page
    resp = http_get(self.base_url)
    soup = BeautifulSoup(resp.text)
    date_links = []
    for cell in soup.findAll('span', {'class': 'progDayCell'}):
        date_links.extend([a['href'] for a in cell.findAll('a')])
    for url in date_links:
        if 'date={0}'.format(self.date.strftime('%Y%m%d')) in url:
            resp = http_get(url)
            self.soup = BeautifulSoup(resp.text)
            # Fetch each linked tracklist and parse it
            for tracklist_url in self.tracklist_urls:
                resp = http_get(tracklist_url)
                self.soup = BeautifulSoup(resp.text)
                self.extract_tracks()
            return
    # The requested date is not part of the published schedule
    raise LookupError
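
Because scrape raises a bare LookupError when the requested date is missing from the overview page, a caller would typically guard the call; a sketch with assumed names:

    try:
        scraper.scrape()
    except LookupError:
        log.error('No schedule page found for {0}'.format(scraper.date))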
Example #5
def scrape(self):
    """General scrape workflow. Can be overridden if necessary."""
    for url in self.tracklist_urls:
        resp = http_get(url, cookies=self.cookies)
        self.soup = BeautifulSoup(resp.text)
        result = self.extract_tracks()
        if not result:
            self.log.warn('No tracks found in url {0}'.format(url))
Example #6
def get_tags(self, mbid):
    """Get tags from last.fm using the mbid of a track found via track.search"""
    url = (u'http://ws.audioscrobbler.com/2.0/?method=track.getInfo'
           u'&mbid={mbid}&api_key={api_key}&format=json')
    url = url.format(mbid=mbid, api_key=settings.LASTFM_API_KEY)
    resp = http_get(url).json()
    if isinstance(resp, dict):
        # May be None when the track carries no tags
        return resp.get('track', {}).get('toptags')
    return []
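
get_tags returns the toptags mapping from the track.getInfo response, None when the track carries no tags, or an empty list for an unexpected response. A hedged sketch of extracting plain tag names, assuming the 'tag'/'name' layout of last.fm's public track.getInfo format and a hypothetical client object named lastfm:

    toptags = lastfm.get_tags(mbid) or {}
    tag_names = [t['name'] for t in toptags.get('tag', [])]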