def tracksearch(self, encoded_artist, encoded_title):
    """Get metadata from last.fm using the track.search method"""
    url = self.base_url.format(
        artist=encoded_artist,
        title=encoded_title,
        api_key=settings.LASTFM_API_KEY,
        method='track.search')
    try:
        resp = http_get(url).json()
    except JSONDecodeError:
        # Retry once before giving up on the response.
        try:
            resp = http_get(url).json()
        except JSONDecodeError:
            log.error('Error occurred twice trying to parse response from {0}'.format(url))
            return None
    if isinstance(resp, dict):
        if (resp.get('results', {}).get('trackmatches') and
                not isinstance(resp['results']['trackmatches'], basestring)):
            result = resp['results']['trackmatches']['track']
            if isinstance(result, list) and result:
                result = result[0]
        else:
            # Track not found by last.fm (empty matches come back as a plain string)
            result = None
    else:
        log.error('Invalid Last.fm response: {0}'.format(url))
        result = None
    return result
def _get(self, url):
    """HTTP GET and decode JSON"""
    try:
        resp = http_get(url).json()
    except JSONDecodeError:
        try:
            resp = http_get(url).json()
        except JSONDecodeError:
            log.error('Error occurred twice trying to parse response from {0}'.format(url))
            return None
    return resp
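# The helpers in this section rely on an ``http_get`` wrapper, a ``JSONDecodeError``
# class and a module-level ``log``, none of which are shown here. A minimal sketch of
# what they could look like, built on ``requests`` and ``simplejson``; the timeout
# value and the choice of simplejson are assumptions, not necessarily what the
# project actually uses:

import logging

import requests
from simplejson import JSONDecodeError

log = logging.getLogger(__name__)


def http_get(url, **kwargs):
    # With simplejson installed, older requests versions delegate .json() to it,
    # so a parse failure surfaces as simplejson.JSONDecodeError and can be caught
    # by the callers above (an assumption about the environment, not a guarantee).
    return requests.get(url, timeout=10, **kwargs)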
def scrape(self):
    page = 0
    date_string = self.date.strftime('%d.%m.%Y')
    while True:
        tracks_found = False
        url = self.base_url.format(date=date_string, time='00:00',
                                   start_from=page * self.page_size)
        resp = http_get(url)
        soup = BeautifulSoup(resp.text)
        for entry in soup.findAll('article'):
            if entry.find('div', {'class': 'date'}).text != date_string:
                # next day reached, but list is not necessarily ordered - see 30.07.2016 for example
                continue
            tracks_found = True
            title = entry.find('h4').text.replace('Titel:', '')
            artist = entry.find('h5').text.replace('Artist:', '')
            time = entry.find('div', {'class': 'time'}).text.replace('UHR', '').strip()
            date_time = datetime.strptime('{} {}'.format(date_string, time), '%d.%m.%Y %H:%M')
            # filter dummy entries from lazy moderators/technical studio issues
            if artist.lower() == 'sunshine live' and title.lower() == 'electronic music radio':
                continue
            self.tracks.append((artist, title, date_time))
        if not tracks_found:
            self.log.info('SSLIVE: No more tracks for {} on page {}'.format(date_string, page))
            break
        page += 1
    if not self.tracks:
        self.log.error('SSLIVE: No tracks found for {}'.format(date_string))
    else:
        self.log.info('SSLIVE: Collected {} tracks for {}'.format(len(self.tracks), date_string))
def scrape(self):
    resp = http_get(self.base_url)
    soup = BeautifulSoup(resp.text)
    date_links = []
    for cell in soup.findAll('span', {'class': 'progDayCell'}):
        date_links.extend([a['href'] for a in cell.findAll('a')])
    for url in date_links:
        if 'date={0}'.format(self.date.strftime('%Y%m%d')) in url:
            resp = http_get(url)
            self.soup = BeautifulSoup(resp.text)
            for tracklist_url in self.tracklist_urls:
                resp = http_get(tracklist_url)
                self.soup = BeautifulSoup(resp.text)
                self.extract_tracks()
            return
    # No programme page found for the requested date
    raise LookupError
def scrape(self): """General scrape workflow. Can be overridden if necessary.""" for url in self.tracklist_urls: resp = http_get(url, cookies=self.cookies) self.soup = BeautifulSoup(resp.text) result = self.extract_tracks() if not result: self.log.warn('No tracks found in url {0}'.format(url))
def get_tags(self, mbid):
    """Get tags from last.fm by using mbid of track we found using track.search"""
    url = (u'http://ws.audioscrobbler.com/2.0/?method=track.getInfo'
           u'&mbid={mbid}&api_key={api_key}&format=json')
    url = url.format(mbid=mbid, api_key=settings.LASTFM_API_KEY)
    resp = http_get(url).json()
    if isinstance(resp, dict):
        return resp.get('track', {}).get('toptags')
    return []
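# A possible way to chain the two last.fm helpers above: search for the track, then
# look up its top tags via the MusicBrainz id of the first match. The ``client``
# object and the URL-quoting step are assumptions for illustration only:

from urllib import quote_plus  # Python 2, matching the basestring usage above

artist, title = 'Daft Punk', 'Around the World'
match = client.tracksearch(quote_plus(artist.encode('utf-8')),
                           quote_plus(title.encode('utf-8')))
if match and match.get('mbid'):
    toptags = client.get_tags(match['mbid'])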