def fetch(self) -> Iterable[Record]:
    """Yield recently played tracks from the DMHub program-info history API.

    Queries the history endpoint for the station at the current Berlin-time
    hour/minute and yields one Record per returned entry.
    """
    end_berlin_time = datetime.datetime.now(tz=BERLIN_TIME)
    hour = end_berlin_time.hour
    minute = end_berlin_time.minute
    print(f'Requesting time at {hour}:{minute} (berlin time)')
    # Word 'air' is a constant, not program related
    url = f'https://{self.host}/services/program-info/history/{self.station}/air/0/{hour}/{minute}'
    params = {
        'items': DMHUB_API_LIMIT,
    }
    resp = requests.get(url, params=params)
    # raise_for_status() instead of `assert resp.ok`: asserts are stripped
    # under -O, and this matches the error handling of the other fetchers.
    resp.raise_for_status()
    body = resp.json()
    print(f'Found {len(body)} records')
    for record in body:
        # 'start' is in milliseconds since the epoch; convert to seconds.
        timestamp = datetime.datetime.fromtimestamp(
            record['start'] / 1000, tz=BERLIN_TIME)
        song_title = record['track']['title']
        artist_name = record['track']['artist']
        yield Record(timestamp=timestamp, title=song_title, artist=artist_name)
def fetch(self) -> Iterable[Record]:
    """Yield tracks played in the last hour from the loverad.io search API.

    Raises AssertionError if a playlist entry does not contain exactly one
    song, or the song exactly one artist (the data asserts below).
    """
    url = f'https://{self.host}.loverad.io/search.json'
    end = datetime.datetime.now(tz=BERLIN_TIME)
    start = end - HOUR
    params: RequestParams = {
        'station': self.station,
        'start': start.isoformat(),
        'end': end.isoformat(),
    }
    resp = requests.get(url, params=params)
    # raise_for_status() instead of `assert resp.ok`: asserts are stripped
    # under -O, and this matches the error handling of the other fetchers.
    resp.raise_for_status()
    body = resp.json()
    found = body['result']['found']
    print(f'Found {found} records')
    for record in body['result']['entry']:
        timestamp = record['airtime']
        assert record['song']['found'] == '1', record['song']
        [song] = record['song']['entry']
        song_title = song['title']
        # This is potentially a very bad assumption, that there will be 1
        # artist?
        [artist] = song['artist']['entry']
        artist_name = artist['name']
        yield Record(
            timestamp=datetime.datetime.fromisoformat(timestamp),
            title=song_title,
            artist=artist_name)
def fetch(self) -> Iterable[Record]:
    """Scrape the Fritz playlist page and yield (date_text, start, end) tuples.

    NOTE(review): despite the `Iterable[Record]` annotation this yields plain
    3-tuples, not Record instances — presumably the caller parses them
    further; confirm before relying on the annotation.
    """
    url = 'https://www.fritz.de/programm/sendungen/playlists/'
    r = requests.get(url)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.select_one('.playlist_tables')
    # Fail loudly if the page layout changed and the container is missing.
    # (Replaces a debug `print(table)` leftover that dumped the whole element
    # to stdout and would have printed `None` on selector failure.)
    assert table is not None
    headers = table.find_all('h2')
    playlist_containers = table.select('div.table_container')
    assert len(headers) == len(playlist_containers)
    for (header, playlist) in zip(headers, playlist_containers):
        times = header.span.text
        [start, end] = times.split(' - ')
        # Converts <div class="sub_heading">vom 03.10.2020 <p class="moderation">mit <a href="/alles-fritzen/team/fritz_team/2721.html" title="Henrike Möller">Henrike Möller</a></p></div>
        # to 'vom 03.10.2020 '
        # See https://stackoverflow.com/questions/44858226/how-to-extract-the-text-inside-a-tag-with-beautifulsoup-in-python/44859413
        date_texts = playlist.select_one('.sub_heading').find_all(
            text=True, recursive=False)
        # There's sometimes whitespace floating around, this removes it
        stripped_and_filtered = [t.strip() for t in date_texts if t.strip()]
        assert len(stripped_and_filtered) == 1
        [date_text] = stripped_and_filtered
        yield (date_text, start, end)
def fetch(self) -> Iterable[Record]:
    """Yield tracks played in the last 5 hours from the funtip playlist API.

    The endpoint returns JSONP with unquoted object keys wrapped around an
    HTML table, so this does some careful string surgery before parsing —
    see the inline comments. Timestamps are Berlin wall-clock times.
    """
    url = 'https://playlist.funtip.de/playList.do'
    end = datetime.now(tz=BERLIN_TIME)
    start = end - 5*HOUR
    params: RequestParams = {
        # action, remote, version copied verbatim from template request
        'action': 'searching',
        'remote': 1,
        'version': 2,
        # format: dd-mm-yyyy_hh-mm
        'from': start.strftime('%d-%m-%Y_%H-%M'),
        'to': end.strftime('%d-%m-%Y_%H-%M'),
        # The server echoes whatever callback name we pick; 'lol' keeps the
        # prefix short and easy to strip below.
        'jsonp_callback': 'lol',
    }
    r = requests.get(url, params=params)
    r.raise_for_status()
    # Ok, this is nasty. It's JSONP (i.e. wrapped in a callback) and the keys of the object
    # don't have quotes so we can't parse as JSON directly. There's a single property, 'key',
    # which contains HTML.
    # Strip the callback
    assert r.text.startswith('lol(') and r.text.endswith(')')
    json_text = r.text[4:-1]
    # Replace `key` (without quotes) with `"key"` (with quotes) so we can JSON-parse it.
    # count=1 so only the property name is touched, never the HTML payload.
    json_text = json_text.replace('key', '"key"', 1)
    # Now parse and grab the HTML
    html = json.loads(json_text)['key']
    # Parse content from HTML
    soup = BeautifulSoup(html, 'html.parser')
    [table] = soup.select('table.trackList')
    [header, *data] = table.find_all('tr')
    assert [el.text for el in header.find_all('th')] == ['Zeit', 'Artist - Track - Album']
    for row in data:
        # This is actually when it was played, not the track length
        [timestamp] = row.select('td.trackLength')
        # .trackInterpret seems semantic, .left seems formatting-related
        [artist_and_title] = row.select('.trackInterpret .left')
        # Remove album info if it's present (it's in a span.trackLabel)
        track_label = artist_and_title.select_one('.trackLabel')
        if track_label:
            track_label.decompose()
        # This is the time (just the time, Berlin time)
        tt = time.strptime(timestamp.text, '%H:%M')
        timestamp = datetime_from_berlin_hhmmss(tt.tm_hour, tt.tm_min, 0)
        # That's an em-dash
        [artist, title_in_quotes] = artist_and_title.text.strip().split(" — ")
        yield Record(timestamp, title_in_quotes.strip('"'), artist)
def fetch(self) -> Iterable[Record]:
    """Yield the station's most recent plays from the paradiso update endpoint.

    This only yields the last 10 results; an hour-by-hour search exists but
    requires parsing HTML. Investigative start point:
    https://www.paradiso.de/playlist
    """
    endpoint = 'https://www.paradiso.de/pl/update.php?channel=paradiso_982'
    response = requests.get(endpoint, timeout=10)
    response.raise_for_status()
    for entry in response.json():
        played_at = datetime.fromtimestamp(
            int(entry['timestamp']), tz=BERLIN_TIME)
        yield Record(played_at, entry['song'], entry['artist'])
def fetch(self) -> Iterable[Record]:
    """Yield today's plays from the FluxFM playlist API.

    NOTE: the endpoint only returns entries for the current day, so this
    should probably run at 11:59 Berlin time to capture a full day.
    """
    api_url = 'https://www.fluxfm.de/fluxfm-playlist/api.php?act=list&loc=berlin&cuttime=1&limit=50'
    response = requests.get(api_url)
    response.raise_for_status()
    payload = response.json()
    assert payload['status'] == 'ok'
    for track in payload['tracks']:
        # Date and time arrive as separate fields; combine, then attach tz.
        played_at = datetime.strptime(
            f"{track['date']} {track['time']}",
            '%Y-%m-%d %H:%M').replace(tzinfo=BERLIN_TIME)
        yield Record(played_at, track['title'], track['artist'])
def fetch(self) -> Iterable[Record]:
    """Scrape the playlist table at self.url and yield one Record per row."""
    response = requests.get(self.url)
    response.raise_for_status()
    document = BeautifulSoup(response.text, 'html.parser')
    # The page is expected to contain exactly one table.
    [playlist_table] = document.select('table')
    column_titles = [cell.text for cell in playlist_table.select('th')]
    assert column_titles == ['Datum', 'Zeit', 'Interpret', 'Titel']
    all_rows = playlist_table.find_all('tr')
    # Skip the first row: it holds the column headers asserted above.
    for row in all_rows[1:]:
        cells = [cell.text for cell in row.find_all('td')]
        [datum, zeit, interpret, titel] = cells
        played_at = datetime.strptime(
            f'{datum} {zeit}', '%d.%m.%Y %H:%M').replace(tzinfo=BERLIN_TIME)
        yield Record(played_at, titel, interpret)
def fetch(self) -> Iterable[Record]:
    """Yield Records from Star FM's current-song-list AJAX endpoint."""
    endpoint = "https://berlin.starfm.de/player/ajax/getCurrentSongList.php"
    response = requests.get(endpoint)
    response.raise_for_status()
    # The payload is JSON wrapped in parentheses with a trailing semicolon.
    raw = response.text
    assert raw.startswith('(') and raw.endswith(');')
    payload = json.loads(raw[1:-2])
    # 'all' is a dict keyed by stringified indices ("0", "1", ...) => objects.
    for _key, record in payload['all'].items():
        # TODO: this clause hasn't been tested.
        has_required_fields = (
            record.get('cDate') and record.get('artist') and record.get('song'))
        if not has_required_fields:
            print(f'Skipped bad record: {record}')
            continue
        parsed = strptime(record['cDate'], '%H:%M:%S')
        played_at = datetime_from_berlin_hhmmss(
            parsed.tm_hour, parsed.tm_min, parsed.tm_sec)
        yield Record(played_at, record['song'], record['artist'])