Exemple #1
0
    def fetch(self) -> Iterable[Record]:
        """Yield the play history the program-info service reports up to now.

        The history URL is built from ``self.host``, ``self.station`` and the
        current Berlin-time hour and minute; ``DMHUB_API_LIMIT`` is sent as the
        ``items`` query parameter.

        Yields:
            One ``Record`` per entry of the JSON response, carrying the entry's
            start time (as a Berlin-time datetime), track title and artist.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server stops responding for longer than
                the 10 second timeout.
        """
        end_berlin_time = datetime.datetime.now(tz=BERLIN_TIME)
        hour = end_berlin_time.hour
        minute = end_berlin_time.minute
        print(f'Requesting time at {hour}:{minute} (berlin time)')
        # Word 'air' is a constant, not program related
        url = f'https://{self.host}/services/program-info/history/{self.station}/air/0/{hour}/{minute}'

        params = {
            'items': DMHUB_API_LIMIT,
        }
        # Without a timeout requests waits indefinitely on a stalled server.
        resp = requests.get(url, params=params, timeout=10)
        # Raise instead of assert: asserts vanish under `python -O`, which
        # would let an error page flow into the JSON parsing below.
        resp.raise_for_status()
        body = resp.json()
        print(f'Found {len(body)} records')
        for record in body:
            # 'start' is in milliseconds; fromtimestamp() expects seconds.
            timestamp = datetime.datetime.fromtimestamp(record['start'] / 1000,
                                                        tz=BERLIN_TIME)
            song_title = record['track']['title']
            artist_name = record['track']['artist']

            yield Record(timestamp=timestamp,
                         title=song_title,
                         artist=artist_name)
Exemple #2
0
 def fetch(self) -> Iterable[Record]:
     """Yield the tracks the loverad.io search endpoint lists for the last HOUR.

     Queries ``https://{self.host}.loverad.io/search.json`` for
     ``self.station`` with a window that ends now (Berlin time) and starts
     ``HOUR`` earlier, both sent as ISO-8601 strings.

     Yields:
         One ``Record`` per entry in ``body['result']['entry']``; the
         timestamp is parsed from the entry's ISO-formatted ``airtime``.

     Raises:
         requests.HTTPError: If the server answers with a 4xx/5xx status.
         requests.Timeout: If the server stops responding for longer than
             the 10 second timeout.
         AssertionError: If an entry does not report exactly one found song.
         ValueError: If an entry's song or artist list does not hold exactly
             one element.
     """
     url = f'https://{self.host}.loverad.io/search.json'
     end = datetime.datetime.now(tz=BERLIN_TIME)
     start = end - HOUR
     params: RequestParams = {
         'station': self.station,
         'start': start.isoformat(),
         'end': end.isoformat(),
     }
     # Without a timeout requests waits indefinitely on a stalled server.
     resp = requests.get(url, params=params, timeout=10)
     # Raise instead of assert: asserts vanish under `python -O`, which
     # would let an error page flow into the JSON parsing below.
     resp.raise_for_status()
     body = resp.json()
     found = body['result']['found']
     print(f'Found {found} records')
     for record in body['result']['entry']:
         timestamp = record['airtime']
         # The API reports the count as a string, hence the comparison to '1'.
         assert record['song']['found'] == '1', record['song']
         # Single-element unpacking doubles as a "exactly one song" check.
         [song] = record['song']['entry']
         song_title = song['title']
         # This is potentially a very bad assumption, that there will be 1
         # artist? A track with several artists fails the unpacking below.
         [artist] = song['artist']['entry']
         artist_name = artist['name']
         yield Record(
             timestamp=datetime.datetime.fromisoformat(timestamp),
             title=song_title,
             artist=artist_name)
Exemple #3
0
    def fetch(self) -> Iterable[Record]:
        """Yield the date and time range of every playlist on the Fritz page.

        Downloads the Fritz playlists overview, pairs each ``h2`` heading
        inside ``.playlist_tables`` with its ``div.table_container`` and
        extracts the broadcast window from them.

        NOTE(review): despite the ``Iterable[Record]`` annotation this
        currently yields ``(date_text, start, end)`` string tuples — confirm
        whether the conversion to ``Record`` is still to be written.

        Yields:
            ``(date_text, start, end)`` where ``date_text`` is the stripped
            sub-heading text (e.g. ``'vom 03.10.2020'``) and ``start``/``end``
            are the two halves of the heading's ``' - '``-separated time span.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server stops responding for longer than
                the 10 second timeout.
            AssertionError: If headings and containers do not pair up, or a
                sub-heading does not contain exactly one text fragment.
        """
        url = 'https://www.fritz.de/programm/sendungen/playlists/'
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        table = soup.select_one('.playlist_tables')
        # NOTE(review): dumps the whole matched element to stdout; looks like
        # leftover debug output — confirm before removing.
        print(table)
        headers = table.find_all('h2')
        playlist_containers = table.select('div.table_container')
        # zip() below would silently drop unmatched items, so check first.
        assert len(headers) == len(playlist_containers)
        for (header, playlist) in zip(headers, playlist_containers):
            times = header.span.text
            [start, end] = times.split(' - ')
            # Converts <div class="sub_heading">vom 03.10.2020 <p class="moderation">mit <a href="/alles-fritzen/team/fritz_team/2721.html" title="Henrike Möller">Henrike Möller</a></p></div>
            # to 'vom 03.10.2020 '
            # See https://stackoverflow.com/questions/44858226/how-to-extract-the-text-inside-a-tag-with-beautifulsoup-in-python/44859413
            date_texts = playlist.select_one('.sub_heading').find_all(
                text=True, recursive=False)
            # There's sometimes whitespace floating around, this removes it
            stripped_and_filtered = [
                t.strip() for t in date_texts if t.strip()
            ]
            assert len(stripped_and_filtered) == 1

            [date_text] = stripped_and_filtered
            yield (date_text, start, end)
Exemple #4
0
    def fetch(self) -> Iterable[Record]:
        """Yield the tracks funtip's playlist search lists for the last 5*HOUR.

        The endpoint answers with JSONP whose single, unquoted ``key``
        property holds an HTML fragment. The callback wrapper is stripped,
        the property name is quoted so the payload parses as JSON, and the
        ``table.trackList`` inside the HTML is walked row by row.

        Yields:
            One ``Record`` per data row, built from the row's ``HH:MM`` play
            time (converted with ``datetime_from_berlin_hhmmss``), the title
            with surrounding double quotes removed, and the artist.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server stops responding for longer than
                the 10 second timeout.
            AssertionError: If the response is not wrapped in ``lol(...)`` or
                the table header differs from the expected two columns.
            ValueError: If a row does not have exactly one time cell and one
                artist/title cell, or the artist/title text does not split
                into exactly two parts.
        """
        url = 'https://playlist.funtip.de/playList.do'

        end = datetime.now(tz=BERLIN_TIME)
        start = end - 5*HOUR

        params: RequestParams = {
            # action, remote, version copied verbatim from template request
            'action': 'searching',
            'remote': 1,
            'version': 2,
            # format: dd-mm-yyyy_hh-mm
            'from': start.strftime('%d-%m-%Y_%H-%M'),
            'to': end.strftime('%d-%m-%Y_%H-%M'),
            'jsonp_callback': 'lol',
        }
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        # Ok, this is nasty. It's JSONP (i.e. wrapped in a callback) and the keys of the object
        # don't have quotes so we can't parse as JSON directly. There's a single property, 'key',
        # which contains HTML.
        # Strip the callback
        assert r.text.startswith('lol(') and r.text.endswith(')')
        json_text = r.text[4:-1]
        # Replace `key` (without quotes) with `"key"` (with quotes) so we can JSON-parse it.
        # Only the first occurrence is touched so the HTML payload stays intact.
        json_text = json_text.replace('key', '"key"', 1)
        # Now parse and grab the HTML
        html = json.loads(json_text)['key']
        # Parse content from HTML
        soup = BeautifulSoup(html, 'html.parser')
        [table] = soup.select('table.trackList')
        [header, *data] = table.find_all('tr')
        assert [el.text for el in header.find_all('th')] == ['Zeit', 'Artist - Track - Album']

        for row in data:
            # Despite the class name this is when it was played, not the track length
            [time_cell] = row.select('td.trackLength')
            # .trackInterpret seems semantic, .left seems formatting-related
            [artist_and_title] = row.select('.trackInterpret .left')
            # Remove album info if it's present (it's in a span.trackLabel)
            track_label = artist_and_title.select_one('.trackLabel')
            if track_label:
                track_label.decompose()
            # This is the time (just the time, Berlin time)
            tt = time.strptime(time_cell.text, '%H:%M')
            timestamp = datetime_from_berlin_hhmmss(tt.tm_hour, tt.tm_min, 0)

            # That's an em-dash
            [artist, title_in_quotes] = artist_and_title.text.strip().split(" — ")
            yield Record(timestamp, title_in_quotes.strip('"'), artist)
Exemple #5
0
 def fetch(self) -> Iterable[Record]:
     """Yield a ``Record`` for every entry of paradiso's playlist update feed.

     Each JSON entry's ``timestamp`` is read as integer epoch seconds and
     turned into a Berlin-time datetime; ``song`` and ``artist`` are passed
     through unchanged.
     """
     # The feed only carries the 10 most recent tracks. An hour-by-hour
     # search exists as well, but using it means parsing HTML.
     # Starting point for investigating: https://www.paradiso.de/playlist
     response = requests.get(
         'https://www.paradiso.de/pl/update.php?channel=paradiso_982',
         timeout=10,
     )
     response.raise_for_status()
     for item in response.json():
         played_at = datetime.fromtimestamp(int(item['timestamp']),
                                            tz=BERLIN_TIME)
         yield Record(played_at, item['song'], item['artist'])
Exemple #6
0
 def fetch(self) -> Iterable[Record]:
     """Yield the tracks FluxFM's playlist API lists for Berlin.

     Yields:
         One ``Record`` per item in ``body['tracks']``; the timestamp is
         parsed from the item's ``date`` and ``time`` fields and tagged with
         ``BERLIN_TIME``.

     Raises:
         requests.HTTPError: If the server answers with a 4xx/5xx status.
         requests.Timeout: If the server stops responding for longer than
             the 10 second timeout.
         AssertionError: If the response's ``status`` field is not ``'ok'``.
     """
     url = 'https://www.fluxfm.de/fluxfm-playlist/api.php?act=list&loc=berlin&cuttime=1&limit=50'
     # NOTE: this only retrieves stuff for the same day!
     # So it is probably good to fetch shortly before midnight Berlin time
     # (presumably what the original "11:59" remark meant, i.e. 23:59).
     # Without a timeout requests waits indefinitely on a stalled server.
     r = requests.get(url, timeout=10)
     r.raise_for_status()
     body = r.json()
     assert body['status'] == 'ok'
     for row in body['tracks']:
         timestamp = datetime.strptime(f"{row['date']} {row['time']}", '%Y-%m-%d %H:%M')
         # NOTE(review): replace(tzinfo=...) is only correct when BERLIN_TIME
         # is a zoneinfo/dateutil-style tzinfo; a pytz zone would need
         # localize() instead — confirm against BERLIN_TIME's definition.
         timestamp = timestamp.replace(tzinfo=BERLIN_TIME)
         artist = row['artist']
         title = row['title']
         yield Record(timestamp, title, artist)
Exemple #7
0
 def fetch(self) -> Iterable[Record]:
     """Yield the tracks listed in the single HTML table served at ``self.url``.

     The page must contain exactly one ``<table>`` whose header cells read
     ``Datum``, ``Zeit``, ``Interpret``, ``Titel``; every row after the first
     is converted into a ``Record``.

     Yields:
         One ``Record`` per data row, with the timestamp parsed from the
         ``Datum``/``Zeit`` cells (``dd.mm.YYYY HH:MM``) and tagged with
         ``BERLIN_TIME``.

     Raises:
         requests.HTTPError: If the server answers with a 4xx/5xx status.
         requests.Timeout: If the server stops responding for longer than
             the 10 second timeout.
         AssertionError: If the table header differs from the expected one.
         ValueError: If the page does not hold exactly one table, or a data
             row does not have exactly four cells.
     """
     # Without a timeout requests waits indefinitely on a stalled server.
     r = requests.get(self.url, timeout=10)
     r.raise_for_status()
     soup = BeautifulSoup(r.text, 'html.parser')
     [table] = soup.select('table')
     header = table.select('th')
     # Guard against the site reordering or renaming its columns.
     assert [h.text
             for h in header] == ['Datum', 'Zeit', 'Interpret', 'Titel']
     rows = table.find_all('tr')
     # first row is the header
     for row in rows[1:]:
         [datum, zeit, interpret,
          titel] = [cell.text for cell in row.find_all('td')]
         # NOTE(review): replace(tzinfo=...) is only correct when BERLIN_TIME
         # is a zoneinfo/dateutil-style tzinfo; a pytz zone would need
         # localize() instead — confirm against BERLIN_TIME's definition.
         timestamp = datetime.strptime(
             f'{datum} {zeit}',
             '%d.%m.%Y %H:%M').replace(tzinfo=BERLIN_TIME)
         yield Record(timestamp, titel, interpret)
Exemple #8
0
    def fetch(self) -> Iterable[Record]:
        """Yield the tracks in Star FM Berlin's current song list.

        The endpoint wraps its JSON in ``(`` ... ``);``; the wrapper is cut
        off before parsing. Entries lacking a ``cDate``, ``artist`` or
        ``song`` value are reported on stdout and skipped.

        Yields:
            One ``Record`` per usable entry of ``body['all']``, built from
            the entry's ``HH:MM:SS`` play time (converted with
            ``datetime_from_berlin_hhmmss``), song and artist.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server stops responding for longer than
                the 10 second timeout.
            AssertionError: If the response is not wrapped in ``(...);``.
        """
        url = "https://berlin.starfm.de/player/ajax/getCurrentSongList.php"
        # Without a timeout requests waits indefinitely on a stalled server.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        assert r.text.startswith('(') and r.text.endswith(');')
        # Drop the leading '(' and the trailing ');'.
        body = json.loads(r.text[1:-2])
        # It's a dict with "0", "1" etc for keys (strings containing numbers) => objects.
        # The keys carry no information we need, so only the values are walked.
        for record in body['all'].values():
            # TODO: this clause hasn't been tested.
            if not (record.get('cDate') and record.get('artist') and record.get('song')):
                print(f'Skipped bad record: {record}')
                continue

            # Named `played` rather than `time` so the stdlib module name
            # is not shadowed inside this method.
            played = strptime(record['cDate'], '%H:%M:%S')
            timestamp = datetime_from_berlin_hhmmss(played.tm_hour, played.tm_min, played.tm_sec)

            yield Record(
                timestamp,
                record['song'],
                record['artist'],
            )