Example 1
def KCRW_harvest(maxbands):
    """Scrape the KCRW simulcast tracklist API and return up to maxbands band objects."""
    allbands = []
    i = 1
    print('Grabbing KCRW bands')
    while i < 30 and len(allbands) < maxbands:
        url = 'https://tracklist-api.kcrw.com/Simulcast/all/' + str(i)
        response = urllib.request.urlopen(url).read()
        data = json.loads(response)
        print('KCRW page {0}\n'.format(i))
        for entry in data:
            bandname = entry['artist']
            trackname = entry['title']
            if bandname == '[BREAK]':
                continue
            if entry['program_title'] == 'Morning Becomes Eclectic':
                appeared = 'KCRW Eclectic'
            else:
                appeared = 'not KCRW Eclectic'
            allbands.append(band(name=bandname,
                                 song=trackname,
                                 appeared=appeared))
        i += 1

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    return c[:maxbands]
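
These harvesters all lean on the same scaffolding that is not shown in the snippets: urllib/json/BeautifulSoup (plus Selenium in a couple of places) for fetching and parsing, a SQLAlchemy-mapped band class whose fields appear throughout the examples, and project helpers such as cleanup() and similar(). The sketch below is only a guess at what they assume: the column types, table name, and equality rule are assumptions inferred from how the fields are used (the "if j not in c" dedup loops only work if band defines some notion of equality), and the real project seems to keep the model in a db module (see load_to_db further down).

# Minimal sketch of the scaffolding the snippets appear to assume; everything
# here is inferred from usage, not taken from the actual project.
import datetime as dt
import json
import re
import socket
import urllib.request
from random import shuffle

from bs4 import BeautifulSoup
from selenium import webdriver
from sqlalchemy import Column, Date, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, scoped_session, sessionmaker

Base = declarative_base()

class band(Base):
    __tablename__ = 'bands'          # assumed table name
    id = Column(Integer, primary_key=True)
    name = Column(String)
    cleanname = Column(String)
    song = Column(String)
    album = Column(String)
    release_year = Column(Integer)
    source = Column(String)
    appeared = Column(String)
    comment = Column(String)
    dateadded = Column(Date)
    dateplayed = Column(Date)
    nid = Column(String)
    storeID = Column(String)
    storeID_year = Column(Integer)

    def __eq__(self, other):
        # the dedup loops need value equality; artist + song is one plausible rule
        return (self.name, self.song) == (other.name, other.song)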
Example 2
def add_to_db(Session, k):
    """Insert each row of k into the database, skipping rows already present.
    Returns the number of rows added."""
    session = Session()
    t = dt.date.today()
    adds = 0
    for line in k:
        clean_name = cleanup(line[0])
        n_ = band(name=line[0],
                  song=line[1],
                  album=line[2],
                  release_year=line[3],
                  source=line[4],
                  dateplayed=line[5],
                  dateadded=t,
                  cleanname=clean_name)
        q = session.query(band).filter(band.name == n_.name,
                                       band.song == n_.song,
                                       band.source == n_.source)
        if q.first() is None:
            session.add(n_)
            adds += 1
        else:
            try:
                print('Already had {0} - {1}'.format(n_.name, n_.song))
            except Exception:
                # some names cannot be printed to the console encoding
                print('Already had it. Cannot print. ID is {0}'.format(q.first().id))
        session.commit()

    return adds
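
add_to_db indexes each row positionally, so k appears to be an iterable of (name, song, album, release_year, source, dateplayed) tuples. A hypothetical call, reusing the engine path and Session setup from the load_to_db example below, might look like this; the placeholder row is purely illustrative:

# Hypothetical usage of add_to_db; the row layout is inferred from the indices used above.
import datetime as dt
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine('sqlite:///../../databases/scout.db')
Session = scoped_session(sessionmaker(bind=engine))

rows = [
    # (name, song, album, release_year, source, dateplayed)
    ('Example Artist', 'Example Song', 'Example Album', 2018, 'KCRW Eclectic', dt.date.today()),
]
print('Added {0} rows'.format(add_to_db(Session, rows)))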
Example 3
def Pitchfork_charts(maxbands):
    """Scrape Pitchfork's 'Best New Albums' (8.0+) review pages and return up to maxbands bands."""
    allbands = []
    i = 0
    while len(allbands) < maxbands and i < 20:
        i += 1
        try:
            print('Pitchfork page: {0}'.format(i))
            site = 'http://pitchfork.com/reviews/best/albums/?page=' + str(i)
            hdr = {'User-Agent': 'Mozilla/5.0'}
            req = urllib.request.Request(site, headers=hdr)
            page = urllib.request.urlopen(req)
            soup = BeautifulSoup(page, "html.parser")
            a = soup.find_all("ul", {"class": "artist-list"})
            print("Page {0} retrieved".format(i))
            for banddiv in a:
                album = banddiv.findNext("h2").text
                newband = band(name=banddiv.text,
                               appeared='Pitchfork 8.0+ reviews',
                               album=album)
                allbands.append(newband)
        except Exception as e:
            print(str(e))
            print("Page {0} failed".format(i))
            continue

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    return c[:maxbands]
Example 4
def metacritic(maxbands):
    """Scrape Metacritic's best-albums-of-the-year chart (via Selenium) and return up to maxbands bands."""
    socket.setdefaulttimeout(15)
    url = 'http://www.metacritic.com/browse/albums/score/metascore/year/filtered'

    # Chrome session with image loading disabled to speed up the page load
    chromeOptions = webdriver.ChromeOptions()
    prefs = {'profile.managed_default_content_settings.images': 2}
    chromeOptions.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(options=chromeOptions)  # 'chrome_options=' on older Selenium
    driver.get(url)

    innerHTML = driver.execute_script("return document.body.innerHTML")
    bs = BeautifulSoup(innerHTML, 'html.parser')

    driver.quit()
    allbands = []

    a = bs.find('div', {'class': 'product_rows'})
    b = a.find_all('div', {'class': 'product_row release'})
    for i in b:
        artist = i.find('div', {
            'class': 'product_item product_artist'
        }).text.strip()
        album = i.find('div', {
            'class': 'product_item product_title'
        }).text.strip()
        newband = band(name=artist, appeared='Metacritic', album=album)
        allbands.append(newband)

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    return c[:maxbands]
Example 5
def get_jukebox_bangers():
    """Scrape The Singles Jukebox's 2018 'bangers' link category and return a list of band objects."""
    url = 'http://www.thesinglesjukebox.com/?p=25856'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(url, headers=hdr)
    page = urllib.request.urlopen(req)
    bs = BeautifulSoup(page, "html.parser")

    # collect the links in the 'bangers' link category
    a = bs.find('li', {'id': 'linkcat-215'})
    b = a.findAll('a')
    links = []
    for i in b:
        print(i.text)
        print(i['href'])
        links.append(i['href'])

    # each linked post has an 'Artist – Song' headline
    adds = []
    for link in links:
        req = urllib.request.Request(link, headers=hdr)
        page = urllib.request.urlopen(req)
        bs = BeautifulSoup(page, "html.parser")
        c = bs.find('div', {'class': 'post'})
        d = c.find('h2')
        e = d.text.split('–')
        artist = e[0].strip()
        song = e[1].strip()
        print(artist, song)
        newband = band(
            name=artist,
            appeared='Singles Jukebox 2018 Bangers',
            song=song,
        )
        adds.append(newband)

    return adds
Example 6
def load_to_db(albumlist):
    """Load a list of (artist, album) pairs from the KEXP Countdown 2018 into the database."""
    socket.setdefaulttimeout(15)
    # create the SQL database (if needed) and the "session" object that is used to
    # manage communications with the database
    engine = create_engine('sqlite:///../../databases/scout.db')
    session_factory = sessionmaker(bind=engine)
    Session = scoped_session(session_factory)
    db.metadata.create_all(engine)

    session = Session()

    t = dt.date.today()
    adds = 0

    for i in albumlist:
        print(i)
        clean_name = cleanup(i[0])
        n_ = band(name=i[0],
                  album=i[1],
                  source='KEXP Countdown 2018',
                  appeared='KEXP Countdown 2018',
                  dateadded=t,
                  cleanname=clean_name)
        q = session.query(band).filter(band.name == n_.name, band.song == n_.song)
        if q.first() is None:
            session.add(n_)
            adds += 1
        else:
            try:
                print('Already had {0} - {1}'.format(n_.name, n_.song))
            except Exception:
                print('Already had it. Cannot print. ID is {0}'.format(q.first().id))
        session.commit()
    print('Added {0} songs'.format(adds))
Example 7
def KEXP_charts(maxbands):
    """Scrape the KEXP charts page and return up to maxbands bands, half of them from the Top 90."""
    allbands = []

    basesite = 'http://kexp.org/charts/'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(basesite, headers=hdr)
    page = urllib.request.urlopen(req)
    bs = BeautifulSoup(page, "html.parser")

    for heading in bs.findAll('h4'):
        genre = heading.text.strip()[:-1]  # drop the trailing colon
        print(genre)
        contents = heading.findNext('p').text.splitlines()
        for i in contents:
            if not i:
                print('empty')
                continue
            elif len(i) > 1:
                b = i.split()
                # drop a leading chart position like "1." or "12"
                if b[0][0].isdigit():
                    b.remove(b[0])
                e = ' '.join(b)
                e = e.replace('(self-released)', '')
                # entries look like "Artist - Album (Label)", with either a hyphen or an en dash
                d = e.split('-')
                if len(d) == 2:
                    artist = d[0]
                    parens = d[1].find('(')
                    album = d[1][:parens].strip()
                d = e.split('–')
                if len(d) == 2:
                    artist = d[0].strip()
                    parens = d[1].find('(')
                    album = d[1][:parens].strip()

                print(artist, album)
                newband = band(name=artist, appeared=genre, album=album)
                allbands.append(newband)

    # half of the returned list comes from the Top 90, the rest from the other charts
    d = []
    e = []
    for i in allbands:
        if i.appeared == 'KEXP Top 90':
            if i not in d:
                d.append(i)
        else:
            if i not in e:
                e.append(i)

    half = maxbands // 2
    d = d[:half]
    shuffle(e)
    c = d + e[half:maxbands]

    return c[:maxbands]
Example 8
def sgum(maxbands):
    """Scrape Stereogum's 'Album of the Week' archive (via Selenium) and return up to maxbands bands."""
    socket.setdefaulttimeout(10)
    allbands = []
    url1 = 'https://www.stereogum.com/category/franchises/album-of-the-week/'

    j = 1
    while len(allbands) < maxbands:
        print('Getting Stereogum Album of the Week, page {0}'.format(j))
        url = url1 + 'page/' + str(j) + '/'

        # Chrome session with image loading disabled to speed up the page load
        chromeOptions = webdriver.ChromeOptions()
        prefs = {'profile.managed_default_content_settings.images': 2}
        chromeOptions.add_experimental_option("prefs", prefs)
        driver = webdriver.Chrome(options=chromeOptions)  # 'chrome_options=' on older Selenium
        driver.get(url)

        innerHTML = driver.execute_script("return document.body.innerHTML")
        bs = BeautifulSoup(innerHTML, 'html.parser')

        driver.quit()

        # headlines look like "Album Of The Week: Artist <em>Album</em>"
        a = bs.find_all('h2')
        for i in a:
            if 'Album Of The Week:' in i.text:
                b = re.sub('Album Of The Week:', '', i.text)
                c = i.find('em')
                if c is None:
                    c = i.find('i')
                album = c.text.strip()
                # plain string replace: album titles can contain regex metacharacters
                artist = b.replace(album, '').strip()
                newband = band(name=artist, appeared='Stereogum', album=album)
                allbands.append(newband)

        j += 1
        print('Found {0} bands so far'.format(len(allbands)))

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    return c[:maxbands]
Example 9
def do_album_query(i):
    """Search the streaming API for a band object's album and return its tracks as new band objects."""
    artist = i.name
    album = i.album
    query = ' '.join([artist, album])
    res = api.search(query, max_results=50)

    # pick the hit whose artist and album names are jointly most similar to ours
    highest = 0
    for result in res['album_hits']:
        r = result['album']
        ratio = similar(artist, r['albumArtist'])
        ratio2 = similar(album, r['name'])
        prod = ratio * ratio2
        if prod > highest:
            match = r['albumId']
            highest = prod

    j = []
    try:
        c = api.get_album_info(match)
        for track in c['tracks']:
            new_track = band(name=i.name,
                             song=track['title'],
                             album=i.album,
                             release_year=i.release_year,
                             appeared=i.appeared,
                             comment=i.comment,
                             source=i.source,
                             cleanname=i.cleanname,
                             dateadded=i.dateadded,
                             dateplayed=i.dateplayed,
                             nid=i.nid,
                             storeID=track['storeId'],
                             storeID_year=track['year'])
            j.append(new_track)

    except Exception:
        # also covers the case where no album hit was found and match is unbound
        print('No match for {0} - {1}'.format(artist, album))

    return j
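
do_album_query scores candidates with a similar(a, b) helper that is not shown here; because two scores are multiplied and compared against 0, it presumably returns a value in [0, 1]. Purely as an assumption, a difflib-based stand-in could look like this:

# Hypothetical stand-in for the similar() helper used above; the real project
# may normalize or compare strings differently.
from difflib import SequenceMatcher

def similar(a, b):
    # 0..1 ratio of how alike two names are, ignoring case
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()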
Example 10
def get_npr_songs():
    """Scrape NPR's 100 Best Songs of 2018 list (five pages) and return a list of band objects."""
    hdr = {'User-Agent': 'Mozilla/5.0'}
    # the list spans five pages; the page number is appended to the article slug
    base = 'https://www.npr.org/2018/12/05/671206143/the-100-best-songs-of-2018-page-'
    g = []
    for i in range(1, 6):
        url = base + str(i)
        print(url)
        print('NPR page: {0}'.format(i))
        req = urllib.request.Request(url, headers=hdr)
        page = urllib.request.urlopen(req)
        soup = BeautifulSoup(page, "html.parser")

        # the h3 headings alternate between an artist name and a quoted song title
        a = soup.findAll('h3', {'class': 'edTag'})
        for j in a:
            k = j.text.strip()
            if k[0] == '"':
                song = k[1:-1]
                new_band = band(name=artist, song=song, appeared='NPR Top 100 Songs')
                g.append(new_band)
            else:
                artist = k

    for k in g:
        print(k.name, k.song)

    print(len(g))

    return g
Example 11
def pfork_tracks(maxbands):
    """Scrape Pitchfork's track review pages and return up to maxbands bands tagged 'Pitchfork Top Tracks'."""
    allbands = []
    i = 0
    while len(allbands) < maxbands and i < 50:
        i += 1
        try:
            print('Pitchfork page: {0}'.format(i))
            site = 'https://pitchfork.com/reviews/tracks/?page=' + str(i)
            hdr = {'User-Agent': 'Mozilla/5.0'}
            req = urllib.request.Request(site, headers=hdr)
            page = urllib.request.urlopen(req)
            soup = BeautifulSoup(page, "html.parser")
            a = soup.find_all("div",
                              {"class": "track-collection-item__details"})
            print("Page {0} retrieved".format(i))
            for banddiv in a:
                artist = banddiv.find('ul', {'class': 'artist-list'}).li.text \
                    .strip().replace('”', '').replace('“', '')
                track = banddiv.find('h2', {'class': 'track-collection-item__title'}).text \
                    .strip().replace('”', '').replace('“', '')
                print(artist, track)
                newband = band(name=artist,
                               appeared='Pitchfork Top Tracks',
                               song=track)
                allbands.append(newband)
        except Exception as e:
            print(str(e))
            print("Page {0} failed".format(i))
            continue

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    return c[:maxbands]
Example 12
def MTM(maxbands):
    """Scrape the KEXP 'Music That Matters' podcast feed and return up to maxbands bands."""
    url = 'http://feeds.kexp.org/kexp/musicthatmatters'
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(url, headers=hdr)
    page = urllib.request.urlopen(req)
    bs = BeautifulSoup(page, "html.parser")

    allbands = []

    for item in bs.findAll('item'):
        if len(allbands) <= maxbands:
            desc = item.find('description').text
            tr = False
            s = ''
            n = []

            # each episode description is a numbered tracklist ("1. Artist - Song ...");
            # walk it character by character and collect the text after each track number
            for g in range(0, len(desc)):
                if desc[g].isdigit():
                    if g + 1 < len(desc) and (desc[g + 1].isdigit() or desc[g + 1] == '.'):
                        tr = False
                        if len(s) > 0:
                            n.append(s)
                        s = ''
                if desc[g] == '.':
                    if desc[g - 1].isdigit():
                        tr = True
                if tr:
                    s = s + desc[g]

            for i in n:
                h = i[2:].strip().split('<')[0]
                # the artist/song separator may be a hyphen or an en dash
                egg = h.split('-')
                if len(egg) < 2:
                    egg = h.split('–')

                try:
                    artist = egg[0].strip()
                    song = egg[1].strip()
                    newband = band(name=artist,
                                   appeared='KEXP Music That Matters',
                                   song=song)
                    allbands.append(newband)
                except Exception as e:
                    print(str(e))
                    try:
                        print(h)
                    except Exception:
                        print('unprintable')
                    continue

    # drop duplicates while preserving order
    c = []
    for j in allbands:
        if j not in c:
            c.append(j)

    for i in c[:maxbands]:
        print(i.name, i.song)

    return c[:maxbands]