def check_one_artist(done_db=None,new_db=None):
    """
    Worker loop: check artists from the main queue to see if they have songs.

    Repeats until the module-level '_main_artist_queue' is empty:
    -Pop an artist name from the main queue
    -Look it up in the 'artists' table of 'done_db'; skip it if it is there
    -Otherwise search its tracks and append (artist, number of tracks)
     to the module-level '_checked_artists_queue'

    'new_db' is opened, committed and closed here, but this function does
    not write to it itself.

    The two databases must be initialized.

    INPUT
       done_db   - path to the SQLite database of artists already done
       new_db    - path to the SQLite database for new artists
    RETURN
       None. On KeyboardInterrupt the main queue is cleared (which makes
       every worker sharing it stop); on any other error the traceback
       and the last query are printed. Connections are always closed.
    """

    # open connections
    connection_done = sqlite.connect(done_db)
    connection_new = sqlite.connect(new_db)
    cursor_done = connection_done.cursor()

    # the artist name is bound as a parameter: names containing quotes
    # can neither break the statement nor change its meaning
    query = 'SELECT name FROM artists WHERE name=?'
    artist = None # last artist popped, reported if something goes wrong

    try:
        while len(_main_artist_queue) > 0:
            # get artist name
            try:
                artist = _main_artist_queue.pop()
            except IndexError:
                continue # another worker emptied the queue, we're probably done
            # artist already done?
            cursor_done.execute(query, (artist,))
            if cursor_done.fetchone() is not None:
                continue
            # artist not found = not done, get songs
            try:
                tids,tmp_titles,tmp_aids,tmp_artists = en_extras.search_tracks(artist)
            except pyechonest.util.EchoNestAPIError:
                # add back to queue, wait a second, move to another artist
                _main_artist_queue.appendleft(artist)
                time.sleep(1)
                continue
            if tids is None:
                tids = ()
            _checked_artists_queue.appendleft( (artist, len(tids)) )
    except KeyboardInterrupt:
        # stop all threads: with an empty queue every worker loop ends
        _main_artist_queue.clear()
        # nothing is committed on this path, connections are just closed
        return
    except Exception:
        # print the stack trace, then the query and artist we were working on
        traceback.print_exc()
        print('last query =  %s (artist: %r)' % (query, artist))
        return
    else:
        # finished correctly, queue empty
        connection_done.commit()
        connection_new.commit()
    finally:
        # runs on every exit path, including a failing commit
        connection_done.close()
        connection_new.close()
    print('THREAD FINISHED')
# Example no. 2
# 0
def _thread_en(artistsdb,filename=''):
    """
    Thread that loads EN data and hands it to _add_data().

    Runs until the module-level flag '_stop_en_thread' is set. At each
    iteration:
    -Take an artist from a local backlog, refilled from
     get_artists_from_db(artistsdb) when it is empty
    -Search the tracks of that artist and pick one song id at random
    -Get the analysis of that song and pass it to _add_data() as a dict
     with keys 'segstart', 'chromas', 'beatstart', 'barstart', 'duration'
    -If the returned queue size reached '_en_queue_size', sleep 5 seconds
     and do not count the iteration

    For artists receives a SQLlite database containing a table 'artists' with
    a field 'name'.
    Filename is used when downloading a dict from EN. If a filename is provided,
    we retrieve the uri to this file. Therefore, filename must be unique
    for each thread. If no filename is provided, we read straight from
    the stream, and we eventually seg fault.
    """
    cnt_iter = 0      # iterations counted for the success rate
    cnt_provided = 0  # iterations that provided data without the queue being full
    waiting_artists = deque() # for db

    # MAIN LOOP
    while not _stop_en_thread:
        # debug
        cnt_iter += 1

        # get artist
        if len(waiting_artists) == 0:
            artist_list = get_artists_from_db(artistsdb)
            if artist_list is None:
                print('ERROR,: en_thread, cant get artist from SQL database')
                time.sleep(50)
                continue
            waiting_artists.extend(artist_list)
        # NOTE(review): if the database returns an empty list, this pop raises
        # IndexError and ends the thread -- confirm that this cannot happen
        artist = waiting_artists.pop() # no thread concurrency here

        # get song
        sids, tmp_tids,tmp_titles,tmp_aids,tmp_artists = en_extras.search_tracks(artist,filename=filename)
        if sids is None or len(sids) == 0:
            continue
        songid = sids[np.random.randint(len(sids))]

        # save EchoNest data to queue
        segstart,chromas,beatstart,barstart,duration = en_extras.get_our_analysis(songid,filename=filename)
        # identity test: '== None' would compare element-wise if segstart
        # is array-like
        if segstart is None:
            continue
        d = {'segstart':segstart,'chromas':chromas,
             'beatstart':beatstart,'barstart':barstart,'duration':duration}
        # put data in queue, deque is supposed to be thread safe but we have
        # an extra semaphore
        queue_size = _add_data(d)

        # queue full?
        if queue_size >= _en_queue_size :
            time.sleep(5) # sleep for 5 seconds
            cnt_iter -= 1 # a full queue must not lower the success rate
            continue

        #print 'added data (artist :',artist,') to _en_queue' #debugging
        # success rate too low? print WARNING
        cnt_provided += 1
        if cnt_provided % 100 == 0:
            # cnt_iter >= cnt_provided >= 100 here, no division by zero
            prob_provide = cnt_provided*100./cnt_iter
            if prob_provide < 85.:
                print('WARNING: _en_thread, prob. of providing is low: %s %% , artists do not actually have song?' % (prob_provide,))

    # done
    # cnt_iter is 0 if we were stopped before the first iteration, or if
    # every iteration found the queue full
    if cnt_iter > 0:
        final_prob = cnt_provided*1./cnt_iter
    else:
        final_prob = 0.
    print('stopping _en_thread, prob. of providing: %s' % (final_prob,))