def check_one_artist(done_db=None,new_db=None):
    """
    Worker loop that checks queued artists for songs.

    Despite its name, this keeps running until '_main_artist_queue' is empty:
    - pop an artist name from '_main_artist_queue'
    - skip it if it is already in the 'artists' table of 'done_db'
    - otherwise ask en_extras.search_tracks for its tracks and push
      (artist, number of track ids) on '_checked_artists_queue'

    The two databases must be initialized. A connection to 'new_db' is
    opened, committed and closed here, but nothing is written through it
    in this function.

    done_db - path of the SQLite database holding the artists already done
    new_db  - path of the SQLite database for newly checked artists

    Returns None. A KeyboardInterrupt clears the main queue (so the other
    workers stop too) and returns; any other exception is printed with its
    traceback and the last lookup, then the function returns.
    """
    # open connections
    connection_done = sqlite.connect(done_db)
    connection_new = sqlite.connect(new_db)

    # Bound parameter instead of a quoted, concatenated name: artist names
    # containing a double quote used to break the statement, and SQLite may
    # read a double-quoted token as an identifier instead of a string.
    # NOTE(review): under Python 2 the sqlite3 module only binds unicode or
    # ASCII byte strings -- confirm how the main queue is filled.
    query = 'SELECT name FROM artists WHERE name=?'
    # defined up front so the error report below never hits an unbound name
    artist = None

    try:
        cursor_done = connection_done.cursor()
        while len(_main_artist_queue) > 0:
            # get artist name
            try:
                artist = _main_artist_queue.pop()
            except IndexError:
                # another worker emptied the queue in between, we're
                # probably done; the loop condition will decide
                continue

            # artist already done?
            cursor_done.execute(query, (artist,))
            if len(cursor_done.fetchmany(2)) > 0:
                continue

            # artist not found = not done, get songs
            try:
                tids, tmp_titles, tmp_aids, tmp_artists = \
                    en_extras.search_tracks(artist)
            except pyechonest.util.EchoNestAPIError:
                # add back to queue, wait a second, move to another artist
                _main_artist_queue.appendleft(artist)
                time.sleep(1)
                continue
            if tids is None:
                tids = ()
            _checked_artists_queue.appendleft((artist, len(tids)))
    except KeyboardInterrupt:
        # stop all threads: with an empty queue every worker loop ends
        _main_artist_queue.clear()
        return
    except Exception:
        # print execution stack
        traceback.print_exc()
        # last query
        print('last query =  %s  [name=%r]' % (query, artist))
        return
    else:
        # finished correctly, queue empty
        connection_done.commit()
        connection_new.commit()
        print('THREAD FINISHED')
    finally:
        # single clean-up point for every exit path
        connection_done.close()
        connection_new.close()
def _thread_en(artistsdb,filename=''):
    """
    Thread that loads EN (EchoNest) data.

    For artists, receives a SQLite database containing a table 'artists'
    with a field 'name'.
    Filename is used when downloading a dict from EN. If a filename is
    provided, we retrieve the uri to this file. Therefore, filename must be
    unique for each thread. If no filename is provided, we read straight
    from the stream, and (per the original author) we eventually seg fault.

    Loops until the module-level flag '_stop_en_thread' is set. Each pass
    picks an artist, picks one of its songs at random, fetches its analysis
    with en_extras.get_our_analysis and hands a dict with the keys
    'segstart', 'chromas', 'beatstart', 'barstart' and 'duration' to
    '_add_data'. Returns None.
    """
    cnt_iter = 0       # passes through the loop (queue-full passes excluded)
    cnt_provided = 0   # passes that ended with data handed to _add_data
    waiting_artists = deque()  # local buffer of artists read from the db

    # MAIN LOOP
    while not _stop_en_thread:
        # debug
        cnt_iter += 1

        # get artist, refilling the local buffer when it runs dry
        if len(waiting_artists) == 0:
            artist_list = get_artists_from_db(artistsdb)
            # An empty list is treated like a failed read: popping from an
            # empty deque would raise IndexError and kill the thread.
            if artist_list is None or len(artist_list) == 0:
                print('ERROR,: en_thread, cant get artist from SQL database')
                time.sleep(50)
                continue
            waiting_artists.extend(artist_list)
        artist = waiting_artists.pop()  # no thread concurrency here

        # get song
        sids, tmp_tids, tmp_titles, tmp_aids, tmp_artists = \
            en_extras.search_tracks(artist, filename=filename)
        if sids is None or len(sids) == 0:
            continue
        songid = sids[np.random.randint(len(sids))]

        # save EchoNest data to queue
        segstart, chromas, beatstart, barstart, duration = \
            en_extras.get_our_analysis(songid, filename=filename)
        # identity test: '== None' on an array-like value may compare
        # element by element instead of answering "is it missing?"
        if segstart is None:
            continue
        d = {'segstart': segstart, 'chromas': chromas,
             'beatstart': beatstart, 'barstart': barstart,
             'duration': duration}

        # put data in queue, deque is supposed to be thread safe but we have
        # an extra semaphore
        queue_size = _add_data(d)

        # queue full? back off, and do not let this pass count in the stats
        if queue_size >= _en_queue_size:
            time.sleep(5)  # sleep for 5 seconds
            cnt_iter -= 1
            continue

        # success rate too low? print WARNING
        cnt_provided += 1
        if cnt_provided % 100 == 0:
            prob_provide = cnt_provided * 100. / cnt_iter
            if prob_provide < 85.:
                print('WARNING: _en_thread, prob. of providing is low: %s '
                      '%% , artists do not actually have song?'
                      % (prob_provide,))

    # done; cnt_iter is 0 when the stop flag was already set or when every
    # pass hit the queue-full branch, so guard the division
    if cnt_iter > 0:
        prob_provide = cnt_provided * 1. / cnt_iter
    else:
        prob_provide = 0.
    print('stopping _en_thread, prob. of providing: %s' % (prob_provide,))