def _fill(csv_file_list):
    """
    Fill the database with the songs found in the given CSV files.

    The first non-empty row of every file is treated as the header and is
    skipped. Rows whose number of columns differs from ``SCRIPT_ROW`` are
    ignored, and a song is only added when
    ``song_service.get_song_by_name_and_artist`` returns a falsy value for it.
    Any exception raised while handling a single row is logged and that row
    is skipped, so one bad row does not abort the whole import.

    :param csv_file_list: List of CSV file paths where the data is.
    :return: None. The amount of songs added is written to the log.
    """
    song_added = 0
    for csv_file_name in tqdm(csv_file_list, total=len(csv_file_list)):
        # newline='' hands line-ending handling over to the csv module, which
        # is required for quoted fields containing newlines (e.g. lyrics) to
        # be read back exactly as they were written.
        with open(csv_file_name, 'r', newline='') as csv_file:
            # Drop blank rows first, then the header row.
            rows = [row for row in csv.reader(csv_file) if row][1:]

        for row in rows:
            try:
                if len(row) != len(SCRIPT_ROW):
                    continue

                song_name = row[SCRIPT_ROW.index('SONG_NAME')]
                artist_name = row[SCRIPT_ROW.index('ARTIST_NAME')]
                if song_service.get_song_by_name_and_artist(
                        song_name, artist_name):
                    # Already stored: nothing to do for this row.
                    continue

                lyrics = row[SCRIPT_ROW.index('LYRICS')]
                artist_url = row[SCRIPT_ROW.index('ARTIST_URL')]
                song_url = row[SCRIPT_ROW.index('SONG_URL')]
                song_id = song_service.add_song(
                    artist_name, song_name, lyrics, artist_url, song_url)
                # Count the song only when an identifier came back.
                song_added += int(bool(song_id))
            except Exception as e:
                log.warn(f'Skipping row due to [{e}]')
                log.warn(f'Row: {row}')
    log.info(f'Songs added: [{song_added}]')
def extract_features_from_content(content):
    """
    Build the features vector for a piece of text holding song lyrics.

    The text is first passed through ``word2vec.clean_content``; when the
    cleaned value is falsy a warning is logged and no extraction is attempted.

    :param content: Song lyrics.
    :return: Whatever ``_extract`` returns for the cleaned content, or None
        when nothing is left after cleaning.
    """
    content = word2vec.clean_content(content)
    if not content:
        # Note that the value logged here is the cleaned (falsy) content.
        log.warn(f'Content empty after cleaning it: [{content}]')
        return None
    return _extract(content)
def extract_features_from_song(song_id):
    """
    Build the features vector for a stored song.

    :param song_id: Song identifier.
    :return: Whatever ``_extract`` returns for the lyrics of the song, or
        None when ``song_service.get_song`` finds no song for the identifier.
    """
    stored_song = song_service.get_song(song_id)
    if not stored_song:
        log.warn(f'Not song found with id: [{song_id}]')
        return None
    return _extract(stored_song.lyrics)
def _delete_output_folder(unzipping_output_folder):
    """
    Remove the folder tree that holds the extracted files.

    The removal is best-effort: a falsy path is ignored and any error raised
    while deleting is logged as a warning instead of being propagated.

    :param unzipping_output_folder: Folder to delete.
    :return: None.
    """
    if not unzipping_output_folder:
        return

    try:
        shutil.rmtree(unzipping_output_folder)
    except Exception as e:
        log.warn(
            f'Could not delete [{unzipping_output_folder}] due to [{e}].')
def read_maximum_distance():
    """
    Load the maximum distance stored in ``FILE_NAME_MAXIMUM_DISTANCE``.

    :return: The file content parsed as a float, or None when the file does
        not exist or reading/parsing it fails (the failure is logged).
    """
    if not os.path.isfile(FILE_NAME_MAXIMUM_DISTANCE):
        return None

    try:
        with open(FILE_NAME_MAXIMUM_DISTANCE, 'r') as distance_file:
            raw_value = str(distance_file.read())
        return float(raw_value)
    except Exception as e:
        log.warn(f'Error reading maximum distance: [{e}]')
        return None
def set_index_id(song_id, index_id):
    """
    Store a new NMSLIB index identifier on the song with the given
    identifier and commit the change.

    :param song_id: Song identifier.
    :param index_id: NMSLIB index identifier.
    :return: True if the song was found and updated, False otherwise.
    """
    matching_song = db_session().query(Song).filter_by(id=song_id).first()
    if not matching_song:
        log.warn(f'Not song found with id [{song_id}]')
        return False

    matching_song.index_id = index_id
    commit_session()
    return True
def _extract(lyrics):
    """
    Internal helper turning the words of a song into a features vector.

    The lyrics are cleaned with ``word2vec.clean_lyrics``, joined back with
    single spaces and normalized against the word2vec instance loaded from
    ``FILE_NAME_W2V``.

    :param lyrics: String contains some words representing the lyrics of a
        song.
    :return: The normalized result reshaped to ``(1, NUM_FEATURES)``, None
        if normalization produced nothing.
    """
    cleaned_words = word2vec.clean_lyrics(lyrics)
    joined_words = ' '.join(cleaned_words)
    w2v_instance = word2vec.load_w2v_instance(FILE_NAME_W2V)
    features = word2vec.normalize(joined_words, w2v_instance)

    if features is None:
        log.warn('Empty lyrics after normalizing it.')
        return None
    return features.reshape((1, NUM_FEATURES))
def search(features, amount_results=API_SONG_SIMILARITY_LIMIT, song_id=None):
    """
    Query to the index given a features vector.

    The index stored in ``FILE_NAME_INDEX`` is loaded and asked for
    ``NEIGHBOURHOOD_AMOUNT`` neighbours. Each neighbour that maps to a stored
    song is serialized and returned, closest first, until ``amount_results``
    entries have been collected.

    :param features: Features vector needed for querying the index.
    :param amount_results: Maximum amount of results to return. A value of
        zero or less yields an empty list.
    :param song_id: Song identifier that represents the features vector for
        avoiding to return itself as a result.
    :return: List of serialized songs, each one carrying a ``percentage``
        entry.
    """
    # The limit is only checked after appending, so without this guard a
    # non-positive limit would still return one result (and pay for loading
    # and querying the index).
    if amount_results <= 0:
        return []

    # Index identifier of the query song itself; -1 when there is none to
    # exclude.
    index_id = -1
    if song_id:
        song = song_service.get_song(song_id)
        if song:
            index_id = song.index_id
        else:
            log.warn(f'Not song found with id: [{song_id}]')

    nmslib_index = Nmslib()
    nmslib_index.load(FILE_NAME_INDEX)
    query_results = nmslib_index.batch_query(features, NEIGHBOURHOOD_AMOUNT)
    # Only the first entry of the batch is used.
    closest, distances = query_results[0]
    maximum_distance = read_maximum_distance()

    results = []
    for i, dist in zip(closest, distances):
        i = int(i)
        dist = float(dist)
        if i == index_id:
            # Do not return the query song as its own neighbour.
            continue

        song = song_service.get_song_by_index_id(i)
        if not song:
            continue

        result = song.serialize()
        if maximum_distance:
            # Turn the distance into a similarity percentage (closer means
            # higher), clamped to [0, 100] and rounded to two decimals.
            dist = 100.0 - min(100.0, (dist * 100.0) / maximum_distance)
            dist = float(f'{dist:.2f}')
        # NOTE(review): when no maximum distance is available the raw
        # distance is stored under 'percentage' - confirm this is intended.
        result['percentage'] = dist
        results.append(result)
        if len(results) >= amount_results:
            break
    return results
def search():
    """
    Controller that looks up songs in the database by a free-text query.

    Reads the ``query`` request argument, answers from the cache when an
    entry exists and otherwise asks ``song_service.get_song_by_query``. Any
    unexpected exception is logged and turned into an error response.

    :return: JSON response.
    """
    try:
        # Read and validate the request parameter
        query = request.args.get('query')
        if not query:
            return response.make(
                error=True, message='`query` missed as a query parameter.')

        query = query.strip()
        if len(query) < 3:
            log.warn(f'Query is too short: [{query}]')
            return response.make(error=False, response=dict(results=[]))

        # Serve from the cache when possible
        method = search.__name__
        key = '{}'.format(query)
        cached_payload = cache.get(method, key)
        if cached_payload is not None:
            return response.make(response=cached_payload, cached=True)

        # Look the songs up and order them by their display name
        found_songs = song_service.get_song_by_query(query)
        entries = [
            {'id': q.id, 'name': f'{q.artist_name} - {q.song_name}'}
            for q in found_songs
        ]
        entries.sort(key=lambda entry: entry['name'])

        # Hand the results back together with the cache coordinates
        return response.make(
            error=False,
            response=dict(results=entries),
            method=method,
            key=key)
    except Exception as e:
        log.error(f'Unexpected error: [{e}]')
        log.exception(e)
        return response.make(error=True, message='Unexpected error.')