예제 #1
0
def _fill(csv_file_list):
    """
    Fill the database given the list of data CSVs.

    For every file, empty rows are ignored and the first non-empty row is
    dropped (presumably the header). Rows whose number of columns differs
    from `SCRIPT_ROW` are skipped silently. A song is added only when
    `song_service.get_song_by_name_and_artist` returns a falsy value for its
    name and artist. Any error raised while handling a row is logged and the
    row is skipped, so a single bad row does not abort the import.

    :param csv_file_list: List of CSV file paths where the data is.
    :return: None. The amount of songs added is only written to the log.
    """
    song_added = 0
    for csv_file_name in tqdm(csv_file_list, total=len(csv_file_list)):
        # The csv module asks for files opened with newline='' so that line
        # breaks embedded in quoted fields (e.g. multi-line lyrics) are kept
        # exactly as they were written.
        with open(csv_file_name, 'r', newline='') as csv_file:
            # Stream the non-empty rows instead of loading the whole file.
            rows = (row for row in csv.reader(csv_file) if row)
            next(rows, None)  # Drop the first non-empty row.
            for row in rows:
                try:
                    if len(row) != len(SCRIPT_ROW):
                        continue
                    song_name = row[SCRIPT_ROW.index('SONG_NAME')]
                    artist_name = row[SCRIPT_ROW.index('ARTIST_NAME')]
                    if song_service.get_song_by_name_and_artist(
                            song_name, artist_name):
                        continue
                    lyrics = row[SCRIPT_ROW.index('LYRICS')]
                    artist_url = row[SCRIPT_ROW.index('ARTIST_URL')]
                    song_url = row[SCRIPT_ROW.index('SONG_URL')]
                    song_id = song_service.add_song(
                        artist_name, song_name, lyrics, artist_url,
                        song_url)
                    # Count the song only when a truthy identifier came back.
                    song_added += int(bool(song_id))
                except Exception as e:
                    log.warn(f'Skipping row due to [{e}]')
                    log.warn(f'Row: {row}')
    log.info(f'Songs added: [{song_added}]')
예제 #2
0
def extract_features_from_content(content):
    """
    Build the features vector for a piece of text holding song lyrics.
    :param content: Song lyrics.
    :return: Features vector for the given content, None when nothing is
    left after cleaning it.
    """
    content = word2vec.clean_content(content)
    # Guard clause: without usable content there is nothing to extract.
    if not content:
        log.warn(f'Content empty after cleaning it: [{content}]')
        return None
    return _extract(content)
예제 #3
0
def extract_features_from_song(song_id):
    """
    Build the features vector for a stored song from its lyrics.
    :param song_id: Song identifier.
    :return: Features vector for the given song, None when no song is found
    for that identifier.
    """
    song = song_service.get_song(song_id)
    # Guard clause: an unknown identifier is logged and yields no features.
    if not song:
        log.warn(f'Not song found with id: [{song_id}]')
        return None
    return _extract(song.lyrics)
예제 #4
0
def _delete_output_folder(unzipping_output_folder):
    """
    Remove the folder that holds the files extracted from the input zip file.
    A falsy path is ignored, and a failed removal is logged instead of raised.
    :param unzipping_output_folder: Folder to delete.
    :return: None.
    """
    if not unzipping_output_folder:
        return
    try:
        shutil.rmtree(unzipping_output_folder)
    except Exception as e:
        # Best-effort clean-up: report the problem and carry on.
        log.warn(
            f'Could not delete [{unzipping_output_folder}] due to [{e}].')
예제 #5
0
def read_maximum_distance():
    """
    Load the maximum distance stored in `FILE_NAME_MAXIMUM_DISTANCE`.
    :return: The stored value as a float, None when the file does not exist
    or when it cannot be read or parsed.
    """
    if not os.path.isfile(FILE_NAME_MAXIMUM_DISTANCE):
        return None
    try:
        with open(FILE_NAME_MAXIMUM_DISTANCE, 'r') as file:
            raw_value = file.read()
            return float(raw_value)
    except Exception as e:
        # Reading or parsing failed: report it and fall through to None.
        log.warn(f'Error reading maximum distance: [{e}]')
    return None
예제 #6
0
파일: song.py 프로젝트: tabuckner/searchly
def set_index_id(song_id, index_id):
    """
    Store a new NMSLIB index identifier on the song with the given identifier.
    :param song_id: Song identifier.
    :param index_id: NMSLIB index identifier.
    :return: True when the song was found and the session committed, False
    when no song matches the identifier.
    """
    matching_songs = db_session().query(Song).filter_by(id=song_id)
    song = matching_songs.first()
    # Guard clause: nothing to update for an unknown identifier.
    if not song:
        log.warn(f'Not song found with id [{song_id}]')
        return False
    song.index_id = index_id
    commit_session()
    return True
예제 #7
0
def _extract(lyrics):
    """
    Internal helper that turns the words of some lyrics into a features vector.
    :param lyrics: String with the words that make up the lyrics of a song.
    :return: Features vector reshaped to `(1, NUM_FEATURES)`, None when the
    normalization step gives nothing back.
    """
    cleaned_words = word2vec.clean_lyrics(lyrics)
    joined_lyrics = ' '.join(cleaned_words)
    w2v_instance = word2vec.load_w2v_instance(FILE_NAME_W2V)
    features = word2vec.normalize(joined_lyrics, w2v_instance)
    # Guard clause: normalization signals "no features" with None.
    if features is None:
        log.warn('Empty lyrics after normalizing it.')
        return None
    return features.reshape((1, NUM_FEATURES))
예제 #8
0
def search(features, amount_results=API_SONG_SIMILARITY_LIMIT, song_id=None):
    """
    Query to the index given a features vector.

    The index stored in `FILE_NAME_INDEX` is loaded and asked for
    `NEIGHBOURHOOD_AMOUNT` neighbours of `features`. Neighbours are visited
    in the order the index returns them. Each one that resolves to a song
    through `song_service.get_song_by_index_id` is serialized and collected
    until `amount_results` entries are gathered.

    :param features: Features vector needed for querying the index.
    :param amount_results: Maximum amount of results to return. Zero or a
    negative value yields an empty list.
    :param song_id: Song identifier that represents the features vector for
    avoiding to return itself as a result.
    :return: List of serialized songs, each with an extra `percentage` key.
    When a maximum distance is available it is 100 minus the distance
    expressed as a percentage of that maximum (capped at 100), rounded to
    two decimals. Otherwise it is the raw distance reported by the index.
    """
    # -1 is used as "no index entry to exclude".
    index_id = -1
    if song_id:
        song = song_service.get_song(song_id)
        if song:
            index_id = song.index_id
        else:
            log.warn(f'Not song found with id: [{song_id}]')
    nmslib_index = Nmslib()
    nmslib_index.load(FILE_NAME_INDEX)
    query_results = nmslib_index.batch_query(features, NEIGHBOURHOOD_AMOUNT)
    # Only the first entry of the batch result is used.
    closest, distances = query_results[0]
    maximum_distance = read_maximum_distance()
    results = []
    for i, dist in zip(closest, distances):
        # Check the limit before doing any work, so that it is honoured even
        # when it is zero or negative and no extra song lookup is made.
        if len(results) >= amount_results:
            break
        neighbour_index_id = int(i)
        score = float(dist)
        # Do not return the song the query was built from.
        if neighbour_index_id == index_id:
            continue
        neighbour = song_service.get_song_by_index_id(neighbour_index_id)
        if not neighbour:
            continue
        result = neighbour.serialize()
        if maximum_distance:
            # Turn the distance into a similarity percentage in [0, 100].
            score = 100.0 - min(100.0, (score * 100.0) / maximum_distance)
            score = float(f'{score:.2f}')
        result['percentage'] = score
        results.append(result)
    return results
예제 #9
0
파일: song.py 프로젝트: tabuckner/searchly
def search():
    """
    Controller that looks songs up in the database from the `query` request
    parameter, answering from the cache when the same query was seen before.
    :return: JSON response.
    """
    try:
        # Read and validate the request parameter
        query = request.args.get('query')
        if not query:
            return response.make(
                error=True, message='`query` missed as a query parameter.')
        query = query.strip()
        if len(query) < 3:
            log.warn(f'Query is too short: [{query}]')
            return response.make(error=False, response=dict(results=[]))
        # Answer from the cache when possible
        method = search.__name__
        key = '{}'.format(query)
        results_cached = cache.get(method, key)
        if results_cached is not None:
            return response.make(response=results_cached, cached=True)
        # Query the database and order the matches by display name
        results = [{
            'id': q.id,
            'name': f'{q.artist_name} - {q.song_name}'
        } for q in song_service.get_song_by_query(query)]
        results.sort(key=lambda q: q['name'])
        # Hand the results back together with the cache coordinates
        return response.make(error=False,
                             response=dict(results=results),
                             method=method,
                             key=key)
    except Exception as e:
        log.error(f'Unexpected error: [{e}]')
        log.exception(e)
        return response.make(error=True, message='Unexpected error.')