Ejemplo n.º 1
0
def make_low_level_data_file(filename: str, output_file_path: str):
    """Extract the low-level data file from a given song file.

    Runs the bundled ``streaming_extractor_music`` executable with the song
    as input and ``output_file_path`` as the file it should write to.

    Parameters
    ----------
    filename
        single song file path
    output_file_path
        output path of the lowlevel datafile

    Raises
    ------
    OSError
        If the extractor executable cannot be started (for example when
        it is missing or not executable).
    """

    extractor_path = get_absolute_path("utilities/ressources" +
                                       "/extractors/" +
                                       "streaming_extractor_music")

    # Hand the arguments over as a list so no shell is involved: spaces,
    # quotes, parentheses, "&", ";", "$" and friends in either path reach
    # the extractor verbatim and never need manual escaping.
    subprocess.run([extractor_path, filename, output_file_path])
Ejemplo n.º 2
0
def test_happy_2():
    """Check the happy2.png sample: 7 scores, and score index 3 above 0.5."""
    image_path = get_absolute_path(
        current_directory + "test_face_emotion_extraction_data/happy2.png")

    predictions = classify_faces([cv2.imread(image_path)])
    scores = predictions[0]

    assert len(scores) == 7
    assert scores[3] > 0.5
Ejemplo n.º 3
0
def test_neutral_1():
    """Check the neutral1.png sample: 7 scores, score index 6 above 0.5."""
    image_path = get_absolute_path(
        current_directory + "test_face_emotion_extraction_data/neutral1.png")

    predictions = classify_faces([cv2.imread(image_path)])
    scores = predictions[0]

    assert len(scores) == 7
    assert scores[6] > 0.5
def test_facial_recognition_of_at_least_one_face():
    """Analysing a short slice of the sample video yields a non-empty result."""
    video_path = get_absolute_path(
        "video_emotion/facial_recognition/t/"
        "test_facial_recognition/Fun_at_a_Fair.mp4")

    # Restrict the analysis to the 3s-5s window for performance reasons.
    window = (3000, 5000)
    frames = analyze_video(video_path, window)

    assert len(frames) != 0
Ejemplo n.º 5
0
def test_Many_Faces():
    """Classify three happy samples in one call and check score index 3."""
    data_dir = current_directory + "test_face_emotion_extraction_data/"

    # One image per sample, in the order the thresholds below expect.
    images = [
        cv2.imread(get_absolute_path(data_dir + sample))
        for sample in ("happy1.png", "happy2.png", "happy3.png")
    ]

    predictions = classify_faces(images)

    # Minimum value of score index 3 for each image, by position.
    thresholds = (0.5, 0.5, 0.3)
    for position, minimum in enumerate(thresholds):
        assert predictions[position][3] > minimum
Ejemplo n.º 6
0
def _load_song(song_id, filename, segments, force=False):
    """Collect the per-segment features of a song.

    When the database already holds segments for ``song_id`` (and ``force``
    is falsy) the stored feature buffers are decoded. Otherwise the audio
    file is loaded, cut into 5 second pieces, and every piece is analysed
    and stored through ``segments.add``.

    Returns a list of ``(segment id, song_id, start in seconds, feature)``
    tuples.
    """

    print('Loading: ' + song_id)
    filename = get_absolute_path(filename)
    print(song_id + ' loading song segments')
    stored_segments = segments.get_all_by_song_id(song_id)
    print(song_id + ' song segments loaded')

    collected = []

    if not (stored_segments == [] or force):
        print('Loaded from database')
        # Segments exist in the db: rebuild features from the stored bytes.
        for position in range(len(stored_segments)):
            record = stored_segments[position]

            has_all_features = (record['mfcc'] is not None
                                and record['chroma'] is not None
                                and record['tempogram'] is not None)
            if not has_all_features:
                # Stop at the first segment with a missing feature.
                break

            mfcc = np.frombuffer(record['mfcc'])
            chroma = np.frombuffer(record['chroma'])
            tempogram = np.frombuffer(record['tempogram'])
            feature = _create_feature(mfcc, chroma, tempogram)

            collected.append((record['_id'], song_id, position * 5, feature))

        return collected

    print('Loading audio file')
    # No usable segments in the db (or a reload was forced): analyse audio.
    y, sr = librosa.load(filename)

    # One less than the number of complete 5 second pieces in the signal.
    segment_count = y.shape[0] // sr // 5 - 1

    for position in range(0, segment_count):
        start = position * sr * 5
        stop = (position + 1) * sr * 5

        mfcc, chromagram, tempogram = _process_segment(y[start:stop], sr)

        _id = segments.add(song_id,
                           position * 5 * 1000,
                           (position + 1) * 5 * 1000,
                           mfcc.tobytes(),
                           chromagram.tobytes(),
                           tempogram.tobytes(),
                           [])

        feature = _create_feature(mfcc, chromagram, tempogram)
        collected.append((_id, song_id, position * 5, feature))

    return collected
Ejemplo n.º 7
0
def test_song_data_extraction():
    """Run the low-level extractor on a sample song and check its output.

    The extractor has to create a non-empty output file; the file is
    removed again afterwards, whether or not the assertions pass.
    """
    filename = get_absolute_path("classification/extractor/t/test_extractor/" +
                                 "8376-1-1 Demolition_Man_proud_music_" +
                                 "preview.wav")

    # This setup is required, to dynamically run this
    # test from anywhere you'd like.
    dirname = os.path.abspath(os.path.dirname(__file__))
    output_filename = os.path.join(dirname, "8376-1-1_output.json")

    make_low_level_data_file(filename, output_filename)

    try:
        assert os.path.isfile(output_filename)
        # len() of a stat result is its fixed number of fields and can
        # never be 0; the file size is what shows something was written.
        assert os.stat(output_filename).st_size != 0
    finally:
        # Do not leave the generated file behind when an assertion fails.
        if os.path.isfile(output_filename):
            os.remove(output_filename)
def test_api_helper():
    """Process the sample song and verify the profile stored for its id."""
    song_id = "1337"
    audio_path = get_absolute_path(
        "classification/t/test_segmented_audio_analysis/"
        "8376-1-1_Demolition_Man_proud_music_preview.wav")

    process_data_and_extract_profiles(song_id, audio_path)

    stored = TrackEmotion().get(song_id)

    assert stored['song_id'] == '1337'
    assert int(stored['bpm']['value']) == 139

    # Expected classifier label per stored field, checked in this order.
    expected_labels = {
        'timbre': 'dark',
        'relaxed': 'not_relaxed',
        'party': 'not_party',
        'aggressive': 'aggressive',
        'happy': 'not_happy',
        'sad': 'not_sad',
    }
    for field, label in expected_labels.items():
        assert stored[field]['value'] == label
Ejemplo n.º 9
0
def get_mono_loaded_song(song_path: str):
    """Load a song through ``MonoLoader`` and return what the loader yields

    Parameters
    ----------
    song_path : str
        The file path of the song; resolved with ``get_absolute_path``

    Returns
    -------
    vector_real
        The file's audio downmixed to mono

    """

    # Build the loader for the resolved path and run it right away.
    return MonoLoader(filename=get_absolute_path(song_path))()
Ejemplo n.º 10
0
def get_audio_loaded_song(song_path: str):
    """Create an ``AudioLoader`` for the file at the given path

    Parameters
    ----------
    song_path : str
        The file path of the song; resolved with ``get_absolute_path``

    Returns
    -------
    AudioLoader
        The loader configured for the file. It is returned without being
        invoked, so the caller has to call it to obtain the audio signal.

    """

    return AudioLoader(filename=get_absolute_path(song_path))
def classify_faces(faces) -> [[float]]:
    """Run the emotion model over the given face images

    Parameters
    ----------
    faces
        The faces to be processed; each one is converted with
        ``cv2.COLOR_BGR2GRAY`` before being fed to the model

    Returns
    -------
    array
        The output of the emotion model's ``predict`` for the whole batch:
        one entry per face holding that face's emotion scores, indexed
        like in getlabels
    """

    model = load_model(get_absolute_path("video_emotion/" +
                                         "emotion_tagger/" +
                                         "models/emotion_model.hdf5"))

    # Size the model expects, taken from its input shape.
    target_size = model.input_shape[1:3]

    def prepare(face):
        # Greyscale, resize to the model input, normalise, add last axis.
        grey = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(grey, target_size)
        return numpy.expand_dims(preprocess_input(resized), -1)

    batch = numpy.asarray([prepare(face) for face in faces])
    predictions = model.predict(batch)
    K.clear_session()

    return predictions
def get_classifier_data(
        data_file_name: str
) -> Tuple[Tuple, Tuple, Tuple, Tuple, Tuple, Tuple]:
    """Extracts the highlevel mood classifications from a given song file

    Runs ``essentia_streaming_extractor_music_svm`` on ``data_file_name``
    with the timbre/moods profile and reads the JSON it writes.

    Parameters
    ----------
    data_file_name
        single song file path

    Returns
    -------
    Tuple[Tuple, Tuple, Tuple, Tuple, Tuple, Tuple]
        A tuple of ``(value, probability)`` tuples, in the order timbre,
        relaxed, party, aggressive, happy, sad

    Raises
    ------
    OSError
        If the extractor executable cannot be started.
    json.JSONDecodeError
        If the extractor did not leave valid JSON in the temp file.
    KeyError
        If an expected classifier is missing from the extractor output.
    """

    # The extractor is run with this module's directory as its working
    # directory, so it behaves the same wherever the caller was started.
    dirname = os.path.abspath(os.path.dirname(__file__))
    profile_file = get_absolute_path("utilities/ressources/" +
                                     "timbre_moods_profile.yaml")

    # Temp file used instead of writing to an actual file; the context
    # manager closes (and thereby deletes) it even if something raises.
    with NamedTemporaryFile(delete=True) as temp_file:
        # Argument list + cwd instead of a "cd ... && ..." shell string:
        # paths with spaces or shell metacharacters are passed verbatim.
        subprocess.run(
            ['essentia_streaming_extractor_music_svm',
             data_file_name, temp_file.name, profile_file],
            cwd=dirname)

        data = json.load(temp_file)

    highlevel = data['highlevel']

    # Classifier names in the order callers unpack the result.
    classifiers = ('timbre', 'mood_relaxed', 'mood_party',
                   'mood_aggressive', 'mood_happy', 'mood_sad')

    return tuple(
        (highlevel[name]['value'], highlevel[name]['probability'])
        for name in classifiers)
import os
from typing import Dict

import numpy as np
import cv2

from utilities.filehandler.handle_path import get_absolute_path

# Directory containing this module.
dirname = os.path.dirname(__file__)
# NOTE(review): presumably the lowest detection confidence that is still
# accepted as a face -- confirm against the code that reads it.
CONFIDENCE_MINIMUM = 0.7

# Network definition file passed to the Caffe loader below.
OPENCV_PROTOTXT = get_absolute_path("video_emotion/facial_recognition/" +
                                    "deploy.prototxt.txt")

# Trained weights passed to the Caffe loader below.
# NOTE(review): the name ends in a lowercase "l" (OPENCV_MODEl); left as is
# because other code may reference it under this exact spelling.
OPENCV_MODEl = get_absolute_path("video_emotion/facial_recognition/res10" +
                                 "_300x300_ssd_iter_140000_fp16.caffemodel")
# Load model from disk (this happens once, when the module is imported)
NET = cv2.dnn.readNetFromCaffe(OPENCV_PROTOTXT, OPENCV_MODEl)

# Various numeral constants
# NOTE(review): presumably the image size and scale factor used when
# building the network input -- confirm where they are used.
IMAGE_RESIZE = 300
SIZE_CONSTANT = 1.0
# NOTE(review): these look like per-channel mean values; OpenCV images are
# usually BGR, so verify that each name matches the channel it is applied to.
RED = 104.0
GREEN = 177.0
BLUE = 123.0


def analyze_video(video_path: str, time_range: int = None) -> Dict:
    """Analyses video finding faces, given videopath and a timerange

    Parameters