Example #1
def test_channels(x, sr, num_channels, feature_set, feature_level):

    # create feature extractor for single channel

    fex = opensmile.Smile(feature_set, feature_level)

    y_mono = fex.process_signal(x, sr)

    # create feature extractor for multiple channels

    fex = opensmile.Smile(
        feature_set,
        feature_level,
        num_channels=num_channels,
    )

    with pytest.raises(RuntimeError):
        fex.process_signal(x, sr)  # channel mismatch
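    # duplicate the mono signal along the channel axis to match num_channels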
    x = np.repeat(x, num_channels, axis=0)
    y = fex.process_signal(x, sr)

    # assertions

    assert y_mono.shape[0] == y.shape[0]
    assert y_mono.shape[1] * fex.num_channels == y.shape[1]
    for c in range(num_channels):
        np.testing.assert_equal(
            y.values[:, c * fex.num_features:(c + 1) * fex.num_features],
            y_mono.values,
        )
Example #2
def test_default(tmpdir, feature_set, feature_level):

    deltas = feature_level == opensmile.FeatureLevel.LowLevelDescriptors_Deltas

    if (feature_set in gemaps_family) and deltas:

        # deltas not available

        with pytest.raises(ValueError):
            opensmile.Smile(feature_set, feature_level)

    else:

        # create feature extractor

        if feature_set in deprecated_feature_sets:
            with pytest.warns(UserWarning):
                fex = opensmile.Smile(feature_set, feature_level)
        else:
            fex = opensmile.Smile(feature_set, feature_level)

        # extract features from file

        y = fex.process_file(pytest.WAV_FILE)

        # run SMILExtract from same file

        source_config_file = os.path.join(
            fex.default_config_root,
            opensmile.config.FILE_INPUT_CONFIG,
        )
        if feature_set in gemaps_family:
            sink_config_file = os.path.join(
                fex.default_config_root,
                opensmile.config.FILE_OUTPUT_CONFIG_NO_LLD_DE,
            )
        else:
            sink_config_file = os.path.join(
                fex.default_config_root,
                opensmile.config.FILE_OUTPUT_CONFIG,
            )
        output_file = os.path.join(tmpdir, f'{feature_level.value}.csv')
        command = f'{pytest.SMILEXTRACT} ' \
                  f'-C {fex.config_path} ' \
                  f'-source {source_config_file} ' \
                  f'-I {pytest.WAV_FILE} ' \
                  f'-sink {sink_config_file} ' \
                  f'-{feature_level.value}_csv_output {output_file}'
        os.system(command)

        # read output of SMILExtract and compare

        df = pd.read_csv(output_file, sep=';')
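        # the first CSV column is not a feature value, so it is skipped in the comparison below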
        np.testing.assert_allclose(df.values[:, 1:],
                                   y.values,
                                   rtol=1e-6,
                                   atol=0)
        assert fex.num_features == len(df.columns) - 1
        assert fex.feature_names == list(df.columns[1:])
Example #3
def _extract_opensmile_features(audio, sr, features_set):
    smile = opensmile.Smile(
        feature_set=features_set,
        feature_level=opensmile.FeatureLevel.Functionals,
    )

    return smile.process_signal(audio, sr)
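
A minimal call sketch for this helper, assuming the audio was loaded with soundfile into a mono array (the file name is a placeholder):

import soundfile

audio, sr = soundfile.read('speech.wav')  # hypothetical path; mono signal assumed
functionals = _extract_opensmile_features(audio, sr, opensmile.FeatureSet.eGeMAPSv02)
print(functionals.shape)  # one row of functionals per processed signal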
Example #4
    def __init__(self, config: Optional[Any] = None):
        super().__init__(config=config)
        assert is_module_available(
            "opensmile"
        ), 'To use opensmile extractors, please "pip install opensmile" first.'
        import opensmile

        if isinstance(self.config.feature_set, str):
            self.feature_set = opensmile.FeatureSet[self.config.feature_set]
        else:
            self.feature_set = self.config.feature_set
        self.feature_level = opensmile.FeatureLevel(self.config.feature_level)
        self.smileExtractor = opensmile.Smile(
            feature_set=self.feature_set,
            feature_level=self.feature_level,
            sampling_rate=self.config.sampling_rate,
            options=self.config.options,
            loglevel=self.config.loglevel,
            logfile=self.config.logfile,
            channels=self.config.channels,
            mixdown=self.config.mixdown,
            resample=self.config.resample,
            num_workers=self.config.num_workers,
            verbose=self.config.verbose,
        )
Example #5
def extract_gemaps_features(audio_filename):
    audio, sampling_rate = soundfile.read(audio_filename)

    # Convert 2 channel input to 1 channel
    if len(audio.shape) == 2:
        audio = (audio[:, 0] + audio[:, 1]) / 2

    feature_extractor = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv01b)

    # Ideally we would want to use 50 ms windows, but they are too short for opensmile
    # Therefore we use 100 ms windows instead, then upsample the feature vector to the same length
    step = int(0.1 * sampling_rate)
    features = [
        feature_extractor.process_signal(audio[start:start + step],
                                         sampling_rate).to_numpy()
        for start in range(0,
                           len(audio) - step, step)
    ]
    # remove excess dimension
    features = np.asarray(features).squeeze()

    # Upsample so that we have the same length as we would with 50 ms windows
    cols = np.linspace(0,
                       features.shape[0],
                       endpoint=False,
                       num=features.shape[0] * 2,
                       dtype=int)
    features = features[cols]

    return features
Example #6
def gemaps_time_featurize(wav_file):

    # initialize features and labels
    labels = list()
    features = list()

    # extract LLD
    smile_LLD = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv01b,
        feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
    )

    y_LLD = smile_LLD.process_file(wav_file)

    labels_LLD = list(y_LLD)

    for i in range(len(labels_LLD)):
        features.append(list(y_LLD[labels_LLD[i]]))
        labels.append(labels_LLD[i])

    smile_LLD_deltas = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv01b,
        feature_level=opensmile.FeatureLevel.LowLevelDescriptors_Deltas,
    )

    y_LLD_deltas = smile_LLD_deltas.process_file(wav_file)

    labels_LLD_deltas = list(y_LLD_deltas)

    for i in range(len(labels_LLD_deltas)):
        features.append(list(y_LLD_deltas[labels_LLD_deltas[i]]))
        labels.append(labels_LLD_deltas[i])

    smile_functionals = opensmile.Smile(
        feature_set=opensmile.FeatureSet.eGeMAPSv01b,
        feature_level=opensmile.FeatureLevel.Functionals,
    )

    y_functionals = smile_functionals.process_file(wav_file)

    labels_y_functionals = list(y_functionals)

    for i in range(len(labels_y_functionals)):
        features.append(list(y_functionals[labels_y_functionals[i]]))
        labels.append(labels_y_functionals[i])

    return features, labels
Example #7
    def __init__(self, logfile="./log_opensmile"):
        """Init method for OpenSmileExtractor."""
        super().__init__(logfile=logfile)
        self.smile = opensmile.Smile(  # Create the functionals extractor here
            feature_set=opensmile.FeatureSet.eGeMAPSv02,
            feature_level=opensmile.FeatureLevel.Functionals,
            options={
                "frameModeFunctionalsConf": "./data/custom_FrameModeFunctionals.conf.inc"  # this local path might cause trouble
            },
        )
Example #8
def opensmileTrial():
    signal, sampling_rate = audiofile.read("audio/1s.wav", always_2d=True)
    # wf = wave.open("audio/1s.wav", 'rb')
    # signal = wf.readframes(4096)
    smile = opensmile.Smile(
        feature_set='conf/alqudah_live.conf',
        feature_level='features',
        num_channels=2,
    )
    print(signal.shape)
    result = smile.process_signal(signal[:, :4096], sampling_rate)
    print(result)
Example #9
def liveTest():
    wf = wave.open("audio/mono.wav", 'rb')
    print("samplewidth, nchannels, framerate, nframes")
    print(wf.getsampwidth())   # sample width in bytes (2)
    print(wf.getnchannels())   # number of channels (1 for this mono file)
    print(wf.getframerate())   # sampling rate in Hz (44100)
    print(wf.getnframes())     # total frames (441000 -> 10 s of audio)

    # signal, sampling_rate = audiofile.read("audio/1s.wav", always_2d=True)
    smile = opensmile.Smile(
        feature_set='conf/alqudah_live.conf',
        feature_level='features',
        num_channels=1  #wf.getnchannels()
    )

    c = C()

    p = pyaudio.PyAudio()

    def callback(in_data, frame_count, time_info, status):
        # print(frame_count)
        data = wf.readframes(frame_count)
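        # convert 16-bit PCM bytes to float samples in [-1, 1)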
        c.inc(np.frombuffer(data, dtype="int16") / pow(2, 15))
        c.print()
        features = smile.process_signal(
            np.frombuffer(data, dtype="int16") / pow(2, 15), wf.getframerate())
        c.appendFeatures(features)
        return (data, pyaudio.paContinue)

    # todo: set the format manually like "format=pyaudio.paInt16"
    stream = p.open(
        format=p.get_format_from_width(wf.getsampwidth()),
        channels=wf.getnchannels(),
        rate=wf.getframerate(),
        # input=True,
        output=True,
        stream_callback=callback,
        frames_per_buffer=int(wf.getframerate() / 10),  # 0.1sec
    )

    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    print("Done")

    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()

    c.plot()
    c.saveCSV()
Example #10
def audioFeature():
    s = time.time()
    smile2 = opensmile.Smile(feature_set=opensmile.FeatureSet.eGeMAPSv01b,
                             feature_level=opensmile.FeatureLevel.Functionals, num_channels=2)
    smile6 = opensmile.Smile(feature_set=opensmile.FeatureSet.eGeMAPSv01b,
                             feature_level=opensmile.FeatureLevel.Functionals, num_channels=6)

    directory_in_str = "MELD.Raw/dev_splits_complete"

    vec_528 = smile6.process_file(directory_in_str+"/dia1_utt5.mp4")
    vec_176 = smile2.process_file(directory_in_str+"/dia101_utt0.mp4")
    col_list = (vec_176.append([vec_528])).columns.tolist()


    X_dict = {}
    directory = os.fsencode(directory_in_str)
    i = 0
    # files = sorted(os.listdir(directory))
    for file in sorted(os.listdir(directory), key=lambda s: s.lower()):
        filename = os.fsdecode(file)
        # print(filename)
        try:
            feature_vec = smile6.process_file(directory_in_str + "/"+filename)
            X_dict[filename] = feature_vec
        except RuntimeError:
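            # channel-count mismatch: fall back to the stereo extractor and pad columns to the 6-channel layout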
            feature_vec = smile2.process_file(directory_in_str + "/"+filename)
            feature_vec = feature_vec.reindex(columns=col_list, fill_value=0)
            X_dict[filename] = feature_vec
        except:
            continue

        i += 1
        if i%100==0:
            print(i)

    pickle.dump(X_dict, open('dict/audio_features_dev.p', 'wb'))

    print(time.time()-s)
Example #11
def verbose_opensmile():
    for fset in opensmile.FeatureSet:
        print("========================================================")
        print(fset)
        for level in opensmile.FeatureLevel:
            print("==========================")
            print(level)
            try:
                smile = opensmile.Smile(
                    feature_set=fset,
                    feature_level=level,
                )
                print(smile.feature_names)
            except:
                pass
Example #12
    def __init__(self, config_path="./config.yaml"):
        """Init method for the Searcher."""
        super().__init__()
        # Load the configuration
        conf = OmegaConf.load(config_path)
        self.dataset_path = conf.dataset_path
        self.audio_path = os.path.join(conf.dataset_path, "podcasts-audio")

        self.es_url = conf.search_es_url  # URL of Elasticsearch to query
        self.es_num = conf.search_es_num  # Number of segments to request from Elasticsearch
        self.sample_rate = 44100  # Hardcoded sample rate of all podcast audio

        # Load the podcast metadata
        self.metadata = load_metadata(self.dataset_path)

        # Set up the reranking model
        self.rerank_tokenizer = AutoTokenizer.from_pretrained(
            conf.search_rerank_model,
            use_fast=True,
            cache_dir=conf.search_cache_dir)
        self.rerank_model = AutoModelForSequenceClassification.from_pretrained(
            conf.search_rerank_model, cache_dir=conf.search_cache_dir)
        self.rerank_model.to("cpu", non_blocking=True)
        self.rerank_max_seq_len = 512

        # Set up the openSMILE extractor
        self.smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.eGeMAPSv02,
            feature_level=opensmile.FeatureLevel.Functionals,
            options={
                "frameModeFunctionalsConf":
                os.path.join(
                    os.getenv("PODCAST_PATH"),
                    "data/custom_FrameModeFunctionals.conf.inc",
                )
            },
        )

        # Set up the YAMNet model
        params = yamnet_params.Params(sample_rate=self.sample_rate,
                                      patch_hop_seconds=0.48)
        self.yamnet_classes = yamnet_model.class_names(
            os.path.join(os.getenv("YAMNET_PATH"), "yamnet_class_map.csv"))
        self.yamnet_model = yamnet_model.yamnet_frames_model(params)
        self.yamnet_model.load_weights(
            os.path.join(os.getenv("PODCAST_PATH"), "data/yamnet.h5"))
Example #13
def extract_opensmile_features_from_audio_sequence(
        data: Union[np.ndarray, str],
        sample_rate: Optional[int] = None,
        feature_type: str = 'LLD') -> np.ndarray:
    """Extracts opensmile ComParE_2016 features from audio sequence represented either by ndarray or path.
    https://github.com/audeering/opensmile-python

    :param data: np.ndarray or str
                Can be ndarray - already loaded sound data or str - path to data
    :param sample_rate: Optional[int]
                Sample rate is needed if data in ndarray format is provided
    :param feature_type: str
                One of 'LLD', 'Compare_2016_functionals' or 'EGEMAPS'
    :return: np.ndarray
                extracted features
    """
    supported_feature_types = ('LLD', 'Compare_2016_functionals', 'EGEMAPS')
    if feature_type not in supported_feature_types:
        raise AttributeError('feature_type must be one of %s. Got %s.' %
                             (supported_feature_types, feature_type))
    # configure opensmile to extract desirable features
    if feature_type == 'LLD':
        feature_level = opensmile.FeatureLevel.LowLevelDescriptors
        feature_set = opensmile.FeatureSet.ComParE_2016
    elif feature_type == 'Compare_2016_functionals':
        feature_level = opensmile.FeatureLevel.Functionals
        feature_set = opensmile.FeatureSet.ComParE_2016
    elif feature_type == 'EGEMAPS':
        feature_level = opensmile.FeatureLevel.Functionals
        feature_set = opensmile.FeatureSet.eGeMAPSv02
    # create opensmile Extractor
    smile = opensmile.Smile(
        feature_set=feature_set,
        feature_level=feature_level,
    )
    # check if data is a valid type and if yes, extract features properly
    if isinstance(data, str):
        extracted_features = smile.process_file(data).values
    elif isinstance(data, np.ndarray):
        # check if audio data is one-channel, then reshape it to 1D array (the requirement of opensmile)
        if len(data.shape) == 2 and data.shape[1] == 1:
            data = data.reshape((-1, ))
        extracted_features = smile.process_signal(
            data, sampling_rate=sample_rate).values
    else:
        raise AttributeError('Data should be either ndarray or str. Got %s.' %
                             (type(data)))
    return extracted_features
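
For reference, a hedged sketch of calling the function above with either input type (the path is a placeholder and soundfile is only one possible loader):

lld = extract_opensmile_features_from_audio_sequence('speech.wav', feature_type='LLD')

import soundfile
audio, sr = soundfile.read('speech.wav')  # hypothetical path
egemaps = extract_opensmile_features_from_audio_sequence(audio, sample_rate=sr,
                                                         feature_type='EGEMAPS')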
Example #14
def get_feature_list(data_frame,audio_featurizer="mel_spectogram",text_featurizer="sentence_embeddings",checkpoint="roberta-large-nli-stsb-mean-tokens"):
    X_acoustic=[]
    Y=[]
    texts=[]
    if audio_featurizer=="opensmile":
        import opensmile
        smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.ComParE_2016,
        feature_level=opensmile.FeatureLevel.Functionals,
        )
    for i in range(len(data_frame)):
        if audio_featurizer=="mel_spectogram":
            raw_mel=extract_mel_spectogram(data_frame.iloc[i]['audio_file_path'],mel=True)
            X_acoustic.append(raw_mel)
        elif audio_featurizer=="opensmile":
            y = smile.process_file(data_frame.iloc[i]['audio_file_path'])
            X_acoustic.append([y.iloc[0][col] for col in y.columns])
            
        Y.append(data_frame.iloc[i]['tag'])
        texts.append(data_frame.iloc[i]['text'])
    if text_featurizer=="sentence_embeddings":
        from sentence_transformers import SentenceTransformer
        if checkpoint is None:
            embedder = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
        else:
            embedder = SentenceTransformer(checkpoint)
        X_textual= embedder.encode(texts)
    X_acoustic_padded=pad_sequences(X_acoustic,padding='post')
    le=LabelEncoder()
    Y=le.fit_transform(Y)
    if len(X_acoustic_padded[0].shape)>1:
        X_acoustic_shape=(None,X_acoustic_padded[0].shape[1])
    else:
        X_acoustic_shape=(X_acoustic_padded[0].shape[0])
    print(X_acoustic_shape)    
    if len(X_textual[0].shape)>1:
        X_textual_shape=(None,X_textual[0].shape)
    else:
        X_textual_shape=(X_textual[0].shape[0])    
    if (audio_featurizer=="mel_spectogram") and text_featurizer=="sentence_embeddings":
        model=get_model(X_textual_shape,X_acoustic_shape,n_classes=len(le.classes_),acoustic_sequence=True)
    elif audio_featurizer=="opensmile" and text_featurizer=="sentence_embeddings": 
        model=get_model(X_textual_shape,X_acoustic_shape,n_classes=len(le.classes_),acoustic_sequence=False)
        
    return [np.array(X_textual),np.array(X_acoustic_padded)],np.array(Y),le,model
Example #15
def test_signal(file, feature_set, feature_level):

    # create feature extractor

    fex = opensmile.Smile(feature_set, feature_level)

    # extract from numpy array

    x, sr = audiofile.read(file, always_2d=True)
    y = fex.process_signal(x, sr)
    y_file = fex.process_file(file)
    with pytest.warns(UserWarning):
        y_empty = fex.process_signal(x[0, :10], sr)

    # assertions

    assert fex.feature_names == y.columns.to_list()
    np.testing.assert_equal(y.values, y_file.values)
    assert all(y_empty.isna())
Example #16
def mid_term_feat_extraction(wav_file_path):

    sampling_rate, signal = audioBasicIO.read_audio_file(wav_file_path)
    if sampling_rate == 0:
        print('Sampling rate not correct.')
        return None

    signal = audioBasicIO.stereo_to_mono(signal)
    if signal.shape[0] < float(sampling_rate) / 5:
        print("The duration of the audio is too short.")
        return None

    mid_window, mid_step, short_window, short_step = 0.5, 0.5, 0.05, 0.05
    mid_features, _, mid_feature_names = MidTermFeatures.mid_feature_extraction(
        signal, sampling_rate, round(mid_window * sampling_rate),
        round(mid_step * sampling_rate), round(sampling_rate * short_window),
        round(sampling_rate * short_step))
    mid_features = np.transpose(mid_features)
    mid_features = mid_features.mean(axis=0)
    # long term averaging of mid-term statistics
    if (not np.isnan(mid_features).any()) and (
            not np.isinf(mid_features).any()):
        #print('Mid-Terms features extracted correctly.')
        mid_dict = dict(zip(mid_feature_names, mid_features))
        mid_df = pd.DataFrame([mid_dict.values()], columns=mid_dict.keys())

        # Smile library audio extraction
        smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.eGeMAPSv01b,
            feature_level=opensmile.FeatureLevel.Functionals,
        )
        smile_features = smile.process_signal(signal, sampling_rate)
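        # drop the (start, end) timestamp index columns so only the feature values remain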
        smile_df = pd.DataFrame(smile_features).reset_index().iloc[:, 2:]

        final_df = pd.concat([mid_df, smile_df], axis=1)

        #excel_path = wav_file_path.strip('.') + 'features_extracted.xlsx'
        #final_df.to_excel(excel_path)
        return final_df
    else:
        #print('Mid-Terms features extracted incorrectly.')
        return None
Example #17
def test_files(num_files, feature_set, feature_level, num_workers):

    # create feature extractor

    fex = opensmile.Smile(
        feature_set,
        feature_level,
        num_workers=num_workers,
    )

    # extract from single file

    y_file = fex.process_file(pytest.WAV_FILE)

    # extract from files

    y_files = fex.process_files([pytest.WAV_FILE] * num_files)

    # assertions

    np.testing.assert_equal(np.concatenate([y_file] * num_files),
                            y_files.values)
Example #18
def test_custom(config, level):

    # create feature extractor

    fex = opensmile.Smile(config, level)

    # extract from file

    y_file = fex.process_file(pytest.WAV_FILE)

    # extract from array

    x, sr = audiofile.read(pytest.WAV_FILE)
    y_array = fex.process_signal(x, sr, file=pytest.WAV_FILE)

    # assertions

    assert fex.config_name == audeer.basename_wo_ext(config)
    assert fex.config_path == audeer.safe_path(config)
    assert fex.num_features == len(fex.feature_names)
    assert fex.feature_names == y_file.columns.to_list()
    pd.testing.assert_frame_equal(y_file, y_array)
Example #19
accepted = [
    '6d8998ea8704af6685a50219c9dd3747', '2f8ddcfcb5a4419b16c533808ed9c38e',
    '57e7a29c01c11136336257b324b7a3af', 'f0132034f1bc1a891841d6e8bbb6eb1a',
    'd50f6c0da77b2223568ad042ea035a5e', '28081878fb0d5dbaac595fb031bb4e43',
    '5b67a12483bb2bc7eed1b214398bac9c', 'ae54788ccd8cc8b67e121f49e747f548',
    '9a3614ec10dcaeb5ee53088d939bbd6b', '56a336e76f530ef57ae1e7f0a156e8c0',
    '3ec0eea351847a9def135aa2b322637f', '9ea4000febe9fab7c8560437103192c6',
    '43bdf492e2b6bda5ec869dc003de81c1', '1e996fca7223c00c4c76b1f3b7dc1eaa',
    '2eef8025f3f03c9ad7532249808ab4f8'
]

file = accepted[0]
filepath = "./response1/" + file + ".wav"
signal, sampling_rate = audiofile.read(filepath, always_2d=True)
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    # feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)
# print(smile.feature_names)

data = smile.process_signal(signal, sampling_rate)

data.insert(loc=0, column='key', value=file)

# %%
'''
FOLLOWING FILES
'''
#########################################################

for i in range(1, len(accepted)):
Example #20
client.connect(broker_address, broker_port, keepalive)

client.loop_start()  #start the loop

# %% reload saved model
# load model from file
with open('../Network/model_smile_it.json', 'r') as json_file:
    loaded_json = json_file.read()
    model = model_from_json(loaded_json, custom_objects={'TCN': TCN})
    # restore weights
    model.load_weights('../Network/weights_smile_it.h5')

# %% Pre-process input
# Config for opensmile feature set
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
)


def input_prep(data, smile):
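    # fixed model input: one window of 296 LLD frames x 25 eGeMAPS low-level descriptors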
    X_smile = np.empty(shape=(1, 296, 25))
    df_x = smile.process_signal(data, 44100)
    scaler = MinMaxScaler()
    X_smile[0, :, :] = scaler.fit_transform(df_x.values)
    return X_smile


# %% Identificar dispositivos de audio do sistema
p = pyaudio.PyAudio()
info = p.get_host_api_info_by_index(0)
numdevices = info.get('deviceCount')
Example #21
                        '-o',
                        required=True,
                        help='output feature file')
    parser.add_argument('--feat',
                        '-x',
                        required=True,
                        choices=['eGeMAPS', 'xvector'],
                        help='Feature type')

    args = parser.parse_args()

    file_list = get_files(args.file)

    if args.feat == 'eGeMAPS':
        smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.eGeMAPSv02,
            feature_level=opensmile.FeatureLevel.Functionals)

        # write 1 example to include header
        get_feat_smile(file_list[0], args.out, smile, header=True)

        # generate iterable with arguments to func
        iterable = zip(file_list[1:], repeat(args.out), repeat(smile))

        func = get_feat_smile

    elif args.feat == 'xvector':
        get_feat_xvector(file_list[0], args.out, header=True)
        iterable = zip(file_list[1:], repeat(args.out))
        func = get_feat_xvector