Exemple #1
0
def prepare(params, samples):
    """Build the word2id vocabulary from *samples* and attach word vectors to *params*."""
    params.wvec_dim = 300  # embedding size expected downstream
    _, params.word2id = utils.create_dictionary(samples)
    vec_path = utils.get_word_vec_path_by_name(params.word_vec_name)
    params.word_vec = utils.get_wordvec(vec_path, params.word2id)
Exemple #2
0
def get_workers():
    """Return workers of the requested category within *distance* of the caller.

    Query parameters:
        category:  worker category to filter on
        latitude:  caller latitude (decimal degrees, string)
        longitude: caller longitude (decimal degrees, string)
        distance:  maximum allowed distance; defaults to 2.0 when absent

    Returns:
        JSON response {'workers': [...]}, one dictionary per matching worker.
    """
    if request:
        app.logger.info('request headers - {0}'.format(request.headers))
    category = request.args.get('category')
    final_list = []
    query = db.session.query(Worker).all()
    if query:
        app.logger.info('query value true')
    filtered_list = [item for item in query if item.category == category]

    latitude = request.args.get('latitude')
    # NOTE(review): the normalized result is discarded here (and below);
    # presumably it was meant to be reassigned — confirm original intent.
    unicodedata.normalize('NFKD', latitude).encode('ascii', 'ignore')
    longitude = request.args.get('longitude')
    unicodedata.normalize('NFKD', longitude).encode('ascii', 'ignore')
    distance = request.args.get('distance')

    # BUG FIX: query args are strings; comparing a float distance against a
    # str raised TypeError on Python 3. Coerce to float (default 2.0).
    distance = float(distance) if distance else 2.0
    for item in filtered_list:
        distance_point = calc_dist(float(item.latitude), float(item.longitude),
                                   float(latitude), float(longitude))
        app.logger.info('dist - {0}'.format(distance_point))
        # BUG FIX: removed leftover debug line `distance_point = 1.5` that
        # overwrote the computed distance and made the filter meaningless.
        if distance_point < distance:
            awesome_dict = create_dictionary(item, distance_point)
            final_list.append(awesome_dict)

    return jsonify({'workers': final_list})
Exemple #3
0
def build_vocabulary(path_data, path_vocs_dict, min_counts_dict, columns):
    """Build per-column vocabularies from a TSV data file.

    Args:
        path_data: str, path to the data file (blank lines separate sentences)
        path_vocs_dict: dict, output path for each column's vocabulary
        min_counts_dict: dict, minimum occurrence count per item
        columns: list of str, name of each column
    Returns:
        (voc_sizes, max_length): list of vocabulary sizes, plus the longest
        sentence length observed.
    """
    print('building vocs...')
    sequence_length_dict = defaultdict(int)  # histogram of sentence lengths
    counters = [defaultdict(int) for _ in columns]  # per-column item counts
    seq_len = 0
    file_data = codecs.open(path_data, 'r', encoding='utf-8')
    for raw_line in file_data:
        stripped = raw_line.rstrip()
        if not stripped:
            # a blank line terminates the current sentence
            sequence_length_dict[seq_len] += 1
            seq_len = 0
            continue
        fields = stripped.split('\t')
        seq_len += 1
        print(fields)
        for idx, field in enumerate(fields):
            counters[idx][field] += 1
    file_data.close()
    # account for a final sentence without a trailing blank line
    if seq_len != 0:
        sequence_length_dict[seq_len] += 1

    # write each vocabulary to disk and record its size
    voc_sizes = []
    for idx, name in enumerate(columns):
        size = create_dictionary(counters[idx],
                                 path_vocs_dict[name],
                                 start=1,
                                 sort=True,
                                 min_count=min_counts_dict[name],
                                 overwrite=True)
        print('voc: %s, size: %d' % (path_vocs_dict[name], size))
        voc_sizes.append(size)
    print('句子长度分布:')  # sentence-length distribution
    print(sorted(sequence_length_dict.items()))
    print('done!')

    return voc_sizes, max(sequence_length_dict.keys())
Exemple #4
0
def run_mnn(dataset):
    """Train a multi-layer NN on audio features and write predictions to OUTPUT_CSV.

    Args:
        dataset: 0 -> full dataset (writes top-3 labels per file),
                 anything else -> confusion-matrix subset (top-1 label per file).
    """
    train_csv = TRAIN_CSV if (dataset == 0) else TRAIN_CONF_CSV

    utils.write_log_msg("creating data dictionary...")
    # label -> class-index mapping built from the provided train csv
    dictionary = utils.create_dictionary(train_csv)

    utils.write_log_msg("extracting features of training data...")
    tr_mnn_features, tr_mnn_labels = features.parse_audio_files_train(
        TRAIN_AUDIO_PATH, train_csv, dictionary, 0)

    utils.write_log_msg("extracting features of prediction data...")
    if dataset == 0:
        ts_mnn_features, ts_mnn_name_list = features.parse_audio_files_predict(
            TEST_AUDIO_PATH, os.listdir(TEST_AUDIO_PATH), 0)
    else:
        test_csv = pd.read_csv(TEST_CONF_CSV)
        ts_mnn_features, ts_mnn_name_list = features.parse_audio_files_predict(
            TRAIN_AUDIO_PATH, test_csv["fname"].tolist(), 0)

    utils.write_log_msg("starting multi-layer neural network training...")
    mnn_y_pred, mnn_probs, mnn_pred = train.tensor_multilayer_neural_network(
        tr_mnn_features, tr_mnn_labels, ts_mnn_features, len(dictionary),
        training_epochs=500)

    # Accumulate per-class probabilities, then keep the 3 best classes per row.
    ensembled_output = np.zeros(shape=(mnn_probs.shape[0], mnn_probs.shape[1]))
    for row, columns in enumerate(mnn_pred):
        for i, column in enumerate(columns):
            ensembled_output[row, column] += mnn_probs[row, i]
    top3 = ensembled_output.argsort()[:, -3:][:, ::-1]

    # PERF FIX: invert the dictionary once instead of scanning every item for
    # each label lookup (assumes class indices are unique values — they are
    # produced by create_dictionary as distinct indices).
    index_to_label = {v: k for k, v in dictionary.items()}

    # BUG FIX: open the output file with a context manager so the handle is
    # flushed and closed even on error; the original leaked it.
    with open(OUTPUT_CSV, "w") as file_:
        file_.write("fname,label\n")
        for i, value in enumerate(top3):
            if dataset == 0:
                file_.write("%s,%s %s %s\n" % (ts_mnn_name_list[i],
                                               index_to_label[value[0]],
                                               index_to_label[value[1]],
                                               index_to_label[value[2]]))
            else:
                file_.write("%s,%s\n" % (ts_mnn_name_list[i],
                                         index_to_label[value[0]]))
        if dataset == 0:
            file_.write("0b0427e2.wav,Harmonica\n6ea0099f.wav,Harmonica\nb39975f5.wav,Harmonica")

    utils.write_log_msg("done...")
Exemple #5
0
def build_vocabulary(path_data, path_vocs_dict, min_counts_dict, columns):
    '''
    Build per-column vocabularies from a TSV data file.
    :param path_data: str, path to the data file (blank lines separate sentences)
    :param path_vocs_dict: dict, output path for each column's vocabulary
    :param min_counts_dict: dict, minimum occurrence count per item
    :param columns: list of str, name of each column
    :return:
        voc_size_1, voc_size_2, ...: int
        sequence_length: maximum sentence length
    '''
    print('builiding vocs ...')
    file_data = codecs.open(path_data, 'r', encoding='utf-8')
    line = file_data.readline()

    sequence_length_dict = defaultdict(int)  # histogram of sentence lengths
    # per-column item counters
    feature_item_dict_list = []
    for i in range(len(columns)):
        feature_item_dict_list.append(defaultdict(int))
    sequence_length = 0
    while line:
        line = line.strip()
        if not line:
            # a blank line terminates the current sentence
            line = file_data.readline()
            sequence_length_dict[sequence_length] += 1
            sequence_length = 0
            continue
        items = line.split('\t')
        sequence_length += 1
        print(items)
        for i in range(len(items)):
            feature_item_dict_list[i][items[i]] += 1
        line = file_data.readline()
    file_data.close()
    # account for a final sentence without a trailing blank line
    if sequence_length != 0:
        sequence_length_dict[sequence_length] += 1

    # write each vocabulary to disk and record its size
    voc_sizes = []
    for i, name in enumerate(columns):
        size = create_dictionary(feature_item_dict_list[i],
                                 path_vocs_dict[name],
                                 start=1,
                                 sort=True,
                                 min_count=min_counts_dict[name],
                                 overwrite=True)
        # BUG FIX: the original passed the tuple as a second argument to
        # print(), emitting the raw format string; interpolate with % instead.
        print('voc: %s, size: %d' % (path_vocs_dict[name], size))
        voc_sizes.append(size)
    print('句子长度分布:')  # sentence-length distribution
    print(sorted(sequence_length_dict.items()))
    print('done')

    return voc_sizes, max(sequence_length_dict.keys())
Exemple #6
0
def read_audio_files():
    """Extract MNN and CNN features for training and prediction data, cache
    them on disk via store_features, and return everything for training."""

    utils.write_log_msg("creating data dictionary...")
    # label dictionary built from the provided train.csv file
    dictionary = utils.create_dictionary(TRAIN_CSV)

    utils.write_log_msg("extracting features of training data...")
    # mode 0 -> mnn features, mode 1 -> cnn features
    (tr_mnn_features, tr_mnn_labels), (tr_cnn_features, tr_cnn_labels) = [
        features.parse_audio_files_train(TRAIN_AUDIO_PATH, TRAIN_CSV,
                                         dictionary, mode)
        for mode in (0, 1)
    ]

    utils.write_log_msg(
        "processed {0} files of training data for mnn...".format(
            len(tr_mnn_features)))
    utils.write_log_msg(
        "processed {0} files of training data for cnn...".format(
            len(tr_cnn_features)))

    utils.write_log_msg("extracting features of prediction data...")
    (ts_mnn_features, ts_mnn_name_list), (ts_cnn_features, ts_cnn_name_list) = [
        features.parse_audio_files_predict(TEST_AUDIO_PATH,
                                           os.listdir(TEST_AUDIO_PATH), mode)
        for mode in (0, 1)
    ]

    utils.write_log_msg(
        "processed {0} files of prediction data for mnn...".format(
            len(ts_mnn_features)))
    utils.write_log_msg(
        "processed {0} files of prediction data for cnn...".format(
            len(ts_cnn_features)))

    utils.write_log_msg("storing features for future use...")
    # persist features so later runs can skip extraction
    features.store_features(dictionary, tr_mnn_features, tr_mnn_labels,
                            ts_mnn_features, ts_mnn_name_list, tr_cnn_features,
                            tr_cnn_labels, ts_cnn_features, ts_cnn_name_list)

    return dictionary, tr_mnn_features, tr_mnn_labels, ts_mnn_features, ts_mnn_name_list, tr_cnn_features, tr_cnn_labels, ts_cnn_features, ts_cnn_name_list
Exemple #7
0
def prepare(params, samples):
    """Build the word2id vocabulary from *samples* and attach count-weighted,
    optionally normalized word vectors to *params*."""
    params.wvec_dim = 300  # embedding size expected downstream
    _, params.word2id = utils.create_dictionary(samples)
    vec_path = utils.get_word_vec_path_by_name(params.word_vec_name)
    params.word_vec = utils.get_wordvec(vec_path,
                                        params.word2id,
                                        norm=params.norm,
                                        path_to_counts=params.word_count_path)
Exemple #8
0
def save_melgrams(dataset_path):
    """Compute and save a mel spectrogram (.npy) for every song in *dataset_path*."""
    print('Tamaño del dataset:', len(dataset_path))

    for song_path in dataset_path:
        # Load the full track at 44.1 kHz and wrap its metadata in a dict.
        signal, sr = librosa.load(song_path, duration=None, sr=44100)
        duration = librosa.get_duration(y=signal, sr=sr)
        song_info = create_dictionary(song_path, duration, signal, sr)

        melgram = extract_features(song_info)

        # Output layout: datasets/train_spec/<folder>/mel_spectrogram_<song>.npy
        parts = song_path.split("/")
        folder_number = parts[-2]
        song_name = parts[-1].replace(".wav", "")

        out_dir = os.path.join(getcwd(), 'datasets/train_spec/{}'.format(folder_number))
        if not os.path.exists(out_dir):
            makedirs(out_dir)

        np.save(out_dir + "/mel_spectrogram_{}".format(song_name), melgram)
Exemple #9
0
import sys
from utils import FORM, XPOS, DEPREL
from utils import create_dictionary, create_index, read_conllu, map_to_instances, shuffled_stream
from utils import parse_projective
from layers import Embeddings, BiLSTM
import random


class MLP(object):
    """Placeholder for a multi-layer perceptron; no behavior implemented yet."""


if __name__ == "__main__":
    random.seed(1)  # make any shuffling reproducible
    train_file = "../treebanks/train/en/en.conllu"
    # build a token->id index over the training treebank
    index = create_index(create_dictionary(read_conllu(train_file)))
    # map each sentence to index-based instances over the FORM and XPOS fields
    train_data = list(
        map_to_instances(read_conllu(train_file), index, (FORM, XPOS)))

    # training hyper-parameters
    max_epochs = 30
    lstm_dim = 250
    arc_hidden_dim = 100
    label_hidden_dim = 100

    # NOTE(review): `dy` (presumably DyNet) is not imported in this chunk —
    # confirm the import exists elsewhere in the file.
    pc = dy.ParameterCollection()
    # embeddings = Embeddings(pc, [(len(index[FORM])+1, 100), (len(index[XPOS])+1, 25)])
    # input_dim = embeddings.dim

    input_dim = 125
    num_labels = len(index[DEPREL])
Exemple #10
0
    parser.add_argument("--inputfile", required=True)
    parser.add_argument("--outbasename", required=True)
    parser.add_argument("--fields",
                        default=["FORM", "UPOS", "FEATS", "DEPREL"],
                        nargs='+')
    parser.add_argument("--size",
                        default=[100, 25, 25, 0],
                        type=int,
                        nargs='+')
    parser.add_argument("--min_frequency", default=5, type=int)
    parser.add_argument("--window", default=5, type=int)
    parser.add_argument("--sg")
    parser.add_argument("--seed", default=1, type=int)

    args = parser.parse_args()
    args.fields = [STR_TO_FIELD[f.lower()] for f in args.fields]
    return args


if __name__ == "__main__":
    # Imported here so the module can be used without gensim installed.
    from gensim.models import Word2Vec
    args = _parse_args()

    print("building index...", end=" ")
    # count items per requested field, then index the frequent ones
    dic = create_dictionary(read_conllu(args.inputfile), fields=args.fields)
    index = create_index(dic, min_frequency=args.min_frequency)
    print("done")
    # persist the index next to the requested output base name
    write_index(args.outbasename, index, args.fields)

    # train word2vec embeddings over the indexed corpus
    _word2vec(index, args)
Exemple #11
0
def build_vocabulary(path_data,
                     path_vocs_dict,
                     min_counts_dict,
                     columns,
                     sequence_len_pt=98,
                     use_char_featrue=False,
                     word_len_pt=98):
    """
    Build per-column vocabularies from a TSV data file.
    Args:
        path_data: str, path to the data file (blank lines separate sentences)
        path_vocs_dict: dict, output path for each column's vocabulary
        min_counts_dict: dict, minimum occurrence count per item
        columns: list of str, name of each column
        sequence_len_pt: int, percentile used to choose the sentence length
        use_char_featrue: bool, whether to also build a character vocabulary
            (intended for English input)
        word_len_pt: int, percentile used to choose the word length
    Returns:
        voc_sizes: list of int, size of each vocabulary written
        lengths: [sentence_length] (plus [word_length] when the char feature
            is enabled)
    """
    print('building vocs...')
    file_data = codecs.open(path_data, 'r', encoding='utf-8')
    line = file_data.readline()

    sequence_length_list = []  # length of every sentence
    # per-column item counters
    feature_item_dict_list = []
    for i in range(len(columns)):
        feature_item_dict_list.append(defaultdict(int))
    # char feature
    if use_char_featrue:
        char_dict = defaultdict(int)
        word_length_list = []  # length of every word
    sequence_length = 0
    sentence_count = 0  # number of sentences processed
    while line:
        line = line.rstrip()
        if not line:
            # a blank line terminates the current sentence
            sentence_count += 1
            sys.stdout.write('当前处理句子数: %d\r' % sentence_count)
            sys.stdout.flush()
            line = file_data.readline()
            sequence_length_list.append(sequence_length)
            sequence_length = 0
            continue
        items = line.split('\t')
        sequence_length += 1
        # print(items)
        # feature columns (all but the last)
        for i in range(len(columns) - 1):
            feature_item_dict_list[i][items[i]] += 1
        # label column: always the last item on the line
        feature_item_dict_list[-1][items[-1]] += 1
        # char feature: count characters of the token in the first column
        if use_char_featrue:
            for c in items[0]:
                char_dict[c] += 1
            word_length_list.append(len(items[0]))
        line = file_data.readline()
    file_data.close()
    # account for a final sentence without a trailing blank line
    if sequence_length != 0:
        sentence_count += 1
        sys.stdout.write('当前处理句子数: %d\r' % sentence_count)
        sequence_length_list.append(sequence_length)
    print()

    # write each vocabulary to disk and record its size
    voc_sizes = []
    if use_char_featrue:  # char feature
        size = create_dictionary(char_dict,
                                 path_vocs_dict['char'],
                                 start=2,
                                 sort=True,
                                 min_count=min_counts_dict['char'],
                                 overwrite=True)
        voc_sizes.append(size)
    for i, name in enumerate(columns):
        # label vocabulary (last column) starts at 1; feature vocabularies
        # start at 2, reserving an extra index
        start = 1 if i == len(columns) - 1 else 2
        size = create_dictionary(feature_item_dict_list[i],
                                 path_vocs_dict[name],
                                 start=start,
                                 sort=True,
                                 min_count=min_counts_dict[name],
                                 overwrite=True)
        print('voc: %s, size: %d' % (path_vocs_dict[name], size))
        voc_sizes.append(size)

    print('句子长度分布:')  # sentence-length distribution
    sentence_length = -1
    option_len_pt = [90, 95, 98, 100]
    if sequence_len_pt not in option_len_pt:
        option_len_pt.append(sequence_len_pt)
    for per in sorted(option_len_pt):
        tmp = int(np.percentile(sequence_length_list, per))
        if per == sequence_len_pt:
            sentence_length = tmp
            print('%3d percentile: %d (default)' % (per, tmp))
        else:
            print('%3d percentile: %d' % (per, tmp))
    if use_char_featrue:
        print('单词长度分布:')  # word-length distribution
        word_length = -1
        option_len_pt = [90, 95, 98, 100]
        if word_len_pt not in option_len_pt:
            option_len_pt.append(word_len_pt)
        for per in sorted(option_len_pt):
            tmp = int(np.percentile(word_length_list, per))
            if per == word_len_pt:
                word_length = tmp
                print('%3d percentile: %d (default)' % (per, tmp))
            else:
                print('%3d percentile: %d' % (per, tmp))

    print('done!')
    lengths = [sentence_length]
    if use_char_featrue:
        lengths.append(word_length)
    return voc_sizes, lengths
    # NOTE(review): everything below is unreachable dead code (it follows the
    # return) and references names (`parser`, `load_file`, ...) never defined
    # in this function — it looks like an unrelated argparse/embedding snippet
    # pasted in by the scraper and should be removed or moved elsewhere.
    parser.add_argument("--cluster_num",
                        default=10,
                        type=int,
                        help="number of semantic groups to construct")
    parser.add_argument("--postprocessing",
                        default=1,
                        type=int,
                        help="principal component removal")
    args = parser.parse_args()

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Load text file
    sentences = load_file(PATH_TO_SENTENCE)

    # Load dictionary
    args.id2word, args.word2id = create_dictionary(sentences)

    # Load word vectors
    args.word_vec_np = load_wordvec(PATH_TO_VEC, args.word2id)
    args.wvec_dim = args.word_vec_np.shape[1]

    # Load word weights
    args.word_weight = load_word_weight(PATH_TO_WORD_WEIGHTS,
                                        args.word2id,
                                        a=1e-3)

    # Construct semantic groups
    semantic_construction(args)

    # Generate embedding
    sentence_emb = compute_embedding(args, sentences)
def parse_fasta_input(options, Results, logger):
    """Run HMM-based gene detection over every input FASTA file.

    For protein input the hits are classified and written directly; for
    nucleotide input the sequences are translated (six frames), searched,
    elongated around hits, and ORFs are predicted with prodigal or ORFfinder.
    Hit counts are accumulated on *Results*.

    Returns:
        Path of the predicted-ORF FASTA.
        NOTE(review): `orfFile` keeps only the value from the final loop
        iteration, and the function raises NameError when `options.infiles`
        is empty — confirm this is intended.
    """
    modelName = path.splitext(path.basename(options.hmm_model))[0]
    logger.info('Parsing FASTA files')
    frame = '6'  # six-frame translation
    for fastafile in options.infiles:
        # Per-file output paths derived from the input and model base names.
        fastaBaseName = path.splitext(path.basename(fastafile))[0]
        hmmOut = '%s/%s-%s-hmmsearched.out' % (path.abspath(
            options.hmm_out_dir), fastaBaseName, modelName)
        fastaOut = '%s/%s-%s-filtered.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        aminoOut = '%s/%s-%s-filtered-peptides.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        orfFile = '%s/%s-%s-predicted-orfs.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        orfAminoFile = '%s/%s-%s-predicted-orfs-amino.fasta' % (path.abspath(
            options.final_gene_dir), fastaBaseName, modelName)
        hitFile = '%s/%s-positives.out' % (path.abspath(
            options.tmp_dir), fastaBaseName)
        elongated_fasta = '%s/%s-gene-elongated.fasta' % (path.abspath(
            options.tmp_dir), fastaBaseName)
        if options.protein:
            # Protein input: hmmsearch directly, classify, and extract hits.
            utils.perform_hmmsearch(fastafile, options.hmm_model, hmmOut,
                                    options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
        else:
            # Nucleotide input: translate first (optionally keeping the
            # peptide file), then run hmmsearch on the translation.
            if options.store_peptides:
                peptideFile = '%s/%s-amino.fasta' % (path.abspath(
                    options.tmp_dir), fastaBaseName)
                utils.translate_sequence(fastafile, peptideFile, options,
                                         frame)
                logger.info('Performing hmmsearch')
                utils.perform_hmmsearch(peptideFile, options.hmm_model, hmmOut,
                                        options)
            else:
                utils.translate_and_search(fastafile, options.hmm_model,
                                           hmmOut, options)
            utils.classifier(hmmOut, hitFile, options)
            hitDict = utils.create_dictionary(hitFile, options)
            utils.retrieve_fasta(hitDict, fastafile, fastaOut, options)
            if not path.isfile(fastaOut):
                logger.critical('Could not find file %s', fastaOut)


#                exit()
            else:
                # Extend the hit regions, then predict ORFs on the elongated
                # genes with prodigal (default) or ORFfinder.
                utils.retrieve_surroundings(hitDict, fastafile,
                                            elongated_fasta)
                if path.isfile(elongated_fasta):
                    if not options.orf_finder:
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_prodigal(elongated_fasta, options.tmp_dir,
                                              tmpORFfile,
                                              options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                    else:
                        tmpORFfile = '%s/%s-long-orfs.fasta' % (
                            options.tmp_dir, fastaBaseName)
                        predict_orfs_orfFinder(elongated_fasta,
                                               options.tmp_dir, tmpORFfile,
                                               options.min_orf_length)
                        orfFile = utils.retrieve_predicted_orfs(
                            options, tmpORFfile)
                # Write amino-acid sequences for the detected genes.
                if options.store_peptides:
                    options.retrieve_whole = False
                    utils.retrieve_peptides(hitDict, peptideFile, aminoOut,
                                            options)
                else:
                    tmpFastaOut = utils.make_fasta_unique(fastaOut, options)
                    utils.retrieve_predicted_genes_as_amino(options,
                                                            tmpFastaOut,
                                                            aminoOut,
                                                            frame='6')
        Results.count_hits(hitFile)
    # Tally ORFs from the last processed file only (see NOTE above).
    if path.isfile(orfFile):
        if not options.orf_finder:
            Results.count_orfs_genomes(orfFile)
        else:
            Results.predictedOrfs = Results.count_contigs(orfFile)

    return orfFile
Exemple #14
0
def generate_dataset():
    """Mix 5-second music and speech clips at dB levels 1..10 and write the
    mixed .wav files under datasets/train/<db_level>/."""
    all_audio_song_paths = list_dataset('./datasets/audio/')
    all_speech_song_paths = list_dataset('./datasets/speech/')

    # dB levels 1..10 and their corresponding mixing targets
    vector_aux = list(range(1, 11, 1))
    targets = target(vector_aux)

    # Randomize the order of the speech segments
    shuffle(all_speech_song_paths)

    # NOTE(review): this only returns when *both* lists are empty, because
    # `(a or b) == 0` requires a == 0 and b == 0. An empty speech list with
    # non-empty audio slips through (the pop(0) below would then raise
    # IndexError) — confirm the intended check.
    if (len(all_speech_song_paths) or len(all_audio_song_paths)) == 0:
        return

    aux = 0
    for k, audio_song_path in enumerate(all_audio_song_paths):

        # Load the full track to obtain its total duration
        audio_signal, audio_sr = librosa.load(audio_song_path,
                                              duration=None,
                                              sr=44100)

        audio_duration = librosa.get_duration(y=audio_signal,
                                              sr=audio_sr)

        # Skip music tracks shorter than 5 seconds
        if audio_duration < 5:
            # Move on to the next music track
            continue

        # Load 5 s of the audio signal
        audio_signal, audio_sr = librosa.load(audio_song_path,
                                              duration=5,
                                              sr=44100)

        audio_duration = librosa.get_duration(y=audio_signal,
                                              sr=audio_sr)

        # Wrap the signal and its metadata in a dictionary
        audio_dictionary = create_dictionary(audio_song_path, audio_duration, audio_signal, audio_sr)

        # Take one speech clip and remove it from the list (pop)
        speech_song_path = all_speech_song_paths.pop(0)

        # Load the clip to obtain its total duration
        speech_signal, speech_sr = librosa.load(speech_song_path,
                                                duration=None,
                                                sr=44100)

        speech_duration = librosa.get_duration(y=speech_signal,
                                               sr=speech_sr)

        exit_for = False
        # If the speech clip is shorter than 5 s, try the next one
        while speech_duration < 5:
            # Stop entirely when no speech clips remain
            if not all_speech_song_paths:
                exit_for = True
                break
            speech_song_path = all_speech_song_paths.pop(0)
            speech_signal, speech_sr = librosa.load(speech_song_path,
                                                    duration=None,
                                                    sr=44100)

            speech_duration = librosa.get_duration(y=speech_signal,
                                                   sr=speech_sr)
        if exit_for:
            break

        # Load 5 s of the speech signal
        speech_signal, speech_sr = librosa.load(speech_song_path,
                                                duration=5,
                                                sr=44100)

        speech_duration = librosa.get_duration(y=speech_signal,
                                               sr=speech_sr)

        # Wrap the signal and its metadata in a dictionary
        speech_dictionary = create_dictionary(speech_song_path, speech_duration, speech_signal, speech_sr)

        for db_level in vector_aux:

            audio_dict = normalize(audio_dictionary)

            speech_dict = normalize(speech_dictionary)

            # Mix the pair at the current dB level
            new_mixed_song = mix_signals(audio_dict, speech_dict, targets[db_level-1], db_level)

            '''
            extract_features(audio_dict)
            extract_features(speech_dict)
            extract_features(new_mixed_song)
            '''

            # Output layout: datasets/train/<db_level>/mix_<k>.wav
            path = os.path.join(getcwd(), 'datasets/train/', str(new_mixed_song['db_level']))
            if not os.path.exists(path):
                makedirs(path)
            librosa.output.write_wav(path + '/mix_' + str(k) + '.wav', new_mixed_song['signal'],
                                     new_mixed_song['sr'])

        aux += 1
        # Progress report every 100 processed tracks
        if aux % 100 == 0:
            total = len(vector_aux) * len(all_audio_song_paths)
            print('{} audios procesados de {}'.format(aux, total))
            if aux == len(all_audio_song_paths):
                songs_counter = len(vector_aux) * aux
                print('Proceso terminado. ¡Dataset de {} canciones creado con éxito!'.format(songs_counter))
                break