Beispiel #1
0
 def __init__(self, filename, filehash=True):
     """Initialise the base ``AudioFile`` and read the MP3 tags and stream info.

     :param filename: path handed to ``AudioFile.__init__``.
     :param filehash: forwarded unchanged to ``AudioFile.__init__``.

     The ``mp3.MP3`` handle lives on ``self.audio`` only while the two private
     loaders run; it is deleted again before the constructor returns.
     """
     AudioFile.__init__(self, filename, filehash)
     self.type = 'MPEG-3 audio stream'

     # NOTE(review): ``self.file`` is presumably set by AudioFile.__init__.
     self.audio = mp3.MP3(self.file)
     self.__load_tags()
     self.__load_info()

     # Drop the temporary handle so it is not kept on the instance.
     del self.audio
Beispiel #2
0
 def __init__(self, filename, filehash=True):
     """Initialise the base ``AudioFile`` and read the MP4 tags and stream info.

     :param filename: path handed to ``AudioFile.__init__`` and ``mp4.MP4``.
     :param filehash: forwarded unchanged to ``AudioFile.__init__``.

     The ``mp4.MP4`` handle lives on ``self.aac`` only while the two private
     loaders run; it is deleted again before the constructor returns.
     """
     AudioFile.__init__(self, filename, filehash)
     self.type = 'MPEG-4 audio stream'

     # Temporary handle consumed by the loaders below.
     self.aac = mp4.MP4(filename)
     self.__load_tags()
     self.__load_info()

     # Drop the temporary handle so it is not kept on the instance.
     del self.aac
Beispiel #3
0
 def check(self):
     """Grade the typed answer, update the counters and record statistics.

     A correct answer increments the "correct" counter and swaps in a new
     ``AudioFile``; a wrong answer increments the "wrong" counter and keeps
     the current clip. In both cases the attempt is passed to
     ``_stats.add_stats`` and the answer field is cleared.
     """
     answer = self.answerEdit.text()

     # Keep a reference to the clip that was actually answered: a correct
     # answer replaces ``self.audiofile`` below, and the statistics must
     # describe the clip the user heard, not its replacement.
     answered = self.audiofile

     if answered.check_answer(answer):
         self.correct_nb += 1
         self.correct_nbLabel.setText(str(self.correct_nb))
         self.audiofile = AudioFile()
     else:
         self.wrong_nb += 1
         self.wrong_nbLabel.setText(str(self.wrong_nb))

     _stats.add_stats(
         answer,
         answered.get_pinyin(),
         answered.get_id(),
     )
     self.answerEdit.clear()
Beispiel #4
0
    def getListFromPath(self, parent, index=0):
        """Return one dict per browsable entry of the directory ``parent``.

        :param parent: directory to list; the value ``0`` means ``"/"``.
        :param index: accepted for signature compatibility; not used here.
        :return: list of dicts. ``.ogg``/``.mp3`` files carry ``location``,
            ``id``, ``img``, ``name`` (file name plus length), ``time`` and
            ``seconds``; every entry that is not a regular file carries
            ``location``, ``id``, ``img`` and ``name`` (prefixed with "/").
            An empty list is returned when the directory cannot be listed.
        """
        logger.debug( "Getting contents from: %s" % parent )

        if parent == 0:
            parent = "/"

        try:
            dirlist = sorted(os.listdir(parent))
        except Exception as e:
            # Previously the error was only logged and the loop below then
            # failed on the undefined ``dirlist``; report and return nothing.
            logger.error( "at Browser.getListFromPath(): " + str(e) )
            return []

        entries = []

        for x in dirlist:
            # Names that change under get_ascii_filename() are skipped.
            if x != get_ascii_filename(x):
                continue

            real_path = os.path.join(parent, uniconvert2(x))

            if os.path.isfile(real_path):
                ext = os.path.splitext(x)[1].lower()
                if ext in ('.ogg', '.mp3'):
                    # read vorbis info
                    self.af = AudioFile(real_path)
                    seconds = int(self.af.getLength())
                    entries.append({
                        'location': real_path,
                        'id': 'idd',
                        'img': 'imgg',
                        'name': x + " (" + str(seconds) + ")",
                        'time': seconds,
                        'seconds': seconds,
                    })
            else:
                # Anything that is not a regular file is listed as a folder.
                entries.append({
                    'location': real_path,
                    'id': 'idd',
                    'img': 'imgg',
                    'name': "/" + x,
                })

        return entries
def biteval(filename):
    """Print per-bit counts for the first 1000 samples and ask for a verdict.

    The samples of ``AudioFile(filename)`` are scaled by
    ``2 ** (bitDepth - 1)``, each of the first 1000 is converted with
    ``convert_to_bin_array`` and the resulting arrays are summed position by
    position. The totals are printed and the user is asked whether the file
    has bit-depth problems.

    :param filename: path passed to ``AudioFile``.
    :return: the user's reply converted with ``u.str2bool``.
    """
    source = AudioFile(filename)
    tally = [0] * source.bitDepth

    # Scale every sample by 2 ** (bitDepth - 1) before inspecting its bits.
    scaled = np.array([2**(source.bitDepth - 1) * value for value in source])

    for sample in scaled[:1000]:
        bits = convert_to_bin_array(int(sample), len(tally))
        tally = [total + bit for total, bit in zip(tally, bits)]

    print(tally)
    reply = input("Does this file have BIT DEPTH problems? [y/n]")
    return u.str2bool(reply)
Beispiel #6
0
def searchFood(term,
               limit=5,
               sort=2,
               radius_filter=8000):  # sort=2 requests the highest-rated results
    """Search for ``term`` around Pittsburgh and read the results aloud.

    :param term: search term, e.g. a kind of food.
    :param limit: maximum number of businesses to request and announce.
    :param sort: sort mode forwarded to the search client.
    :param radius_filter: search radius forwarded to the search client.
    :return: ``None``. A WAV file is synthesised next to this module and
        played; nothing is synthesised or played when the search is empty.
    """
    params = {
        'term': term,
        'limit': limit,
        'sort': sort,
        'radius_filter': radius_filter,
        'lang': 'fr'
    }
    result = client.search('Pittsburgh', **params)

    # The service may return fewer than ``limit`` businesses; indexing up to
    # ``limit`` unconditionally raised IndexError in that case.
    found = [business.name for business in result.businesses[:limit]]
    if not found:
        return

    if len(found) == 1:
        names = found[0]
    else:
        names = ", ".join(found[:-1]) + ", and " + found[-1]

    text_to_speech = TextToSpeechV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=True)  # Optional flag

    # Write and play the SAME file: the recording used to be written next to
    # this module but played from a path relative to the working directory.
    wav_path = join(dirname(__file__), 'resources/recommendations.wav')

    # Explicit spaces around the term; it used to be glued to its neighbours
    # ("Some goodpizzaplaces are, ").
    sentence = 'Some good ' + term.strip() + ' places are, ' + names

    with open(wav_path, 'wb') as audio_file:
        audio_file.write(
            text_to_speech.synthesize(sentence,
                                      accept='audio/wav',
                                      voice="en-US_AllisonVoice"))

    audio = AudioFile(wav_path)
    try:
        audio.play()
    finally:
        # Close the player even if playback fails.
        audio.close()
Beispiel #7
0
    def __init__(self, fn, userInfo, mode='publish'):
        """Open the audio file ``fn`` and import it into the database.

        :param fn: path of the file to import.
        :param userInfo: mapping describing the uploading user; stored on the
            instance for the import helpers.
        :param mode: ``'publish'`` imports the file and, on success, calls
            ``setImported()``; ``'edit'`` re-imports the file to update its
            database row. Any other value only opens the file.
        """
        self.fn = fn
        self.userInfo = userInfo
        self.mode = mode
        self.warnIncomplete = 1

        self.config = LoadConfig.LoadConfig()

        # open ogg file
        self.af = AudioFile(fn)
        self.metadata = self.af.read_comments()

        # Compare by value: ``is`` tests object identity and only matched by
        # accident of string interning.
        if self.mode == 'publish':
            newFilePath = self.fn
            logger.debug5("ImportOGG() self.fn: " + self.fn)
            logger.debug5("ImportOGG() newFilePath: " + newFilePath)

            if newFilePath:
                if self.import2db(newFilePath):
                    self.setImported()
        elif self.mode == 'edit':
            if self.import2db(self.fn):
                logger.debug1("Updated database from file: %s" % self.fn)
Beispiel #8
0
def augment(input_dir: Path, output_dir: Path):
    """Create augmented mixes from random pairs of WAV files.

    Files from ``input_dir`` are drawn in pairs without replacement. Each
    file gets a random varispeed and gain reduction, the two are mixed with a
    random overlap, padded with random silence and saved to ``output_dir``.
    With an odd number of inputs the last file is left unused.

    :param input_dir: directory containing the ``*.wav`` source files.
    :param output_dir: directory for the mixes; created when missing.
    :raises FileNotFoundError: if ``input_dir`` does not exist.
    """
    # ``Path.exists`` is a method: without the call the bound method is
    # always truthy, so neither of these checks could ever trigger.
    if not input_dir.exists():
        raise FileNotFoundError('Input directory does not exist.')

    if not output_dir.exists():
        print("Making output directory {}".format(output_dir))
        output_dir.mkdir(parents=True)

    seed(SEED)

    # A sorted list keeps the draws reproducible for a given SEED; picking
    # from a set depended on per-process hash ordering.
    filelist = sorted(input_dir.glob('*.wav'))
    print("{} input files found".format(len(filelist)))

    # gain reduction in steps of -3 db from [-15 db, 0 db]
    # (the -9 dB step was missing from the original list)
    reduction = [0, -3, -6, -9, -12, -15]

    while len(filelist) > 1:
        print("{} files remaining...".format(len(filelist)))
        path1 = choice(filelist)
        filelist.remove(path1)

        path2 = choice(filelist)
        filelist.remove(path2)

        # load audio files and apply a random amount of:
        #   varispeed between [0.9, 1.1]
        #   gain reduction picked from ``reduction``
        f1 = AudioFile(path=path1).varispeed(uniform(0.9,
                                                     1.1)).gain(choice(reduction))
        f2 = AudioFile(path=path2).varispeed(uniform(0.9,
                                                     1.1)).gain(choice(reduction))

        # mix two audio files - random amount of overlap from [0.5 to 1.5]
        f1.mix(audio=f2, relative_start=uniform(0.5, 1.5))

        # add a random amounts of silence [0 sec, 5 sec] before and after audio
        f1.add_silence(sec_before=uniform(0, 5), sec_after=uniform(0, 5))

        # save as new clean file
        filename = f1.filename.split(".wav")[0] + "_" + f2.filename
        f1.save(output_path=output_dir, filename=filename)
    def save_spec(self, filepath, converted_dir, audio_path):
        """Slice a WAV file into windows and save labelled spectrograms.

        The file is loaded with all of its channels. For every channel, a
        window of ``self.window_size`` seconds is moved in steps of
        ``self.slide`` seconds; a spectrogram and a label matrix are built for
        each window, and the pair is saved only when the first eight rows of
        ``label_matrix[0]`` contain a 1.

        :param filepath: path of the WAV file to process.
        :param converted_dir: passed to the ``AudioFile`` /
            ``AccelerometerFile`` writers.
        :param audio_path: directory used to look up the label file.
        """
        signal, rate = librosa.load(filepath, sr=None, mono=False)

        # Reshaping the Audio file (mono) to deal with all wav files similarly
        if signal.ndim == 1:
            signal = signal.reshape(1, -1)
        n_channels = signal.shape[0]

        # Extracting file identifier from the filepath
        # Example: (i.e.'cc16_352a_14401s' from path "'/cache/rmishra/cc16_ML/cc16_352a/cc16_352a_14401s_acc.wav'")
        # for saving spec and label files with begin and end timestamp
        file_id = os.path.split(filepath)[-1].rsplit("_", maxsplit=1)[0]

        for ch, samples in enumerate(signal):
            whole_seconds = int(len(samples) / rate)
            leftover = whole_seconds % self.window_size

            audio_writer = AudioFile(self.base_dir, converted_dir)
            acc_writer = AccelerometerFile(self.base_dir, converted_dir, ch)

            # Single-channel input goes to the audio writer, anything else to
            # the accelerometer writer of the current channel.
            if n_channels == 1:
                writer = audio_writer
            else:
                writer = acc_writer

            last_start = whole_seconds - leftover - self.window_size
            for begin_time in range(0, last_start, self.slide):
                end_time = begin_time + self.window_size

                s_db = get_spectrogram(begin_time, end_time, samples, rate)

                # fetch the label txt file against the file identifier and create a label dataframe for calls between
                # the start and end timestamp
                call = fetch_files_with_numcalls(audio_path,
                                                 1).loc[file_id]['calls']
                timesteps = s_db.shape[1]
                timesteps_per_second = timesteps / self.window_size
                label_path = os.path.join(audio_path, call)
                df = create_label_dataframe(label_path, begin_time, end_time,
                                            self.window_size,
                                            timesteps_per_second)

                # one hot encoding the label information for the audio data in a spec frame
                label_matrix = create_label_matrix(df, timesteps)

                if 1 not in label_matrix[0][:8, :]:
                    continue

                print("Saving spectrogram: " + filepath + " " +
                      str(begin_time) + " to " + str(end_time))
                writer.save_spec_label(s_db, begin_time, end_time, file_id,
                                       label_matrix)
Beispiel #10
0
    def __init__(self, fn, userInfo, mode='publish'):
        """Open the audio file ``fn`` and import it into the database.

        :param fn: path of the file to import.
        :param userInfo: mapping describing the uploading user; stored on the
            instance for the import helpers.
        :param mode: ``'publish'`` imports the file and, on success, calls
            ``setImported()``; ``'edit'`` re-imports the file to update its
            database row. Any other value only opens the file.
        """
        self.fn             = fn
        self.userInfo       = userInfo
        self.mode           = mode
        self.warnIncomplete = 1

        self.config        = LoadConfig.LoadConfig()

        # open ogg file
        self.af           = AudioFile(fn)
        self.metadata     = self.af.read_comments()

        # Compare by value: ``is`` tests object identity and only matched by
        # accident of string interning.
        if self.mode == 'publish':
            newFilePath = self.fn
            logger.debug5( "ImportOGG() self.fn: " + self.fn )
            logger.debug5( "ImportOGG() newFilePath: " + newFilePath )

            if newFilePath:
                if self.import2db(newFilePath):
                    self.setImported()
        elif self.mode == 'edit':
            if self.import2db(self.fn):
                logger.debug1( "Updated database from file: %s" % self.fn )
Beispiel #11
0
class Window(QWidget):
    """Quiz window: play an audio clip, type the answer, keep score."""

    def __init__(self):
        """Build the UI and create the first ``AudioFile``."""
        super().__init__()

        self.initUI()
        self.audiofile = AudioFile()

    def initUI(self):
        """Create the widgets, lay them out, wire the signals and show."""
        self.wrong_nb = 0
        self.correct_nb = 0
        grid = QGridLayout()
        play = QPushButton('Play', self)
        check = QPushButton('Check', self)
        self.answerEdit = QLineEdit()
        title = QLabel('answer')
        correctLabel = QLabel('correct: ')
        wrongLabel = QLabel('wrong: ')
        self.wrong_nbLabel = QLabel(str(self.wrong_nb))
        self.correct_nbLabel = QLabel(str(self.correct_nb))

        self.setLayout(grid)

        # Both the button and the Return key in the answer field grade the
        # current answer.
        play.clicked.connect(self.play)
        check.clicked.connect(self.check)
        self.answerEdit.returnPressed.connect(self.check)

        grid.addWidget(correctLabel, 0, 0)
        grid.addWidget(self.correct_nbLabel, 0, 1)
        grid.addWidget(wrongLabel, 0, 2)
        grid.addWidget(self.wrong_nbLabel, 0, 3)
        grid.addWidget(title, 1, 1)
        grid.addWidget(self.answerEdit, 1, 2)
        grid.addWidget(play, 2, 1)
        grid.addWidget(check, 2, 2)

        # The space bar replays the clip.
        self.shortcut = QShortcut(QKeySequence(Qt.Key_Space), self)
        self.shortcut.activated.connect(self.play)

        self.answerEdit.setFocus()

        print(self.__dict__)
        self.show()

    def play(self):
        """Play the current audio clip."""
        self.audiofile.play()

    def check(self):
        """Grade the typed answer, update the counters and record statistics.

        A correct answer increments the "correct" counter and swaps in a new
        ``AudioFile``; a wrong answer increments the "wrong" counter and keeps
        the current clip. In both cases the attempt is passed to
        ``_stats.add_stats`` and the answer field is cleared.
        """
        answer = self.answerEdit.text()

        # Keep a reference to the clip that was actually answered: a correct
        # answer replaces ``self.audiofile`` below, and the statistics must
        # describe the clip the user heard, not its replacement.
        answered = self.audiofile

        if answered.check_answer(answer):
            self.correct_nb += 1
            self.correct_nbLabel.setText(str(self.correct_nb))
            self.audiofile = AudioFile()
        else:
            self.wrong_nb += 1
            self.wrong_nbLabel.setText(str(self.wrong_nb))

        _stats.add_stats(
            answer,
            answered.get_pinyin(),
            answered.get_id(),
        )
        self.answerEdit.clear()
Beispiel #12
0
def extractor_var(origen, framelen=0.25, destino=''):
    """
    Extract a feature matrix from every audio file matched by ``origen``.

    Each file is opened with ``AudioFile.open`` and cut into frames of
    ``sampleRate * framelen`` samples. Every frame fills one row of a
    float32 matrix of shape (number of frames, NUM_PARAMETROS). When
    ``destino`` is non-empty the matrix of each file is saved there in npy
    format, one file per audio. The matrices of all files are also
    concatenated row-wise and returned.

    :param origen: a directory (its ``*.wav`` files are processed) or a
        file path / glob pattern
    :type origen: string
    :param framelen: factor multiplied by the sample rate to get the frame
        size passed to ``frames`` (presumably a length in seconds)
    :type framelen: float
    :param destino: directory where the per-file matrices are saved; nothing
        is saved when it is empty
    :type destino: string
    :return: the row-wise concatenation of the per-file matrices

    The process exits through ``sys.exit()`` when ``origen`` is neither a
    directory nor a file, or when nothing matches it.

    According to the original note, the feature set currently configured is
    a vector of 20 parameters:
    lst_energy - hzcrr - centroid - spread - variance - rolloff - mean - crest - mfcc (8)
    """

    # A directory is turned into a "*.wav" glob pattern; a file path is used
    # as given; anything else aborts the program.
    if not os.path.isdir(origen):
        if not os.path.isfile(origen):
            print 'Directorio o nombre de archivos de origen no valido o sin extension (wav/mp3)'
            sys.exit()
        else:
            origen = origen
    else:
        origen = os.path.join(origen, '*.wav')

    if not glob.glob(origen):
        print 'no hay archivos de formato wav en el directorio'
        sys.exit()

    vectortotal = []
    primero = True  # True until the first file has been appended

    print 'Inicio del parametrizador. Extraccion segundo a segundo y con %i parametros' % NUM_PARAMETROS

    for filename in (glob.glob(origen)):
        print '\nVectorizando archivo: ', filename
        t1 = time.time()
        s = AudioFile.open(filename)
        sf = s.frames(s.sampleRate * framelen)
        num_ventanas = len(sf)
        # Analysis window length in samples, used by the energy and flux features.
        lenv = np.round(s.sampleRate * VENTANA)
        vector = np.zeros([num_ventanas, NUM_PARAMETROS], dtype=np.float32)
        for indf, frame in enumerate(sf):
            print len(frame)
            # NOTE(review): frames are requested with sampleRate * framelen
            # samples, so with the default framelen=0.25 this test looks true
            # for the very first frame and would leave every row at zero.
            # Confirm against the semantics of AudioFile.frames().
            if len(frame) < s.sampleRate:
                break
            Espectro = frame.spectrum()
            # Next free column in the current row; every feature below
            # advances it, so the column layout follows the statement order.
            acumulado = 0

            # One column per configured parameter: a falsy type flag selects
            # the function of that name on Energy, a truthy one stores the
            # spectrum mean.
            for param in zip(PARAMETROS, TIPO_PARAMETROS):
                if not param[1]:
                    vector[indf,
                           acumulado] = getattr(Energy,
                                                param[0])(frame,
                                                          windowSize=lenv,
                                                          solape=SOLAPE)
                else:
                    vector[indf, acumulado] = Espectro.mean(
                    )  # getattr(Espectro, param[0])()
                acumulado = acumulado + 1

            if MFCC > 0:
                mfcc_features = mfcc(frame,
                                     samplerate=s.sampleRate,
                                     winlen=VENTANA,
                                     numcep=MFCC)
                # Mean of each coefficient over the analysis windows.
                mfcc_means = np.mean(mfcc_features, 0)
                for i in range(0, MFCC):
                    vector[indf, acumulado] = mfcc_means[i]
                    acumulado = acumulado + 1
                if DELTAS:
                    # Sums of the first and second differences of every
                    # coefficient along the window axis.
                    delta = np.zeros(MFCC)
                    dobledelta = np.zeros(MFCC)
                    for i in range(0, MFCC):
                        diferencias = np.diff(mfcc_features[:, i])
                        delta[i] = np.sum(diferencias)
                        dobledelta[i] = np.sum(np.diff(diferencias))
                    for i in range(0, MFCC):
                        vector[indf, acumulado] = delta[i]
                        acumulado = acumulado + 1
                    for i in range(0, MFCC):
                        vector[indf, acumulado] = dobledelta[i]
                        acumulado = acumulado + 1
            if CHROMA > 0:
                array_chroma = Espectro.chroma()
                for i in range(0, CHROMA):
                    vector[indf, acumulado] = array_chroma[i]
                    acumulado = acumulado + 1
            if FLUX > 0:
                # Spectral flux over the sub-frames of the current frame.
                spectral_frames = frame.frames(lenv)
                spectra = [f.spectrum_dct() for f in spectral_frames]
                flujo = SpectralFlux.spectralFlux(spectra, rectify=True)
                for i in range(0, FLUX):
                    vector[indf, acumulado] = flujo[i]
                    acumulado = acumulado + 1

        print 'Tiempo de parametrizacion (minutos): '
        print(time.time() - t1) / 60

        # Output name: the file's base name up to its first dot.
        archivo = os.path.split(filename)[1].split('.')[0]
        ruta = os.path.join(destino, archivo)

        if destino:
            np.save(ruta, vector)
        # Accumulate the rows of every file into one matrix.
        if primero:
            vectortotal = np.array(vector)
        else:
            vectortotal = np.append(vectortotal, np.array(vector), axis=0)
        primero = False

    return vectortotal
Beispiel #13
0
    def __init__(self):
        """Initialise the base widget, build the UI, then create an AudioFile."""
        super().__init__()

        # The widgets are built first; the audio file is created afterwards.
        self.initUI()
        self.audiofile = AudioFile()
Beispiel #14
0
class Browser:
    '''
    The browser object returns an array of items, typically to be used
    by the scroller object from the Burn Station.

    It should also provide a series of methods to browse back and forth
    through the child/parent items.
    '''

    #--------------------------------------------------------------------
    def __init__(self, level='', itemID=0):
        '''
        Store the browsing level and item id and create the Jamendo client.
        '''
        self.level = level
        self.itemID = itemID
        self.j = Jamendo()
        logger.debug("******** level: " + level)

    #--------------------------------------------------------------------
    def SetType(self, type):
        '''
        Switch the browsing level ("path", "jamendo", "labels", ...).
        '''
        self.level = type

    #--------------------------------------------------------------------
    def getList(self, parent=0, index=0):
        '''
        Return the items under ``parent`` for the current browsing level.
        '''
        if self.level == "path":
            return self.getListFromPath(parent, index)
        if self.level == "jamendo":
            return self.getListFromJamendo(parent, index)
        # "labels" and every other level are served from the database.
        # NOTE(review): getListFromDB is not defined in this excerpt.
        return self.getListFromDB(parent, index)

    #--------------------------------------------------------------------
    def getListFromJamendo(self, parent, index=0):
        '''
        Return the result of an artist search with an empty search string.
        ``parent`` and ``index`` are not used.
        '''
        return self.j.search_artist_by_name(search="")

    #--------------------------------------------------------------------
    def getListFromPath(self, parent, index=0):
        '''
        Return one dict per browsable entry of the directory ``parent``.

        ``parent`` equal to 0 means "/". ``index`` is not used. Files ending
        in .ogg/.mp3 carry location, id, img, name (file name plus length),
        time and seconds; every entry that is not a regular file carries
        location, id, img and name (prefixed with "/"). An empty list is
        returned when the directory cannot be listed.
        '''
        logger.debug("Getting contents from: %s" % parent)

        if parent == 0:
            parent = "/"

        try:
            dirlist = sorted(os.listdir(parent))
        except Exception as e:
            # Previously the error was only logged and the loop below then
            # failed on the undefined ``dirlist``; report and return nothing.
            logger.error("at Browser.getListFromPath(): " + str(e))
            return []

        entries = []

        for x in dirlist:
            # Names that change under get_ascii_filename() are skipped.
            if x != get_ascii_filename(x):
                continue

            real_path = os.path.join(parent, uniconvert2(x))

            if os.path.isfile(real_path):
                ext = os.path.splitext(x)[1].lower()
                if ext in ('.ogg', '.mp3'):
                    # read vorbis info
                    self.af = AudioFile(real_path)
                    seconds = int(self.af.getLength())
                    entries.append({
                        'location': real_path,
                        'id': 'idd',
                        'img': 'imgg',
                        'name': x + " (" + str(seconds) + ")",
                        'time': seconds,
                        'seconds': seconds
                    })
            else:
                # Anything that is not a regular file is listed as a folder.
                entries.append({
                    'location': real_path,
                    'id': 'idd',
                    'img': 'imgg',
                    'name': "/" + x
                })

        return entries
Beispiel #15
0
class ImportOGG:
    """Import the tags of one audio file into the netjuke database.

    In ``'publish'`` mode a new track row is inserted (unless the file is
    already known) and the matching ``mp3tmp`` rows are flagged as imported.
    In ``'edit'`` mode the existing track row is updated from the file.
    """

    def __init__(self, fn, userInfo, mode='publish'):
        """Open ``fn`` and run the import for the given ``mode``.

        :param fn: path of the file to import.
        :param userInfo: mapping with at least the keys read by the helpers
            (``'spoolDir'``, ``'home'``, ``'ID'``).
        :param mode: ``'publish'`` or ``'edit'``; any other value only opens
            the file.
        """
        self.fn = fn
        self.userInfo = userInfo
        self.mode = mode
        self.warnIncomplete = 1

        self.config = LoadConfig.LoadConfig()

        # open ogg file
        self.af = AudioFile(fn)
        self.metadata = self.af.read_comments()

        # Modes are compared by value: ``is`` tests object identity and only
        # matched by accident of string interning.
        if self.mode == 'publish':
            newFilePath = self.fn
            logger.debug5("ImportOGG() self.fn: " + self.fn)
            logger.debug5("ImportOGG() newFilePath: " + newFilePath)

            if newFilePath:
                if self.import2db(newFilePath):
                    self.setImported()
        elif self.mode == 'edit':
            if self.import2db(self.fn):
                logger.debug1("Updated database from file: %s" % self.fn)

    def isImported(self, path):
        """Look up the track row for this file.

        :param path: NOTE(review): this argument is overwritten below with a
            location derived from ``self.fn``; confirm whether the caller's
            value was meant to be used.
        :return: ``(rowcount, metadata)`` where ``metadata`` holds the ID,
            title, artist, album, label and license of the last matching row
            (empty when nothing matched).
        """
        metadata = {}

        path = removePathComponent(self.fn, "/usr/local/media/")
        name, ext = os.path.splitext(path)

        # Locations are looked up with an .ogg extension.
        path = name + ".ogg"

        db = DB.connect()
        cursor = db.cursor()
        sql = "SELECT tr.id, tr.name, ar.name, al.name, la.name, li.name"
        sql += " FROM netjuke_tracks tr, netjuke_artists ar, netjuke_albums al, netjuke_labels la, licenses li"
        sql += " WHERE location = '%s'" % MySQLdb.escape_string(path)
        sql += " AND tr.ar_id=ar.id AND tr.al_id=al.id AND tr.la_id=la.id AND tr.license=li.id"

        logger.debug99("Using SQL query to check if file is imported: %s" %
                       sql)
        cursor.execute(sql)
        result = cursor.fetchall()
        for record in result:
            metadata['ID'] = record[0]
            metadata['title'] = record[1]
            metadata['artist'] = unicode(record[2], 'utf8')
            metadata['album'] = unicode(record[3], 'utf8')
            metadata['label'] = unicode(record[4], 'utf8')
            metadata['license'] = record[5]

        logger.debug5("Import.isImported(%s) rowcount = %i" %
                      (path, cursor.rowcount))

        return (cursor.rowcount, metadata)

    def setImported(self):
        '''
        Should flag or delete from mp3tmp when file was imported, and delete the source file.

        As written, every mp3tmp row whose location matches the .mp3
        counterpart of this file is flagged with ``imported = 1``.
        '''

        sourceFile = removePathComponent(self.fn, self.userInfo['spoolDir'])
        name, ext = os.path.splitext(sourceFile)
        sourceFile = name + ".mp3"

        db = DB.connect()

        sql = "SELECT location FROM mp3tmp WHERE location LIKE '%s'" % MySQLdb.escape_string(
            sourceFile)
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
        for record in result:
            sql = "UPDATE mp3tmp SET imported = 1 WHERE location = '%s'" % MySQLdb.escape_string(
                record[0])
            db.set_character_set('utf8')
            cursor = db.cursor()
            cursor.execute(sql)
            result = cursor.fetchall()

    """
    def MoveImported(self, file, target, spoolDir):
        return moveExactTree(file, target, spoolDir, 1)

    def CopyImported(self, file, target, spoolDir):
        logger.debug5( "---------------------------" )
        logger.debug5( "in ImportOGG.CopyImported: " )
        logger.debug5( "file: " + file )
        logger.debug5( "target: " + target )
        logger.debug5( "---------------------------" )

        target = copyExactTree(file, target, spoolDir, 1)

        return target
    """

    def import2db(self, newFilePath):
        """Insert or update the track row for ``newFilePath``.

        :param newFilePath: path of the file whose tags are written.
        :return: ``True`` after the row was written; ``None`` when the file
            is skipped because it is already imported (publish mode).

        NOTE(review): a mode other than 'publish'/'edit' leaves
        ``isImported`` and ``sql`` unset and fails further down.
        """

        self.af.SetFile(newFilePath)
        self.metadata = self.af.read_comments()
        logger.debug99("at import2db, metadata is: %s" % self.metadata)

        location = location2db(newFilePath, self.userInfo['home'])
        print("****************** LOCATION: %s" % location)

        if self.mode == 'publish':
            (isImported, metadata) = self.isImported(location)
        elif self.mode == 'edit':
            isImported = 1

        logger.debug99("------------------------------------")
        logger.debug99("in ImportOGG.import2db():")
        logger.debug99("user home   = " + self.userInfo['home'])
        logger.debug99("is imported = " + str(isImported))
        logger.debug99("newFilePath = " + newFilePath)
        logger.debug99("location    = " + location)
        logger.debug99("------------------------------------")

        if (self.mode == 'publish') and isImported:
            logger.debug1("File already imported.. skipping: %s" % location)
            return

        artistID = getID('artists', self.metadata['artist'])
        albumID = getID('albums', self.metadata['album'])
        labelID = getID('labels', self.metadata['label'])
        licenseID = getID('licenses', self.metadata['license'])

        # An ID of 1 is treated as "incomplete tag" by the check below.
        if artistID == 1 or albumID == 1 or labelID == 1 or licenseID == 1:
            logger.warn("Incomplete tags for file!")
            self.warnIncomplete = 0

        filename = os.path.basename(newFilePath)

        # Fall back to the file name when the title tag is empty.
        if self.metadata['title'] == '':
            title = MySQLdb.escape_string(filename)
        else:
            title = MySQLdb.escape_string(
                uniconvert2(self.metadata['title']).encode('utf8'))
        size = self.af.getSize()
        seconds = int(self.af.getLength())
        track_number = self.af.getTag('track_number')
        if track_number == '':
            track_number = self.af.getTag('tracknumber')
        if track_number == '':
            track_number = 0
        year = self.af.getTag('year')
        if year == '':
            year = 0
        bitrate = self.af.getBitrate()
        sample_rate = self.af.getSamplerate()
        kind = 'OGG/Vorbis'
        location = MySQLdb.escape_string(uniconvert2(location).encode('utf8'))
        comments = MySQLdb.escape_string(
            uniconvert2(self.metadata['comment']).encode('utf8'))

        if self.mode == 'publish':
            sql = "INSERT INTO"
        elif self.mode == 'edit':
            sql = "UPDATE"
        trackID = getIDbyLocation(location)

        # The statement text is unchanged; it is only laid out one column
        # per line.
        sql += (" netjuke_tracks SET ar_id = '" + str(artistID) +
                "', al_id = '" + str(albumID) +
                "', ge_id = '1', la_id = '" + str(labelID) +
                "', name = '" + title +
                "', size = '" + str(size) +
                "', time = '" + str(seconds) +
                "', track_number = '" + str(track_number) +
                "', year = '" + str(year) +
                "', date = now(), bit_rate = '" + str(bitrate) +
                "', sample_rate = '" + str(sample_rate) +
                "', kind = '" + kind +
                "', location = '" + str(location) +
                "', comments = '" + comments +
                "', mtime = now(), license = '" + str(licenseID) +
                "', lg_id = '1', enabled = 2")

        if self.mode == 'edit':
            sql += " WHERE id = %s" % str(trackID)

        logger.debug99("Using SQL query: " + sql)

        db = DB.connect()
        cursor = db.cursor()
        cursor.execute(sql)

        # NOTE(review): after an UPDATE (edit mode) insert_id() presumably
        # does not identify the edited track - confirm whether trackID
        # should be used for the uploader relation in that case.
        fileID = db.insert_id()
        self.DoFileUploaderRelation(fileID, self.userInfo['ID'])

        # FIXME: do some checks, and return False if this fails
        print("=" * 77)
        return True

    def DoFileUploaderRelation(self, fileID, userID):
        """Record that ``userID`` uploaded ``fileID`` (duplicates ignored).

        Both ids are formatted with ``%i`` and must therefore be numeric.
        """
        sql = "INSERT IGNORE INTO file_uploader SET file = '%i', uploader = '%i'" % (
            fileID, userID)
        logger.debug99("Using SQL query: " + sql)

        db = DB.connect()
        cursor = db.cursor()
        cursor.execute(sql)
Beispiel #16
0
 def __init__(self, file):
     """Delegate construction to ``AudioFile.__init__`` with ``file``."""
     # No state of its own: everything is set up by the base initialiser.
     AudioFile.__init__(self, file)
Beispiel #17
0
def processSpeech(name):
    '''
    Greets name, asks what they want to do and acts on the spoken reply.

    The request is recorded and transcribed. "eat" triggers a food search,
    "budget" the budget report and "time" announces the current time. After
    a food or budget request, or when no keyword (or no speech at all) was
    recognised, the user is asked again; the function returns once the time
    has been announced.

    @param name  string of name from face recognition
    @return      None
    '''
    # Map the recognised surname to the first name used in the greeting.
    if "Cho" in name:
        name = "Edward"
    elif "Wu" in name:
        name = "Johnny"
    elif "Deng" in name:
        name = "Michelle"
    elif "Liu" in name:
        name = "Vincent"

    # Built without a backslash continuation: the old literal embedded the
    # source indentation in the spoken text.
    greeting = 'Hello ' + name + '. What can I do for you today?'
    keyPhrase = 'I want to eat '

    # A loop replaces the former self-recursion, which added one stack frame
    # per request and eventually hit the recursion limit.
    while True:
        #  Initial greeting
        tts(greeting, 'resources/greeting.wav')
        _play_wav('resources/greeting.wav')

        #  Wait for response
        print('waiting for response')
        recorder = Recorder('request.wav')
        recorder.record_to_file()
        print('transcribing')
        result = stt('request.wav')
        try:
            transcript = result['results'][0]['alternatives'][0]['transcript']
        except (KeyError, IndexError, TypeError):
            # Nothing was recognised: ask again instead of crashing.
            continue

        # Eating Section
        if "eat" in transcript:
            if keyPhrase in transcript:
                # Take what follows the key phrase wherever it occurs,
                # not just when the phrase opens the transcript.
                yelp_search.searchFood(transcript.split(keyPhrase, 1)[1])
            else:
                print('Did not say key phrase')
            continue

        # Budget Section
        if "budget" in transcript:
            watson_budget("Edward", "Friday")
            continue

        if "time" in transcript:
            now = datetime.datetime.now()
            if (datetime.time(4, 20, 0, 0) < now.time() <
                    datetime.time(4, 20, 59, 0)):
                tts("ay. lmao", 'resources/420.wav')
                _play_wav('resources/420.wav')
            else:
                tts("The current time is " +
                    now.strftime("%I:%M%p on %B %d, %Y"),
                    'resources/time.wav')
                _play_wav('resources/time.wav')
            return

        # Default Case for not getting any proper key words: ask again.


def _play_wav(path):
    '''Play the WAV file at path and always close the player afterwards.'''
    audio = AudioFile(path)
    try:
        audio.play()
    finally:
        audio.close()
Beispiel #18
0
def preprocess(filename, timidity, latency, truncate, pad=1, get_raw=False):
    """
    Preprocess an audio file and its MIDI counterpart. Computes transforms and labels.

    The MIDI file is expected next to the audio file, with the same name and
    a ``.mid`` extension.

    :param filename: audio filename
    :param timidity: set to True if the files was rendered with timidity
    :param latency: in seconds
    :param truncate: in seconds (0 for no truncation)
    :param pad: in seconds, will be added at the start and end before spectral transforms
    :param get_raw: set to True to return raw computed spectrograms (e.g. for visualization)
    :return: when ``get_raw`` is false, the tuple ``(tensor_mel[:max_len],
        tensor_cqt_select, onset_labels, onset_caracs, notes_labels_select,
        notes_caracs_select, dname)``; otherwise ``(melgrams, tensor_mel,
        onset_labels, cqgram, tensor_cqt, time, FreqAxisLog, max_len, step)``
    """
    # Strip only the LAST extension: rsplit('.') without a limit cut the
    # name at its first dot ("take.v2.wav" -> "take.mid").
    filename_midi = filename.rsplit('.', 1)[0] + '.mid'

    dname = filename.replace('/', '_').replace('\\', '_')

    # Load files
    # NOTE(review): padding and truncation assume 44100 Hz although the
    # label loop below uses audio.Fs - confirm the files are always 44.1 kHz.
    ipad = int(pad * 44100)
    audio_pad = (ipad, ipad
                 )  # add `pad` blank seconds at the beginning and at the end
    if truncate > 0:
        audio = AudioFile(filename,
                          truncate=int(truncate * 44100),
                          pad=audio_pad)
    else:
        audio = AudioFile(filename, pad=audio_pad)
    mid = MidiFile(filename_midi)

    step = 0.02  # seconds
    latency = int(round(latency / step, 0))  # now expressed in steps

    # Compute spectrograms
    spectrograms = ComputeSpectrograms(audio, step=step)

    # Compute filtered spectrograms
    melgrams = ComputeMelLayers(spectrograms, step, audio.Fs, latency)

    # Build the input tensor
    cnn_window = 15
    tensor_mel = BuildTensor(melgrams[:, 2], cnn_window)

    # Compute CQT
    FreqAxisLog, time, cqgram = ComputeCqt(audio,
                                           200.,
                                           4000.,
                                           step,
                                           latency,
                                           r=3)
    tensor_cqt = BuildTensor([
        cqgram,
    ], cnn_window)

    # Global data length
    max_len = min(tensor_mel.shape[0], tensor_cqt.shape[0])

    # Compute output labels
    notes = mid.getNotes(timidity)

    onset_labels = np.zeros(max_len)
    onset_caracs = np.zeros((max_len, 5))
    onset_caracs[:, 2] = np.arange(max_len)

    note_low = 21  # lowest midi note on a keyboard
    note_high = 108  # highest midi note on a keyboard

    notes_labels = np.zeros((max_len, note_high - note_low + 1))
    notes_caracs = np.zeros((max_len, note_high - note_low + 1))

    # Iterate the notes directly: building 2-D arrays first failed on an
    # empty note list, and ``np.int`` no longer exists in current NumPy.
    for note in notes:
        onset = note[0]  # note timing
        key = int(note[1])  # key value
        volume = note[2]

        t_win = int(np.floor((onset + audio_pad[0] / audio.Fs) / step))
        if t_win >= len(onset_labels):
            break
        if t_win >= 0:
            onset_labels[t_win] = 1
            onset_caracs[t_win][0] += 1  # nb_notes
            onset_caracs[t_win][1] = max(onset_caracs[t_win][1],
                                         volume)  # max volume
            if t_win + 1 < len(onset_labels):
                onset_caracs[t_win + 1:, 2] -= onset_caracs[t_win + 1][
                    2]  # nb of blank windows since the last onset

            n = key - note_low
            notes_labels[t_win][n] = 1
            notes_caracs[t_win][n] = volume

    # Backward pass: number of windows until the next onset.
    counter = 0
    for i in range(len(onset_labels) - 1, -1, -1):
        onset_caracs[i][3] = counter
        if onset_labels[i] == 1:
            counter = 0
        else:
            counter += 1
    # Distance to the nearest onset on either side.
    onset_caracs[:, 4] = np.minimum(onset_caracs[:, 2], onset_caracs[:, 3])

    # Extract useful CQT
    select = [i for i in range(max_len) if onset_labels[i] > 0]
    tensor_cqt_select = np.take(tensor_cqt, select, axis=0)
    notes_labels_select = np.take(notes_labels, select, axis=0)
    notes_caracs_select = np.take(notes_caracs, select, axis=0)

    if not get_raw:
        return (tensor_mel[:max_len, ...], tensor_cqt_select, onset_labels,
                onset_caracs, notes_labels_select, notes_caracs_select, dname)
    else:
        return (melgrams, tensor_mel, onset_labels, cqgram, tensor_cqt, time,
                FreqAxisLog, max_len, step)
Beispiel #19
0
def extractor_audio(buffer, fs):
    '''
    Build the feature matrix for an in-memory audio buffer.

    The buffer is opened with ``AudioFile.open_frombuffer`` and split with
    ``s.frames(fs)``; each frame yields one row of the result.  A row holds,
    in order: one value per entry of PARAMETROS, then (if enabled) the MFCC
    means, their delta / double-delta sums, the chroma values of the frame
    spectrum and the first FLUX spectral-flux values.

    The MFCC and spectral-flux features are computed from the whole signal
    ``s`` (not from the individual frame), so they are identical for every
    row.  They are therefore computed once, on the first frame, and reused.

    :param buffer: audio data to vectorize, passed to AudioFile.open_frombuffer
    :param fs: frame size forwarded to ``s.frames``
    :return: float32 array with one row per frame and NUM_PARAMETROS columns
    '''

    s = AudioFile.open_frombuffer(buffer)
    sf = s.frames(fs)

    num_ventanas = len(sf)
    lenv = np.round(s.sampleRate * VENTANA)

    vector = np.zeros([num_ventanas, NUM_PARAMETROS], dtype=np.float32)

    # Whole-signal features, filled in lazily on the first frame so that an
    # empty frame list still triggers no feature extraction at all.
    valores_mfcc = None
    valores_flux = None

    for indf, frame in enumerate(sf):
        Espectro = frame.spectrum()
        acumulado = 0

        for nombre, es_espectral in zip(PARAMETROS, TIPO_PARAMETROS):
            if not es_espectral:
                vector[indf, acumulado] = getattr(Energy, nombre)(
                    frame, windowSize=lenv, solape=SOLAPE)
            else:
                # getattr(Espectro, nombre)() is disabled upstream; every
                # spectral parameter currently stores the spectrum mean.
                vector[indf, acumulado] = Espectro.mean()
            acumulado += 1

        if MFCC > 0:
            if valores_mfcc is None:
                mfcc_features = mfcc(s, samplerate=s.sampleRate,
                                     winlen=VENTANA, numcep=MFCC)
                mfcc_means = np.mean(mfcc_features, 0)
                valores_mfcc = [mfcc_means[i] for i in range(MFCC)]

                if DELTAS:
                    diferencias = [np.diff(mfcc_features[:, i])
                                   for i in range(MFCC)]
                    # All deltas first, then all double deltas.
                    valores_mfcc += [np.sum(d) for d in diferencias]
                    valores_mfcc += [np.sum(np.diff(d)) for d in diferencias]

            for valor in valores_mfcc:
                vector[indf, acumulado] = valor
                acumulado += 1

        if CHROMA > 0:
            array_chroma = Espectro.chroma()

            for i in range(CHROMA):
                vector[indf, acumulado] = array_chroma[i]
                acumulado += 1

        if FLUX > 0:
            if valores_flux is None:
                spectra = [f.spectrum_dct() for f in s.frames(lenv)]
                flujo = SpectralFlux.spectralFlux(spectra, rectify=True)
                valores_flux = [flujo[i] for i in range(FLUX)]

            for valor in valores_flux:
                vector[indf, acumulado] = valor
                acumulado += 1

    return vector
Beispiel #20
0
class ImportOGG:
    """
    Import the tags of an OGG file into the netjuke database.

    Constructing an instance performs the import right away:

    * mode 'publish': inserts a new row in netjuke_tracks (unless the file
      is already imported) and then flags the source file in mp3tmp.
    * mode 'edit': updates the existing netjuke_tracks row for the file.

    Any other mode only opens the file and reads its comments.
    """

    def __init__(self, fn, userInfo, mode='publish'):
        self.fn             = fn
        self.userInfo       = userInfo
        self.mode           = mode
        self.warnIncomplete = 1

        self.config        = LoadConfig.LoadConfig()

        # open ogg file
        self.af           = AudioFile(fn)
        self.metadata     = self.af.read_comments()

        # Compare modes by value: identity ("is") only works by accident of
        # string interning and fails for equal strings built at runtime.
        if self.mode == 'publish':
            newFilePath = self.fn
            logger.debug5( "ImportOGG() self.fn: " + self.fn )
            logger.debug5( "ImportOGG() newFilePath: " + newFilePath )

            if newFilePath:
                if self.import2db(newFilePath):
                    self.setImported()
        elif self.mode == 'edit':
            if self.import2db(self.fn):
                logger.debug1( "Updated database from file: %s" % self.fn )

    def isImported(self, path):
        """
        Look the file up in netjuke_tracks.

        Returns a tuple (rowcount, metadata) where metadata holds the ID,
        title, artist, album, label and license of the last matching row
        (empty dict when nothing matched).

        NOTE(review): the ``path`` argument is ignored; the lookup location
        is always derived from self.fn -- confirm this is intended.
        """
        metadata = {}

        path = removePathComponent(self.fn, "/usr/local/media/")
        name, ext = os.path.splitext(path)

        path = name + ".ogg"

        db = DB.connect()
        cursor = db.cursor()
        sql  = "SELECT tr.id, tr.name, ar.name, al.name, la.name, li.name"
        sql += " FROM netjuke_tracks tr, netjuke_artists ar, netjuke_albums al, netjuke_labels la, licenses li"
        sql += " WHERE location = '%s'" % MySQLdb.escape_string(path)
        sql += " AND tr.ar_id=ar.id AND tr.al_id=al.id AND tr.la_id=la.id AND tr.license=li.id"

        logger.debug99( "Using SQL query to check if file is imported: %s" % sql )
        cursor.execute(sql)
        result = cursor.fetchall()
        for record in result:
            metadata['ID']      = record[0]
            metadata['title']   = record[1]
            metadata['artist']  = unicode(record[2], 'utf8')
            metadata['album']   = unicode(record[3], 'utf8')
            metadata['label']   = unicode(record[4], 'utf8')
            metadata['license'] = record[5]

        logger.debug5( "Import.isImported(%s) rowcount = %i" % (path, cursor.rowcount) )

        return (cursor.rowcount, metadata)

    def setImported(self):
        '''
        Should flag or delete from mp3tmp when file was imported, and delete the source file.

        Currently it only sets imported = 1 on every mp3tmp row whose
        location matches the ".mp3" counterpart of self.fn (relative to the
        user's spool directory).
        '''

        sourceFile = removePathComponent(self.fn, self.userInfo['spoolDir'])
        name, ext  = os.path.splitext(sourceFile)
        sourceFile = name + ".mp3"

        db = DB.connect()

        sql = "SELECT location FROM mp3tmp WHERE location LIKE '%s'" % MySQLdb.escape_string(sourceFile)
        cursor = db.cursor()
        cursor.execute(sql)
        matches = cursor.fetchall()
        for record in matches:
            sql = "UPDATE mp3tmp SET imported = 1 WHERE location = '%s'" % MySQLdb.escape_string(record[0])
            db.set_character_set('utf8')
            cursor = db.cursor()
            cursor.execute(sql)
            cursor.fetchall()

    """
    def MoveImported(self, file, target, spoolDir):
        return moveExactTree(file, target, spoolDir, 1)

    def CopyImported(self, file, target, spoolDir):
        logger.debug5( "---------------------------" )
        logger.debug5( "in ImportOGG.CopyImported: " )
        logger.debug5( "file: " + file )
        logger.debug5( "target: " + target )
        logger.debug5( "---------------------------" )

        target = copyExactTree(file, target, spoolDir, 1)

        return target
    """

    def import2db(self, newFilePath):
        """
        Insert (mode 'publish') or update (mode 'edit') the netjuke_tracks
        row for ``newFilePath`` using the tags read from the file, then
        record the uploader relation.

        Returns True after the query ran, or None when the file was already
        imported in 'publish' mode.
        """

        self.af.SetFile(newFilePath)
        self.metadata = self.af.read_comments()
        logger.debug99("at import2db, metadata is: %s" % self.metadata)

        location = location2db(newFilePath, self.userInfo['home'])
        print("****************** LOCATION: %s" % location)

        if self.mode == 'publish': isImported = self.isImported(location)[0]
        elif self.mode == 'edit': isImported = 1

        logger.debug99( "------------------------------------" )
        logger.debug99( "in ImportOGG.import2db():" )
        logger.debug99( "user home   = " + self.userInfo['home'] )
        logger.debug99( "is imported = " + str(isImported) )
        logger.debug99( "newFilePath = " + newFilePath )
        logger.debug99( "location    = " + location )
        logger.debug99( "------------------------------------" )

        if (self.mode == 'publish') and isImported:
            logger.debug1( "File already imported.. skipping: %s" % location )
            return

        artistID = getID('artists', self.metadata['artist'])
        albumID = getID('albums', self.metadata['album'])
        labelID = getID('labels', self.metadata['label'])
        licenseID = getID('licenses', self.metadata['license'])

        # presumably ID 1 is the placeholder row for a missing tag -- verify
        # against getID().
        if artistID == 1 or albumID == 1 or labelID == 1 or licenseID == 1:
            logger.warn("Incomplete tags for file!")
            self.warnIncomplete = 0

        filename = os.path.basename(newFilePath)

        if self.metadata['title'] == '': title = MySQLdb.escape_string(filename)
        else: title = MySQLdb.escape_string(uniconvert2(self.metadata['title']).encode('utf8'))
        size = self.af.getSize()
        length = int( self.af.getLength() )
        track_number = self.af.getTag('track_number')
        if track_number == '': track_number = self.af.getTag('tracknumber')
        if track_number == '': track_number = 0
        year = self.af.getTag('year')
        if year == '': year = 0
        # Tag values come straight from the file, so they must be escaped
        # before being quoted into the statement like the other text fields.
        track_number = MySQLdb.escape_string(str(track_number))
        year = MySQLdb.escape_string(str(year))
        bitrate = self.af.getBitrate()
        sample_rate = self.af.getSamplerate()
        kind = 'OGG/Vorbis'
        location = MySQLdb.escape_string(uniconvert2(location).encode('utf8'))
        comments = MySQLdb.escape_string(uniconvert2(self.metadata['comment']).encode('utf8'))

        if self.mode == 'publish':
            sql = "INSERT INTO"
        elif self.mode == 'edit':
            sql = "UPDATE"
        trackID = getIDbyLocation(location)

        sql += (
            " netjuke_tracks SET ar_id = '" + str(artistID) +
            "', al_id = '" + str(albumID) +
            "', ge_id = '1', la_id = '" + str(labelID) +
            "', name = '" + title +
            "', size = '" + str(size) +
            "', time = '" + str(length) +
            "', track_number = '" + track_number +
            "', year = '" + year +
            "', date = now(), bit_rate = '" + str(bitrate) +
            "', sample_rate = '" + str(sample_rate) +
            "', kind = '" + kind +
            "', location = '" + str(location) +
            "', comments = '" + comments +
            "', mtime = now(), license = '" + str(licenseID) +
            "', lg_id = '1', enabled = 2"
        )

        if self.mode == 'edit': sql += " WHERE id = %s" % str(trackID)

        logger.debug99( "Using SQL query: " + sql )

        db = DB.connect()
        cursor = db.cursor()
        cursor.execute(sql)

        # NOTE(review): after an UPDATE (mode 'edit') insert_id() presumably
        # does not refer to the edited track -- confirm whether trackID
        # should be used for the uploader relation in that mode.
        fileID = db.insert_id()
        self.DoFileUploaderRelation(fileID, self.userInfo['ID'])

        # FIXME: do some checks, and return False if this fails
        print("="*77)
        return True

    def DoFileUploaderRelation(self, fileID, userID):
        """Record (idempotently, via INSERT IGNORE) that userID uploaded fileID."""
        sql = "INSERT IGNORE INTO file_uploader SET file = '%i', uploader = '%i'" % (fileID, userID)
        logger.debug99( "Using SQL query: " + sql )

        db = DB.connect()
        cursor = db.cursor()
        cursor.execute(sql)
Beispiel #21
0
def extractor_audio(buffer, fs):
    '''
    Vectorize an in-memory audio buffer, one row per frame.

    Per-frame columns: one value per PARAMETROS entry and, when CHROMA > 0,
    the chroma values of the frame spectrum.  Columns derived from the whole
    signal ``s`` (MFCC means, optional delta / double-delta sums, and the
    first FLUX spectral-flux values) do not depend on the frame, so they are
    extracted a single time and copied into every row instead of being
    recomputed on each iteration.

    :param buffer: audio data to vectorize, passed to AudioFile.open_frombuffer
    :param fs: frame size forwarded to ``s.frames``
    :return: float32 array of shape (number of frames, NUM_PARAMETROS)
    '''

    s = AudioFile.open_frombuffer(buffer)
    sf = s.frames(fs)
    num_ventanas = len(sf)
    lenv = np.round(s.sampleRate * VENTANA)
    vector = np.zeros([num_ventanas, NUM_PARAMETROS], dtype=np.float32)

    # Caches for the frame-independent columns; populated on the first frame
    # so nothing is extracted when there are no frames.
    columnas_mfcc = None
    columnas_flux = None

    for indf, frame in enumerate(sf):
        Espectro = frame.spectrum()
        acumulado = 0
        for param in zip(PARAMETROS, TIPO_PARAMETROS):
            if not param[1]:
                funcion = getattr(Energy, param[0])
                vector[indf, acumulado] = funcion(frame,
                                                  windowSize=lenv,
                                                  solape=SOLAPE)
            else:
                # getattr(Espectro, param[0])() is disabled upstream; the
                # spectrum mean is stored for every spectral parameter.
                vector[indf, acumulado] = Espectro.mean()
            acumulado = acumulado + 1

        if MFCC > 0:
            if columnas_mfcc is None:
                mfcc_features = mfcc(s,
                                     samplerate=s.sampleRate,
                                     winlen=VENTANA,
                                     numcep=MFCC)
                mfcc_means = np.mean(mfcc_features, 0)
                columnas_mfcc = [mfcc_means[i] for i in range(0, MFCC)]

                if DELTAS:
                    delta = []
                    dobledelta = []
                    for i in range(0, MFCC):
                        diferencias = np.diff(mfcc_features[:, i])
                        delta.append(np.sum(diferencias))
                        dobledelta.append(np.sum(np.diff(diferencias)))
                    # Layout: MFCC means, then deltas, then double deltas.
                    columnas_mfcc = columnas_mfcc + delta + dobledelta

            for valor in columnas_mfcc:
                vector[indf, acumulado] = valor
                acumulado = acumulado + 1

        if CHROMA > 0:
            array_chroma = Espectro.chroma()

            for i in range(0, CHROMA):
                vector[indf, acumulado] = array_chroma[i]
                acumulado = acumulado + 1

        if FLUX > 0:
            if columnas_flux is None:
                spectral_frames = s.frames(lenv)
                spectra = [f.spectrum_dct() for f in spectral_frames]
                flujo = SpectralFlux.spectralFlux(spectra, rectify=True)
                columnas_flux = [flujo[i] for i in range(0, FLUX)]

            for valor in columnas_flux:
                vector[indf, acumulado] = valor
                acumulado = acumulado + 1

    return vector
Beispiel #22
0
def transcribe_to_midi(filename, onset_model, note_model, output):
    """
    Transcribes an audio file to midi and renders it to wav if timidity is found in the path

    Pipeline: mel spectrogram tensor -> onset model (threshold 0.2) ->
    CQT tensor restricted to the detected onset windows -> note model
    (threshold 0.5) -> one MIDI track where every onset first releases the
    currently sounding notes and then presses the predicted ones.

    :param filename: path of audio file to transcribe
    :param onset_model: onset detection model to use (hdf5 filename)
    :param note_model: key identification model to use (hdf5 filename)
    :param output: output filename (without extension)
    :return: None
    """
    step = 0.02
    lowest_midi_note = 21  # index 0 of the 88-wide note vector

    _vprint(f'load audio {filename}...')
    audio = AudioFile(filename, pad=(44100, 44100))

    _vprint('computing spectrograms...')
    spectrograms = preprocess.ComputeSpectrograms(audio, step=step)

    _vprint('computing (mel) filtered spectrograms...')
    melgrams = preprocess.ComputeMelLayers(spectrograms,
                                           step,
                                           audio.Fs,
                                           latency=0)
    cnn_window = 15
    tensor_mel = preprocess.BuildTensor(melgrams[:, 2], cnn_window)

    _vprint('onset detection...')
    model = keras.models.load_model(onset_model)
    preds_onset = 1. * (model.predict(tensor_mel) >= 0.2)

    nb_notes = np.sum(preds_onset)

    _vprint(f'{nb_notes} onsets detected')

    _vprint('computing CQT...')
    # TODO: compute only useful ones
    FreqAxisLog, time, cqgram = preprocess.ComputeCqt(audio,
                                                      200.,
                                                      4000.,
                                                      step,
                                                      latency=0,
                                                      r=3)
    tensor_cqt = preprocess.BuildTensor([
        cqgram,
    ], cnn_window)

    # Only windows present in BOTH tensors can be transcribed.
    max_len = min(tensor_mel.shape[0], tensor_cqt.shape[0])
    select = [i for i in range(max_len) if preds_onset[i] > 0]
    tensor_cqt_select = np.take(tensor_cqt, select, axis=0)

    _vprint('key identification...')
    model = keras.models.load_model(note_model)
    preds_notes = 1. * (model.predict(tensor_cqt_select) >= 0.5)
    _vprint(f'{np.sum(preds_notes)} keys identified')

    _vprint('midi writing...')
    mid = mido.MidiFile(ticks_per_beat=500)
    track = mido.MidiTrack()
    mid.tracks.append(track)
    track.append(mido.Message('program_change', program=1, time=0))

    i = 0  # index into preds_notes (one row per selected onset)
    t = 0  # time in seconds
    t_last = 0
    notes_on = np.zeros(88)
    # preds_notes holds one row per onset below max_len, so the walk must
    # stop there too; iterating all of preds_onset would index past the end
    # of preds_notes whenever the mel tensor is longer than the CQT tensor.
    for w in preds_onset[:max_len]:
        if w[0]:
            if np.sum(preds_notes[i]) > 0:
                delta_t = int((t - t_last) / 0.001)  # delta_time in midi ticks

                # Release everything still sounding; only the first message
                # of the group carries the elapsed time.
                for n in notes_on.nonzero()[0]:
                    midi_note = int(n + lowest_midi_note)
                    track.append(
                        mido.Message('note_off',
                                     note=midi_note,
                                     velocity=0,
                                     time=delta_t))
                    notes_on[n] = 0
                    delta_t = 0

                for n in preds_notes[i].nonzero()[0]:
                    midi_note = int(n + lowest_midi_note)
                    track.append(
                        mido.Message('note_on',
                                     note=midi_note,
                                     velocity=64,
                                     time=delta_t))
                    notes_on[n] = 1
                    delta_t = 0
                t_last = t

            i += 1

        t += step

    mid.save(f'{output}.mid')

    timidity = find_exec(['timidity', 'timidity.exe'],
                         additional_dirs=[_TIMIDITY_DIR])

    if timidity is not None:
        _vprint('timidity found, rendering to audio: ' + timidity)
        subprocess.run([
            timidity, f'{output}.mid', '-Ow', '-o', f'{output}.wav',
            '--output-mono'
        ])
Beispiel #23
0
class Browser:
    '''
    The browser object returns an array of items, typically to be used
    by the scroller object from the Burn Station.
    
    It should also provide a series of methods to browse back and forth
    through the child/parent items.

    ``self.level`` selects the data source ("path", "jamendo", or one of the
    database levels) and, for the database, is advanced by getListFromDB()
    and rewound by Back().
    '''
    #--------------------------------------------------------------------
    def __init__(self, level='', itemID=0):
        self.level = level
        self.itemID = itemID
        self.j = Jamendo()
        logger.debug( "******** level: " + level)

    #--------------------------------------------------------------------
    def SetType(self, type):
        '''Switch the browsing level / data source.'''
        self.level = type

    #--------------------------------------------------------------------
    def getList(self, parent=0, index=0):
        '''Return the item list for ``parent`` from the current data source.'''
        if self.level == "path": return self.getListFromPath(parent, index)
        elif self.level == "jamendo": return self.getListFromJamendo(parent, index)
        else: return self.getListFromDB(parent, index)

    #--------------------------------------------------------------------
    def getListFromJamendo(self, parent, index=0):
        return self.j.search_artist_by_name(search="")

    #--------------------------------------------------------------------
    def getListFromPath(self, parent, index=0):
        '''
        List the directory ``parent`` (0 means "/").

        .ogg/.mp3 files get their length appended to the name, other regular
        files are skipped, every non-file entry is listed with a leading "/".
        Entries whose name is not plain ASCII are skipped.  Returns an empty
        list when the directory cannot be read.
        '''
        logger.debug( "Getting contents from: %s" % parent )
        try:
            if parent == 0: parent = "/"
            dirlist = os.listdir(parent)
            dirlist.sort()
        except Exception as e:
            logger.error( "at Browser.getListFromPath(): " + str(e) )
            # Nothing to list; carrying on would hit an unbound ``dirlist``.
            return []

        entries = []

        for x in dirlist:
            name, ext  = os.path.splitext(x)

            if x != get_ascii_filename(x):
                continue

            real_path = os.path.join(parent, uniconvert2(x))
            if os.path.isfile(real_path):
                if ext.lower() in ('.ogg', '.mp3'):
                    # read vorbis info
                    self.af = AudioFile(real_path)
                    seconds = int(self.af.getLength())
                    length = " (" + str(seconds) + ")"
                    entries.append( {'location':real_path, 'id':'idd', 'img':'imgg', 'name': x + length, 'time': seconds, 'seconds':seconds } )
            else:
                entries.append( {'location':real_path, 'id':'idd', 'img':'imgg', 'name': "/" + x } )

        return entries

    #--------------------------------------------------------------------
    def getListFromDB(self, itemID=0, index=0):
        #print "AT GET LIST FROM DB:::::::::::::::" + self.level + "::" + str(itemID) + '::' + str(index) + "::"
        '''
        Get the list of items to be displayed.

        Fetches the children of ``itemID`` for the current level and moves
        ``self.level`` one step down (labels -> artists -> albums -> tracks
        -> tracks_list).  Raises Exception at the 'tracks_list' level, and
        returns a one-item placeholder list when no level matches.
        '''

        if self.level == 'labels':
            logger.debug5( "GETTING LABELS" )
            items = Database.GetLabels()
            self.level   = 'artists'
        elif self.level == 'artists' and itemID>0:
            logger.debug5( "GETTING ARTISTS" )
            items = Database.GetArtists(itemID)
            self.labelID = itemID
            self.labelIndex = index
            self.level   = 'albums'
        elif self.level == 'albums':
            logger.debug5( "GETTING ALBUMS" )
            items = Database.GetAlbums(self.labelID, itemID)
            self.level = 'tracks'
            self.artistID = itemID
            self.artistIndex = index

        elif self.level == 'tracks':
            logger.debug5( "GETTING TRACKS" )
            items = Database.GetTracks(self.labelID, self.artistID, itemID)
            self.albumID = itemID
            self.albumIndex = index
            self.level = 'tracks_list'
        elif self.level == 'tracks_list':
            raise Exception("Trying to open track means PLAY IT !")

        elif self.level == 'playlist' or self.level == 'playlist_tracks':
            logger.debug5( "GETTING DB PLAYLIST TRACKS" )
            if itemID == 0 and self.itemID > 0 : itemID = self.itemID
            items = Database.GetPlaylist(itemID)
            self.level = 'playlist_tracks'

        elif self.level == 'fs_playlist':
            logger.debug5( "GETTING FILESYSTEM PLAYLIST TRACKS" )
            if itemID == 0 and self.itemID > 0 : itemID = self.itemID
            items = Database.GetFilesystemPlaylist(itemID)
            self.level = 'fs_playlist'

        else:
            # Unknown level (or 'artists' without a label id): placeholder.
            items = [ { 'id':"a", 'name':"a", 'img':"a", 'info':"a" } ]

        return items

    #--------------------------------------------------------------------
    def descend(self, parent, selected):
        '''
        Enter the item ``parent`` and return its children.

        ``selected`` is remembered as the index to restore in Back().
        Raises Exception when the current level has no children.
        '''
        self.parentIndex = selected
        if self.level == 'jamendo':
            return self.j.descend(parent, selected)
        elif self.level == 'path':
            # browsing filesystem
            logger.debug( "Descend to: " + parent )
            if os.path.isdir(parent):
                self.currentDir = parent
                return self.getList(parent, selected)
            else:
                # NOTE(review): Play() is not defined in this class --
                # presumably provided elsewhere; confirm.
                self.Play()
        elif self.level != '' and self.level != 'playlist_tracks':
            # browsing database
            return self.getList(parent, selected)
        else:
            # no childs, impossible to descend
            raise Exception("Descend() impossible. No child items found!")

    #--------------------------------------------------------------------
    def Back(self):
        '''
        Go one level up.

        Returns a dict with the parent 'list' and the 'index' to re-select,
        or None when the current level has no parent.
        '''
        print("BACK FROM TRACKS, self.level = " + self.level)
        if self.level == 'path':
            parent_dir = self.currentDir+'/..'
            logger.debug(" ********** currentDir: "+parent_dir)
            items = self.getList(parent_dir, self.parentIndex)
            self.currentDir = parent_dir
            return { 'list':items, 'index':self.parentIndex }
        elif self.level == 'tracks_list':
            items = Database.GetAlbums(self.labelID, self.artistID)
            self.level = 'tracks'
            return { 'list':items, 'index':self.albumIndex }
        elif self.level == 'tracks':
            items = Database.GetArtists(self.labelID)
            self.level   = 'albums'
            return { 'list':items, 'index':self.artistIndex }
        elif self.level == 'albums':
            items = Database.GetLabels()
            self.level = 'artists'
            return { 'list':items, 'index':self.labelIndex }
        else: return
Beispiel #24
0
def augment(input_dir: Path, noise_dir: Path, output_dir: Path):
    """Create a degraded copy of every ``*.wav`` file in ``input_dir``.

    Each input file is mixed with a randomly chosen, randomly trimmed and
    gained noise file, optionally low-pass swept, convolved with a random
    impulse response from ``./IMreverbs-realistic``, band-limited, clipped
    and saved to ``output_dir``.

    Args:
        input_dir: directory containing the clean ``*.wav`` files.
        noise_dir: directory containing the noise ``*.wav`` files.
        output_dir: destination directory; created if it does not exist.

    Raises:
        Exception: if ``input_dir`` or ``noise_dir`` does not exist.
    """
    # ``Path.exists`` is a method: it has to be called, otherwise the bound
    # method object is always truthy and these checks can never fire.
    if not input_dir.exists():
        raise Exception('Input directory does not exist.')

    if not noise_dir.exists():
        raise Exception('Noise directory does not exist.')

    if not output_dir.exists():
        print("Making output directory {}".format(output_dir))
        output_dir.mkdir(parents=True)

    # Seed only once the arguments are known to be usable.
    seed(SEED)

    filelist = list(input_dir.glob('*.wav'))
    print("{} input files found".format(len(filelist)))

    # Sorted so that the seeded ``choice`` below picks the same file on every
    # run; iterating a set of paths directly follows hash order.
    noiselist = sorted(set(noise_dir.glob('*.wav')))
    print("{} noise files found".format(len(noiselist)))

    print("Loading noise files into memory...")
    noise_files = [AudioFile(path=x) for x in noiselist]
    print("Done loading noise files.")

    irs = list(Path('./IMreverbs-realistic').glob('*.wav'))

    # Candidate gain values handed to noise.gain() and clip amounts for the
    # final clipping stage.
    gain = [6, 3, 1.5, 0, -1.5, -3, -6]
    clipping = [0.0, 1.0, 2.0, 3.0]

    while filelist:
        print("{} files remaining...".format(len(filelist)))
        f1 = filelist.pop()
        noise = choice(noise_files)

        # load audio files and apply a random amount of processing to noisy file:
        #   gain reduction in steps of -6 db
        #   varispeed between [0.9, 1.1]
        #   start position of audio in noise file (trimming start of file)
        f1 = AudioFile(path=f1)
        noise = noise.copy() \
            .trim_start(relative_start=uniform(0.0, 0.5)) \
            .trim_to_n_samples(n=f1.length)
        # noise = noise.varispeed(uniform(0.9, 1.1))
        noise = noise.gain(choice(gain)).clip()

        # add dynamic lpf to simulate speaker turning away
        # (only when the two random positions happen to be ordered)
        filter_start = random()
        filter_end = random()
        if filter_start < filter_end:
            f1.dynamic_lpf(cutoff=uniform(1000, 8000),
                           order=randint(0, 3),
                           relative_start=filter_start,
                           relative_end=filter_end,
                           exponential=random())

        # add noise to audio
        f1.mix(noise, maintain_length=True)

        # choose random impulse response and add reverb to noisy audio
        ir = AudioFile(path=choice(irs))
        f1.conv_reverb(ir, wet_db=uniform(-70, -30), predelay=uniform(0, 50))

        # filtering
        f1.lpf(uniform(5000, 8000))
        f1.hpf(uniform(0, 250))

        # clipping
        f1.clip(choice(clipping))

        # save
        f1.save(output_path=output_dir)
Beispiel #25
0
def extractor_var(origen, framelen = 0.25, destino=''):

    """
    Extract features (descriptors) from PCM audio files.

    Every file is split into frames of ``framelen`` seconds and each frame
    produces one row of NUM_PARAMETROS values.  The per-file matrix is saved
    as ``<destino>/<file name>.npy`` when ``destino`` is given; in every case
    the matrices of all files are stacked and returned.

    :param origen: a single audio file, or a directory whose ``*.wav`` files
        are processed
    :type origen: string
    :param framelen: frame length in seconds
    :type framelen: float
    :param destino: directory where the per-file matrices are stored; nothing
        is stored when empty
    :type destino: string
    :return: matrix with the rows of all processed files

    The process exits (sys.exit) when ``origen`` is neither a file nor a
    directory, or when no matching file is found.

    The feature set is defined in config; at the time of writing:
    a vector of 20 parameters:
    lst_energy - hzcrr - centroid - spread - variance - rolloff - mean - crest - mfcc (8)
    """

    if os.path.isdir(origen):
        origen = os.path.join(origen, '*.wav')
    elif not os.path.isfile(origen):
        print('Directorio o nombre de archivos de origen no valido o sin extension (wav/mp3)')
        sys.exit()

    archivos = glob.glob(origen)
    if not archivos:
        print('no hay archivos de formato wav en el directorio')
        sys.exit()

    vectores = []

    print('Inicio del parametrizador. Extraccion segundo a segundo y con %i parametros' % NUM_PARAMETROS)

    for filename in archivos:

        print('\nVectorizando archivo:  %s' % filename)

        t1 = time.time()

        s = AudioFile.open(filename)

        tam_frame = s.sampleRate * framelen
        sf = s.frames(tam_frame)

        num_ventanas = len(sf)

        lenv = np.round(s.sampleRate * VENTANA)

        vector = np.zeros([num_ventanas, NUM_PARAMETROS], dtype=np.float32)

        for indf, frame in enumerate(sf):
            print(len(frame))
            # Stop at the trailing incomplete frame.  The threshold is the
            # requested frame size: comparing against a full second
            # (s.sampleRate) rejected every frame whenever framelen < 1.
            # assumes len(frame) is the number of samples -- TODO confirm
            if len(frame) < int(tam_frame):
                break
            Espectro = frame.spectrum()

            acumulado = 0

            for nombre, es_espectral in zip(PARAMETROS, TIPO_PARAMETROS):

                if not es_espectral:
                    vector[indf, acumulado] = getattr(Energy, nombre)(frame, windowSize=lenv, solape=SOLAPE)

                else:
                    # getattr(Espectro, nombre)() is disabled upstream; the
                    # spectrum mean is stored for every spectral parameter.
                    vector[indf, acumulado] = Espectro.mean()
                acumulado += 1

            if MFCC > 0:
                mfcc_features = mfcc(frame, samplerate=s.sampleRate, winlen=VENTANA, numcep=MFCC)

                mfcc_means = np.mean(mfcc_features, 0)
                for i in range(MFCC):
                    vector[indf, acumulado] = mfcc_means[i]
                    acumulado += 1

                if DELTAS:
                    delta = np.zeros(MFCC)
                    dobledelta = np.zeros(MFCC)

                    for i in range(MFCC):
                        diferencias = np.diff(mfcc_features[:, i])
                        delta[i] = np.sum(diferencias)
                        dobledelta[i] = np.sum(np.diff(diferencias))

                    for i in range(MFCC):
                        vector[indf, acumulado] = delta[i]
                        acumulado += 1

                    for i in range(MFCC):
                        vector[indf, acumulado] = dobledelta[i]
                        acumulado += 1

            if CHROMA > 0:
                array_chroma = Espectro.chroma()

                for i in range(CHROMA):
                    vector[indf, acumulado] = array_chroma[i]
                    acumulado += 1

            if FLUX > 0:
                spectral_frames = frame.frames(lenv)
                spectra = [f.spectrum_dct() for f in spectral_frames]
                flujo = SpectralFlux.spectralFlux(spectra, rectify=True)

                for i in range(FLUX):
                    vector[indf, acumulado] = flujo[i]
                    acumulado += 1

        print('Tiempo de parametrizacion (minutos): ')
        print((time.time() - t1) / 60)

        if destino:
            archivo = os.path.split(filename)[1].split('.')[0]
            np.save(os.path.join(destino, archivo), vector)

        vectores.append(vector)

    # One concatenation at the end instead of re-copying the accumulated
    # matrix for every file.
    if not vectores:
        return []
    return np.concatenate(vectores, axis=0)