Example #1
    def __call__(self, combo):
      c = np.array(combo)
      chunk_size = self.chunk_size
      db = self.db

      tt = transformFFT(frameSize=4096, hopSize=512, sampleRate=44100, window=blackmanharris)

      maxLength = 0
      for i in range(len(self.sources)):
        instlen = int(util.getMidiLength(self.sources_midi[i] + '_g' + self.style_midi[s], os.path.join(db, f)))
        if instlen>maxLength:
          maxLength = instlen
      if chunk_size>maxLength:
        chunk_size = maxLength
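      # e.g. with maxLength = 30 s and chunk_size = 12 s, the loop below
      # yields int(np.floor(30 / 12)) = 2 chunks covering [0, 12) and
      # [12, 24); the trailing 6 s are dropped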

      for chnk in range(int(np.floor(maxLength/chunk_size))):
        chunk_start = chunk_size * chnk
        chunk_end = (chnk+1) * chunk_size
        # the filename checked here must match the one written by compute_transform below
        if not os.path.isfile(os.path.join(feature_path, f, self.style[s], f + '_' + str(c).encode('base64', 'strict') + '_' + str(chnk) + '.data')):
          try:
            for i in range(len(self.sources)):

              nframes = int(np.ceil(chunk_size*self.sampleRate / np.double(tt.hopSize))) + 2
              size = int(chunk_size*self.sampleRate-int(np.max( c[:,0].astype(float))*self.sampleRate))

              if self.sampleRate != 44100:
                  print('sample rate is not consistent')
              if i==0:
                  audio = np.zeros((size,len(self.sources)+1))

              melody, melodyBegin, melodyEnd, melNotes = util.getMidi(
                  self.sources_midi[i] + '_g' + self.style_midi[s], os.path.join(db, f),
                  chunk_start, chunk_end, self.sampleRate, tt.hopSize, tt.frameSize,
                  c[i, 0], c[i, 0], nframes, 1)

              #generate the audio, note by note
              for m in range(len(melNotes)):
                note = self.instruments[i].getNote(melNotes[m],self.allowed_dynamics[int(c[i,1])],self.allowed_styles[int(c[i,2])],int(c[i,3]))
                if note is None:
                  raise GetOutOfLoop
                else:
                  segment = note.getAudio(max_duration=melodyEnd[m] - melodyBegin[m])
                  start = int(np.floor(melodyBegin[m] * self.sampleRate))
                  if len(segment) > (len(audio) - start):
                    # clip the note so it does not run past the end of the buffer
                    audio[start:start + len(segment), i + 1] = segment[:len(audio) - start]
                  else:
                    audio[start:start + len(segment), i + 1] = segment
                note = None
                segment = None

            audio[:, 0] = np.sum(audio[:, 1:len(self.sources) + 1], axis=1)

            tt.compute_transform(
                audio,
                os.path.join(feature_path, f, self.style[s],
                             f + '_' + str(c).encode('base64', 'strict') + '_' + str(chnk) + '.data'),
                phase=False)

            audio = None
            melody= None
          except GetOutOfLoop:
            pass
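
GetOutOfLoop, raised above when an instrument sample is missing, is never defined in the snippet; presumably it is a plain sentinel exception used to abandon the current chunk. A minimal definition consistent with that use would be:

class GetOutOfLoop(Exception):
    # sentinel exception used to escape the nested note-generation loops
    pass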
Example #2
    else:
        feature_path = os.path.join(db, 'transforms', 't1')
    assert os.path.isdir(
        db
    ), "Please input the directory for the iKala dataset with --db path_to_iKala"

    tt = None
    for f in os.listdir(os.path.join(db, "Wavfile")):
        if f.endswith(".wav"):
            #read the audio file
            audioObj, sampleRate, bitrate = util.readAudioScipy(
                os.path.join(db, "Wavfile", f))
            if tt is None:
                #initialize the transform object which will compute the STFT
                tt = transformFFT(frameSize=1024,
                                  hopSize=512,
                                  sampleRate=sampleRate,
                                  window=blackmanharris)
                pitchhop = 0.032 * float(sampleRate)  #pitch hop of 0.032 s expressed in samples
            assert sampleRate == 44100, "Sample rate needs to be 44100"

            audio = np.zeros((audioObj.shape[0], 3))

            audio[:, 0] = audioObj[:, 0] + audioObj[:, 1]  #create mixture voice + accompaniment
            audio[:, 1] = audioObj[:, 1]  #voice
            audio[:, 2] = audioObj[:, 0]  #accompaniment
            audioObj = None

            #read pitches so they can be written as separate features
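
util.readAudioScipy is likewise not shown. A plausible minimal sketch, assuming it simply wraps scipy.io.wavfile.read and reports the sample width (the repository's actual implementation may differ):

from scipy.io import wavfile
import numpy as np

def readAudioScipy(path):
    # wavfile.read returns (sample_rate, samples); samples has shape
    # (nframes,) for mono or (nframes, nchannels) for multichannel files
    sampleRate, audio = wavfile.read(path)
    bitrate = audio.dtype.itemsize * 8  # bits per sample
    return audio.astype(np.float64), sampleRate, bitrate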
Example #3
        if getattr(kwargs, 'skip'):
            skip = int(getattr(kwargs, 'skip'))
        else:
            skip = False

    testfile_list = []

    path_in = feature_path
    for f in sorted(os.listdir(db)):
        if os.path.isdir(os.path.join(db, f)) and f[0].isdigit():
            testfile_list.append(f)

    #tt object needs to be the same as the one in compute_features
    tt = transformFFT(frameSize=4096,
                      hopSize=512,
                      sampleRate=44100,
                      window=blackmanharris)
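    # sanity note: frameSize=4096 implies 4096 // 2 + 1 = 2049 frequency bins
    # per frame, so a loaded magnitude matrix X (hypothetical name) can be
    # checked against the transform with:
    #   assert X.shape[1] == tt.frameSize // 2 + 1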

    ld1 = LargeDataset(path_transform_in=path_in,
                       nsources=4,
                       nsamples=nsamples,
                       batch_size=batch_size,
                       batch_memory=batch_memory,
                       time_context=time_context,
                       overlap=overlap,
                       nprocs=nprocs,
                       mult_factor_in=scale_factor,
                       mult_factor_out=scale_factor)
    logging.info("  Maximum:\t\t{:.6f}".format(ld1.getMax()))
    logging.info("  Mean:\t\t{:.6f}".format(ld1.getMean()))
    logging.info("  Standard dev:\t\t{:.6f}".format(ld1.getStd()))
Example #4
    def __call__(self, combo):
        c = np.array(combo)
        chunk_size = self.chunk_size
        db = self.db
        feature_path = self.feature_path

        tt = transformFFT(frameSize=4096,
                          hopSize=512,
                          sampleRate=44100,
                          window=blackmanharris)
        maxLength = 0
        for i in range(len(self.sources)):
            instlen = util.getMidiLength(
                self.sources_midi[i] + '_g' + self.style_midi, db)
            if instlen > maxLength:
                maxLength = instlen
        if chunk_size > maxLength:
            chunk_size = maxLength

        for chnk in range(int(np.floor(maxLength / chunk_size))):
            chunk_start = float(chunk_size * chnk)
            chunk_end = float((chnk + 1) * chunk_size)
            if not os.path.isfile(
                    os.path.join(feature_path, self.style,
                                 str(c).encode('base64', 'strict') + '_' + str(chnk) + '.data')):
                try:
                    nelem_g = 1
                    for i in range(len(self.sources)):
                        ng = util.getMidiNum(
                            self.sources_midi[i] + '_g' + self.style_midi, db,
                            chunk_start, chunk_end)
                        nelem_g = np.maximum(ng, nelem_g)
                    melody_g = np.zeros((len(self.sources), int(nelem_g),
                                         2 * self.nharmonics + 3))
                    melody_e = np.zeros((len(self.sources), int(nelem_g),
                                         2 * self.nharmonics + 3))

                    for i in range(len(self.sources)):

                        nframes = int(
                            np.ceil(chunk_size * self.sampleRate /
                                    np.double(tt.hopSize))) + 2
                        size = int(chunk_size * self.sampleRate - int(
                            np.max(c[:, 0].astype(float)) * self.sampleRate))

                        if self.sampleRate != 44100:
                            print('sample rate is not consistent')
                        if i == 0:
                            audio = np.zeros((size, len(self.sources) + 1))

                        tmp = util.expandMidi(
                            self.sources_midi[i] + '_g' + self.style_midi, db,
                            chunk_start, chunk_end, self.interval,
                            self.tuning_freq, self.nharmonics, self.sampleRate,
                            tt.hopSize, tt.frameSize, c[i, 0], c[i, 0], nframes)
                        melody_g[i, :tmp.shape[0], :] = tmp
                        tmp = None
                        tmp = util.expandMidi(self.sources_midi[i] + '_g' +
                                              self.style_midi,
                                              db,
                                              chunk_start,
                                              chunk_end,
                                              self.interval,
                                              self.tuning_freq,
                                              self.nharmonics,
                                              self.sampleRate,
                                              tt.hopSize,
                                              tt.frameSize,
                                              c[i, 0] + 0.2,
                                              c[i, 0] + 0.2,
                                              nframes,
                                              fermata=c[i, 0] + 0.5)
                        melody_e[i, :tmp.shape[0], :] = tmp
                        tmp = None
                        #generate the audio, note by note
                        for m in range(nelem_g):
                            if melody_g[i, m, 2] > 0:
                                note = self.instruments[i].getNote(
                                    melody_g[i, m, 2],
                                    self.allowed_dynamics[int(c[i, 1])],
                                    self.allowed_styles[int(c[i, 2])],
                                    int(c[i, 3]))
                                if note is None:
                                    raise GetOutOfLoop
                                else:
                                    segment = note.getAudio(
                                        max_duration=float(melody_g[i, m, 1] - melody_g[i, m, 0])
                                        * tt.hopSize / self.sampleRate)
                                    # note onset in samples: frame index times hop size
                                    start = int(np.floor(melody_g[i, m, 0] * tt.hopSize))
                                    if len(segment) > (len(audio) - start):
                                        # clip the note so it does not run past the buffer
                                        audio[start:start + len(segment), i + 1] = segment[:len(audio) - start]
                                    else:
                                        audio[start:start + len(segment), i + 1] = segment
                                    segment = None
                                note = None
                                segment = None

                    audio[:, 0] = np.sum(audio[:, 1:len(self.sources) + 1],
                                         axis=1)
                    tt.compute_transform(
                        audio,
                        os.path.join(
                            feature_path, self.style,
                            str(c).encode('base64', 'strict') + '_' +
                            str(chnk) + '.data'),
                        phase=False)
                    tt.saveTensor(melody_g, '__g_')
                    tt.saveTensor(melody_e, '__e_')
                    audio = None
                    melody_g = None
                    melody_e = None
                except GetOutOfLoop:
                    pass
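
A side note on the filename scheme used above: in Python 2, str(c).encode('base64', 'strict') yields standard Base64 with a trailing newline and possibly '/' characters, both awkward inside file names. A Python 3 adaptation that sidesteps the '/' issue (an illustration, not what the snippet does) could be:

import base64

def combo_tag(c):
    # filesystem-safe Base64 of the combo's string form
    return base64.urlsafe_b64encode(str(c).encode('utf-8')).decode('ascii')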
Example #5
            nsamples = int(getattr(kwargs, 'nsamples'))
        else:
            nsamples = 0
        if getattr(kwargs, 'function'):
            function = getattr(kwargs, 'function')
        else:
            function = 'build_ca'
        funcs = {'build_ca': build_ca}
        if function not in funcs:
            function = 'build_ca'

    path_in = [feature_path]

    #tt object needs to be the same as the one in compute_features
    tt = transformFFT(frameSize=1024,
                      hopSize=512,
                      sampleRate=44100,
                      window=hanning)

    ld1 = LargeDatasetMulti(path_transform_in=path_in,
                            nsources=4,
                            nsamples=nsamples,
                            batch_size=batch_size,
                            batch_memory=batch_memory,
                            time_context=time_context,
                            overlap=overlap,
                            nprocs=nprocs,
                            mult_factor_in=scale_factor,
                            mult_factor_out=scale_factor,
                            sampleRate=tt.sampleRate,
                            tensortype=theano.config.floatX)
    logging.info("  Maximum input:\t\t{:.6f}".format(ld1.getMax()))
    logging.info("  Minimum input:\t\t{:.6f}".format(ld1.getMin()))
    logging.info("  Mean input:\t\t{:.6f}".format(ld1.getMean()))
    logging.info("  Standard dev input:\t\t{:.6f}".format(ld1.getStd()))
    logging.info("  Maximum:\t\t{:.6f}".format(ld1.getMax(inputs=False)))
    logging.info("  Minimum:\t\t{:.6f}".format(ld1.getMin(inputs=False)))
    logging.info("  Mean:\t\t{:.6f}".format(ld1.getMean(inputs=False)))
    logging.info("  Standard dev:\t\t{:.6f}".format(ld1.getStd(inputs=False)))

    if not os.path.exists(os.path.join(output, 'output', model)):
        os.makedirs(os.path.join(output, 'output', model))
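
On Python 3.2+ the existence check above can be folded into the call itself:

    os.makedirs(os.path.join(output, 'output', model), exist_ok=True)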