Example 1
 def run(self):
     logger.info('Launched computation of dynamics map')
     tick = time.time()
     self.Q_map = self.env.compute_dynamics_map()
     tock = time.time()
     logger.info(f'Done in {tock-tick:.2f} s.')
     npsave(str(self.save_path), self.Q_map, allow_pickle=True)
     logger.info(f'Output saved in {str(self.save_path)}.npy')
Example 2
 def save_trained_artefacts(self, filename='cartpole_artefacts'):
     """ Saves Q-table and episode index into a numpy file.
     Episode index is considered to be the time-step within an episode from where a successful streak started.
     
     Keyword Arguments:
         filename {str} -- Name of the artefacts file (default: {'cartpole_artefacts'})
     """
     artefacts_to_save = {
         'Q': self.Q,
         'successful_episode_index': self.episodes_taken
     }
     npsave(filename, artefacts_to_save)
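Since np.save pickles the artefacts dict, reading it back requires allow_pickle and .item(). A minimal load sketch, assuming the default filename:

from numpy import load as npload

# np.save appended '.npy'; the pickled dict comes back as a 0-d object
# array, so .item() recovers the original dict.
artefacts = npload('cartpole_artefacts.npy', allow_pickle=True).item()
Q = artefacts['Q']
episode_index = artefacts['successful_episode_index']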
Example 3
    def store(self, batch):
        r"""
Stores a batch on file and returns a stored batch. This method can be passed as *store* argument to create an open :class:`Sample` instance.
    """
        #-------------------------------------------------------------------------------
        with self.base.open('rb+') as u:
            n = int(pickle.load(u))
            u.seek(0)
            pickle.dump(n + 1, u)
            u.truncate()
        p = self.getpath(n)
        npsave(p, batch)
        return npload(p, mmap_mode='r+')
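store() assumes self.base already contains a pickled batch counter. A one-time initialisation sketch, assuming base is a pathlib.Path (hypothetical setup code, not part of the original class):

import pickle
from pathlib import Path

base = Path('batches.count')  # hypothetical counter file
# Seed the counter that store() loads, increments, and writes back.
with base.open('wb') as u:
    pickle.dump(0, u)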
Example 4
 def testAbstractDatasetDescriptor(self):
     array = self._createNumpyArray()
     from tempfile import mkstemp
     from numpy import save as npsave
     (osfd, filename) = mkstemp(suffix='.npy', prefix='scisofttmp-')
     os.close(osfd)
     try:
         npsave(filename, array)
     except:
         # If we failed to write the file, remove it
         # mkstemp returned a new file that did not exist, so 
         # we can't be removing someone else's file
         os.remove(filename)
         raise
     add = dnp.rpc.datasetdescriptor(filename=filename, deleteAfterLoad=False)
     self._flattenAndUnflatten(add, array, dnp.ndarray)
     self.assertTrue(os.path.exists(filename))
     os.remove(filename)
Example 5

def main():

    ## set the printfiles option to False and you will simply read the s_collection_ft etc. back in from ../data_fs/extern/*.npy
    collection = simulate_tof(nwaveforms=16,nelectrons=12,e_retardation=530,e_photon=600,printfiles = False)

    ### Writing output files ###
    print('### Writing output files ###')
    collection_name = '../data_fs/extern/CookieBox_waveforms.randomsources.dat'
    print(collection_name)
    savetxt(collection_name,collection,fmt='%4f')

    collection_name = '../data_fs/extern/CookieBox_waveforms.randomsources'
    print(collection_name)
    npsave(collection_name,collection)

    integration_name = '../data_fs/extern/integration.randomsources.dat'
    print(integration_name)
    out = column_stack((collection[:,0],npsum(collection[:,1:],axis=1)))
    savetxt(integration_name,out,fmt='%4f')


    imageoutpath = '../data_fs/raw/'
    nimages = int(10)
    xrayintensities = gengamma.rvs(a=2,c=1,loc=0,scale=1,size=nimages)
    (nu_center, nu_width) = (560.,2.5)
    photonenergies = nu_center+nu_width*randn(nimages)

    (s,n,f,t) = readimpulseresponses('../data_fs/extern/')
    img = int(0)
    nwaveforms = int(16)
    retvec = ones(nwaveforms,dtype=float) * 520.
    transvec = ones(nwaveforms,dtype=float)
    collection = simulate_cb(s,n,f,t,retardations=retvec,transmissions=transvec,intensity = xrayintensities[img],photonenergy=photonenergies[img])

    ### Writing output files ###
    print('### Writing output files ###')
    collection_name = imageoutpath + 'CookieBox_waveforms.image%04i.dat' % img
    print(collection_name)
    savetxt(collection_name,collection[:,1:],fmt='%4f')

    collection_name = imageoutpath + 'CookieBox_waveforms.times.dat'
    print(collection_name)
    savetxt(collection_name,collection[:,0],fmt='%4f')
Example 6
def extract_features(fname, bdir, sox, htk_mfc, mfc_extension, stereo_wav,
        gammatones, spectrograms, filterbanks):
#def extract_features(fname, bdir):
    if fname[-4:] != '.wav':
        return
    rawfname = bdir+'/'+fname[:-4]+'.rawaudio'
    wavfname = bdir+'/'+fname
    tempfname = bdir+'/'+fname[:-4]+'_temp.wav'
    # temp fname with .wav for sox
    mfccfname = bdir+'/'+fname[:-4]+mfc_extension
    if sox:
        shutil.move(wavfname, tempfname)
        call(['sox', tempfname, wavfname])
        #call(['sox', '-G', tempfname, '-r 16k', wavfname])
        # w/o headers, sox uses extension
        shutil.move(tempfname, rawfname)
    if htk_mfc:
        call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])
    srate = 16000
    #srate, sound = wavfile.read(wavfname)
    sound, srate = readwav(wavfname)
    if stereo_wav and len(sound.shape) == 2: # in mono sound is a list
        sound = 0.5 * (sound[:, 0] + sound[:, 1])
        # for stereo wav, sum both channels
    if gammatones:
        gammatonefname = bdir+'/'+fname[:-4]+'_gamma.npy'
        tmp_snd = loadsound(wavfname)
        gamma_cf = erbspace(20*Hz, 20*kHz, N_GAMMATONES_FILTERS)
        gamma_fb = Gammatone(tmp_snd, gamma_cf)
        with open(gammatonefname, 'w') as o_f:
            npsave(o_f, gamma_fb.process())
    if spectrograms:
        powerspec, _, _, _ = specgram(sound, NFFT=int(srate
            * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate
                * SPECGRAM_OVERLAP)) # TODO
        specgramfname = bdir+'/'+fname[:-4]+'_specgram.npy'
        with open(specgramfname, 'w') as o_f:
            npsave(o_f, powerspec.T)
    if filterbanks:
        # convert to Mel filterbanks
        fbanks = Spectral(nfilt=N_FBANKS,      # nb of filters in mel bank
                     alpha=0.97,               # pre-emphasis
                     do_dct=False,             # we do not want MFCCs
                     compression='log',
                     fs=srate,                 # sampling rate
                     lowerf=50,                # lower frequency
                     frate=FBANKS_RATE,        # frame rate
                     wlen=FBANKS_WINDOW,       # window length
                     nfft=1024,                # length of dft
                     do_deltas=False,          # speed
                     do_deltasdeltas=False     # acceleration
                     )
        sound /= np.abs(sound).max(axis=0)  # TODO put that as option
        fbank = fbanks.transform(sound)
        fbanksfname = bdir+'/'+fname[:-4]+'_fbanks.npy'
        with open(fbanksfname, 'w') as o_f:
            npsave(o_f, fbank)
    # TODO wavelets scattergrams / scalograms
    print "dealt with file", wavfname
Example 7
    def train(self, data):
        n_temp = len(self.args.T_list)
        for (iT, T) in enumerate(self.args.T_list):
            self.model = get_model(data.train_in.shape,
                                   feat_ext=self.args.FEAT,
                                   hid_act=self.args.ACT,
                                   hid_filters=self.args.HF,
                                   kernels=self.args.K,
                                   pbc=self.args.PBC)

            self.model.compile(optimizer=self.args.OPT,
                               loss=self.loss,
                               metrics=self.metrics_list)

            hist = self.model.fit(
                x=data.train_in[iT * self.args.nTR:(iT + 1) *
                                self.args.nTR][:self.args.TRS],
                y=data.train_out[iT * self.args.nTR:(iT + 1) *
                                 self.args.nTR][:self.args.TRS],
                batch_size=self.args.BS,
                epochs=self.args.EP,
                verbose=self.args.VB,
                callbacks=self.callbacks,
                validation_data=(
                    data.test_in[iT * self.args.nTE:(iT + 1) *
                                 self.args.nTE][:self.args.VALS],
                    data.test_out[iT * self.args.nTE:(iT + 1) *
                                  self.args.nTE][:self.args.VALS]))

            self.metrics = get_metrics(hist, reg=self.reg_flag)

            ### Save files ###
            npsave('%s/%s/Met%.4f.npy' % (self.args.metrics_dir, self.name, T),
                   self.metrics)
            self.model.save('%s/%s/Net%.4f.h5' %
                            (self.args.model_dir, self.name, T))

            print('Temperature %d / %d done!' % (iT + 1, n_temp))
Example 8
    def train(self, data, run_time=0, save_models=True):
        hist = self.model.fit(x=data.train_in[:self.args.TRS],
                              y=data.train_out[:self.args.TRS],
                              batch_size=self.args.BS,
                              epochs=self.args.EP,
                              verbose=self.args.VB,
                              callbacks=self.callbacks,
                              validation_data=(data.test_in[:self.args.VALS],
                                               data.test_out[:self.args.VALS]))

        self.metrics = get_metrics(hist, reg=self.reg_flag)

        ### Save files ###
        if save_models:
            npsave(
                '%s/%s_MReg%.2fEReg%.2fB%d_Ver%dRun%d.npy' %
                (self.args.metrics_dir, self.name, self.args.magR,
                 self.args.enR, self.args.BS, self.args.VER, run_time),
                self.metrics)
            self.model.save(
                '%s/%s_MReg%.2fEReg%.2fB%d_Ver%dRun%d.h5' %
                (self.args.model_dir, self.name, self.args.magR, self.args.enR,
                 self.args.BS, self.args.VER, run_time))
Example 9
def save_outputs(output, serie=0, prefix=None, as_text=True):
    """
    save output as numpy format in a file and increment file number if file already exists
    """
    idx = 0
    fprefix = prefix or 'out_'
    if as_text is True:
        ext = 'txt'
    else:
        ext = 'npy'

    fname = '{0}{1:04d}_{2:04d}.{3}'.format(fprefix, serie, idx, ext)

    while exists(fname):
        idx += 1
        fname = '{0}{1:04d}_{2:04d}.{3}'.format(fprefix, serie, idx, ext)

    if ext == 'txt':
        npsavetxt(fname, output)
    else:
        npsave(fname, output)
    print('data saved in file [{0}]'.format(fname))
    return fname
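A usage sketch: repeated calls with the same serie never overwrite, because the index scan restarts from zero on every call (filenames below assume the default prefix):

import numpy as np

out = np.arange(12).reshape(3, 4)
save_outputs(out, serie=1)                  # writes out_0001_0000.txt
save_outputs(out, serie=1)                  # writes out_0001_0001.txt
save_outputs(out, serie=1, as_text=False)   # writes out_0001_0000.npy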
Example 10
#!/usr/bin/env python3
"""
INPUTS
    Explicit:
        film_xmol   ->  Particle positions
        vel_dat     ->  Particle velocities

OUTPUTS
    Implicit:

    Explicit:
        r_all.npy
        v_all.npy
"""

from scrapfilm import scrapfilm
from numpy import save as npsave

sf = scrapfilm('film_xmol', 'vel.dat')

r_all = sf.read_film()

v_all = sf.read_vel()

npsave('r_all', r_all)
npsave('v_all', v_all)
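np.save appends the .npy suffix, so a later consumer would read the two arrays back along these lines (a sketch, assuming the script above has run):

from numpy import load as npload

r_all = npload('r_all.npy')
v_all = npload('v_all.npy')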
Example 11
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Usage: db_dumpfeat.py -i <dbroot> -d <dataout> -l <labelout>

Options:
    -h --help  show this message
"""


def parse_args():
    args = docopt(__doc__)
    dbroot = args['<dbroot>']
    dataout = args['<dataout>']
    labelout = args['<labelout>']
    return dbroot, dataout, labelout

if __name__ == '__main__':
    from numpy import save as npsave
    from mscr.data import Data
    from mscr.features import Features
    from docopt import docopt
    from cv2.xfeatures2d import SURF_create as SURF

    dbroot, dataout, labelout = parse_args()
    data = Data(Features(SURF()))
    X, y = data.load(dbroot, {'txt': 0, 'ms': 1})
    npsave(dataout, X)
    npsave(labelout, y)
Example 12
              xmin=xmin,
              xmax=xmax,
              ymin=ymin,
              ymax=ymax,
              rmin=rmin,
              rmax=rmax,
              thetamin=thetamin,
              thetamax=thetamax,
              progress=progress)

t0 = time.time()
new_data = transform(filtered.real,
                     polar2cartesian,
                     order=3,
                     output_shape=(H_new, W_new),
                     extra_keywords=kwargs)
print >> msgout, ""
print >> msgout, "Reconstruction done: %.2f seconds" % (time.time() - t0)

t0 = time.time()
print >> msgout, "Writing data to '%s'..." % args.output,
msgout.flush()
if args.output != "-":
    outfile = open(args.output, "wb")
npsave(outfile, "data")
npsave(outfile, new_metadata)
npsave(outfile, new_data)
if outfile is not sys.stdout:
    outfile.close()
print >> msgout, "%.2f seconds" % (time.time() - t0)
Example 13

def fillimpulseresponses(printfiles=True, samplefiles=False):
    (s_collection_ft,n_collection_ft) = (nparray([0,0,0],dtype=complex),nparray([0,0,0],dtype=complex))
    filepath = '../data_fs/ave1/'
    filematch = filepath + 'C1--LowPulseHighRes-in-100-out1700-an2100--*.txt'
    filelist = glob.glob(filematch)


    print('filling impulse response files\n\tnum files = %i' % len(filelist))

    for i,f in enumerate(filelist):

        ## processing images 
        ## samplefiles = False
        m = re.search('(.+).txt$',f)
        if (i%10 == 0 and samplefiles):
            outname_spect = m.group(1) + '.spect.dat'
            outname_time = m.group(1) + '.time.dat'
            outname_simTOF = m.group(1) + '.simTOF.dat'

        fi = open(f, "r")
        for passline in range(6):
            headline = '# ' + fi.readline()
        (t,v) = fi.readline().split()
        v_vec=nparray(float(v),dtype=float)
        t_vec=nparray(float(t)*1.e9,dtype=float)
        for line in fi:
            (t,v) = line.split()
            v_vec = row_stack((v_vec,float(v)))
            t_vec = row_stack((t_vec,float(t)*1.e9))
        fi.close()
        #Get the mean time-step for sake of frequencies
        dt = mean(diff(t_vec,n=1,axis=0))
        #FFT the vector
        v_vec_ft = FFT(v_vec,axis=0)
        f = FREQ(v_vec_ft.shape[0],dt)
        m_extend = 10
        f_extend = FREQ(v_vec_ft.shape[0]*m_extend,dt)
        t_extend = arange(0,((t_vec[-1]-t_vec[0])+dt)*m_extend,dt)
        # deep copy for the noise estimation
        n_vec_ft = npcopy(v_vec_ft)
        # find indices where there is only noise in the power, and indices with predominantly signal
        # replace the signal elements in the noise vector with a random sampling from the noise portion
        chooseinds = nparray([i for i,nu in enumerate(f) if (npabs(nu)> 6.5 and npabs(nu)<(20))])
        replaceinds = nparray([i for i,nu in enumerate(f) if npabs(nu)< 6.5])
        values = choice(n_vec_ft[chooseinds,0],len(replaceinds))
        n_vec_ft[replaceinds,0] = values

        ## build noise vector and add to n_collection_ft
        # sort inds for f and use for interp to extend noise in fourier domain
        inds = argsort(f)
        n_vec_extend_ft_r = interp(f_extend,f[inds],npabs(n_vec_ft[inds,0]))
        n_vec_extend_ft_phi = choice(npangle(n_vec_ft[:,0]),f_extend.shape[0])
        n_vec_extend_ft = nprect(n_vec_extend_ft_r,n_vec_extend_ft_phi)
        n_vec_extend_ft.shape = (n_vec_extend_ft.shape[0],1)
        
        if n_collection_ft.shape[0] < n_vec_extend_ft.shape[0]:
            n_collection_ft = npcopy(n_vec_extend_ft)
           # s_collection_ft.shape = (s_collection_ft.shape[0],1)
        else:
            n_collection_ft = column_stack((n_collection_ft,n_vec_extend_ft))

        ## build signal vector and add to s_collection_ft
        noiseamp = nppower(mean(npabs(values)),int(2))
        sigamp = nppower(mean(npabs(nparray([v_vec_ft[i,0] for i,nu in enumerate(f) if npabs(nu) < 1.0]))),int(2))
        s_vec_ft = npcopy(v_vec_ft)
        s_vec_ft[:,0] *= Weiner(f,sigamp,noiseamp,cut = 5,p = 4) * fourier_delay(f,-40) ## Wiener filter and dial back by 40 ns

        if samplefiles and i % 10 == 0:
            out = column_stack((f,npabs(v_vec_ft),npabs(n_vec_ft),npabs(s_vec_ft)))
            savetxt(outname_spect,out,fmt='%.4f')

        s_vec = real(IFFT(s_vec_ft,axis=0))
        s_vec_extend = zeros((f_extend.shape[0],1),dtype=float) 
        s_vec_extend[:s_vec.shape[0],0] = s_vec[:,0]
        s_vec_extend_ft = FFT(s_vec_extend,axis=0)

        if s_collection_ft.shape[0] < s_vec_extend_ft.shape[0]:
            s_collection_ft = npcopy(s_vec_extend_ft)
           # s_collection_ft.shape = (s_collection_ft.shape[0],1)
        else:
            s_collection_ft = column_stack((s_collection_ft,s_vec_extend_ft))

        # first sum all the Wiener-filtered and fourier_delay() signals, then add the single noise vector back
    if printfiles:
        outpath = '../data_fs/extern/'
        filename = outpath + 'signal_collection_ft'
        npsave(filename,s_collection_ft)
        filename = outpath + 'noise_collection_ft'
        npsave(filename,n_collection_ft)
        filename = outpath + 'frequencies_collection'
        npsave(filename,f_extend)
        filename = outpath + 'times_collection'
        npsave(filename,t_extend)

    return (s_collection_ft,n_collection_ft,f_extend,t_extend)
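These four files are what the comment in Example 5's main() refers to reading back (np.save added the .npy suffix to each name). A matching loader sketch:

from numpy import load as npload

outpath = '../data_fs/extern/'
s_collection_ft = npload(outpath + 'signal_collection_ft.npy')
n_collection_ft = npload(outpath + 'noise_collection_ft.npy')
f_extend = npload(outpath + 'frequencies_collection.npy')
t_extend = npload(outpath + 'times_collection.npy')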
Example 14
 def save_dark(self):
     npsave(self.pathToDark, self.darkData)
Example 15
def process(folder,
            debug=False,
            htk_mfc=False,
            forcemfcext=False,
            stereo_wav=False,
            gammatones=False,
            spectrograms=False,
            filterbanks=False,
            sox=True):
    """ applies to all *.wav in folder """

    # first find if we produce normalized MFCC, otherwise note it in the ext
    # because we can then normalize on the whole corpus with another py script
    mfc_extension = '.mfc_unnorm'
    wcfg = open('wav_config', 'r')
    for line in wcfg:
        if "ENORMALISE" in line:
            mfc_extension = '.mfc'
    if forcemfcext:
        mfc_extension = '.mfc'
    print "MFC extension:", mfc_extension
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            print >> sys.stderr, "You need Brian Hears"
            print >> sys.stderr, "http://www.briansimulator.org/docs/\
                    hears.html"

            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            print >> sys.stderr, "You need Pylab"
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral
        except ImportError:
            print >> sys.stderr, "You need spectral (in the parent folder)"
            print >> sys.stderr, "https://github.com/mwv/spectral"
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    # and put a header to the waves, save the originals as .rawaudio
    # use HCopy to produce MFCC files according to "wav_config" file
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if fname[-4:] != '.wav':
                continue
            rawfname = bdir + '/' + fname[:-4] + '.rawaudio'
            wavfname = bdir + '/' + fname
            tempfname = bdir + '/' + fname[:-4] + '_temp.wav'
            # temp fname with .wav for sox
            mfccfname = bdir + '/' + fname[:-4] + mfc_extension
            if sox:
                shutil.move(wavfname, tempfname)
                call(['sox', tempfname, wavfname])
                # w/o headers, sox uses extension
                shutil.move(tempfname, rawfname)
            if htk_mfc:
                call(['HCopy', '-C', 'wav_config', wavfname, mfccfname])
            srate = 16000
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
                sound = sound[:, 0] + sound[:, 1]
                # for stereo wav, sum both channels
            if gammatones:
                gammatonefname = bdir + '/' + fname[:-4] + '_gamma.npy'
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                with open(gammatonefname, 'w') as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound,
                    NFFT=int(srate * SPECGRAM_WINDOW),
                    Fs=srate,
                    noverlap=int(srate * SPECGRAM_OVERLAP))  # TODO
                specgramfname = bdir + '/' + fname[:-4] + '_specgram.npy'
                with open(specgramfname, 'w') as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # assume parameters are fixed
                    fbanks = Spectral(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        do_dct=False,  # we do not want MFCCs
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        do_deltas=False,  # speed
                        do_deltasdeltas=False  # acceleration
                    )
                fbank = fbanks.transform(sound)[0]  # first dimension is for
                # deltas & deltasdeltas
                fbanksfname = bdir + '/' + fname[:-4] + '_fbanks.npy'
                with open(fbanksfname, 'w') as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print "dealt with file", wavfname
Example 16
    def initialization_data_preprocessing_function(object):
        '''
        Initialize data preprocessing.

        Args:
            object:
            *path:

        Returns:

        '''

        variable_character_encoding = character_encoding_function(object)
        INIT_TFIDF = TFIDF_function(object)
        variable_TFIDF = INIT_TFIDF['TF-IDF']
        variable_TF = INIT_TFIDF["TF"]
        variable_IDF = INIT_TFIDF["IDF"]
        variable_words_counter_from_TFIDF = INIT_TFIDF["words_counter"]
        variable_document_count = INIT_TFIDF["document_count"]
        variable_init_TF = INIT_TFIDF["init_TF"]
        variable_vocabulary_from_TFIDF = INIT_TFIDF["vocabulary_from_TF-IDF"]

        INIT_inverted_index = inverted_index_function(object)
        variable_article_tokens = INIT_inverted_index["article_tokens"]
        variable_words_dictionary = INIT_inverted_index["words_dictionary"]
        variable_inverted_index = INIT_inverted_index["inverted_index"]
        variable_onehots_encoding_for_each_article = variable_character_encoding[
            "onehots_encoding_for_each_article"]
        variable_counter_vectors = variable_character_encoding[
            "counter_vectors"]
        variable_probabilistic_feature_of_words_in_each_article = variable_character_encoding[
            "probabilistic_feature_of_words_in_each_article "]
        variable_vocabulary = variable_character_encoding["vocabulary"]
        variable_probabilistic_feature_dictionary = variable_character_encoding[
            "probabilistic_feature_dictionary"]
        variable_probabilistic_feature_vectors = variable_character_encoding[
            "probabilistic_feature_vectors"]
        variable_counter = variable_character_encoding["counter"]

        return {
            "TFIDF": {
                "data": variable_TFIDF,
                "save": lambda data, path: npsave(path + "TFIDF", data),
                "state": True
            },
            "TF": {
                "data": variable_TF,
                "save": lambda data, path: npsave(path + "TF", data),
                "state": True
            },
            "IDF": {
                "data": variable_IDF,
                "save": lambda data, path, : npsave(path + "IDF", data),
                "state": True
            },
            "init_TF": {
                "data": variable_init_TF,
                "save": lambda data, path, : npsave(path + "init_TF", data),
                "state": True
            },
            "words_counter_from_TFIDF": {
                "data":
                variable_words_counter_from_TFIDF,
                "save":
                lambda data, path, : npsave(path + "words_counter_from_TFIDF",
                                            data),
                "state":
                True
            },
            "document_count": {
                "data":
                variable_document_count,
                "save":
                lambda data, path, : npsave(path + "document_count_from_TFIDF",
                                            data),
                "state":
                True
            },
            "vocabulary_from_TFIDF": {
                "data":
                variable_vocabulary_from_TFIDF,
                "save":
                lambda data, path, : data_init.save_set_function(
                    data, "vocabulary_from_TFIDF", path),
                "state":
                True
            },
            "inverted_index": {
                "data":
                variable_inverted_index,
                "save":
                lambda data, path: data_init.save_json_function(
                    data, 'inverted_index', path),
                "state":
                False
            }  # inverted index
            ,
            "article_tokens": {
                "data":
                variable_article_tokens,
                "save":
                lambda data, path: data_init.save_json_function(
                    data, "article_tokens", path),
                "state":
                False
            },
            "words_dictionary": {
                "data":
                variable_words_dictionary,
                "save":
                lambda data, path: data_init.save_set_function(
                    data, 'words_dictionary', path),
                "state":
                False
            },
            "onehots_encoding_for_each_article": {
                "data":
                variable_onehots_encoding_for_each_article,
                "save":
                lambda data, path: data_init.ndarrays_to_dataframe_function(
                    data,
                    filename="onehots_encoding_for_each_article",
                    column_names=variable_vocabulary),
                "state":
                False
            }  # one-hot encoding for each article
            ,
            "counter_vectors": {
                "data":
                variable_counter_vectors,
                "save":
                lambda data, path: data.to_csv(path + "counter_vectors.csv"),
                "state":
                True
            }  # count vectors
            ,
            "probabilistic_feature_of_words_in_each_article": {
                "data":
                variable_probabilistic_feature_of_words_in_each_article,
                "save":
                lambda data, path: data.
                to_csv(path +
                       "probabilistic_feature_of_words_in_each_article.csv"),
                "state":
                True
            }
            # probabilistic features of each article
            ,
            "vocabulary": {
                "data":
                variable_vocabulary,
                "save":
                lambda data, path: data_init.save_set_function(
                    data, "vocabulary", path),
                "state":
                False
            }  # vocabulary
            ,
            "probabilistic_feature_dictionary": {
                "data":
                variable_probabilistic_feature_dictionary,
                "save":
                lambda data, path: data_init.save_json_function(
                    data, 'probabilistic_feature_dictionary', path),
                "state":
                False
            }  # probabilistic feature dictionary
            ,
            "probabilistic_feature_vectors": {
                "data":
                variable_probabilistic_feature_vectors,
                "save":
                lambda data, path, : npsave(
                    path + "probabilistic_feature_vectors", data),
                "state":
                True
            }  # probabilistic feature vectors
        }  # total word count
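Each entry of the returned table couples the data with a save callback and a state flag. A hedged driver sketch, assuming artefacts is the table returned above and path is a writable directory prefix:

# Persist every artefact whose "state" flag is set; entries flagged False
# (raw tokens, dictionaries, ...) are skipped.
for name, entry in artefacts.items():
    if entry["state"]:
        entry["save"](entry["data"], path)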
Example 17
def process(folder,debug=False,htk_mfcc=False,forcemfcext=False,stereo_wave=False,gammatones=False,spectograms=False,filterbanks=False,sox=True):
    mfc_extension = '.mfc_unnorm'
    wcfg = open('wav_config','r')
    for line in wcfg:
        if "ENORMALISE" in line:
            mfc_extension = '.mfc'

    if forcemfcext:
        mfc_extension = '.mfc'
    print "MFC Extension is", mfc_extension
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone

        except ImportError:
            print >> sys.stderr, "You need Brian Hears"

            sys.exit(-1)

    if spectograms:
        try:
            from pylab import specgram

        except ImportError:
            print >> sys.stderr,'You need Pylab'
            sys.exit(-1)

    fbanks = None
    if filterbanks:
        try:
            sys.path.append('../spectral')
            from spectral import Spectral

        except ImportError:
            print >> sys.stderr, 'you need spectral (in the parent folder)'
            sys.exit(-1)

    for bdir, _ , files in  os.walk(folder):
        for fname in files:
            if fname[-4:] != '.WAV':
                continue
            rawfname= bdir + '/' + fname[:-4]+'.rawaudio'
            wavfname = bdir + '/'+ fname
            tempfname = bdir + '/' + fname[:-4] + '_temp.wav'
            mfccfname = bdir + '/' + fname[:-4] + '.txt'
            if sox:
                shutil.move(wavfname, tempfname)
                call(['sox',tempfname,wavfname])
                shutil.move(tempfname, rawfname)

            if htk_mfcc:
                call(['HCopy','-C','wav_config',wavfname,mfccfname])
            srate = 16000

            srate, sound = wavfile.read(wavfname)
            if stereo_wave and len(sound.shape) == 2:
                sound = sound[:,0]+ sound[:,1]
            if gammatones:
                gammatonefname = bdir + '/' + fname[:-4] + '_gamma.npy'
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20*Hz, 20*kHz, n_gammatones_filters)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                with open(gammatonefname,'w') as o_f:
                    npsave(o_f, gamma_fb.process())

            if spectograms:
                powerspec, _, _, _ = specgram(sound, NFFT=int(srate * specgram_window), Fs=srate, noverlap=int(srate * specgram_overlap))
                specgramfname = bdir + '/' + fname[:-4]+'_specgram.npy'
                with open(specgramfname,'w') as o_f:
                    npsave(o_f , powerspec.T)
            if filterbanks:
                if fbanks is None:
                    fbanks = Spectral(nfilt = n_fbanks, alpha=0.97,do_dct=False, fs=srate, frate=fbanks_rate, wlen=fbanks_window,nfft=1024,do_deltas=False,do_deltasdeltas=False)
                fbank = fbanks.transform(sound)[0]
                fbanksfname = bdir + '/' + fname[:-4]+'_fbanks.npy'
                with open(fbanksfname,'w') as o_f:
                    npsave(o_f, fbank)
            print "Dealt with the file ", wavfname
Example 18
                                   nmax=args.nmax)
    jm = parallel.JobManager(threads=args.threads)
    jm.jobqueue.put(qinv)
    while True:
        with qinv.status:
            qinv.status.wait()
            print >> msgout, "Performing inversion: %.2f%% done.\r" % (
                qinv.jobsdone * 100.0 / qinv.jobcount),
            msgout.flush()
            if qinv.jobsdone >= qinv.jobcount or not qinv.running:
                break
    print >> msgout, ""
    reconstructed = qinv.reconstruct().real
    output_metadata = array(
        (qinv.rmin, qinv.rmax, qinv.thetamin, qinv.thetamax))
else:
    print >> sys.stderr, "Error: Unknown Broken Ray transform type: '%s'" % brt_type
    sys.exit()

print >> msgout, "Writing data to '%s'..." % args.output,
msgout.flush()
t0 = time.time()
if args.output != "-":
    outfile = open(args.output, "wb")
npsave(outfile, "data")
npsave(outfile, output_metadata)
npsave(outfile, reconstructed)
print >> msgout, "%.2f seconds" % (time.time() - t0)
if outfile is not sys.stdout:
    outfile.close()
Example 19
def process(
    folder,
    debug=False,
    htk_mfc=False,
    forcemfcext=False,
    stereo_wav=False,
    gammatones=False,
    spectrograms=False,
    filterbanks=False,
    sox=True,
):
    """ applies to all *.wav in folder """

    # first find if we produce normalized MFCC, otherwise note it in the ext
    # because we can then normalize on the whole corpus with another py script
    mfc_extension = ".mfc_unnorm"
    wcfg = open("wav_config", "r")
    for line in wcfg:
        if "ENORMALISE" in line:
            mfc_extension = ".mfc"
    if forcemfcext:
        mfc_extension = ".mfc"
    print "MFC extension:", mfc_extension
    if gammatones:
        try:
            from brian import Hz, kHz
            from brian.hears import loadsound, erbspace, Gammatone
        except ImportError:
            print >> sys.stderr, "You need Brian Hears"
            print >> sys.stderr, "http://www.briansimulator.org/docs/\
                    hears.html"
            sys.exit(-1)
    if spectrograms:
        try:
            from pylab import specgram
        except ImportError:
            print >> sys.stderr, "You need Pylab"
            sys.exit(-1)
    fbanks = None
    if filterbanks:
        try:
            sys.path.append("../spectral")
            from spectral import Mel
        except ImportError:
            print >> sys.stderr, "You need spectral (in the parent folder)"
            print >> sys.stderr, "https://github.com/mwv/spectral"
            sys.exit(-1)

    # run through all the folders and files in the path "folder"
    # and put a header to the waves, save the originals as .rawaudio
    # use HCopy to produce MFCC files according to "wav_config" file
    for bdir, _, files in os.walk(folder):
        for fname in files:
            if fname[-4:] != ".wav":
                continue
            rawfname = bdir + "/" + fname[:-4] + ".rawaudio"
            wavfname = bdir + "/" + fname
            tempfname = bdir + "/" + fname[:-4] + "_temp.wav"
            # temp fname with .wav for sox
            mfccfname = bdir + "/" + fname[:-4] + mfc_extension
            if sox:
                shutil.move(wavfname, tempfname)
                call(["sox", tempfname, wavfname])
                # w/o headers, sox uses extension
                shutil.move(tempfname, rawfname)
            if htk_mfc:
                call(["HCopy", "-C", "wav_config", wavfname, mfccfname])
            srate = 16000
            srate, sound = wavfile.read(wavfname)
            if stereo_wav and len(sound.shape) == 2:  # in mono sound is a list
                sound = sound[:, 0] + sound[:, 1]
                # for stereo wav, sum both channels
            if gammatones:
                gammatonefname = bdir + "/" + fname[:-4] + "_gamma.npy"
                tmp_snd = loadsound(wavfname)
                gamma_cf = erbspace(20 * Hz, 20 * kHz, N_GAMMATONES_FILTERS)
                gamma_fb = Gammatone(tmp_snd, gamma_cf)
                with open(gammatonefname, "w") as o_f:
                    npsave(o_f, gamma_fb.process())
            if spectrograms:
                powerspec, _, _, _ = specgram(
                    sound, NFFT=int(srate * SPECGRAM_WINDOW), Fs=srate, noverlap=int(srate * SPECGRAM_OVERLAP)
                )  # TODO
                specgramfname = bdir + "/" + fname[:-4] + "_specgram.npy"
                with open(specgramfname, "w") as o_f:
                    npsave(o_f, powerspec.T)
            if filterbanks:
                # convert to Mel filterbanks
                if fbanks is None:  # assume parameters are fixed
                    fbanks = Mel(
                        nfilt=N_FBANKS,  # nb of filters in mel bank
                        alpha=0.97,  # pre-emphasis
                        fs=srate,  # sampling rate
                        frate=FBANKS_RATE,  # frame rate
                        wlen=FBANKS_WINDOW,  # window length
                        nfft=1024,  # length of dft
                        mel_deltas=False,  # speed
                        mel_deltasdeltas=False,  # acceleration
                    )
                fbank = fbanks.transform(sound)[0]  # first dimension is for
                # deltas & deltasdeltas
                fbanksfname = bdir + "/" + fname[:-4] + "_fbanks.npy"
                with open(fbanksfname, "w") as o_f:
                    npsave(o_f, fbank)
            # TODO wavelets scattergrams / scalograms
            print "dealt with file", wavfname
Example 20
 def save_state(self, file_path):
     npsave(file_path, self.holdkey_matrix)