Esempio n. 1
0
    def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
        '''
        Clean a speech-recognition metadata file and load it into a CAS table

        The file is processed by ``DataClean``, uploaded under a randomly
        generated table name, and then rewritten in place with an extra
        ``_fName_`` column that is a copy of ``_filename_``.

        Parameters
        ----------
        conn : CAS
            A connection object to the current session.
        path : string
            Location of the input metadata file.
        audio_path : string
            Forwarded unchanged to ``DataClean.process_contents`` as
            ``audio_path``.
            NOTE(review): presumably the location of the audio files
            referenced by the metadata -- confirm against ``DataClean``.

        Returns
        -------
        :class:`CASTable`
            Built from the value returned by ``DataClean.create_castable``.

        '''
        table_name = random_name('AudioTable_Metadata', 6)

        cleaner = DataClean(conn=conn, contents_as_path=path)
        cleaned = cleaner.process_contents(audio_path=audio_path)
        tbl = cleaner.create_castable(
            cleaned['results'],
            table_name,
            replace=True,
            promote=False,
            col_names=cleaned['col_names'],
        )

        # Computed-column program: a 1000-character column holding the file name.
        fname_program = ('length _fName_ $1000; '
                         '_fName_ = _filename_; ')
        with_fname = CASTable(tbl,
                              computedvars=['_fName_'],
                              computedvarsprogram=fname_program)

        # Write the view (including the computed column) back over the same table.
        conn.table.partition(table=with_fname,
                             casout={'name': tbl, 'replace': True})

        return CASTable(tbl)
Esempio n. 2
0
    def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
        '''
        Clean a speech-recognition metadata file and load it into a CAS table

        The file is processed by ``DataClean``, uploaded under a randomly
        generated table name, and then rewritten in place with an extra
        ``_fName_`` varchar column that is a copy of ``_filename_``.

        Parameters
        ----------
        conn : CAS
            A connection object to the current session. When None, the
            connection returned by ``cls.get_connection()`` is used instead.
        path : string
            Location of the input metadata file.
        audio_path : string
            Forwarded unchanged to ``DataClean.process_contents`` as
            ``audio_path``.
            NOTE(review): presumably the location of the audio files
            referenced by the metadata -- confirm against ``DataClean``.

        Returns
        -------
        :class:`CASTable`
            Built from the value returned by ``DataClean.create_castable``.

        Raises
        ------
        DLPyError
            If no connection is passed in and ``cls.get_connection()`` returns None.

        Examples
        --------
        >>> import swat
        >>> from dlpy.audio import AudioTable
        >>> s=swat.CAS("cloud.example.com", 5570)
        >>> aud_tbl = AudioTable.load_audio_metadata_speechrecognition(s, path="/path/to/metadata/file.txt", audio_path="/path/to/audio/file.txt")
        >>> aud_tbl.set_connection(s)
        '''

        # Fall back to the class-level connection; give up if that is missing too.
        if conn is None:
            conn = cls.get_connection()
            if conn is None:
                raise DLPyError('cannot get a connection object to the current session.')

        table_name = random_name('AudioTable_Metadata', 6)

        cleaner = DataClean(conn=conn, contents_as_path=path)
        cleaned = cleaner.process_contents(audio_path=audio_path)
        tbl = cleaner.create_castable(
            cleaned['results'],
            table_name,
            replace=True,
            promote=False,
            col_names=cleaned['col_names'],
        )

        # Computed-column program: a varchar column holding the file name.
        fname_program = ('length _fName_ varchar(*); '
                         '_fName_ = _filename_; ')
        with_fname = CASTable(tbl,
                              computedvars=['_fName_'],
                              computedvarsprogram=fname_program)

        # Write the view (including the computed column) back over the same table.
        conn.table.partition(table=with_fname,
                             casout={'name': tbl, 'replace': True})

        return CASTable(tbl)
Esempio n. 3
0
    def __extract_audio_features(conn, table, frame_shift=10, frame_length=25, n_bins=40, n_ceps=40,
                                 feature_scaling_method='STANDARDIZATION', n_output_frames=500, casout=None,
                                 label_level=0,
                                 random_shuffle=True,
                                 **kwargs):
        '''
        Run ``audio.computefeatures`` on a table and wrap the result as an AudioTable

        After the action succeeds, the output table is rewritten in place with a
        computed ``_fName_`` column (the part of ``_path_`` after the last path
        separator) and, when `label_level` is non-zero, a ``_label_`` column.

        Parameters
        ----------
        conn : CAS
            A connection object to the current session.
        table : AudioTable or CASTable
            Input table passed to the action.
        frame_shift : int, optional
            Passed as ``frameExtractionOptions.frameshift``.
            Default: 10
        frame_length : int, optional
            Passed as ``frameExtractionOptions.framelength``.
            Default: 25
        n_bins : int, optional
            Passed as ``melBanksOptions.nbins``.
            Default: 40
        n_ceps : int, optional
            Passed as ``mfccOptions.nceps``; also stored as ``feature_size``
            on the returned table.
            Default: 40
        feature_scaling_method : string, optional
            Passed as ``featureScalingMethod``.
            Default: 'STANDARDIZATION'
        n_output_frames : int, optional
            Passed as ``nOutputFrames``.
            Default: 500
        casout : dict or string or CASTable or AudioTable, optional
            Output table. A string is treated as the output table name.
            When None, a random name is generated.
        label_level : int, optional
            When non-zero, ``_label_`` is computed as
            ``scan(_path_, label_level, <path separator>)``.
            Default: 0
        random_shuffle : bool, optional
            When True the output table is rewritten with ``table.shuffle``,
            otherwise with ``table.partition``.
            Default: True
        kwargs : keyword-arguments, optional
            Additional parameters forwarded to ``audio.computefeatures``.

        Returns
        -------
        :class:`AudioTable`
            With the connection set, ``feature_size`` equal to `n_ceps`,
            ``num_of_frames_col`` set to ``'_num_frames_'`` and ``label_col``
            set to ``'_label_'`` (or None when `label_level` is zero).
        None
            If `table` is neither an AudioTable nor a CASTable, or if the
            action reports a severity above 1 (its messages are printed).

        '''
        conn.loadactionset('audio', _messagelevel='error')

        if not isinstance(table, (AudioTable, CASTable)):
            return None

        if casout is None:
            casout = dict(name=random_name('AudioTable', 6))
        elif isinstance(casout, (CASTable, AudioTable)):
            casout = casout.to_outtable_params()
        elif isinstance(casout, str):
            # A bare table name: normalize to a dict so casout['name'] below works
            # instead of raising TypeError on string indexing.
            casout = dict(name=casout)

        # always use dither with 0 to turn it off
        rt = conn.retrieve('audio.computefeatures', _messagelevel='error', table=table,
                           frameExtractionOptions=dict(frameshift=frame_shift, framelength=frame_length, dither=0.0),
                           melBanksOptions=dict(nbins=n_bins), mfccOptions=dict(nceps=n_ceps),
                           featureScalingMethod=feature_scaling_method, nOutputFrames=n_output_frames,
                           casout=casout, **kwargs)
        if rt.severity > 1:
            for msg in rt.messages:
                print(msg)
            return None

        # Path separator of the CAS host: '/' for Linux/OSX hosts, '\' otherwise.
        server_type = get_cas_host_type(conn).lower()
        fs = "/" if server_type.startswith(("lin", "osx")) else "\\"

        # Build the computed-column program; the label pieces are added only
        # when a label level was requested.
        computed_vars = ['_fName_']
        scode = "i=find(_path_,'{0}',-length(_path_)); ".format(fs)
        scode += "length _fName_ varchar(*); "
        if label_level:
            scode += "length _label_ varchar(*); "
        scode += "_fName_=substr(_path_, i+length('{0}'), length(_path_)-i);".format(fs)
        if label_level:
            scode += "_label_=scan(_path_,{},'{}');".format(label_level, fs)
            computed_vars.append('_label_')

        ctable = CASTable(casout['name'], computedvars=computed_vars,
                          computedvarsprogram=scode)

        # Rewrite the output table in place so the computed columns are stored in it.
        rewrite_target = dict(name=casout['name'], replace=True)
        if random_shuffle:
            conn.table.shuffle(table=ctable, casout=rewrite_target)
        else:
            conn.table.partition(table=ctable, casout=rewrite_target)

        out = AudioTable(casout['name'])
        out.set_connection(connection=conn)

        out.feature_size = n_ceps
        out.num_of_frames_col = '_num_frames_'
        out.label_col = '_label_' if label_level else None

        return out
Esempio n. 4
0
    def extract_audio_features(cls, conn, table, frame_shift=10, frame_length=25, n_bins=40, n_ceps=40,
                               feature_scaling_method='STANDARDIZATION', n_output_frames=500, casout=None, **kwargs):
        '''
        Extracts audio features from the audio files

        Runs the ``audio.computefeatures`` action and then rewrites the output
        table in place with a computed ``_fName_`` column holding the part of
        ``_path_`` after the last path separator.

        Parameters
        ----------
        conn : CAS
            A connection object to the current session.
        table : AudioTable or CASTable
            An audio table containing the audio files.
        frame_shift : int, optional
            Specifies the time difference (in milliseconds) between the beginnings of consecutive frames.
            Default: 10
        frame_length : int, optional
            Specifies the length of a frame (in milliseconds).
            Default: 25
        n_bins : int, optional
            Specifies the number of triangular mel-frequency bins.
            Default: 40
        n_ceps : int, optional
            Specifies the number of cepstral coefficients in each MFCC feature frame (including C0).
            Default: 40
        feature_scaling_method : string, optional
            Specifies the feature scaling method to apply to the computed feature vectors.
            Default: 'STANDARDIZATION'
        n_output_frames : int, optional
            Specifies the exact number of frames to include in the output table (extra frames are dropped and missing
            frames are padded with zeros).
            Default: 500
        casout : dict or string or CASTable, optional
            CAS Output table. A string is treated as the output table name.
            When None, a random name is generated.
        kwargs : keyword-arguments, optional
            Additional parameter for feature extraction.

        Returns
        -------
        :class:`AudioTable`
            If the features were computed
        None
            If `table` is neither an AudioTable nor a CASTable, or if the
            action reports a severity above 1 (its messages are printed)

        '''
        if not isinstance(table, (AudioTable, CASTable)):
            return None

        if casout is None:
            casout = dict(name=random_name('AudioTable', 6))
        elif isinstance(casout, (CASTable, AudioTable)):
            casout = casout.to_outtable_params()
        elif isinstance(casout, str):
            # A bare table name: normalize to a dict so casout['name'] below works
            # instead of raising TypeError on string indexing.
            casout = dict(name=casout)

        rt = conn.retrieve('audio.computefeatures', _messagelevel='error', table=table,
                           frameExtractionOptions=dict(frameshift=frame_shift, framelength=frame_length),
                           melBanksOptions=dict(nbins=n_bins), mfccOptions=dict(nceps=n_ceps),
                           featureScalingMethod=feature_scaling_method, nOutputFrames=n_output_frames,
                           casout=casout, **kwargs)
        if rt.severity > 1:
            for msg in rt.messages:
                print(msg)
            return None

        # Path separator of the CAS host: '/' for Linux/OSX hosts, '\' otherwise.
        server_type = get_cas_host_type(conn).lower()
        fs = "/" if server_type.startswith(("lin", "osx")) else "\\"

        # Computed-column program: locate the last separator in _path_ and keep
        # everything after it as a 1000-character _fName_ column.
        scode = "i=find(_path_,'{0}',-length(_path_)); ".format(fs)
        scode += "length _fName_ $1000; "
        scode += "_fName_=substr(_path_, i+length('{0}'), length(_path_)-i);".format(fs)

        ctable = CASTable(casout['name'], computedvars=['_fName_'],
                          computedvarsprogram=scode)

        # Rewrite the output table in place so the computed column is stored in it.
        conn.table.partition(table=ctable, casout=dict(name=casout['name'], replace=True))

        return AudioTable(casout['name'])