def check_sampwidth(params, sampwidth):
    """
    Report whether the audio's sample width matches the requested one.

    Parameters
    ----------
    params : class : 'wave._wave_params'
        Specifies the original parameters of the audio. Only the
        ``sampwidth`` attribute is read.
    sampwidth : int
        Specifies the desired sample width (in bytes).

    Returns
    -------
    boolean
        True when ``params.sampwidth`` equals ``sampwidth``.

    Raises
    ------
    DLPyError
        If either byte width is not one of 1, 2, 3 or 4.

    """
    supported_widths = {1, 2, 3, 4}
    # The input width is validated first, then the requested width.
    checks = (
        (params.sampwidth,
         "invalid wave input! Only byte width values included in {1, 2, 3, 4} are accepted."),
        (sampwidth,
         "invalid desired byte width! Only byte width values included in {1, 2, 3, 4} are accepted."),
    )
    for width, complaint in checks:
        if width not in supported_widths:
            raise DLPyError(complaint)
    return params.sampwidth == sampwidth
def __init__(self, n, n_blocks=1, rnn_type='gru', output_type='samelength', dropout=0.2,
             max_output_length=None, src_layers=None, name=None):
    '''
    Store the block configuration and build its layers

    Parameters
    ----------
    n : int or sequence of int
        Number of neurons. An int is repeated once per block; a sequence
        must contain exactly n_blocks entries and is stored as given.
    n_blocks : int, optional
        Number of blocks. Default: 1
    rnn_type : string, optional
        Stored as ``self.rnn_type``. Default: 'gru'
    output_type : string, optional
        Stored as ``self.output_type``. Default: 'samelength'
    dropout : double, optional
        Stored as ``self.dropout``. Default: 0.2
    max_output_length : int, optional
        Stored as ``self.max_output_length``.
    src_layers : list, optional
        Stored as ``self.src_layers``.
    name : string, optional
        Stored as ``self.name``.

    Raises
    ------
    DLPyError
        If n is an int and n_blocks is smaller than 1, or if n is a
        sequence whose length differs from n_blocks.

    '''
    if not isinstance(n, int):
        # per-block neuron counts were supplied explicitly
        if len(n) != n_blocks:
            raise DLPyError(
                'the length of the neurons should be equal to the number of blocks')
        self.n = n
    elif n_blocks == 1:
        self.n = [n]
    elif n_blocks > 1:
        self.n = [n] * n_blocks
    else:
        raise DLPyError('n_blocks should be larger than 0.')

    self.n_blocks = n_blocks
    self.src_layers = src_layers
    self.max_output_length = max_output_length
    self.rnn_type = rnn_type
    self.output_type = output_type
    self.dropout = dropout
    self.layers = []
    self.name = name
    # must run last: every attribute above is set before the layers are built
    self.add_layers()
def _symmetric_pad(value):
    '''Collapse a (before, after) padding pair into one value; ints pass through.'''
    if isinstance(value, (tuple, list)):
        if value[0] != value[1]:
            raise DLPyError('Asymmetric padding is not supported')
        return value[0]
    return value


def keras_zeropad2d_layer(layer, src_layer):
    '''
    Extract zero padding layer parameters from layer definition object

    Parameters
    ----------
    layer : Layer object
        Zero padding layer
    src_layer : dict
        Maps layer names to their source layer(s). Must contain an
        entry for ``layer.name``.

    Returns
    -------
    dict
        Keys ``source_str`` (the src_layer entry for this layer),
        ``height`` and ``width`` (symmetric padding values).

    Raises
    ------
    KerasParseError
        If ``layer.name`` has no entry in src_layer.
    DLPyError
        If the padding before and after a dimension differ.

    '''
    config = layer.get_config()

    # extract source layer(s)
    if layer.name not in src_layer:
        raise KerasParseError('Unable to determine source layer for '
                              'zero padding layer = ' + layer.name)
    zero_pad = {'source_str': src_layer[layer.name]}

    # Keras padding definition:
    #   - If int: the same symmetric padding is applied to height and width.
    #   - If tuple of 2 ints: interpreted as two different symmetric padding
    #     values for height and width: (symmetric_height_pad, symmetric_width_pad).
    #   - If tuple of 2 tuples of 2 ints: interpreted as
    #     ((top_pad, bottom_pad), (left_pad, right_pad))
    padding = config['padding']
    if isinstance(padding, int):
        # a bare int has no len(); expand it to the two-value form
        padding = (padding, padding)
    elif len(padding) == 1:
        padding = (padding[0], padding[0])

    # Each entry is either an int or a (before, after) pair. Pairs may arrive
    # as lists (e.g. after a JSON round trip), so both sequence types are accepted.
    zero_pad['height'] = _symmetric_pad(padding[0])
    zero_pad['width'] = _symmetric_pad(padding[1])

    return zero_pad
def query_action_parm(conn, action_name, action_set, parm_name): ''' Check whether action includes given parameter Parameters ---------- conn : CAS The CAS connection object action_name : string The name of the action action_set : string The name of the action set that contains the action parm_name : string The parameter name. Returns ------- boolean Indicates whether action supports parameter list of dictionaries Dictionaries that describe action parameters ''' # check whether action set is loaded parm_valid = False act_parms = [] r = conn.retrieve('queryactionset', _messagelevel='error', actionset=action_set) if r[action_set]: # check whether action part of action set r = conn.retrieve('listactions', _messagelevel='error', actionset=action_set) if action_name in r[action_set]['name'].tolist(): r = conn.retrieve('builtins.reflect', action=action_name, actionset=action_set) # check for parameter act_parms = r[0]['actions'][0]['params'] for pdict in act_parms: if pdict['name'].lower() == parm_name.lower(): parm_valid = True break else: raise DLPyError(action_name + ' is not an action in the ' + action_set + ' action set.') else: raise DLPyError(action_set + ' is not valid or not currently loaded.') return parm_valid, act_parms
def sas_var_info(var_type):
    '''
    Returns SAS variable type information

    Looks up the variable information needed to update the extended
    attribute table.

    Parameters
    ----------
    var_type : string
        Specifies the type of the input data in the data spec
        (matched case-insensitively).
        Valid Values: NUMERICNOMINAL, NUMNOM, TEXT, IMAGE, OBJECTDETECTION

    Returns
    -------
    dict
        SAS variable information

    Raises
    ------
    DLPyError
        For TEXT (attribute updating is not supported) and for any
        unrecognized type.

    '''
    kind = var_type.lower()
    if kind == "text":
        raise DLPyError('Attribute updating not supported for text variable(s).')

    # (ds_type, rtype, rawlen, fmt_nfl, fmt_datalen) for each supported type
    layouts = {
        "numericnominal": (1, 1, 8, 12, 12),
        "numnom": (1, 1, 8, 12, 12),
        "image": (3, 0, 1000000, 0, 1),
        "objectdetection": (4, 1, 8, 12, 12),
    }
    if kind not in layouts:
        raise DLPyError('The variable type is invalid. Only NUMERICNOMINAL,\n'
                        'NUMNOM, TEXT, IMAGE, and OBJECTDETECTION are supported.')

    ds_type, rtype, rawlen, fmt_nfl, fmt_datalen = layouts[kind]
    return {"ds_type": ds_type,
            "rtype": rtype,
            "rawlen": rawlen,
            "fmt_name": "BEST",
            "fmt_nfl": fmt_nfl,
            "fmt_nfd": 0,
            "fmt_datalen": fmt_datalen}
def scored_results_to_dict(result_tbl):
    '''
    Converts results in CASResults table to a dictionary of values

    Parameters
    ----------
    result_tbl : CASResults object
        Table containing results from scoring the test data. It must
        expose ``columns`` and ``values`` and contain the columns
        '_filename_0', 'caption' and '_DL_Pred_'.

    Returns
    -------
    dict
        Maps each '_filename_0' value to a (caption, prediction) tuple
        with surrounding whitespace stripped from both strings.

    Raises
    ------
    DLPyError
        If the columns of result_tbl cannot be read.

    '''
    try:
        result_columns = list(result_tbl.columns)
    except Exception:
        # A bare except would also swallow KeyboardInterrupt/SystemExit.
        raise DLPyError(
            'Specified result_tbl could not be located in the caslib')

    filename_idx = result_columns.index('_filename_0')
    caption_idx = result_columns.index('caption')
    prediction_idx = result_columns.index('_DL_Pred_')

    result_values = dict()
    for row in result_tbl.values:
        # a later row with the same filename overwrites the earlier one
        result_values[row[filename_idx]] = (row[caption_idx].strip(),
                                            row[prediction_idx].strip())
    return result_values
def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
    '''
    Pre-process and load the metadata for the given task

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    path : string
        Location to the input metadata file.
    audio_path : string
        Location to the audio files.
    task : string, optional
        Specifies the task. Only 'speech2text' is handled.

    Returns
    -------
    :class:`CASTable`

    Raises
    ------
    DLPyError
        If the task is anything other than 'speech2text'.

    '''
    if task != 'speech2text':
        raise DLPyError("We do not support this task yet!")
    # speech2text is delegated to the speech recognition loader
    return cls.load_audio_metadata_speechrecognition(conn, path, audio_path)
def get_max_capt_len(captions_file, delimiter='\t'):
    '''
    Finds the maximum caption length (in words) in a captions file

    Parameters
    ----------
    captions_file : string
        Specifies physical path to file containing ground truth image captions.
        This has to be client accessible.
    delimiter : string, optional
        Specifies delimiter between captions and filenames in captions_file
        Default : tab character

    Returns
    -------
    int
        Largest number of whitespace-separated tokens found in any
        caption; 0 for an empty file.

    Raises
    ------
    DLPyError
        If a line has nothing after the first delimiter-separated field.

    '''
    longest = 0
    with open(captions_file, 'r') as handle:
        for record in handle:
            # the first field is the filename, everything after it is a caption
            caption_fields = record.split(delimiter)[1:]
            if not caption_fields:
                raise DLPyError("Error with captions file or delimiter")
            line_longest = max(len(text.split()) for text in caption_fields)
            if line_longest > longest:
                longest = line_longest
    return longest
def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
    '''
    Pre-process and loads the metadata

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    path : string
        Location to the input metadata file.
    audio_path : string
        Location to the audio files.
    task : string, optional
        Specifies the task. Only 'speech2text' is handled.

    Returns
    -------
    object
        Whatever ``cls.load_audio_metadata_speechrecognition`` returns
        (presumably a :class:`CASTable` -- confirm against that method).

    Raises
    ------
    DLPyError
        If the task is anything other than 'speech2text'.

    '''
    if task == 'speech2text':
        return cls.load_audio_metadata_speechrecognition(conn, path, audio_path)
    # message punctuation fixed (was "yet!.")
    raise DLPyError("We do not support this task yet!")
def _find_layer_def(self, lname, layer_info):
    '''
    Look up the layer definition(s) for one or more layer names

    Parameters
    ----------
    lname : string or list of strings
        Layer name(s) to look up.
    layer_info : dict
        Each value is either a dictionary of layer entries or a list of
        such dictionaries. Every layer entry provides 'name' and 'ldef'.

    Returns
    -------
    list
        The 'ldef' of the first entry matching each requested name, in
        the order the names were given.

    Raises
    ------
    DLPyError
        If a name matches no entry.

    '''
    wanted = lname if isinstance(lname, list) else [lname]

    def _entries():
        # walk every layer entry in the same order as the nested search:
        # groups in dictionary order, list members in list order
        for group in layer_info.values():
            members = group if isinstance(group, list) else [group]
            for member in members:
                for entry in member.values():
                    yield entry

    ldefs = []
    for name in wanted:
        match = next((entry for entry in _entries() if entry['name'] == name),
                     None)
        if match is None:
            raise DLPyError('Could not find definition for layer ' + name)
        ldefs.append(match['ldef'])

    return ldefs
def __init__(self, data_array, var_names, var_types):
    '''
    Describe the columns and assemble the row tuples

    Parameters
    ----------
    data_array : list of sequences
        One sequence per column; rows are formed by taking the same
        position from every column. The row count is taken from the
        first column.
    var_names : list of strings
        Column names, also used as labels.
    var_types : list of strings
        'NUMERIC' or 'VARCHAR' for each column.

    Raises
    ------
    DLPyError
        If a variable type is neither 'NUMERIC' nor 'VARCHAR'.

    '''
    n_cols = len(data_array)

    # column descriptions; each offset is the running total of the lengths so far
    col_specs = []
    byte_offset = 0
    for col in range(n_cols):
        if var_types[col] == 'NUMERIC':
            width, sas_type, raw_type = 8, 'sas', 'numeric'
        elif var_types[col] == 'VARCHAR':
            width, sas_type, raw_type = 16, 'varchar', 'char'
        else:
            raise DLPyError("Unknown variable type " + var_types[col] + " given.")
        col_specs.append(dict(name=var_names[col],
                              label=var_names[col],
                              length=width,
                              type=sas_type,
                              rtype=raw_type,
                              offset=byte_offset))
        byte_offset += width

    # transpose column-major input into one tuple per row
    n_rows = len(data_array[0])
    self.data = [tuple(data_array[col][row] for col in range(n_cols))
                 for row in range(n_rows)]

    super(BertDMH, self).__init__(col_specs)
def export_attr_xml(conn, model_name, file_name):
    '''
    Create XML version of extended attribute table

    Call action to create XML blob containing model attributes.
    Write resulting blob to text file.

    Parameters
    ----------
    conn : CAS
        The CAS connection object
    model_name : string
        Specifies the name of the deep learning model. The attributes
        are read from the table named ``model_name + '_weights'``.
    file_name : string
        Name of XML file

    Raises
    ------
    DLPyError
        If the action reports a severity greater than 1. The action
        messages are printed first.

    '''
    rt = conn.retrieve('table.attribute', _messagelevel='error',
                       name=model_name + '_weights',
                       task="EXPORT",
                       xml="attr")
    if rt.severity > 1:
        for msg in rt.messages:
            print(msg)
        raise DLPyError('Cannot export model attributes, there seems to be a problem.')

    ascii_text = rt['xmlblob'].decode('utf8')
    # the context manager closes the file; no explicit close() is needed
    with open(file_name, "w") as myfile:
        myfile.write(ascii_text)
def __init__(self, kernel_sizes=3, n_filters=(16, 16), strides=None, batch_norm_first=False,
             conv_short_cut=False):
    '''
    Store the block configuration and build its layers

    Parameters
    ----------
    kernel_sizes : int or sequence, optional
        Kernel size(s). A single value is repeated for every entry of
        n_filters. Default: 3
    n_filters : sequence of int, optional
        Number of filters, one entry per convolution. Default: (16, 16)
    strides : int or sequence of int, optional
        A single value (int or one-element sequence) is used for the
        first convolution and 1 for the rest. A longer sequence must
        have the same length as n_filters. Default: all 1
    batch_norm_first : bool, optional
        Stored as ``self.batch_norm_first``.
    conv_short_cut : bool, optional
        Stored as ``self.conv_short_cut``.

    Raises
    ------
    DLPyError
        If strides has an unsupported type, or if the length of strides
        or kernel_sizes does not match n_filters.

    '''
    self.count_instances()

    n_convs = len(n_filters)
    if strides is None:
        self.strides = [1] * n_convs
    elif isinstance(strides, int):
        self.strides = [strides] + [1] * (n_convs - 1)
    elif isinstance(strides, (list, set, tuple)):
        if len(strides) == 1:
            # list.append returns None, so the previous
            # ``[strides].append(...)`` left self.strides as None and the
            # length check below crashed. Build the padded list instead.
            self.strides = list(strides) + [1] * (n_convs - 1)
        else:
            self.strides = strides
    else:
        raise DLPyError('The strides parameter needs to be an integer or list of integers.')

    if len(self.strides) != n_convs:
        raise DLPyError('The length of strides must be equal to the length of n_filters.')

    self.kernel_sizes = kernel_sizes
    self.n_filters = n_filters

    if isinstance(self.kernel_sizes, int):
        self.kernel_sizes = [self.kernel_sizes]

    if len(self.kernel_sizes) == 1:
        # NOTE(review): this repeats the one-element list itself, so an int
        # kernel size k becomes [[k], [k], ...]. Kept as-is because the layer
        # builder may rely on that shape -- confirm against add_layers().
        self.kernel_sizes = [self.kernel_sizes] * len(self.n_filters)
    elif len(self.kernel_sizes) != len(self.n_filters):
        raise DLPyError('The length of kernel_sizes must be equal to the length of n_filters.')

    self.batch_norm_first = batch_norm_first
    self.conv_short_cut = conv_short_cut
    self.layers = []
    # must run last: every attribute above is set before the layers are built
    self.add_layers()
def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
    '''
    Pre-process and load the metadata for the given task

    Parameters
    ----------
    conn : CAS
        A connection object to the current session. When None, the
        connection returned by ``cls.get_connection()`` is used.
    path : string
        Location to the input metadata file.
    audio_path : string
        Location to the audio files.
    task : string, optional
        Specifies the task
        Note: currently only support 'speech2text' (default)

    Returns
    -------
    :class:`CASTable`

    Raises
    ------
    DLPyError
        If no connection is available or the task is not 'speech2text'.

    Examples
    --------
    >>> import swat
    >>> from dlpy.audio import AudioTable
    >>> s=swat.CAS("cloud.example.com", 5570)
    >>> aud_table = AudioTable.load_audio_metadata(s, path="/path/to/metadata/file.txt", audio_path="/path/to/audio/file.txt")
    >>> aud_table.set_connection(s)

    '''
    # fall back to the class-level connection when none is supplied
    session = cls.get_connection() if conn is None else conn
    if session is None:
        raise DLPyError('cannot get a connection object to the current session.')

    if task != 'speech2text':
        raise DLPyError("We do not support this task yet!")
    return cls.load_audio_metadata_speechrecognition(session, path, audio_path)
def create_captions_table(conn, captions_file, caption_col_name='Var', delimiter='\t'):
    '''
    Generate CASTable of captions and filenames

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    captions_file : string
        Specifies absolute path to file containing image filenames and captions.
        This file has to be accessible from the client.
    caption_col_name : string, optional
        Specifies base name of columns that contain captions
        Default : 'Var'
    delimiter : string, optional
        Specifies delimiter in the captions_file between captions
        Default : tab character

    Returns
    -------
    :class:`CASTable`
        One '_filename_0' column plus one column per caption, named
        caption_col_name followed by the caption index.

    Raises
    ------
    DLPyError
        If the file is empty or its first line contains no delimiter.

    '''
    # read file lines into large list
    with open(captions_file, 'r') as readFile:
        line_list = readFile.readlines()

    # an empty file previously surfaced as a bare IndexError on line_list[0]
    if not line_list:
        raise DLPyError('Something went wrong with the captions file -'
                        ' the file is empty')

    # the first line determines the number of captions per image
    num_captions = len(line_list[0].split(delimiter)) - 1
    if num_captions == 0:
        raise DLPyError('Something went wrong with the captions file -'
                        ' most likely the wrong delimiter was specified or'
                        ' the captions file is incorrectly formatted')

    # initialize dictionary: filename column first, then the caption columns
    column_names = ['{}{}'.format(caption_col_name, i) for i in range(num_captions)]
    captions_dict = dict()
    captions_dict['_filename_0'] = list()
    for column in column_names:
        captions_dict[column] = list()

    # add filenames and captions to dictionary
    for line in line_list:
        items = line.split(delimiter)
        captions_dict['_filename_0'].append(items[0])
        for j, column in enumerate(column_names):
            captions_dict[column].append(items[j + 1].strip())

    captions = CASTable.from_dict(conn, captions_dict)
    return captions
def create_audio_table(cls, conn, data_path, metadata_path,
                       features_parameters=dict(frame_shift=10, frame_length=25, n_bins=40, n_ceps=40,
                                                feature_scaling_method='STANDARDIZATION',
                                                n_output_frames=500),
                       casout=None,
                       task='speech2text'):
    '''
    Creates an Audio table for the requested task

    Parameters
    ----------
    conn : CAS
        A connection object to the current session.
    data_path : string
        Path to the file that contains the list of audio files (this is
        expected to be on the server side).
    metadata_path : string
        Location to the metadata file (this is expected to be on the client side).
    features_parameters : dict, optional
        Parameters to be used while extracting audio features
    casout : string, dict, or CASTable, optional
        Resulting output CAS table
    task : string, optional
        Specifies the type of the task. Default is speech to text.
        Note: currently only support 'speech2text' (default)

    Returns
    -------
    :class:`AudioTable`
        A table containing audio features of audio files as well as their labels.
        The resulting table can be directly used in the deep learning models.

    Raises
    ------
    DLPyError
        If the task is not 'speech2text'. Errors raised while the table
        is being created propagate unchanged.

    '''
    if task != 'speech2text':
        raise DLPyError("We do not support this task!")
    # everything is delegated to the speech recognition builder
    return cls.create_audio_table_speechrecognition(
        conn, data_path, metadata_path,
        features_parameters=features_parameters,
        casout=casout)
def __init__(self, conn, layers=None, model_table=None):
    '''
    Initialize the model, optionally from an ordered collection of layers

    Parameters
    ----------
    conn : CAS
        Passed to ``Model.__init__``.
    layers : list, set or tuple of layers, optional
        Layers that make up the model. The last one must be a Layer
        that can be the last layer; the model is compiled immediately.
        When omitted, the model starts without layers.
    model_table : optional
        Passed to ``Model.__init__``.

    Raises
    ------
    DLPyError
        If layers is not a list, set or tuple, is empty, or does not end
        with a Layer that can be the last layer.

    '''
    Model.__init__(self, conn, model_table=model_table)

    if layers is None:
        self.layers = []
        self.layers_dict = {}
    elif type(layers) in (list, set, tuple):
        # A set cannot be indexed with [-1] and a tuple cannot be appended to
        # later, so anything that is not already a list is copied into one.
        # NOTE(review): set iteration order is arbitrary -- confirm sets are
        # really meant to be accepted here.
        self.layers = layers if type(layers) is list else list(layers)

        # This branch never created layers_dict; keep an existing one if
        # Model.__init__ set it up, otherwise start empty.
        if getattr(self, 'layers_dict', None) is None:
            self.layers_dict = {}
        for layer in self.layers:
            if layer.name is not None:
                self.layers_dict[layer.name] = layer

        if (len(self.layers) > 0 and isinstance(self.layers[-1], Layer)
                and self.layers[-1].can_be_last_layer):
            self.compile()
        else:
            raise DLPyError('layers has to be a list of layer(s).')
    else:
        raise DLPyError('layers has to be a list of layer(s).')
def get_image_features(conn, model, image_table, dense_layer, target='_filename_0'):
    '''
    Generate CASTable of image features

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model: dlpy Model object
        Specifies CNN model to use for extracting features
    image_table: imageTable
        Specifies name of CASTable that contains images to be used for training.
        It is resized to the size listed in the first row of the model summary.
    dense_layer: string
        Specifies layer from CNN model to extract features from
    target: string, optional
        Specifies the name of the column containing the response variable
        Default: '_filename_0'

    Returns
    -------
    :class:`CASTable`
        One column per feature ('f0', 'f1', ...) plus the target column.

    Raises
    ------
    DLPyError
        If dense_layer is not a layer of the model.

    '''
    # validate before touching image_table so a bad layer name has no side effects
    if dense_layer not in list(model.summary['Layer']):
        raise DLPyError('Specified dense_layer not a layer in model')

    width = model.summary['Output Size'][0][1]
    height = model.summary['Output Size'][0][0]
    image_table.resize(width=width, height=height)

    X, y = model.get_features(data=image_table, dense_layer=dense_layer, target=target)

    # initialize dictionary with columns
    n_features = len(X[0]) if len(X) > 0 else 0
    feature_columns = ['f{}'.format(i) for i in range(n_features)]
    table_dict = {}
    for column in feature_columns:
        table_dict[column] = list()

    # add filenames to dict
    table_dict[target] = list(y)

    # Add features to dict. Row k of X belongs to entry k of y; looking each
    # value up with list(y).index() was quadratic and gave every duplicate
    # target value the features of its first occurrence.
    for row_idx in range(len(table_dict[target])):
        feature_row = X[row_idx]
        for i, column in enumerate(feature_columns):
            table_dict[column].append(feature_row[i])

    features = CASTable.from_dict(conn, table_dict)

    return features
def _from_huggingface_model(self):
    '''
    Assemble the layer information dictionary from ``self._config``

    Returns
    -------
    dict
        Keys 'input', 'embedding', 'encoder' (a list with one entry per
        hidden layer) and 'pooler', each holding what the corresponding
        ``_add_*`` helper returned.

    Raises
    ------
    DLPyError
        If 'max_position_embeddings' or 'hidden_size' is missing from
        the configuration.

    '''
    # verify key parameters
    required = (('max_position_embeddings', 'Maximum position embedding is unspecified'),
                ('hidden_size', 'Hidden size is unspecified'))
    for key, complaint in required:
        if key not in self._config.keys():
            raise DLPyError(complaint)

    sas_layer_info = {}

    # input layers (tokens, position, and segments)
    input_width = self._config['max_position_embeddings'] * self._config['hidden_size']
    sas_layer_info['input'] = self._add_input_layers(input_width)

    # embedding layers (tokens, position, and segments)
    sas_layer_info['embedding'] = self._add_bert_embedding_layer('embedding', sas_layer_info)

    # dictionary is ordered (OrderedDict), so the last key is the last layer added
    embedding_keys = list(sas_layer_info['embedding'].keys())
    previous_output = sas_layer_info['embedding'][embedding_keys[-1]]['name']

    # encoding layers: each one is fed by the last layer of its predecessor
    sas_layer_info['encoder'] = []
    for lnum in range(self._config['num_hidden_layers']):
        encoder_layers = self._add_bert_encoding_layer('encoder' + str(lnum),
                                                       [previous_output],
                                                       sas_layer_info)
        sas_layer_info['encoder'].append(encoder_layers)

        encoder_keys = list(sas_layer_info['encoder'][lnum].keys())
        previous_output = sas_layer_info['encoder'][lnum][encoder_keys[-1]]['name']

    # pooling layer
    sas_layer_info['pooler'] = self._add_bert_pooling_layer('bert_pooling',
                                                            previous_output,
                                                            sas_layer_info)

    return sas_layer_info
def SequenceLabeling(conn, model_table='sequence_labeling_model', neurons=10, n_blocks=3, rnn_type='gru'):
    '''
    Generates a sequence labeling model.

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_table : string, optional
        Specifies the name of CAS table to store the model.
    neurons : int, optional
        Specifies the number of neurons to be in each layer.
        Default: 10
    n_blocks : int, optional
        Specifies the number of bidirectional blocks to be added to the model.
        Default: 3
    rnn_type : string, optional
        Specifies the type of the rnn layer.
        Default: GRU
        Valid Values: RNN, LSTM, GRU

    Returns
    -------
    :class:`Sequential`

    Raises
    ------
    DLPyError
        If n_blocks is smaller than 1.

    '''
    # the action set is loaded before the arguments are validated
    conn.retrieve('loadactionset', _messagelevel='error', actionset='deeplearn')

    if not (n_blocks >= 1):
        raise DLPyError('The number of blocks for a sequence labeling model should be at least 1.')

    model = Sequential(conn=conn, model_table=model_table)
    bidirectional_block = Bidirectional(n=neurons, n_blocks=n_blocks, rnn_type=rnn_type,
                                        name='bi_' + rnn_type + '_layer_')
    model.add(bidirectional_block)
    model.add(OutputLayer())
    return model
def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
    '''
    Pre-process and load the metadata for speech recognition

    The metadata is cleaned with DataClean, uploaded as a CAS table
    under a randomly generated name, and re-partitioned in place with
    a computed ``_fName_`` column that copies ``_filename_`` as a
    varchar.

    Parameters
    ----------
    conn : CAS
        A connection object to the current session. When None, the
        connection returned by ``cls.get_connection()`` is used.
    path : string
        Location to the input metadata file.
    audio_path : string
        Forwarded to ``DataClean.process_contents`` as ``audio_path``
        (presumably the location of the audio files, as in the example
        below -- confirm against DataClean).

    Returns
    -------
    :class:`CASTable`

    Raises
    ------
    DLPyError
        If no connection is available.

    Examples
    --------
    >>> import swat
    >>> from dlpy.audio import AudioTable
    >>> s=swat.CAS("cloud.example.com", 5570)
    >>> aud_tbl = AudioTable.load_audio_metadata_speechrecognition(s, path="/path/to/metadata/file.txt", audio_path="/path/to/audio/file.txt")
    >>> aud_tbl.set_connection(s)

    '''
    # fall back to the class-level connection when none is supplied
    session = cls.get_connection() if conn is None else conn
    if session is None:
        raise DLPyError('cannot get a connection object to the current session.')

    table_name = random_name('AudioTable_Metadata', 6)

    cleaner = DataClean(conn=session, contents_as_path=path)
    cleaned = cleaner.process_contents(audio_path=audio_path)
    uploaded = cleaner.create_castable(cleaned['results'], table_name,
                                       replace=True, promote=False,
                                       col_names=cleaned['col_names'])

    # computed column: varchar copy of the filename
    program = ''.join(['length _fName_ varchar(*); ',
                       '_fName_ = _filename_; '])
    with_fname = CASTable(uploaded, computedvars=['_fName_'], computedvarsprogram=program)

    # write the table back over itself, now including the computed column
    session.table.partition(table=with_fname, casout=dict(name=uploaded, replace=True))

    return CASTable(uploaded)
def ImageCaptioning(conn, model_name='image_captioning', num_blocks=3, neurons=50,
                    rnn_type='LSTM', max_output_len=15):
    '''
    Builds an RNN to be used for image captioning

    Parameters
    ----------
    conn : CAS
        Specifies the CAS connection object.
    model_name : string, optional
        Specifies output name of the model
        Default: 'image_captioning'
    num_blocks : int, optional
        Specifies number of samelength recurrent layers
        Default : 3
    neurons : int, optional
        Specifies number of neurons in each layer
        Default : 50
    rnn_type : string, optional
        Specifies the type of the rnn layer. Possible Values: RNN, LSTM, GRU
        Default: LSTM
    max_output_len : int, optional
        Specifies max number of tokens to generate in the final layer (i.e. max caption length)
        Default : 15

    Returns
    -------
    :class:`Sequential`
        Input layer, num_blocks 'samelength' recurrent layers, one
        'encoding' layer, one 'arbitrarylength' layer and an output layer.

    Raises
    ------
    DLPyError
        If num_blocks is smaller than 1.

    '''
    if num_blocks < 1:
        # the message used to say "greater than 1" although 1 is accepted
        raise DLPyError('num_blocks must be at least 1')

    model = Sequential(conn, model_table=model_name)

    model.add(InputLayer(name='input'))
    print('InputLayer added named "input"')

    for _ in range(num_blocks):
        model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type, output_type='samelength'))

    model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type, output_type='encoding'))
    model.add(Recurrent(n=neurons, init='msra', rnn_type=rnn_type, output_type='arbitrarylength',
                        max_output_length=max_output_len))

    model.add(OutputLayer(name='output'))
    print('OutputLayer added named "output"')

    return model
def scored_results_to_dict(result_tbl, filename_col='_filename_0', caption_col='caption'):
    '''
    Converts results in CASResults table to a dictionary of values

    Parameters
    ----------
    result_tbl : CASResults object
        Table containing results from scoring the test data. It must
        expose ``columns`` and ``values`` and contain a '_DL_Pred_' column.
    filename_col : str, optional
        Specifies the column name for the filename data.
        Default = '_filename_0'
    caption_col : str, optional
        Specifies the column name for the ground-truth caption data.
        When the column is absent, "N/A" is reported as the caption.
        Default = 'caption'

    Returns
    -------
    dict
        Maps each filename to a (caption, prediction) tuple with
        surrounding whitespace stripped from both strings.

    Raises
    ------
    DLPyError
        If the columns of result_tbl cannot be read.

    '''
    try:
        result_columns = list(result_tbl.columns)
    except Exception:
        # A bare except would also swallow KeyboardInterrupt/SystemExit.
        raise DLPyError(
            'Specified result_tbl could not be located in the caslib')

    filename_idx = result_columns.index(filename_col)
    caption_idx = None
    if caption_col in result_columns:
        caption_idx = result_columns.index(caption_col)
    prediction_idx = result_columns.index('_DL_Pred_')

    result_values = dict()
    for row in result_tbl.values:
        # Compare against None explicitly: index 0 is a valid column position
        # but falsy, so a truthiness test dropped captions in the first column.
        if caption_idx is not None:
            caption = row[caption_idx].strip()
        else:
            caption = "N/A"
        result_values[row[filename_idx]] = (caption, row[prediction_idx].strip())

    return result_values
def attr_helper(conn, model_name, attr_set, attr_key, attr_blob):
    '''
    Call action to update individual extended model attribute

    The attribute is dropped first and then added again with the new
    value, both on the table named ``model_name + '_weights'``.

    Parameters
    ----------
    conn : CAS
        The CAS connection object
    model_name : string
        Specifies the name of the deep learning model
    attr_set : string
        Name of attribute set to update
    attr_key : string
        Key name of attribute
    attr_blob : double, int64, int, char, or binary blob
        Representation of attribute

    Raises
    ------
    DLPyError
        If adding the attribute reports a severity greater than 1. The
        action messages are printed first.

    '''
    weights_table = model_name + '_weights'

    # drop existing attribute
    # NOTE: the result is deliberately ignored -- the attribute or the
    #       attribute set may not exist yet
    conn.retrieve('table.attribute', _messagelevel='error',
                  name=weights_table,
                  attributes=[{"key": attr_key}],
                  set=attr_set,
                  task="DROP")

    # add new attribute
    add_result = conn.retrieve('table.attribute', _messagelevel='error',
                               name=weights_table,
                               attributes=[{"key": attr_key, "value": attr_blob}],
                               set=attr_set,
                               task="ADD")

    if add_result.severity > 1:
        for message in add_result.messages:
            print(message)
        raise DLPyError('Cannot add attribute, there seems to be a problem.')
def check_stereo(params):
    """
    Report whether the input audio has 2 channels (stereo).

    Parameters
    ----------
    params : class : 'wave._wave_params'
        Specifies the original parameters of the audio. Only the
        ``nchannels`` attribute is read.

    Returns
    -------
    boolean
        True for two channels, False for one.

    Raises
    ------
    DLPyError
        If the channel count is neither 1 nor 2.

    """
    supported_channel_counts = {1, 2}
    if params.nchannels in supported_channel_counts:
        return params.nchannels == 2
    raise DLPyError("invalid wave input! Only mono and stereo are supported.")
def convert_one_audio_file_to_specgram(local_audio_file, converted_local_png_file):
    '''
    Convert a local audio file into a png format with spectrogram.

    Parameters
    ----------
    local_audio_file : string
        Local location to the audio file to be converted.
    converted_local_png_file : string
        Local location to store the converted audio file

    Returns
    -------
    None

    Raises
    ------
    DLPyError
        If soundfile or matplotlib cannot be imported.

    '''
    try:
        import soundfile as sf
        import matplotlib.pylab as plt
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so one clause covers both
        raise DLPyError('cannot import soundfile or matplotlib')

    data, sampling_rate = sf.read(local_audio_file)

    fig, ax = plt.subplots(1)
    try:
        # let the spectrogram fill the whole canvas
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        ax.axis('off')
        ax.specgram(x=data, Fs=sampling_rate)
        ax.axis('off')
        # The former frameon='false' argument is gone: the string is truthy, so
        # it never disabled the frame, and savefig no longer accepts the keyword.
        fig.savefig(converted_local_png_file, dpi=300)
        # this is the key to avoid mem leaking in notebook
        plt.ioff()
    finally:
        # release the figure even when reading or saving fails
        plt.close(fig)
def play_one_audio_file(local_audio_file):
    '''
    Play a local audio file using soundfile and sounddevice.

    The file location, sampling frequency and duration are printed
    before playback, and the call returns once playback has finished.

    Parameters
    ----------
    local_audio_file : string
        Local location to the audio file to be played. When it is a directory,
        a file will be randomly chosen.

    Returns
    -------
    None

    Raises
    ------
    DLPyError
        If soundfile or sounddevice cannot be imported.

    '''
    try:
        import soundfile as sf
        import sounddevice as sd
    except (ModuleNotFoundError, ImportError):
        raise DLPyError('cannot import soundfile or sounddevice')

    # a directory means "pick one of its files"
    is_directory = os.path.isdir(local_audio_file)
    chosen_file = random_file_from_dir(local_audio_file) if is_directory else local_audio_file

    print('File location: {}'.format(chosen_file))

    samples, rate = sf.read(chosen_file)

    print('Frequency [Hz]: {}'.format(rate))
    print('Duration [s]: {}'.format(samples.shape[0] / rate))

    sd.play(samples, rate)
    sd.wait()
def create_class_labels(n_levels, labels=None):
    '''
    Create class labels

    Create class labels with or without user-defined labels. Every
    string label comes back exactly 12 characters wide.

    Parameters
    ----------
    n_levels : integer
        The number of levels for each classification variable.
    labels : list of string or None
        Specifies the class labels. When None, the labels are the
        numbers 0 .. n_levels - 1.

    Returns
    -------
    list
        Left-justified class labels.

    Raises
    ------
    DLPyError
        If labels are supplied and their count differs from n_levels.

    '''
    label_width = 12

    if labels is None:
        # strictly numeric labels (e.g. 0, 1, ...)
        return [str(level).ljust(label_width) for level in range(n_levels)]

    # user-supplied labels
    if n_levels != len(labels):
        raise DLPyError('The number of class labels does not match\n'
                        'the number of class levels for object detection.\n')

    # long labels are truncated, short ones padded with spaces
    return [label[:label_width] if len(label) > label_width else label.ljust(label_width)
            for label in labels]
def add(self, layer):
    '''
    Add layer(s) to model

    The layer is appended to ``self.layers``. A named Layer is also
    registered in ``self.layers_dict``, and the model is compiled as
    soon as a layer that can be the last layer is added.

    Parameters
    ----------
    layer : Layer or list-of-Layers
        Specifies the layer to be added

    Raises
    ------
    DLPyError
        If a source layer given by name is not in ``self.layers_dict``.

    '''
    # NOTE(review): the indentation of this method was reconstructed from a
    # flattened source; confirm the nesting of the Bidirectional branch.
    self.layers.append(layer)
    if isinstance(layer, Layer):
        # only named layers can be looked up by name later
        if layer.name is not None:
            self.layers_dict[layer.name] = layer
        if layer.type == 'recurrent':
            self.model_type = 'RNN'
        print('NOTE: ' + layer.type_desc + ' added.')
        # a layer that can be last triggers compilation of the model
        if layer.can_be_last_layer:
            self.compile()
    if isinstance(layer, Bidirectional):
        self.model_type = 'RNN'
        if layer.src_layers is not None:
            # replace source layers given by name with the registered layer objects
            new_src_layers = []
            for l in layer.src_layers:
                if not isinstance(l, Layer):
                    if l in self.layers_dict:
                        new_src_layers.append(self.layers_dict[l])
                    else:
                        raise DLPyError('cannot find the layer named: ' + l)
                else:
                    new_src_layers.append(l)
            layer.src_layers = new_src_layers
def display_spectrogram_for_one_audio_file(local_audio_file):
    '''
    Display spectrogram for a local audio file using soundfile.

    The file location is printed and the spectrogram is drawn with
    matplotlib, with labelled axes.

    Parameters
    ----------
    local_audio_file : string
        Local location to the audio file to be displayed. When it is a
        directory, a file from it is chosen via ``random_file_from_dir``.

    Returns
    -------
    None

    Raises
    ------
    DLPyError
        If soundfile or matplotlib cannot be imported.

    '''
    try:
        import soundfile as sf
        import matplotlib.pylab as plt
    except (ModuleNotFoundError, ImportError):
        raise DLPyError('cannot import soundfile')

    # a directory means "pick one of its files"
    is_directory = os.path.isdir(local_audio_file)
    chosen_file = random_file_from_dir(local_audio_file) if is_directory else local_audio_file

    print('File location: {}'.format(chosen_file))

    samples, rate = sf.read(chosen_file)

    plt.specgram(samples, Fs=rate)
    # add axis labels
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')