class ModelsWrapper: def __init__(self, arch): self.architechture = arch self.in_layer = None self.out_layer = None if arch == 'musicnn': self.feature_extractor = es.TensorflowInputMusiCNN() self.frame_size = 512 self.hop_size = 256 self.patch_size = 187 self.num_bands = 96 elif arch == 'vggish': self.feature_extractor = es.TensorflowInputVGGish() self.frame_size = 400 self.hop_size = 200 self.patch_size = 96 self.num_bands = 64 self.feature_frames = [] self.in_pool = Pool() self.out_pool = Pool() # setup model self.predict = None def load_model(self, model_path, in_layer, out_layer): if not self.predict: self.predict = es.TensorflowPredict(graphFilename=model_path, inputs=[in_layer], outputs=[out_layer], squeeze=True) self.in_layer = in_layer self.out_layer = out_layer def compute_features(self, audio): frames = [] self.feature_frames = [] # ensure it's empty for frame in es.FrameGenerator(audio, frameSize=self.frame_size, hopSize=self.hop_size, startFromZero=True): frames.append(frame) for f in frames: self.feature_frames.append(self.feature_extractor(f)) return self.feature_frames def make_prediction(self): self._featuresToTensorAsBatch() self.out_pool.clear() self.out_pool = self.predict(self.in_pool) return self.out_pool[self.out_layer] def _featuresToTensorAsBatch(self): # reshape features as tensor, zeropadding as needed feature_frames_as_np = np.array(self.feature_frames, dtype=np.single) incomplete_patch_size = feature_frames_as_np.shape[0] % self.patch_size zero_frame_size = self.patch_size - incomplete_patch_size zero_frames = np.zeros((zero_frame_size, self.num_bands), dtype=np.single) zero_padded_features = np.append(feature_frames_as_np, zero_frames, axis=0) batch = np.expand_dims( np.reshape(zero_padded_features, [-1, self.patch_size, self.num_bands]), 1) self.in_pool.set(self.in_layer, batch) def dispose(self): # clear model from memory self.predict = None self.in_layer = None
buffer[:] = array(unpack('f' * bufferSize, data)) # generate predictions reset(vimp) run(vimp) # ## Prcess from a file # In[7]: # initialize plot buffers melBuffer = np.zeros([numberBands, patchSize * displaySize]) actBuffer = np.zeros([nLabels, displaySize]) # reset storage pool.clear() # initialize plots f, ax = plt.subplots(1, 2, figsize=[9.6, 7]) f.canvas.draw() ax[0].set_title('Mel Bands') img_mel = ax[0].imshow(melBuffer, aspect='auto', origin='lower', vmin=0, vmax=6) ax[0].set_xticks([]) ax[1].set_title('Activations') img_act = ax[1].matshow(actBuffer, aspect='0.5', vmin=0, vmax=1)