class SoundRecognitionApp():
    """Realtime sound recognition: capture audio chunks, convert each to a
    mel-spectrogram image and run a multilabel classification model on it."""

    def __init__(self, cfg) -> None:
        """Build the signal-to-image transformer, audio source and model.

        Args:
            cfg (dict): configuration with 'transforms', 'audio' and
                'model' sections.
        """
        self.transformer = Signal2ImageTransformer(**cfg['transforms'])
        self.audio = Audio(cfg['audio'])
        self.load_model(cfg['model'])

    def run(self):
        """Main realtime loop: read audio until error or Ctrl-C."""
        print("============= REALTIME START ==============")
        self.audio.start()
        self.flag = True

        try:
            while self.flag:
                status, data = self.audio.get()
                if status == Audio.ERROR:
                    print('[error]')
                    break
                elif status == Audio.WAIT:
                    # No new chunk available yet; poll again.
                    continue
                mel_spec = self.preprocess(data)
                self.inference(mel_spec)
        except KeyboardInterrupt:
            pass
        except Exception as e:
            print(e)
        finally:
            # Always release the audio device, even on error.
            self.audio.stop()
        print("============= REALTIME FINISH ==============")

    def preprocess(self, signal):
        """Transform a raw signal into a batched (1, ...) model input."""
        return np.expand_dims(self.transformer.transform(signal), axis=0)

    def inference(self, X):
        """Run the model on a numpy batch and return label probabilities.

        Args:
            X (np.ndarray): batched input produced by preprocess().

        Returns:
            np.ndarray: the model's 'multilabel_proba' output.
        """
        image = torch.from_numpy(X.astype(np.float32)).clone()
        # BUG FIX: Tensor.to() is not in-place; the original discarded the
        # moved tensor and always fed the CPU tensor to the model.
        image = image.to(self.device).float()
        with torch.no_grad():  # inference only -- skip the autograd graph
            prob = self.model(image)['multilabel_proba'].detach().cpu().numpy()
        return prob

    def load_model(self, cfg):
        """Instantiate the model from config, load weights, move to device.

        Exits the process with status 1 when the model class or the weight
        file cannot be resolved.
        """
        try:
            self.device = torch.device(cfg["device"])
            self.model = getattr(ml.my_model, cfg['name'])(**cfg['params'])
            # map_location lets CPU-only hosts load GPU-trained weights.
            self.model.load_state_dict(
                torch.load(cfg['path'], map_location=self.device))
            self.model.to(self.device)
            self.model.eval()  # freeze dropout/batchnorm for inference
        except AttributeError as e:
            print(f"Model {cfg['name']} is None. {e}")
            raise SystemExit(1)
        except FileNotFoundError as e:
            print(f"{e}")
            raise SystemExit(1)
        except Exception as e:
            print(f"{e}")
            raise SystemExit(1)
# Beispiel #2
class MusicChangePointDetector(object):
    """Detect modulation (change points) in a music file by feeding the
    similarity of consecutive normalized spectra into a ChangeFinder."""

    def __init__(self, setting_path: str, audio_path: str):
        """Load settings and set up the audio source and detector state.

        Args:
            setting_path (str): path to the YAML settings file.
            audio_path (str): path to the music file.
        """
        with open(setting_path, 'r') as f:
            # safe_load: no arbitrary object construction from the config,
            # and yaml.load() without a Loader fails on PyYAML >= 6.
            cfg = yaml.safe_load(f)

        self.cf = cf.ChangeFinder(**cfg['change_finder'])
        self.audio = Audio(cfg['audio'], audio_file_path=audio_path)

        # Rolling raw-audio buffer that the STFT is computed over.
        self.buffer = np.zeros(cfg['model']['buffer_audio_length'],
                               dtype=np.float32)
        self.buf_num = int(cfg['model']['frame_buf_num'])
        self.spec_buf = []  # history of normalized spectra
        self.thr = float(cfg['model']['thr'])

    def run(self):
        """Main loop: stream audio chunks and print on each detection."""
        self.audio.start()
        try:
            while True:
                status, data = self.audio.get()

                if status == Audio.ERROR:
                    break
                elif status == Audio.WAIT:
                    continue

                # Slide the newest chunk into the tail of the rolling buffer.
                self.buffer = np.roll(self.buffer, -data.shape[0], axis=0)
                self.buffer[-data.shape[0]:] = data

                if self.detect():
                    print('detect')

        except KeyboardInterrupt:
            print('Interrupt')
        self.audio.stop()

    def detect(self):
        """Score the newest spectrum against recent history.

        Returns:
            bool: True when the ChangeFinder score exceeds the threshold.
        """
        is_detect = False
        # Time-averaged dB spectrum of the rolling buffer, first 512 bins,
        # L2-normalized so the dot product below acts as cosine similarity.
        D = np.average(librosa.amplitude_to_db(np.abs(librosa.stft(
            self.buffer)),
                                               ref=np.max),
                       axis=1)[:512]
        D /= np.linalg.norm(D, ord=2)
        self.spec_buf.append(D)

        if len(self.spec_buf) > self.buf_num:
            similarity = np.average(
                np.dot(self.spec_buf[-1],
                       np.array(self.spec_buf[-(self.buf_num - 1):-1]).T))
            score = self.cf.update(similarity)
            # NOTE(review): pop() discards the spectrum just appended, so the
            # comparison history stays frozen at the first buf_num frames; a
            # sliding window would use pop(0). Confirm intended behavior.
            self.spec_buf.pop()
            if score > self.thr:
                is_detect = True

        return is_detect