# Gesture-recognition app (PyMT era, Python 2). WindowApp, Recognizer and
# Helper are the project's own classes; getCurrentTouches comes from pymt.
class RecognizerWindowApp(WindowApp):

    def __init__(self, **kwargs):
        super(RecognizerWindowApp, self).__init__(**kwargs)
        # create the recognizer
        self.recognizer = Recognizer()
        # load the stored templates into the recognizer
        self.templates = Helper.returnTemplates()
        self.loadTemplates()
        # traces of the gesture currently being drawn
        self.gesture = []

    def loadTemplates(self):
        """Register every stored template with the recognizer."""
        for key, value in self.templates.iteritems():   # iterate through the dict
            for template in value:                      # iterate through its templates
                self.recognizer.addTemplate(key, template)

    def on_touch_up(self, touch):
        """Fired when a touch goes up: store its trace and, once the last
        finger is lifted, recognize the collected gesture."""
        self.gesture.append(touch.userdata['trace'])
        if len(getCurrentTouches()) == 0:
            (name, score) = self.recognizer.recognize(self.gesture)
            print name, score
            # reset for the next gesture
            self.gesture = []
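A minimal launch sketch follows; the run() call is an assumption, since the snippet does not show how a WindowApp subclass is actually started in this project.

# Hypothetical launcher: adapt to however WindowApp is run in this code base.
if __name__ == '__main__':
    app = RecognizerWindowApp()
    app.run()  # assumed entry point, not shown in the original snippet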
def run_evaluation(H):
    """Evaluate a trained SpeechCNN on the test split and report
    BLEU / WER / CER / accuracy. H is the hyper-parameter namespace."""
    vocab = Vocabulary(os.path.join(H.ROOT_DIR, H.EXPERIMENT),
                       encoding=H.TARGET_ENCODING)

    # audio pipeline: dB normalization -> spectrogram -> feature normalization -> tensor
    audio_transform = transforms.Compose([
        AudioNormalizeDB(db=H.NORMALIZE_DB, max_gain_db=H.NORMALIZE_MAX_GAIN),
        AudioSpectrogram(sample_rate=H.AUDIO_SAMPLE_RATE,
                         window_size=H.SPECT_WINDOW_SIZE,
                         window_stride=H.SPECT_WINDOW_STRIDE,
                         window=H.SPECT_WINDOW),
        AudioNormalize(),
        FromNumpyToTensor(tensor_type=torch.FloatTensor),
    ])
    # label pipeline: encode transcripts for CTC -> tensor
    label_transform = transforms.Compose([
        TranscriptEncodeCTC(vocab),
        FromNumpyToTensor(tensor_type=torch.LongTensor),
    ])

    test_dataset = AudioDataset(os.path.join(H.ROOT_DIR, H.EXPERIMENT),
                                manifests_files=H.MANIFESTS,
                                datasets="test",
                                transform=audio_transform,
                                label_transform=label_transform,
                                max_data_size=None,
                                sorted_by='recording_duration')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=H.BATCH_SIZE,
                                              num_workers=H.NUM_WORKERS,
                                              shuffle=False,
                                              collate_fn=collate_fn,
                                              pin_memory=True)
    logger.info(test_loader.dataset)

    # restore the trained model from its checkpoint
    model_pred = SpeechCNN(len(vocab),
                           input_size=256,
                           hidden_size=H.CNN_HIDDEN_SIZE,
                           dropout=H.CNN_DROPOUT,
                           initialize=torch_weight_init)
    if H.USE_CUDA:
        model_pred.cuda()
    state = torch.load(os.path.join(H.EXPERIMENT, H.MODEL_NAME + '.tar'))
    model_pred.load_state_dict(state)

    # run greedy CTC decoding over the whole test set
    ctc_decoder = CTCGreedyDecoder(vocab)
    recognizer = Recognizer(model_pred, ctc_decoder, test_loader)
    hypotheses = recognizer()

    # collect the reference transcripts in the same order as the hypotheses
    transcripts = []
    for _, labels, _, label_sizes, _ in test_loader:
        label_seq = CTCGreedyDecoder.decode_labels(labels, label_sizes, vocab)
        transcripts.extend(label_seq)

    bleu = Scorer.get_moses_multi_bleu(hypotheses, transcripts, lowercase=False)
    wer, cer = Scorer.get_wer_cer(hypotheses, transcripts)
    acc = Scorer.get_acc(hypotheses, transcripts)
    logger.info('Test Summary \n'
                'Bleu: {bleu:.3f}\n'
                'WER: {wer:.3f}\n'
                'CER: {cer:.3f}\n'
                'ACC: {acc:.3f}'.format(bleu=bleu, wer=wer * 100,
                                        cer=cer * 100, acc=acc * 100))
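A hedged usage sketch: run_evaluation takes a hyper-parameter namespace H, and every value below is an illustrative placeholder, not a setting from the original experiment.

from types import SimpleNamespace

# All values are placeholder assumptions; point them at a real experiment dir.
H = SimpleNamespace(
    ROOT_DIR='data',
    EXPERIMENT='exp1',
    TARGET_ENCODING='utf-8',
    NORMALIZE_DB=-3.0,
    NORMALIZE_MAX_GAIN=30.0,
    AUDIO_SAMPLE_RATE=16000,
    SPECT_WINDOW_SIZE=0.02,       # seconds
    SPECT_WINDOW_STRIDE=0.01,     # seconds
    SPECT_WINDOW='hamming',
    MANIFESTS=['test_manifest.csv'],
    BATCH_SIZE=16,
    NUM_WORKERS=4,
    CNN_HIDDEN_SIZE=512,
    CNN_DROPOUT=0.1,
    USE_CUDA=False,               # set True when a GPU is available
    MODEL_NAME='speech_cnn',
)

run_evaluation(H)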