def button_process_click(self):
    preprocessor = Preprocessor()
    labeled_img = preprocessor.process(self.image)
    cv2.imwrite('result_img.png', labeled_img)
    recognizer = Recognizer()
    disease = recognizer.recognize_disease('result_img.png')
    print(disease)
    QMessageBox.about(self.mainwindow, "Predicted Disease", disease)
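# Hedged sketch (assumption, not from the source): minimal PyQt5 wiring for a
# slot like button_process_click above. The widget and slot names here are
# hypothetical.
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton

class SketchWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        button = QPushButton("Process image", self)
        # clicking the button triggers the processing slot
        button.clicked.connect(self.on_process_click)

    def on_process_click(self):
        print("preprocess -> recognize -> show QMessageBox would run here")

if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = SketchWindow()
    win.show()
    sys.exit(app.exec_())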
def getWord(self):
    """Effects: gets word from input stream."""
    regexp = RegExp.RegExp()
    begin = self.findBegin()
    fin = begin - 1
    old_start = begin  # start of word
    rec = Recognizer()
    # Verify string
    if self.data[begin:begin + 1] == metadata["string"]:
        fin = self.data.find(metadata["string"], begin + 1)
        word = self.data[begin:fin + 1]
        self.cursy = fin + 1
    # Verify double delimiter
    elif self.data[begin:begin + 2] in metadata["double"]:
        word = self.data[begin:begin + 2]
        self.cursy = begin + 2
    else:
        isEndLexem = 0
        while (not isEndLexem) and (fin + 1 < len(self.data)):
            fin = fin + 1
            if (self.data[fin] in self.delimiters) or (self.data[fin] in self.whitespaces):
                isEndLexem = 1
            for _cond_delim in self.conditional_delimiters.keys():
                if self.data[fin] == _cond_delim:
                    if not (regexp.match(self.data[fin - 1], self.conditional_delimiters[_cond_delim]["before"])
                            and regexp.match(self.data[fin + 1], self.conditional_delimiters[_cond_delim]["after"])):
                        isEndLexem = 1
            if ((self.data[fin] in metadata["sign"])
                    and (len(self.wordSequence) > 0)
                    and (not rec.isOperator(self.wordSequence[len(self.wordSequence) - 1]))
                    and (not regexp.match(self.data[fin - 2:fin], "[0-9]e"))):
                isEndLexem = 1
        # -- while
        if begin == fin:
            word = self.data[begin]
            self.cursy = fin + 1
        else:
            word = self.data[begin:fin]
            self.cursy = fin
    if old_start != self.cursy:
        self.code = self.code + self.data[old_start:self.cursy]
    else:
        self.code = self.code + self.data[old_start]
    self.wordSequence.append(word)
    return word
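# Hedged sketch (assumption): the shapes getWord() appears to expect for
# `metadata` and `conditional_delimiters`. All concrete values are illustrative.
metadata = {
    "string": '"',                       # one-character string delimiter
    "double": {"==", "<=", ">=", "<>"},  # two-character delimiters
    "sign": "+-",                        # sign characters that may end a lexeme
}
conditional_delimiters = {
    # '.' stays inside a lexeme only when the surrounding characters match
    # these patterns (e.g. between digits in "3.14"); otherwise it delimits
    ".": {"before": "[0-9]", "after": "[0-9]"},
}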
def __init__(self, vs, width=320, height=450, framerate=32):
    self.vs = vs
    self.root = tki.Tk()
    self.framerate = framerate
    self.sleepduration = 1.0 / self.framerate
    self.frame = None
    self.thread = None
    self.stopEvent = None
    self.root.resizable(width=False, height=False)
    self.root.geometry('{}x{}'.format(width, height))
    self.panelWidth = width
    self.panel = None
    self.button = tki.Button(self.root, text="Ring the Bell!", command=self.ring)
    self.button.pack(side="bottom", fill="both", expand="yes", padx=10, pady=10)
    self.stopVideoLoop = threading.Event()
    self.thread = threading.Thread(target=self.videoLoop, args=())
    self.thread.start()
    self.root.wm_title("Hoosthere")
    self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)
    self.recognizer = Recognizer()
def __init__(self, opts):
    # import the recognizer so Gst doesn't clobber our -h
    from Recognizer import Recognizer
    self.ui = None
    # keep track of the opts
    self.opts = opts
    ui_continuous_listen = False
    self.continuous_listen = False
    self.commands = {}
    self.read_commands()
    self.recognizer = Recognizer(lang_file, dic_file)
    self.recognizer.connect('finished', self.recognizer_finished)
    if opts.interface is not None:
        if opts.interface == "q":
            # import the ui from qt
            from QtUI import UI
        elif opts.interface == "g":
            from GtkUI import UI
        else:
            print "no GUI defined"
            sys.exit()
        self.ui = UI(args, opts.continuous)
        self.ui.connect("command", self.process_command)
    if self.opts.history:
        self.history = []
def __init__(self):
    self.detection_reader = DetectionReader('detections.json')
    self.project_file_name = '/home/algernon/andro2'
    self.video_file_name = ''
    self.db_name = ''
    self.data_base = None
    self.video_maker = None
    self.db_user_name = 'root'
    self.db_user_pass = '******'
    self.db_host = 'localhost'
    self.commands = []
    self.output_video_file_name = 'output.mkv'
    self.video_reader = None
    self.video_writer = None
    self.emotion_detection_reader = DetectionReader('emotion_results/er.json')
    self.emotion_recognizer = EmotionRecognizer(self.EMOTION_PROB_THRESH)
    self.captioner = Captioner(
        '/home/algernon/a-PyTorch-Tutorial-to-Image-Captioning/weights/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar',
        '/home/algernon/a-PyTorch-Tutorial-to-Image-Captioning/weights/WORDMAP_coco_5_cap_per_img_5_min_word_freq.json')
    self.segmentator = None
    self.clothes_detector = ClothesDetector("yolo/df2cfg/yolov3-df2.cfg",
                                            "yolo/weights/yolov3-df2_15000.weights",
                                            "yolo/df2cfg/df2.names")
    self.face_recognizer = FaceRecognizer()
    self.open_project()
    self.recognizer = Recognizer(
        '/home/algernon/PycharmProjects/AIVlog/mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py',
        '/home/algernon/PycharmProjects/AIVlog/mmdetection/work_dirs/faster_rcnn_r50_fpn_1x_voc0712/epoch_10.pth')
def Main():
    if Train:
        print("Loading CSV...")
        machine = MyMachine("./Data/labels.csv")
        print("CSV Loaded.")
        trainColumnName = "id"
        valuesColumnName = "breed"
        filesType = ".jpg"
        TrainingPicturesPathDirectory = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\Data\\train\\"
        TestingPicturesPathDirectory = "./Data/test/"
        trainingImagesNo = 12000  # Should be more than 126, or __show_25_images() may raise an index-out-of-bounds error
        callbackLogsPath = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\Logs\\"
        modelsSavingPath = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\Models\\"
        machine.initializer(trainColumnName, valuesColumnName, filesType,
                            TrainingPicturesPathDirectory, TestingPicturesPathDirectory,
                            trainingImagesNo, callbackLogsPath, modelsSavingPath)
        machine.Train()
    else:
        print("Recognizing...")
        imgPath = "7.jpg"  # Image to predict
        modelPath = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\Models\\ModelNO_10_LOSS_0.74_ACCURACY_0.82_IMG_NO_12000.h5"
        CSVLabelsPath = "./Data/labels.csv"
        X_ColumnName = "id"
        Y_ColumnName = "breed"
        imagePath = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\ImagesToTest\\" + str(imgPath)
        trainPicturesPath = "C:\\Users\\hosse\\Desktop\\Dog Breed Project\\Data\\train\\"
        filesType = ".jpg"
        imgReco = Recognizer(modelPath, CSVLabelsPath, X_ColumnName, Y_ColumnName,
                             trainPicturesPath, filesType)
        imgReco.predict(imagePath)
    return 0
def __init__(self):
    super(MainWindow, self).__init__()
    self.recognizer = Recognizer()
    self.recognizer.learn()
    self.ui = loadUi(os.path.join(THIS_DIR, 'mainwindow.ui'), self)
    self.thread = QThread()
    try:
        self.camera = CameraDevice()
    except ValueError:
        self.ui.video.setText("Device not found!\n\nIs FFMPEG available?")
    else:
        self.camera.frame_ready.connect(self.update_video_label)
        self.ui.video.setMinimumSize(640 * 2, 480)
        self.camera.moveToThread(self.thread)
    self.ui.t_max.setValue(0)
    self.ui.t_min.setValue(255)
    self.ui.s_max.setValue(200)
    self.ui.s_min.setValue(3)
    self.update_values()
def __init__(self, opts):
    # import the recognizer so Gst doesn't clobber our -h
    from Recognizer import Recognizer
    self.ui = None
    # keep track of the opts
    self.opts = opts
    ui_continuous_listen = False
    self.continuous_listen = opts.continuous
    self.commands = {}
    self.read_commands()
    self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
    self.recognizer.connect('finished', self.recognizer_finished)
    self.matchTime = 0
    self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
    self.commandFileTime = 0
    # updates language file and commands on start
    self.checkCommandFile()
    self.commandFileTime = os.path.getmtime(command_file)
    # read options
    if opts.interface is not None:
        if opts.interface == "q":
            # import the ui from qt
            from QtUI import UI
        elif opts.interface == "g":
            from GtkUI import UI
        else:
            print "no GUI defined"
            sys.exit()
        self.ui = UI(args, opts.continuous)
        self.ui.connect("command", self.process_command)
        # can we load the icon resource?
        icon = self.load_resource("icon.png")
        if icon:
            self.ui.set_icon(icon)
    if self.opts.history:
        self.history = []
def __init__(self, opts):
    # import the recognizer so Gst doesn't clobber our -h
    from Recognizer import Recognizer
    self.ui = None
    self.options = {}
    ui_continuous_listen = False
    self.continuous_listen = False
    self.commander = Command.Commander(command_file, strings_file)
    # load the options file
    self.load_options()
    # merge the opts
    for k, v in opts.__dict__.items():
        if (k not in self.options) or opts.override:
            self.options[k] = v
    if self.options['interface'] is not None:
        if self.options['interface'] == "q":
            from QtUI import UI
        elif self.options['interface'] == "g":
            from GtkUI import UI
        elif self.options['interface'] == "gt":
            from GtkTrayUI import UI
        else:
            print "no GUI defined"
            sys.exit()
        self.ui = UI(args, self.options['continuous'])
        self.ui.connect("command", self.process_command)
        # can we load the icon resource?
        icon = self.load_resource("icon.png")
        if icon:
            self.ui.set_icon_active_asset(icon)
        # can we load the icon_inactive resource?
        icon_inactive = self.load_resource("icon_inactive.png")
        if icon_inactive:
            self.ui.set_icon_inactive_asset(icon_inactive)
    if self.options['history']:
        self.history = []
    # create the recognizer
    try:
        self.recognizer = Recognizer(lang_file, dic_file, self.options['microphone'])
    except Exception, e:
        # no recognizer? bummer
        sys.exit()
def __init__(self, opts):
    # Initialize our ROS node:
    rospy.init_node('voice')
    # Define our publisher:
    self.voice_pub = rospy.Publisher('voice', String, queue_size=1)
    # import the recognizer so Gst doesn't clobber our -h
    from Recognizer import Recognizer
    self.ui = None
    # keep track of the opts
    self.opts = opts
    ui_continuous_listen = False
    self.continuous_listen = opts.continuous
    self.stringsFileTime = os.path.getmtime(strings_file)
    self.commands = {}
    self.read_commands()
    self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
    self.recognizer.connect('finished', self.recognizer_finished)
    self.matchTime = 0
    self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
    # Update the Language File and Commands?
    self.commandFileTime = os.path.getmtime(command_file)
    if AUTO_UPDATE_CMD_FILE or (self.commandFileTime > self.stringsFileTime):
        # Trick the system by making it think we just created the command file:
        self.commandFileTime = time.time()
        self.checkCommandFile()
    # read options
    if self.opts.history:
        self.history = []
def __init__(self, opts):
    # import the recognizer so Gst doesn't clobber our -h
    from Recognizer import Recognizer
    self.ui = None
    # keep track of the opts
    self.opts = opts
    ui_continuous_listen = False
    self.continuous_listen = False
    self.commands = {}
    self.read_commands()
    self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
    self.recognizer.connect('finished', self.recognizer_finished)
    self.matchTime = 0
    self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
    self.commandFileTime = 0
    # updates language file and commands on start
    self.checkCommandFile()
    self.commandFileTime = os.path.getmtime(command_file)
    # read options
    if opts.interface is not None:
        if opts.interface == "q":
            # import the ui from qt
            from QtUI import UI
        elif opts.interface == "g":
            from GtkUI import UI
        else:
            print "no GUI defined"
            sys.exit()
        self.ui = UI(args, opts.continuous)
        self.ui.connect("command", self.process_command)
        # can we load the icon resource?
        icon = self.load_resource("icon.png")
        if icon:
            self.ui.set_icon(icon)
    if self.opts.history:
        self.history = []
from exemplary_points import filter_and_getExemplaries

sample_rate = 10

# load data
N_users = 10
for i in range(N_users):
    execfile("data/user" + str(i + 1) + ".py")
all_data = [data_user1, data_user2, data_user3, data_user4, data_user5,
            data_user6, data_user7, data_user8, data_user9, data_user10]

# extract exemplary points
filter_and_getExemplaries(all_data, sample_rate)

# make prediction
recognizer = Recognizer(sample_rate)
# The data is from letter "O"
print recognizer.predict_one([[0.00151, 0.01135],
                              [0.01175, 0.02839],
                              [0.0243, -0.00781],
                              [0.01606, -0.03011],
                              [-0.01251, -0.02651],
                              [-0.08557, 0.02435],
                              [-0.05574, 0.02611],
                              [0.00386, 0.01013],
                              [0.03279, 0.00049],
                              [0.02381, -0.0027],
                              [-0.0126, -0.00321],
                              [-0.06615, 0.01246],
                              [-0.06997, 0.00418],
                              [-0.06926, -0.00977],
class Blather:
    def __init__(self, opts):
        # import the recognizer so Gst doesn't clobber our -h
        from Recognizer import Recognizer
        self.ui = None
        # keep track of the opts
        self.opts = opts
        ui_continuous_listen = False
        self.continuous_listen = False
        self.commands = {}
        self.read_commands()
        self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
        self.recognizer.connect('finished', self.recognizer_finished)
        self.matchTime = 0
        self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
        self.commandFileTime = 0
        # updates language file and commands on start
        self.checkCommandFile()
        self.commandFileTime = os.path.getmtime(command_file)
        # read options
        if opts.interface is not None:
            if opts.interface == "q":
                # import the ui from qt
                from QtUI import UI
            elif opts.interface == "g":
                from GtkUI import UI
            else:
                print "no GUI defined"
                sys.exit()
            self.ui = UI(args, opts.continuous)
            self.ui.connect("command", self.process_command)
            # can we load the icon resource?
            icon = self.load_resource("icon.png")
            if icon:
                self.ui.set_icon(icon)
        if self.opts.history:
            self.history = []

    def read_commands(self):
        # read the .commands file
        file_lines = open(command_file)
        strings = open(strings_file, "w")
        self.commands = {}
        self.keywords = []
        for line in file_lines:
            print line
            # trim the white spaces
            line = line.strip()
            # if the line has length and the first char isn't a hash
            if len(line) and line[0] != "#":
                # this is a parsible line
                (key, value) = line.split(":", 1)
                print key, value
                # get the keyword out of the commands file
                if value == "keyword" and key.strip().lower() not in self.keywords:
                    self.keywords.append(key.strip().lower())
                    continue
                self.commands[key.strip().lower()] = value.strip()
                strings.write(key.strip() + "\n")
        # close the strings file
        strings.close()

    def log_history(self, text):
        if self.opts.history:
            self.history.append(text)
            if len(self.history) > self.opts.history:
                # pop off the first item
                self.history.pop(0)
            # open and truncate the blather history file
            hfile = open(history_file, "w")
            for line in self.history:
                hfile.write(line + "\n")
            # close the file
            hfile.close()

    def recognizer_finished(self, recognizer, text):
        # split the words spoken into an array
        t = text.lower()
        textWords = t.split(" ")
        # get the keys array for all commands
        biggestKey = ""
        biggestKeySet = []
        biggestKeyCount = 0
        ret = self.search_for_matches(textWords)
        biggestKey = ret['biggestKey']
        biggestKeySet = ret['biggestKeySet']
        biggestKeyCount = ret['biggestKeyCount']
        # find the match percentage
        percentMatch = self.calculate_match_percentage(biggestKeySet, biggestKeyCount)
        # call the process
        if biggestKeyCount > 0 and ((len(textWords) <= 2 and len(biggestKeySet) == len(textWords))
                                    or percentMatch >= PERCENT_MATCH_LIMIT):
            # must be equal or a 60% match
            self.matchTime = time.time()
            print("Best match: " + biggestKey, "Detected: " + text.lower(),
                  "Percent match: " + str(percentMatch))
            cmd = self.commands[biggestKey]
            if cmd == "cancel" and hasattr(self, 'runningProcess'):
                print("Cancelling previous command with PID " + str(self.runningProcess.pid))
                self.terminate_child_processes(self.runningProcess.pid)
                # terminate parent process
                self.runningProcess.terminate()
            elif cmd != "cancel":
                print cmd
                if "plugins/" in cmd:
                    # execute a plugin script
                    self.runningProcess = subprocess.Popen(os.path.join(file_dir, cmd), shell=True)
                else:
                    self.runningProcess = subprocess.Popen(cmd, shell=True)
                self.log_history(text)
        else:
            print("No matching command", "Percent match: " + str(percentMatch))
        # if there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # stop listening
                self.recognizer.pause()
            # let the UI know that there is a finish
            self.ui.finished(t)
        # check if the command.conf file has changed.
        self.checkCommandFile()

    def run(self):
        if self.ui:
            self.ui.run()
        else:
            blather.recognizer.listen()

    def quit(self):
        sys.exit(0)

    def checkCommandFile(self):
        newFileTime = os.path.getmtime(command_file)
        if newFileTime > self.commandFileTime:
            print("Command.conf file modified")
            subprocess.call(language_update_script)
            print("Language file updated")
            self.commandFileTime = newFileTime
            self.read_commands()

    def process_command(self, UI, command):
        print command
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()

    def load_resource(self, string):
        local_data = os.path.join(os.path.dirname(__file__), 'data')
        paths = ["/usr/share/blather/", "/usr/local/share/blather", local_data]
        for path in paths:
            resource = os.path.join(path, string)
            if os.path.exists(resource):
                return resource
        # if we get this far, no resource was found
        return False

    def search_for_matches(self, textWords):
        # TODO: https://github.com/ajbogh/blather/issues/1
        ret = {'biggestKey': '', 'biggestKeySet': {}, 'biggestKeyCount': 0}
        currentTime = time.time()
        matchLimit = 1
        for key in self.commands.keys():
            if self.commands[key] == "keyword":
                continue
            # split the keys on each word
            words = set(key.split(" "))
            # append the keyword to the command if it's not there already,
            # but only if the timed keyword activation is needed
            if self.continuous_listen and (currentTime - self.matchTime) > self.keywordTimeLimit \
                    and len(set(self.keywords).intersection(set(words))) == 0:
                words.update(self.keywords)
            # find the matching words
            matches = words.intersection(set(textWords))
            # determine if the words match
            if self.continuous_listen and len(set(self.keywords).intersection(set(textWords))) > 0 \
                    and (currentTime - self.matchTime) > self.keywordTimeLimit:
                matchLimit = 2
            if len(matches) >= matchLimit and len(matches) > ret['biggestKeyCount']:
                ret['biggestKeySet'] = words
                ret['biggestKeyCount'] = len(matches)
                ret['biggestKey'] = key
        return ret

    def calculate_match_percentage(self, biggestKeySet, biggestKeyCount):
        percentMatch = 0
        if len(biggestKeySet) > 0:
            percentMatch = (biggestKeyCount / float(len(biggestKeySet))) * 100
        return percentMatch

    # terminate_child_processes kills any child processes under a parent pid.
    # It uses pgrep to list child processes, so the system must have pgrep
    # installed in order to use the 'cancel' commands.
    def terminate_child_processes(self, pid):
        out = subprocess.Popen(['pgrep', '-P', str(pid)],
                               stdout=subprocess.PIPE).communicate()[0]
        childProcesses = out.splitlines()
        # Kill any orphaned children.
        for pid in childProcesses:
            # recursive call to kill entire family tree
            self.terminate_child_processes(int(pid))
            print("Killing child with PID " + str(pid))
            p = psutil.Process(int(pid))
            p.terminate()
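# Hedged sketch (assumption, not from the source): how a Blather instance like
# the one above is typically constructed and run. The class reads opts.interface,
# opts.continuous, opts.history, opts.microphone and opts.keytime, and its run()
# refers to a module-level `blather`; the optparse flags below are illustrative.
import optparse

if __name__ == "__main__":
    parser = optparse.OptionParser()
    parser.add_option("-i", "--interface", dest="interface", default=None)
    parser.add_option("-c", "--continuous", action="store_true", dest="continuous", default=False)
    parser.add_option("-H", "--history", type="int", dest="history", default=None)
    parser.add_option("-m", "--microphone", type="int", dest="microphone", default=None)
    parser.add_option("-k", "--keytime", type="int", dest="keytime", default=0)
    (opts, args) = parser.parse_args()
    blather = Blather(opts)
    blather.run()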
mo.open("com.lx.jdhg", "com.lx.jdhg/com.ly.lxdr.AppActivity")
time.sleep(4)
mo.click(params['skip2_x'], params['skip2_y'])
mo.click(params['skip_x'], params['skip_y'])
mo.click(params['start_button_x'], params['start_button_y'])
while True:
    current = time.time() * 1000
    p = os.popen('adb shell "dumpsys window | grep mCurrentFocus"')
    # before starting, check that we are on the correct page
    result = str(p.read())
    if not result[:-1].endswith("com.lx.jdhg/com.ly.lxdr.AppActivity}"):
        restart_app()
        continue
    reco = Recognizer(mo.get_screen_shot())
    try:
        table = reco.find()
        path = table.find_path(True)
        step_time = time.time() * 1000
        index = 0
        while index < len(path) - 1:
            mo.swipe(path[index][0] + params['main_area_west'],
                     path[index][1] + params['main_area_north'],
                     path[index + 1][0] + params['main_area_west'],
                     path[index + 1][1] + params['main_area_north'])
            index += 1
        print("Swiping took %d ms" % (time.time() * 1000 - step_time, ))
        mo.click(params['collect_button_x'], params['collect_button_y'])
        time.sleep(0.5)
        mo.click(params['next_x'], params['next_y'])
class Controller():
    global language
    language = "eng"
    global recognizer
    recognizer = Recognizer()
    global engoperations
    engoperations = English_Operations()
    global uroperations
    uroperations = Urdu_Operations()

    def __init__(self):
        app = QtWidgets.QApplication(sys.argv)
        MainWindow = QtWidgets.QMainWindow()
        global ui
        ui = Ui_MainWindow()
        ui.setupUi(MainWindow)
        ui.microbutton.clicked.connect(lambda: self.callistner("button"))
        ui.querytextfield.returnPressed.connect(lambda: self.callistner("textfield"))
        ui.actionEnglish.triggered.connect(lambda: self.change_language("eng"))
        ui.actionUrdu.triggered.connect(lambda: self.change_language("ur"))
        ui.add_label("goti", "Hello! I am Gotti. How can I help you?")
        recognizer.talk("Hello! I am Gotti. How can I help you?", "en-uk")
        MainWindow.show()
        sys.exit(app.exec_())

    def change_language(self, lang):
        global language
        language = lang

    def makedecision(self, command):
        if language == 'eng':
            if 'search' in command:
                engoperations.OpenChrome(command)
            elif 'launch' in command:
                engoperations.LaunchApp(command)
            else:
                ui.add_label("goti", "Sorry, can't understand your command")
                recognizer.talk("Sorry, can't understand your command", "en-uk")
        else:
            if 'تلاش' in command:  # "search"
                uroperations.OpenChrome(command)
            elif 'کھولو' in command:  # "open"
                uroperations.LaunchApp(command)
            else:
                # "Sorry, your command was not understood"
                ui.add_label("goti", "معاف کیجئے گا آپ کا حکم سمجھ نہیں آیا")

    def callistner(self, who):
        if who == "button" and language == "eng":
            ui.changetext("Goti is listening...")
            text = recognizer.myCommand('en-US')
            if text == -1:
                text = "Your last command couldn't be heard. Please speak again."
                ui.add_label("goti", text)
                recognizer.talk(text, "en-uk")
            else:
                ui.add_label("user", text)
                ui.changetext("")
                self.makedecision(text)
        elif who == "textfield" and language == "eng":
            text = ui.querytextfield.text()
            ui.add_label("user", text)
            ui.changetext("")
            self.makedecision(text)
        elif who == "button" and language == "ur":
            ui.changetext("گوٹی سن رہا ہے ...")  # "Goti is listening ..."
            text = recognizer.myCommand('ur-PK')
            if text == -1:
                # "Your last command was not heard. Please speak again."
                text = "آپ کا آخری حکم نہیں سنا گیا۔ براہ کرم دوبارہ بولیں"
                ui.add_label("goti", text)
            else:
                ui.add_label("user", text)
                ui.changetext("")
                self.makedecision(text)
        elif who == "textfield" and language == "ur":
            text = ui.querytextfield.text()
            ui.add_label("user", text)
            ui.changetext("")
            self.makedecision(text)
        ui.scrolled(ui.scrollArea.verticalScrollBar().maximum())
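# Hedged sketch (assumption): Controller's __init__ builds the Qt application
# and blocks in app.exec_(), so launching the GUI is a single constructor call.
if __name__ == "__main__":
    Controller()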
class Blather:
    def __init__(self, opts):
        # import the recognizer so Gst doesn't clobber our -h
        from Recognizer import Recognizer
        # set variables
        self.ui = None
        self.options = {}
        ui_continuous_listen = False
        self.continuous_listen = False
        self.commands = {}
        # read the commands
        self.load_commands_file()
        # load the options file
        print("load the options")
        self.load_options_file()
        # merge the opts
        for k, v in opts.__dict__.items():
            if (k not in self.options) or opts.override:
                self.options[k] = v
        # should we be updating?
        if self.options['update']:
            # make the sentences corpus
            self.generate_sentences_corpus()
            # run the update stuff
            UpdateLanguage.update_language()
        if self.options['interface'] is not None:
            if self.options['interface'] == "q":
                from QtUI import UI
            elif self.options['interface'] == "g":
                from GtkUI import UI
            elif self.options['interface'] == "gt":
                from GtkTrayUI import UI
            else:
                print("no GUI defined")
                sys.exit()
            self.ui = UI(args, self.options['continuous'])
            self.ui.connect("command", self.process_command)
            # can we load the icon resource?
            icon = self.load_resource("icon.png")
            if icon:
                self.ui.set_icon_active_asset(icon)
            # can we load the icon_inactive resource?
            icon_inactive = self.load_resource("icon_inactive.png")
            if icon_inactive:
                self.ui.set_icon_inactive_asset(icon_inactive)
        if self.options['history']:
            self.history = []
        # create the recognizer
        try:
            self.recognizer = Recognizer(lang_file, dic_file, self.options['microphone'])
        except Exception as e:
            print(e)
            # no recognizer? bummer
            sys.exit()
        self.recognizer.connect('finished', self.recognizer_finished)
        print("Using Options: ", self.options)

    def read_key_val_file(self, file_path, lowercase_key=False, lowercase_value=False):
        print(file_path)
        file_text = open(file_path)
        return_struct = {}
        for line in file_text:
            # trim the white spaces
            line = line.strip()
            # if the line has length and the first char isn't a hash
            if len(line) and line[0] != "#":
                # this is a parsible line
                (key, value) = line.split(":", 1)
                key = key.strip()
                value = value.strip()
                print(key, value)
                if lowercase_key:
                    key = key.lower()
                if lowercase_value:
                    value = value.lower()
                if value == "None" or value == "null":
                    value = None
                if value == "True" or value == "true":
                    value = True
                if value == "False" or value == "false":
                    value = False
                return_struct[key] = value
        return return_struct

    def load_commands_file(self):
        # read the .commands file
        self.commands = self.read_key_val_file(command_file)

    def generate_sentences_corpus(self):
        file_lines = open(command_file)
        strings = open(strings_file, "w")
        for i in self.commands:
            strings.write(i.lower() + "\n")
        # close the strings file
        strings.close()

    def load_options_file(self):
        # is there an opt file?
        try:
            self.options = self.read_key_val_file(opt_file)
            # if there is a microphone option, convert value to int
            if 'microphone' in self.options:
                self.options['microphone'] = int(self.options['microphone'])
        except:
            print("failed to read options file")

    def log_history(self, text):
        if self.options['history']:
            self.history.append(text)
            if len(self.history) > self.options['history']:
                # pop off the first item
                self.history.pop(0)
            # open and truncate the blather history file
            hfile = open(history_file, "w")
            for line in self.history:
                hfile.write(line + "\n")
            # close the file
            hfile.close()

    # Print the cmd and then run the command
    def run_command(self, cmd):
        print(cmd)
        subprocess.call(cmd, shell=True)

    def recognizer_finished(self, recognizer, text):
        t = text.lower()
        # is there a matching command?
        if t in self.commands:
            # run the valid_sentence_command if there is a valid sentence command
            if self.options['valid_sentence_command']:
                subprocess.call(self.options['valid_sentence_command'], shell=True)
            cmd = self.commands[t]
            # should we be passing words?
            if self.options['pass_words']:
                cmd += " " + t
                self.run_command(cmd)
            else:
                self.run_command(cmd)
            self.log_history(text)
        else:
            # run the invalid_sentence_command if there is an invalid sentence command
            if self.options['invalid_sentence_command']:
                subprocess.call(self.options['invalid_sentence_command'], shell=True)
            print("no matching command %s" % (t))
        # if there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # stop listening
                self.recognizer.pause()
            # let the UI know that there is a finish
            self.ui.finished(t)

    def run(self):
        # is a UI going to be used?
        if self.ui:
            self.ui.run()
        else:
            blather.recognizer.listen()

    def quit(self):
        sys.exit()

    def process_command(self, UI, command):
        print(command)
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()

    def load_resource(self, string):
        local_data = os.path.join(os.path.dirname(__file__), 'data')
        paths = ["/usr/share/blather/", "/usr/local/share/blather", local_data]
        for path in paths:
            resource = os.path.join(path, string)
            if os.path.exists(resource):
                return resource
        # if we get this far, no resource was found
        return False
def __init__(self):
    super().__init__()
    self.initUI()
    self.pathList = []
    self.rgzr = Recognizer()
from Network import Network

# Parse arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--picamera", type=int, default=1,
                help="Use Raspberry Camera")
ap.add_argument("-w", "--width", type=int, default=316,
                help="Width of the window")
ap.add_argument("-ht", "--height", type=int, default=450,
                help="Height of the window")
ap.add_argument("-fr", "--framerate", type=int, default=25,
                help="Frame rate of the camera")
opt = vars(ap.parse_args())

#recognizer = Recognizer()
recognizer = Recognizer(modelFile='model.mdl')
network = Network()
#network = Network(endpoint='http://localhost:8000/hoo/')

print('INFO: People: ')
print(recognizer.people)

print("INFO: Launching camera")
vs = VideoStream(usePiCamera=opt["picamera"] > 0).start()
time.sleep(2.0)

view = View(vs, recognizer, network, width=opt["width"],
            height=opt["height"], framerate=opt["framerate"])
print("INFO: Application started successfully.")
view.root.mainloop()
class Blather:
    def __init__(self, opts):
        # Initialize our ROS node:
        rospy.init_node('voice')
        # Define our publisher:
        self.voice_pub = rospy.Publisher('voice', String, queue_size=1)
        # import the recognizer so Gst doesn't clobber our -h
        from Recognizer import Recognizer
        self.ui = None
        # keep track of the opts
        self.opts = opts
        ui_continuous_listen = False
        self.continuous_listen = opts.continuous
        self.stringsFileTime = os.path.getmtime(strings_file)
        self.commands = {}
        self.read_commands()
        self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
        self.recognizer.connect('finished', self.recognizer_finished)
        self.matchTime = 0
        self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
        # Update the Language File and Commands?
        self.commandFileTime = os.path.getmtime(command_file)
        if AUTO_UPDATE_CMD_FILE or (self.commandFileTime > self.stringsFileTime):
            # Trick the system by making it think we just created the command file:
            self.commandFileTime = time.time()
            self.checkCommandFile()
        # read options
        if self.opts.history:
            self.history = []

    def read_commands(self):
        # read the .commands file
        file_lines = open(command_file)
        strings = open(strings_file, "w")
        self.commands = {}
        self.keywords = []
        for line in file_lines:
            # print line
            # trim the white spaces
            line = line.strip()
            # if the line has length and the first char isn't a hash
            if len(line) and line[0] != "#":
                # this is a parsible line
                (key, value) = line.split(":", 1)
                print key, value
                # get the keyword out of the commands file
                if value == "keyword" and key.strip().lower() not in self.keywords:
                    self.keywords.append(key.strip().lower())
                    continue
                self.commands[key.strip().lower()] = value.strip()
                strings.write(key.strip() + "\n")
        # close the strings file
        strings.close()

    def log_history(self, text):
        if self.opts.history:
            self.history.append(text)
            if len(self.history) > self.opts.history:
                # pop off the first item
                self.history.pop(0)
            # open and truncate the blather history file
            hfile = open(history_file, "w")
            for line in self.history:
                hfile.write(line + "\n")
            # close the file
            hfile.close()

    def recognizer_finished(self, recognizer, text):
        # split the words spoken into an array
        t = text.lower()
        textWords = t.split(" ")
        # get the keys array for all commands
        biggestKey = ""
        biggestKeySet = []
        biggestKeyCount = 0
        ret = self.search_for_matches(textWords)
        biggestKey = ret['biggestKey']
        biggestKeySet = ret['biggestKeySet']
        biggestKeyCount = ret['biggestKeyCount']
        # find the match percentage
        percentMatch = self.calculate_match_percentage(biggestKeySet, biggestKeyCount)
        if self.continuous_listen and len(set(self.keywords).intersection(set(biggestKeySet))) == 0:
            biggestKeyCount = 0
        # call the process
        if biggestKeyCount > 0 and ((len(textWords) <= 2 and len(biggestKeySet) == len(textWords))
                                    or percentMatch >= PERCENT_MATCH_LIMIT):
            # must be equal or a 60% match
            self.matchTime = time.time()
            print("Best match: " + biggestKey, "Detected: " + text.lower(),
                  "Percent match: " + str(percentMatch))
            cmd = self.commands[biggestKey]
            if cmd == "cancel" and hasattr(self, 'runningProcess'):
                print("Cancelling previous command with PID " + str(self.runningProcess.pid))
                self.terminate_child_processes(self.runningProcess.pid)
                # terminate parent process
                self.runningProcess.terminate()
            elif cmd != "cancel":
                print cmd
                if "ros/" in cmd:
                    [junk, ROScmd] = cmd.split("/")
                    self.voice_pub.publish(ROScmd)
                elif "plugins/" in cmd:
                    # execute a plugin script
                    self.runningProcess = subprocess.Popen(os.path.join(conf_dir, cmd), shell=True)
                else:
                    self.runningProcess = subprocess.Popen(cmd, shell=True)
                self.log_history(text)
        else:
            print("No matching command", "Percent match: " + str(percentMatch))
        # if there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # stop listening
                self.recognizer.pause()
            # let the UI know that there is a finish
            self.ui.finished(t)
        # check if the command.conf file has changed.
        self.checkCommandFile()

    def run(self):
        if self.ui:
            self.ui.run()
        else:
            blather.recognizer.listen()

    def quit(self):
        sys.exit(0)

    def checkCommandFile(self):
        stringsFileTime = os.path.getmtime(strings_file)
        if stringsFileTime < self.commandFileTime:
            print("Command.conf file modified")
            subprocess.call(language_update_script)
            print("Language file updated")
            self.read_commands()
        else:
            print 'NO NEED TO UPDATE?'
        # We'll update this time here (at the end)
        # because we used the trick above to force a re-load.
        # FIXME -- This is a really bad way to do things.
        # The problem is that the script re-writes the strings file
        # at the very beginning. This automatically makes the
        # strings file newer than the command file.
        self.commandFileTime = os.path.getmtime(command_file)

    def process_command(self, UI, command):
        print command
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()

    def search_for_matches(self, textWords):
        # TODO: https://github.com/ajbogh/blather/issues/1
        ret = {'biggestKey': '', 'biggestKeySet': {}, 'biggestKeyCount': 0}
        currentTime = time.time()
        matchLimit = 1
        for key in self.commands.keys():
            if self.commands[key] == "keyword":
                continue
            # split the keys on each word
            words = set(key.split(" "))
            # append the keyword to the command if it's not there already,
            # but only if the timed keyword activation is needed
            if self.keywordTimeLimit > 0 and self.continuous_listen \
                    and (currentTime - self.matchTime) > self.keywordTimeLimit \
                    and len(set(self.keywords).intersection(set(words))) == 0:
                words.update(self.keywords)
            elif len(set(self.keywords).intersection(set(textWords))) > 0:
                words.update(self.keywords)
            # find the matching words
            matches = words.intersection(set(textWords))
            # determine if the words match
            if self.continuous_listen and len(set(self.keywords).intersection(set(textWords))) > 0 \
                    and (currentTime - self.matchTime) > self.keywordTimeLimit:
                matchLimit = 2
            if len(matches) >= matchLimit and len(matches) > ret['biggestKeyCount']:
                ret['biggestKeySet'] = words
                ret['biggestKeyCount'] = len(matches)
                ret['biggestKey'] = key
        return ret

    def calculate_match_percentage(self, biggestKeySet, biggestKeyCount):
        percentMatch = 0
        if len(biggestKeySet) > 0:
            percentMatch = (biggestKeyCount / float(len(biggestKeySet))) * 100
        return percentMatch

    # terminate_child_processes kills any child processes under a parent pid.
    # It uses pgrep to list child processes, so the system must have pgrep
    # installed in order to use the 'cancel' commands.
    def terminate_child_processes(self, pid):
        out = subprocess.Popen(['pgrep', '-P', str(pid)],
                               stdout=subprocess.PIPE).communicate()[0]
        childProcesses = out.splitlines()
        # Kill any orphaned children.
        for pid in childProcesses:
            # recursive call to kill entire family tree
            self.terminate_child_processes(int(pid))
            print("Killing child with PID " + str(pid))
            p = psutil.Process(int(pid))
            p.terminate()
class MainWindow(QMainWindow):
    def __init__(self):
        super(MainWindow, self).__init__()
        self.recognizer = Recognizer()
        self.recognizer.learn()
        self.ui = loadUi(os.path.join(THIS_DIR, 'mainwindow.ui'), self)
        self.thread = QThread()
        try:
            self.camera = CameraDevice()
        except ValueError:
            self.ui.video.setText("Device not found!\n\nIs FFMPEG available?")
        else:
            self.camera.frame_ready.connect(self.update_video_label)
            self.ui.video.setMinimumSize(640 * 2, 480)
            self.camera.moveToThread(self.thread)
        self.ui.t_max.setValue(0)
        self.ui.t_min.setValue(255)
        self.ui.s_max.setValue(200)
        self.ui.s_min.setValue(3)
        self.update_values()

    @pyqtSlot()
    def start_recog(self):
        if self.recognizer is not None:
            self.camera.set_recog(self.recognizer)
        else:
            print("Please load a recognizer first.")

    @pyqtSlot()
    def stop_recog(self):
        self.camera.remove_recog()

    @pyqtSlot()
    def make_data(self):
        print("Not supported.")

    @pyqtSlot(int)
    def t_max_changed(self, val):
        if val < self.ui.t_min.value():
            self.ui.t_min.setValue(val)
        self.update_values()

    @pyqtSlot(int)
    def t_min_changed(self, val):
        if val > self.ui.t_max.value():
            self.ui.t_max.setValue(val)
        self.update_values()

    @pyqtSlot(int)
    def s_max_changed(self, val):
        if val < self.ui.s_min.value():
            self.ui.s_min.setValue(val)
        self.update_values()

    @pyqtSlot(int)
    def s_min_changed(self, val):
        if val > self.ui.s_max.value():
            self.ui.s_max.setValue(val)
        self.update_values()

    @pyqtSlot(QImage)
    def update_video_label(self, image):
        pixmap = QPixmap.fromImage(image)
        self.ui.video.setPixmap(pixmap)
        self.ui.video.update()

    def update_values(self):
        self.camera.set_values(self.ui.t_max.value(), self.ui.t_min.value(),
                               self.ui.s_max.value(), self.ui.s_min.value())
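# Hedged sketch (assumption): a conventional PyQt5 entry point for the
# MainWindow class above; the source snippet does not include one.
if __name__ == "__main__":
    import sys
    from PyQt5.QtWidgets import QApplication

    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())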
"um": audio.wavread(wav_directory + 'um.wav')[0], "dois": audio.wavread(wav_directory + 'dois.wav')[0], } audio_base[3] = { "matrix": audio.wavread(wav_directory + 'matrix.wav')[0], "braveheart": audio.wavread(wav_directory + 'braveheart.wav')[0], "constantine": audio.wavread(wav_directory + 'constantine.wav')[0], } audio_base[4] = { "dinheiro": audio.wavread(wav_directory + 'dinheiro.wav')[0], "cartao": audio.wavread(wav_directory + 'cartao.wav')[0], } audio_base[5] = { "finalizar_compra": audio.wavread(wav_directory + 'finalizar_compra.wav')[0], "sair": audio.wavread(wav_directory + 'sair.wav')[0], } if __name__ == "__main__": for i in range(6): recorder = Recorder() recognizer = Recognizer() recorder.record(time_to_run=2) (input_signal1, sampling_rate1, bits1) = audio.wavread('record.wav') Recognizer.test_audio(audio_base[i], input_signal1, wav_directory) fs = sampling_rate1 lowcut = 300 highcut = 3400
class Blather:
    def __init__(self, opts):
        # import the recognizer so Gst doesn't clobber our -h
        from Recognizer import Recognizer
        self.ui = None
        # keep track of the opts
        self.opts = opts
        ui_continuous_listen = False
        self.continuous_listen = False
        self.commands = {}
        self.read_commands()
        self.recognizer = Recognizer(lang_file, dic_file)
        self.recognizer.connect('finished', self.recognizer_finished)
        if opts.interface is not None:
            if opts.interface == "q":
                # import the ui from qt
                from QtUI import UI
            elif opts.interface == "g":
                from GtkUI import UI
            else:
                print "no GUI defined"
                sys.exit()
            self.ui = UI(args, opts.continuous)
            self.ui.connect("command", self.process_command)
        if self.opts.history:
            self.history = []

    def read_commands(self):
        # read the .commands file
        file_lines = open(command_file)
        strings = open(strings_file, "w")
        for line in file_lines:
            print line
            # trim the white spaces
            line = line.strip()
            # if the line has length and the first char isn't a hash
            if len(line) and line[0] != "#":
                # this is a parsible line
                (key, value) = line.split(":", 1)
                print key, value
                self.commands[key.strip().lower()] = value.strip()
                strings.write(key.strip() + "\n")
        # close the strings file
        strings.close()

    def log_history(self, text):
        if self.opts.history:
            self.history.append(text)
            if len(self.history) > self.opts.history:
                # pop off the first item
                self.history.pop(0)
            # open and truncate the blather history file
            hfile = open(history_file, "w")
            for line in self.history:
                hfile.write(line + "\n")
            # close the file
            hfile.close()

    def recognizer_finished(self, recognizer, text):
        t = text.lower()
        # is there a matching command?
        if self.commands.has_key(t):
            cmd = self.commands[t]
            print cmd
            subprocess.call(cmd, shell=True)
            self.log_history(text)
        else:
            print "no matching command"
        # if there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # stop listening
                self.recognizer.pause()
            # let the UI know that there is a finish
            self.ui.finished(t)

    def run(self):
        if self.ui:
            self.ui.run()
        else:
            blather.recognizer.listen()

    def quit(self):
        if self.ui:
            self.ui.quit()
        sys.exit()

    def process_command(self, UI, command):
        print command
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()
from VizGen import *
from Recognizer import Recognizer

if __name__ == "__main__":
    alphabet = ['B', 'A', 'D', 'C']
    generated_string, characters_dict, original_image = generate_image(alphabet, 8)
    noised_image = noise_image(original_image, sigma=100)
    recognizer = Recognizer(original_image, noised_image, alphabet, characters_dict)
    print(original_image.shape[1])
    print(noised_image.shape[1])
    recognized_string = recognizer.recognize()
    recognized_image = concatenate_images(recognized_string, characters_dict)
    show_triple_images(original_image, noised_image, recognized_image,
                       "original image", "noised image", "recognized image")
class Blather:
    def __init__(self, opts):
        # import the recognizer so Gst doesn't clobber our -h
        from Recognizer import Recognizer
        self.ui = None
        # keep track of the opts
        self.opts = opts
        ui_continuous_listen = False
        self.continuous_listen = opts.continuous
        self.commands = {}
        self.read_commands()
        self.recognizer = Recognizer(lang_file, dic_file, opts.microphone)
        self.recognizer.connect('finished', self.recognizer_finished)
        self.matchTime = 0
        self.keywordTimeLimit = opts.keytime  # set to 0 to always speak the keyword
        self.commandFileTime = 0
        # updates language file and commands on start
        self.checkCommandFile()
        self.commandFileTime = os.path.getmtime(command_file)
        # read options
        if opts.interface is not None:
            if opts.interface == "q":
                # import the ui from qt
                from QtUI import UI
            elif opts.interface == "g":
                from GtkUI import UI
            else:
                print "no GUI defined"
                sys.exit()
            self.ui = UI(args, opts.continuous)
            self.ui.connect("command", self.process_command)
            # can we load the icon resource?
            icon = self.load_resource("icon.png")
            if icon:
                self.ui.set_icon(icon)
        if self.opts.history:
            self.history = []

    def read_commands(self):
        # read the .commands file
        file_lines = open(command_file)
        strings = open(strings_file, "w")
        self.commands = {}
        self.keywords = []
        for line in file_lines:
            print line
            # trim the white spaces
            line = line.strip()
            # if the line has length and the first char isn't a hash
            if len(line) and line[0] != "#":
                # this is a parsible line
                (key, value) = line.split(":", 1)
                print key, value
                # get the keyword out of the commands file
                if value == "keyword" and key.strip().lower() not in self.keywords:
                    self.keywords.append(key.strip().lower())
                    continue
                self.commands[key.strip().lower()] = value.strip()
                strings.write(key.strip() + "\n")
        # close the strings file
        strings.close()

    def log_history(self, text):
        if self.opts.history:
            self.history.append(text)
            if len(self.history) > self.opts.history:
                # pop off the first item
                self.history.pop(0)
            # open and truncate the blather history file
            hfile = open(history_file, "w")
            for line in self.history:
                hfile.write(line + "\n")
            # close the file
            hfile.close()

    def recognizer_finished(self, recognizer, text):
        # split the words spoken into an array
        t = text.lower()
        textWords = t.split(" ")
        # get the keys array for all commands
        biggestKey = ""
        biggestKeySet = []
        biggestKeyCount = 0
        ret = self.search_for_matches(textWords)
        biggestKey = ret['biggestKey']
        biggestKeySet = ret['biggestKeySet']
        biggestKeyCount = ret['biggestKeyCount']
        # find the match percentage
        percentMatch = self.calculate_match_percentage(biggestKeySet, biggestKeyCount)
        if self.continuous_listen and len(set(self.keywords).intersection(set(biggestKeySet))) == 0:
            biggestKeyCount = 0
        # call the process
        if biggestKeyCount > 0 and ((len(textWords) <= 2 and len(biggestKeySet) == len(textWords))
                                    or percentMatch >= PERCENT_MATCH_LIMIT):
            # must be equal or a 60% match
            self.matchTime = time.time()
            print("Best match: " + biggestKey, "Detected: " + text.lower(),
                  "Percent match: " + str(percentMatch))
            cmd = self.commands[biggestKey]
            if cmd == "cancel" and hasattr(self, 'runningProcess'):
                print("Cancelling previous command with PID " + str(self.runningProcess.pid))
                self.terminate_child_processes(self.runningProcess.pid)
                # terminate parent process
                self.runningProcess.terminate()
            elif cmd != "cancel":
                print cmd
                if "plugins/" in cmd:
                    # execute a plugin script
                    self.runningProcess = subprocess.Popen(os.path.join(file_dir, cmd), shell=True)
                else:
                    self.runningProcess = subprocess.Popen(cmd, shell=True)
                self.log_history(text)
        else:
            print("No matching command", "Percent match: " + str(percentMatch))
        # if there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # stop listening
                self.recognizer.pause()
            # let the UI know that there is a finish
            self.ui.finished(t)
        # check if the command.conf file has changed.
        self.checkCommandFile()

    def run(self):
        if self.ui:
            self.ui.run()
        else:
            blather.recognizer.listen()

    def quit(self):
        sys.exit(0)

    def checkCommandFile(self):
        newFileTime = os.path.getmtime(command_file)
        if newFileTime > self.commandFileTime:
            print("Command.conf file modified")
            subprocess.call(language_update_script)
            print("Language file updated")
            self.commandFileTime = newFileTime
            self.read_commands()

    def process_command(self, UI, command):
        print command
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()

    def load_resource(self, string):
        local_data = os.path.join(os.path.dirname(__file__), 'data')
        paths = ["/usr/share/blather/", "/usr/local/share/blather", local_data]
        for path in paths:
            resource = os.path.join(path, string)
            if os.path.exists(resource):
                return resource
        # if we get this far, no resource was found
        return False

    def search_for_matches(self, textWords):
        # TODO: https://github.com/ajbogh/blather/issues/1
        ret = {'biggestKey': '', 'biggestKeySet': {}, 'biggestKeyCount': 0}
        currentTime = time.time()
        matchLimit = 1
        for key in self.commands.keys():
            if self.commands[key] == "keyword":
                continue
            # split the keys on each word
            words = set(key.split(" "))
            # append the keyword to the command if it's not there already,
            # but only if the timed keyword activation is needed
            if self.keywordTimeLimit > 0 and self.continuous_listen \
                    and (currentTime - self.matchTime) > self.keywordTimeLimit \
                    and len(set(self.keywords).intersection(set(words))) == 0:
                words.update(self.keywords)
            elif len(set(self.keywords).intersection(set(textWords))) > 0:
                words.update(self.keywords)
            # find the matching words
            matches = words.intersection(set(textWords))
            # determine if the words match
            if self.continuous_listen and len(set(self.keywords).intersection(set(textWords))) > 0 \
                    and (currentTime - self.matchTime) > self.keywordTimeLimit:
                matchLimit = 2
            if len(matches) >= matchLimit and len(matches) > ret['biggestKeyCount']:
                ret['biggestKeySet'] = words
                ret['biggestKeyCount'] = len(matches)
                ret['biggestKey'] = key
        return ret

    def calculate_match_percentage(self, biggestKeySet, biggestKeyCount):
        percentMatch = 0
        if len(biggestKeySet) > 0:
            percentMatch = (biggestKeyCount / float(len(biggestKeySet))) * 100
        return percentMatch

    # terminate_child_processes kills any child processes under a parent pid.
    # It uses pgrep to list child processes, so the system must have pgrep
    # installed in order to use the 'cancel' commands.
    def terminate_child_processes(self, pid):
        out = subprocess.Popen(['pgrep', '-P', str(pid)],
                               stdout=subprocess.PIPE).communicate()[0]
        childProcesses = out.splitlines()
        # Kill any orphaned children.
        for pid in childProcesses:
            # recursive call to kill entire family tree
            self.terminate_child_processes(int(pid))
            print("Killing child with PID " + str(pid))
            p = psutil.Process(int(pid))
            p.terminate()
audio_base[2] = {
    "um": audio.wavread(wav_directory + 'um.wav')[0],
    "dois": audio.wavread(wav_directory + 'dois.wav')[0],
}
audio_base[3] = {
    "matrix": audio.wavread(wav_directory + 'matrix.wav')[0],
    "braveheart": audio.wavread(wav_directory + 'braveheart.wav')[0],
    "constantine": audio.wavread(wav_directory + 'constantine.wav')[0],
}
audio_base[4] = {
    "dinheiro": audio.wavread(wav_directory + 'dinheiro.wav')[0],
    "cartao": audio.wavread(wav_directory + 'cartao.wav')[0],
}
audio_base[5] = {
    "finalizar_compra": audio.wavread(wav_directory + 'finalizar_compra.wav')[0],
    "sair": audio.wavread(wav_directory + 'sair.wav')[0],
}

if __name__ == "__main__":
    for i in range(6):
        recorder = Recorder()
        recognizer = Recognizer()
        recorder.record(time_to_run=2)
        (input_signal1, sampling_rate1, bits1) = audio.wavread('record.wav')
        Recognizer.test_audio(audio_base[i], input_signal1, wav_directory)
        fs = sampling_rate1
        lowcut = 300
        highcut = 3400
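# Hedged sketch (assumption): the truncated tail above defines fs, lowcut and
# highcut, which suggests a band-pass filter over the recorded signal. This is
# one plausible continuation using scipy, not the project's own code.
from scipy.signal import butter, lfilter

def bandpass_filter(signal, lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs  # Nyquist frequency
    b, a = butter(order, [lowcut / nyq, highcut / nyq], btype="band")
    return lfilter(b, a, signal)

# e.g. filtered = bandpass_filter(input_signal1, lowcut, highcut, fs)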
from exemplary_points import filter_and_getExemplaries

sample_rate = 10

# load data
N_users = 10
for i in range(N_users):
    execfile("data/user" + str(i + 1) + ".py")
all_data = [data_user1, data_user2, data_user3, data_user4, data_user5,
            data_user6, data_user7, data_user8, data_user9, data_user10]

# extract exemplary points
filter_and_getExemplaries(all_data, sample_rate)

# make prediction
recognizer = Recognizer(sample_rate)
# The data is from letter "O"
print recognizer.predict_one([[0.00151, 0.01135],
                              [0.01175, 0.02839],
                              [0.0243, -0.00781],
                              [0.01606, -0.03011],
                              [-0.01251, -0.02651],
                              [-0.08557, 0.02435],
                              [-0.05574, 0.02611],
                              [0.00386, 0.01013],
                              [0.03279, 0.00049],
                              [0.02381, -0.0027],
                              [-0.0126, -0.00321],
                              [-0.06615, 0.01246],
                              [-0.06997, 0.00418],
                              [-0.06926, -0.00977],
                              [-0.0444, -0.01925],
                              [0.00515, -0.02594],
                              [0.01419, -0.02939],
                              [0.00689, -0.01478],
                              [0.00534, -0.01004],
                              [-0.00194, -0.01154],
                              [-0.01755, -0.0048],
                              [-0.0367, -0.0101],
                              [-0.05585, 0.00492],
                              [-0.03075, 0.01247],
                              [0.01603, -0.02082],
                              [0.04772, -0.01042],
                              [0.05997, -0.0237],
                              [0.06643, -0.01772],
class Window(object): def __init__(self): ''' variables clarification definition of variables in this class ''' self.root = Tk() self.welcome_label = Label(master=self.root, width=80, height=30) self.photo = PhotoImage(file='new_welc.png') self.back_right = Label(master=self.root, width=50, height=30, bg='green') self.back_left = Label(master=self.root, width=30, height=30, bg='blue') self.img_label = Label(master=self.welcome_label, image=self.photo, height=self.photo.height(), width=self.photo.width()) self.welc_over_but = Button(master=self.img_label, text='Get Start', width=20, height=1, command=self.come_in) self.mfcc_label = Label(master=self.back_right, width=50, height=16, bg='yellow') self.word_label = Label(master=self.back_left, width=30, height=30, bg='pink') self.word_list = Listbox(master=self.word_label, width=30, height=10) self.word_rec_label = Label(master=self.word_label, height=1, width=30, bg='red') self.word_rec_but = Button(master=self.word_rec_label, height=1, width=10, text='Listen to me', command=self.listen_word) self.auto_cover = IntVar() self.word_args = Label(master=self.word_label, width=30, height=6, bg='blue') self.word_rec_time = Text(master=self.word_args, width=30, height=1) self.word_file = Text(master=self.word_args, width=30, height=1) self.word_listen_status = Label(master=self.word_args, width=30, height=4, bg='white') self.word_result = Listbox(master=self.word_label, height=13, width=30) self.mfcc_pic_label = Label(master=self.mfcc_label, height=16, width=50) self.mfcc_pic = None self.talk_label = Label(master=self.back_right, width=50, height=14, bg='purple') self.talk_arg = Label(master=self.talk_label, width=50, height=2) self.lcy = Label(master=self.talk_arg, width=20, height=3) self.fjw = Label(master=self.talk_arg, width=30, height=3, bg='green') self.talk_rec_but = Button(master=self.lcy, width=10, height=2, command=self.listen_talk, text="Let's chat!") self.talk_auto_cover = IntVar() self.talk_rec_time = Text(master=self.fjw, width=30, height=1) self.talk_file = Text(master=self.fjw, width=30, height=1) self.talk_area = Label(master=self.talk_label, width=50, height=12) self.talk_history = Text(master=self.talk_area, width=50, height=12) self.speech_content = Text(master=self.talk_area, width=50, height=2) self.rc = Recognizer() self.talker = Talker() self.root.title('Speech Recognizer') self.welcome() def come_in(self): print('in come_in') self.welcome_label.destroy() self.back_left.pack(side=LEFT) self.back_right.pack(side=LEFT) self.back_left.pack_propagate(0) self.back_right.pack_propagate(0) self.mfcc_label.pack(side=TOP) self.mfcc_label.pack_propagate(0) self.word_label.pack() self.word_label.pack_propagate(0) self.words_section() self.mfcc_section() self.talk_section() self.rc.welcome() def welcome(self): print('in welcome') self.welcome_label.pack() self.welcome_label.pack_propagate(0) self.img_label.pack() self.img_label.pack_propagate(0) self.welc_over_but.pack() ''' the input parameter is the name of the mfcc_picture this function only update mfcc_pic_label ''' def show_mfcc(self, filename): self.mfcc_pic = PhotoImage(file=filename) self.mfcc_pic_label.destroy() self.mfcc_pic_label = Label(master=self.mfcc_label, image=self.mfcc_pic, height=400, width=400) self.mfcc_pic_label.pack() return def talk_section(self): self.talk_label.pack() self.talk_label.pack_propagate(0) self.talk_arg.pack() self.talk_arg.pack_propagate(0) self.lcy.pack(side=LEFT) self.lcy.pack_propagate(0) self.fjw.pack(side=RIGHT) self.fjw.pack_propagate(0) 
    self.talk_rec_but.pack(side=LEFT)
    self.talk_rec_but.pack_propagate(0)
    self.talk_auto_cover.set(1)
    Checkbutton(self.lcy, text='cover', variable=self.talk_auto_cover,
                width=10, height=2).pack()
    self.talk_rec_time.pack()
    self.talk_rec_time.insert(END, 'Record time: 2s')
    self.talk_file.pack()
    self.talk_file.insert(END, 'File: ')
    self.talk_area.pack()
    self.talk_area.pack_propagate(0)
    self.talk_history.pack(side=TOP)
    self.talk_history.pack_propagate(0)
    self.speech_content.pack(side=BOTTOM)
    self.speech_content.pack_propagate(0)

def listen_talk(self):
    """Called when the "Let's chat!" button is hit; chats via self.talker."""
    ret, name = self.talker.work(self.talk_auto_cover.get() == 1)
    # print(ret)
    self.talk_file.delete('1.0', END)  # '1.0' is the start index of a Text widget
    self.talk_file.insert(END, 'File: ' + name)
    data = self.rc.get_wav_mfcc(name)
    filename = 'Oldboy.png'
    self.rc.plot_save(filename, data)
    self.show_mfcc(filename=filename)
    # React to the recognition result.
    self.word_result.delete(0, END)
    for xx in ret:
        self.word_result.insert(END, xx)
    self.speech_content.delete('1.0', END)
    try:
        self.speech_content.insert(END, ret[0])
    except IndexError:  # nothing was recognized
        self.speech_content.insert(END, '你说话了吗')  # "Did you say anything?"
    if ret:  # guard: ret[0] would raise on an empty result
        self.talk_react(ret[0])

def talk_react(self, msg):
    """React to a message transcribed by the online API."""
    print('get message', msg)
    if msg.find('天气') != -1:  # "weather"
        ans = talk_online.weather()
        speech.say('为你找到今天的天气')  # "Found today's weather for you"
        self.talk_history.insert(END, '\n------------------\n')
        for ww in ans:
            self.talk_history.insert(END, ww + '\n')
        return
    if msg.find('笑话') != -1:  # "joke"
        ans = talk_online.joke()
        speech.say('我想到一个好笑的笑话')  # "I thought of a funny joke"
        self.talk_history.insert(END, '\n------------------\n')
        self.talk_history.insert(END, ans + '\n')
        return
    if msg.find('成绩') != -1:  # "grades"
        ans = talk_online.grade()
        speech.say('正在查询你的成绩')  # "Looking up your grades"
        self.talk_history.insert(END, '\n------------------\n')
        for line in ans:
            self.talk_history.insert(END, line + '\n')
        return
    if msg.find('你好') != -1:  # "hello"
        ans = talk_online.say_hello()
        speech.say(ans)
        self.talk_history.insert(END, '\n------------------\n')
        self.talk_history.insert(END, ans + '\n')
        return
    if msg.find('名字') != -1:  # "name"
        ans = talk_online.get_name()
        speech.say(ans)
        self.talk_history.insert(END, '\n------------------\n')
        self.talk_history.insert(END, ans + '\n')
        return
    if msg.find('年龄') != -1 or msg.find('岁') != -1 or msg.find('你多大') != -1:
        # "age" / "years old" / "how old are you"
        ans = talk_online.get_age()
        speech.say(ans)
        self.talk_history.insert(END, '\n------------------\n')
        self.talk_history.insert(END, ans + '\n')
        return
    if msg.find('音乐') != -1 or msg.find('歌') != -1:  # "music" / "song"
        speech.say('为你打开网易云音乐')  # "Opening NetEase Cloud Music for you"
        talk_online.music()
        self.talk_history.insert(END, '\n------------------\n')
        self.talk_history.insert(END, '为你打开网易云音乐' + '\n')
        return

def mfcc_section(self):
    self.mfcc_pic_label.pack()

def prep_word_list(self):
    for a_word in ['rich', 'flash', 'blog', 'mathematics', 'hard',
                   'twenty', 'love', 'girl', 'banana', 'apple']:
        self.word_list.insert(END, a_word)

def words_section(self):
    self.prep_word_list()
    self.word_list.pack()
    self.word_rec_label.pack(side=TOP)
    self.word_rec_label.pack_propagate(0)
    self.word_rec_but.pack(side=LEFT)
    self.auto_cover.set(1)
    Checkbutton(self.word_rec_label, text='auto-cover',
                variable=self.auto_cover, width=20).pack(side=LEFT)
    self.word_args.pack()
    self.word_args.pack_propagate(0)
    self.word_rec_time.grid(row=0, column=0)
    self.word_rec_time.insert(END, 'Record time: 2s')
    self.word_file.grid(row=1, column=0)
    self.word_file.insert(END, 'File: ')
    self.word_listen_status.grid(row=2, column=0)
    self.word_listen_status.pack_propagate(0)
    # for i in self.word_list.get(0, END):
    #     self.word_result.insert(END, str(i) + ' with probability of ')
    self.word_result.pack()

def listen_word(self):
    """Called when the 'Listen to me' button is hit.

    Records the next two seconds of speech via self.rc.record, recognizes
    the recording with rec_word, plots its MFCC with show_mfcc, and hands
    the recognition result to word_react.
    """
    print('in listen_word')
    Label(master=self.word_listen_status, text="I'm listening~",
          width=30, height=4).pack()
    wav_file = self.rc.record(self.auto_cover.get() == 1)  # start recording
    self.word_file.delete('1.0', END)
    self.word_file.insert(END, 'File: ' + wav_file)
    filename = 'Oldboy.png'                     # MFCC picture filename
    data = self.rc.rec_word(wav_file)           # recognized word data
    mfcc_data = self.rc.get_wav_mfcc(wav_file)  # MFCC data
    self.rc.plot_save(filename, mfcc_data)      # plot the data
    self.show_mfcc(filename)                    # show the plotted picture
    self.word_react(data)                       # update the result on the GUI

def word_react(self, data):
    """Take the recognition data from listen_word, refresh the word_result
    list with the deduplicated candidates, and speak the top one."""
    self.word_result.delete(0, END)
    usu = []  # deduplicate while preserving order
    for word in data:
        if word not in usu:
            self.word_result.insert(END, word)
            usu.append(word)
    try:
        speech.say('我认为你刚才说了' + data[0])  # "I think you just said ..."
    except IndexError:  # nothing recognized
        speech.say('你发音不太标准哦')  # "Your pronunciation isn't quite standard"
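# The two "listen" handlers above only assume four Recognizer methods:
# record(), rec_word(), get_wav_mfcc(), and plot_save(). The stub below is a
# minimal sketch of that interface, inferred from the call sites, not the
# project's real implementation; the placeholder return values are invented
# so the GUI flow can be exercised without a microphone or a trained model.
import numpy as np
import matplotlib
matplotlib.use('Agg')  # render to files; no display required
import matplotlib.pyplot as plt


class RecognizerStub:
    def record(self, auto_cover):
        """Record ~2s of audio to a wav file and return its filename.
        auto_cover says whether an existing recording may be overwritten."""
        return 'record.wav'  # a real implementation captures audio here

    def rec_word(self, wav_file):
        """Return candidate words for the recording, best match first."""
        return ['apple', 'banana']  # placeholder result

    def get_wav_mfcc(self, wav_file):
        """Return the MFCC features of the wav file as a 2-D array."""
        return np.zeros((13, 100))  # placeholder features

    def plot_save(self, png_name, mfcc_data):
        """Render the MFCC matrix to png_name so the GUI can display it."""
        plt.imshow(mfcc_data, aspect='auto', origin='lower')
        plt.savefig(png_name)
        plt.close()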
def __init__(self):
    """Define the widgets and state variables used by this class."""
    self.root = Tk()
    self.welcome_label = Label(master=self.root, width=80, height=30)
    self.photo = PhotoImage(file='new_welc.png')
    self.back_right = Label(master=self.root, width=50, height=30, bg='green')
    self.back_left = Label(master=self.root, width=30, height=30, bg='blue')
    self.img_label = Label(master=self.welcome_label, image=self.photo,
                           height=self.photo.height(), width=self.photo.width())
    self.welc_over_but = Button(master=self.img_label, text='Get Start',
                                width=20, height=1, command=self.come_in)
    self.mfcc_label = Label(master=self.back_right, width=50, height=16, bg='yellow')
    self.word_label = Label(master=self.back_left, width=30, height=30, bg='pink')
    self.word_list = Listbox(master=self.word_label, width=30, height=10)
    self.word_rec_label = Label(master=self.word_label, height=1, width=30, bg='red')
    self.word_rec_but = Button(master=self.word_rec_label, height=1, width=10,
                               text='Listen to me', command=self.listen_word)
    self.auto_cover = IntVar()
    self.word_args = Label(master=self.word_label, width=30, height=6, bg='blue')
    self.word_rec_time = Text(master=self.word_args, width=30, height=1)
    self.word_file = Text(master=self.word_args, width=30, height=1)
    self.word_listen_status = Label(master=self.word_args, width=30, height=4, bg='white')
    self.word_result = Listbox(master=self.word_label, height=13, width=30)
    self.mfcc_pic_label = Label(master=self.mfcc_label, height=16, width=50)
    self.mfcc_pic = None
    self.talk_label = Label(master=self.back_right, width=50, height=14, bg='purple')
    self.talk_arg = Label(master=self.talk_label, width=50, height=2)
    self.lcy = Label(master=self.talk_arg, width=20, height=3)
    self.fjw = Label(master=self.talk_arg, width=30, height=3, bg='green')
    self.talk_rec_but = Button(master=self.lcy, width=10, height=2,
                               command=self.listen_talk, text="Let's chat!")
    self.talk_auto_cover = IntVar()
    self.talk_rec_time = Text(master=self.fjw, width=30, height=1)
    self.talk_file = Text(master=self.fjw, width=30, height=1)
    self.talk_area = Label(master=self.talk_label, width=50, height=12)
    self.talk_history = Text(master=self.talk_area, width=50, height=12)
    self.speech_content = Text(master=self.talk_area, width=50, height=2)
    self.rc = Recognizer()
    self.talker = Talker()
    self.root.title('Speech Recognizer')
    self.welcome()
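# The __init__ above belongs to a class whose name is not visible in this
# snippet, so SpeechApp below is a hypothetical stand-in. A conventional Tk
# entry point just constructs the class (which builds every widget and calls
# welcome()) and then hands control to the event loop:
if __name__ == '__main__':
    app = SpeechApp()    # hypothetical name for the GUI class defined above
    app.root.mainloop()  # block in the Tk event loop until the window closes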
class AppUI(QWidget):
    """The PyQt5 UI of the OMR (optical mark recognition) application."""

    def __init__(self):
        super().__init__()
        self.initUI()
        self.pathList = []
        self.sltdImgIndx = None  # no image selected yet; set by showDialog/itemClicked
        self.rgzr = Recognizer()

    def initUI(self):
        font = QFont()
        font.setPointSize(12)
        QToolTip.setFont(font)

        addBtn = QPushButton(" Додати зображення ", self)  # "Add image"
        addBtn.setFont(font)
        addBtn.setFixedSize(210, 40)
        addBtn.clicked.connect(self.showDialog)

        recognBtn = QPushButton("Розпізнати", self)  # "Recognize"
        recognBtn.setFont(font)
        recognBtn.setFixedSize(210, 40)
        recognBtn.clicked.connect(self.recognize)

        delBtn = QPushButton(self)
        delBtn.setFixedSize(50, 50)
        delBtn.setIcon(QIcon("images/delete.png"))
        delBtn.clicked.connect(self.deleteImg)
        delBtn.setToolTip("Видалити зображення з набору")  # "Remove the image from the set"

        rotateBtn = QPushButton(self)
        rotateBtn.setFixedSize(50, 50)
        rotateBtn.setIcon(QIcon("images/rotate.png"))
        rotateBtn.clicked.connect(self.rotateImg)
        rotateBtn.setToolTip("Повернути зображення")  # "Rotate the image"

        self.pic = QLabel("", self)
        self.pic.setFixedSize(330, 580)
        self.pic.setFrameShape(QFrame.Panel)

        lbl = QLabel(" Список зображень: ")  # "Image list:"
        lbl.setFont(font)
        lbl.setFixedSize(300, 40)

        self.picList = QListView(self)
        self.picList.setFixedSize(300, 300)
        self.picList.clicked.connect(self.itemClicked)
        self.listModel = QStandardItemModel(self.picList)
        self.picList.setModel(self.listModel)

        splitter = QSplitter(Qt.Vertical)

        vbtnbox = QVBoxLayout()
        vbtnbox.addStretch(1)
        vbtnbox.addWidget(addBtn)
        vbtnbox.addWidget(recognBtn)
        vbtnbox.setAlignment(Qt.AlignCenter)

        vlistbox = QVBoxLayout()
        vlistbox.addStretch(1)
        vlistbox.addWidget(lbl)
        vlistbox.addWidget(self.picList)

        hbtnbox = QHBoxLayout()
        hbtnbox.addWidget(delBtn)
        hbtnbox.addWidget(rotateBtn)

        vimgbox = QVBoxLayout()
        vimgbox.addWidget(self.pic)
        vimgbox.addLayout(hbtnbox)

        vbox = QVBoxLayout()
        vbox.addLayout(vlistbox)
        vbox.addLayout(vbtnbox)

        hbox = QHBoxLayout()
        # hbox.addWidget(self.pic, alignment=Qt.AlignLeft)
        hbox.addLayout(vimgbox)
        hbox.addWidget(splitter)
        hbox.addLayout(vbox)
        self.setLayout(hbox)

        self.setGeometry(300, 300, 600, 600)
        self.setWindowTitle('OMR')
        self.show()

    def showDialog(self):
        fname = QFileDialog.getOpenFileNames(self, 'Open file', filter='*.jpg')
        if fname[0]:
            for path in fname[0]:
                if path not in self.pathList:
                    self.pathList.append(path)
                    self.addItemToList(path[path.rfind('/') + 1:])
            self.sltdImgIndx = len(self.pathList) - 1
            path = self.pathList[-1]
            pixmap = QPixmap(path)
            pixmap = pixmap.scaled(self.pic.size(), aspectRatioMode=Qt.KeepAspectRatio)
            self.pic.setPixmap(pixmap)

    def addItemToList(self, path):
        item = QStandardItem(path)
        item.setEditable(False)
        self.listModel.appendRow(item)

    def itemClicked(self, sender):
        indx = sender.row()
        self.sltdImgIndx = indx
        pixmap = QPixmap(self.pathList[indx])
        pixmap = pixmap.scaled(self.pic.size(), aspectRatioMode=Qt.KeepAspectRatio)
        self.pic.setPixmap(pixmap)

    def recognize(self):
        fname = QFileDialog.getSaveFileName(
            self,
            "Оберіть розташування та назву для файла",  # "Choose a location and name for the file"
            filter='*.txt')
        if fname[0]:
            saver = ResultSaver(fname[0])
            for img_path in self.pathList:
                answ, image_path = self.rgzr.recognize(img_path)
                saver.write(answ, img_path[img_path.rfind('/'):])
                QPicDialog.show(self, image_path)
                os.remove(image_path)

    def deleteImg(self):
        if self.pathList:
            indx = self.sltdImgIndx
            self.pathList.pop(indx)  # pop removes by position; remove() matches by value
            self.listModel.removeRow(indx)
            if len(self.pathList) > 0:
                self.sltdImgIndx = 0
                pixmap = QPixmap(self.pathList[0])
                pixmap = pixmap.scaled(self.pic.size(), aspectRatioMode=Qt.KeepAspectRatio)
                self.pic.setPixmap(pixmap)
            else:
                self.sltdImgIndx = None
                self.pic.clear()

    def rotateImg(self):
        new_path = ImageRotator.rotate(self.pathList[self.sltdImgIndx])
        pixmap = QPixmap(new_path)
        pixmap = pixmap.scaled(self.pic.size(), aspectRatioMode=Qt.KeepAspectRatio)
        self.pic.setPixmap(pixmap)
        item = QStandardItem(new_path[new_path.rfind('/') + 1:])
        item.setEditable(False)
        self.listModel.setItem(self.sltdImgIndx, item)
        self.pathList[self.sltdImgIndx] = new_path
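# A conventional PyQt5 entry point for AppUI, sketched for completeness. The
# PyQt5 imports are standard; Recognizer, ResultSaver, ImageRotator, and
# QPicDialog are project modules whose import paths are not shown here.
import sys
from PyQt5.QtWidgets import QApplication

if __name__ == '__main__':
    app = QApplication(sys.argv)  # one QApplication per process, before any widget
    ui = AppUI()                  # __init__ runs initUI(), which ends with show()
    sys.exit(app.exec_())         # enter the Qt event loop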