from parser import parser
from nn import relationExtractor, relationLabeler
import os

# Three-stage pipeline: parse the raw texts, extract word relations,
# then label those relations.  Each stage is initialised before use.

print("-------- Parsing --------")
parser.init()
for filename in sorted(os.listdir("data/original")):
    parser.parse(filename)
    # BUG FIX: the original f-string had no placeholder, so every file
    # printed the same literal text; report which file was parsed.
    print(f"Parsed {filename}")
print("Finished parsing all texts")

print("-------- Extracting relations --------")
relationExtractor.init()
relationExtractor.extractRelations()
print("Finished extracting relations between words")

print("-------- Labeling relations --------")
relationLabeler.init()
relationLabeler.labelRelations()
print("Finished labeling relations between words")
missing = [] # Remove entries from text and wav where there is no associated file for line in lines: tokens = line.strip().split(" ", 1) UID = tokens[0] audio_sample_file_path = voco_data_base + "/staging/audio_data/" + UID + ".wav" if not os.path.isfile(audio_sample_file_path): # print(UID) missing.append(UID) # input("Done printing missing files") dynamic_rules, static_rules, var_lookup = parser.init() UIDS = {} print(len(lines)) for line in lines: tokens = line.strip().split(" ", 1) if len(tokens) > 1: UID = tokens[0] phrase = tokens[1] phrase_len = len(phrase.split(" ")) commands, matches = parser.parsephrase(dynamic_rules, static_rules,
def main():
    """Run the VOCO live voice-command loop.

    Reads audio from the default microphone in small chunks, uses an RMS
    gate to detect the start/end of speech, writes each detected utterance
    to a WAV file plus Kaldi record files, transcribes it with Kaldi (or
    DeepSpeech when dictating), parses the transcription into shell
    commands via the project parser, and executes them.  Loops forever;
    exits only via sys.exit() on audio-setup failure or an external kill.

    Relies on module-level helpers defined elsewhere in this file
    (write_i3blocks, write_audio_data, write_audio_records, write_log)
    and on module-level imports (pprint, os, sys, re, collections,
    subprocess, audioop, pyaudio, parser).
    """
    pp = pprint.PrettyPrinter(depth=4, width=60)
    # VOCO_DATA is the environment variable that points to where VOCO saves audio data and records
    try:
        voco_data_base = os.environ['VOCO_DATA']
        print(os.environ['VOCO_DATA'])
    except:
        # NOTE(review): bare except; if VOCO_DATA is unset, voco_data_base
        # is never bound and the basedir line below raises NameError.
        print('VOCO_DATA not defined')
    basedir = voco_data_base + "/staging/"
    #----------------------------------------------------------------------------
    # Parse input options - noexec, debug, playback
    # noexec - don't execute any commands, useful for debugging
    # debug - show additional debugging information during runtime
    # playback - playback the audio recorded
    #----------------------------------------------------------------------------
    debug = False
    noexec_mode = False
    playback_mode = False
    deepspeech = False
    try:
        options = sys.argv
        for x in options:
            if x == "noexec":
                noexec_mode = True
                print("noexec_mode = True")
            if x == "debug":
                debug = True
                print("debug = True")
            if x == "playback":
                playback_mode = True
                print("playback_mode = True")
            if x == "help":
                print("noexec, debug, playback")
            if x == "deepspeech":
                deepspeech = True
                print("deepspeech = True")
    except:
        print("Input argument error")
    #----------------------------------------------------------------------------
    # set_up pyaudio
    # important to note here is that the chunk size affects the latency, so smaller chunk size is better
    #------------------------------------------------------------------------
    try:
        mic = -1  # -1 means "use the default input device" (resolved below)
        chunk = 0
        byterate = 16000  # capture sample rate in Hz
        pa = pyaudio.PyAudio()
        sample_rate = byterate
        stream = None
        # 128 frames * 2 bytes/sample, scaled by rate ratio (here ratio is 1).
        chunk = 128 * 2 * sample_rate // byterate
        if mic == -1:
            mic_info = pa.get_default_input_device_info()
            mic = mic_info['index']
            pp.pprint(mic_info)
        if debug:
            print("Using mic " + str(mic))
        # Mono 16-bit capture stream on the selected device.
        stream = pa.open(
            rate=sample_rate,
            format=pyaudio.paInt16,
            channels=1,
            input=True,
            input_device_index=mic,
            frames_per_buffer=chunk)
        if debug:
            pp = pprint.PrettyPrinter(depth=3, width=5)
            pp.pprint(pa.get_default_input_device_info())
    except IOError as e:
        print("Setup error: %s" % e)
        # -9997 is PyAudio/PortAudio's "invalid sample rate" error code.
        if (e.errno == -9997 or e.errno == 'Invalid sample rate'):
            new_sample_rate = int(
                pa.get_device_info_by_index(mic)['defaultSampleRate'])
            if (sample_rate != new_sample_rate):
                # NOTE(review): this reassignment is dead code — sys.exit(0)
                # below runs unconditionally, so no retry ever happens.
                sample_rate = new_sample_rate
        sys.exit(0)
    print("\nLISTENING TO MICROPHONE")
    #----------------------------------------------------------------------------
    # create skp2gender - only needs to be created once
    #----------------------------------------------------------------------------
    if not os.path.exists(basedir + "audio_records/"):
        os.makedirs(basedir + "audio_records/")
    # Kaldi speaker-to-gender map; a single hard-coded speaker entry.
    # NOTE(review): outputfile is never closed or flushed — consider `with`.
    outputfile = open(basedir + "audio_records/" + 'spk2gender', 'w')
    outputfile.write("bartek m \n")
    #----------------------------------------------------------------------------
    # Setup session and recording counter
    # the session and recording counters are combined to form a unique identifier ( called UID in Kaldi) for example
    #----------------------------------------------------------------------------
    try:
        with open("session_counter.txt") as f:
            session_counter = int(f.read()) + 1
    except IOError:
        # First run (or missing counter file): start a new session at 1.
        session_counter = 1
    recording_counter = 0
    # Rolling buffer of raw audio chunks for the current utterance.
    audio_samples = collections.deque()
    audio_frames_prefix = 10    # chunks of pre-speech audio kept for context
    audio_timeout_frames = 20   # quiet chunks tolerated before stopping
    rec = False                 # True while an utterance is being recorded
    timeout = 0                 # consecutive quiet chunks seen while recording
    pause_flag = False          # True = transcribe but do not execute commands
    dictate_flag = False        # True = route next utterance to DeepSpeech
    #----------------------------------------------------------------------------
    # setup gates
    # these two variables set the sound levels (RMS) the recorded signal
    #----------------------------------------------------------------------------
    # Normal
    gate = 400
    end_gate = 400
    # noisy
    # gate = 800
    # end_gate = 800
    print("Start recording gate: " + str(gate))
    print("Stop recording gate: " + str(end_gate))
    #----------------------------------------------------------------------------
    # Notify user
    #----------------------------------------------------------------------------
    # os.system("aplay media/shovel.wav")
    #----------------------------------------------------------------------------
    # init parser
    # Load the static and dynamic rules from file
    #----------------------------------------------------------------------------
    dynamic_rules, static_rules, var_lookup = parser.init()
    #----------------------------------------------------------------------------
    # start recording
    # Begin the main loop
    #----------------------------------------------------------------------------
    while (True):
        # read the sample, calculated its RMS and appended to the queue
        sample = stream.read(chunk)
        rms = audioop.rms(sample, 2)  # width=2: 16-bit samples
        audio_samples.append(sample)
        if rec == False:
            if rms >= gate:
                # notify the user the system has started recording
                write_i3blocks('REC', 'recording')
                rec = True
                timeout = 0
            else:
                # if the system is not recording trim the queue to only keep a few historic samples.
                # This is so that when speech is detected the system has some initial samples of the signal, which helps in decoding.
                while len(audio_samples) > audio_frames_prefix:
                    audio_samples.popleft()
        else:
            if rms >= end_gate:
                # Still loud: reset the silence counter.
                timeout = 0
            elif (rms < end_gate) and (timeout < audio_timeout_frames):
                timeout += 1
            else:
                # Stop recording transcribe the audio and execute the commands
                #----------------------------------------------------------------------------
                # Get window context
                # this function gets the class of window that is currently selected, for example Firefox or Emacs
                #----------------------------------------------------------------------------
                try:
                    active_window = subprocess.check_output(
                        ['/usr/bin/xdotool', 'getactivewindow'])
                    active_window = active_window.strip().decode('UTF-8')
                    windowclass = subprocess.check_output(
                        ["xprop", "-notype", "-id", active_window, "WM_CLASS"])
                    windowclass = windowclass.strip().decode('UTF-8')
                    # WM_CLASS reports two quoted strings; group(2) is the
                    # class name (e.g. "Firefox"), upper-cased for matching.
                    expr = "WM_CLASS = \"([^\"]*)\", \"([^\"]*)\""
                    m = re.search(expr, windowclass)
                    context = m.group(2).upper()
                except:
                    # No X tools / no focused window: fall back to no context.
                    context = ""
                # notify the user decoding has started
                write_i3blocks('DECODING', 'decoding')
                # create the UID
                UID = "LIVE" + str(session_counter).zfill(8) + "_" + str(
                    recording_counter).zfill(5)
                audio_sample_file_path = basedir + "audio_data/" + UID + ".wav"
                # Write the WAV file and the Kaldi records
                write_audio_data(audio_samples, audio_sample_file_path,
                                 byterate)
                write_audio_records(basedir + "audio_records/",
                                    session_counter, audio_sample_file_path,
                                    UID)
                if deepspeech or dictate_flag:
                    # DeepSpeech path: the helper script reads the WAV path
                    # from audio.txt in the model directory.
                    print("deepspeech")
                    model_dir = "/home/lyncis/proj/deepspeech"
                    audio_text = open("%s/audio.txt" % model_dir, "w")
                    audio_text.write(audio_sample_file_path)
                    audio_text.close()
                    result = subprocess.check_output(
                        "./deepspeech.sh").strip().decode('UTF-8')
                    dictate_flag = False
                    write_i3blocks(result.upper(), 'neutral')
                else:
                    # Run Kaldi, the script decodes for your sample and saves the transcription to a text file.
                    result = subprocess.check_output(
                        "./kaldi_decode.sh").strip().decode('UTF-8')
                    try:
                        # Kaldi output is "<UID> <transcription>"; keep the text.
                        result = result.split(" ", 1)[1].strip()
                    except IndexError as e:
                        # this error occurs if Kaldi did not manage to transcribe anything
                        result = ""
                if debug:
                    print(UID)
                    print(result)
                    # print("aplay %s" % audio_sample_file_path)
                if len(result) == 0:
                    if pause_flag:
                        write_i3blocks("PAUSED", 'neutral')
                    else:
                        write_i3blocks('NONE', 'neutral')
                    if debug:
                        print("Zero length command")
                else:
                    try:
                        # Built-in voice commands: "pause" toggles execution,
                        # "dictate" routes the next utterance to DeepSpeech.
                        if result == "pause":
                            if pause_flag:
                                write_i3blocks("UNPAUSED", 'neutral')
                            pause_flag = not pause_flag
                            if pause_flag:
                                write_i3blocks("PAUSED", 'neutral')
                        # NOTE(review): not elif — so "pause" also falls into
                        # the else below and clears dictate_flag; any
                        # non-"dictate" utterance cancels a pending dictation.
                        if result == "dictate":
                            dictate_flag = True
                            write_i3blocks("DICTATE", 'neutral')
                        else:
                            dictate_flag = False
                        # Replay the audio clip if playback mode is on
                        if playback_mode:
                            os.system("aplay " + audio_sample_file_path)
                        if (not noexec_mode) and (not pause_flag) and (
                                not dictate_flag):
                            # parse the transcription
                            commands, matches = parser.parsephrase(
                                dynamic_rules, static_rules, var_lookup,
                                result, context)
                            # Execute the command
                            for cmd in commands:
                                # if the command requires XDOTOOL then use subprocess.call
                                # since that waits for each command to complete before the
                                # next commander started.
                                # This is usefull for commands where order is important such
                                # as keystrokes since
                                # it prevents them being executed in the wrong order.
                                # Otherwise use pop open since this prevents VOCO locking up
                                # while waiting for the command to complete.
                                # For instance in Emacs if you issue a command to helm this
                                # command will not complete until Helm is closed
                                # and this will prevent VOCO decoding any further commands.
                                if len(cmd) > 0:
                                    if cmd[0] == "/usr/bin/xdotool":
                                        subprocess.call(cmd)
                                    else:
                                        # print(cmd)
                                        subprocess.Popen(
                                            cmd,
                                            shell=False,
                                            stdin=None,
                                            stdout=None,
                                            stderr=None,
                                            close_fds=True)
                        # show the user what the Kaldi transcribed
                        write_i3blocks(result.upper(), 'neutral')
                        # write the log
                        write_log(basedir, UID, result, "", "0.0",
                                  audio_sample_file_path)
                        print("%s | %s" % (result.rjust(20),
                                           context.rjust(20)))
                        if debug:
                            print("-----------------")
                            print(result + "\n")
                            print("Wrote log to:" + basedir + "log")
                    except Exception as e:
                        # Keep the loop alive on any parse/exec failure.
                        print(e)
                        # tb = traceback.format_exc()
                        # print(tb)
                # Reset for the next utterance.
                recording_counter += 1
                rec = False