Beispiel #1
0
from parser import parser
from nn import relationExtractor, relationLabeler

import os

# Stage 1: feed every entry of data/original to the parser, in sorted
# name order so runs are reproducible.
print("-------- Parsing --------")
parser.init()
original_entries = os.listdir("data/original")
original_entries.sort()
for filename in original_entries:
    parser.parse(filename)
    print(f"Parsed {filename}")
print("Finished parsing all texts")

# Stage 2: initialise the relation extractor and run it.
print("-------- Extracting relations --------")
relationExtractor.init()
relationExtractor.extractRelations()
print("Finished extracting relations between words")

# Stage 3: initialise the relation labeler and run it.
print("-------- Labeling relations --------")
relationLabeler.init()
relationLabeler.labelRelations()
print("Finished labeling relations between words")
Beispiel #2
0
# UIDs whose .wav file could not be found under the staging audio directory.
missing = []

# Scan every line and record the UIDs that have no audio file on disk.
# NOTE(review): this pass only collects the UIDs; nothing is removed here.
for line in lines:
    # The UID is the first space-separated token of the line.
    tokens = line.strip().split(" ", 1)
    UID = tokens[0]
    audio_sample_file_path = "".join(
        [voco_data_base, "/staging/audio_data/", UID, ".wav"])
    if os.path.isfile(audio_sample_file_path):
        continue
    missing.append(UID)

# Load the rule tables returned by the parser module.
dynamic_rules, static_rules, var_lookup = parser.init()

UIDS = {}
print(len(lines))
for line in lines:

    tokens = line.strip().split(" ", 1)

    if len(tokens) > 1:
        UID = tokens[0]
        phrase = tokens[1]

        phrase_len = len(phrase.split(" "))

        commands, matches = parser.parsephrase(dynamic_rules,
                                               static_rules,
Beispiel #3
0
def main():
    """Run the live loop: record from the microphone, decode, execute.

    The staging directory is ``$VOCO_DATA/staging/``.  Mode flags are read
    from ``sys.argv``: ``noexec`` (do not execute commands), ``debug``
    (extra output), ``playback`` (replay each clip with ``aplay``),
    ``deepspeech`` (decode with ``./deepspeech.sh`` instead of
    ``./kaldi_decode.sh``) and ``help`` (print the flag list).

    After opening the default PyAudio input device the function loops
    forever.  Each iteration reads one chunk and computes its RMS level:

    * while idle, a chunk at or above ``gate`` starts a recording; otherwise
      the queue is trimmed to the last ``audio_frames_prefix`` chunks so a
      new recording starts with a little leading audio;
    * while recording, a chunk at or above ``end_gate`` resets the silence
      counter; once the level has stayed below ``end_gate`` for more than
      ``audio_timeout_frames`` consecutive chunks the recording is written
      to disk, decoded, parsed and the resulting commands are executed.

    The function does not return normally.  It exits the process with
    status 1 when ``VOCO_DATA`` is not set, and with status 0 when audio
    setup fails with ``IOError``.
    """
    pp = pprint.PrettyPrinter(depth=4, width=60)

    # VOCO_DATA is the environment variable that points to where VOCO saves
    # audio data and records.  Nothing below can work without it, so stop
    # here instead of failing later on an unbound variable.
    try:
        voco_data_base = os.environ['VOCO_DATA']
    except KeyError:
        print('VOCO_DATA not defined')
        sys.exit(1)
    print(voco_data_base)

    basedir = voco_data_base + "/staging/"

    #----------------------------------------------------------------------------
    # Parse input options - noexec, debug, playback
    # noexec - don't execute any commands, useful for debugging
    # debug - show additional debugging information during runtime
    # playback - playback the audio recorded
    # deepspeech - decode with deepspeech.sh instead of kaldi_decode.sh
    #----------------------------------------------------------------------------

    debug = False
    noexec_mode = False
    playback_mode = False
    deepspeech = False

    for x in sys.argv:
        if x == "noexec":
            noexec_mode = True
            print("noexec_mode = True")
        elif x == "debug":
            debug = True
            print("debug = True")
        elif x == "playback":
            playback_mode = True
            print("playback_mode = True")
        elif x == "help":
            print("noexec, debug, playback")
        elif x == "deepspeech":
            deepspeech = True
            print("deepspeech = True")

    #----------------------------------------------------------------------------
    # set_up pyaudio
    # important to note here is that the chunk size affects the latency, so
    # smaller chunk size is better
    #----------------------------------------------------------------------------

    try:

        # -1 means "use the default input device"
        mic = -1
        byterate = 16000
        pa = pyaudio.PyAudio()
        sample_rate = byterate
        chunk = 128 * 2 * sample_rate // byterate

        if mic == -1:
            mic_info = pa.get_default_input_device_info()
            mic = mic_info['index']
            pp.pprint(mic_info)
            if debug:
                print("Using mic " + str(mic))

        stream = pa.open(
            rate=sample_rate,
            format=pyaudio.paInt16,
            channels=1,
            input=True,
            input_device_index=mic,
            frames_per_buffer=chunk)

        if debug:
            pp = pprint.PrettyPrinter(depth=3, width=5)
            pp.pprint(pa.get_default_input_device_info())

    except IOError as e:
        print("Setup error: %s" % e)
        # NOTE(review): the device's default sample rate is looked up here,
        # but the process exits right afterwards, so the adjusted rate is
        # never used; the exit status is also 0 although setup failed.
        # Kept unchanged - confirm the intended behaviour.
        if (e.errno == -9997 or e.errno == 'Invalid sample rate'):
            new_sample_rate = int(
                pa.get_device_info_by_index(mic)['defaultSampleRate'])
            if (sample_rate != new_sample_rate):
                sample_rate = new_sample_rate

        sys.exit(0)

    print("\nLISTENING TO MICROPHONE")

    #----------------------------------------------------------------------------
    # create spk2gender - only needs to be created once
    #----------------------------------------------------------------------------

    os.makedirs(basedir + "audio_records/", exist_ok=True)

    # Close the file right away so the line is flushed to disk before the
    # (never-ending) main loop starts.
    with open(basedir + "audio_records/" + 'spk2gender', 'w') as outputfile:
        outputfile.write("bartek m \n")

    #----------------------------------------------------------------------------
    # Setup session and recording counter
    # the session and recording counters are combined to form a unique
    # identifier (called UID in Kaldi)
    #----------------------------------------------------------------------------

    try:
        with open("session_counter.txt") as f:
            session_counter = int(f.read()) + 1
    except (IOError, ValueError):
        # missing, unreadable or non-numeric counter file: start over at 1
        session_counter = 1

    recording_counter = 0
    audio_samples = collections.deque()
    audio_frames_prefix = 10
    audio_timeout_frames = 20

    rec = False
    timeout = 0
    pause_flag = False
    dictate_flag = False

    #----------------------------------------------------------------------------
    # setup gates
    # these two variables set the sound levels (RMS) at which recording
    # starts and stops
    #----------------------------------------------------------------------------
    # Normal (800 was used for noisy environments)
    gate = 400
    end_gate = 400

    print("Start recording gate: " + str(gate))
    print("Stop recording gate: " + str(end_gate))

    #----------------------------------------------------------------------------
    # init parser
    # Load the static and dynamic rules from file
    #----------------------------------------------------------------------------

    dynamic_rules, static_rules, var_lookup = parser.init()

    #----------------------------------------------------------------------------
    # start recording
    # Begin the main loop
    #----------------------------------------------------------------------------

    while True:

        # read the sample, calculate its RMS and append it to the queue
        sample = stream.read(chunk)
        rms = audioop.rms(sample, 2)
        audio_samples.append(sample)

        if not rec:
            if rms >= gate:
                # notify the user the system has started recording
                write_i3blocks('REC', 'recording')

                rec = True
                timeout = 0
            else:
                # if the system is not recording trim the queue to only keep
                # a few historic samples.  This is so that when speech is
                # detected the system has some initial samples of the
                # signal, which helps in decoding.
                while len(audio_samples) > audio_frames_prefix:
                    audio_samples.popleft()
            continue

        # Recording: a loud chunk resets the silence counter ...
        if rms >= end_gate:
            timeout = 0
            continue

        # ... and a quiet one counts towards the timeout.
        if timeout < audio_timeout_frames:
            timeout += 1
            continue

        # Silence timeout reached: stop recording, transcribe the audio and
        # execute the commands.

        #----------------------------------------------------------------------------
        # Get window context
        # this gets the class of the window that is currently selected, for
        # example Firefox or Emacs
        #----------------------------------------------------------------------------
        try:
            active_window = subprocess.check_output(
                ['/usr/bin/xdotool', 'getactivewindow'])
            active_window = active_window.strip().decode('UTF-8')
            windowclass = subprocess.check_output(
                ["xprop", "-notype", "-id", active_window, "WM_CLASS"])
            windowclass = windowclass.strip().decode('UTF-8')
            expr = r'WM_CLASS = "([^"]*)", "([^"]*)"'
            m = re.search(expr, windowclass)
            context = m.group(2).upper()
        except Exception:
            # Best effort: a missing tool, a failing call or an unexpected
            # xprop output all mean "no context".  Exception (not a bare
            # except) so that Ctrl-C still interrupts the program.
            context = ""

        # notify the user decoding has started
        write_i3blocks('DECODING', 'decoding')

        # create the UID
        UID = "LIVE" + str(session_counter).zfill(8) + "_" + str(
            recording_counter).zfill(5)

        audio_sample_file_path = basedir + "audio_data/" + UID + ".wav"

        # Write the WAV file and the Kaldi records
        write_audio_data(audio_samples, audio_sample_file_path, byterate)

        write_audio_records(basedir + "audio_records/", session_counter,
                            audio_sample_file_path, UID)

        if deepspeech or dictate_flag:
            print("deepspeech")
            # NOTE(review): hard-coded, machine-specific model directory.
            model_dir = "/home/lyncis/proj/deepspeech"
            # audio.txt holds the path of the clip for deepspeech.sh
            with open("%s/audio.txt" % model_dir, "w") as audio_text:
                audio_text.write(audio_sample_file_path)
            result = subprocess.check_output(
                "./deepspeech.sh").strip().decode('UTF-8')

            dictate_flag = False
            write_i3blocks(result.upper(), 'neutral')

        else:

            # Run Kaldi; the script's output is captured as the result.
            result = subprocess.check_output(
                "./kaldi_decode.sh").strip().decode('UTF-8')

            # Drop the first space-separated token and keep the rest as the
            # transcription.
            try:
                result = result.split(" ", 1)[1].strip()
            except IndexError:
                # this error occurs if Kaldi did not manage to transcribe
                # anything
                result = ""

            if debug:
                print(UID)
                print(result)

            if not result:
                if pause_flag:
                    write_i3blocks("PAUSED", 'neutral')
                else:
                    write_i3blocks('NONE', 'neutral')

                if debug:
                    print("Zero length command")

            else:
                try:

                    # "pause" toggles the paused state
                    if result == "pause":
                        if pause_flag:
                            write_i3blocks("UNPAUSED", 'neutral')
                        pause_flag = not pause_flag

                    if pause_flag:
                        write_i3blocks("PAUSED", 'neutral')

                    # "dictate" routes the next recording to deepspeech
                    dictate_flag = result == "dictate"
                    if dictate_flag:
                        write_i3blocks("DICTATE", 'neutral')

                    # Replay the audio clip if playback mode is on
                    if playback_mode:
                        os.system("aplay " + audio_sample_file_path)

                    if (not noexec_mode) and (not pause_flag) and (
                            not dictate_flag):

                        # parse the transcription
                        commands, matches = parser.parsephrase(
                            dynamic_rules, static_rules, var_lookup,
                            result, context)

                        # Execute the commands
                        for cmd in commands:

                            # If the command uses xdotool then use
                            # subprocess.call, since that waits for each
                            # command to complete before the next one is
                            # started.  This matters where order is
                            # important, such as keystrokes.
                            # Otherwise use Popen so VOCO does not lock up
                            # while waiting for the command to complete.
                            # For instance a command sent to Helm in Emacs
                            # does not complete until Helm is closed, which
                            # would prevent decoding any further commands.

                            if len(cmd) > 0:
                                if cmd[0] == "/usr/bin/xdotool":
                                    subprocess.call(cmd)
                                else:
                                    subprocess.Popen(
                                        cmd,
                                        shell=False,
                                        stdin=None,
                                        stdout=None,
                                        stderr=None,
                                        close_fds=True)

                        # show the user what Kaldi transcribed
                        write_i3blocks(result.upper(), 'neutral')

                    # write the log
                    write_log(basedir, UID, result, "", "0.0",
                              audio_sample_file_path)

                    print("%s |  %s" %
                          (result.rjust(20), context.rjust(20)))

                    if debug:
                        print("-----------------")
                        print(result + "\n")
                        print("Wrote log to:" + basedir + "log")

                except Exception as e:
                    # keep the loop alive if parsing/executing fails
                    print(e)

        recording_counter += 1
        rec = False