Example 1
def recognize(host):
    me = mp.current_process()
    print me.name, 'PID', me.pid

    context = zmq.Context()

    rec_in = context.socket(zmq.SUB)
    rec_in.connect('tcp://{}:{}'.format(host, RECOGNIZE_IN))
    rec_in.setsockopt(zmq.SUBSCRIBE, b'')

    rec_learn = context.socket(zmq.SUB)
    rec_learn.connect('tcp://{}:{}'.format(host, RECOGNIZE_LEARN))
    rec_learn.setsockopt(zmq.SUBSCRIBE, b'')

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, EXTERNAL))

    poller = zmq.Poller()
    poller.register(rec_in, zmq.POLLIN)
    poller.register(rec_learn, zmq.POLLIN)

    memories = []
    recognizer = []

    while True:
        events = dict(poller.poll())

        if rec_in in events:
            audio_segment = recv_array(rec_in)
            scaler = pp.MinMaxScaler()
            scaled_audio = scaler.fit_transform(audio_segment)
            output = recognizer(scaled_audio)
            winner = np.argmax(np.mean(output, axis=0))
            sender.send_json('winner {}'.format(winner))

        if rec_learn in events:
            audio_segment = recv_array(rec_learn)
            scaler = pp.MinMaxScaler()
            scaled_audio = scaler.fit_transform(audio_segment)
            memories.append(scaled_audio)

            targets = []
            for i, memory in enumerate(memories):
                target = np.zeros((memory.shape[0], len(memories)))
                target[:, i] = 1
                targets.append(target)

            start_time = time.time()
            recognizer = _train_network(np.vstack(memories),
                                        np.vstack(targets),
                                        output_dim=200,
                                        leak_rate=.7)
            print 'Learning new categorizing network in {} seconds.'.format(
                time.time() - start_time)
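Example 1 and several of the later snippets rely on recv_array/send_array helpers (or a utils.recv_array variant) and a _train_network trainer that are not part of this listing; note also that recognizer starts out as an empty placeholder list and only becomes callable after the first learning event has trained a network. The array helpers are typically the standard pyzmq idiom of sending dtype/shape metadata as JSON followed by the raw buffer; a minimal sketch, assuming that convention:

import numpy as np
import zmq

def send_array(socket, A, flags=0, copy=True, track=False):
    # Send a numpy array with dtype/shape metadata (assumed helper).
    md = dict(dtype=str(A.dtype), shape=A.shape)
    socket.send_json(md, flags | zmq.SNDMORE)
    return socket.send(A, flags, copy=copy, track=track)

def recv_array(socket, flags=0, copy=True, track=False):
    # Receive a numpy array sent with send_array (assumed helper).
    md = socket.recv_json(flags=flags)
    msg = socket.recv(flags=flags, copy=copy, track=track)
    return np.frombuffer(msg, dtype=md['dtype']).reshape(md['shape'])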
Example 2
def idle(host):
    context = zmq.Context()

    face = context.socket(zmq.SUB)
    face.connect('tcp://{}:{}'.format(host, IO.FACE))
    face.setsockopt(zmq.SUBSCRIBE, b'')

    robocontrol = context.socket(zmq.PUSH)
    robocontrol.connect('tcp://{}:{}'.format(host, IO.ROBO))

    stateQ = context.socket(zmq.SUB)
    stateQ.connect('tcp://{}:{}'.format(host, IO.STATE))
    stateQ.setsockopt(zmq.SUBSCRIBE, b'') 

    state = stateQ.recv_json()

    poller = zmq.Poller()
    poller.register(face, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, IO.EXTERNAL))

    face_timer = 0
    saysomething_timer = 0
    saysomething_interval = 10
    urge_to_say_something = 0
    
    def update_urge_to_say_something(urge_to_say_something, saysomething_interval):
        # linear increase over time for now
        # this could be more sophisticated
        # also including random impulses, emotions, etc
        return urge_to_say_something + saysomething_interval

    while True:
        events = dict(poller.poll(timeout=100))

        if face in events:
            new_face = utils.recv_array(face)
            face_timer = time.time()
                      
        if time.time() - face_timer > np.random.rand()*1.5 + 1:
            print '[self.] searches for a face'
            robocontrol.send_json([ 1, 'pan', (2*np.random.rand() -1)/10 ])
            robocontrol.send_json([ 1, 'tilt', (2*np.random.rand()-1)/10])
            face_timer = time.time()

        if stateQ in events:
            state = stateQ.recv_json()

        if not state['enable_say_something']:
            urge_to_say_something = 0

        if state['enable_say_something'] and not state['i_am_speaking'] and time.time() - saysomething_timer > saysomething_interval:
            urge_to_say_something = update_urge_to_say_something(urge_to_say_something, saysomething_interval)
            sender.send_json('urge_to_say_something {}'.format(urge_to_say_something))
            saysomething_timer = time.time()
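idle() pushes pan/tilt commands to a robot controller as small JSON lists over a PUSH socket and reports its growing urge to speak as plain strings. The controller on the IO.ROBO port is not shown; a minimal sketch of a matching PULL consumer, assuming the [id, axis, value] message convention used above:

import zmq

def robot_control(port):
    # Hypothetical consumer for the [id, 'pan'/'tilt', value] messages sent by idle().
    context = zmq.Context()
    robo = context.socket(zmq.PULL)
    robo.bind('tcp://*:{}'.format(port))
    while True:
        robot_id, axis, value = robo.recv_json()
        print('robot {}: move {} by {:.3f}'.format(robot_id, axis, value))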
Example 3
def video():
    import cv2

    cv2.namedWindow('Output', cv2.WND_PROP_FULLSCREEN)
    camera = cv2.VideoCapture(0)

    context = zmq.Context()
    publisher = context.socket(zmq.PUB)
    publisher.bind('tcp://*:{}'.format(CAMERA))

    projector = context.socket(zmq.PULL)
    projector.bind('tcp://*:{}'.format(PROJECTOR))
    
    eventQ = context.socket(zmq.SUB)
    eventQ.connect('tcp://localhost:{}'.format(EVENT))
    eventQ.setsockopt(zmq.SUBSCRIBE, b'')

    poller = zmq.Poller()
    poller.register(eventQ, zmq.POLLIN)
    poller.register(projector, zmq.POLLIN)

    while True:
        events = dict(poller.poll(timeout=0))

        if eventQ in events:
            pushbutton = eventQ.recv_json()
            if 'display2' in pushbutton:
                cv2.moveWindow('Output', 2000, 100)
            if 'fullscreen' in pushbutton:
                cv2.setWindowProperty('Output', cv2.WND_PROP_FULLSCREEN, cv2.cv.CV_WINDOW_FULLSCREEN)

        if projector in events:
            cv2.imshow('Output', cv2.resize(recv_array(projector), FRAME_SIZE))
        else:
            cv2.imshow('Output', np.zeros(FRAME_SIZE[::-1]))
        
        _, frame = camera.read()
        frame = cv2.resize(frame, FRAME_SIZE)
        send_array(publisher, frame)

        cv2.waitKey(VIDEO_SAMPLE_TIME)
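video() publishes every resized camera frame on a PUB socket bound to the CAMERA port, so any number of processes can tap into the stream. A minimal subscriber, assuming the recv_array helper sketched after Example 1 and a hypothetical handle_frame callback:

import zmq

def frame_consumer(host, port, handle_frame):
    # Hypothetical subscriber to the camera frames published by video().
    context = zmq.Context()
    camera = context.socket(zmq.SUB)
    camera.connect('tcp://{}:{}'.format(host, port))
    camera.setsockopt(zmq.SUBSCRIBE, b'')
    while True:
        handle_frame(recv_array(camera))  # frames arrive already resized to FRAME_SIZE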
Example 4
def data_gen(jsonl):
    '''Takes in a jsonl file and yields a generator for feeding data to requests.
    :param jsonl: path to a jsonl file with text data
    :return: Python generator for passing text to the client'''
    for msg in jsonlines.open(jsonl):
        yield msg


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "jsonl",
        help=
        'path to jsonl file containing new line delimited lines of text for encoding'
    )
    args = parser.parse_args()
    start = time.time()
    msg_gen = data_gen(args.jsonl)
    # send data to server
    df = pd.DataFrame(columns=['sentence', 'embedding'])
    for i, m in enumerate(msg_gen):
        print("sending request %s" % i)
        socket.send_json(m)
        message = recv_array(socket)
        print(f"received reply {message}")
        msg_dictionary = {'sentence': m, 'embedding': message}
        df = df.append(msg_dictionary, ignore_index=True)
        print('reply written to file')
    end = time.time()
    time_taken = end - start
    df.to_csv('processed_xnli.csv', index=False)
    print(f"processing took {time_taken}")
Example 5
        yield msg

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("jsonl", help='path to jsonl file containing new line delimited lines of text for encoding')
    args = parser.parse_args()
    start = time.time()
    msg_gen = data_gen(args.jsonl)
    # read sentences, tokenize and send to server 
    df = pd.DataFrame(columns=['sentence','embedding'])
    for i, m in enumerate(msg_gen):
        print('tokenizing sentence before sending')
        tokens_tensor = preprocess(m, tokenizer)
        print(f"sending job {i}")
        send_array(ventilator, tokens_tensor)
        message = recv_array(sink)
        print(f"received reply {message}")
        msg_dictionary = {'sentence':m, 'embedding':message}
        df = df.append(msg_dictionary, ignore_index=True)
        print('reply written to file')
    end = time.time()
    time_taken = end-start
    df.to_csv('processed_xnli.csv', index=False)
    print(f"processing took {time_taken}")

# for creating multilingual emb viz 
df = pd.read_csv('by_language.csv')

def get_encs(sentence):
    with torch.no_grad():
        print(sentence)
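This variant sends work out through a ventilator socket and collects results from a sink, the classic ZeroMQ pipeline (ventilator/worker/sink) pattern, instead of a single request socket. The socket setup is missing from the excerpt; a minimal sketch of how the two ends are usually wired, with placeholder port numbers:

import zmq

context = zmq.Context()

# Upstream end of the pipeline: jobs are pushed out to the workers.
ventilator = context.socket(zmq.PUSH)
ventilator.bind('tcp://*:5557')  # placeholder port

# Downstream end: workers push their results back to this collector.
sink = context.socket(zmq.PULL)
sink.bind('tcp://*:5558')  # placeholder port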
Example 6
def live(audio_recognizer, audio_producer, audio2video, scaler, host):
    import Oger

    me = mp.current_process()
    print me.name, 'PID', me.pid

    context = zmq.Context()
    mic = context.socket(zmq.SUB)
    mic.connect('tcp://{}:{}'.format(host, MIC))
    mic.setsockopt(zmq.SUBSCRIBE, b'')

    speaker = context.socket(zmq.PUSH)
    speaker.connect('tcp://{}:{}'.format(host, SPEAKER)) 

    camera = context.socket(zmq.SUB)
    camera.connect('tcp://{}:{}'.format(host, CAMERA))
    camera.setsockopt(zmq.SUBSCRIBE, b'')

    projector = context.socket(zmq.PUSH)
    projector.connect('tcp://{}:{}'.format(host, PROJECTOR)) 

    stateQ = context.socket(zmq.SUB)
    stateQ.connect('tcp://{}:{}'.format(host, STATE))
    stateQ.setsockopt(zmq.SUBSCRIBE, b'') 

    eventQ = context.socket(zmq.SUB)
    eventQ.connect('tcp://{}:{}'.format(host, EVENT))
    eventQ.setsockopt(zmq.SUBSCRIBE, b'') 

    snapshot = context.socket(zmq.REQ)
    snapshot.connect('tcp://{}:{}'.format(host, SNAPSHOT))
    snapshot.send(b'Send me the state, please')
    state = snapshot.recv_json()

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, EXTERNAL))
    sender.send_json('register {}'.format(me.name))

    poller = zmq.Poller()
    poller.register(mic, zmq.POLLIN)
    poller.register(camera, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)
    poller.register(eventQ, zmq.POLLIN)

    previous_prediction = []
    # Approximately 10 seconds of audio/video
    error = deque(maxlen=3400)
    audio = deque(maxlen=3400)
    video = deque(maxlen=80)
    while True:
        events = dict(poller.poll())

        if stateQ in events:
            state = stateQ.recv_json()

        if mic in events:
            new_audio = np.atleast_2d(recv_array(mic))
            if state['record']:
                scaled_signals = scaler.transform(new_audio)
                audio.append(np.ndarray.flatten(scaled_signals))
                if len(previous_prediction):
                    error.append(scaled_signals[:,idxs].flatten() - previous_prediction.flatten())
                previous_prediction = audio_recognizer(scaled_signals[:,idxs]) # This would not be necessary in a centralized recognizer

        if camera in events:
            new_video = recv_array(camera)
            if state['record']:
                video.append(new_video)

        if eventQ in events:
            pushbutton = eventQ.recv_json()
            if 'reset' in pushbutton:
                error.clear()
                audio.clear()
                video.clear()
                previous_prediction = []

            if 'rmse' in pushbutton:
                rmse = np.sqrt((np.array(list(error)).flatten() ** 2).mean())
                sender.send_json('{} RMSE {}'.format(me.name, rmse))
                
            if 'respond' in pushbutton and pushbutton['respond'] == me.name:
                audio_data = np.array(list(audio))
                video_data = np.array(list(video))

                print '{} chosen to respond. Audio data: {} Video data: {}'.format(me.name, audio_data.shape, video_data.shape)

                if audio_data.size == 0 and video_data.size == 0:
                    print '*** Audio data and video data arrays are empty. Aborting the response. ***'
                    continue

                row_diff = audio_data.shape[0] - audio_producer.length
                if row_diff < 0:
                    audio_data = np.vstack([ audio_data, np.zeros((-row_diff, audio_data.shape[1])) ])
                else:
                    audio_data = audio_data[:audio_producer.length]

                sound = audio_producer(audio_data)
                
                stride = audio_producer.length/audio2video.length
                projection = audio2video(audio_data[audio_data.shape[0] - stride*audio2video.length::stride])

                # DREAM MODE: You can train a network with zero audio input -> video output, and use this
                # to recreate the original training sequence with scary accuracy...

                for row in projection:
                    send_array(projector, row)

                for row in scaler.inverse_transform(sound):
                    send_array(speaker, row)

            if 'save' in pushbutton:
                filename = '{}.{}'.format(pushbutton['save'], me.name)
                pickle.dump((audio_recognizer, audio_producer, audio2video, scaler, host), file(filename, 'w'))
                print '{} saved as file {} ({})'.format(me.name, filename, filesize(filename))
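live() bootstraps its state with a REQ/REP snapshot exchange before switching to the STATE subscription for incremental updates. The answering side of that handshake is not in this listing; a minimal sketch, assuming the state is kept in a plain dict by the publishing process:

import zmq

def snapshot_server(state, port):
    # Hypothetical REP end of the snapshot handshake used by live().
    context = zmq.Context()
    snapshot = context.socket(zmq.REP)
    snapshot.bind('tcp://*:{}'.format(port))
    while True:
        snapshot.recv()            # any request triggers a reply
        snapshot.send_json(state)  # hand the current state to the newcomer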
Example 7
def new_learn_audio(host, debug=False):
    context = zmq.Context()

    mic = context.socket(zmq.SUB)
    mic.connect('tcp://{}:{}'.format(host, IO.MIC))
    mic.setsockopt(zmq.SUBSCRIBE, b'')

    dreamQ = context.socket(zmq.PUSH)
    dreamQ.connect('tcp://{}:{}'.format(host, IO.DREAM))


    stateQ, eventQ, brainQ = _three_amigos(context, host)

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, IO.EXTERNAL))

    counterQ = context.socket(zmq.REQ)
    counterQ.connect('tcp://{}:{}'.format(host, IO.COUNTER))
    
    poller = zmq.Poller()
    poller.register(mic, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)
    poller.register(eventQ, zmq.POLLIN)

    audio = deque()
    NAPs = []
    wavs = []
    wav_audio_ids = {}
    NAP_hashes = {}

    audio_classifier = []
    audio_recognizer = []
    global_audio_recognizer = []
    mixture_audio_recognizer = []
    maxlen = []

    deleted_ids = []
    
    state = stateQ.recv_json()
    
    black_list = open('black_list.txt', 'a')

    audio_memory = AudioMemory()
    
    if debug:
        import matplotlib.pyplot as plt
        plt.ion()

    while True:
        events = dict(poller.poll())
        
        if stateQ in events:
            state = stateQ.recv_json()

        if mic in events:
            new_audio = utils.recv_array(mic)
            if state['record']:
                audio.append(new_audio)

        if eventQ in events:
            pushbutton = eventQ.recv_json()
            if 'learn' in pushbutton:
                try:
                    t0 = time.time()
                    filename = pushbutton['filename']
                    audio_segments = utils.get_segments(filename)

                    print 'Learning {} duration {} seconds with {} segments'.format(filename, audio_segments[-1], len(audio_segments)-1)
                    new_sentence = utils.csv_to_array(filename + 'cochlear')
                    norm_segments = np.rint(new_sentence.shape[0]*audio_segments/audio_segments[-1]).astype('int')

                    audio_ids = []
                    new_audio_hash = []
                    amps = utils.get_amps(filename)
                    most_significant_value = -np.inf
                    most_significant_audio_id = []

                    original_NAP_length = len(NAPs)
                    
                    for segment, new_sound in enumerate([ utils.trim_right(new_sentence[norm_segments[i]:norm_segments[i+1]]) for i in range(len(norm_segments)-1) ]):
                        # We filter out short, abrupt sounds with lots of noise.
                        if np.mean(new_sound) < 2 or new_sound.shape[0] == 0:
                            black_list.write('{} {}\n'.format(filename, segment))
                            print 'BLACKLISTED segment {} in file {}'.format(segment, filename)
                            continue

                        if debug:
                            utils.plot_NAP_and_energy(new_sound, plt)

                        audio_id = audio_memory.learn(new_sound, filename, [ audio_segments[segment], audio_segments[segment+1] ])

                        # START LEGACY
                        try:
                            wavs[audio_id].append(filename)
                        except:
                            wavs.append([filename])
                        wav_audio_ids[(filename, audio_id)] = [ audio_segments[segment], audio_segments[segment+1] ]
                        # END LEGACY
                        
                        audio_ids.append(audio_id)
                        if amps[segment] > most_significant_value:
                            most_significant_audio_id = audio_id
                            most_significant_value = amps[segment]

                    black_list.flush()
                    print 'AUDIO IDs after blacklisting {}'.format(audio_ids)
                    if len(audio_ids):
                        # while len(NAPs) - len(deleted_ids) > AUDIO_MEMORY_SIZE:
                        #     utils.delete_loner(counterQ, NAPs, 'audio_ids_counter', int(AUDIO_MEMORY_SIZE*PROTECT_PERCENTAGE), deleted_ids)

                        # maxlen = max([ m.shape[0] for memory in NAPs for m in memory if len(m) ])
                        # memories = [ np.ndarray.flatten(utils.zero_pad(m, maxlen)) for memory in NAPs for m in memory if len(m) ]

                        # targets = [ i for i,f in enumerate(NAPs) for k in f if len(k) ]
                        # audio_classifier = train_rPCA_SVM(memories, targets)

                        # all_hammings = [ utils.hamming_distance(new_audio_hash[i], new_audio_hash[j])
                        #                                         for i in range(len(new_audio_hash)) for j in range(len(new_audio_hash)) if i > j ]
                    
                        # print 'RHYME VALUE', np.mean(sorted(all_hammings)[int(len(all_hammings)/2):])
                        # rhyme = np.mean(sorted(all_hammings)[int(len(all_hammings)/2):]) < RHYME_HAMMERTIME

                        # sender.send_json('rhyme {}'.format(rhyme))

                        brainQ.send_pyobj(['audio_learn', filename, audio_ids, audio_memory, most_significant_audio_id, wavs, wav_audio_ids])
                        print 'Audio learned from {} in {} seconds'.format(filename, time.time() - t0)
                    else:
                        print 'SKIPPING fully blacklisted file {}'.format(filename)
                except:
                    utils.print_exception('Audio learning aborted.')

                audio.clear()

            if 'dream' in pushbutton:
                new_dream(audio_memory)
                     
            if 'save' in pushbutton:
                utils.save('{}.{}'.format(pushbutton['save'], mp.current_process().name), [ deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory ])
                
            if 'load' in pushbutton:
                deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory = utils.load('{}.{}'.format(pushbutton['load'], mp.current_process().name))