def recognize(host):
    me = mp.current_process()
    print me.name, 'PID', me.pid

    context = zmq.Context()

    rec_in = context.socket(zmq.SUB)
    rec_in.connect('tcp://{}:{}'.format(host, RECOGNIZE_IN))
    rec_in.setsockopt(zmq.SUBSCRIBE, b'')

    rec_learn = context.socket(zmq.SUB)
    rec_learn.connect('tcp://{}:{}'.format(host, RECOGNIZE_LEARN))
    rec_learn.setsockopt(zmq.SUBSCRIBE, b'')

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, EXTERNAL))

    poller = zmq.Poller()
    poller.register(rec_in, zmq.POLLIN)
    poller.register(rec_learn, zmq.POLLIN)

    memories = []
    recognizer = []  # Replaced by a trained categorizing network after the first learn event.

    while True:
        events = dict(poller.poll())

        if rec_in in events:
            audio_segment = recv_array(rec_in)
            if len(memories):  # The recognizer only exists once something has been learned.
                scaler = pp.MinMaxScaler()
                scaled_audio = scaler.fit_transform(audio_segment)
                output = recognizer(scaled_audio)
                # The category with the highest mean activation over the segment wins.
                winner = np.argmax(np.mean(output, axis=0))
                sender.send_json('winner {}'.format(winner))

        if rec_learn in events:
            audio_segment = recv_array(rec_learn)
            scaler = pp.MinMaxScaler()
            scaled_audio = scaler.fit_transform(audio_segment)
            memories.append(scaled_audio)

            # One-hot targets: each memory maps to its own output column.
            targets = []
            for i, memory in enumerate(memories):
                target = np.zeros((memory.shape[0], len(memories)))
                target[:, i] = 1
                targets.append(target)

            start_time = time.time()
            recognizer = _train_network(np.vstack(memories), np.vstack(targets),
                                        output_dim=200, leak_rate=.7)
            print 'Learned new categorizing network in {} seconds.'.format(time.time() - start_time)
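# NOTE: _train_network is not defined in this excerpt. Below is a minimal sketch,
# assuming it wraps an Oger leaky-integrator reservoir with a ridge regression
# readout (suggested by the output_dim and leak_rate arguments); the exact node
# choices here are assumptions, not the original implementation.
import mdp
import Oger
import numpy as np

def _train_network_sketch(x, y, output_dim=200, leak_rate=.7):
    reservoir = Oger.nodes.LeakyReservoirNode(input_dim=x.shape[1],
                                              output_dim=output_dim,
                                              leak_rate=leak_rate)
    readout = Oger.nodes.RidgeRegressionNode()
    flow = mdp.Flow([reservoir, readout])
    # Only the readout is trainable; it is fitted on (reservoir state, target) pairs.
    flow.train([[x], [(x, y)]])
    return flow  # an mdp.Flow is callable: flow(new_input) -> class activations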
def idle(host):
    context = zmq.Context()

    face = context.socket(zmq.SUB)
    face.connect('tcp://{}:{}'.format(host, IO.FACE))
    face.setsockopt(zmq.SUBSCRIBE, b'')

    robocontrol = context.socket(zmq.PUSH)
    robocontrol.connect('tcp://{}:{}'.format(host, IO.ROBO))

    stateQ = context.socket(zmq.SUB)
    stateQ.connect('tcp://{}:{}'.format(host, IO.STATE))
    stateQ.setsockopt(zmq.SUBSCRIBE, b'')
    state = stateQ.recv_json()

    poller = zmq.Poller()
    poller.register(face, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, IO.EXTERNAL))

    face_timer = 0
    saysomething_timer = 0
    saysomething_interval = 10
    urge_to_say_something = 0

    def update_urge_to_say_something(urge_to_say_something, saysomething_interval):
        # Linear increase over time for now. This could be more sophisticated,
        # also including random impulses, emotions, etc.
        return urge_to_say_something + saysomething_interval

    while True:
        events = dict(poller.poll(timeout=100))

        if face in events:
            new_face = utils.recv_array(face)
            face_timer = time.time()

        # If no face has been seen for a while, pan/tilt randomly to search for one.
        if time.time() - face_timer > np.random.rand()*1.5 + 1:
            print '[self.] searches for a face'
            robocontrol.send_json([ 1, 'pan', (2*np.random.rand() - 1)/10 ])
            robocontrol.send_json([ 1, 'tilt', (2*np.random.rand() - 1)/10 ])
            face_timer = time.time()

        if stateQ in events:
            state = stateQ.recv_json()

        if not state['enable_say_something']:
            urge_to_say_something = 0

        if state['enable_say_something'] and not state['i_am_speaking'] \
           and time.time() - saysomething_timer > saysomething_interval:
            urge_to_say_something = update_urge_to_say_something(urge_to_say_something,
                                                                 saysomething_interval)
            sender.send_json('urge_to_say_something {}'.format(urge_to_say_something))
            saysomething_timer = time.time()
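# Sketch of the receiving end for the pan/tilt messages sent by idle() above.
# The actual robot controller is not part of this excerpt; this assumes it binds
# a PULL socket on IO.ROBO and reads each message as [robot_id, axis, delta].
import zmq

def robocontrol_listener_sketch(port):
    context = zmq.Context()
    robo = context.socket(zmq.PULL)
    robo.bind('tcp://*:{}'.format(port))
    while True:
        robot_id, axis, delta = robo.recv_json()
        # A real controller would nudge the named axis ('pan' or 'tilt') by delta here.
        print robot_id, axis, delta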
def video():
    import cv2

    cv2.namedWindow('Output', cv2.WND_PROP_FULLSCREEN)
    camera = cv2.VideoCapture(0)

    context = zmq.Context()

    publisher = context.socket(zmq.PUB)
    publisher.bind('tcp://*:{}'.format(CAMERA))

    projector = context.socket(zmq.PULL)
    projector.bind('tcp://*:{}'.format(PROJECTOR))

    eventQ = context.socket(zmq.SUB)
    eventQ.connect('tcp://localhost:{}'.format(EVENT))
    eventQ.setsockopt(zmq.SUBSCRIBE, b'')

    poller = zmq.Poller()
    poller.register(eventQ, zmq.POLLIN)
    poller.register(projector, zmq.POLLIN)

    while True:
        events = dict(poller.poll(timeout=0))

        if eventQ in events:
            pushbutton = eventQ.recv_json()
            if 'display2' in pushbutton:
                cv2.moveWindow('Output', 2000, 100)
            if 'fullscreen' in pushbutton:
                cv2.setWindowProperty('Output', cv2.WND_PROP_FULLSCREEN,
                                      cv2.cv.CV_WINDOW_FULLSCREEN)

        if projector in events:
            cv2.imshow('Output', cv2.resize(recv_array(projector), FRAME_SIZE))
        else:
            cv2.imshow('Output', np.zeros(FRAME_SIZE[::-1]))

        _, frame = camera.read()
        frame = cv2.resize(frame, FRAME_SIZE)
        send_array(publisher, frame)

        cv2.waitKey(VIDEO_SAMPLE_TIME)
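# send_array/recv_array are used throughout these workers but not defined in this
# excerpt. They presumably follow the standard pyzmq pattern of a JSON metadata
# frame (dtype, shape) followed by the raw buffer; a sketch under that assumption:
import numpy as np
import zmq

def send_array(socket, A, flags=0, copy=True, track=False):
    # Send dtype/shape as JSON, then the array bytes as a second frame.
    md = dict(dtype=str(A.dtype), shape=A.shape)
    socket.send_json(md, flags | zmq.SNDMORE)
    return socket.send(A, flags, copy=copy, track=track)

def recv_array(socket, flags=0, copy=True, track=False):
    # Receive the metadata frame, then rebuild the numpy array from the buffer.
    md = socket.recv_json(flags)
    msg = socket.recv(flags, copy=copy, track=track)
    A = np.frombuffer(msg, dtype=md['dtype'])
    return A.reshape(md['shape'])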
import time
from argparse import ArgumentParser

import jsonlines
import pandas as pd


def data_gen(jsonl):
    '''Takes in a jsonl file and yields a generator for feeding data to requests.

    :param jsonl: path to jsonl file with text data
    :return: python generator for passing text to the client'''
    for msg in jsonlines.open(jsonl):
        yield msg


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("jsonl",
                        help='path to jsonl file containing newline-delimited lines of text for encoding')
    args = parser.parse_args()

    start = time.time()
    msg_gen = data_gen(args.jsonl)

    # Send data to the server and collect the returned embeddings.
    df = pd.DataFrame(columns=['sentence', 'embedding'])
    for i, m in enumerate(msg_gen):
        print("sending request %s" % i)
        socket.send_json(m)
        message = recv_array(socket)
        print(f"received reply {message}")
        msg_dictionary = {'sentence': m, 'embedding': message}
        df = df.append(msg_dictionary, ignore_index=True)
        print('reply appended to dataframe')

    end = time.time()
    time_taken = end - start
    df.to_csv('processed_xnli.csv', index=False)
    print(f"processing took {time_taken}")
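# The script above relies on a `socket` object (and the recv_array helper) that
# are created elsewhere in the file. A plausible setup, assuming the encoding
# server exposes a single REQ/REP endpoint (the address and port below are
# placeholders, not taken from the original):
import zmq

context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect('tcp://localhost:5555')  # hypothetical server address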
import time
from argparse import ArgumentParser

import jsonlines
import pandas as pd
import torch


def data_gen(jsonl):
    '''Yields lines of text from a jsonl file for encoding.'''
    for msg in jsonlines.open(jsonl):
        yield msg


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("jsonl",
                        help='path to jsonl file containing newline-delimited lines of text for encoding')
    args = parser.parse_args()

    start = time.time()
    msg_gen = data_gen(args.jsonl)

    # Read sentences, tokenize and send to the server.
    df = pd.DataFrame(columns=['sentence', 'embedding'])
    for i, m in enumerate(msg_gen):
        print('tokenizing sentence before sending')
        tokens_tensor = preprocess(m, tokenizer)
        print(f"sending job {i}")
        send_array(ventilator, tokens_tensor)  # push the tokenized sentence to the workers
        message = recv_array(sink)
        print(f"received reply {message}")
        msg_dictionary = {'sentence': m, 'embedding': message}
        df = df.append(msg_dictionary, ignore_index=True)
        print('reply appended to dataframe')

    end = time.time()
    time_taken = end - start
    df.to_csv('processed_xnli.csv', index=False)
    print(f"processing took {time_taken}")


# For creating the multilingual embedding visualization.
df = pd.read_csv('by_language.csv')

def get_encs(sentence):
    with torch.no_grad():
        print(sentence)
def live(audio_recognizer, audio_producer, audio2video, scaler, host):
    import Oger

    me = mp.current_process()
    print me.name, 'PID', me.pid

    context = zmq.Context()

    mic = context.socket(zmq.SUB)
    mic.connect('tcp://{}:{}'.format(host, MIC))
    mic.setsockopt(zmq.SUBSCRIBE, b'')

    speaker = context.socket(zmq.PUSH)
    speaker.connect('tcp://{}:{}'.format(host, SPEAKER))

    camera = context.socket(zmq.SUB)
    camera.connect('tcp://{}:{}'.format(host, CAMERA))
    camera.setsockopt(zmq.SUBSCRIBE, b'')

    projector = context.socket(zmq.PUSH)
    projector.connect('tcp://{}:{}'.format(host, PROJECTOR))

    stateQ = context.socket(zmq.SUB)
    stateQ.connect('tcp://{}:{}'.format(host, STATE))
    stateQ.setsockopt(zmq.SUBSCRIBE, b'')

    eventQ = context.socket(zmq.SUB)
    eventQ.connect('tcp://{}:{}'.format(host, EVENT))
    eventQ.setsockopt(zmq.SUBSCRIBE, b'')

    snapshot = context.socket(zmq.REQ)
    snapshot.connect('tcp://{}:{}'.format(host, SNAPSHOT))
    snapshot.send(b'Send me the state, please')
    state = snapshot.recv_json()

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, EXTERNAL))
    sender.send_json('register {}'.format(me.name))

    poller = zmq.Poller()
    poller.register(mic, zmq.POLLIN)
    poller.register(camera, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)
    poller.register(eventQ, zmq.POLLIN)

    previous_prediction = []

    # Approximately 10 seconds of audio/video
    error = deque(maxlen=3400)
    audio = deque(maxlen=3400)
    video = deque(maxlen=80)

    while True:
        events = dict(poller.poll())

        if stateQ in events:
            state = stateQ.recv_json()

        if mic in events:
            new_audio = np.atleast_2d(recv_array(mic))
            if state['record']:
                scaled_signals = scaler.transform(new_audio)
                audio.append(np.ndarray.flatten(scaled_signals))
                # NOTE: idxs is assumed to be defined at module level in the original program.
                if len(previous_prediction):
                    error.append(scaled_signals[:, idxs].flatten() - previous_prediction.flatten())
                previous_prediction = audio_recognizer(scaled_signals[:, idxs])  # This would not be necessary in a centralized recognizer

        if camera in events:
            new_video = recv_array(camera)
            if state['record']:
                video.append(new_video)

        if eventQ in events:
            pushbutton = eventQ.recv_json()

            if 'reset' in pushbutton:
                error.clear()
                audio.clear()
                video.clear()
                previous_prediction = []

            if 'rmse' in pushbutton:
                rmse = np.sqrt((np.array(list(error)).flatten()**2).mean())
                sender.send_json('{} RMSE {}'.format(me.name, rmse))

            if 'respond' in pushbutton and pushbutton['respond'] == me.name:
                audio_data = np.array(list(audio))
                video_data = np.array(list(video))

                print '{} chosen to respond. Audio data: {} Video data: {}'.format(
                    me.name, audio_data.shape, video_data.shape)

                if audio_data.size == 0 and video_data.size == 0:
                    print '*** Audio data and video data arrays are empty. Aborting the response. ***'
                    continue

                # Pad or truncate the recorded audio to the length the producer expects.
                row_diff = audio_data.shape[0] - audio_producer.length
                if row_diff < 0:
                    audio_data = np.vstack([ audio_data,
                                             np.zeros((-row_diff, audio_data.shape[1])) ])
                else:
                    audio_data = audio_data[:audio_producer.length]

                sound = audio_producer(audio_data)

                stride = audio_producer.length/audio2video.length
                projection = audio2video(audio_data[audio_data.shape[0] - stride*audio2video.length::stride])

                # DREAM MODE: You can train a network with zero audio input -> video output, and use this
                # to recreate the original training sequence with scary accuracy...
                for row in projection:
                    send_array(projector, row)

                for row in scaler.inverse_transform(sound):
                    send_array(speaker, row)

            if 'save' in pushbutton:
                filename = '{}.{}'.format(pushbutton['save'], me.name)
                pickle.dump((audio_recognizer, audio_producer, audio2video, scaler, host),
                            file(filename, 'w'))
                print '{} saved as file {} ({})'.format(me.name, filename, filesize(filename))
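# Counterpart of the 'save' branch in live() above, shown as a sketch (not part
# of the original code): restore the pickled brain tuple and resume live() with it.
import pickle

def load_brain_sketch(filename, host):
    audio_recognizer, audio_producer, audio2video, scaler, _ = pickle.load(open(filename))
    live(audio_recognizer, audio_producer, audio2video, scaler, host)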
def new_learn_audio(host, debug=False):
    context = zmq.Context()

    mic = context.socket(zmq.SUB)
    mic.connect('tcp://{}:{}'.format(host, IO.MIC))
    mic.setsockopt(zmq.SUBSCRIBE, b'')

    dreamQ = context.socket(zmq.PUSH)
    dreamQ.connect('tcp://{}:{}'.format(host, IO.DREAM))

    stateQ, eventQ, brainQ = _three_amigos(context, host)

    sender = context.socket(zmq.PUSH)
    sender.connect('tcp://{}:{}'.format(host, IO.EXTERNAL))

    counterQ = context.socket(zmq.REQ)
    counterQ.connect('tcp://{}:{}'.format(host, IO.COUNTER))

    poller = zmq.Poller()
    poller.register(mic, zmq.POLLIN)
    poller.register(stateQ, zmq.POLLIN)
    poller.register(eventQ, zmq.POLLIN)

    audio = deque()

    NAPs = []
    wavs = []
    wav_audio_ids = {}
    NAP_hashes = {}

    audio_classifier = []
    audio_recognizer = []
    global_audio_recognizer = []
    mixture_audio_recognizer = []
    maxlen = []

    deleted_ids = []

    state = stateQ.recv_json()

    black_list = open('black_list.txt', 'a')

    audio_memory = AudioMemory()

    if debug:
        import matplotlib.pyplot as plt
        plt.ion()

    while True:
        events = dict(poller.poll())

        if stateQ in events:
            state = stateQ.recv_json()

        if mic in events:
            new_audio = utils.recv_array(mic)
            if state['record']:
                audio.append(new_audio)

        if eventQ in events:
            pushbutton = eventQ.recv_json()

            if 'learn' in pushbutton:
                try:
                    t0 = time.time()
                    filename = pushbutton['filename']
                    audio_segments = utils.get_segments(filename)

                    print 'Learning {} duration {} seconds with {} segments'.format(filename, audio_segments[-1], len(audio_segments)-1)

                    new_sentence = utils.csv_to_array(filename + 'cochlear')
                    norm_segments = np.rint(new_sentence.shape[0]*audio_segments/audio_segments[-1]).astype('int')

                    audio_ids = []
                    new_audio_hash = []
                    amps = utils.get_amps(filename)
                    most_significant_value = -np.inf
                    most_significant_audio_id = []
                    original_NAP_length = len(NAPs)

                    for segment, new_sound in enumerate([ utils.trim_right(new_sentence[norm_segments[i]:norm_segments[i+1]])
                                                          for i in range(len(norm_segments)-1) ]):
                        # We filter out short, abrupt sounds with lots of noise.
                        if np.mean(new_sound) < 2 or new_sound.shape[0] == 0:
                            black_list.write('{} {}\n'.format(filename, segment))
                            print 'BLACKLISTED segment {} in file {}'.format(segment, filename)
                            continue

                        if debug:
                            utils.plot_NAP_and_energy(new_sound, plt)

                        audio_id = audio_memory.learn(new_sound,
                                                      filename,
                                                      [ audio_segments[segment], audio_segments[segment+1] ])

                        # START LEGACY
                        try:
                            wavs[audio_id].append(filename)
                        except:
                            wavs.append([filename])

                        wav_audio_ids[(filename, audio_id)] = [ audio_segments[segment], audio_segments[segment+1] ]
                        # END LEGACY

                        audio_ids.append(audio_id)

                        if amps[segment] > most_significant_value:
                            most_significant_audio_id = audio_id
                            most_significant_value = amps[segment]

                    black_list.flush()
                    print 'AUDIO IDs after blacklisting {}'.format(audio_ids)
                    if len(audio_ids):
                        # while len(NAPs) - len(deleted_ids) > AUDIO_MEMORY_SIZE:
                        #     utils.delete_loner(counterQ, NAPs, 'audio_ids_counter', int(AUDIO_MEMORY_SIZE*PROTECT_PERCENTAGE), deleted_ids)

                        # maxlen = max([ m.shape[0] for memory in NAPs for m in memory if len(m) ])
                        # memories = [ np.ndarray.flatten(utils.zero_pad(m, maxlen)) for memory in NAPs for m in memory if len(m) ]
                        # targets = [ i for i,f in enumerate(NAPs) for k in f if len(k) ]

                        # audio_classifier = train_rPCA_SVM(memories, targets)

                        # all_hammings = [ utils.hamming_distance(new_audio_hash[i], new_audio_hash[j])
                        #                  for i in range(len(new_audio_hash)) for j in range(len(new_audio_hash)) if i > j ]

                        # print 'RHYME VALUE', np.mean(sorted(all_hammings)[int(len(all_hammings)/2):])
                        # rhyme = np.mean(sorted(all_hammings)[int(len(all_hammings)/2):]) < RHYME_HAMMERTIME
                        # sender.send_json('rhyme {}'.format(rhyme))

                        brainQ.send_pyobj(['audio_learn', filename, audio_ids, audio_memory, most_significant_audio_id, wavs, wav_audio_ids])
                        print 'Audio learned from {} in {} seconds'.format(filename, time.time() - t0)
                    else:
                        print 'SKIPPING fully blacklisted file {}'.format(filename)

                except:
                    utils.print_exception('Audio learning aborted.')

                audio.clear()

            if 'dream' in pushbutton:
                new_dream(audio_memory)

            if 'save' in pushbutton:
                utils.save('{}.{}'.format(pushbutton['save'], mp.current_process().name),
                           [ deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory ])

            if 'load' in pushbutton:
                deleted_ids, NAPs, wavs, wav_audio_ids, NAP_hashes, audio_classifier, maxlen, audio_memory = utils.load('{}.{}'.format(pushbutton['load'], mp.current_process().name))
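# Sketch of the consuming end of brainQ. Only the message layout is taken from
# new_learn_audio() above; the socket type and the real brain process are
# assumptions (here a PULL socket bound on a hypothetical port).
import zmq

def brain_listener_sketch(port):
    context = zmq.Context()
    brainQ = context.socket(zmq.PULL)
    brainQ.bind('tcp://*:{}'.format(port))
    while True:
        msg = brainQ.recv_pyobj()
        if msg[0] == 'audio_learn':
            (_, filename, audio_ids, audio_memory,
             most_significant_audio_id, wavs, wav_audio_ids) = msg
            # A real brain process would update its own copies of these structures here.
            print 'audio memories received from', filename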